merge.

checkpoint. does not compile.
author: Sergei Golubchik <sergii@pisem.net> 2010-11-25 18:17:28 +0100
committer: Sergei Golubchik <sergii@pisem.net> 2010-11-25 18:17:28 +0100
commit: 65ca700def99289cc31a7040537f5aa6e12bf485 (patch)
tree: 97b3a07299b626c519da0e80c122b5b79b933914 /sql
parent: 2ab57de38d13d927ddff2d51aed4af34e13998f5 (diff)
parent: 6e5bcca7935d3c62f84bb640e5357664a210ee12 (diff)
download: mariadb-git-65ca700def99289cc31a7040537f5aa6e12bf485.tar.gz
179 files changed, 33398 insertions, 6388 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 9d307da364d..9bcce146ebe 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -20,6 +20,7 @@ ${CMAKE_SOURCE_DIR}/regex
 ${ZLIB_INCLUDE_DIR}
 ${SSL_INCLUDE_DIRS}
 ${CMAKE_BINARY_DIR}/sql
+${CMAKE_SOURCE_DIR}/extra/libevent 
 )
 
 SET(GEN_SOURCES
@@ -50,6 +51,10 @@ SET (SQL_SOURCE
                log_event_old.cc rpl_record_old.cc
                message.h mf_iocache.cc my_decimal.cc ../sql-common/my_time.c
                mysqld.cc net_serv.cc  keycaches.cc
+               ../sql-common/client_plugin.c create_options.cc
+               multi_range_read.cc opt_index_cond_pushdown.cc
+               opt_subselect.cc opt_table_elimination.cc
+               sql_expression_cache.cc sql_join_cache.cc
                opt_range.cc opt_range.h opt_sum.cc 
                ../sql-common/pack.c parse_file.cc password.c procedure.cc 
                protocol.cc records.cc repl_failsafe.cc rpl_filter.cc set_var.cc 
@@ -85,7 +90,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
 ADD_DEPENDENCIES(sql GenServerSource)
 DTRACE_INSTRUMENT(sql)
 TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} 
-  mysys dbug strings vio regex   
+  mysys dbug strings vio regex libevent psapi.lib
   ${LIBWRAP} ${LIBCRYPT} ${LIBDL}
   ${SSL_LIBRARIES})
 
diff --git a/sql/Makefile.am b/sql/Makefile.am
index 1bc195a2981..2bc0d386935 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -22,7 +22,8 @@ MYSQLLIBdir=            $(pkglibdir)
 pkgplugindir =		$(pkglibdir)/plugin
 INCLUDES =		@ZLIB_INCLUDES@ \
 			-I$(top_builddir)/include -I$(top_srcdir)/include \
-			-I$(top_srcdir)/regex -I$(srcdir) $(openssl_includes)
+			-I$(top_srcdir)/regex -I$(srcdir) $(openssl_includes) \
+			$(libevent_includes)
 WRAPLIBS=		@WRAPLIBS@
 SUBDIRS =		share
 libexec_PROGRAMS =	mysqld
@@ -81,7 +82,7 @@ mysqld_DEPENDENCIES=	@mysql_plugin_libs@ $(SUPPORTING_LIBS) libndb.la
 LDADD = $(SUPPORTING_LIBS) @ZLIB_LIBS@ @NDB_SCI_LIBS@
 mysqld_LDADD =		libndb.la \
 			@MYSQLD_EXTRA_LDFLAGS@ \
-			@pstack_libs@ \
+			@pstack_libs@ $(libevent_libs) \
 			@mysql_plugin_libs@ \
 			$(LDADD)  $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \
 			$(yassl_libs) $(openssl_libs) @MYSQLD_EXTRA_LIBS@
@@ -102,6 +103,8 @@ noinst_HEADERS =	item.h item_func.h item_sum.h item_cmpfunc.h \
 			opt_range.h protocol.h rpl_tblmap.h rpl_utility.h \
 			rpl_reporting.h sql_locale.h sql_parse.h \
 			log.h sql_show.h rpl_rli.h rpl_mi.h \
+                        opt_subselect.h log_slow.h multi_range_read.h \
+			create_options.h sql_expression_cache.h \
 			sql_select.h structs.h table.h sql_udf.h hash_filo.h \
 			lex.h lex_symbol.h sql_acl.h sql_crypt.h sql_base.h \
 			sql_table.h key.h lock.h thr_malloc.h strfunc.h \
@@ -141,6 +144,9 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			main.cc mysqld.cc password.c hash_filo.cc hostname.cc \
 			sql_connect.cc scheduler.cc sql_parse.cc \
 			keycaches.cc set_var.cc sql_yacc.yy sys_vars.cc \
+                        sql_join_cache.cc opt_subselect.cc create_options.cc \
+                        opt_table_elimination.cc multi_range_read.cc \
+			opt_index_cond_pushdown.cc sql_expression_cache.cc \
 			sql_base.cc table.cc sql_select.cc sql_insert.cc \
 			sql_reload.cc datadict.cc sql_profile.cc \
 			sql_prepare.cc sql_error.cc sql_locale.cc \
@@ -176,7 +182,7 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.cc sql_partition.cc \
 			rpl_handler.cc mdl.cc transaction.cc sql_audit.cc  \
 			sql_alter.cc sql_partition_admin.cc sha2.cc
 
-nodist_mysqld_SOURCES =	mini_client_errors.c pack.c client.c my_time.c my_user.c 
+nodist_mysqld_SOURCES =	mini_client_errors.c pack.c client.c my_time.c my_user.c client_plugin.c
 
 libndb_la_CPPFLAGS=	@ndbcluster_includes@
 libndb_la_SOURCES=	ha_ndbcluster.cc \
@@ -190,10 +196,10 @@ mysql_tzinfo_to_sql_SOURCES = tztime.cc
 mysql_tzinfo_to_sql_CXXFLAGS= -DTZINFO2SQL
 
 DEFS =			-DMYSQL_SERVER \
-			-DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \
-			-DMYSQL_DATADIR="\"$(MYSQLDATAdir)\"" \
-			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" \
-			-DPLUGINDIR="\"$(pkgplugindir)\"" \
+			-DDEFAULT_MYSQL_HOME='"$(MYSQLBASEdir)"' \
+			-DMYSQL_DATADIR='"$(MYSQLDATAdir)"' \
+			-DSHAREDIR='"$(MYSQLSHAREdir)"' \
+			-DPLUGINDIR='"$(pkgplugindir)"' \
 			-DHAVE_EVENT_SCHEDULER \
 			@DEFS@
 
@@ -202,7 +208,7 @@ BUILT_SOURCES =		$(BUILT_MAINT_SRC) lex_hash.h link_sources
 EXTRA_DIST =		udf_example.c udf_example.def $(BUILT_MAINT_SRC) \
 			nt_servc.cc nt_servc.h \
 			message.mc  message.h message.rc MSG00001.bin \
-			CMakeLists.txt
+			CMakeLists.txt opt_range_mrr.cc
 
 CLEANFILES =        	lex_hash.h sql_yacc.output link_sources
 DISTCLEANFILES =        $(EXTRA_PROGRAMS)
@@ -217,6 +223,8 @@ link_sources:
 	@LN_CP_F@ $(top_srcdir)/sql-common/pack.c pack.c
 	rm -f client.c
 	@LN_CP_F@ $(top_srcdir)/sql-common/client.c client.c
+	rm -f client_plugin.c
+	@LN_CP_F@ $(top_srcdir)/sql-common/client_plugin.c client_plugin.c
 	rm -f my_time.c
 	@LN_CP_F@ $(top_srcdir)/sql-common/my_time.c my_time.c
 	rm -f my_user.c
diff --git a/sql/authors.h b/sql/authors.h
index db0ab00fb86..40fcd4c47f8 100644
--- a/sql/authors.h
+++ b/sql/authors.h
@@ -37,24 +37,36 @@ struct show_table_authors_st {
 */
 
 struct show_table_authors_st show_table_authors[]= {
+  { "Michael (Monty) Widenius", "Tusby, Finland",
+    "Lead developer and main author" },
+  { "David Axmark", "London, England",
+    "MySQL founder; Small stuff long time ago, Monty ripped it out!" },
+  { "Sergei Golubchik", "Kerpen, Germany",
+    "Full-text search, precision math" },
+  { "Igor Babaev", "Bellevue, USA", "Optimizer, keycache, core work"},
+  { "Sergey Petrunia", "St. Petersburg, Russia", "Optimizer"},
+  { "Oleksandr Byelkin", "Lugansk, Ukraine",
+    "Query Cache (4.0), Subqueries (4.1), Views (5.0)" },
   { "Brian (Krow) Aker", "Seattle, WA, USA",
     "Architecture, archive, federated, bunch of little stuff :)" },
   { "Marc Alff", "Denver, CO, USA", "Signal, Resignal, Performance schema" },
   { "Venu Anuganti", "", "Client/server protocol (4.1)" },
-  { "David Axmark", "Uppsala, Sweden",
-    "Small stuff long time ago, Monty ripped it out!" },
+  { "Kristian Nielsen", "Copenhagen, Denmark",
+    "General build stuff," },
   { "Alexander (Bar) Barkov", "Izhevsk, Russia",
     "Unicode and character sets (4.1)" },
+  { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" },
+  { "Konstantin Osipov", "Moscow, Russia",
+    "Prepared statements (4.1), Cursors (5.0)" },
+  { "Dmitri Lenev", "Moscow, Russia",
+    "Time zones support (4.1), Triggers (5.0)" },
   { "Omer BarNir", "Sunnyvale, CA, USA",
     "Testing (sometimes) and general QA stuff" },
-  { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" },
   { "John Birrell", "", "Emulation of pthread_mutex() for OS/2" },
   { "Andreas F. Bobak", "", "AGGREGATE extension to user-defined functions" },
   { "Alexey Botchkov (Holyfoot)", "Izhevsk, Russia",
     "GIS extensions (4.1), embedded server (4.1), precision math (5.0)"},
   { "Reggie Burnett", "Nashville, TN, USA", "Windows development, Connectors" },
-  { "Oleksandr Byelkin", "Lugansk, Ukraine",
-    "Query Cache (4.0), Subqueries (4.1), Views (5.0)" },
   { "Kent Boortz", "Orebro, Sweden", "Test platform, and general build stuff" },
   { "Tim Bunce", "", "mysqlhotcopy" },
   { "Yves Carlier", "", "mysqlaccess" },
@@ -71,8 +83,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Yuri Dario", "", "OS/2 port" },
   { "Andrei Elkin", "Espoo, Finland", "Replication" },
   { "Patrick Galbraith", "Sharon, NH", "Federated Engine, mysqlslap" },
-  { "Sergei Golubchik", "Kerpen, Germany",
-    "Full-text search, precision math" },
   { "Lenz Grimmer", "Hamburg, Germany",
     "Production (build and release) engineering" },
   { "Nikolay Grishakin", "Austin, TX, USA", "Testing - Server" },
@@ -88,8 +98,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Hakan Küçükyılmaz", "Walldorf, Germany", "Testing - Server" },
   { "Greg (Groggy) Lehey", "Uchunga, SA, Australia", "Backup" },
   { "Matthias Leich", "Berlin, Germany", "Testing - Server" },
-  { "Dmitri Lenev", "Moscow, Russia",
-    "Time zones support (4.1), Triggers (5.0)" },
   { "Arjen Lentz", "Brisbane, Australia",
     "Documentation (2001-2004), Dutch error messages, LOG2()" },
   { "Marc Liyanage", "", "Created Mac OS X packages" },
@@ -102,8 +110,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Jonathan (Jeb) Miller", "Kyle, TX, USA",
     "Testing - Cluster, Replication" },
   { "Elliot Murphy", "Cocoa, FL, USA", "Replication and backup" },
-  { "Kristian Nielsen", "Copenhagen, Denmark",
-    "General build stuff" },
   { "Pekka Nouisiainen", "Stockholm, Sweden",
     "NDB Cluster: BLOB support, character set support, ordered indexes" },
   { "Alexander Nozdrin", "Moscow, Russia",
@@ -111,8 +117,6 @@ struct show_table_authors_st show_table_authors[]= {
   { "Per Eric Olsson", "", "Testing of dynamic record format" },
   { "Jonas Oreland", "Stockholm, Sweden",
     "NDB Cluster, Online Backup, lots of other things" },
-  { "Konstantin Osipov", "Moscow, Russia",
-    "Prepared statements (4.1), Cursors (5.0)" },
   { "Alexander (Sasha) Pachev", "Provo, UT, USA",
     "Statement-based replication, SHOW CREATE TABLE, mysql-bench" },
   { "Irena Pancirov", "", "Port to Windows with Borland compiler" },
@@ -150,10 +154,10 @@ struct show_table_authors_st show_table_authors[]= {
   { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" },
   { "Matt Wagner", "Northfield, MN, USA", "Bug fixing" },
   { "Jim Winstead Jr.", "Los Angeles, CA, USA", "Bug fixing" },
-  { "Michael (Monty) Widenius", "Tusby, Finland",
-    "Lead developer and main author" },
   { "Peter Zaitsev", "Tacoma, WA, USA",
     "SHA1(), AES_ENCRYPT(), AES_DECRYPT(), bug fixing" },
+  {"Mark Mark Callaghan", "Texas, USA", "Statistics patches"},
+  {"Percona", "CA, USA", "Microslow patches"},
   {NULL, NULL, NULL}
 };
 
diff --git a/sql/client_settings.h b/sql/client_settings.h
index 7d103d5904d..3152bc47845 100644
--- a/sql/client_settings.h
+++ b/sql/client_settings.h
@@ -21,6 +21,7 @@
 #endif /* CLIENT_SETTINGS_INCLUDED */
 
 #include <thr_alarm.h>
+#include <sql_common.h>
 
 #define CLIENT_CAPABILITIES (CLIENT_LONG_PASSWORD | CLIENT_LONG_FLAG |	  \
                              CLIENT_SECURE_CONNECTION | CLIENT_TRANSACTIONS | \
@@ -30,10 +31,10 @@
 #define set_sigpipe(mysql)
 #define reset_sigpipe(mysql)
 #define read_user_name(A) {}
-#undef HAVE_SMEM
 #undef _CUSTOMCONFIG_
 
-#define mysql_server_init(a,b,c) 0
+#define mysql_server_init(a,b,c) mysql_client_plugin_init()
+#define mysql_server_end()       mysql_client_plugin_deinit()
 
 #ifdef HAVE_REPLICATION
 C_MODE_START
diff --git a/sql/create_options.cc b/sql/create_options.cc
new file mode 100644
index 00000000000..4478fc14791
--- /dev/null
+++ b/sql/create_options.cc
@@ -0,0 +1,614 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  Engine defined options of tables/fields/keys in CREATE/ALTER TABLE.
+*/
+
+#include "mysql_priv.h"
+#include "create_options.h"
+#include <my_getopt.h>
+
+#define FRM_QUOTED_VALUE 0x8000
+
+/**
+  Links this item to the given list end
+
+  @param start           The list beginning or NULL
+  @param end             The list last element or does not matter
+*/
+
+void engine_option_value::link(engine_option_value **start,
+                               engine_option_value **end)
+{
+  DBUG_ENTER("engine_option_value::link");
+  DBUG_PRINT("enter", ("name: '%s' (%u)  value: '%s' (%u)",
+                       name.str, (uint) name.length,
+                       value.str, (uint) value.length));
+  engine_option_value *opt;
+  /* check duplicates to avoid writing them to frm*/
+  for(opt= *start;
+      opt && ((opt->parsed && !opt->value.str) ||
+              my_strnncoll(system_charset_info,
+                           (uchar *)name.str, name.length,
+                           (uchar*)opt->name.str, opt->name.length));
+      opt= opt->next) /* no-op */;
+  if (opt)
+  {
+    opt->value.str= NULL;       /* remove previous value */
+    opt->parsed= TRUE;          /* and don't issue warnings for it anymore */
+  }
+  /*
+    Add this option to the end of the list
+
+    @note: We add even if it is opt->value.str == NULL because it can be
+    ALTER TABLE to remove the option.
+  */
+  if (*start)
+  {
+    (*end)->next= this;
+    *end= this;
+  }
+  else
+  {
+    /*
+      note that is *start == 0, the value of *end does not matter,
+      it can be uninitialized.
+    */
+    *start= *end= this;
+  }
+  DBUG_VOID_RETURN;
+}
+
+static bool report_wrong_value(THD *thd, const char *name, const char *val,
+                               my_bool suppress_warning)
+{
+  if (suppress_warning)
+    return 0;
+
+  if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_BAD_OPTION_VALUE, MYF(0), val, name);
+    return 1;
+  }
+
+  push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_BAD_OPTION_VALUE,
+                      ER(ER_BAD_OPTION_VALUE), val, name);
+  return 0;
+}
+
+static bool report_unknown_option(THD *thd, engine_option_value *val,
+                                  my_bool suppress_warning)
+{
+  DBUG_ENTER("report_unknown_option");
+
+  if (val->parsed || suppress_warning)
+  {
+    DBUG_PRINT("info", ("parsed => exiting"));
+    DBUG_RETURN(FALSE);
+  }
+
+  if (!(thd->variables.sql_mode & MODE_IGNORE_BAD_TABLE_OPTIONS) &&
+      !thd->slave_thread)
+  {
+    my_error(ER_UNKNOWN_OPTION, MYF(0), val->name.str);
+    DBUG_RETURN(TRUE);
+  }
+
+  push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                      ER_UNKNOWN_OPTION, ER(ER_UNKNOWN_OPTION), val->name.str);
+  DBUG_RETURN(FALSE);
+}
+
+static bool set_one_value(ha_create_table_option *opt,
+                          THD *thd, LEX_STRING *value, void *base,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root)
+{
+  DBUG_ENTER("set_one_value");
+  DBUG_PRINT("enter", ("opt: 0x%lx type: %u name '%s' value: '%s'",
+                       (ulong) opt,
+                       opt->type, opt->name,
+                       (value->str ? value->str : "<DEFAULT>")));
+  switch (opt->type)
+  {
+  case HA_OPTION_TYPE_ULL:
+    {
+      ulonglong *val= (ulonglong*)((char*)base + opt->offset);
+      if (!value->str)
+      {
+        *val= opt->def_value;
+        DBUG_RETURN(0);
+      }
+
+      my_option optp=
+        { opt->name, 1, 0, (uchar **)val, 0, 0, GET_ULL,
+          REQUIRED_ARG, opt->def_value, opt->min_value, opt->max_value,
+          0, (long) opt->block_size, 0};
+
+      ulonglong orig_val= strtoull(value->str, NULL, 10);
+      my_bool unused;
+      *val= orig_val;
+      *val= getopt_ull_limit_value(*val, &optp, &unused);
+      if (*val == orig_val)
+        DBUG_RETURN(0);
+
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  case HA_OPTION_TYPE_STRING:
+    {
+      char **val= (char **)((char *)base + opt->offset);
+      if (!value->str)
+      {
+        *val= 0;
+        DBUG_RETURN(0);
+      }
+
+      if (!(*val= strmake_root(root, value->str, value->length)))
+        DBUG_RETURN(1);
+      DBUG_RETURN(0);
+    }
+  case HA_OPTION_TYPE_ENUM:
+    {
+      uint *val= (uint *)((char *)base + opt->offset), num;
+
+      *val= (uint) opt->def_value;
+      if (!value->str)
+        DBUG_RETURN(0);
+
+      const char *start= opt->values, *end;
+
+      num= 0;
+      while (*start)
+      {
+        for (end=start;
+             *end && *end != ',';
+             end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
+        if (!my_strnncoll(system_charset_info,
+                          (uchar*)start, end-start,
+                          (uchar*)value->str, value->length))
+        {
+          *val= num;
+          DBUG_RETURN(0);
+        }
+        if (*end) *end++;
+        start= end;
+        num++;
+      }
+
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  case HA_OPTION_TYPE_BOOL:
+    {
+      bool *val= (bool *)((char *)base + opt->offset);
+      *val= opt->def_value;
+
+      if (!value->str)
+        DBUG_RETURN(0);
+
+      if (!my_strnncoll(system_charset_info,
+                        (const uchar*)"NO", 2,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"OFF", 3,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"0", 1,
+                        (uchar *)value->str, value->length))
+      {
+        *val= FALSE;
+        DBUG_RETURN(FALSE);
+      }
+
+      if (!my_strnncoll(system_charset_info,
+                        (const uchar*)"YES", 3,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"ON", 2,
+                        (uchar *)value->str, value->length) ||
+          !my_strnncoll(system_charset_info,
+                        (const uchar*)"1", 1,
+                        (uchar *)value->str, value->length))
+      {
+        *val= TRUE;
+        DBUG_RETURN(FALSE);
+      }
+
+      DBUG_RETURN(report_wrong_value(thd, opt->name, value->str,
+                                     suppress_warning));
+    }
+  }
+  DBUG_ASSERT(0);
+  my_error(ER_UNKNOWN_ERROR, MYF(0));
+  DBUG_RETURN(1);
+}
+
+static const size_t ha_option_type_sizeof[]=
+{ sizeof(ulonglong), sizeof(char *), sizeof(uint), sizeof(bool)};
+
+/**
+  Creates option structure and parses list of options in it
+
+  @param thd              thread handler
+  @param option_struct    where to store pointer on the option struct
+  @param option_list      list of options given by user
+  @param rules            list of option description by engine
+  @param suppress_warning second parse so we do not need warnings
+  @param root             MEM_ROOT where allocate memory
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+my_bool parse_option_list(THD* thd, void **option_struct,
+                          engine_option_value *option_list,
+                          ha_create_table_option *rules,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root)
+{
+  ha_create_table_option *opt;
+  size_t option_struct_size= 0;
+  engine_option_value *val= option_list;
+  DBUG_ENTER("parse_option_list");
+  DBUG_PRINT("enter",
+             ("struct: 0x%lx list: 0x%lx rules: 0x%lx suppres %u root 0x%lx",
+              (ulong) *option_struct, (ulong)option_list, (ulong)rules,
+              (uint) suppress_warning, (ulong) root));
+
+  if (rules)
+  {
+    LEX_STRING default_val= {NULL, 0};
+    for (opt= rules; opt->name; opt++)
+      set_if_bigger(option_struct_size, opt->offset +
+                    ha_option_type_sizeof[opt->type]);
+
+    *option_struct= alloc_root(root, option_struct_size);
+
+    /* set all values to default */
+    for (opt= rules; opt->name; opt++)
+      set_one_value(opt, thd, &default_val, *option_struct,
+                    suppress_warning, root);
+  }
+
+  for (; val; val= val->next)
+  {
+    for (opt= rules; opt && opt->name; opt++)
+    {
+      if (my_strnncoll(system_charset_info,
+                       (uchar*)opt->name, opt->name_length,
+                       (uchar*)val->name.str, val->name.length))
+        continue;
+
+      if (set_one_value(opt, thd, &val->value,
+                        *option_struct, suppress_warning || val->parsed, root))
+        DBUG_RETURN(TRUE);
+      val->parsed= true;
+      break;
+    }
+    if (report_unknown_option(thd, val, suppress_warning))
+      DBUG_RETURN(TRUE);
+    val->parsed= true;
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Parses all table/fields/keys options
+
+  @param thd             thread handler
+  @param file            handler of the table
+  @parem share           descriptor of the table
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+my_bool parse_engine_table_options(THD *thd, handlerton *ht,
+                                   TABLE_SHARE *share)
+{
+  MEM_ROOT *root= &share->mem_root;
+  DBUG_ENTER("parse_engine_table_options");
+
+  if (parse_option_list(thd, &share->option_struct, share->option_list,
+                        ht->table_options, TRUE, root))
+    DBUG_RETURN(TRUE);
+
+  for (Field **field= share->field; *field; field++)
+  {
+    if (parse_option_list(thd, &(*field)->option_struct, (*field)->option_list,
+                          ht->field_options, TRUE, root))
+      DBUG_RETURN(TRUE);
+  }
+
+  for (uint index= 0; index < share->keys; index ++)
+  {
+    if (parse_option_list(thd, &share->key_info[index].option_struct,
+                          share->key_info[index].option_list,
+                          ht->index_options, TRUE, root))
+      DBUG_RETURN(TRUE);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Returns representation length of key and value in the frm file
+*/
+
+uint engine_option_value::frm_length()
+{
+  /*
+    1 byte  - name length
+    2 bytes - value length
+
+    if value.str is NULL, this option is not written to frm (=DEFAULT)
+  */
+  return value.str ? 1 + name.length + 2 + value.length : 0;
+}
+
+
+/**
+  Returns length of representation of option list in the frm file
+*/
+
+static uint option_list_frm_length(engine_option_value *opt)
+{
+  uint res= 0;
+
+  for (; opt; opt= opt->next)
+    res+= opt->frm_length();
+
+  return res;
+}
+
+
+/**
+  Calculates length of options image in the .frm
+
+  @param table_option_list list of table options
+  @param create_fields     field descriptors list
+  @param keys              number of keys
+  @param key_info          array of key descriptors
+
+  @returns length of image in frm
+*/
+
+uint engine_table_options_frm_length(engine_option_value *table_option_list,
+                                     List<Create_field> &create_fields,
+                                     uint keys, KEY *key_info)
+{
+  List_iterator<Create_field> it(create_fields);
+  Create_field *field;
+  uint res, index;
+  DBUG_ENTER("engine_table_options_frm_length");
+
+  res= option_list_frm_length(table_option_list);
+
+  while ((field= it++))
+    res+= option_list_frm_length(field->option_list);
+
+  for (index= 0; index < keys; index++, key_info++)
+    res+= option_list_frm_length(key_info->option_list);
+
+  /*
+    if there's at least one option somewhere (res > 0)
+    we write option lists for all fields and keys, zero-terminated.
+    If there're no options we write nothing at all (backward compatibility)
+  */
+  DBUG_RETURN(res ? res + 1 + create_fields.elements + keys : 0);
+}
+
+
+/**
+  Writes image of the key and value to the frm image buffer
+
+  @param buff            pointer to the buffer free space beginning
+
+  @returns pointer to byte after last recorded in the buffer
+*/
+
+uchar *engine_option_value::frm_image(uchar *buff)
+{
+  if (value.str)
+  {
+    *buff++= name.length;
+    memcpy(buff, name.str, name.length);
+    buff+= name.length;
+    int2store(buff, value.length | (quoted_value ? FRM_QUOTED_VALUE : 0));
+    buff+= 2;
+    memcpy(buff, (const uchar *) value.str, value.length);
+    buff+= value.length;
+  }
+  return buff;
+}
+
+/**
+  Writes image of the key and value to the frm image buffer
+
+  @param buff            pointer to the buffer to store the options in
+  @param opt             list of options;
+
+  @returns pointer to the end of the stored data in the buffer
+*/
+static uchar *option_list_frm_image(uchar *buff, engine_option_value *opt)
+{
+  for (; opt; opt= opt->next)
+    buff= opt->frm_image(buff);
+
+  *buff++= 0;
+  return buff;
+}
+
+
+/**
+  Writes options image in the .frm buffer
+
+  @param buff              pointer to the buffer
+  @param table_option_list list of table options
+  @param create_fields     field descriptors list
+  @param keys              number of keys
+  @param key_info          array of key descriptors
+
+  @returns pointer to byte after last recorded in the buffer
+*/
+
+uchar *engine_table_options_frm_image(uchar *buff,
+                                      engine_option_value *table_option_list,
+                                      List<Create_field> &create_fields,
+                                      uint keys, KEY *key_info)
+{
+  List_iterator<Create_field> it(create_fields);
+  Create_field *field;
+  KEY *key_info_end= key_info + keys;
+  DBUG_ENTER("engine_table_options_frm_image");
+
+  buff= option_list_frm_image(buff, table_option_list);
+
+  while ((field= it++))
+    buff= option_list_frm_image(buff, field->option_list);
+
+  while (key_info < key_info_end)
+    buff= option_list_frm_image(buff, (key_info++)->option_list);
+
+  DBUG_RETURN(buff);
+}
+
+/**
+  Reads name and value from buffer, then link it in the list
+
+  @param buff            the buffer to read from
+  @param start           The list beginning or NULL
+  @param end             The list last element or does not matter
+  @param root            MEM_ROOT for allocating
+
+  @returns pointer to byte after last recorded in the buffer
+*/
+uchar *engine_option_value::frm_read(const uchar *buff, engine_option_value **start,
+                                     engine_option_value **end, MEM_ROOT *root)
+{
+  LEX_STRING name, value;
+  uint len;
+
+  name.length= buff[0];
+  buff++;
+  if (!(name.str= strmake_root(root, (const char*)buff, name.length)))
+    return NULL;
+  buff+= name.length;
+  len= uint2korr(buff);
+  value.length= len & ~FRM_QUOTED_VALUE;
+  buff+= 2;
+  if (!(value.str= strmake_root(root, (const char*)buff, value.length)))
+    return NULL;
+  buff+= value.length;
+
+  engine_option_value *ptr=new (root)
+    engine_option_value(name, value, len & FRM_QUOTED_VALUE, start, end);
+  if (!ptr)
+    return NULL;
+
+  return (uchar *)buff;
+}
+
+
+/**
+  Reads options from this buffer
+
+  @param buff            the buffer to read from
+  @param length          buffer length
+  @param share           table descriptor
+  @param root            MEM_ROOT for allocating
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+my_bool engine_table_options_frm_read(const uchar *buff, uint length,
+                                      TABLE_SHARE *share)
+{
+  const uchar *buff_end= buff + length;
+  engine_option_value *end;
+  MEM_ROOT *root= &share->mem_root;
+  uint count;
+  DBUG_ENTER("engine_table_options_frm_read");
+
+  while (buff < buff_end && *buff)
+  {
+    if (!(buff= engine_option_value::frm_read(buff, &share->option_list, &end,
+                                              root)))
+      DBUG_RETURN(TRUE);
+  }
+  buff++;
+
+  for (count=0; count < share->fields; count++)
+  {
+    while (buff < buff_end && *buff)
+    {
+      if (!(buff= engine_option_value::frm_read(buff,
+                                                &share->field[count]->option_list,
+                                                &end, root)))
+        DBUG_RETURN(TRUE);
+    }
+    buff++;
+  }
+
+  for (count=0; count < share->keys; count++)
+  {
+    while (buff < buff_end && *buff)
+    {
+      if (!(buff= engine_option_value::frm_read(buff,
+                                                &share->key_info[count].option_list,
+                                                &end, root)))
+        DBUG_RETURN(TRUE);
+    }
+    buff++;
+  }
+
+  if (buff < buff_end)
+    sql_print_warning("Table '%s' was created in a later MariaDB version - "
+                      "unknown table attributes were ignored",
+                      share->table_name.str);
+
+  DBUG_RETURN(buff > buff_end);
+}
+
+/**
+  Merges two lists of engine_option_value's with duplicate removal.
+*/
+
+engine_option_value *merge_engine_table_options(engine_option_value *first,
+                                                engine_option_value *second,
+                                                MEM_ROOT *root)
+{
+  engine_option_value *end, *opt;
+  DBUG_ENTER("merge_engine_table_options");
+  LINT_INIT(end);
+
+  /* find last element */
+  if (first && second)
+    for (end= first; end->next; end= end->next) /* no-op */;
+
+  for (opt= second; opt; opt= opt->next)
+    new (root) engine_option_value(opt->name, opt->value, opt->quoted_value,
+                                   &first, &end);
+  DBUG_RETURN(first);
+}
diff --git a/sql/create_options.h b/sql/create_options.h
new file mode 100644
index 00000000000..b66bbf43570
--- /dev/null
+++ b/sql/create_options.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2010 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  Engine defined options of tables/fields/keys in CREATE/ALTER TABLE.
+*/
+
+#ifndef SQL_CREATE_OPTIONS_INCLUDED
+#define SQL_CREATE_OPTIONS_INCLUDED
+
+#include "handler.h"
+
+class engine_option_value: public Sql_alloc
+{
+ public:
+  LEX_STRING name;
+  LEX_STRING value;
+  engine_option_value *next;    ///< parser puts them in a FIFO linked list
+  bool parsed;                  ///< to detect unrecognized options
+  bool quoted_value;            ///< option=VAL vs. option='VAL'
+
+  engine_option_value(LEX_STRING &name_arg, LEX_STRING &value_arg, bool quoted,
+                      engine_option_value **start, engine_option_value **end) :
+    name(name_arg), value(value_arg),
+    next(NULL), parsed(false), quoted_value(quoted)
+  {
+    link(start, end);
+  }
+  engine_option_value(LEX_STRING &name_arg,
+                      engine_option_value **start, engine_option_value **end) :
+    name(name_arg), value(null_lex_str),
+    next(NULL), parsed(false), quoted_value(false)
+  {
+    link(start, end);
+  }
+  engine_option_value(LEX_STRING &name_arg, ulonglong value_arg,
+                      engine_option_value **start, engine_option_value **end,
+                      MEM_ROOT *root) :
+    name(name_arg), next(NULL), parsed(false), quoted_value(false)
+  {
+    if ((value.str= (char *)alloc_root(root, 22)))
+    {
+      value.length= longlong10_to_str(value_arg, value.str, 10) - value.str;
+      link(start, end);
+    }
+  }
+  static uchar *frm_read(const uchar *buff, engine_option_value **start,
+                         engine_option_value **end, MEM_ROOT *root);
+  void link(engine_option_value **start, engine_option_value **end);
+  uint frm_length();
+  uchar *frm_image(uchar *buff);
+};
+
+typedef struct st_key KEY;
+class Create_field;
+
+my_bool parse_engine_table_options(THD *thd, handlerton *ht,
+                                   TABLE_SHARE *share);
+my_bool parse_option_list(THD* thd, void **option_struct,
+                          engine_option_value *option_list,
+                          ha_create_table_option *rules,
+                          my_bool suppress_warning,
+                          MEM_ROOT *root);
+my_bool engine_table_options_frm_read(const uchar *buff,
+                                      uint length,
+                                      TABLE_SHARE *share);
+engine_option_value *merge_engine_table_options(engine_option_value *source,
+                                                engine_option_value *changes,
+                                                MEM_ROOT *root);
+
+uint engine_table_options_frm_length(engine_option_value *table_option_list,
+                                     List<Create_field> &create_fields,
+                                     uint keys, KEY *key_info);
+uchar *engine_table_options_frm_image(uchar *buff,
+                                      engine_option_value *table_option_list,
+                                      List<Create_field> &create_fields,
+                                      uint keys, KEY *key_info);
+#endif
diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc
index 52c509621ac..5a1844d730e 100644
--- a/sql/event_data_objects.cc
+++ b/sql/event_data_objects.cc
@@ -739,7 +739,6 @@ bool get_next_time(const Time_zone *time_zone, my_time_t *next,
      would give an error then.
     */
     DBUG_RETURN(1);
-    break;
   case INTERVAL_LAST:
     DBUG_ASSERT(0);
   }
@@ -1382,7 +1381,7 @@ Event_job_data::execute(THD *thd, bool drop)
 
   DBUG_ENTER("Event_job_data::execute");
 
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, 0);
 
   /*
     MySQL parser currently assumes that current database is either
@@ -1468,8 +1467,7 @@ Event_job_data::execute(THD *thd, bool drop)
 
     DBUG_ASSERT(sphead);
 
-    if (thd->enable_slow_log)
-      sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS;
+    sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS;
     sphead->m_flags|= sp_head::LOG_GENERAL_LOG;
 
     sphead->set_info(0, 0, &thd->lex->sp_chistics, sql_mode);
diff --git a/sql/event_data_objects.h b/sql/event_data_objects.h
index 9d17213bcb8..a70d38f2a9d 100644
--- a/sql/event_data_objects.h
+++ b/sql/event_data_objects.h
@@ -95,9 +95,9 @@ public:
   my_time_t execute_at;
   my_time_t starts;
   my_time_t ends;
-  my_bool starts_null;
-  my_bool ends_null;
-  my_bool execute_at_null;
+  bool starts_null;
+  bool ends_null;
+  bool execute_at_null;
 
   longlong expression;
   interval_type interval;
diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc
index db508e4ea41..961f7966a12 100644
--- a/sql/event_db_repository.cc
+++ b/sql/event_db_repository.cc
@@ -114,7 +114,8 @@ const TABLE_FIELD_TYPE event_table_fields[ET_FIELD_COUNT] =
   {
     { C_STRING_WITH_LEN("sql_mode") },
     { C_STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
-    "'IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION',"
+    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
+    "'NO_UNSIGNED_SUBTRACTION',"
     "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
     "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
     "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
@@ -431,17 +432,18 @@ Event_db_repository::index_read_for_db_for_i_s(THD *thd, TABLE *schema_table,
   }
 
   key_copy(key_buf, event_table->record[0], key_info, key_len);
-  if (!(ret= event_table->file->index_read_map(event_table->record[0], key_buf,
-                                               (key_part_map)1,
-                                               HA_READ_PREFIX)))
+  if (!(ret= event_table->file->ha_index_read_map(event_table->record[0],
+                                                  key_buf,
+                                                  (key_part_map)1,
+                                                  HA_READ_PREFIX)))
   {
     DBUG_PRINT("info",("Found rows. Let's retrieve them. ret=%d", ret));
     do
     {
       ret= copy_event_to_schema_table(thd, schema_table, event_table);
       if (ret == 0)
-        ret= event_table->file->index_next_same(event_table->record[0],
-                                                key_buf, key_len);
+        ret= event_table->file->ha_index_next_same(event_table->record[0],
+                                                   key_buf, key_len);
     } while (ret == 0);
   }
   DBUG_PRINT("info", ("Scan finished. ret=%d", ret));
@@ -481,7 +483,8 @@ Event_db_repository::table_scan_all_for_i_s(THD *thd, TABLE *schema_table,
   READ_RECORD read_record_info;
   DBUG_ENTER("Event_db_repository::table_scan_all_for_i_s");
 
-  init_read_record(&read_record_info, thd, event_table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, thd, event_table, NULL, 1, 0, FALSE))
+    DBUG_RETURN(TRUE);
 
   /*
     rr_sequential, in read_record(), returns 137==HA_ERR_END_OF_FILE,
@@ -616,7 +619,7 @@ Event_db_repository::open_event_table(THD *thd, enum thr_lock_type lock_type,
 
 bool
 Event_db_repository::create_event(THD *thd, Event_parse_data *parse_data,
-                                  my_bool create_if_not)
+                                  bool create_if_not)
 {
   int ret= 1;
   TABLE *table= NULL;
@@ -912,8 +915,9 @@ Event_db_repository::find_named_event(LEX_STRING db, LEX_STRING name,
 
   key_copy(key, table->record[0], table->key_info, table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     DBUG_PRINT("info", ("Row not found"));
     DBUG_RETURN(TRUE);
@@ -948,7 +952,9 @@ Event_db_repository::drop_schema_events(THD *thd, LEX_STRING schema)
     DBUG_VOID_RETURN;
 
   /* only enabled events are in memory, so we go now and delete the rest */
-  init_read_record(&read_record_info, thd, table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, thd, table, NULL, 1, 0, FALSE))
+    goto end;
+
   while (!ret && !(read_record_info.read_record(&read_record_info)) )
   {
     char *et_field= get_field(thd->mem_root, table->field[field]);
@@ -970,6 +976,8 @@ Event_db_repository::drop_schema_events(THD *thd, LEX_STRING schema)
     }
   }
   end_read_record(&read_record_info);
+
+end:
   close_thread_tables(thd);
   /*
     Make sure to only release the MDL lock on mysql.event, not other
diff --git a/sql/event_db_repository.h b/sql/event_db_repository.h
index ea7f3bbac0e..4f1a6a28e4a 100644
--- a/sql/event_db_repository.h
+++ b/sql/event_db_repository.h
@@ -73,7 +73,7 @@ public:
   Event_db_repository(){}
 
   bool
-  create_event(THD *thd, Event_parse_data *parse_data, my_bool create_if_not);
+  create_event(THD *thd, Event_parse_data *parse_data, bool create_if_not);
 
   bool
   update_event(THD *thd, Event_parse_data *parse_data, LEX_STRING *new_dbname,
diff --git a/sql/event_parse_data.h b/sql/event_parse_data.h
index 4ca46b40d3f..b8cf4954195 100644
--- a/sql/event_parse_data.h
+++ b/sql/event_parse_data.h
@@ -76,9 +76,9 @@ public:
   my_time_t starts;
   my_time_t ends;
   my_time_t execute_at;
-  my_bool starts_null;
-  my_bool ends_null;
-  my_bool execute_at_null;
+  bool starts_null;
+  bool ends_null;
+  bool execute_at_null;
 
   sp_name *identifier;
   Item* item_expression;
diff --git a/sql/event_queue.cc b/sql/event_queue.cc
index f0310c676d1..7e49c9888ad 100644
--- a/sql/event_queue.cc
+++ b/sql/event_queue.cc
@@ -136,9 +136,9 @@ Event_queue::init_queue(THD *thd)
 
   LOCK_QUEUE_DATA();
 
-  if (init_queue_ex(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
-                    0 /*max_on_top*/, event_queue_element_compare_q,
-                    NULL, EVENT_QUEUE_EXTENT))
+  if (::init_queue(&queue, EVENT_QUEUE_INITIAL_SIZE , 0 /*offset*/,
+                   0 /*max_on_top*/, event_queue_element_compare_q,
+                   NullS, 0, EVENT_QUEUE_EXTENT))
   {
     sql_print_error("Event Scheduler: Can't initialize the execution queue");
     goto err;
@@ -325,11 +325,13 @@ void
 Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
                            bool (*comparator)(LEX_STRING, Event_basic *))
 {
-  uint i= 0;
+  uint i;
   DBUG_ENTER("Event_queue::drop_matching_events");
   DBUG_PRINT("enter", ("pattern=%s", pattern.str));
 
-  while (i < queue.elements)
+  for (i= queue_first_element(&queue) ;
+       i <= queue_last_element(&queue) ;
+       )
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]?", et->dbname.str, et->name.str));
@@ -339,7 +341,8 @@ Event_queue::drop_matching_events(THD *thd, LEX_STRING pattern,
         The queue is ordered. If we remove an element, then all elements
         after it will shift one position to the left, if we imagine it as
         an array from left to the right. In this case we should not
-        increment the counter and the (i < queue.elements) condition is ok.
+        increment the counter and the (i <= queue_last_element() condition
+        is ok.
       */
       queue_remove(&queue, i);
       delete et;
@@ -403,7 +406,9 @@ Event_queue::find_n_remove_event(LEX_STRING db, LEX_STRING name)
   uint i;
   DBUG_ENTER("Event_queue::find_n_remove_event");
 
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     DBUG_PRINT("info", ("[%s.%s]==[%s.%s]?", db.str, name.str,
@@ -441,7 +446,9 @@ Event_queue::recalculate_activation_times(THD *thd)
 
   LOCK_QUEUE_DATA();
   DBUG_PRINT("info", ("%u loaded events to be recalculated", queue.elements));
-  for (i= 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     ((Event_queue_element*)queue_element(&queue, i))->compute_next_execution_time();
     ((Event_queue_element*)queue_element(&queue, i))->update_timing_fields(thd);
@@ -454,16 +461,19 @@ Event_queue::recalculate_activation_times(THD *thd)
     have removed all. The queue has been ordered in a way the disabled
     events are at the end.
   */
-  for (i= queue.elements; i > 0; i--)
+  for (i= queue_last_element(&queue);
+       (int) i >= (int) queue_first_element(&queue);
+       i--)
   {
-    Event_queue_element *element = (Event_queue_element*)queue_element(&queue, i - 1);
+    Event_queue_element *element=
+      (Event_queue_element*)queue_element(&queue, i);
     if (element->status != Event_parse_data::DISABLED)
       break;
     /*
       This won't cause queue re-order, because we remove
       always the last element.
     */
-    queue_remove(&queue, i - 1);
+    queue_remove(&queue, i);
     delete element;
   }
   UNLOCK_QUEUE_DATA();
@@ -499,7 +509,9 @@ Event_queue::empty_queue()
   sql_print_information("Event Scheduler: Purging the queue. %u events",
                         queue.elements);
   /* empty the queue */
-  for (i= 0; i < queue.elements; ++i)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     Event_queue_element *et= (Event_queue_element *) queue_element(&queue, i);
     delete et;
@@ -525,7 +537,9 @@ Event_queue::dbug_dump_queue(time_t now)
   uint i;
   DBUG_ENTER("Event_queue::dbug_dump_queue");
   DBUG_PRINT("info", ("Dumping queue . Elements=%u", queue.elements));
-  for (i = 0; i < queue.elements; i++)
+  for (i= queue_first_element(&queue);
+       i <= queue_last_element(&queue);
+       i++)
   {
     et= ((Event_queue_element*)queue_element(&queue, i));
     DBUG_PRINT("info", ("et: 0x%lx  name: %s.%s", (long) et,
@@ -595,7 +609,7 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
       continue;
     }
 
-    top= ((Event_queue_element*) queue_element(&queue, 0));
+    top= (Event_queue_element*) queue_top(&queue);
 
     thd->set_current_time(); /* Get current time */
 
@@ -641,10 +655,10 @@ Event_queue::get_top_for_execution_if_time(THD *thd,
                             top->dbname.str, top->name.str,
                             top->dropped? "Dropping.":"");
       delete top;
-      queue_remove(&queue, 0);
+      queue_remove_top(&queue);
     }
     else
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
 
     dbug_dump_queue(thd->query_start());
     break;
diff --git a/sql/event_scheduler.cc b/sql/event_scheduler.cc
index aa4d376d86e..bf16ddcb05a 100644
--- a/sql/event_scheduler.cc
+++ b/sql/event_scheduler.cc
@@ -349,6 +349,14 @@ Event_scheduler::Event_scheduler(Event_queue *queue_arg)
   mysql_mutex_init(key_event_scheduler_LOCK_scheduler_state,
                    &LOCK_scheduler_state, MY_MUTEX_INIT_FAST);
   mysql_cond_init(key_event_scheduler_COND_state, &COND_state, NULL);
+
+#ifdef SAFE_MUTEX
+  /* Ensure right mutex order */
+  mysql_mutex_lock(&LOCK_scheduler_state);
+  mysql_mutex_lock(&LOCK_global_system_variables);
+  mysql_mutex_unlock(&LOCK_global_system_variables);
+  mysql_mutex_unlock(&LOCK_scheduler_state);
+#endif
 }
 
 
diff --git a/sql/events.cc b/sql/events.cc
index 10a7535425f..9361242c570 100644
--- a/sql/events.cc
+++ b/sql/events.cc
@@ -233,7 +233,6 @@ common_1_lev_code:
   case INTERVAL_MICROSECOND:
     my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND");
     return 1;
-    break;
   case INTERVAL_QUARTER:
     expr/= 3;
     close_quote= FALSE;
@@ -791,7 +790,7 @@ Events::fill_schema_events(THD *thd, TABLE_LIST *tables, COND * /* cond */)
 */
 
 bool
-Events::init(my_bool opt_noacl_or_bootstrap)
+Events::init(bool opt_noacl_or_bootstrap)
 {
 
   THD *thd;
@@ -1091,7 +1090,12 @@ Events::load_events_from_db(THD *thd)
     DBUG_RETURN(TRUE);
   }
 
-  init_read_record(&read_record_info, thd, table, NULL, 0, 1, FALSE);
+  if (init_read_record(&read_record_info, thd, table, NULL, 0, 1, FALSE))
+  {
+    close_thread_tables(thd);
+    DBUG_RETURN(TRUE);
+  }
+
   while (!(read_record_info.read_record(&read_record_info)))
   {
     Event_queue_element *et;
@@ -1142,8 +1146,9 @@ Events::load_events_from_db(THD *thd)
       }
     }
   }
-  sql_print_information("Event Scheduler: Loaded %d event%s",
-                        count, (count == 1) ? "" : "s");
+  if (global_system_variables.log_warnings)
+    sql_print_information("Event Scheduler: Loaded %d event%s",
+                          count, (count == 1) ? "" : "s");
   ret= FALSE;
 
 end:
diff --git a/sql/events.h b/sql/events.h
index 1482e736d29..39e9510c15b 100644
--- a/sql/events.h
+++ b/sql/events.h
@@ -103,7 +103,7 @@ public:
   get_db_repository() { return db_repository; }
 
   static bool
-  init(my_bool opt_noacl);
+  init(bool opt_noacl);
 
   static void
   deinit();
diff --git a/sql/field.cc b/sql/field.cc
index be7441f6bfd..26f3e0452c8 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -63,7 +63,7 @@ const char field_separator=',';
 ((ulong) ((LL(1) << min(arg, 4) * 8) - LL(1)))
 
 #define ASSERT_COLUMN_MARKED_FOR_READ DBUG_ASSERT(!table || (!table->read_set || bitmap_is_set(table->read_set, field_index)))
-#define ASSERT_COLUMN_MARKED_FOR_WRITE DBUG_ASSERT(!table || (!table->write_set || bitmap_is_set(table->write_set, field_index)))
+#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(!table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index)))
 
 #define FLAGSTR(S,F) ((S) & (F) ? #F " " : "")
 
@@ -1220,7 +1220,8 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len,
   if (unsigned_flag)
   {
 
-    if ((((ulonglong) *rnd > unsigned_max) && (*rnd= (longlong) unsigned_max)) ||
+    if ((((ulonglong) *rnd > unsigned_max) &&
+         (*rnd= (longlong) unsigned_max)) ||
         error == MY_ERRNO_ERANGE)
     {
       goto out_of_range;
@@ -1283,7 +1284,7 @@ int Field::warn_if_overflow(int op_result)
   This is used for printing bit_fields as numbers while debugging.
 */
 
-String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val)
+String *Field::val_int_as_str(String *val_buffer, bool unsigned_val)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
   CHARSET_INFO *cs= &my_charset_bin;
@@ -1305,13 +1306,13 @@ String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val)
 Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
 	     uchar null_bit_arg,
 	     utype unireg_check_arg, const char *field_name_arg)
-  :ptr(ptr_arg), null_ptr(null_ptr_arg),
-   table(0), orig_table(0), table_name(0),
-   field_name(field_name_arg),
-   key_start(0), part_of_key(0), part_of_key_not_clustered(0),
-   part_of_sortkey(0), unireg_check(unireg_check_arg),
-   field_length(length_arg), null_bit(null_bit_arg), 
-   is_created_from_null_item(FALSE)
+  :ptr(ptr_arg), null_ptr(null_ptr_arg), table(0), orig_table(0),
+  table_name(0), field_name(field_name_arg), option_list(0),
+  option_struct(0), key_start(0), part_of_key(0),
+  part_of_key_not_clustered(0), part_of_sortkey(0),
+  unireg_check(unireg_check_arg), field_length(length_arg),
+  null_bit(null_bit_arg), is_created_from_null_item(FALSE), vcol_info(0),
+  stored_in_db(TRUE)
 {
   flags=null_ptr ? 0: NOT_NULL_FLAG;
   comment.str= (char*) "";
@@ -1641,7 +1642,7 @@ longlong Field::convert_decimal2longlong(const my_decimal *val,
 
 int Field_num::store_decimal(const my_decimal *val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err= 0;
   longlong i= convert_decimal2longlong(val, unsigned_flag, &err);
   return test(err | store(i, unsigned_flag));
@@ -1713,7 +1714,7 @@ void Field_num::make_field(Send_field *field)
 
 int Field_str::store_decimal(const my_decimal *d)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   double val;
   /* TODO: use decimal2string? */
   int err= warn_if_overflow(my_decimal2double(E_DEC_FATAL_ERROR &
@@ -1734,8 +1735,8 @@ my_decimal *Field_str::val_decimal(my_decimal *decimal_value)
 uint Field::fill_cache_field(CACHE_FIELD *copy)
 {
   uint store_length;
-  copy->str=ptr;
-  copy->length=pack_length();
+  copy->str= ptr;
+  copy->length= pack_length();
   copy->field= this;
   if (flags & BLOB_FLAG)
   {
@@ -1747,9 +1748,15 @@ uint Field::fill_cache_field(CACHE_FIELD *copy)
            (type() == MYSQL_TYPE_STRING && copy->length >= 4 &&
             copy->length < 256))
   {
-    copy->type= CACHE_STRIPPED;
+    copy->type= CACHE_STRIPPED;			    /* Remove end space */
     store_length= 2;
   }
+  else if (type() ==  MYSQL_TYPE_VARCHAR)
+  {
+    copy->type= pack_length()-row_pack_length() == 1 ? CACHE_VARSTR1:
+                                                      CACHE_VARSTR2;
+    store_length= 0;
+  }
   else
   {
     copy->type= 0;
@@ -1789,7 +1796,7 @@ bool Field::get_time(MYSQL_TIME *ltime)
 
 int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[MAX_DATE_STRING_REP_LENGTH];
   uint length= (uint) my_TIME_to_str(ltime, buff);
   /* Avoid conversion when field character set is ASCII compatible */
@@ -1918,7 +1925,7 @@ void Field_decimal::overflow(bool negative)
 
 int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[STRING_BUFFER_USUAL_SIZE];
   String tmp(buff,sizeof(buff), &my_charset_bin);
   const uchar *from= (uchar*) from_arg;
@@ -2284,7 +2291,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
 
 int Field_decimal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   if (unsigned_flag && nr < 0)
   {
     overflow(1);
@@ -2323,7 +2330,7 @@ int Field_decimal::store(double nr)
 
 int Field_decimal::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[22];
   uint length, int_part;
   char fyllchar;
@@ -2603,7 +2610,7 @@ void Field_new_decimal::set_value_on_overflow(my_decimal *decimal_value,
 
 bool Field_new_decimal::store_value(const my_decimal *decimal_value)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   DBUG_ENTER("Field_new_decimal::store_value");
 #ifndef DBUG_OFF
@@ -2648,7 +2655,7 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value)
 int Field_new_decimal::store(const char *from, uint length,
                              CHARSET_INFO *charset_arg)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err;
   my_decimal decimal_value;
   DBUG_ENTER("Field_new_decimal::store(char*)");
@@ -2709,7 +2716,7 @@ int Field_new_decimal::store(const char *from, uint length,
 
 int Field_new_decimal::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   my_decimal decimal_value;
   int err;
   DBUG_ENTER("Field_new_decimal::store(double)");
@@ -2733,7 +2740,7 @@ int Field_new_decimal::store(double nr)
 
 int Field_new_decimal::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   my_decimal decimal_value;
   int err;
 
@@ -2755,7 +2762,7 @@ int Field_new_decimal::store(longlong nr, bool unsigned_val)
 
 int Field_new_decimal::store_decimal(const my_decimal *decimal_value)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   return store_value(decimal_value);
 }
 
@@ -2960,7 +2967,7 @@ Field_new_decimal::unpack(uchar* to,
 
 int Field_tiny::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error;
   longlong rnd;
   
@@ -2972,7 +2979,7 @@ int Field_tiny::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_tiny::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   nr=rint(nr);
   if (unsigned_flag)
@@ -2985,18 +2992,18 @@ int Field_tiny::store(double nr)
     }
     else if (nr > 255.0)
     {
-      *ptr=(char) 255;
+      *ptr= (uchar) 255;
       set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
       error= 1;
     }
     else
-      *ptr=(char) nr;
+      *ptr= (uchar) nr;
   }
   else
   {
     if (nr < -128.0)
     {
-      *ptr= (char) -128;
+      *ptr= (uchar) -128;
       set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1);
       error= 1;
     }
@@ -3007,7 +3014,7 @@ int Field_tiny::store(double nr)
       error= 1;
     }
     else
-      *ptr=(char) (int) nr;
+      *ptr=(uchar) (int) nr;
   }
   return error;
 }
@@ -3015,7 +3022,7 @@ int Field_tiny::store(double nr)
 
 int Field_tiny::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (unsigned_flag)
@@ -3136,7 +3143,7 @@ void Field_tiny::sql_type(String &res) const
 
 int Field_short::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int store_tmp;
   int error;
   longlong rnd;
@@ -3157,7 +3164,7 @@ int Field_short::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_short::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int16 res;
   nr=rint(nr);
@@ -3209,7 +3216,7 @@ int Field_short::store(double nr)
 
 int Field_short::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int16 res;
 
@@ -3384,7 +3391,7 @@ void Field_short::sql_type(String &res) const
 
 int Field_medium::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int store_tmp;
   int error;
   longlong rnd;
@@ -3398,7 +3405,7 @@ int Field_medium::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_medium::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   nr=rint(nr);
   if (unsigned_flag)
@@ -3444,7 +3451,7 @@ int Field_medium::store(double nr)
 
 int Field_medium::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (unsigned_flag)
@@ -3575,7 +3582,7 @@ void Field_medium::sql_type(String &res) const
 
 int Field_long::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long store_tmp;
   int error;
   longlong rnd;
@@ -3596,7 +3603,7 @@ int Field_long::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_long::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int32 res;
   nr=rint(nr);
@@ -3648,7 +3655,7 @@ int Field_long::store(double nr)
 
 int Field_long::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   int32 res;
 
@@ -3823,7 +3830,7 @@ void Field_long::sql_type(String &res) const
 
 int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   char *end;
   ulonglong tmp;
@@ -3853,7 +3860,7 @@ int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_longlong::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   longlong res;
 
@@ -3905,7 +3912,7 @@ int Field_longlong::store(double nr)
 
 int Field_longlong::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
 
   if (nr < 0)                                   // Only possible error
@@ -4119,7 +4126,7 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
   char *end;
   double nr= my_strntod(cs,(char*) from,len,&end,&error);
   if (error || (!len || ((uint) (end-from) != len &&
-                table->in_use->count_cuted_fields)))
+                         table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -4132,7 +4139,7 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_float::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= truncate(&nr, FLT_MAX);
   float j= (float)nr;
 
@@ -4341,7 +4348,7 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
   char *end;
   double nr= my_strntod(cs,(char*) from, len, &end, &error);
   if (error || (!len || ((uint) (end-from) != len &&
-                table->in_use->count_cuted_fields)))
+                         table->in_use->count_cuted_fields)))
   {
     set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
                 (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1);
@@ -4354,7 +4361,7 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs)
 
 int Field_double::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= truncate(&nr, DBL_MAX);
 
 #ifdef WORDS_BIGENDIAN
@@ -4738,7 +4745,7 @@ timestamp_auto_set_type Field_timestamp::get_auto_set_type() const
 
 int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME l_time;
   my_time_t tmp= 0;
   int error;
@@ -4801,7 +4808,7 @@ int Field_timestamp::store(double nr)
 
 int Field_timestamp::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME l_time;
   my_time_t timestamp= 0;
   int error;
@@ -5101,7 +5108,7 @@ int Field_time::store_time(MYSQL_TIME *ltime, timestamp_type time_type)
 
 int Field_time::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long tmp;
   int error= 0;
   if (nr > (double)TIME_MAX_VALUE)
@@ -5139,7 +5146,7 @@ int Field_time::store(double nr)
 
 int Field_time::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long tmp;
   int error= 0;
   if (nr < (longlong) -TIME_MAX_VALUE && !unsigned_val)
@@ -5298,12 +5305,12 @@ void Field_time::sql_type(String &res) const
 
 int Field_year::store(const char *from, uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char *end;
   int error;
   longlong nr= cs->cset->strntoull10rnd(cs, from, len, 0, &end, &error);
 
-  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 ||
+  if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155 || 
       error == MY_ERRNO_ERANGE)
   {
     *ptr=0;
@@ -5335,7 +5342,7 @@ int Field_year::store(const char *from, uint len,CHARSET_INFO *cs)
 
 int Field_year::store(double nr)
 {
-  if (nr < 0.0 || nr >= 2155.0)
+  if (nr < 0.0 || nr > 2155.0)
   {
     (void) Field_year::store((longlong) -1, FALSE);
     return 1;
@@ -5346,7 +5353,7 @@ int Field_year::store(double nr)
 
 int Field_year::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   if (nr < 0 || (nr >= 100 && nr <= 1900) || nr > 2155)
   {
     *ptr= 0;
@@ -5420,7 +5427,7 @@ void Field_year::sql_type(String &res) const
 
 int Field_date::store(const char *from, uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME l_time;
   uint32 tmp;
   int error;
@@ -5475,7 +5482,7 @@ int Field_date::store(double nr)
 
 int Field_date::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME not_used;
   int error;
   longlong initial_nr= nr;
@@ -5655,7 +5662,7 @@ void Field_date::sql_type(String &res) const
 
 int Field_newdate::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long tmp;
   MYSQL_TIME l_time;
   int error;
@@ -5705,7 +5712,7 @@ int Field_newdate::store(double nr)
 
 int Field_newdate::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME l_time;
   longlong tmp;
   int error;
@@ -5741,7 +5748,7 @@ int Field_newdate::store(longlong nr, bool unsigned_val)
 
 int Field_newdate::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   long tmp;
   int error= 0;
   if (time_type == MYSQL_TIMESTAMP_DATE ||
@@ -5889,7 +5896,7 @@ void Field_newdate::sql_type(String &res) const
 
 int Field_datetime::store(const char *from,uint len,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME time_tmp;
   int error;
   ulonglong tmp= 0;
@@ -5942,7 +5949,7 @@ int Field_datetime::store(double nr)
 
 int Field_datetime::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   MYSQL_TIME not_used;
   int error;
   longlong initial_nr= nr;
@@ -5980,7 +5987,7 @@ int Field_datetime::store(longlong nr, bool unsigned_val)
 
 int Field_datetime::store_time(MYSQL_TIME *ltime,timestamp_type time_type)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   longlong tmp;
   int error= 0;
   /*
@@ -6283,7 +6290,7 @@ Field_longstr::report_if_important_data(const char *pstr, const char *end,
 
 int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -6324,7 +6331,7 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
 
 int Field_str::store(double nr)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   char buff[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE];
   uint local_char_length= field_length / charset()->mbmaxlen;
   size_t length;
@@ -6542,8 +6549,7 @@ void Field_string::sql_type(String &res) const
 
   length= cs->cset->snprintf(cs,(char*) res.ptr(),
                              res.alloced_length(), "%s(%d)",
-                             ((type() == MYSQL_TYPE_VAR_STRING &&
-                               !thd->variables.new_mode) ?
+                             (type() == MYSQL_TYPE_VAR_STRING ?
                               (has_charset() ? "varchar" : "varbinary") :
 			      (has_charset() ? "char" : "binary")),
                              (int) field_length / charset()->mbmaxlen);
@@ -6784,7 +6790,7 @@ int Field_varstring::do_save_field_metadata(uchar *metadata_ptr)
 
 int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -7344,7 +7350,7 @@ void Field_blob::put_length(uchar *pos, uint32 length)
 
 int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   uint copy_length, new_length;
   const char *well_formed_error_pos;
   const char *cannot_convert_error_pos;
@@ -7974,7 +7980,7 @@ void Field_enum::store_type(ulonglong value)
 
 int Field_enum::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int err= 0;
   uint32 not_used;
   char buff[STRING_BUFFER_USUAL_SIZE];
@@ -8023,7 +8029,7 @@ int Field_enum::store(double nr)
 
 int Field_enum::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   if ((ulonglong) nr > typelib->count || nr == 0)
   {
@@ -8192,7 +8198,7 @@ Field *Field_enum::new_field(MEM_ROOT *root, TABLE *new_table,
 
 int Field_set::store(const char *from,uint length,CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   bool got_warning= 0;
   int err= 0;
   char *not_used;
@@ -8232,7 +8238,7 @@ int Field_set::store(const char *from,uint length,CHARSET_INFO *cs)
 
 int Field_set::store(longlong nr, bool unsigned_val)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int error= 0;
   ulonglong max_nr;
 
@@ -8555,10 +8561,11 @@ uint Field_bit::is_equal(Create_field *new_field)
                        
 int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int delta;
 
-  for (; length && !*from; from++, length--) ;         // skip left 0's
+  for (; length && !*from; from++, length--)          // skip left 0's
+    ;
   delta= bytes_in_rec - length;
 
   if (delta < -1 ||
@@ -8980,11 +8987,12 @@ Field_bit_as_char::Field_bit_as_char(uchar *ptr_arg, uint32 len_arg,
 
 int Field_bit_as_char::store(const char *from, uint length, CHARSET_INFO *cs)
 {
-  ASSERT_COLUMN_MARKED_FOR_WRITE;
+  ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
   int delta;
   uchar bits= (uchar) (field_length & 7);
 
-  for (; length && !*from; from++, length--) ;         // skip left 0's
+  for (; length && !*from; from++, length--)          // skip left 0's
+    ;
   delta= bytes_in_rec - length;
 
   if (delta < 0 ||
@@ -9172,6 +9180,9 @@ void Create_field::init_for_tmp_table(enum_field_types sql_type_arg,
                        FLAGSTR(pack_flag, FIELDFLAG_BLOB),
                        FLAGSTR(pack_flag, FIELDFLAG_DECIMAL),
                        f_packtype(pack_flag)));
+  vcol_info= 0;
+  stored_in_db= TRUE;
+
   DBUG_VOID_RETURN;
 }
 
@@ -9192,6 +9203,7 @@ void Create_field::init_for_tmp_table(enum_field_types sql_type_arg,
   @param fld_interval_list     Interval list (if any)
   @param fld_charset           Field charset
   @param fld_geom_type         Field geometry type (if any)
+  @param fld_vcol_info         Virtual column data
 
   @retval
     FALSE on success
@@ -9204,17 +9216,20 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
                         uint fld_type_modifier, Item *fld_default_value,
                         Item *fld_on_update_value, LEX_STRING *fld_comment,
                         char *fld_change, List<String> *fld_interval_list,
-                        CHARSET_INFO *fld_charset, uint fld_geom_type)
+                        CHARSET_INFO *fld_charset, uint fld_geom_type,
+			Virtual_column_info *fld_vcol_info,
+                        engine_option_value *create_opt)
 {
   uint sign_len, allowed_type_modifier= 0;
   ulong max_field_charlength= MAX_FIELD_CHARLENGTH;
 
   DBUG_ENTER("Create_field::init()");
-  
+
   field= 0;
   field_name= fld_name;
   def= fld_default_value;
   flags= fld_type_modifier;
+  option_list= create_opt;
   unireg_check= (fld_type_modifier & AUTO_INCREMENT_FLAG ?
                  Field::NEXT_NUMBER : Field::NONE);
   decimals= fld_decimals ? (uint)atoi(fld_decimals) : 0;
@@ -9235,6 +9250,33 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
   interval_list.empty();
 
   comment= *fld_comment;
+  vcol_info= fld_vcol_info;
+  stored_in_db= TRUE;
+
+  /* Initialize data for a computed field */
+  if ((uchar)fld_type == (uchar)MYSQL_TYPE_VIRTUAL)
+  {
+    DBUG_ASSERT(vcol_info && vcol_info->expr_item);
+    stored_in_db= vcol_info->is_stored();
+    /*
+      Walk through the Item tree checking if all items are valid
+      to be part of the virtual column
+    */
+    if (vcol_info->expr_item->walk(&Item::check_vcol_func_processor, 0, NULL))
+    {
+      my_error(ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), field_name);
+      DBUG_RETURN(TRUE);
+    }
+
+    /*
+      Make a field created for the real type.
+      Note that regular and computed fields differ from each other only by
+      Field::vcol_info. It is is always NULL for a column that is not
+      computed.
+    */
+    sql_type= fld_type= vcol_info->get_real_type();
+  }
+
   /*
     Set NO_DEFAULT_VALUE_FLAG if this field doesn't have a default value and
     it is NOT NULL, not an AUTO_INCREMENT field and not a TIMESTAMP.
@@ -9466,7 +9508,7 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
     break;
   case MYSQL_TYPE_DATE:
-    /* Old date type. */
+    /* We don't support creation of MYSQL_TYPE_DATE anymore */
     sql_type= MYSQL_TYPE_NEWDATE;
     /* fall trough */
   case MYSQL_TYPE_NEWDATE:
@@ -9524,7 +9566,7 @@ bool Create_field::init(THD *thd, char *fld_name, enum_field_types fld_type,
     }
   case MYSQL_TYPE_DECIMAL:
     DBUG_ASSERT(0); /* Was obsolete */
-  }
+ }
   /* Remember the value of length */
   char_length= length;
 
@@ -9634,7 +9676,6 @@ uint pack_length_to_packflag(uint type)
   return 0;					// This shouldn't happen
 }
 
-
 Field *make_field(TABLE_SHARE *share, uchar *ptr, uint32 field_length,
 		  uchar *null_pos, uchar null_bit,
 		  uint pack_flag,
@@ -9834,6 +9875,10 @@ Create_field::Create_field(Field *old_field,Field *orig_field)
   charset=    old_field->charset();		// May be NULL ptr
   comment=    old_field->comment;
   decimals=   old_field->decimals();
+  vcol_info=  old_field->vcol_info;
+  stored_in_db= old_field->stored_in_db;
+  option_list= old_field->option_list;
+  option_struct= old_field->option_struct;
 
   /* Fix if the original table had 4 byte pointer blobs */
   if (flags & BLOB_FLAG)
@@ -9906,7 +9951,6 @@ Create_field::Create_field(Field *old_field,Field *orig_field)
   }
 }
 
-
 /**
   maximum possible character length for blob.
   
@@ -9941,6 +9985,19 @@ uint32 Field_blob::char_length()
 
 
 /**
+  Makes a clone of this object for ALTER/CREATE TABLE
+
+  @param mem_root        MEM_ROOT where to clone the field
+*/
+
+Create_field *Create_field::clone(MEM_ROOT *mem_root) const
+{
+  Create_field *res= new (mem_root) Create_field(*this);
+  return res;
+}
+
+
+/**
   maximum possible display length for blob.
 
   @return
@@ -10096,7 +10153,7 @@ Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code,
   {
     /* DBL_DIG is enough to print '-[digits].E+###' */
     char str_nr[DBL_DIG + 8];
-    uint str_len= sprintf(str_nr, "%g", nr);
+    uint str_len= my_sprintf(str_nr, (str_nr, "%g", nr));
     make_truncated_value_warning(thd, level, str_nr, str_len, ts_type,
                                  field_name);
   }
diff --git a/sql/field.h b/sql/field.h
index 7b250c34fe4..a7718df604a 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -79,6 +79,71 @@ inline uint get_set_pack_length(int elements)
   return len > 4 ? 8 : len;
 }
 
+/*
+  Virtual_column_info is the class to contain additional
+  characteristics that is specific for a virtual/computed
+  field such as:
+   - the defining expression that is evaluated to compute the value
+  of the field 
+  - whether the field is to be stored in the database
+  - whether the field is used in a partitioning expression
+*/
+
+class Virtual_column_info: public Sql_alloc
+{
+private:
+  /*
+    The following data is only updated by the parser and read
+    when a Create_field object is created/initialized.
+  */
+  enum_field_types field_type;   /* Real field type*/
+  /* Flag indicating  that the field is physically stored in the database */
+  bool stored_in_db;
+  /* Flag indicating that the field used in a partitioning expression */
+  bool in_partitioning_expr;
+
+public:
+  /* The expression to compute the value of the virtual column */
+  Item *expr_item;
+  /* Text representation of the defining expression */
+  LEX_STRING expr_str;
+
+  Virtual_column_info()
+  : field_type((enum enum_field_types)MYSQL_TYPE_VIRTUAL),
+    stored_in_db(FALSE), in_partitioning_expr(FALSE), 
+    expr_item(NULL)
+  {
+    expr_str.str= NULL;
+    expr_str.length= 0;
+  };
+  ~Virtual_column_info() {}
+  enum_field_types get_real_type()
+  {
+    return field_type;
+  }
+  void set_field_type(enum_field_types fld_type)
+  {
+    /* Calling this function can only be done once. */
+    field_type= fld_type;
+  }
+  bool is_stored()
+  {
+    return stored_in_db;
+  }
+  void set_stored_in_db_flag(bool stored)
+  {
+    stored_in_db= stored;
+  }
+  bool is_in_partitioning_expr()
+  {
+    return in_partitioning_expr;
+  }
+  void mark_as_in_partitioning_expr()
+  {
+    in_partitioning_expr= TRUE;
+  }
+};
+
 class Field
 {
   Field(const Item &);				/* Prevent use of these */
@@ -95,20 +160,23 @@ public:
   */
   uchar		*null_ptr;
   /*
-    Note that you can use table->in_use as replacement for current_thd member 
+    Note that you can use table->in_use as replacement for current_thd member
     only inside of val_*() and store() members (e.g. you can't use it in cons)
   */
   TABLE *table;                                 // Pointer for table
   TABLE *orig_table;                            // Pointer to original table
   const char	**table_name, *field_name;
+  /** reference to the list of options or NULL */
+  engine_option_value *option_list;
+  void *option_struct;                  /* structure with parsed options */
   LEX_STRING	comment;
   /* Field is part of the following keys */
   key_map	key_start, part_of_key, part_of_key_not_clustered;
   key_map       part_of_sortkey;
-  /* 
-    We use three additional unireg types for TIMESTAMP to overcome limitation 
-    of current binary format of .frm file. We'd like to be able to support 
-    NOW() as default and on update value for such fields but unable to hold 
+  /*
+    We use three additional unireg types for TIMESTAMP to overcome limitation
+    of current binary format of .frm file. We'd like to be able to support
+    NOW() as default and on update value for such fields but unable to hold
     this info anywhere except unireg_check field. This issue will be resolved
     in more clean way with transition to new text based .frm format.
     See also comment for Field_timestamp::Field_timestamp().
@@ -141,6 +209,19 @@ public:
    */
   bool is_created_from_null_item;
 
+  /* 
+    This is additional data provided for any computed(virtual) field.
+    In particular it includes a pointer to the item by  which this field
+    can be computed from other fields.
+  */
+  Virtual_column_info *vcol_info;
+  /*
+    Flag indicating that the field is physically stored in tables
+    rather than just computed from other fields.
+    As of now, FALSE can be set only for computed virtual columns.
+  */
+  bool stored_in_db;
+
   Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
         uchar null_bit_arg, utype unireg_check_arg,
         const char *field_name_arg);
@@ -170,7 +251,7 @@ public:
      This trickery is used to decrease a number of malloc calls.
   */
   virtual String *val_str(String*,String *)=0;
-  String *val_int_as_str(String *val_buffer, my_bool unsigned_flag);
+  String *val_int_as_str(String *val_buffer, bool unsigned_flag);
   /*
    str_needs_quotes() returns TRUE if the value returned by val_str() needs
    to be quoted when used in constructing an SQL query.
@@ -279,11 +360,11 @@ public:
     return test(record[(uint) (null_ptr -table->record[0])] &
 		null_bit);
   }
-  inline bool is_null_in_record_with_offset(my_ptrdiff_t offset)
+  inline bool is_null_in_record_with_offset(my_ptrdiff_t col_offset)
   {
     if (!null_ptr)
       return 0;
-    return test(null_ptr[offset] & null_bit);
+    return test(null_ptr[col_offset] & null_bit);
   }
   inline void set_null(my_ptrdiff_t row_offset= 0)
     { if (null_ptr) null_ptr[row_offset]|= null_bit; }
@@ -382,7 +463,7 @@ public:
       Number of copied bytes (excluding padded zero bytes -- see above).
   */
 
-  virtual uint get_key_image(uchar *buff, uint length, imagetype type)
+  virtual uint get_key_image(uchar *buff, uint length, imagetype type_arg)
   {
     get_image(buff, length, &my_charset_bin);
     return length;
@@ -724,7 +805,7 @@ public:
 /* base class for float and double and decimal (old one) */
 class Field_real :public Field_num {
 public:
-  my_bool not_fixed;
+  bool not_fixed;
 
   Field_real(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
              uchar null_bit_arg, utype unireg_check_arg,
@@ -1151,7 +1232,7 @@ public:
                 NONE, field_name_arg, dec_arg, 0, 0)
     {}
   Field_double(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg,
-	       uint8 dec_arg, my_bool not_fixed_arg)
+	       uint8 dec_arg, bool not_fixed_arg)
     :Field_real((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "" : 0, (uint) 0,
                 NONE, field_name_arg, dec_arg, 0, 0)
     {not_fixed= not_fixed_arg; }
@@ -1246,7 +1327,7 @@ public:
       Field::set_default();
   }
   /* Get TIMESTAMP field value as seconds since begging of Unix Epoch */
-  inline long get_timestamp(my_bool *null_value)
+  inline long get_timestamp(bool *null_value)
   {
     if ((*null_value= is_null()))
       return 0;
@@ -1549,8 +1630,8 @@ public:
                              uint16 mflags, int *order_var);
   uint row_pack_length() { return field_length; }
   int pack_cmp(const uchar *a,const uchar *b,uint key_length,
-               my_bool insert_or_update);
-  int pack_cmp(const uchar *b,uint key_length,my_bool insert_or_update);
+               bool insert_or_update);
+  int pack_cmp(const uchar *b,uint key_length,bool insert_or_update);
   uint packed_col_length(const uchar *to, uint length);
   uint max_packed_col_length(uint max_length);
   uint size_of() const { return sizeof(*this); }
@@ -2092,14 +2173,31 @@ public:
   CHARSET_INFO *charset;
   Field::geometry_type geom_type;
   Field *field;				// For alter table
+  engine_option_value *option_list;
+  /** structure with parsed options (for comparing fields in ALTER TABLE) */
+  void *option_struct;
 
   uint8 row,col,sc_length,interval_id;	// For rea_create_table
   uint	offset,pack_flag;
-  Create_field() :after(0) {}
+
+    /* 
+    This is additinal data provided for any computed(virtual) field.
+    In particular it includes a pointer to the item by  which this field
+    can be computed from other fields.
+  */
+  Virtual_column_info *vcol_info;
+  /*
+    Flag indicating that the field is physically stored in tables
+    rather than just computed from other fields.
+    As of now, FALSE can be set only for computed virtual columns.
+  */
+  bool stored_in_db;
+
+  Create_field() :after(0), option_list(NULL), option_struct(NULL)
+  {}
   Create_field(Field *field, Field *orig_field);
   /* Used to make a clone of this object for ALTER/CREATE TABLE */
-  Create_field *clone(MEM_ROOT *mem_root) const
-    { return new (mem_root) Create_field(*this); }
+  Create_field *clone(MEM_ROOT *mem_root) const;
   void create_length_to_internal_length(void);
 
   /* Init for a tmp table field. To be extended if need be. */
@@ -2112,7 +2210,8 @@ public:
             char *decimals, uint type_modifier, Item *default_value,
             Item *on_update_value, LEX_STRING *comment, char *change,
             List<String> *interval_list, CHARSET_INFO *cs,
-            uint uint_geom_type);
+            uint uint_geom_type, Virtual_column_info *vcol_info,
+            engine_option_value *option_list);
 
   bool field_flags_are_binary()
   {
@@ -2151,7 +2250,7 @@ class Copy_field :public Sql_alloc {
 public:
   uchar *from_ptr,*to_ptr;
   uchar *from_null_ptr,*to_null_ptr;
-  my_bool *null_row;
+  bool *null_row;
   uint	from_bit,to_bit;
   uint from_length,to_length;
   Field *from_field,*to_field;
diff --git a/sql/field_conv.cc b/sql/field_conv.cc
index ea6ff82e0aa..a27b471442b 100644
--- a/sql/field_conv.cc
+++ b/sql/field_conv.cc
@@ -676,10 +676,10 @@ Copy_field::get_copy_func(Field *to,Field *from)
       */
       if (to->real_type() != from->real_type() ||
           !compatible_db_low_byte_first ||
-          (((to->table->in_use->variables.sql_mode &
+          ((to->table->in_use->variables.sql_mode &
             (MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE | MODE_INVALID_DATES)) &&
-           to->type() == MYSQL_TYPE_DATE) ||
-           to->type() == MYSQL_TYPE_DATETIME))
+           (to->type() == MYSQL_TYPE_DATE ||
+            to->type() == MYSQL_TYPE_DATETIME)))
       {
 	if (from->real_type() == MYSQL_TYPE_ENUM ||
 	    from->real_type() == MYSQL_TYPE_SET)
@@ -695,8 +695,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
           if (from->real_type() == MYSQL_TYPE_ENUM &&
               to->real_type() == MYSQL_TYPE_ENUM)
             return do_field_enum;
-          else
-            return do_field_string;
+          return do_field_string;
         }
       }
       else if (to->charset() != from->charset())
@@ -720,10 +719,8 @@ Copy_field::get_copy_func(Field *to,Field *from)
       {
         if (to->charset() == &my_charset_bin)
           return do_expand_binary;
-        else
-          return do_expand_string;
+        return do_expand_string;
       }
-
     }
     else if (to->real_type() != from->real_type() ||
 	     to_length != from_length ||
@@ -749,7 +746,7 @@ Copy_field::get_copy_func(Field *to,Field *from)
       }
     }
   }
-    /* Eq fields */
+  /* Identical field types */
   switch (to_length) {
   case 1: return do_field_1;
   case 2: return do_field_2;
@@ -775,8 +772,8 @@ int field_conv(Field *to,Field *from)
 	to->real_type() != MYSQL_TYPE_SET &&
         to->real_type() != MYSQL_TYPE_BIT &&
         (to->real_type() != MYSQL_TYPE_NEWDECIMAL ||
-         (to->field_length == from->field_length &&
-          (((Field_num*)to)->dec == ((Field_num*)from)->dec))) &&
+         ((to->field_length == from->field_length &&
+           (((Field_num*)to)->dec == ((Field_num*)from)->dec)))) &&
         from->charset() == to->charset() &&
 	to->table->s->db_low_byte_first == from->table->s->db_low_byte_first &&
         (!(to->table->in_use->variables.sql_mode &
@@ -787,7 +784,7 @@ int field_conv(Field *to,Field *from)
          ((Field_varstring*)from)->length_bytes ==
           ((Field_varstring*)to)->length_bytes))
     {						// Identical fields
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
       /* This may happen if one does 'UPDATE ... SET x=x' */
       if (to->ptr != from->ptr)
 #endif
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 419f18263cc..0b61c65d2b7 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -197,6 +197,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   {
     status_var_increment(thd->status_var.filesort_scan_count);
   }
+  thd->query_plan_flags|= QPLAN_FILESORT;
 #ifdef CAN_TRUST_RANGE
   if (select && select->quick && select->quick->records > 0L)
   {
@@ -262,6 +263,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   }
   else
   {
+    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
     {
       my_free(table_sort.buffpek);
@@ -510,8 +512,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   THD *thd= current_thd;
   volatile THD::killed_state *killed= &thd->killed;
   handler *file;
-  MY_BITMAP *save_read_set, *save_write_set;
-  bool skip_record;
+  MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
   DBUG_ENTER("find_all_keys");
   DBUG_PRINT("info",("using: %s",
                      (select ? select->quick ? "ranges" : "where":
@@ -525,7 +526,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   ref_pos= ref_buff;
   quick_select=select && select->quick;
   record=0;
-  flag= ((!indexfile && file->ha_table_flags() & HA_REC_NOT_IN_SEQ)
+  flag= ((!indexfile && (file->ha_table_flags() & HA_REC_NOT_IN_SEQ))
 	 || quick_select);
   if (indexfile || flag)
     ref_pos= &file->ref[0];
@@ -533,7 +534,8 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   if (! indexfile && ! quick_select)
   {
     next_pos=(uchar*) 0;			/* Find records in sequence */
-    file->ha_rnd_init(1);
+    if (file->ha_rnd_init_with_error(1))
+      DBUG_RETURN(HA_POS_ERROR);
     file->extra_opt(HA_EXTRA_CACHE,
 		    current_thd->variables.read_buff_size);
   }
@@ -547,6 +549,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   /* Remember original bitmaps */
   save_read_set=  sort_form->read_set;
   save_write_set= sort_form->write_set;
+  save_vcol_set= sort_form->vcol_set;
   /* Set up temporary column read map for columns used by sort */
   bitmap_clear_all(&sort_form->tmp_set);
   /* Temporary set for register_used_fields and register_field_in_read_map */
@@ -555,7 +558,8 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   if (select && select->cond)
     select->cond->walk(&Item::register_field_in_read_map, 1,
                        (uchar*) sort_form);
-  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);
+  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set, 
+                                &sort_form->tmp_set);
 
   for (;;)
   {
@@ -563,6 +567,8 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     {
       if ((error= select->quick->get_next()))
         break;
+      if (!error)
+        update_virtual_fields(thd, sort_form);
       file->position(sort_form->record[0]);
       DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
     }
@@ -575,11 +581,13 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 	  error= my_errno ? my_errno : -1;		/* Abort */
 	  break;
 	}
-	error=file->rnd_pos(sort_form->record[0],next_pos);
+	error=file->ha_rnd_pos(sort_form->record[0],next_pos);
       }
       else
       {
-	error=file->rnd_next(sort_form->record[0]);
+	error=file->ha_rnd_next(sort_form->record[0]);
+	if (!error)
+	  update_virtual_fields(thd, sort_form);
 	if (!flag)
 	{
 	  my_store_ptr(ref_pos,ref_length,record); // Position to row
@@ -604,8 +612,8 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     }
     if (error == 0)
       param->examined_rows++;
-    if (!error && (!select ||
-                   (!select->skip_record(thd, &skip_record) && !skip_record)))
+   
+    if (error == 0 && (!select || select->skip_record(thd) > 0))
     {
       if (idx == param->keys)
       {
@@ -618,6 +626,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     }
     else
       file->unlock_row();
+      
     /* It does not make sense to read more keys in case of a fatal error */
     if (thd->is_error())
       break;
@@ -633,7 +642,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     DBUG_RETURN(HA_POS_ERROR);
   
   /* Signal we should use orignal column read and write maps */
-  sort_form->column_bitmaps_set(save_read_set, save_write_set);
+  sort_form->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);
 
   DBUG_PRINT("test",("error: %d  indexpos: %d",error,indexpos));
   if (error != HA_ERR_END_OF_FILE)
@@ -954,13 +963,13 @@ static void make_sortkey(register SORTPARAM *param,
       if (addonf->null_bit && field->is_null())
       {
         nulls[addonf->null_offset]|= addonf->null_bit;
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
 	bzero(to, addonf->length);
 #endif
       }
       else
       {
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
         uchar *end= field->pack(to, field->ptr);
 	uint length= (uint) ((to + addonf->length) - end);
 	DBUG_ASSERT((int) length >= 0);
@@ -1000,7 +1009,14 @@ static void register_used_fields(SORTPARAM *param)
     if ((field= sort_field->field))
     {
       if (field->table == table)
-      bitmap_set_bit(bitmap, field->field_index);
+      {
+        if (field->vcol_info)
+	{
+          Item *vcol_item= field->vcol_info->expr_item;
+          vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+        }                   
+        bitmap_set_bit(bitmap, field->field_index);
+      }
     }
     else
     {						// Item
@@ -1143,7 +1159,9 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
 {
   uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
-  for (uint i= 0; i < queue->elements; ++i)
+  for (uint i= queue_first_element(queue);
+       i <= queue_last_element(queue);
+       i++)
   {
     BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
     if (bp->base + bp->max_keys * key_length == reuse->base)
@@ -1201,6 +1219,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   DBUG_ENTER("merge_buffers");
 
   status_var_increment(current_thd->status_var.filesort_merge_passes);
+  current_thd->query_plan_fsort_passes++;
   if (param->not_killable)
   {
     killed= &not_killable;
@@ -1214,7 +1233,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   offset= rec_length-res_length;
   maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
   to_start_filepos= my_b_tell(to_file);
-  strpos= (uchar*) sort_buffer;
+  strpos= sort_buffer;
   org_max_rows=max_rows= param->max_rows;
 
   /* The following will fire if there is not enough space in sort_buffer */
@@ -1231,14 +1250,14 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
     first_cmp_arg= (void*) &sort_length;
   }
   if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
-                 (queue_compare) cmp, first_cmp_arg))
+                 (queue_compare) cmp, first_cmp_arg, 0, 0))
     DBUG_RETURN(1);                                /* purecov: inspected */
   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
   {
     buffpek->base= strpos;
     buffpek->max_keys= maxcount;
     strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek,
-                                                                         rec_length));
+                                                 rec_length));
     if (error == -1)
       goto err;					/* purecov: inspected */
     buffpek->max_keys= buffpek->mem_count;	// If less data in buffers than expected
@@ -1268,7 +1287,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
       error= 0;                                       /* purecov: inspected */
       goto end;                                       /* purecov: inspected */
     }
-    queue_replaced(&queue);                        // Top element has been used
+    queue_replace_top(&queue);            // Top element has been used
   }
   else
     cmp= 0;                                        // Not unique
@@ -1316,18 +1335,18 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
         if (!(error= (int) read_to_buffer(from_file,buffpek,
                                           rec_length)))
         {
-          (void) queue_remove(&queue,0);
+          (void) queue_remove_top(&queue);
           reuse_freed_buff(&queue, buffpek, rec_length);
           break;                        /* One buffer have been removed */
         }
         else if (error == -1)
           goto err;                        /* purecov: inspected */
       }
-      queue_replaced(&queue);              /* Top element has been replaced */
+      queue_replace_top(&queue);   	/* Top element has been replaced */
     }
   }
   buffpek= (BUFFPEK*) queue_top(&queue);
-  buffpek->base= sort_buffer;
+  buffpek->base= (uchar*) sort_buffer;
   buffpek->max_keys= param->keys;
 
   /*
@@ -1367,7 +1386,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
            strpos != end ;
            strpos+= rec_length)
       {     
-        if (my_b_write(to_file, (uchar *) strpos, res_length))
+        if (my_b_write(to_file, strpos, res_length))
         {
           error=1; goto err;                        
         }
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 15b017ece81..e90c3ac3e6a 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -4405,8 +4405,8 @@ void ha_ndbcluster::start_bulk_insert(ha_rows rows)
 int ha_ndbcluster::end_bulk_insert()
 {
   int error= 0;
-
   DBUG_ENTER("end_bulk_insert");
+
   // Check if last inserts need to be flushed
   if (m_bulk_insert_not_flushed)
   {
@@ -4758,7 +4758,7 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type)
   Thd_ndb *thd_ndb= get_thd_ndb(thd);
   Ndb *ndb= thd_ndb->ndb;
 
-  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
+  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: 0x%lx  "
                        "thd_ndb->lock_count: %d",
                        (long) this, (long) thd, (long) thd_ndb,
                        thd_ndb->lock_count));
@@ -6435,9 +6435,10 @@ void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
   for (;;)
   {
     Ndb_tuple_id_range_guard g(m_share);
-    if (m_skip_auto_increment &&
-        ndb->readAutoIncrementValue(m_table, g.range, auto_value) ||
-        ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size, increment, offset))
+    if ((m_skip_auto_increment &&
+         ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
+        ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size,
+                                    increment, offset))
     {
       if (--retries &&
           ndb->getNdbError().status == NdbError::TemporaryError)
@@ -8983,6 +8984,8 @@ ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
   DBUG_RETURN(FALSE);
 }
 
+#if 0 
+/* MRR/NDB is disabled, for details see method declarations in ha_ndbcluster.h */
 int
 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                                       KEY_MULTI_RANGE *ranges, 
@@ -9395,6 +9398,7 @@ found_next:
   m_multi_range_result_ptr += reclength;
   DBUG_RETURN(0);
 }
+#endif 
 
 int
 ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
@@ -9503,7 +9507,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
   thd->client_capabilities = 0;
   my_net_init(&thd->net, 0);
   thd->main_security_ctx.master_access= ~0;
-  thd->main_security_ctx.priv_user = 0;
+  thd->main_security_ctx.priv_user[0] = 0;
   /* Do not use user-supplied timeout value for system threads. */
   thd->variables.lock_wait_timeout= LONG_TIMEOUT;
 
@@ -10271,8 +10275,10 @@ bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *create_info,
   {
     Field *field= table->field[i];
     const NDBCOL *col= tab->getColumn(i);
-    if (col->getStorageType() == NDB_STORAGETYPE_MEMORY && create_info->storage_media != HA_SM_MEMORY ||
-        col->getStorageType() == NDB_STORAGETYPE_DISK && create_info->storage_media != HA_SM_DISK)
+    if ((col->getStorageType() == NDB_STORAGETYPE_MEMORY &&
+         create_info->storage_media != HA_SM_MEMORY) ||
+        (col->getStorageType() == NDB_STORAGETYPE_DISK &&
+         create_info->storage_media != HA_SM_DISK))
     {
       DBUG_PRINT("info", ("Column storage media is changed"));
       DBUG_RETURN(COMPATIBLE_DATA_NO);
@@ -11030,5 +11036,24 @@ mysql_declare_plugin(ndbcluster)
   NULL                        /* config options                  */
 }
 mysql_declare_plugin_end;
+maria_declare_plugin(ndbcluster)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &ndbcluster_storage_engine,
+  ndbcluster_hton_name,
+  "MySQL AB",
+  "Clustered, fault-tolerant tables",
+  PLUGIN_LICENSE_GPL,
+  ndbcluster_init, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100 /* 1.0 */,
+  ndb_status_variables_export,/* status variables                */
+  NULL,                       /* system variables                */
+  "1.0",                      /* string version */
+  MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */
+}
+maria_declare_plugin_end;
 
+#else
+int Sun_ar_require_a_symbol_here= 0;
 #endif
diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
index de1e36b61d2..83205084209 100644
--- a/sql/ha_ndbcluster.h
+++ b/sql/ha_ndbcluster.h
@@ -262,10 +262,20 @@ class ha_ndbcluster: public handler
   /**
    * Multi range stuff
    */
+#if 0 
+  /*
+    MRR/NDB is disabled in MariaDB. This is because in MariaDB, we've
+    backported
+     - the latest version of MRR interface (BKA needs this)
+     - the latest version of DS-MRR implementation
+    but didn't backport the latest version MRR/NDB implementation.
+
+  */
   int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                              KEY_MULTI_RANGE*ranges, uint range_count,
                              bool sorted, HANDLER_BUFFER *buffer);
   int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+#endif  
   bool null_value_index_search(KEY_MULTI_RANGE *ranges,
 			       KEY_MULTI_RANGE *end_range,
 			       HANDLER_BUFFER *buffer);
diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc
index 861b44f74b1..e46b6b25f9c 100644
--- a/sql/ha_ndbcluster_binlog.cc
+++ b/sql/ha_ndbcluster_binlog.cc
@@ -3656,7 +3656,7 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
   thd->client_capabilities= 0;
   my_net_init(&thd->net, 0);
   thd->main_security_ctx.master_access= ~0;
-  thd->main_security_ctx.priv_user= 0;
+  thd->main_security_ctx.priv_user[0]= 0;
   /* Do not use user-supplied timeout value for system threads. */
   thd->variables.lock_wait_timeout= LONG_TIMEOUT;
 
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index d7d4917be7a..b0f50c0ca58 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -53,6 +53,7 @@
 
 #include "sql_priv.h"
 #include "sql_parse.h"                          // append_file_to_dir
+#include "create_options.h"
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
 #include "ha_partition.h"
@@ -1040,8 +1041,8 @@ static bool print_admin_msg(THD* thd, const char* msg_type,
   va_list args;
   Protocol *protocol= thd->protocol;
   uint length, msg_length;
-  char msgbuf[MI_MAX_MSG_BUF];
-  char name[NAME_LEN*2+2];
+  char msgbuf[HA_MAX_MSG_BUF];
+  char name[SAFE_NAME_LEN*2+2];
 
   va_start(args, fmt);
   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
@@ -1051,7 +1052,7 @@ static bool print_admin_msg(THD* thd, const char* msg_type,
 
   if (!thd->vio_ok())
   {
-    sql_print_error("%s", msgbuf);
+    sql_print_error(fmt, args);
     return TRUE;
   }
 
@@ -1709,11 +1710,11 @@ int ha_partition::copy_partitions(ulonglong * const copied,
     uint32 new_part;
 
     late_extra_cache(reorg_part);
-    if ((result= file->ha_rnd_init(1)))
+    if ((result= file->ha_rnd_init_with_error(1)))
       goto error;
     while (TRUE)
     {
-      if ((result= file->rnd_next(m_rec0)))
+      if ((result= file->ha_rnd_next(m_rec0)))
       {
         if (result == HA_ERR_RECORD_DELETED)
           continue;                              //Probably MyISAM
@@ -1935,6 +1936,8 @@ uint ha_partition::del_ren_cre_table(const char *from,
     {
       if ((error= set_up_table_before_create(table_arg, from_buff,
                                              create_info, i, NULL)) ||
+          parse_engine_table_options(ha_thd(), (*file)->ht,
+                                     (*file)->table_share) ||
           ((error= (*file)->ha_create(from_buff, table_arg, create_info))))
         goto create_error;
     }
@@ -2551,7 +2554,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
     DBUG_RETURN(1);
   m_start_key.length= 0;
   m_rec0= table->record[0];
-  m_rec_length= table_share->reclength;
+  m_rec_length= table_share->stored_rec_length;
   alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
   alloc_len+= table_share->max_key_length;
   if (!m_ordered_rec_buffer)
@@ -2640,7 +2643,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked)
     Initialize priority queue, initialized to reading forward.
   */
   if ((error= init_queue(&m_queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
-                         0, key_rec_cmp, (void*)this)))
+                         0, key_rec_cmp, (void*)this, 0, 0)))
     goto err_handler;
 
   /*
@@ -3573,7 +3576,7 @@ ha_rows ha_partition::guess_bulk_insert_rows()
     0                       Success
 
   Note: end_bulk_insert can be called without start_bulk_insert
-        being called, see bug¤44108.
+        being called, see bug#44108.
 
 */
 
@@ -3786,6 +3789,7 @@ int ha_partition::rnd_next(uchar *buf)
   int result= HA_ERR_END_OF_FILE;
   uint part_id= m_part_spec.start_part;
   DBUG_ENTER("ha_partition::rnd_next");
+  decrement_statistics(&SSV::ha_read_rnd_next_count);
 
   if (NO_CURRENT_PART_ID == part_id)
   {
@@ -3801,7 +3805,7 @@ int ha_partition::rnd_next(uchar *buf)
   
   while (TRUE)
   {
-    result= file->rnd_next(buf);
+    result= file->ha_rnd_next(buf);
     if (!result)
     {
       m_last_part= part_id;
@@ -3887,10 +3891,10 @@ void ha_partition::position(const uchar *record)
 	 (ref_length - PARTITION_BYTES_IN_POS));
 
 #ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   bzero(ref + PARTITION_BYTES_IN_POS + ref_length,
         max_ref_length-ref_length);
-#endif /* HAVE_purify */
+#endif /* HAVE_valgrind */
 #endif
   DBUG_VOID_RETURN;
 }
@@ -3929,6 +3933,7 @@ int ha_partition::rnd_pos(uchar * buf, uchar *pos)
   uint part_id;
   handler *file;
   DBUG_ENTER("ha_partition::rnd_pos");
+  decrement_statistics(&SSV::ha_read_rnd_count);
 
   part_id= uint2korr((const uchar *) pos);
   DBUG_ASSERT(part_id < m_tot_parts);
@@ -4141,6 +4146,7 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
                                  enum ha_rkey_function find_flag)
 {
   DBUG_ENTER("ha_partition::index_read_map");
+  decrement_statistics(&SSV::ha_read_key_count);
   end_range= 0;
   m_index_scan_type= partition_index_read;
   m_start_key.key= key;
@@ -4268,6 +4274,7 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
 int ha_partition::index_first(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_first");
+  decrement_statistics(&SSV::ha_read_first_count);
 
   end_range= 0;
   m_index_scan_type= partition_index_first;
@@ -4299,6 +4306,7 @@ int ha_partition::index_first(uchar * buf)
 int ha_partition::index_last(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_last");
+  decrement_statistics(&SSV::ha_read_last_count);
 
   m_index_scan_type= partition_index_last;
   DBUG_RETURN(common_first_last(buf));
@@ -4327,39 +4335,6 @@ int ha_partition::common_first_last(uchar *buf)
 
 
 /*
-  Read last using key
-
-  SYNOPSIS
-    index_read_last_map()
-    buf                   Read row in MySQL Row Format
-    key                   Key
-    keypart_map           Which part of key is used
-
-  RETURN VALUE
-    >0                    Error code
-    0                     Success
-
-  DESCRIPTION
-    This is used in join_read_last_key to optimise away an ORDER BY.
-    Can only be used on indexes supporting HA_READ_ORDER
-*/
-
-int ha_partition::index_read_last_map(uchar *buf, const uchar *key,
-                                      key_part_map keypart_map)
-{
-  DBUG_ENTER("ha_partition::index_read_last");
-
-  m_ordered= TRUE;				// Safety measure
-  end_range= 0;
-  m_index_scan_type= partition_index_read_last;
-  m_start_key.key= key;
-  m_start_key.keypart_map= keypart_map;
-  m_start_key.flag= HA_READ_PREFIX_LAST;
-  DBUG_RETURN(common_index_read(buf, TRUE));
-}
-
-
-/*
   Optimization of the default implementation to take advantage of dynamic
   partition pruning.
 */
@@ -4429,6 +4404,7 @@ int ha_partition::index_read_idx_map(uchar *buf, uint index,
 int ha_partition::index_next(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_next");
+  decrement_statistics(&SSV::ha_read_next_count);
 
   /*
     TODO(low priority):
@@ -4465,6 +4441,7 @@ int ha_partition::index_next(uchar * buf)
 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
 {
   DBUG_ENTER("ha_partition::index_next_same");
+  decrement_statistics(&SSV::ha_read_next_count);
 
   DBUG_ASSERT(keylen == m_start_key.length);
   DBUG_ASSERT(m_index_scan_type != partition_index_last);
@@ -4492,6 +4469,7 @@ int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
 int ha_partition::index_prev(uchar * buf)
 {
   DBUG_ENTER("ha_partition::index_prev");
+  decrement_statistics(&SSV::ha_read_prev_count);
 
   /* TODO: read comment in index_next */
   DBUG_ASSERT(m_index_scan_type != partition_index_first);
@@ -4702,8 +4680,8 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
   }
   else if (is_next_same)
   {
-    if (!(error= file->index_next_same(buf, m_start_key.key,
-                                       m_start_key.length)))
+    if (!(error= file->ha_index_next_same(buf, m_start_key.key,
+                                          m_start_key.length)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);
@@ -4711,7 +4689,7 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
   }
   else 
   {
-    if (!(error= file->index_next(buf)))
+    if (!(error= file->ha_index_next(buf)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);                           // Row was in range
@@ -4766,13 +4744,26 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
       break;
     case partition_index_read:
       DBUG_PRINT("info", ("index_read on partition %d", i));
-      error= file->index_read_map(buf, m_start_key.key,
-                                  m_start_key.keypart_map,
-                                  m_start_key.flag);
+      error= file->ha_index_read_map(buf, m_start_key.key,
+                                     m_start_key.keypart_map,
+                                     m_start_key.flag);
       break;
     case partition_index_first:
       DBUG_PRINT("info", ("index_first on partition %d", i));
-      error= file->index_first(buf);
+      /*
+        MyISAM engine can fail if we call index_first() when indexes disabled
+        that happens if the table is empty.
+        Here we use file->stats.records instead of file->records() because
+        file->records() is supposed to return an EXACT count, and it can be
+        possibly slow. We don't need an exact number, an approximate one- from
+        the last ::info() call - is sufficient.
+      */
+      if (file->stats.records == 0)
+      {
+        error= HA_ERR_END_OF_FILE;
+        break;
+      }
+      error= file->ha_index_first(buf);
       break;
     case partition_index_first_unordered:
       /*
@@ -4835,7 +4826,7 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
 int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
 {
   uint i;
-  uint j= 0;
+  uint j= queue_first_element(&m_queue);
   bool found= FALSE;
   DBUG_ENTER("ha_partition::handle_ordered_index_scan");
 
@@ -4853,23 +4844,43 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
 
     switch (m_index_scan_type) {
     case partition_index_read:
-      error= file->index_read_map(rec_buf_ptr,
-                                  m_start_key.key,
-                                  m_start_key.keypart_map,
-                                  m_start_key.flag);
+      error= file->ha_index_read_map(rec_buf_ptr,
+                                     m_start_key.key,
+                                     m_start_key.keypart_map,
+                                     m_start_key.flag);
       break;
     case partition_index_first:
-      error= file->index_first(rec_buf_ptr);
+      /*
+        MyISAM engine can fail if we call index_first() when indexes disabled
+        that happens if the table is empty.
+        Here we use file->stats.records instead of file->records() because
+        file->records() is supposed to return an EXACT count, and it can be
+        possibly slow. We don't need an exact number, an approximate one- from
+        the last ::info() call - is sufficient.
+      */
+      if (file->stats.records == 0)
+      {
+        error= HA_ERR_END_OF_FILE;
+        break;
+      }
+      error= file->ha_index_first(rec_buf_ptr);
       reverse_order= FALSE;
       break;
     case partition_index_last:
-      error= file->index_last(rec_buf_ptr);
-      reverse_order= TRUE;
-      break;
-    case partition_index_read_last:
-      error= file->index_read_last_map(rec_buf_ptr,
-                                       m_start_key.key,
-                                       m_start_key.keypart_map);
+      /*
+        MyISAM engine can fail if we call index_last() when indexes disabled
+        that happens if the table is empty.
+        Here we use file->stats.records instead of file->records() because
+        file->records() is supposed to return an EXACT count, and it can be
+        possibly slow. We don't need an exact number, an approximate one- from
+        the last ::info() call - is sufficient.
+      */
+      if (file->stats.records == 0)
+      {
+        error= HA_ERR_END_OF_FILE;
+        break;
+      }
+      error= file->ha_index_last(rec_buf_ptr);
       reverse_order= TRUE;
       break;
     case partition_read_range:
@@ -4909,7 +4920,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
     */
     queue_set_max_at_top(&m_queue, reverse_order);
     queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
-    m_queue.elements= j;
+    m_queue.elements= j - queue_first_element(&m_queue);
     queue_fix(&m_queue);
     return_top_record(buf);
     table->status= 0;
@@ -4971,16 +4982,16 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     memcpy(rec_buf(part_id), table->record[0], m_rec_length);
   }
   else if (!is_next_same)
-    error= file->index_next(rec_buf(part_id));
+    error= file->ha_index_next(rec_buf(part_id));
   else
-    error= file->index_next_same(rec_buf(part_id), m_start_key.key,
-				 m_start_key.length);
+    error= file->ha_index_next_same(rec_buf(part_id), m_start_key.key,
+                                    m_start_key.length);
   if (error)
   {
     if (error == HA_ERR_END_OF_FILE)
     {
       /* Return next buffered row */
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
          DBUG_PRINT("info", ("Record returned from partition %u (2)",
@@ -4992,7 +5003,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
   DBUG_RETURN(0);
@@ -5019,11 +5030,11 @@ int ha_partition::handle_ordered_prev(uchar *buf)
   handler *file= m_file[part_id];
   DBUG_ENTER("ha_partition::handle_ordered_prev");
 
-  if ((error= file->index_prev(rec_buf(part_id))))
+  if ((error= file->ha_index_prev(rec_buf(part_id))))
   {
     if (error == HA_ERR_END_OF_FILE)
     {
-      queue_remove(&m_queue, (uint) 0);
+      queue_remove_top(&m_queue);
       if (m_queue.elements)
       {
 	return_top_record(buf);
@@ -5035,7 +5046,7 @@ int ha_partition::handle_ordered_prev(uchar *buf)
     }
     DBUG_RETURN(error);
   }
-  queue_replaced(&m_queue);
+  queue_replace_top(&m_queue);
   return_top_record(buf);
   DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
   DBUG_RETURN(0);
@@ -5354,7 +5365,7 @@ void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
   stat_info->update_time=          file->stats.update_time;
   stat_info->check_time=           file->stats.check_time;
   stat_info->check_sum= 0;
-  if (file->ha_table_flags() & HA_HAS_CHECKSUM)
+  if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
     stat_info->check_sum= file->checksum();
   return;
 }
@@ -5681,6 +5692,7 @@ int ha_partition::extra(enum ha_extra_function operation)
   case HA_EXTRA_KEYREAD:
   case HA_EXTRA_NO_KEYREAD:
   case HA_EXTRA_FLUSH:
+  case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
     DBUG_RETURN(loop_extra(operation));
 
     /* Category 2), used by non-MyISAM handlers */
@@ -5696,7 +5708,6 @@ int ha_partition::extra(enum ha_extra_function operation)
   /* Category 3), used by MyISAM handlers */
   case HA_EXTRA_PREPARE_FOR_RENAME:
     DBUG_RETURN(prepare_for_rename());
-    break;
   case HA_EXTRA_PREPARE_FOR_UPDATE:
     /*
       Needs to be run on the first partition in the range now, and 
@@ -5717,9 +5728,7 @@ int ha_partition::extra(enum ha_extra_function operation)
   case HA_EXTRA_PREPARE_FOR_DROP:
   case HA_EXTRA_FLUSH_CACHE:
   {
-    if (m_myisam)
-      DBUG_RETURN(loop_extra(operation));
-    break;
+    DBUG_RETURN(loop_extra(operation));
   }
   case HA_EXTRA_NO_READCHECK:
   {
@@ -6855,5 +6864,22 @@ mysql_declare_plugin(partition)
   NULL                        /* config options                  */
 }
 mysql_declare_plugin_end;
+maria_declare_plugin(partition)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &partition_storage_engine,
+  "partition",
+  "Mikael Ronstrom, MySQL AB",
+  "Partition Storage Engine Helper",
+  PLUGIN_LICENSE_GPL,
+  partition_initialize, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100, /* 1.0 */
+  NULL,                       /* status variables                */
+  NULL,                       /* system variables                */
+  "1.0",                      /* string version                  */
+  MariaDB_PLUGIN_MATURITY_STABLE /* maturity                     */
+}
+maria_declare_plugin_end;
 
 #endif
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 78f7a2b9662..efd0707252e 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -45,9 +45,8 @@ private:
     partition_index_first= 1,
     partition_index_first_unordered= 2,
     partition_index_last= 3,
-    partition_index_read_last= 4,
-    partition_read_range = 5,
-    partition_no_index_scan= 6
+    partition_read_range = 4,
+    partition_no_index_scan= 5
   };
   /* Data for the partition handler */
   int  m_mode;                          // Open mode
@@ -232,6 +231,7 @@ public:
     DBUG_RETURN(0);
   }
   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
+  bool check_if_supported_virtual_columns(void) { return TRUE;}
   virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
                                           uint table_changes);
 private:
@@ -448,8 +448,6 @@ public:
   virtual int index_first(uchar * buf);
   virtual int index_last(uchar * buf);
   virtual int index_next_same(uchar * buf, const uchar * key, uint keylen);
-  virtual int index_read_last_map(uchar * buf, const uchar * key,
-                                  key_part_map keypart_map);
 
   /*
     read_first_row is virtual method but is only implemented by
@@ -1100,6 +1098,29 @@ public:
     -------------------------------------------------------------------------
     virtual void append_create_info(String *packet)
   */
+
+  /*
+    the following heavily relies on the fact that all partitions
+    are in the same storage engine.
+
+    When this limitation is lifted, the following hack should go away,
+    and a proper interface for engines needs to be introduced:
+
+      an PARTITION_SHARE structure that has a pointer to the TABLE_SHARE.
+      is given to engines everywhere where TABLE_SHARE is used now
+      has members like option_struct, ha_data
+      perhaps TABLE needs to be split the same way too...
+
+    this can also be done before partition will support a mix of engines,
+    but preferably together with other incompatible API changes.
+  */
+  virtual handlerton *partition_ht() const
+  {
+    handlerton *h= m_file[0]->ht;
+    for (uint i=1; i < m_tot_parts; i++)
+      DBUG_ASSERT(h == m_file[i]->ht);
+    return h;
+  }
 };
 
 #endif /* HA_PARTITION_INCLUDED */
diff --git a/sql/handler.cc b/sql/handler.cc
index 567dbe6ea49..94b3e175ca0 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -36,10 +36,11 @@
 #include "sql_base.h"           // free_io_cache
 #include "discover.h"           // writefrm
 #include "log_event.h"          // *_rows_log_event
+#include "create_options.h"
 #include "rpl_filter.h"
 #include <myisampack.h>
 #include "transaction.h"
-#include <errno.h>
+#include "myisam.h"
 #include "probes_mysql.h"
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
@@ -73,12 +74,13 @@ static const LEX_STRING sys_table_aliases[]=
   { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
   { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
   { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
+  { C_STRING_WITH_LEN("Maria") },      { C_STRING_WITH_LEN("Aria") },
   {NullS, 0}
 };
 
 const char *ha_row_type[] = {
   "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
-  /* Reserved to be "PAGE" in future versions */ "?",
+  "PAGE",
   "?","?","?"
 };
 
@@ -97,7 +99,7 @@ static plugin_ref ha_default_plugin(THD *thd)
 {
   if (thd->variables.table_plugin)
     return thd->variables.table_plugin;
-  return my_plugin_lock(thd, &global_system_variables.table_plugin);
+  return my_plugin_lock(thd, global_system_variables.table_plugin);
 }
 
 
@@ -178,13 +180,8 @@ plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
 {
   if (hton)
   {
-    st_plugin_int **plugin= hton2plugin + hton->slot;
-    
-#ifdef DBUG_OFF
-    return my_plugin_lock(thd, plugin);
-#else
-    return my_plugin_lock(thd, &plugin);
-#endif
+    st_plugin_int *plugin= hton2plugin[hton->slot];
+    return my_plugin_lock(thd, plugin_int_to_ref(plugin));
   }
   return NULL;
 }
@@ -393,8 +390,7 @@ int ha_finalize_handlerton(st_plugin_int *plugin)
   if (!hton)
     goto end;
 
-  switch (hton->state)
-  {
+  switch (hton->state) {
   case SHOW_OPTION_NO:
   case SHOW_OPTION_DISABLED:
     break;
@@ -463,8 +459,8 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
   if (plugin->plugin->init && plugin->plugin->init(hton))
   {
     sql_print_error("Plugin '%s' init function returned error.",
-                    plugin->name.str);
-    goto err;  
+		    plugin->name.str);
+    goto err;
   }
 
   /*
@@ -491,13 +487,19 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
         if (idx == (int) DB_TYPE_DEFAULT)
         {
           sql_print_warning("Too many storage engines!");
-          goto err_deinit;
+          my_free(hton, MYF(0));
+          plugin->data= 0;
+	  goto err_deinit;
         }
         if (hton->db_type != DB_TYPE_UNKNOWN)
           sql_print_warning("Storage engine '%s' has conflicting typecode. "
                             "Assigning value %d.", plugin->plugin->name, idx);
         hton->db_type= (enum legacy_db_type) idx;
       }
+      installed_htons[hton->db_type]= hton;
+      tmp= hton->savepoint_offset;
+      hton->savepoint_offset= savepoint_alloc_size;
+      savepoint_alloc_size+= tmp;
 
       /*
         In case a plugin is uninstalled and re-installed later, it should
@@ -1121,6 +1123,12 @@ int ha_commit_trans(THD *thd, bool all)
   my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
   DBUG_ENTER("ha_commit_trans");
 
+  /* Just a random warning to test warnings pushed during autocommit. */
+  DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
+                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK)););
+
   /*
     We must not commit the normal transaction if a statement
     transaction is pending. Otherwise statement transaction
@@ -1277,6 +1285,7 @@ int ha_commit_one_phase(THD *thd, bool all)
         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
         error=1;
       }
+      /* Should this be done only if is_real_trans is set ? */
       status_var_increment(thd->status_var.ha_commit_count);
       ha_info_next= ha_info->next();
       ha_info->reset(); /* keep it conveniently zero-filled */
@@ -1291,6 +1300,9 @@ int ha_commit_one_phase(THD *thd, bool all)
 #endif
     }
   }
+#ifdef WITH_ARIA_STORAGE_ENGINE
+    ha_maria::implicit_commit(thd, TRUE);
+#endif
   /* Free resources and perform other cleanup even for 'empty' transactions. */
   if (is_real_trans)
     thd->transaction.cleanup();
@@ -1526,7 +1538,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
     while ((got= hton->recover(hton, info->list, info->len)) > 0 )
     {
       sql_print_information("Found %d prepared transaction(s) in %s",
-                            got, ha_resolve_storage_engine_name(hton));
+                            got, hton_name(hton)->str);
       for (int i=0; i < got; i ++)
       {
         my_xid x=info->list[i].get_my_xid();
@@ -1593,16 +1605,6 @@ int ha_recover(HASH *commit_list)
   if (info.commit_list)
     sql_print_information("Starting crash recovery...");
 
-#ifndef WILL_BE_DELETED_LATER
-  /*
-    for now, only InnoDB supports 2pc. It means we can always safely
-    rollback all pending transactions, without risking inconsistent data
-  */
-  DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
-  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
-  info.dry_run=FALSE;
-#endif
-
   for (info.len= MAX_XID_LIST_SIZE ; 
        info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
   {
@@ -2051,6 +2053,10 @@ int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
 handler *handler::clone(MEM_ROOT *mem_root)
 {
   handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());
+
+  if (!new_handler)
+    return NULL;
+
   /*
     Allocate handler->ref here because otherwise ha_open will allocate it
     on this->table->mem_root and we will not be able to reclaim that memory 
@@ -2058,19 +2064,29 @@ handler *handler::clone(MEM_ROOT *mem_root)
   */
   if (!(new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
     return NULL;
-  if (new_handler && !new_handler->ha_open(table,
-                                           table->s->normalized_path.str,
-                                           table->db_stat,
-                                           HA_OPEN_IGNORE_IF_LOCKED))
-    return new_handler;
-  return NULL;
+  if (new_handler->ha_open(table,
+                           table->s->normalized_path.str,
+                           table->db_stat,
+                           HA_OPEN_IGNORE_IF_LOCKED))
+    return NULL;
+  new_handler->cloned= 1;                      // Marker for debugging
+  return new_handler;
 }
 
-
-
-void handler::ha_statistic_increment(ulong SSV::*offset) const
+double handler::keyread_time(uint index, uint ranges, ha_rows rows)
 {
-  status_var_increment(table->in_use->status_var.*offset);
+  /*
+    It is assumed that we will read trough the whole key range and that all
+    key blocks are half full (normally things are much better). It is also
+    assumed that each time we read the next key from the index, the handler
+    performs a random seek, thus the cost is proportional to the number of
+    blocks read. This model does not take into account clustered indexes -
+    engines that support that (e.g. InnoDB) may want to overwrite this method.
+  */
+  double keys_per_block= (stats.block_size/2.0/
+                          (table->key_info[index].key_length +
+                           ref_length) + 1);
+  return (rows + keys_per_block - 1)/ keys_per_block;
 }
 
 void **handler::ha_data(THD *thd) const
@@ -2141,10 +2157,24 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode,
       dup_ref=ref+ALIGN_SIZE(ref_length);
     cached_table_flags= table_flags();
   }
+  rows_read= rows_changed= 0;
+  memset(index_rows_read, 0, sizeof(index_rows_read));
   DBUG_RETURN(error);
 }
 
 
+/* Initialize handler for random reading, with error handling */
+
+int handler::ha_rnd_init_with_error(bool scan)
+{
+  int error;
+  if (!(error= ha_rnd_init(scan)))
+    return 0;
+  table->file->print_error(error, MYF(0));
+  return error;
+}
+
+
 /**
   Read first row (only) from a table.
 
@@ -2156,8 +2186,6 @@ int handler::read_first_row(uchar * buf, uint primary_key)
   register int error;
   DBUG_ENTER("handler::read_first_row");
 
-  ha_statistic_increment(&SSV::ha_read_first_count);
-
   /*
     If there is very few deleted rows in the table, find the first row by
     scanning the table.
@@ -2166,15 +2194,17 @@ int handler::read_first_row(uchar * buf, uint primary_key)
   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
   {
-    (void) ha_rnd_init(1);
-    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
-    (void) ha_rnd_end();
+    if ((!(error= ha_rnd_init(1))))
+    {
+      while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
+      (void) ha_rnd_end();
+    }
   }
   else
   {
     /* Find the first row through the primary key */
-    (void) ha_index_init(primary_key, 0);
-    error=index_first(buf);
+    if (!(error = ha_index_init(primary_key, 0)))
+      error= ha_index_first(buf);
     (void) ha_index_end();
   }
   DBUG_RETURN(error);
@@ -2350,7 +2380,7 @@ int handler::update_auto_increment()
 
   if ((nr= table->next_number_field->val_int()) != 0 ||
       (table->auto_increment_field_not_null &&
-      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
+       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
   {
     /*
       Update next_insert_id if we had already generated a value in this
@@ -2545,10 +2575,10 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                         table->read_set);
   column_bitmaps_signal();
-  index_init(table->s->next_number_index, 1);
+  ha_index_init(table->s->next_number_index, 1);
   if (table->s->next_number_keypart == 0)
   {						// Autoincrement at key-start
-    error=index_last(table->record[1]);
+    error=ha_index_last(table->record[1]);
     /*
       MySQL implicitely assumes such method does locking (as MySQL decides to
       use nr+increment without checking again with the handler, in
@@ -2562,9 +2592,10 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
     key_copy(key, table->record[0],
              table->key_info + table->s->next_number_index,
              table->s->next_number_key_offset);
-    error= index_read_map(table->record[1], key,
-                          make_prev_keypart_map(table->s->next_number_keypart),
-                          HA_READ_PREFIX_LAST);
+    error= ha_index_read_map(table->record[1], key,
+                             make_prev_keypart_map(table->s->
+                                                   next_number_keypart),
+                             HA_READ_PREFIX_LAST);
     /*
       MySQL needs to call us for next row: assume we are inserting ("a",null)
       here, we return 3, and next this statement will want to insert
@@ -2579,7 +2610,7 @@ void handler::get_auto_increment(ulonglong offset, ulonglong increment,
   else
     nr= ((ulonglong) table->next_number_field->
          val_int_offset(table->s->rec_buff_length)+1);
-  index_end();
+  ha_index_end();
   (void) extra(HA_EXTRA_NO_KEYREAD);
   *first_value= nr;
 }
@@ -2640,8 +2671,18 @@ void handler::print_keydup_error(uint key_nr, const char *msg)
     - table->s->path
     - table->alias
 */
+
+#ifndef DBUG_OFF
+#define SET_FATAL_ERROR fatal_error=1
+#else
+#define SET_FATAL_ERROR
+#endif
+
 void handler::print_error(int error, myf errflag)
 {
+#ifndef DBUG_OFF
+  bool fatal_error= 0;
+#endif
   DBUG_ENTER("handler::print_error");
   DBUG_PRINT("enter",("error: %d",error));
 
@@ -2659,6 +2700,13 @@ void handler::print_error(int error, myf errflag)
   case HA_ERR_KEY_NOT_FOUND:
   case HA_ERR_NO_ACTIVE_RECORD:
   case HA_ERR_END_OF_FILE:
+    /*
+      This errors is not not normally fatal (for example for reads). However
+      if you get it during an update or delete, then its fatal.
+      As the user is calling print_error() (which is not done on read), we
+      assume something when wrong with the update or delete.
+    */
+    SET_FATAL_ERROR;
     textno=ER_KEY_NOT_FOUND;
     break;
   case HA_ERR_WRONG_MRG_TABLE_DEF:
@@ -2666,11 +2714,14 @@ void handler::print_error(int error, myf errflag)
     break;
   case HA_ERR_FOUND_DUPP_KEY:
   {
-    uint key_nr=get_dup_key(error);
-    if ((int) key_nr >= 0)
+    if (table)
     {
-      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
-      DBUG_VOID_RETURN;
+      uint key_nr=get_dup_key(error);
+      if ((int) key_nr >= 0)
+      {
+        print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
+        DBUG_VOID_RETURN;
+      }
     }
     textno=ER_DUP_KEY;
     break;
@@ -2707,21 +2758,26 @@ void handler::print_error(int error, myf errflag)
     textno=ER_DUP_UNIQUE;
     break;
   case HA_ERR_RECORD_CHANGED:
+    SET_FATAL_ERROR;
     textno=ER_CHECKREAD;
     break;
   case HA_ERR_CRASHED:
+    SET_FATAL_ERROR;
     textno=ER_NOT_KEYFILE;
     break;
   case HA_ERR_WRONG_IN_RECORD:
+    SET_FATAL_ERROR;
     textno= ER_CRASHED_ON_USAGE;
     break;
   case HA_ERR_CRASHED_ON_USAGE:
+    SET_FATAL_ERROR;
     textno=ER_CRASHED_ON_USAGE;
     break;
   case HA_ERR_NOT_A_TABLE:
     textno= error;
     break;
   case HA_ERR_CRASHED_ON_REPAIR:
+    SET_FATAL_ERROR;
     textno=ER_CRASHED_ON_REPAIR;
     break;
   case HA_ERR_OUT_OF_MEM:
@@ -2823,7 +2879,10 @@ void handler::print_error(int error, myf errflag)
 	if (temporary)
 	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
 	else
+        {
+          SET_FATAL_ERROR;
 	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
+        }
       }
       else
 	my_error(ER_GET_ERRNO,errflag,error);
@@ -2831,6 +2890,7 @@ void handler::print_error(int error, myf errflag)
     }
   }
   my_error(textno, errflag, table_share->table_name.str, error);
+  DBUG_ASSERT(!fatal_error || !debug_assert_if_crashed_table);
   DBUG_VOID_RETURN;
 }
 
@@ -3133,11 +3193,14 @@ int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
   if it is started.
 */
 
-inline
 void
-handler::mark_trx_read_write()
+handler::mark_trx_read_write_part2()
 {
   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
+
+  /* Don't call this function again for this statement */
+  mark_trx_done= TRUE;
+
   /*
     When a storage engine method is called, the transaction must
     have been started, unless it's a DDL call, for which the
@@ -3488,7 +3551,7 @@ int ha_enable_transaction(THD *thd, bool on)
 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
 {
   int error;
-  DBUG_ENTER("index_next_same");
+  DBUG_ENTER("handler::index_next_same");
   if (!(error=index_next(buf)))
   {
     my_ptrdiff_t ptrdiff= buf - table->record[0];
@@ -3533,6 +3596,7 @@ int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
         key_part->field->move_field_offset(-ptrdiff);
     }
   }
+  DBUG_PRINT("return",("%i", error));
   DBUG_RETURN(error);
 }
 
@@ -3552,12 +3616,128 @@ void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
   stat_info->update_time=          stats.update_time;
   stat_info->check_time=           stats.check_time;
   stat_info->check_sum=            0;
-  if (table_flags() & (ulong) HA_HAS_CHECKSUM)
+  if (table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_OLD_CHECKSUM))
     stat_info->check_sum= checksum();
   return;
 }
 
 
+/*
+  Updates the global table stats with the TABLE this handler represents
+*/
+
+void handler::update_global_table_stats()
+{
+  TABLE_STATS * table_stats;
+
+  status_var_add(table->in_use->status_var.rows_read, rows_read);
+
+  if (!table->in_use->userstat_running)
+  {
+    rows_read= rows_changed= 0;
+    return;
+  }
+
+  if (rows_read + rows_changed == 0)
+    return;                                     // Nothing to update.
+
+  DBUG_ASSERT(table->s && table->s->table_cache_key.str);
+
+  pthread_mutex_lock(&LOCK_global_table_stats);
+  /* Gets the global table stats, creating one if necessary. */
+  if (!(table_stats= (TABLE_STATS*)
+        hash_search(&global_table_stats,
+                    (uchar*) table->s->table_cache_key.str,
+                    table->s->table_cache_key.length)))
+  {
+    if (!(table_stats = ((TABLE_STATS*)
+                         my_malloc(sizeof(TABLE_STATS),
+                                   MYF(MY_WME | MY_ZEROFILL)))))
+    {
+      /* Out of memory error already given */
+      goto end;
+    }
+    memcpy(table_stats->table, table->s->table_cache_key.str,
+           table->s->table_cache_key.length);
+    table_stats->table_name_length= table->s->table_cache_key.length;
+    table_stats->engine_type= ht->db_type;
+    /* No need to set variables to 0, as we use MY_ZEROFILL above */
+
+    if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
+    {
+      /* Out of memory error is already given */
+      my_free(table_stats, 0);
+      goto end;
+    }
+  }
+  // Updates the global table stats.
+  table_stats->rows_read+=    rows_read;
+  table_stats->rows_changed+= rows_changed;
+  table_stats->rows_changed_x_indexes+= (rows_changed *
+                                         (table->s->keys ? table->s->keys :
+                                          1));
+  rows_read= rows_changed= 0;
+end:
+  pthread_mutex_unlock(&LOCK_global_table_stats);
+}
+
+
+/*
+  Updates the global index stats with this handler's accumulated index reads.
+*/
+
+void handler::update_global_index_stats()
+{
+  DBUG_ASSERT(table->s);
+
+  if (!table->in_use->userstat_running)
+  {
+    /* Reset all index read values */
+    bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
+    return;
+  }
+
+  for (uint index = 0; index < table->s->keys; index++)
+  {
+    if (index_rows_read[index])
+    {
+      INDEX_STATS* index_stats;
+      uint key_length;
+      KEY *key_info = &table->key_info[index];  // Rows were read using this
+
+      DBUG_ASSERT(key_info->cache_name);
+      if (!key_info->cache_name)
+        continue;
+      key_length= table->s->table_cache_key.length + key_info->name_length + 1;
+      pthread_mutex_lock(&LOCK_global_index_stats);
+      // Gets the global index stats, creating one if necessary.
+      if (!(index_stats= (INDEX_STATS*) hash_search(&global_index_stats,
+                                                    key_info->cache_name,
+                                                    key_length)))
+      {
+        if (!(index_stats = ((INDEX_STATS*)
+                             my_malloc(sizeof(INDEX_STATS),
+                                       MYF(MY_WME | MY_ZEROFILL)))))
+          goto end;                             // Error is already given
+
+        memcpy(index_stats->index, key_info->cache_name, key_length);
+        index_stats->index_name_length= key_length;
+        if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
+        {
+          my_free(index_stats, 0);
+          goto end;
+        }
+      }
+      /* Updates the global index stats. */
+      index_stats->rows_read+= index_rows_read[index];
+      index_rows_read[index]= 0;
+end:
+      pthread_mutex_unlock(&LOCK_global_index_stats);
+    }
+  }
+}
+
+
 /****************************************************************************
 ** Some general functions that isn't in the handler class
 ****************************************************************************/
@@ -3728,11 +3908,13 @@ int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
     uint tmp_block_size= (uint) key_cache->param_block_size;
     uint division_limit= key_cache->param_division_limit;
     uint age_threshold=  key_cache->param_age_threshold;
+    uint partitions= key_cache->param_partitions;
     mysql_mutex_unlock(&LOCK_global_system_variables);
     DBUG_RETURN(!init_key_cache(key_cache,
 				tmp_block_size,
 				tmp_buff_size,
-				division_limit, age_threshold));
+				division_limit, age_threshold,
+                                partitions));
   }
   DBUG_RETURN(0);
 }
@@ -3762,10 +3944,12 @@ int ha_resize_key_cache(KEY_CACHE *key_cache)
 
 
 /**
-  Change parameters for key cache (like size)
+  Change parameters for key cache (like division_limit)
 */
 int ha_change_key_cache_param(KEY_CACHE *key_cache)
 {
+  DBUG_ENTER("ha_change_key_cache_param");
+
   if (key_cache->key_cache_inited)
   {
     mysql_mutex_lock(&LOCK_global_system_variables);
@@ -3774,9 +3958,35 @@ int ha_change_key_cache_param(KEY_CACHE *key_cache)
     mysql_mutex_unlock(&LOCK_global_system_variables);
     change_key_cache_param(key_cache, division_limit, age_threshold);
   }
-  return 0;
+  DBUG_RETURN(0);
 }
 
+
+/**
+  Repartition key cache 
+*/
+int ha_repartition_key_cache(KEY_CACHE *key_cache)
+{
+  DBUG_ENTER("ha_repartition_key_cache");
+
+  if (key_cache->key_cache_inited)
+  {
+    pthread_mutex_lock(&LOCK_global_system_variables);
+    size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
+    long tmp_block_size= (long) key_cache->param_block_size;
+    uint division_limit= key_cache->param_division_limit;
+    uint age_threshold=  key_cache->param_age_threshold;
+    uint partitions= key_cache->param_partitions;
+    pthread_mutex_unlock(&LOCK_global_system_variables);
+    DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
+				       tmp_buff_size,
+				       division_limit, age_threshold,
+                                       partitions));
+  }
+  DBUG_RETURN(0);
+}
+
+
 /**
   Move all tables from one key cache to another one.
 */
@@ -3877,7 +4087,8 @@ ha_find_files(THD *thd,const char *db,const char *path,
   int error= 0;
   DBUG_ENTER("ha_find_files");
   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d", 
-		       db, path, wild ? wild : "NULL", dir));
+		       val_or_null(db), val_or_null(path),
+                       val_or_null(wild), dir));
   st_find_files_args args= {db, path, wild, dir, files};
 
   plugin_foreach(thd, find_files_handlerton,
@@ -4069,133 +4280,6 @@ void ha_binlog_log_query(THD *thd, handlerton *hton,
 }
 #endif
 
-/**
-  Read the first row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-  @param ranges              An array of KEY_MULTI_RANGE range descriptions.
-  @param range_count         Number of ranges in 'ranges'.
-  @param sorted	      If result should be sorted per key.
-  @param buffer              A HANDLER_BUFFER for internal handler usage.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_first()
-    returns 0.
-    - Sorting is done within each range. If you want an overall sort, enter
-    'ranges' with sorted ranges.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                    KEY_MULTI_RANGE *ranges, uint range_count,
-                                    bool sorted, HANDLER_BUFFER *buffer)
-{
-  int result= HA_ERR_END_OF_FILE;
-  DBUG_ENTER("handler::read_multi_range_first");
-  multi_range_sorted= sorted;
-  multi_range_buffer= buffer;
-
-  table->mark_columns_used_by_index_no_reset(active_index, table->read_set);
-  table->column_bitmaps_set(table->read_set, table->write_set);
-
-  for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
-       multi_range_curr < multi_range_end;
-       multi_range_curr++)
-  {
-    result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                             &multi_range_curr->start_key : 0,
-                             multi_range_curr->end_key.keypart_map ?
-                             &multi_range_curr->end_key : 0,
-                             test(multi_range_curr->range_flag & EQ_RANGE),
-                             multi_range_sorted);
-    if (result != HA_ERR_END_OF_FILE)
-      break;
-  }
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("result %d", result));
-  DBUG_RETURN(result);
-}
-
-
-/**
-  Read the next row of a multi-range set.
-
-  @param found_range_p       Returns a pointer to the element in 'ranges' that
-                             corresponds to the returned row.
-
-  @note
-    - Record is read into table->record[0].
-    - *found_range_p returns a valid value only if read_multi_range_next()
-    returns 0.
-
-  @retval
-    0			OK, found a row
-  @retval
-    HA_ERR_END_OF_FILE	No (more) rows in range
-  @retval
-    \#			Error code
-*/
-int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
-{
-  int result;
-  DBUG_ENTER("handler::read_multi_range_next");
-
-  /* We should not be called after the last call returned EOF. */
-  DBUG_ASSERT(multi_range_curr < multi_range_end);
-
-  do
-  {
-    /* Save a call if there can be only one row in range. */
-    if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
-    {
-      result= read_range_next();
-
-      /* On success or non-EOF errors jump to the end. */
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-    else
-    {
-      if (was_semi_consistent_read())
-        goto scan_it_again;
-      /*
-        We need to set this for the last range only, but checking this
-        condition is more expensive than just setting the result code.
-      */
-      result= HA_ERR_END_OF_FILE;
-    }
-
-    multi_range_curr++;
-scan_it_again:
-    /* Try the next range(s) until one matches a record. */
-    for (; multi_range_curr < multi_range_end; multi_range_curr++)
-    {
-      result= read_range_first(multi_range_curr->start_key.keypart_map ?
-                               &multi_range_curr->start_key : 0,
-                               multi_range_curr->end_key.keypart_map ?
-                               &multi_range_curr->end_key : 0,
-                               test(multi_range_curr->range_flag & EQ_RANGE),
-                               multi_range_sorted);
-      if (result != HA_ERR_END_OF_FILE)
-        break;
-    }
-  }
-  while ((result == HA_ERR_END_OF_FILE) &&
-         (multi_range_curr < multi_range_end));
-
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
-  DBUG_RETURN(result);
-}
-
 
 /**
   Read first row between two ranges.
@@ -4235,17 +4319,16 @@ int handler::read_range_first(const key_range *start_key,
   range_key_part= table->key_info[active_index].key_part;
 
   if (!start_key)			// Read first record
-    result= index_first(table->record[0]);
+    result= ha_index_first(table->record[0]);
   else
-    result= index_read_map(table->record[0],
-                           start_key->key,
-                           start_key->keypart_map,
-                           start_key->flag);
+    result= ha_index_read_map(table->record[0],
+                              start_key->key,
+                              start_key->keypart_map,
+                              start_key->flag);
   if (result)
     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) 
 		? HA_ERR_END_OF_FILE
 		: result);
-
   DBUG_RETURN (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
 }
 
@@ -4271,11 +4354,11 @@ int handler::read_range_next()
   if (eq_range)
   {
     /* We trust that index_next_same always gives a row in range */
-    DBUG_RETURN(index_next_same(table->record[0],
-                                end_range->key,
-                                end_range->length));
+    DBUG_RETURN(ha_index_next_same(table->record[0],
+                                   end_range->key,
+                                   end_range->length));
   }
-  result= index_next(table->record[0]);
+  result= ha_index_next(table->record[0]);
   if (result)
     DBUG_RETURN(result);
   DBUG_RETURN(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
@@ -4300,7 +4383,7 @@ int handler::read_range_next()
 int handler::compare_key(key_range *range)
 {
   int cmp;
-  if (!range)
+  if (!range || in_range_check_pushed_down)
     return 0;					// No max range
   cmp= key_cmp(range_key_part, range->key, range->length);
   if (!cmp)
@@ -4309,11 +4392,30 @@ int handler::compare_key(key_range *range)
 }
 
 
+/*
+  Same as compare_key() but doesn't check have in_range_check_pushed_down.
+  This is used by index condition pushdown implementation.
+*/
+
+int handler::compare_key2(key_range *range)
+{
+  int cmp;
+  if (!range)
+    return 0;					// no max range
+  cmp= key_cmp(range_key_part, range->key, range->length);
+  if (!cmp)
+    cmp= key_compare_result_on_equal;
+  return cmp;
+}
+
+
 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
 {
   int error, error1;
+  LINT_INIT(error1);
+
   error= index_init(index, 0);
   if (!error)
   {
@@ -4443,7 +4545,7 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
   {
     if (db_type->state != SHOW_OPTION_YES)
     {
-      const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
+      const LEX_STRING *name= hton_name(db_type);
       result= stat_print(thd, name->str, name->length,
                          "", 0, "DISABLED", 8) ? 1 : 0;
     }
@@ -4454,6 +4556,8 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
 
   if (!result)
     my_eof(thd);
+  else if (!thd->is_error())
+    my_error(ER_GET_ERRNO, MYF(0), 0);
   return result;
 }
 
@@ -4714,12 +4818,13 @@ int handler::ha_write_row(uchar *buf)
 
   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_write_count);
 
   error= write_row(buf);
   MYSQL_INSERT_ROW_DONE(error);
   if (unlikely(error))
     DBUG_RETURN(error);
-
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
     DBUG_RETURN(error); /* purecov: inspected */
   DBUG_RETURN(0);
@@ -4739,11 +4844,13 @@ int handler::ha_update_row(const uchar *old_data, uchar *new_data)
 
   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_update_count);
 
   error= update_row(old_data, new_data);
   MYSQL_UPDATE_ROW_DONE(error);
   if (unlikely(error))
     return error;
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
     return error;
   return 0;
@@ -4756,11 +4863,13 @@ int handler::ha_delete_row(const uchar *buf)
 
   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
   mark_trx_read_write();
+  increment_statistics(&SSV::ha_delete_count);
 
   error= delete_row(buf);
   MYSQL_DELETE_ROW_DONE(error);
   if (unlikely(error))
     return error;
+  rows_changed++;
   if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
     return error;
   return 0;
diff --git a/sql/handler.h b/sql/handler.h
index b1d64a1114b..bbdc38eba4d 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1,6 +1,6 @@
 #ifndef HANDLER_INCLUDED
 #define HANDLER_INCLUDED
-
+#error don't forget to merge mysql_priv.h!
 /* Copyright 2000-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
 
    This program is free software; you can redistribute it and/or modify
@@ -33,6 +33,10 @@
 #include <ft_global.h>
 #include <keycache.h>
 
+#if MAX_KEY > 128
+#error MAX_KEY is too large.  Values up to 128 are supported.
+#endif
+
 // the following is for checking tables
 
 #define HA_ADMIN_ALREADY_DONE	  1
@@ -113,7 +117,8 @@
 #define HA_CAN_FULLTEXT        (1 << 21)
 #define HA_CAN_SQL_HANDLER     (1 << 22)
 #define HA_NO_AUTO_INCREMENT   (1 << 23)
-#define HA_HAS_CHECKSUM        (1 << 24)
+/* Has automatic checksums and uses the old checksum format */
+#define HA_HAS_OLD_CHECKSUM    (1 << 24)
 /* Table data are stored in separate files (for lower_case_table_names) */
 #define HA_FILE_BASED	       (1 << 26)
 #define HA_NO_VARCHAR	       (1 << 27)
@@ -153,6 +158,10 @@
     ordered.
 */
 #define HA_DUPLICATE_KEY_NOT_IN_ORDER    (LL(1) << 36)
+/* Has automatic checksums and uses the new checksum format */
+#define HA_HAS_NEW_CHECKSUM    (LL(1) << 37)
+
+#define HA_MRR_CANT_SORT       (LL(1) << 38)
 
 /*
   Set of all binlog flags. Currently only contain the capabilities
@@ -169,6 +178,15 @@
 #define HA_KEYREAD_ONLY         64	/* Support HA_EXTRA_KEYREAD */
 
 /*
+  Index scan will not return records in rowid order. Not guaranteed to be
+  set for unordered (e.g. HASH) indexes.
+*/
+#define HA_KEY_SCAN_NOT_ROR     128 
+#define HA_DO_INDEX_COND_PUSHDOWN  256 /* Supports Index Condition Pushdown */
+
+
+
+/*
   bits in alter_table_flags:
 */
 /*
@@ -221,12 +239,6 @@
 #define HA_FAST_CHANGE_PARTITION                (1L << 13)
 #define HA_PARTITION_ONE_PHASE                  (1L << 14)
 
-/*
-  Index scan will not return records in rowid order. Not guaranteed to be
-  set for unordered (e.g. HASH) indexes.
-*/
-#define HA_KEY_SCAN_NOT_ROR     128 
-
 /* operations for disable/enable indexes */
 #define HA_KEY_SWITCH_NONUNIQ      0
 #define HA_KEY_SWITCH_ALL          1
@@ -273,8 +285,6 @@
 #define HA_LEX_CREATE_TMP_TABLE	1
 #define HA_LEX_CREATE_IF_NOT_EXISTS 2
 #define HA_LEX_CREATE_TABLE_LIKE 4
-#define HA_OPTION_NO_CHECKSUM	(1L << 17)
-#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
 #define HA_MAX_REC_LENGTH	65535
 
 /* Table caching type */
@@ -315,6 +325,11 @@ enum legacy_db_type
   DB_TYPE_FIRST_DYNAMIC=42,
   DB_TYPE_DEFAULT=127 // Must be last
 };
+/*
+  Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have
+  a hard-coded type value here.
+ */
+#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN
 
 enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED,
 		ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED,
@@ -363,9 +378,8 @@ enum enum_binlog_command {
 #define HA_CREATE_USED_PASSWORD         (1L << 17)
 #define HA_CREATE_USED_CONNECTION       (1L << 18)
 #define HA_CREATE_USED_KEY_BLOCK_SIZE   (1L << 19)
-/** Unused. Reserved for future versions. */
+/* The following two are used by Maria engine: */
 #define HA_CREATE_USED_TRANSACTIONAL    (1L << 20)
-/** Unused. Reserved for future versions. */
 #define HA_CREATE_USED_PAGE_CHECKSUM    (1L << 21)
 
 typedef ulonglong my_xid; // this line is the same as in log_event.h
@@ -546,6 +560,7 @@ struct TABLE;
 enum enum_schema_tables
 {
   SCH_CHARSETS= 0,
+  SCH_CLIENT_STATS,
   SCH_COLLATIONS,
   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
   SCH_COLUMNS,
@@ -555,15 +570,17 @@ enum enum_schema_tables
   SCH_FILES,
   SCH_GLOBAL_STATUS,
   SCH_GLOBAL_VARIABLES,
+  SCH_INDEX_STATS,
+  SCH_KEY_CACHES,
   SCH_KEY_COLUMN_USAGE,
   SCH_OPEN_TABLES,
   SCH_PARAMETERS,
   SCH_PARTITIONS,
   SCH_PLUGINS,
+  SCH_PROCEDURES,
   SCH_PROCESSLIST,
   SCH_PROFILES,
   SCH_REFERENTIAL_CONSTRAINTS,
-  SCH_PROCEDURES,
   SCH_SCHEMATA,
   SCH_SCHEMA_PRIVILEGES,
   SCH_SESSION_STATUS,
@@ -575,8 +592,10 @@ enum enum_schema_tables
   SCH_TABLE_CONSTRAINTS,
   SCH_TABLE_NAMES,
   SCH_TABLE_PRIVILEGES,
+  SCH_TABLE_STATS,
   SCH_TRIGGERS,
   SCH_USER_PRIVILEGES,
+  SCH_USER_STATS,
   SCH_VARIABLES,
   SCH_VIEWS
 };
@@ -615,6 +634,103 @@ struct handler_log_file_data {
   enum log_status status;
 };
 
+/*
+  Definitions for engine-specific table/field/index options in the CREATE TABLE.
+
+  Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER,
+  HA_FOPTION_ENUM, HA_IOPTION_STRING, etc).
+
+  Every macros takes the option name, and the name of the underlying field of
+  the appropriate C structure. The "appropriate C structure" is
+  ha_table_option_struct for table level options,
+  ha_field_option_struct for field level options,
+  ha_index_option_struct for key level options. The engine either
+  defines a structure of this name, or uses #define's to map
+  these "appropriate" names to the actual structure type name.
+
+  ULL options use a ulonglong as the backing store.
+  HA_*OPTION_NUMBER() takes the option name, the structure field name,
+  the default value for the option, min, max, and blk_siz values.
+
+  STRING options use a char* as a backing store.
+  HA_*OPTION_STRING takes the option name and the structure field name.
+  The default value will be 0.
+
+  ENUM options use a uint as a backing store (not enum!!!).
+  HA_*OPTION_ENUM takes the option name, the structure field name,
+  the default value for the option as a number, and a string with the
+  permitted values for this enum - one string with comma separated values,
+  for example: "gzip,bzip2,lzma"
+
+  BOOL options use a bool as a backing store.
+  HA_*OPTION_BOOL takes the option name, the structure field name,
+  and the default value for the option.
+  From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0.
+
+  The name of the option is limited to 255 bytes,
+  the value (for string options) - to the 32767 bytes.
+
+  See ha_example.cc for an example.
+*/
+enum ha_option_type { HA_OPTION_TYPE_ULL,    /* unsigned long long */
+                      HA_OPTION_TYPE_STRING, /* char * */
+                      HA_OPTION_TYPE_ENUM,   /* uint */
+                      HA_OPTION_TYPE_BOOL};  /* bool */
+
+#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz)   \
+  { HA_OPTION_TYPE_ULL, name, sizeof(name)-1,                        \
+    offsetof(struc, field), def, min, max, blk_siz, 0 }
+#define HA_xOPTION_STRING(name, struc, field)                        \
+  { HA_OPTION_TYPE_STRING, name, sizeof(name)-1,                     \
+    offsetof(struc, field), 0, 0, 0, 0, 0 }
+#define HA_xOPTION_ENUM(name, struc, field, values, def)             \
+  { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1,                       \
+    offsetof(struc, field), def, 0,                                  \
+    sizeof(values)-1, 0, values }
+#define HA_xOPTION_BOOL(name, struc, field, def)                     \
+  { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1,                       \
+    offsetof(struc, field), def, 0, 1, 0, 0 }
+#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz)
+#define HA_TOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_table_option_struct, field)
+#define HA_TOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def)
+#define HA_TOPTION_BOOL(name, field, def)                            \
+  HA_xOPTION_BOOL(name, ha_table_option_struct, field, def)
+#define HA_TOPTION_END HA_xOPTION_END
+
+#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, blk_siz)
+#define HA_FOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_field_option_struct, field)
+#define HA_FOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def)
+#define HA_FOPTION_BOOL(name, field, def)                            \
+  HA_xOPTION_BOOL(name, ha_field_option_struct, field, def)
+#define HA_FOPTION_END HA_xOPTION_END
+
+#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz)          \
+  HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz)
+#define HA_IOPTION_STRING(name, field)                               \
+  HA_xOPTION_STRING(name, ha_index_option_struct, field)
+#define HA_IOPTION_ENUM(name, field, values, def)                    \
+  HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_BOOL(name, field, values, def)                    \
+  HA_xOPTION_BOOL(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_END HA_xOPTION_END
+
+typedef struct st_ha_create_table_option {
+  enum ha_option_type type;
+  const char *name;
+  size_t name_length;
+  ptrdiff_t offset;
+  ulonglong def_value;
+  ulonglong min_value, max_value, block_size;
+  const char *values;
+} ha_create_table_option;
 
 enum handler_iterator_type
 {
@@ -676,8 +792,9 @@ struct handlerton
   SHOW_COMP_OPTION state;
 
   /*
-    Historical number used for frm file to determine the correct storage engine.
-    This is going away and new engines will just use "name" for this.
+    Historical number used for frm file to determine the correct
+    storage engine.  This is going away and new engines will just use
+    "name" for this.
   */
   enum legacy_db_type db_type;
   /*
@@ -787,17 +904,28 @@ struct handlerton
    int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
                                  const char *name);
    uint32 license; /* Flag for Engine License */
-   void *data; /* Location for engines to keep personal structures */
+   /*
+     Optional clauses in the CREATE/ALTER TABLE
+   */
+   ha_create_table_option *table_options; // table level options
+   ha_create_table_option *field_options; // these are specified per field
+   ha_create_table_option *index_options; // these are specified per index
+
 };
 
 
+inline LEX_STRING *hton_name(const handlerton *hton)
+{
+  return &(hton2plugin[hton->slot]->name);
+}
+
+
 /* Possible flags of a handlerton (there can be 32 of them) */
 #define HTON_NO_FLAGS                 0
 #define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0)
 #define HTON_ALTER_NOT_SUPPORTED     (1 << 1) //Engine does not support alter
 #define HTON_CAN_RECREATE            (1 << 2) //Delete all is used fro truncate
 #define HTON_HIDDEN                  (1 << 3) //Engine does not appear in lists
-#define HTON_FLUSH_AFTER_RENAME      (1 << 4)
 #define HTON_NOT_USER_SELECTABLE     (1 << 5)
 #define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported
 #define HTON_SUPPORT_LOG_TABLES      (1 << 7) //Engine supports log tables
@@ -843,6 +971,7 @@ struct THD_TRANS
 
   void reset() { no_2pc= FALSE; modified_non_trans_table= FALSE; }
   bool is_empty() const { return ha_list == NULL; }
+  THD_TRANS() {}                        /* Remove gcc warning */
 };
 
 
@@ -953,9 +1082,9 @@ typedef struct {
   ulonglong delete_length;
   ha_rows records;
   ulong mean_rec_length;
-  ulong create_time;
-  ulong check_time;
-  ulong update_time;
+  time_t create_time;
+  time_t check_time;
+  time_t update_time;
   ulonglong check_sum;
 } PARTITION_STATS;
 
@@ -968,7 +1097,7 @@ class partition_info;
 struct st_partition_iter;
 #define NOT_A_PARTITION_ID ((uint32)-1)
 
-enum enum_ha_unused { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
 
 typedef struct st_ha_create_information
 {
@@ -1000,11 +1129,16 @@ typedef struct st_ha_create_information
   uint options;				/* OR of HA_CREATE_ options */
   uint merge_insert_method;
   uint extra_size;                      /* length of extra data segment */
-  enum enum_ha_unused unused1;
-  bool frm_only;                        /* 1 if no ha_create_table() */
-  bool varchar;                         /* 1 if table has a VARCHAR */
-  enum ha_storage_media storage_media;  /* DEFAULT, DISK or MEMORY */
-  enum enum_ha_unused unused2;
+  enum ha_choice transactional;
+  bool frm_only;                        ///< 1 if no ha_create_table()
+  bool varchar;                         ///< 1 if table has a VARCHAR
+  enum ha_storage_media storage_media;  ///< DEFAULT, DISK or MEMORY
+  enum ha_choice page_checksum;         ///< If we have page_checksums
+  engine_option_value *option_list;     ///< list of table create options
+  /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
+  void *option_struct;           ///< structure with parsed table options
+  void **fileds_option_struct;   ///< array of field option structures
+  void **indexes_option_struct;  ///< array of index option structures
 } HA_CREATE_INFO;
 
 
@@ -1079,11 +1213,191 @@ typedef struct st_ha_check_opt
   st_ha_check_opt() {}                        /* Remove gcc warning */
   uint flags;       /* isam layer flags (e.g. for myisamchk) */
   uint sql_flags;   /* sql layer flags - for something myisamchk cannot do */
-  KEY_CACHE *key_cache;	/* new key cache when changing key cache */
+  KEY_CACHE *key_cache; /* new key cache when changing key cache */
   void init();
 } HA_CHECK_OPT;
 
 
+/********************************************************************************
+ * MRR
+ ********************************************************************************/
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+  /*
+    Initialize the traversal of range sequence
+    
+    SYNOPSIS
+      init()
+        init_params  The seq_init_param parameter 
+        n_ranges     The number of ranges obtained 
+        flags        A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+    RETURN
+      An opaque value to be used as RANGE_SEQ_IF::next() parameter
+  */
+  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+  /*
+    Get the next range in the range sequence
+
+    SYNOPSIS
+      next()
+        seq    The value returned by RANGE_SEQ_IF::init()
+        range  OUT Information about the next range
+    
+    RETURN
+      0 - Ok, the range structure filled with info about the next range
+      1 - No more ranges
+  */
+  uint (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+
+  /*
+    Check whether range_info orders to skip the next record
+
+    SYNOPSIS
+      skip_record()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range 
+                    (Ignored if MRR_NO_ASSOCIATION is set)
+        rowid       Rowid of the record to be checked (ignored if set to 0)
+    
+    RETURN
+      1 - Record with this range_info and/or this rowid shall be filtered
+          out from the stream of records returned by multi_range_read_next()
+      0 - The record shall be left in the stream
+  */ 
+  bool (*skip_record) (range_seq_t seq, char *range_info, uchar *rowid);
+
+  /*
+    Check if the record combination matches the index condition
+    SYNOPSIS
+      skip_index_tuple()
+        seq         The value returned by RANGE_SEQ_IF::init()
+        range_info  Information about the next range 
+    
+    RETURN
+      0 - The record combination satisfies the index condition
+      1 - Otherwise
+  */ 
+  bool (*skip_index_tuple) (range_seq_t seq, char *range_info);
+} RANGE_SEQ_IF;
+
+class COST_VECT
+{ 
+public:
+  double io_count;     /* number of I/O                 */
+  double avg_io_cost;  /* cost of an average I/O oper.  */
+  double cpu_cost;     /* cost of operations in CPU     */
+  double mem_cost;     /* cost of used memory           */ 
+  double import_cost;  /* cost of remote operations     */
+  
+  enum { IO_COEFF=1 };
+  enum { CPU_COEFF=1 };
+  enum { MEM_COEFF=1 };
+  enum { IMPORT_COEFF=1 };
+
+  COST_VECT() {}                              // keep gcc happy
+
+  double total_cost() 
+  {
+    return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+  }
+
+  void zero()
+  {
+    avg_io_cost= 1.0;
+    io_count= cpu_cost= mem_cost= import_cost= 0.0;
+  }
+
+  void multiply(double m)
+  {
+    io_count *= m;
+    cpu_cost *= m;
+    import_cost *= m;
+    /* Don't multiply mem_cost */
+  }
+
+  void add(const COST_VECT* cost)
+  {
+    double io_count_sum= io_count + cost->io_count;
+    add_io(cost->io_count, cost->avg_io_cost);
+    io_count= io_count_sum;
+    cpu_cost += cost->cpu_cost;
+  }
+  void add_io(double add_io_cnt, double add_avg_cost)
+  {
+    double io_count_sum= io_count + add_io_cnt;
+    avg_io_cost= (io_count * avg_io_cost + 
+                  add_io_cnt * add_avg_cost) / io_count_sum;
+    io_count= io_count_sum;
+  }
+
+  /*
+    To be used when we go from old single value-based cost calculations to
+    the new COST_VECT-based.
+  */
+  void convert_from_cost(double cost)
+  {
+    zero();
+    avg_io_cost= 1.0;
+    io_count= cost;
+  }
+};
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost);
+
+/*
+  The below two are not used (and not handled) in this milestone of this WL
+  entry because there seems to be no use for them at this stage of
+  implementation.
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY  2
+
+/* 
+  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+  'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/* 
+  The MRR user will provide ranges in key order, and MRR implementation
+  must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/* 
+  The passed memory buffer is of maximum possible size, the caller can't
+  assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+  Flag set <=> default MRR implementation is used
+  (The choice is made by **_info[_const]() function which may set this
+   flag. SQL layer remembers the flag value and then passes it to
+   multi_read_range_init().
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
+/*
+  Used only as parameter to multi_range_read_info():
+  Flag set <=> the caller guarantees that the bounds of the scanned ranges
+  will not have NULL values.
+*/
+#define HA_MRR_NO_NULL_ENDPOINTS 128
+
+
 
 /*
   This is a buffer area that the handler can use to store rows.
@@ -1094,8 +1408,8 @@ typedef struct st_ha_check_opt
 
 typedef struct st_handler_buffer
 {
-  const uchar *buffer;         /* Buffer one can start using */
-  const uchar *buffer_end;     /* End of buffer */
+  /* const? */uchar *buffer;         /* Buffer one can start using */
+  /* const? */uchar *buffer_end;     /* End of buffer */
   uchar *end_of_used_area;     /* End of area that was used by handler */
 } HANDLER_BUFFER;
 
@@ -1121,16 +1435,21 @@ public:
   ha_rows records;
   ha_rows deleted;			/* Deleted records */
   ulong mean_rec_length;		/* physical reclength */
-  ulong create_time;			/* When table was created */
-  ulong check_time;
-  ulong update_time;
+  time_t create_time;			/* When table was created */
+  time_t check_time;
+  time_t update_time;
   uint block_size;			/* index block size */
 
+  /*
+    number of buffer bytes that native mrr implementation needs,
+  */
+  uint mrr_length_per_rec; 
+
   ha_statistics():
     data_file_length(0), max_data_file_length(0),
     index_file_length(0), delete_length(0), auto_increment_value(0),
     records(0), deleted(0), mean_rec_length(0), create_time(0),
-    check_time(0), update_time(0), block_size(0)
+    check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
   {}
 };
 
@@ -1169,11 +1488,18 @@ public:
 
   ha_statistics stats;
 
-  /** The following are for read_multi_range */
-  bool multi_range_sorted;
-  KEY_MULTI_RANGE *multi_range_curr;
-  KEY_MULTI_RANGE *multi_range_end;
-  HANDLER_BUFFER *multi_range_buffer;
+  /** MultiRangeRead-related members: */
+  range_seq_t mrr_iter;    /* Interator to traverse the range sequence */
+  RANGE_SEQ_IF mrr_funcs;  /* Range sequence traversal functions */
+  HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
+  uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+  /* TRUE <=> source MRR ranges and the output are ordered */
+  bool mrr_is_output_sorted;
+
+  /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
+  bool mrr_have_range;
+  /** Current range (the one we're now returning rows from) */
+  KEY_MULTI_RANGE mrr_cur_range;
 
   /** The following are for read_range() */
   key_range save_end_range, *end_range;
@@ -1184,12 +1510,20 @@ public:
   uint errkey;				/* Last dup key */
   uint key_used_on_scan;
   uint active_index;
+  /* 
+    TRUE <=> the engine guarantees that returned records are within the range
+    being scanned.
+  */
+  bool in_range_check_pushed_down;
+
   /** Length of ref (1-8 or the clustered key length) */
   uint ref_length;
   FT_INFO *ft_handler;
   enum {NONE=0, INDEX, RND} inited;
   bool locked;
   bool implicit_emptied;                /* Can be !=0 only if HEAP */
+  bool mark_trx_done;
+  bool cloned;                          /* 1 if this was created with clone */
   const COND *pushed_cond;
   /**
     next_insert_id is the next value which should be inserted into the
@@ -1212,6 +1546,15 @@ public:
     Interval returned by get_auto_increment() and being consumed by the
     inserter.
   */
+  /* Statistics  variables */
+  ulonglong rows_read;
+  ulonglong rows_changed;
+  /* One bigger than needed to avoid to test if key == MAX_KEY */
+  ulonglong index_rows_read[MAX_KEY+1];
+
+  Item *pushed_idx_cond;
+  uint pushed_idx_cond_keyno;  /* The index which the above condition is for */
+
   Discrete_interval auto_inc_interval_for_cur_row;
   /**
      Number of reserved auto-increment intervals. Serves as a heuristic
@@ -1239,13 +1582,18 @@ public:
     :table_share(share_arg), table(0),
     estimation_rows_to_insert(0), ht(ht_arg),
     ref(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+    in_range_check_pushed_down(FALSE),
     ref_length(sizeof(my_off_t)),
     ft_handler(0), inited(NONE),
-    locked(FALSE), implicit_emptied(0),
+    locked(FALSE), implicit_emptied(0), mark_trx_done(FALSE), cloned(0),
     pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+    pushed_idx_cond(NULL),
+    pushed_idx_cond_keyno(MAX_KEY),
     auto_inc_intervals_count(0),
     m_psi(NULL)
-    {}
+  {
+    reset_statistics();
+  }
   virtual ~handler(void)
   {
     DBUG_ASSERT(locked == FALSE);
@@ -1267,6 +1615,7 @@ public:
     DBUG_ASSERT(inited==NONE);
     if (!(result= index_init(idx, sorted)))
       inited=INDEX;
+    end_range= NULL;
     DBUG_RETURN(result);
   }
   int ha_index_end()
@@ -1274,9 +1623,12 @@ public:
     DBUG_ENTER("ha_index_end");
     DBUG_ASSERT(inited==INDEX);
     inited=NONE;
+    end_range= NULL;
     DBUG_RETURN(index_end());
   }
-  int ha_rnd_init(bool scan)
+  /* This is called after index_init() if we need to do a index scan */
+  virtual int prepare_index_scan() { return 0; }
+  int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
   {
     int result;
     DBUG_ENTER("ha_rnd_init");
@@ -1291,7 +1643,15 @@ public:
     inited=NONE;
     DBUG_RETURN(rnd_end());
   }
+  int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
   int ha_reset();
+  /* Tell handler (not storage engine) this is start of a new statement */
+  void ha_start_of_new_statement()
+  {
+    ft_handler= 0;
+    mark_trx_done= FALSE;
+  }
+
   /* this is necessary in many places, e.g. in HANDLER command */
   int ha_index_or_rnd_end()
   {
@@ -1363,15 +1723,31 @@ public:
   virtual void print_error(int error, myf errflag);
   virtual bool get_error_message(int error, String *buf);
   uint get_dup_key(int error);
+  void reset_statistics()
+  {
+    rows_read= rows_changed= 0;
+    bzero(index_rows_read, sizeof(index_rows_read));
+  }
   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
   {
     table= table_arg;
     table_share= share;
+    reset_statistics();
   }
   virtual double scan_time()
   { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
   virtual double read_time(uint index, uint ranges, ha_rows rows)
   { return rows2double(ranges+rows); }
+
+  /**
+    Calculate cost of 'keyread' scan for given index and number of records.
+
+     @param index    index to read
+     @param ranges   #of ranges to read
+     @param rows     #of records to read
+  */
+  virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+
   virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
   bool has_transactions()
   { return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0; }
@@ -1475,22 +1851,23 @@ public:
   }
   /**
      @brief
-     Positions an index cursor to the index specified in the handle. Fetches the
-     row if available. If the key value is null, begin at the first key of the
-     index.
+     Positions an index cursor to the index specified in the
+     handle. Fetches the row if available. If the key value is null,
+     begin at the first key of the index.
   */
+protected:
   virtual int index_read_map(uchar * buf, const uchar * key,
                              key_part_map keypart_map,
                              enum ha_rkey_function find_flag)
   {
     uint key_len= calculate_key_len(table, active_index, key, keypart_map);
-    return  index_read(buf, key, key_len, find_flag);
+    return index_read(buf, key, key_len, find_flag);
   }
   /**
      @brief
-     Positions an index cursor to the index specified in the handle. Fetches the
-     row if available. If the key value is null, begin at the first key of the
-     index.
+     Positions an index cursor to the index specified in the
+     handle. Fetches the row if available. If the key value is null,
+     begin at the first key of the index.
   */
   virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key,
                                  key_part_map keypart_map,
@@ -1504,30 +1881,46 @@ public:
   virtual int index_last(uchar * buf)
    { return  HA_ERR_WRONG_COMMAND; }
   virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
-  /**
-     @brief
-     The following functions works like index_read, but it find the last
-     row with the current key value or prefix.
-  */
-  virtual int index_read_last_map(uchar * buf, const uchar * key,
-                                  key_part_map keypart_map)
+  inline void update_index_statistics()
   {
-    uint key_len= calculate_key_len(table, active_index, key, keypart_map);
-    return index_read_last(buf, key, key_len);
+    index_rows_read[active_index]++;
+    rows_read++;
   }
-  virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                     KEY_MULTI_RANGE *ranges, uint range_count,
-                                     bool sorted, HANDLER_BUFFER *buffer);
-  virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+public:
+
+  /* Similar functions like the above, but does statistics counting */
+  inline int ha_index_read_map(uchar * buf, const uchar * key,
+                               key_part_map keypart_map,
+                               enum ha_rkey_function find_flag);
+  inline int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key,
+                                   key_part_map keypart_map,
+                                   enum ha_rkey_function find_flag);
+  inline int ha_index_next(uchar * buf);
+  inline int ha_index_prev(uchar * buf);
+  inline int ha_index_first(uchar * buf);
+  inline int ha_index_last(uchar * buf);
+  inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
+  virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                              void *seq_init_param, 
+                                              uint n_ranges, uint *bufsz,
+                                              uint *flags, COST_VECT *cost);
+  virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                        uint *bufsz, uint *flags, COST_VECT *cost);
+  virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                                    uint n_ranges, uint mode,
+                                    HANDLER_BUFFER *buf);
+  virtual int multi_range_read_next(char **range_info);
   virtual int read_range_first(const key_range *start_key,
                                const key_range *end_key,
                                bool eq_range, bool sorted);
   virtual int read_range_next();
   int compare_key(key_range *range);
+  int compare_key2(key_range *range);
   virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
   void ft_end() { ft_handler=NULL; }
   virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
     { return NULL; }
+private:
   virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; }
   virtual int rnd_next(uchar *buf)=0;
   virtual int rnd_pos(uchar * buf, uchar *pos)=0;
@@ -1537,20 +1930,33 @@ public:
     It will return the row with the PK given in the record argument.
   */
   virtual int rnd_pos_by_record(uchar *record)
-    {
-      position(record);
-      return rnd_pos(record, ref);
-    }
+  {
+    position(record);
+    return rnd_pos(record, ref);
+  }
   virtual int read_first_row(uchar *buf, uint primary_key);
+public:
+
+  /* Same as above, but with statistics */
+  inline int ha_ft_read(uchar *buf);
+  inline int ha_rnd_next(uchar *buf);
+  inline int ha_rnd_pos(uchar *buf, uchar *pos);
+  inline int ha_rnd_pos_by_record(uchar *buf);
+  inline int ha_read_first_row(uchar *buf, uint primary_key);
+
   /**
-    The following function is only needed for tables that may be temporary
-    tables during joins.
+    The following 3 function is only needed for tables that may be
+    internal temporary tables during joins.
   */
-  virtual int restart_rnd_next(uchar *buf, uchar *pos)
+  virtual int remember_rnd_pos()
+    { return HA_ERR_WRONG_COMMAND; }
+  virtual int restart_rnd_next(uchar *buf)
     { return HA_ERR_WRONG_COMMAND; }
   virtual int rnd_same(uchar *buf, uint inx)
     { return HA_ERR_WRONG_COMMAND; }
-  virtual ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key)
+
+  virtual ha_rows records_in_range(uint inx, key_range *min_key,
+                                   key_range *max_key)
     { return (ha_rows) 10; }
   /*
     If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref
@@ -1666,7 +2072,8 @@ public:
   */
   virtual const char **bas_ext() const =0;
 
-  virtual int get_default_no_partitions(HA_CREATE_INFO *info) { return 1;}
+  virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
+  { return 1;}
   virtual void set_auto_partitions(partition_info *part_info) { return; }
   virtual bool get_no_parts(const char *name,
                             uint *no_parts)
@@ -1709,13 +2116,14 @@ public:
   virtual bool is_crashed() const  { return 0; }
   virtual bool auto_repair() const { return 0; }
 
+  void update_global_table_stats();
+  void update_global_index_stats();
 
 #define CHF_CREATE_FLAG 0
 #define CHF_DELETE_FLAG 1
 #define CHF_RENAME_FLAG 2
 #define CHF_INDEX_FLAG  3
 
-
   /**
     @note lock_count() can return > 1 if the table is MERGE or partitioned.
   */
@@ -1826,6 +2234,7 @@ public:
    Pops the top if condition stack, if stack is not empty.
  */
  virtual void cond_pop() { return; };
+ virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; }
  virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
 					 uint table_changes)
  { return COMPATIBLE_DATA_NO; }
@@ -1843,9 +2252,24 @@ public:
     return 0;
   }
 
+  LEX_STRING *engine_name() { return hton_name(ht); }
+
+  /*
+    @brief
+    Check whether the engine supports virtual columns
+    
+    @retval
+      FALSE   if the engine does not support virtual columns    
+    @retval
+      TRUE    if the engine supports virtual columns
+  */
+  virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
+
 protected:
+  /* deprecated, don't use in new engines */
+  inline void ha_statistic_increment(ulong SSV::*offset) const { }
+
   /* Service methods for use by storage engines. */
-  void ha_statistic_increment(ulong SSV::*offset) const;
   void **ha_data(THD *) const;
   THD *ha_thd(void) const;
 
@@ -1895,10 +2319,18 @@ protected:
     tables.
   */
   virtual int delete_table(const char *name);
+
 private:
   /* Private helpers */
-  inline void mark_trx_read_write();
-private:
+  void mark_trx_read_write_part2();
+  inline void mark_trx_read_write()
+  {
+    if (!mark_trx_done)
+      mark_trx_read_write_part2();
+  }
+  inline void increment_statistics(ulong SSV::*offset) const;
+  inline void decrement_statistics(ulong SSV::*offset) const;
+
   /*
     Low-level primitives for storage engines.  These should be
     overridden by the storage engine class. To call these methods, use
@@ -1985,8 +2417,6 @@ private:
   virtual int index_read(uchar * buf, const uchar * key, uint key_len,
                          enum ha_rkey_function find_flag)
    { return  HA_ERR_WRONG_COMMAND; }
-  virtual int index_read_last(uchar * buf, const uchar * key, uint key_len)
-   { return (my_errno= HA_ERR_WRONG_COMMAND); }
   /**
     This method is similar to update_row, however the handler doesn't need
     to execute the updates at this point in time. The handler can be certain
@@ -2051,8 +2481,17 @@ private:
   { return HA_ERR_WRONG_COMMAND; }
   virtual int rename_partitions(const char *path)
   { return HA_ERR_WRONG_COMMAND; }
+  friend class ha_partition;
+  friend class DsMrr_impl;
+public:
+  /* XXX to be removed, see ha_partition::partition_ht() */
+  virtual handlerton *partition_ht() const
+  { return ht; }
 };
 
+#include "multi_range_read.h"
+
+bool key_uses_partial_cols(TABLE *table, uint keyno);
 
 	/* Some extern variables used with handlers */
 
@@ -2081,7 +2520,7 @@ static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type)
 
 static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type)
 {
-  return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
+  return db_type == NULL ? "UNKNOWN" : hton_name(db_type)->str;
 }
 
 static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag)
@@ -2131,6 +2570,7 @@ int ha_table_exists_in_engine(THD* thd, const char* db, const char* name);
 extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache);
 int ha_resize_key_cache(KEY_CACHE *key_cache);
 int ha_change_key_cache_param(KEY_CACHE *key_cache);
+int ha_repartition_key_cache(KEY_CACHE *key_cache);
 int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
 
 /* report to InnoDB that control passes to the client */
@@ -2187,10 +2627,8 @@ const char *get_canonical_filename(handler *file, const char *path,
                                    char *tmp_path);
 bool mysql_xa_recover(THD *thd);
 
-
 inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
 {
   return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
 }
-
-#endif /* HANDLER_INCLUDED */
+#endif
diff --git a/sql/hash_filo.cc b/sql/hash_filo.cc
index d10b7fa4907..9d6e4363efc 100644
--- a/sql/hash_filo.cc
+++ b/sql/hash_filo.cc
@@ -25,3 +25,8 @@
 
 #include "sql_priv.h"
 #include "hash_filo.h"
+
+#ifdef __WIN__
+// Remove linker warning 4221 about empty file
+namespace { char dummy; };
+#endif // __WIN__
diff --git a/sql/hash_filo.h b/sql/hash_filo.h
index 859b4713940..8301f5c8c3b 100644
--- a/sql/hash_filo.h
+++ b/sql/hash_filo.h
@@ -111,7 +111,7 @@ public:
     return entry;
   }
 
-  my_bool add(hash_filo_element *entry)
+  bool add(hash_filo_element *entry)
   {
     if (cache.records == size)
     {
diff --git a/sql/item.cc b/sql/item.cc
index e782e90b874..8dd6d3b3159 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -43,6 +43,9 @@
 
 const String my_null_string("NULL", 4, default_charset_info);
 
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions);
+
 /****************************************************************************/
 
 /* Hybrid_type_traits {_real} */
@@ -303,7 +306,7 @@ my_decimal *Item::val_decimal_from_string(my_decimal *decimal_value)
   String *res;
 
   if (!(res= val_str(&str_value)))
-    return NULL;
+    return 0;
 
   if (str2my_decimal(E_DEC_FATAL_ERROR & ~E_DEC_BAD_NUM,
                      res->ptr(), res->length(), res->charset(),
@@ -421,15 +424,15 @@ int Item::save_str_value_in_field(Field *field, String *result)
 
 
 Item::Item():
-  rsize(0), name(0), orig_name(0), name_length(0), fixed(0),
-  is_autogenerated_name(TRUE),
+  is_expensive_cache(-1), rsize(0), name(0), orig_name(0), name_length(0),
+  fixed(0), is_autogenerated_name(TRUE),
   collation(&my_charset_bin, DERIVATION_COERCIBLE)
 {
   marker= 0;
   maybe_null=null_value=with_sum_func=unsigned_flag=0;
   decimals= 0; max_length= 0;
   with_subselect= 0;
-  cmp_context= (Item_result)-1;
+  cmp_context= IMPOSSIBLE_RESULT;
 
   /* Put item in free list so that we can free all items at end */
   THD *thd= current_thd;
@@ -458,6 +461,7 @@ Item::Item():
   tables.
 */
 Item::Item(THD *thd, Item *item):
+  is_expensive_cache(-1),
   rsize(0),
   str_value(item->str_value),
   name(item->name),
@@ -472,8 +476,8 @@ Item::Item(THD *thd, Item *item):
   with_sum_func(item->with_sum_func),
   fixed(item->fixed),
   is_autogenerated_name(item->is_autogenerated_name),
-  collation(item->collation),
   with_subselect(item->with_subselect),
+  collation(item->collation),
   cmp_context(item->cmp_context)
 {
   next= thd->free_list;				// Put in free list
@@ -589,6 +593,36 @@ Item* Item::transform(Item_transformer transformer, uchar *arg)
 }
 
 
+/**
+  Create and set up an expression cache for this item
+
+  @param thd             Thread handle
+  @param depends_on      List of the expression parameters
+
+  @details
+  The function creates an expression cache for an item and its parameters
+  specified by the 'depends_on' list. Then the expression cache is placed
+  into a cache wrapper that is returned as the result of the function.
+
+  @returns
+  A pointer to created wrapper item if successful, NULL - otherwise
+*/
+
+Item* Item::set_expr_cache(THD *thd, List<Item *> &depends_on)
+{
+  DBUG_ENTER("Item::set_expr_cache");
+  Item_cache_wrapper *wrapper;
+  if ((wrapper= new Item_cache_wrapper(this)) &&
+      !wrapper->fix_fields(thd, (Item**)&wrapper))
+  {
+    if (wrapper->set_cache(thd, depends_on))
+      DBUG_RETURN(NULL);
+    DBUG_RETURN(wrapper);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
 Item_ident::Item_ident(Name_resolution_context *context_arg,
                        const char *db_name_arg,const char *table_name_arg,
 		       const char *field_name_arg)
@@ -597,7 +631,7 @@ Item_ident::Item_ident(Name_resolution_context *context_arg,
    db_name(db_name_arg), table_name(table_name_arg),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(0), depended_from(0)
+   cached_table(0), depended_from(0), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -609,7 +643,7 @@ Item_ident::Item_ident(TABLE_LIST *view_arg, const char *field_name_arg)
    db_name(NullS), table_name(view_arg->alias),
    field_name(field_name_arg),
    alias_name_used(FALSE), cached_field_index(NO_CACHED_FIELD_INDEX),
-   cached_table(NULL), depended_from(NULL)
+   cached_table(NULL), depended_from(NULL), can_be_depended(TRUE)
 {
   name = (char*) field_name_arg;
 }
@@ -631,7 +665,8 @@ Item_ident::Item_ident(THD *thd, Item_ident *item)
    alias_name_used(item->alias_name_used),
    cached_field_index(item->cached_field_index),
    cached_table(item->cached_table),
-   depended_from(item->depended_from)
+   depended_from(item->depended_from),
+   can_be_depended(item->can_be_depended)
 {}
 
 void Item_ident::cleanup()
@@ -649,7 +684,8 @@ void Item_ident::cleanup()
   db_name= orig_db_name; 
   table_name= orig_table_name;
   field_name= orig_field_name;
-  depended_from= 0;
+  /* Store if this Item was depended */
+  can_be_depended= test(depended_from);
   DBUG_VOID_RETURN;
 }
 
@@ -698,6 +734,16 @@ bool Item_field::collect_item_field_processor(uchar *arg)
 }
 
 
+bool Item_field::add_field_to_set_processor(uchar *arg)
+{
+  DBUG_ENTER("Item_field::add_field_to_set_processor");
+  DBUG_PRINT("info", ("%s", field->field_name ? field->field_name : "noname"));
+  TABLE *table= (TABLE *) arg;
+  if (field->table == table)
+    bitmap_set_bit(&table->tmp_set, field->field_index);
+  DBUG_RETURN(FALSE);
+}
+
 /**
   Check if an Item_field references some field from a list of fields.
 
@@ -742,9 +788,24 @@ bool Item_field::register_field_in_read_map(uchar *arg)
   TABLE *table= (TABLE *) arg;
   if (field->table == table || !table)
     bitmap_set_bit(field->table->read_set, field->field_index);
+  if (field->vcol_info && field->vcol_info->expr_item)
+    return field->vcol_info->expr_item->walk(&Item::register_field_in_read_map, 
+                                             1, arg);
   return 0;
 }
 
+/*
+  @brief
+  Mark field in bitmap supplied as *arg
+*/
+
+bool Item_field::register_field_in_bitmap(uchar *arg)
+{
+  MY_BITMAP *bitmap= (MY_BITMAP *) arg;
+  DBUG_ASSERT(bitmap);
+  bitmap_set_bit(bitmap, field->field_index);
+  return 0;
+}
 
 bool Item::check_cols(uint c)
 {
@@ -1474,7 +1535,7 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
   /* An item of type Item_sum  is registered <=> ref_by != 0 */ 
   if (type() == SUM_FUNC_ITEM && skip_registered && 
       ((Item_sum *) this)->ref_by)
-    return;                                                 
+    return;
   if ((type() != SUM_FUNC_ITEM && with_sum_func) ||
       (type() == FUNC_ITEM &&
        (((Item_func *) this)->functype() == Item_func::ISNOTNULLTEST_FUNC ||
@@ -1587,7 +1648,9 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     if (collation == &my_charset_bin)
     {
       if (derivation <= dt.derivation)
-	; // Do nothing
+      {
+	/* Do nothing */
+      }
       else
       {
 	set(dt); 
@@ -1603,7 +1666,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
              left_is_superset(this, &dt))
     {
-      // Do nothing
+      /* Do nothing */
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
              left_is_superset(&dt, this))
@@ -1614,7 +1677,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
              derivation < dt.derivation &&
              dt.derivation >= DERIVATION_SYSCONST)
     {
-      // Do nothing;
+      /* Do nothing */
     }
     else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
              dt.derivation < derivation &&
@@ -1632,7 +1695,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
   }
   else if (derivation < dt.derivation)
   {
-    // Do nothing
+    /* Do nothing */
   }
   else if (dt.derivation < derivation)
   {
@@ -1642,7 +1705,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
   { 
     if (collation == dt.collation)
     {
-      // Do nothing
+      /* Do nothing */
     }
     else 
     {
@@ -1821,7 +1884,10 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
         subselect transformation does not happen in view_prepare_mode
         and thus val_...() methods can not be called for const items.
       */
-      bool resolve_const= ((*arg)->type() == Item::SUBSELECT_ITEM &&
+      bool resolve_const= (((*arg)->type() == Item::SUBSELECT_ITEM ||
+                            ((*arg)->get_cached_item() &&
+                             (*arg)->get_cached_item()->type() ==
+                             Item::SUBSELECT_ITEM)) &&
                            thd->lex->view_prepare_mode) ? FALSE : TRUE;
       conv= new Item_func_conv_charset(*arg, coll.collation, resolve_const);
     }
@@ -2044,6 +2110,15 @@ void Item_field::reset_field(Field *f)
   name= (char*) f->field_name;
 }
 
+
+bool Item_field::enumerate_field_refs_processor(uchar *arg)
+{
+  Field_enumerator *fe= (Field_enumerator*)arg;
+  fe->visit_field(this);
+  return FALSE;
+}
+
+
 const char *Item_ident::full_name() const
 {
   char *tmp;
@@ -2199,6 +2274,12 @@ bool Item_field::get_time(MYSQL_TIME *ltime)
   return 0;
 }
 
+void Item_field::save_result(Field *to)
+{
+  save_field_in_field(result_field, &null_value, to, TRUE);
+}
+
+
 double Item_field::val_result()
 {
   if ((null_value=result_field->is_null()))
@@ -2292,6 +2373,24 @@ table_map Item_field::used_tables() const
 }
 
 
+void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (new_parent == depended_from)
+    depended_from= NULL;
+  Name_resolution_context *ctx= new Name_resolution_context();
+  ctx->outer_context= NULL; // We don't build a complete name resolver
+  ctx->table_list= NULL;    // We rely on first_name_resolution_table instead
+  ctx->select_lex= new_parent;
+  ctx->first_name_resolution_table= context->first_name_resolution_table;
+  ctx->last_name_resolution_table=  context->last_name_resolution_table;
+  ctx->error_processor=             context->error_processor;
+  ctx->error_processor_data=        context->error_processor_data;
+  ctx->resolve_in_select_list=      context->resolve_in_select_list;
+  ctx->security_ctx=                context->security_ctx;
+  this->context=ctx;
+}
+
+
 Item *Item_field::get_tmp_table_item(THD *thd)
 {
   Item_field *new_item= new Item_field(thd, this);
@@ -2899,7 +2998,7 @@ bool Item_param::set_from_user_var(THD *thd, const user_var_entry *entry)
     unsigned_flag= entry->unsigned_flag;
     if (limit_clause_param)
     {
-      my_bool unused;
+      bool unused;
       set_int(entry->val_int(&unused), MY_INT64_NUM_DECIMAL_DIGITS);
       item_type= Item::INT_ITEM;
       DBUG_RETURN(!unsigned_flag && value.integer < 0 ? 1 : 0);
@@ -3812,6 +3911,15 @@ bool Item::fix_fields(THD *thd, Item **ref)
   return FALSE;
 }
 
+
+void Item_ref_null_helper::save_val(Field *to)
+{
+  DBUG_ASSERT(fixed == 1);
+  (*ref)->save_val(to);
+  owner->was_null|= null_value= (*ref)->null_value;
+}
+
+
 double Item_ref_null_helper::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -3875,7 +3983,7 @@ bool Item_ref_null_helper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
                          substitution)
 */
 
-static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
+static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                               Item_ident *resolved_item,
                               Item_ident *mark_item)
 {
@@ -3884,9 +3992,11 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
   const char *table_name= (resolved_item->table_name ?
                            resolved_item->table_name : "");
   /* store pointer on SELECT_LEX from which item is dependent */
-  if (mark_item)
+  if (mark_item && mark_item->can_be_depended)
     mark_item->depended_from= last;
-  current->mark_as_dependent(last);
+  if (current->mark_as_dependent(thd, last, /** resolved_item psergey-thu
+    **/mark_item))
+    return TRUE;
   if (thd->lex->describe & DESCRIBE_EXTENDED)
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
@@ -3896,6 +4006,7 @@ static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                  resolved_item->field_name,
                  current->select_number, last->select_number);
   }
+  return FALSE;
 }
 
 
@@ -3987,7 +4098,7 @@ static Item** find_field_in_group_list(Item *find_item, ORDER *group_list)
   int         found_match_degree= 0;
   Item_ident *cur_field;
   int         cur_match_degree= 0;
-  char        name_buff[NAME_LEN+1];
+  char        name_buff[SAFE_NAME_LEN+1];
 
   if (find_item->type() == Item::FIELD_ITEM ||
       find_item->type() == Item::REF_ITEM)
@@ -4347,6 +4458,9 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
                             ((ref_type == REF_ITEM || ref_type == FIELD_ITEM) ?
                              (Item_ident*) (*reference) :
                              0));
+          context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
           /*
             A reference to a view field had been found and we
             substituted it instead of this Item (find_field_in_tables
@@ -4445,8 +4559,12 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
       return -1;
 
     mark_as_dependent(thd, last_checked_context->select_lex,
-                      context->select_lex, this,
+                      context->select_lex, rf,
                       rf);
+    context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
+
     return 0;
   }
   else
@@ -4454,6 +4572,9 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
     mark_as_dependent(thd, last_checked_context->select_lex,
                       context->select_lex,
                       this, (Item_ident*)*reference);
+    context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
     if (last_checked_context->select_lex->having_fix_field)
     {
       Item_ref *rf;
@@ -4589,31 +4710,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
               It's not an Item_field in the select list so we must make a new
               Item_ref to point to the Item in the select list and replace the
               Item_field created by the parser with the new Item_ref.
-
-              NOTE: If we are fixing an alias reference inside ORDER/GROUP BY
-              item tree, then we use new Item_ref as an intermediate value
-              to resolve referenced item only.
-              In this case the new Item_ref item is unused.
             */
             Item_ref *rf= new Item_ref(context, db_name,table_name,field_name);
             if (!rf)
               return 1;
-
-            bool save_group_fix_field= thd->lex->current_select->group_fix_field;
-            /*
-              No need for recursive resolving of aliases.
-            */
-            thd->lex->current_select->group_fix_field= 0;
-
             bool ret= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1);
-            thd->lex->current_select->group_fix_field= save_group_fix_field;
             if (ret)
               return TRUE;
-
-            if (save_group_fix_field && alias_name_used)
-              thd->change_item_tree(reference, *rf->ref);
-            else
-              thd->change_item_tree(reference, rf);
+           
+            SELECT_LEX *select= thd->lex->current_select;
+            thd->change_item_tree(reference,
+                                  select->parsing_place == IN_GROUP_BY && 
+				  alias_name_used  ?  *rf->ref : rf);
 
             return FALSE;
           }
@@ -4746,6 +4854,21 @@ error:
   return TRUE;
 }
 
+/*
+  @brief
+  Mark virtual columns as used in a partitioning expression 
+*/
+
+bool Item_field::vcol_in_partition_func_processor(uchar *int_arg)
+{
+  DBUG_ASSERT(fixed);
+  if (field->vcol_info)
+  {
+    field->vcol_info->mark_as_in_partitioning_expr();
+  }
+  return FALSE;
+}
+
 
 Item *Item_field::safe_charset_converter(CHARSET_INFO *tocs)
 {
@@ -4758,6 +4881,7 @@ void Item_field::cleanup()
 {
   DBUG_ENTER("Item_field::cleanup");
   Item_ident::cleanup();
+  depended_from= NULL;
   /*
     Even if this object was created by direct link to field in setup_wild()
     it will be linked correctly next time by name of field and table alias.
@@ -4982,7 +5106,7 @@ Item *Item_field::replace_equal_field(uchar *arg)
         return this;
       return const_item;
     }
-    Item_field *subst= item_equal->get_first();
+    Item_field *subst= item_equal->get_first(this);
     if (subst && field->table != subst->field->table && !field->eq(subst->field))
       return subst;
   }
@@ -5294,47 +5418,69 @@ void Item_field::make_field(Send_field *tmp_field)
 
 
 /**
-  Set a field's value from a item.
-*/
+  Save a field value in another field
 
-void Item_field::save_org_in_field(Field *to)
-{
-  if (field->is_null())
-  {
-    null_value=1;
-    set_field_to_null_with_conversions(to, 1);
-  }
-  else
-  {
-    to->set_notnull();
-    field_conv(to,field);
-    null_value=0;
-  }
-}
+  @param from             Field to take the value from
+  @param [out] null_value Pointer to the null_value flag to set
+  @param to               Field to save the value in
+  @param no_conversions   How to deal with NULL value
 
-int Item_field::save_in_field(Field *to, bool no_conversions)
+  @details
+  The function takes the value of the field 'from' and, if this value
+  is not null, it saves in the field 'to' setting off the flag referenced
+  by 'null_value'. Otherwise this flag is set on and field 'to' is
+  also set to null possibly with conversion.
+
+  @note
+  This function is used by the functions Item_field::save_in_field,
+  Item_field::save_org_in_field and Item_ref::save_in_field
+
+  @retval FALSE OK
+  @retval TRUE  Error
+
+*/
+
+static int save_field_in_field(Field *from, bool *null_value,
+                               Field *to, bool no_conversions)
 {
   int res;
-  if (result_field->is_null())
+  DBUG_ENTER("save_field_in_field");
+  if (from->is_null())
   {
-    null_value=1;
-    return set_field_to_null_with_conversions(to, no_conversions);
+    (*null_value)= 1;
+    DBUG_RETURN(set_field_to_null_with_conversions(to, no_conversions));
   }
   to->set_notnull();
 
   /*
     If we're setting the same field as the one we're reading from there's 
     nothing to do. This can happen in 'SET x = x' type of scenarios.
-  */  
-  if (to == result_field)
+  */
+  if (to == from)
   {
-    null_value=0;
-    return 0;
+    (*null_value)= 0;
+    DBUG_RETURN(0);
   }
 
-  res= field_conv(to,result_field);
-  null_value=0;
-  return res;
+  res= field_conv(to, from);
+  (*null_value)= 0;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Set a field's value from a item.
+*/
+
+void Item_field::save_org_in_field(Field *to)
+{
+  save_field_in_field(field, &null_value, to, TRUE);
+}
+
+
+int Item_field::save_in_field(Field *to, bool no_conversions)
+{
+  return save_field_in_field(result_field, &null_value, to, no_conversions);
 }
 
 
@@ -6076,6 +6222,35 @@ Item_ref::Item_ref(Name_resolution_context *context_arg,
     set_properties();
 }
 
+/*
+  A Field_enumerator-compatible class that invokes mark_as_dependent() for
+  each field that is a reference to some ancestor of current_select.
+*/
+class Dependency_marker: public Field_enumerator
+{
+public:
+  THD *thd;
+  st_select_lex *current_select;
+  virtual void visit_field(Item_field *item)
+  {
+    // Find which select the field is in. This is achieved by walking up 
+    // the select tree and looking for the table of interest.
+    st_select_lex *sel;
+    for (sel= current_select; sel; sel= sel->outer_select())
+    {
+      TABLE_LIST *tbl;
+      for (tbl= sel->leaf_tables; tbl; tbl= tbl->next_leaf)
+      {
+        if (tbl->table == item->field->table)
+        {
+          if (sel != current_select)
+            mark_as_dependent(thd, sel, current_select, item, item);
+          return;
+        }
+      }
+    }
+  }
+};
 
 Item_ref::Item_ref(TABLE_LIST *view_arg, Item **item,
                    const char *field_name_arg, bool alias_name_used_arg)
@@ -6264,6 +6439,9 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
                                 refer_type == FIELD_ITEM) ?
                                (Item_ident*) (*reference) :
                                0));
+           context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
             /*
               view reference found, we substituted it instead of this
               Item, so can quit
@@ -6313,7 +6491,10 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
           goto error;
         thd->change_item_tree(reference, fld);
         mark_as_dependent(thd, last_checked_context->select_lex,
-                          thd->lex->current_select, this, fld);
+                          thd->lex->current_select, fld, fld);
+        context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
         /*
           A reference is resolved to a nest level that's outer or the same as
           the nest level of the enclosing set function : adjust the value of
@@ -6337,6 +6518,9 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
       DBUG_ASSERT(*ref && (*ref)->fixed);
       mark_as_dependent(thd, last_checked_context->select_lex,
                         context->select_lex, this, this);
+      context->select_lex->
+              register_dependency_item(last_checked_context->select_lex,
+                                       reference);
       /*
         A reference is resolved to a nest level that's outer or the same as
         the nest level of the enclosing set function : adjust the value of
@@ -6349,6 +6533,24 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
                       last_checked_context->select_lex->nest_level);
     }
   }
+  else
+  {
+    if (depended_from && reference)
+    {
+      DBUG_ASSERT(context->select_lex != depended_from);
+      context->select_lex->register_dependency_item(depended_from, reference);
+    }
+    /*
+      It could be that we're referring to something that's in ancestor selects.
+      We must make an appropriate mark_as_dependent() call for each such
+      outside reference.
+    */
+    Dependency_marker dep_marker;
+    dep_marker.current_select= current_sel;
+    dep_marker.thd= thd;
+    (*ref)->walk(&Item::enumerate_field_refs_processor, FALSE,
+                 (uchar*)&dep_marker);
+  }
 
   DBUG_ASSERT(*ref);
   /*
@@ -6528,6 +6730,25 @@ bool Item_ref::val_bool_result()
 }
 
 
+void Item_ref::save_result(Field *to)
+{
+  if (result_field)
+  {
+    save_field_in_field(result_field, &null_value, to, TRUE);
+    return;
+  }
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
+void Item_ref::save_val(Field *to)
+{
+  (*ref)->save_result(to);
+  null_value= (*ref)->null_value;
+}
+
+
 double Item_ref::val_real()
 {
   DBUG_ASSERT(fixed);
@@ -6645,6 +6866,13 @@ void Item_ref_null_helper::print(String *str, enum_query_type query_type)
 }
 
 
+void Item_direct_ref::save_val(Field *to)
+{
+  (*ref)->save_val(to);
+  null_value=(*ref)->null_value;
+}
+
+
 double Item_direct_ref::val_real()
 {
   double tmp=(*ref)->val_real();
@@ -6697,6 +6925,385 @@ bool Item_direct_ref::get_date(MYSQL_TIME *ltime,uint fuzzydate)
 }
 
 
+Item_cache_wrapper::~Item_cache_wrapper()
+{
+  delete expr_cache;
+  /* expr_value is Item so it will be destroyed from list of Items */
+}
+
+
+Item_cache_wrapper::Item_cache_wrapper(Item *item_arg)
+:orig_item(item_arg), expr_cache(NULL), expr_value(NULL)
+{
+  DBUG_ASSERT(orig_item->fixed);
+  max_length= orig_item->max_length;
+  maybe_null= orig_item->maybe_null;
+  decimals=   orig_item->decimals;
+  collation.set(orig_item->collation);
+  with_sum_func= orig_item->with_sum_func;
+  unsigned_flag= orig_item->unsigned_flag;
+  name= item_arg->name;
+  name_length= item_arg->name_length;
+
+  if ((expr_value= Item_cache::get_cache(orig_item)))
+    expr_value->setup(orig_item);
+
+  fixed= 1;
+}
+
+
+void Item_cache_wrapper::print(String *str, enum_query_type query_type)
+{
+  str->append(func_name());
+  if (expr_cache)
+    expr_cache->print(str, query_type);
+  else
+    str->append(STRING_WITH_LEN("<<DISABLED>>"));
+  str->append('(');
+  orig_item->print(str, query_type);
+  str->append(')');
+}
+
+
+/**
+  Prepare the expression cache wrapper (do nothing)
+
+  @retval FALSE OK
+*/
+
+bool Item_cache_wrapper::fix_fields(THD *thd  __attribute__((unused)),
+                                    Item **it __attribute__((unused)))
+{
+  DBUG_ASSERT(orig_item->fixed);
+  DBUG_ASSERT(fixed);
+  return FALSE;
+}
+
+
+/**
+  Clean the expression cache wrapper up before reusing it.
+*/
+
+void Item_cache_wrapper::cleanup()
+{
+  delete expr_cache;
+  expr_cache= 0;
+  // expr_value is Item so it will be destroyed from list of Items
+  expr_value= 0;
+}
+
+
+/**
+  Create an expression cache that uses a temporary table
+
+  @param thd           Thread handle
+  @param depends_on    Parameters of the expression to create cache for
+
+  @details
+  The function takes 'depends_on' as the list of all parameters for
+  the expression wrapped into this object and creates an expression
+  cache in a temporary table containing the field for the parameters
+  and the result of the expression.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool Item_cache_wrapper::set_cache(THD *thd, List<Item*> &depends_on)
+{
+  DBUG_ENTER("Item_cache_wrapper::set_cache");
+  expr_cache= new Expression_cache_tmptable(thd, depends_on, expr_value);
+  DBUG_RETURN(expr_cache == NULL);
+}
+
+
+/**
+  Check if the current values of the parameters are in the expression cache
+
+  @details
+  The function checks whether the current set of the parameters of the
+  referenced item can be found in the expression cache. If so the function
+  returns the item by which the result of the expression can be easily
+  extracted from the cache with the corresponding val_* method.
+
+  @retval NULL    - parameters are not in the cache
+  @retval <item*> - item providing the result of the expression found in cache
+*/
+
+Item *Item_cache_wrapper::check_cache()
+{
+  DBUG_ENTER("Item_cache_wrapper::check_cache");
+  if (expr_cache)
+  {
+    Expression_cache_tmptable::result res;
+    Item *cached_value;
+    res= expr_cache->check_value(&cached_value);
+    if (res == Expression_cache_tmptable::HIT)
+      DBUG_RETURN(cached_value);
+  }
+  DBUG_RETURN(NULL);
+}
+
+
+/**
+  Get the value of the cached expression and put it in the cache
+*/
+
+inline void Item_cache_wrapper::cache()
+{
+  expr_value->store(orig_item);
+  expr_value->cache_value();
+  expr_cache->put_value(expr_value); // put in expr_cache
+}
+
+
+/**
+  Get the value of the possibly cached item into the field.
+*/
+
+void Item_cache_wrapper::save_val(Field *to)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_int");
+  if (!expr_cache)
+  {
+    orig_item->save_val(to);
+    null_value= orig_item->null_value;
+    DBUG_VOID_RETURN;
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    cached_value->save_val(to);
+    null_value= cached_value->null_value;
+    DBUG_VOID_RETURN;
+  }
+  cache();
+  null_value= expr_value->null_value;
+  expr_value->save_val(to);
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  Get the integer value of the possibly cached item.
+*/
+
+longlong Item_cache_wrapper::val_int()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_int");
+  if (!expr_cache)
+  {
+    longlong tmp= orig_item->val_int();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    longlong tmp= cached_value->val_int();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_int());
+}
+
+
+/**
+  Get the real value of the possibly cached item
+*/
+
+double Item_cache_wrapper::val_real()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_real");
+  if (!expr_cache)
+  {
+    double tmp= orig_item->val_real();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    double tmp= cached_value->val_real();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_real());
+}
+
+
+/**
+  Get the string value of the possibly cached item
+*/
+
+String *Item_cache_wrapper::val_str(String* str)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_str");
+  if (!expr_cache)
+  {
+    String *tmp= orig_item->val_str(str);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    String *tmp= cached_value->val_str(str);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_str(str));
+}
+
+
+/**
+  Get the decimal value of the possibly cached item
+*/
+
+my_decimal *Item_cache_wrapper::val_decimal(my_decimal* decimal_value)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_decimal");
+  if (!expr_cache)
+  {
+    my_decimal *tmp= orig_item->val_decimal(decimal_value);
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    my_decimal *tmp= cached_value->val_decimal(decimal_value);
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  if ((null_value= expr_value->null_value))
+    DBUG_RETURN(NULL);
+  DBUG_RETURN(expr_value->val_decimal(decimal_value));
+}
+
+
+/**
+  Get the boolean value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::val_bool()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::val_bool");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->val_bool();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->val_bool();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  null_value= expr_value->null_value;
+  DBUG_RETURN(expr_value->val_bool());
+}
+
+
+/**
+  Check for NULL the value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::is_null()
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::is_null");
+  if (!expr_cache)
+  {
+    bool tmp= orig_item->is_null();
+    null_value= orig_item->null_value;
+    DBUG_RETURN(tmp);
+  }
+
+  if ((cached_value= check_cache()))
+  {
+    bool tmp= cached_value->is_null();
+    null_value= cached_value->null_value;
+    DBUG_RETURN(tmp);
+  }
+  cache();
+  DBUG_RETURN((null_value= expr_value->null_value));
+}
+
+
+/**
+  Get the date value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::get_date(MYSQL_TIME *ltime, uint fuzzydate)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::get_date");
+  if (!expr_cache)
+    DBUG_RETURN((null_value= orig_item->get_date(ltime, fuzzydate)));
+
+  if ((cached_value= check_cache()))
+    DBUG_RETURN((null_value= cached_value->get_date(ltime, fuzzydate)));
+
+  cache();
+  DBUG_RETURN((null_value= expr_value->get_date(ltime, fuzzydate)));
+}
+
+
+/**
+  Get the time value of the possibly cached item
+*/
+
+bool Item_cache_wrapper::get_time(MYSQL_TIME *ltime)
+{
+  Item *cached_value;
+  DBUG_ENTER("Item_cache_wrapper::get_time");
+  if (!expr_cache)
+    DBUG_RETURN((null_value= orig_item->get_time(ltime)));
+
+  if ((cached_value= check_cache()))
+    DBUG_RETURN((null_value= cached_value->get_time(ltime)));
+
+  cache();
+  DBUG_RETURN((null_value= expr_value->get_time(ltime)));
+}
+
+
+int Item_cache_wrapper::save_in_field(Field *to, bool no_conversions)
+{
+  int res;
+  DBUG_ASSERT(!result_field);
+  res= orig_item->save_in_field(to, no_conversions);
+  null_value= orig_item->null_value;
+  return res;
+}
+
+
+Item* Item_cache_wrapper::get_tmp_table_item(THD *thd_arg)
+{
+  if (!orig_item->with_sum_func && !orig_item->const_item())
+    return new Item_field(result_field);
+  return copy_or_same(thd_arg);
+}
+
+
 /**
   Prepare referenced field then call usual Item_direct_ref::fix_fields .
 
@@ -6765,6 +7372,59 @@ bool Item_outer_ref::fix_fields(THD *thd, Item **reference)
 }
 
 
+void Item_outer_ref::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  if (depended_from == new_parent)
+  {
+    *ref= outer_ref;
+    (*ref)->fix_after_pullout(new_parent, ref);
+  }
+}
+
+void Item_ref::fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+{
+  (*ref)->fix_after_pullout(new_parent, ref);
+  if (depended_from == new_parent)
+    depended_from= NULL;
+}
+
+
+/**
+  Mark references from inner selects used in group by clause
+
+  The method is used by the walk method when called for the expressions
+  from the group by clause. The callsare  occurred in the function
+  fix_inner_refs invoked by JOIN::prepare.
+  The parameter passed to Item_outer_ref::check_inner_refs_processor
+  is the iterator over the list of inner references from the subselects
+  of the select to be prepared. The function marks those references
+  from this list whose occurrences are encountered in the group by 
+  expressions passed to the walk method.  
+ 
+  @param arg  pointer to the iterator over a list of inner references
+
+  @return
+    FALSE always
+*/
+
+bool Item_outer_ref::check_inner_refs_processor(uchar *arg)
+{
+  List_iterator_fast<Item_outer_ref> *it=
+    ((List_iterator_fast<Item_outer_ref> *) arg);
+  Item_outer_ref *ref;
+  while ((ref= (*it)++))
+  {
+    if (ref == this)
+    {
+      ref->found_in_group_by= 1;
+      break;
+    }
+  }
+  (*it).rewind();
+  return FALSE;
+}
+
+
 /**
   Compare two view column references for equality.
 
@@ -7400,7 +8060,7 @@ bool  Item_cache_int::cache_value()
 }
 
 
-void Item_cache_int::store(Item *item, longlong val_arg)
+void Item_cache_int::store_longlong(Item *item, longlong val_arg)
 {
   /* An explicit values is given, save it. */
   value_cached= TRUE;
diff --git a/sql/item.h b/sql/item.h
index 8e8199ecac8..4c5475fb080 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1,5 +1,5 @@
-#ifndef ITEM_INCLUDED
-#define ITEM_INCLUDED
+#ifndef SQL_ITEM_INCLUDED
+#define SQL_ITEM_INCLUDED
 
 /* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
 
@@ -28,6 +28,23 @@
 #include "thr_malloc.h"                         /* sql_calloc */
 #include "field.h"                              /* Derivation */
 
+static inline
+bool trace_unsupported_func(const char *where, const char *processor_name)
+{
+  char buff[64];                                                         
+  sprintf(buff, "%s::%s", where, processor_name);
+  DBUG_ENTER(buff);
+  sprintf(buff, "%s returns TRUE: unsupported function", processor_name);
+  DBUG_PRINT("info", ("%s", buff));
+  DBUG_RETURN(TRUE);
+}
+
+static inline
+bool trace_unsupported_by_check_vcol_func_processor(const char *where)
+{
+  return trace_unsupported_func(where, "check_vcol_func_processor");
+}
+
 class Protocol;
 struct TABLE_LIST;
 void item_init(void);			/* Init item functions */
@@ -522,12 +539,15 @@ public:
              SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER,
              PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM,
              XPATH_NODESET, XPATH_NODESET_CMP,
-             VIEW_FIXER_ITEM};
+             VIEW_FIXER_ITEM, EXPR_CACHE_ITEM};
 
   enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE };
 
   enum traverse_order { POSTFIX, PREFIX };
   
+  /* Cache of the result of is_expensive(). */
+  int8 is_expensive_cache;
+  
   /* Reuse size, only used by SP local variable assignment, otherwize 0 */
   uint rsize;
 
@@ -551,17 +571,17 @@ public:
   uint name_length;                     /* Length of name */
   int8 marker;
   uint8 decimals;
-  my_bool maybe_null;			/* If item may be null */
-  my_bool null_value;			/* if item is null */
-  my_bool unsigned_flag;
-  my_bool with_sum_func;
-  my_bool fixed;                        /* If item fixed with fix_fields */
-  my_bool is_autogenerated_name;        /* indicate was name of this Item
+  bool maybe_null;			/* If item may be null */
+  bool null_value;			/* if item is null */
+  bool unsigned_flag;
+  bool with_sum_func;
+  bool fixed;                           /* If item fixed with fix_fields */
+  bool is_autogenerated_name;           /* indicate was name of this Item
                                            autogenerated or set by user */
-  DTCollation collation;
-  my_bool with_subselect;               /* If this item is a subselect or some
+  bool with_subselect;                  /* If this item is a subselect or some
                                            of its arguments is or contains a
                                            subselect */
+  DTCollation collation;
   Item_result cmp_context;              /* Comparison context */
   // alloc & destruct is done as start of select using sql_alloc
   Item();
@@ -588,6 +608,12 @@ public:
   Field *make_string_field(TABLE *table);
   virtual bool fix_fields(THD *, Item **);
   /*
+    Fix after some tables has been pulled out. Basically re-calculate all
+    attributes that are dependent on the tables.
+  */
+  virtual void fix_after_pullout(st_select_lex *new_parent, Item **ref) {};
+
+  /*
     should be used in case where we are sure that we do not need
     complete fix_fields() procedure.
   */
@@ -814,6 +840,17 @@ public:
   */
   virtual bool val_bool();
   virtual String *val_nodeset(String*) { return 0; }
+
+  /*
+    save_val() is method of val_* family which stores value in the given
+    field.
+  */
+  virtual void save_val(Field *to) { save_org_in_field(to); }
+  /*
+    save_result() is method of val*result() family which stores value in
+    the given field.
+  */
+  virtual void save_result(Field *to) { save_val(to); }
   /* Helper functions, see item_sum.cc */
   String *val_string_from_real(String *str);
   String *val_string_from_int(String *str);
@@ -849,7 +886,11 @@ public:
   virtual bool val_bool_result() { return val_bool(); }
   virtual bool is_null_result() { return is_null(); }
 
-  /* bit map of tables used by item */
+  /* 
+    Bitmap of tables used by item
+    (note: if you need to check dependencies on individual columns, check out
+     class Field_enumerator)
+  */
   virtual table_map used_tables() const { return (table_map) 0L; }
   /*
     Return table map of tables that can't be NULL tables (tables that are
@@ -952,6 +993,7 @@ public:
     set value of aggregate function in case of no rows for grouping were found
   */
   virtual void no_rows_in_result() {}
+  virtual void restore_to_before_no_rows_in_result() {}
   virtual Item *copy_or_same(THD *thd) { return this; }
   virtual Item *copy_andor_structure(THD *thd) { return this; }
   virtual Item *real_item() { return this; }
@@ -1020,15 +1062,37 @@ public:
   virtual bool remove_fixed(uchar * arg) { fixed= 0; return 0; }
   virtual bool cleanup_processor(uchar *arg);
   virtual bool collect_item_field_processor(uchar * arg) { return 0; }
+  virtual bool add_field_to_set_processor(uchar * arg) { return 0; }
   virtual bool find_item_in_field_list_processor(uchar *arg) { return 0; }
   virtual bool change_context_processor(uchar *context) { return 0; }
   virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
   virtual bool is_expensive_processor(uchar *arg) { return 0; }
-  virtual bool find_item_processor(uchar *arg) { return this == (void *) arg; }
   virtual bool register_field_in_read_map(uchar *arg) { return 0; }
-
   virtual bool cache_const_expr_analyzer(uchar **arg);
   virtual Item* cache_const_expr_transformer(uchar *arg);
+  virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
+  virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
+
+  /* To call bool function for all arguments */
+  struct bool_func_call_args
+  {
+    Item *original_func_item;
+    void (Item::*bool_function)();
+  };
+  bool call_bool_func_processor(uchar *org_item)
+  {
+    bool_func_call_args *info= (bool_func_call_args*) org_item;
+    /* Avoid recursion, as walk also calls for original item */
+    if (info->original_func_item != this)
+      (this->*(info->bool_function))();
+    return FALSE;
+  }
+  /*
+    The next function differs from the previous one that a bitmap to be updated
+    is passed as uchar *arg.
+  */
+  virtual bool register_field_in_bitmap(uchar *arg) { return 0; }
+
   /*
     Check if a partition function is allowed
     SYNOPSIS
@@ -1081,11 +1145,43 @@ public:
     fields.
   */
   virtual bool check_partition_func_processor(uchar *bool_arg) { return TRUE;}
+  /*
+    @brief
+    Processor used to mark virtual columns used in partitioning expression
+
+    @param
+    arg     always ignored
+
+    @retval
+      FALSE      always
+  */
+  virtual bool vcol_in_partition_func_processor(uchar *arg)
+  {
+    return FALSE;
+  }
+
   virtual bool subst_argument_checker(uchar **arg)
-  { 
+  {
     if (*arg)
-      *arg= NULL; 
-    return TRUE;     
+      *arg= NULL;
+    return TRUE;
+  }
+  /*
+    @brief
+    Processor used to check acceptability of an item in the defining
+    expression for a virtual column 
+    
+    @param
+      arg     always ignored
+      
+    @retval
+      FALSE    the item is accepted in the definition of a virtual column
+    @retval 
+      TRUE     otherwise
+  */
+  virtual bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(full_name());
   }
 
   virtual Item *equal_fields_propagator(uchar * arg) { return this; }
@@ -1118,6 +1214,8 @@ public:
     return FALSE;
   }
 
+  virtual bool check_inner_refs_processor(uchar *arg) { return FALSE; }
+
   /*
     For SP local variable returns pointer to Item representing its
     current value and pointer to current Item otherwise.
@@ -1146,6 +1244,8 @@ public:
 
   virtual Item *neg_transformer(THD *thd) { return NULL; }
   virtual Item *update_value_transformer(uchar *select_arg) { return this; }
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; }
+  virtual bool expr_cache_is_needed(THD *) { return FALSE; }
   virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
   void delete_self()
   {
@@ -1205,6 +1305,28 @@ public:
       return is_datetime() || cmp_context == STRING_RESULT;
     return FALSE;
   }
+  /*
+    Test whether an expression is expensive to compute. Used during
+    optimization to avoid computing expensive expressions during this
+    phase. Also used to force temp tables when sorting on expensive
+    functions.
+    TODO:
+    Normally we should have a method:
+      cost Item::execution_cost(),
+    where 'cost' is either 'double' or some structure of various cost
+    parameters.
+
+    NOTE
+      This function is now used to prevent evaluation of materialized IN
+      subquery predicates before it is allowed. grep for 
+      DontEvaluateMaterializedSubqueryTooEarly to see the uses.
+  */
+  virtual bool is_expensive()
+  {
+    if (is_expensive_cache < 0)
+      is_expensive_cache= walk(&Item::is_expensive_processor, 0, (uchar*)0);
+    return test(is_expensive_cache);
+  }
   virtual Field::geometry_type get_geometry_type() const
     { return Field::GEOM_GEOMETRY; };
   String *check_well_formed_result(String *str, bool send_error= 0);
@@ -1242,11 +1364,36 @@ public:
     Return TRUE if the item points to a column of an outer-joined table.
   */
   virtual bool is_outer_field() const { DBUG_ASSERT(fixed); return FALSE; }
+  Item* set_expr_cache(THD *thd, List<Item*> &depends_on);
+  virtual Item *get_cached_item() { return NULL; }
 };
 
+/*
+  Class to be used to enumerate all field references in an item tree. This
+  includes references to outside but not fields of the tables within a
+  subquery.
+  Suggested usage:
 
-class sp_head;
+    class My_enumerator : public Field_enumerator 
+    {
+      virtual void visit_field() { ... your actions ...} 
+    }
+
+    My_enumerator enumerator;
+    item->walk(Item::enumerate_field_refs_processor, ...,(uchar*)&enumerator);
+
+  This is similar to Visitor pattern.
+*/
+
+class Field_enumerator
+{
+public:
+  virtual void visit_field(Item_field *field)= 0;
+  virtual ~Field_enumerator() {};             /* purecov: inspected */
+  Field_enumerator() {}                       /* Remove gcc warning */
+};
 
+class sp_head;
 
 class Item_basic_constant :public Item
 {
@@ -1541,6 +1688,10 @@ public:
   {
     return value_item->send(protocol, str);
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("name_const");
+  }
 };
 
 bool agg_item_collations(DTCollation &c, const char *name,
@@ -1578,6 +1729,7 @@ public:
   virtual Item_num *neg()= 0;
   Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 #define NO_CACHED_FIELD_INDEX ((uint)(-1))
@@ -1615,6 +1767,19 @@ public:
   */
   TABLE_LIST *cached_table;
   st_select_lex *depended_from;
+  /*
+    Some Items resolved in another select should not be marked as dependency
+    of the subquery where they are. During normal name resolution, we check
+    this. Stored procedures and prepared statements first try to resolve an
+    ident item using a cached table reference and field position from the
+    previous query execution (cached_table/cached_field_index). If the
+    tables were not changed, the ident matches the table/field, and we have
+    faster resolution of the ident without looking through all tables and
+    fields in the query. But in this case, we can not check all conditions
+    about this ident item dependency, so we should cache the condition in
+    this variable.
+  */
+  bool can_be_depended;
   Item_ident(Name_resolution_context *context_arg,
              const char *db_name_arg, const char *table_name_arg,
              const char *field_name_arg);
@@ -1697,6 +1862,7 @@ public:
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
   String *val_str(String*);
+  void save_result(Field *to);
   double val_result();
   longlong val_int_result();
   String *str_result(String* tmp);
@@ -1706,6 +1872,7 @@ public:
   bool send(Protocol *protocol, String *str_arg);
   void reset_field(Field *f);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void make_field(Send_field *tmp_field);
   int save_in_field(Field *field,bool no_conversions);
   void save_org_in_field(Field *field);
@@ -1736,9 +1903,14 @@ public:
   void update_null_value();
   Item *get_tmp_table_item(THD *thd);
   bool collect_item_field_processor(uchar * arg);
+  bool add_field_to_set_processor(uchar * arg);
   bool find_item_in_field_list_processor(uchar *arg);
   bool register_field_in_read_map(uchar *arg);
+  bool register_field_in_bitmap(uchar *arg);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool vcol_in_partition_func_processor(uchar *bool_arg);
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
+  bool enumerate_field_refs_processor(uchar *arg);
   void cleanup();
   bool result_as_longlong()
   {
@@ -1805,6 +1977,7 @@ public:
 
   Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 class Item_null_result :public Item_null
@@ -1818,7 +1991,11 @@ public:
     save_in_field(result_field, no_conversions);
   }
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
-};  
+  bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(full_name());
+  }
+};
 
 /* Item represents one placeholder ('?') of prepared statement */
 
@@ -2015,6 +2192,7 @@ public:
   { return (uint)(max_length - test(value < 0)); }
   bool eq(const Item *, bool binary_cmp) const;
   bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 
@@ -2032,7 +2210,6 @@ public:
   virtual void print(String *str, enum_query_type query_type);
   Item_num *neg ();
   uint decimal_precision() const { return max_length; }
-  bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
 };
 
 
@@ -2074,6 +2251,7 @@ public:
   bool eq(const Item *, bool binary_cmp) const;
   void set_decimal_value(my_decimal *value_par);
   bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 };
 
 
@@ -2231,6 +2409,7 @@ public:
   }
   virtual void print(String *str, enum_query_type query_type);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 
   /**
     Return TRUE if character-set-introducer was explicitly specified in the
@@ -2284,7 +2463,7 @@ double_from_string_with_check (CHARSET_INFO *cs, const char *cptr, char *end);
 class Item_static_string_func :public Item_string
 {
   const char *func_name;
-public:
+ public:
   Item_static_string_func(const char *name_par, const char *str, uint length,
                           CHARSET_INFO *cs,
                           Derivation dv= DERIVATION_COERCIBLE)
@@ -2298,6 +2477,10 @@ public:
   }
 
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name);
+  }
 };
 
 
@@ -2305,10 +2488,14 @@ public:
 class Item_partition_func_safe_string: public Item_string
 {
 public:
-  Item_partition_func_safe_string(const char *name, uint length,
+  Item_partition_func_safe_string(const char *name_arg, uint length,
                                   CHARSET_INFO *cs= NULL):
-    Item_string(name, length, cs)
+    Item_string(name_arg, length, cs)
   {}
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("safe_string");
+  }
 };
 
 
@@ -2327,8 +2514,8 @@ public:
 class Item_blob :public Item_partition_func_safe_string
 {
 public:
-  Item_blob(const char *name, uint length) :
-    Item_partition_func_safe_string(name, length, &my_charset_bin)
+  Item_blob(const char *name_arg, uint length) :
+    Item_partition_func_safe_string(name_arg, length, &my_charset_bin)
   { max_length= length; }
   enum Type type() const { return TYPE_HOLDER; }
   enum_field_types field_type() const { return MYSQL_TYPE_BLOB; }
@@ -2356,8 +2543,8 @@ class Item_return_int :public Item_int
   enum_field_types int_field_type;
 public:
   Item_return_int(const char *name_arg, uint length,
-		  enum_field_types field_type_arg, longlong value= 0)
-    :Item_int(name_arg, value, length), int_field_type(field_type_arg)
+		  enum_field_types field_type_arg, longlong value_arg= 0)
+    :Item_int(name_arg, value_arg, length), int_field_type(field_type_arg)
   {
     unsigned_flag=1;
   }
@@ -2388,6 +2575,7 @@ public:
   bool eq(const Item *item, bool binary_cmp) const;
   virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
 private:
   void hex_string_init(const char *str, uint str_length);
 };
@@ -2420,6 +2608,7 @@ public:
     save_in_field(result_field, no_conversions);
   }
   void cleanup();
+  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
   /*
     This method is used for debug purposes to print the name of an
     item to the debug log. The second use of this method is as
@@ -2482,6 +2671,8 @@ public:
     Item *it= ((Item *) item)->real_item();
     return ref && (*ref)->eq(it, binary_cmp);
   }
+  void save_val(Field *to);
+  void save_result(Field *to);
   double val_real();
   longlong val_int();
   my_decimal *val_decimal(my_decimal *);
@@ -2498,6 +2689,7 @@ public:
   bool send(Protocol *prot, String *tmp);
   void make_field(Send_field *field);
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   int save_in_field(Field *field, bool no_conversions);
   void save_org_in_field(Field *field);
   enum Item_result result_type () const { return (*ref)->result_type(); }
@@ -2514,6 +2706,10 @@ public:
     if (!depended_from) 
       (*ref)->update_used_tables(); 
   }
+  bool const_item() const 
+  {
+    return (*ref)->const_item();
+  }
   table_map not_null_tables() const { return (*ref)->not_null_tables(); }
   void set_result_field(Field *field)	{ result_field= field; }
   bool is_result_field() { return 1; }
@@ -2526,9 +2722,22 @@ public:
     return ref ? (*ref)->real_item() : this;
   }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  { 
+    if (ref && *ref)
+      return (*ref)->walk(processor, walk_subquery, arg) ||
+             (this->*processor)(arg); 
+    else
+      return FALSE;
+  }
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return (*ref)->enumerate_field_refs_processor(arg); }
+  void no_rows_in_result()
+  {
+    (*ref)->no_rows_in_result();
+  }
+  void restore_to_before_no_rows_in_result()
   {
-    return (*ref)->walk(processor, walk_subquery, arg) ||
-           (this->*processor)(arg);
+    (*ref)->restore_to_before_no_rows_in_result();
   }
   virtual void print(String *str, enum_query_type query_type);
   bool result_as_longlong()
@@ -2567,6 +2776,10 @@ public:
     if (ref && result_type() == ROW_RESULT)
       (*ref)->bring_value();
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("ref");
+  }
   bool get_time(MYSQL_TIME *ltime)
   {
     DBUG_ASSERT(fixed);
@@ -2579,7 +2792,6 @@ public:
     DBUG_ASSERT(ref);
     return (*ref)->is_outer_field();
   }
-
 };
 
 
@@ -2606,6 +2818,7 @@ public:
               alias_name_used_arg)
   {}
 
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String *val_str(String* tmp);
@@ -2616,6 +2829,131 @@ public:
   virtual Ref_Type ref_type() { return DIRECT_REF; }
 };
 
+class Expression_cache;
+class Item_cache;
+
+
+/**
+  The objects of this class can store its values in an expression cache.
+*/
+
+class Item_cache_wrapper :public Item_result_field
+{
+private:
+  /* Pointer on the cached expression */
+  Item *orig_item;
+  Expression_cache *expr_cache;
+  /*
+    In order to put the expression into the expression cache and return
+    value of val_*() method, we will need to get the expression value twice
+    (probably in different types).  In order to avoid making two
+    (potentially costly) orig_item->val_*() calls, we store expression value
+    in this Item_cache object.
+  */
+  Item_cache *expr_value;
+
+  Item *check_cache();
+  inline void cache();
+
+public:
+  Item_cache_wrapper(Item *item_arg);
+  ~Item_cache_wrapper();
+
+  const char *func_name() const { return "<expr_cache>"; }
+  enum Type type() const { return EXPR_CACHE_ITEM; }
+  virtual Item *get_cached_item() { return orig_item; }
+
+  bool set_cache(THD *thd, List<Item*> &depends_on);
+
+  bool fix_fields(THD *thd, Item **it);
+  void fix_length_and_dec() {}
+  void cleanup();
+
+  /* Methods of getting value which should be cached in the cache */
+  void save_val(Field *to);
+  double val_real();
+  longlong val_int();
+  String *val_str(String* tmp);
+  my_decimal *val_decimal(my_decimal *);
+  bool val_bool();
+  bool is_null();
+  bool get_date(MYSQL_TIME *ltime, uint fuzzydate);
+  bool get_time(MYSQL_TIME *ltime);
+  bool send(Protocol *protocol, String *buffer)
+  {
+    if (result_field)
+      return protocol->store(result_field);
+    return Item::send(protocol, buffer);
+  }
+  void save_org_in_field(Field *field)
+  {
+    save_val(field);
+  }
+  void save_in_result_field(bool no_conversions)
+  {
+    save_val(result_field);
+  }
+  Item* get_tmp_table_item(THD *thd_arg);
+
+  /* Following methods make this item transparent as much as possible */
+
+  virtual void print(String *str, enum_query_type query_type);
+  virtual const char *full_name() const { return orig_item->full_name(); }
+  virtual void make_field(Send_field *field) { orig_item->make_field(field); }
+  bool eq(const Item *item, bool binary_cmp) const
+  {
+    Item *it= ((Item *) item)->real_item();
+    return orig_item->eq(it, binary_cmp);
+  }
+  void fix_after_pullout(st_select_lex *new_parent, Item **refptr)
+  {
+    orig_item->fix_after_pullout(new_parent, &orig_item);
+  }
+  int save_in_field(Field *to, bool no_conversions);
+  enum Item_result result_type () const { return orig_item->result_type(); }
+  enum_field_types field_type() const   { return orig_item->field_type(); }
+  table_map used_tables() const { return orig_item->used_tables(); }
+  void update_used_tables() { orig_item->update_used_tables(); }
+  bool const_item() const { return orig_item->const_item(); }
+  table_map not_null_tables() const { return orig_item->not_null_tables(); }
+  bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+  {
+    return orig_item->walk(processor, walk_subquery, arg) ||
+      (this->*processor)(arg);
+  }
+  bool enumerate_field_refs_processor(uchar *arg)
+  { return orig_item->enumerate_field_refs_processor(arg); }
+  bool result_as_longlong() { return orig_item->result_as_longlong(); }
+  Item_field *filed_for_view_update()
+  { return orig_item->filed_for_view_update(); }
+
+  /* Row emulation: forwarding of ROW-related calls to orig_item */
+  uint cols()
+  { return result_type() == ROW_RESULT ? orig_item->cols() : 1; }
+  Item* element_index(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->element_index(i) : this; }
+  Item** addr(uint i)
+  { return result_type() == ROW_RESULT ? orig_item->addr(i) : 0; }
+  bool check_cols(uint c)
+  {
+    return (result_type() == ROW_RESULT ?
+            orig_item->check_cols(c) :
+            Item::check_cols(c));
+  }
+  bool null_inside()
+  { return result_type() == ROW_RESULT ? orig_item->null_inside() : 0; }
+  void bring_value()
+  {
+    if (result_type() == ROW_RESULT)
+      orig_item->bring_value();
+  }
+  bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor("cache");
+  }
+};
+
+
 /*
   Class for view fields, the same as Item_direct_ref, but call fix_fields
   of reference if it is not called yet
@@ -2669,12 +3007,13 @@ public:
     of the outer select.
   */
   bool found_in_select_list;
+  bool found_in_group_by;
   Item_outer_ref(Name_resolution_context *context_arg,
                  Item_field *outer_field_arg)
     :Item_direct_ref(context_arg, 0, outer_field_arg->table_name,
                      outer_field_arg->field_name),
     outer_ref(outer_field_arg), in_sum_func(0),
-    found_in_select_list(0)
+    found_in_select_list(0), found_in_group_by(0)
   {
     ref= &outer_ref;
     set_properties();
@@ -2685,18 +3024,21 @@ public:
                  bool alias_name_used_arg)
     :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg,
                      alias_name_used_arg),
-    outer_ref(0), in_sum_func(0), found_in_select_list(1)
+    outer_ref(0), in_sum_func(0), found_in_select_list(1), found_in_group_by(0)
   {}
   void save_in_result_field(bool no_conversions)
   {
     outer_ref->save_org_in_field(result_field);
   }
   bool fix_fields(THD *, Item **);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   table_map used_tables() const
   {
     return (*ref)->const_item() ? 0 : OUTER_REF_TABLE_BIT;
   }
+  table_map not_null_tables() const { return 0; }
   virtual Ref_Type ref_type() { return OUTER_REF; }
+  bool check_inner_refs_processor(uchar * arg); 
 };
 
 
@@ -2722,6 +3064,7 @@ public:
 		       const char *table_name_arg, const char *field_name_arg)
     :Item_ref(context_arg, item, table_name_arg, field_name_arg),
      owner(master) {}
+  void save_val(Field *to);
   double val_real();
   longlong val_int();
   String* val_str(String* s);
@@ -2753,7 +3096,7 @@ class Item_int_with_ref :public Item_int
 {
   Item *ref;
 public:
-  Item_int_with_ref(longlong i, Item *ref_arg, my_bool unsigned_arg) :
+  Item_int_with_ref(longlong i, Item *ref_arg, bool unsigned_arg) :
     Item_int(i), ref(ref_arg)
   {
     unsigned_flag= unsigned_arg;
@@ -2868,6 +3211,10 @@ public:
   table_map used_tables() const { return (table_map) 1L; }
   bool const_item() const { return 0; }
   bool is_null() { return null_value; }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("copy");
+  }
 
   /*  
     Override the methods below as pure virtual to make sure all the 
@@ -2983,10 +3330,21 @@ public:
 };
 
 
+/*
+  Cached_item_XXX objects are not exactly caches. They do the following:
+
+  Each Cached_item_XXX object has
+   - its source item
+   - saved value of the source item
+   - cmp() method that compares the saved value with the current value of the
+     source item, and if they were not equal saves item's value into the saved
+     value.
+*/
+
 class Cached_item :public Sql_alloc
 {
 public:
-  my_bool null_value;
+  bool null_value;
   Cached_item() :null_value(0) {}
   virtual bool cmp(void)=0;
   virtual ~Cached_item(); /*line -e1509 */
@@ -3039,9 +3397,10 @@ class Cached_item_field :public Cached_item
   uint length;
 
 public:
-  Cached_item_field(Item_field *item)
+  Cached_item_field(Field *arg_field) : field(arg_field)
   {
-    field= item->field;
+    field= arg_field;
+    /* TODO: take the memory allocation below out of the constructor. */
     buff= (uchar*) sql_calloc(length=field->pack_length());
   }
   bool cmp(void);
@@ -3111,6 +3470,10 @@ public:
     return arg->walk(processor, walk_subquery, args) ||
 	    (this->*processor)(args);
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("values");
+  }
 };
 
 
@@ -3195,6 +3558,10 @@ private:
     BEFORE INSERT of BEFORE UPDATE trigger.
   */
   bool read_only;
+  virtual bool check_vcol_func_processor(uchar *arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor("trigger");
+  }
 };
 
 
@@ -3225,7 +3592,8 @@ public:
     cached_field_type(MYSQL_TYPE_STRING),
     value_cached(0)
   {
-    fixed= 1; 
+    fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
   Item_cache(enum_field_types field_type_arg):
@@ -3234,6 +3602,7 @@ public:
     value_cached(0)
   {
     fixed= 1;
+    maybe_null= 1;
     null_value= 1;
   }
 
@@ -3266,6 +3635,10 @@ public:
   {
     return this == item;
   }
+  bool check_vcol_func_processor(uchar *arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("cache");
+  }
   /**
      Check if saved item has a non-NULL value.
      Will cache value of saved item if not already done. 
@@ -3300,7 +3673,7 @@ public:
     Item_cache(field_type_arg), value(0) {}
 
   virtual void store(Item *item){ Item_cache::store(item); }
-  void store(Item *item, longlong val_arg);
+  void store_longlong(Item *item, longlong val_arg);
   double val_real();
   longlong val_int();
   String* val_str(String *str);
@@ -3515,11 +3888,12 @@ void mark_select_range_as_dependent(THD *thd,
                                     Field *found_field, Item *found_item,
                                     Item_ident *resolved_item);
 
-extern Cached_item *new_Cached_item(THD *thd, Item *item);
+extern Cached_item *new_Cached_item(THD *thd, Item *item,
+                                    bool pass_through_ref);
 extern Item_result item_cmp_type(Item_result a,Item_result b);
 extern void resolve_const_item(THD *thd, Item **ref, Item *cmp_item);
 extern int stored_field_cmp_to_item(THD *thd, Field *field, Item *item);
 
 extern const String my_null_string;
 
-#endif /* ITEM_INCLUDED */
+#endif /* SQL_ITEM_INCLUDED */
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index 87197c7d9b6..b0dbadcfda2 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -34,11 +34,15 @@
   Create right type of Cached_item for an item.
 */
 
-Cached_item *new_Cached_item(THD *thd, Item *item)
+Cached_item *new_Cached_item(THD *thd, Item *item, bool pass_through_ref)
 {
-  if (item->real_item()->type() == Item::FIELD_ITEM &&
+  if (pass_through_ref && item->real_item()->type() == Item::FIELD_ITEM &&
       !(((Item_field *) (item->real_item()))->field->flags & BLOB_FLAG))
-    return new Cached_item_field((Item_field *) (item->real_item()));
+  {
+    Item_field *real_item= (Item_field *) item->real_item();
+    Field *cached_field= real_item->field;
+    return new Cached_item_field(cached_field);
+  }
   switch (item->result_type()) {
   case STRING_RESULT:
     return new Cached_item_str(thd, (Item_field *) item);
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index b7aad733e67..e8cf6e97a28 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -827,7 +827,6 @@ Arg_comparator::can_compare_as_dates(Item *a, Item *b, ulonglong *const_value)
   return cmp_type;
 }
 
-
 /*
   Retrieves correct TIME value from the given item.
 
@@ -881,10 +880,15 @@ get_time_value(THD *thd, Item ***item_arg, Item **cache_arg,
       (item->type() != Item::FUNC_ITEM ||
        ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
   {
+    Query_arena backup;
+    Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
     Item_cache_int *cache= new Item_cache_int();
+    if (save_arena)
+      thd->set_query_arena(save_arena);
+
     /* Mark the cache as non-const to prevent re-caching. */
     cache->set_used_tables(1);
-    cache->store(item, value);
+    cache->store_longlong(item, value);
     *cache_arg= cache;
     *item_arg= cache_arg;
   }
@@ -896,7 +900,6 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
                                         Item **a1, Item **a2,
                                         Item_result type)
 {
-  enum enum_date_cmp_type cmp_type;
   ulonglong const_value= (ulonglong)-1;
   thd= current_thd;
   owner= owner_arg;
@@ -905,7 +908,7 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
   b= a2;
   thd= current_thd;
 
-  if ((cmp_type= can_compare_as_dates(*a, *b, &const_value)))
+  if (can_compare_as_dates(*a, *b, &const_value))
   {
     a_type= (*a)->field_type();
     b_type= (*b)->field_type();
@@ -918,18 +921,23 @@ int Arg_comparator::set_cmp_func(Item_result_field *owner_arg,
         cache_converted_constant can't be used here because it can't
         correctly convert a DATETIME value from string to int representation.
       */
+      Query_arena backup;
+      Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
       Item_cache_int *cache= new Item_cache_int();
+      if (save_arena)
+        thd->set_query_arena(save_arena);
+
       /* Mark the cache as non-const to prevent re-caching. */
       cache->set_used_tables(1);
       if (!(*a)->is_datetime())
       {
-        cache->store((*a), const_value);
+        cache->store_longlong((*a), const_value);
         a_cache= cache;
         a= (Item **)&a_cache;
       }
       else
       {
-        cache->store((*b), const_value);
+        cache->store_longlong((*b), const_value);
         b_cache= cache;
         b= (Item **)&b_cache;
       }
@@ -1155,10 +1163,15 @@ get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg,
       (item->type() != Item::FUNC_ITEM ||
        ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC))
   {
+    Query_arena backup;
+    Query_arena *save_arena= thd->switch_to_arena_for_cached_items(&backup);
     Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME);
+    if (save_arena)
+      thd->set_query_arena(save_arena);
+      
     /* Mark the cache as non-const to prevent re-caching. */
     cache->set_used_tables(1);
-    cache->store(item, value);
+    cache->store_longlong(item, value);
     *cache_arg= cache;
     *item_arg= cache_arg;
   }
@@ -1771,6 +1784,52 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
 
 
 /**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item *Item_in_optimizer::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_in_optimizer::expr_cache_insert_transformer");
+  List<Item*> &depends_on= ((Item_subselect *)args[1])->depends_on;
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  /* Add left expression to the list of the parameters of the subquery */
+  if (args[0]->cols() == 1)
+    depends_on.push_front((Item**)args);
+  else
+  {
+    for (uint i= 0; i < args[0]->cols(); i++)
+    {
+      depends_on.push_front(args[0]->addr(i));
+    }
+  }
+
+  if (args[1]->expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd, depends_on)))
+    DBUG_RETURN(expr_cache);
+
+  depends_on.pop();
+  DBUG_RETURN(this);
+}
+
+/**
    The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
    predicates. The implementation works as follows.
 
@@ -1839,7 +1898,8 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
 
      @see Item_in_subselect::val_bool()
      @see Item_is_not_null_test::val_int()
- */
+*/
+
 longlong Item_in_optimizer::val_int()
 {
   bool tmp;
@@ -1941,6 +2001,7 @@ void Item_in_optimizer::cleanup()
   Item_bool_func::cleanup();
   if (!save_cache)
     cache= 0;
+  expr_cache= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -1952,6 +2013,70 @@ bool Item_in_optimizer::is_null()
 }
 
 
+/**
+  Transform an Item_in_optimizer and its arguments with a callback function.
+
+  @param transformer the transformer callback function to be applied to the
+         nodes of the tree of the object
+  @param parameter to be passed to the transformer
+
+  @detail
+    Recursively transform the left and the right operand of this Item. The
+    Right operand is an Item_in_subselect or its subclass. To avoid the
+    creation of new Items, we use the fact the the left operand of the
+    Item_in_subselect is the same as the one of 'this', so instead of
+    transforming its operand, we just assign the left operand of the
+    Item_in_subselect to be equal to the left operand of 'this'.
+    The transformation is not applied further to the subquery operand
+    if the IN predicate.
+
+  @returns
+    @retval pointer to the transformed item
+    @retval NULL if an error occurred
+*/
+
+Item *Item_in_optimizer::transform(Item_transformer transformer, uchar *argument)
+{
+  Item *new_item;
+
+  DBUG_ASSERT(!current_thd->is_stmt_prepare());
+  DBUG_ASSERT(arg_count == 2);
+
+  /* Transform the left IN operand. */
+  new_item= (*args)->transform(transformer, argument);
+  if (!new_item)
+    return 0;
+  /*
+    THD::change_item_tree() should be called only if the tree was
+    really transformed, i.e. when a new item has been created.
+    Otherwise we'll be allocating a lot of unnecessary memory for
+    change records at each execution.
+  */
+  if ((*args) != new_item)
+    current_thd->change_item_tree(args, new_item);
+
+  /*
+    Transform the right IN operand which should be an Item_in_subselect or a
+    subclass of it. The left operand of the IN must be the same as the left
+    operand of this Item_in_optimizer, so in this case there is no further
+    transformation, we only make both operands the same.
+    TODO: is it the way it should be?
+  */
+  DBUG_ASSERT((args[1])->type() == Item::SUBSELECT_ITEM &&
+              (((Item_subselect*)(args[1]))->substype() ==
+               Item_subselect::IN_SUBS ||
+               ((Item_subselect*)(args[1]))->substype() ==
+               Item_subselect::ALL_SUBS ||
+               ((Item_subselect*)(args[1]))->substype() ==
+               Item_subselect::ANY_SUBS));
+
+  Item_in_subselect *in_arg= (Item_in_subselect*)args[1];
+  in_arg->left_expr= args[0];
+
+  return (this->*transformer)(argument);
+}
+
+
 longlong Item_func_eq::val_int()
 {
   DBUG_ASSERT(fixed == 1);
@@ -3049,6 +3174,16 @@ void Item_func_case::fix_length_and_dec()
     nagg++;
     if (!(found_types= collect_cmp_types(agg, nagg)))
       return;
+    if (with_sum_func || current_thd->lex->current_select->group_list.elements)
+    {
+      /*
+        See TODO commentary in the setup_copy_fields function:
+        item in a group may be wrapped with an Item_copy_string item.
+        That item has a STRING_RESULT result type, so we need
+        to take this type into account.
+      */
+      found_types |= (1 << item_cmp_type(left_result_type, STRING_RESULT));
+    }
 
     for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
     {
@@ -3085,9 +3220,8 @@ void Item_func_case::fix_length_and_dec()
       agg_num_lengths(args[i + 1]);
     if (else_expr_num != -1) 
       agg_num_lengths(args[else_expr_num]);
-    max_length= my_decimal_precision_to_length_no_truncation(max_length +
-                                                             decimals, decimals,
-                                                             unsigned_flag);
+    max_length= my_decimal_precision_to_length(max_length + decimals, decimals,
+                                               unsigned_flag);
   }
 }
 
@@ -3323,19 +3457,19 @@ int cmp_longlong(void *cmp_arg,
       One of the args is unsigned and is too big to fit into the 
       positive signed range. Report no match.
     */  
-    if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX) ||
+    if ((a->unsigned_flag && ((ulonglong) a->val) > (ulonglong) LONGLONG_MAX)
+        ||
         (b->unsigned_flag && ((ulonglong) b->val) > (ulonglong) LONGLONG_MAX))
       return a->unsigned_flag ? 1 : -1;
     /*
       Although the signedness differs both args can fit into the signed 
       positive range. Make them signed and compare as usual.
     */  
-    return cmp_longs (a->val, b->val);
+    return cmp_longs(a->val, b->val);
   }
   if (a->unsigned_flag)
-    return cmp_ulongs ((ulonglong) a->val, (ulonglong) b->val);
-  else
-    return cmp_longs (a->val, b->val);
+    return cmp_ulongs((ulonglong) a->val, (ulonglong) b->val);
+  return cmp_longs(a->val, b->val);
 }
 
 static int cmp_double(void *cmp_arg, double *a,double *b)
@@ -4201,9 +4335,13 @@ Item_cond::fix_fields(THD *thd, Item **ref)
   DBUG_ASSERT(fixed == 0);
   List_iterator<Item> li(list);
   Item *item;
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
   uchar buff[sizeof(char*)];			// Max local vars in function
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 1;
+
+  if (functype() != COND_AND_FUNC)
+    thd->thd_marker.emb_on_expr_nest= NULL;
   /*
     and_table_cache is the value that Item_cond_or() returns for
     not_null_tables()
@@ -4262,11 +4400,45 @@ Item_cond::fix_fields(THD *thd, Item **ref)
       maybe_null=1;
   }
   thd->lex->current_select->cond_count+= list.elements;
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
   fix_length_and_dec();
   fixed= 1;
   return FALSE;
 }
 
+
+void Item_cond::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  List_iterator<Item> li(list);
+  Item *item;
+
+  used_tables_cache=0;
+  const_item_cache=1;
+
+  and_tables_cache= ~(table_map) 0; // Here and below we do as fix_fields does
+  not_null_tables_cache= 0;
+
+  while ((item=li++))
+  {
+    table_map tmp_table_map;
+    item->fix_after_pullout(new_parent, li.ref());
+    item= *li.ref();
+    used_tables_cache|= item->used_tables();
+    const_item_cache&= item->const_item();
+
+    if (item->const_item())
+      and_tables_cache= (table_map) 0;
+    else
+    {
+      tmp_table_map= item->not_null_tables();
+      not_null_tables_cache|= tmp_table_map;
+      and_tables_cache&= tmp_table_map;
+      const_item_cache= FALSE;
+    }  
+  }
+}
+
+
 bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg)
 {
   List_iterator_fast<Item> li(list);
@@ -4761,8 +4933,7 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref)
       We could also do boyer-more for non-const items, but as we would have to
       recompute the tables for each row it's not worth it.
     */
-    if (args[1]->const_item() && !use_strnxfrm(collation.collation) &&
-       !(specialflag & SPECIAL_NO_NEW_FUNC))
+    if (args[1]->const_item() && !use_strnxfrm(collation.collation))
     {
       String* res2 = args[1]->val_str(&cmp.value2);
       if (!res2)
@@ -5523,33 +5694,7 @@ void Item_equal::merge(Item_equal *item)
 
 void Item_equal::sort(Item_field_cmpfunc compare, void *arg)
 {
-  bool swap;
-  List_iterator<Item_field> it(fields);
-  do
-  {
-    Item_field *item1= it++;
-    Item_field **ref1= it.ref();
-    Item_field *item2;
-
-    swap= FALSE;
-    while ((item2= it++))
-    {
-      Item_field **ref2= it.ref();
-      if (compare(item1, item2, arg) < 0)
-      {
-        Item_field *item= *ref1;
-        *ref1= *ref2;
-        *ref2= item;
-        swap= TRUE;
-      }
-      else
-      {
-        item1= item2;
-        ref1= ref2;
-      }
-    }
-    it.rewind();
-  } while (swap);
+  exchange_sort<Item_field>(&fields, compare, arg);
 }
 
 
@@ -5618,6 +5763,7 @@ void Item_equal::update_used_tables()
   not_null_tables_cache= used_tables_cache= 0;
   if ((const_item_cache= cond_false))
     return;
+  const_item_cache= 1;
   while ((item=li++))
   {
     item->update_used_tables();
@@ -5634,7 +5780,7 @@ longlong Item_equal::val_int()
     return 0;
   List_iterator_fast<Item_field> it(fields);
   Item *item= const_item ? const_item : it++;
-  if ((null_value= item->null_value))
+  if ((null_value= item->is_null()))
     return 0;
   eval_item->store_value(item);
   while ((item_field= it++))
@@ -5642,7 +5788,7 @@ longlong Item_equal::val_int()
     /* Skip fields of non-const tables. They haven't been read yet */
     if (item_field->field->table->const_table)
     {
-      if ((null_value= item_field->null_value) || eval_item->cmp(item_field))
+      if ((null_value= item_field->is_null()) || eval_item->cmp(item_field))
         return 0;
     }
   }
@@ -5651,7 +5797,7 @@ longlong Item_equal::val_int()
 
 void Item_equal::fix_length_and_dec()
 {
-  Item *item= get_first();
+  Item *item= get_first(NULL);
   eval_item= cmp_item::get_comparator(item->result_type(),
                                       item->collation.collation);
 }
@@ -5714,3 +5860,127 @@ void Item_equal::print(String *str, enum_query_type query_type)
   str->append(')');
 }
 
+
+/*
+  @brief Get the first equal field of multiple equality.
+  @param[in] field   the field to get equal field to
+
+  @details Get the first field of multiple equality that is equal to the
+  given field. In order to make semi-join materialization strategy work
+  correctly we can't propagate equal fields from upper select to a
+  materialized semi-join.
+  Thus the fields is returned according to following rules:
+
+  1) If the given field belongs to a semi-join then the first field in
+     multiple equality which belong to the same semi-join is returned.
+     Otherwise NULL is returned.
+  2) If the given field doesn't belong to a semi-join then
+     the first field in the multiple equality that doesn't belong to any
+     semi-join is returned.
+     If all fields in the equality are belong to semi-join(s) then NULL
+     is returned.
+  3) If no field is given then the first field in the multiple equality
+     is returned without regarding whether it belongs to a semi-join or not.
+
+  @retval Found first field in the multiple equality.
+  @retval 0 if no field found.
+*/
+
+Item_field* Item_equal::get_first(Item_field *field)
+{
+  List_iterator<Item_field> it(fields);
+  Item_field *item;
+  JOIN_TAB *field_tab;
+  if (!field)
+    return fields.head();
+
+  /*
+    Of all equal fields, return the first one we can use. Normally, this is the
+    field which belongs to the table that is the first in the join order.
+
+    There is one exception to this: When semi-join materialization strategy is
+    used, and the given field belongs to a table within the semi-join nest, we
+    must pick the first field in the semi-join nest.
+
+    Example: suppose we have a join order:
+
+       ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
+
+    and equality ot2.col = it1.col = it2.col
+    If we're looking for best substitute for 'it2.col', we should pick it1.col
+    and not ot2.col.
+    
+    eliminate_item_equal() also has code that deals with equality substitution
+    in presense of SJM nests.
+  */
+
+  field_tab= field->field->table->reginfo.join_tab;
+
+  TABLE_LIST *emb_nest= field->field->table->pos_in_table_list->embedding;
+
+  if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used)
+  {
+    /*
+      It's a field from an materialized semi-join. We can substitute it only
+      for a field from the same semi-join.
+    */
+    JOIN_TAB *first;
+    JOIN *join= field_tab->join;
+    int tab_idx= field_tab - field_tab->join->join_tab;
+
+    /* Find the first table of this semi-join nest */
+    for (int i= tab_idx; i >= (int)join->const_tables; i--)
+    {
+      if (join->join_tab[i].table->map & emb_nest->sj_inner_tables)
+        first= join->join_tab + i;
+      else
+        // Found first tab that doesn't belong to current SJ.
+        break;
+    }
+    /* Find an item to substitute for. */
+    while ((item= it++))
+    {
+      if (item->field->table->reginfo.join_tab >= first)
+      {
+        /*
+          If we found given field then return NULL to avoid unnecessary
+          substitution.
+        */
+        return (item != field) ? item : NULL;
+      }
+    }
+  }
+  else
+  {
+#if 0    
+    /*
+      The field is not in SJ-Materialization nest. We must return the first
+      field that's not embedded in a SJ-Materialization nest.
+      Example: suppose we have a join order:
+
+          SJ-Mat(it1  it2)  ot1  ot2
+
+      and equality ot2.col = ot1.col = it2.col
+      If we're looking for best substitute for 'ot2.col', we should pick ot1.col
+      and not it2.col, because when we run a join between ot1 and ot2
+      execution of SJ-Mat(...) has already finished and we can't rely on the
+      value of it*.*.
+      psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that
+      is.
+    */
+    while ((item= it++))
+    {
+      TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding;
+      if (!emb_nest || !emb_nest->sj_mat_info || 
+          !emb_nest->sj_mat_info->is_used)
+      {
+        return item;
+      }
+    }
+#endif
+    return fields.head();
+  }
+  // Shouldn't get here.
+  DBUG_ASSERT(0);
+  return NULL;
+}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index f9851011563..fd136cfbe0d 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -237,7 +237,7 @@ public:
 
 
 class Item_cache;
-#define UNKNOWN ((my_bool)-1)
+#define UNKNOWN (-1)
 
 
 /*
@@ -259,6 +259,7 @@ class Item_in_optimizer: public Item_bool_func
 {
 protected:
   Item_cache *cache;
+  Item *expr_cache;
   bool save_cache;
   /* 
     Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries:
@@ -266,10 +267,10 @@ protected:
       FALSE   - result is FALSE
       TRUE    - result is NULL
   */
-  my_bool result_for_null_param;
+  int result_for_null_param;
 public:
   Item_in_optimizer(Item *a, Item_in_subselect *b):
-    Item_bool_func(a, reinterpret_cast<Item *>(b)), cache(0),
+    Item_bool_func(a, reinterpret_cast<Item *>(b)), cache(0), expr_cache(0),
     save_cache(0), result_for_null_param(UNKNOWN)
   {}
   bool fix_fields(THD *, Item **);
@@ -280,6 +281,8 @@ public:
   const char *func_name() const { return "<in_optimizer>"; }
   Item_cache **get_cache() { return &cache; }
   void keep_top_level_cache();
+  Item *transform(Item_transformer transformer, uchar *arg);
+  virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
 };
 
 class Comp_creator
@@ -387,6 +390,7 @@ public:
   CHARSET_INFO *compare_collation() { return cmp.cmp_collation.collation; }
   uint decimal_precision() const { return 1; }
   void top_level_item() { abort_on_null= TRUE; }
+  Arg_comparator *get_comparator() { return &cmp; }
   void cleanup()
   {
     Item_int_func::cleanup();
@@ -503,13 +507,23 @@ public:
 class Item_func_eq :public Item_bool_rowready_func2
 {
 public:
-  Item_func_eq(Item *a,Item *b) :Item_bool_rowready_func2(a,b) {}
+  Item_func_eq(Item *a,Item *b) :
+    Item_bool_rowready_func2(a,b), in_equality_no(UINT_MAX)
+  {}
   longlong val_int();
   enum Functype functype() const { return EQ_FUNC; }
   enum Functype rev_functype() const { return EQ_FUNC; }
   cond_result eq_cmp_result() const { return COND_TRUE; }
   const char *func_name() const { return "="; }
   Item *negated_item();
+  /* 
+    - If this equality is created from the subquery's IN-equality:
+      number of the item it was created from, e.g. for
+       (a,b) IN (SELECT c,d ...)  a=c will have in_equality_no=0, 
+       and b=d will have in_equality_no=1.
+    - Otherwise, UINT_MAX
+  */
+  uint in_equality_no;
 };
 
 class Item_func_equal :public Item_bool_rowready_func2
@@ -679,7 +693,7 @@ struct interval_range
 class Item_func_interval :public Item_int_func
 {
   Item_row *row;
-  my_bool use_decimal_comparison;
+  bool use_decimal_comparison;
   interval_range *intervals;
 public:
   Item_func_interval(Item_row *a)
@@ -804,7 +818,7 @@ public:
   virtual uchar *get_value(Item *item)=0;
   void sort()
   {
-    my_qsort2(base,used_count,size,compare,collation);
+    my_qsort2(base,used_count,size,compare,(void*)collation);
   }
   int find(Item *item);
   
@@ -887,7 +901,7 @@ public:
   void value_to_item(uint pos, Item *item)
   {
     ((Item_int*) item)->value= ((packed_longlong*) base)[pos].val;
-    ((Item_int*) item)->unsigned_flag= (my_bool)
+    ((Item_int*) item)->unsigned_flag= (bool)
       ((packed_longlong*) base)[pos].unsigned_flag;
   }
   Item_result result_type() { return INT_RESULT; }
@@ -1502,6 +1516,7 @@ public:
     list.prepand(nlist);
   }
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
 
   enum Type type() const { return COND_ITEM; }
   List<Item>* argument_list() { return &list; }
@@ -1621,7 +1636,8 @@ public:
   void add(Item_field *f);
   uint members();
   bool contains(Field *field);
-  Item_field* get_first() { return fields.head(); }
+  Item_field* get_first(Item_field *field);
+  uint n_fields() { return fields.elements; }
   void merge(Item_equal *item);
   void update_const();
   enum Functype functype() const { return MULT_EQUAL_FUNC; }
@@ -1638,6 +1654,9 @@ public:
   virtual void print(String *str, enum_query_type query_type);
   CHARSET_INFO *compare_collation() 
   { return fields.head()->collation.collation; }
+  friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal);
+  friend bool setup_sj_materialization(struct st_join_table *tab);
 }; 
 
 class COND_EQUAL: public Sql_alloc
@@ -1744,14 +1763,34 @@ inline bool is_cond_or(Item *item)
 class Item_cond_xor :public Item_cond
 {
 public:
-  Item_cond_xor() :Item_cond() {}
-  Item_cond_xor(Item *i1,Item *i2) :Item_cond(i1,i2) {}
+  Item_cond_xor(Item *i1,Item *i2) :Item_cond(i1,i2) 
+  {
+    /* 
+      Items must be stored in args[] as well because this Item_cond is
+      treated as a FUNC_ITEM (see type()). I.e., users of it will get
+      it's children by calling arguments(), not argument_list(). This
+      is a temporary solution until XOR is optimized and treated like
+      a full Item_cond citizen.
+     */
+    arg_count= 2;
+    args= tmp_arg;
+    args[0]= i1; 
+    args[1]= i2;
+  }
   enum Functype functype() const { return COND_XOR_FUNC; }
   /* TODO: remove the next line when implementing XOR optimization */
   enum Type type() const { return FUNC_ITEM; }
   longlong val_int();
   const char *func_name() const { return "xor"; }
   void top_level_item() {}
+  /* Since child Items are stored in args[], Items cannot be added.
+     However, since Item_cond_xor is treated as a FUNC_ITEM (see
+     type()), the methods below should never be called. 
+  */
+  bool add(Item *item) { DBUG_ASSERT(FALSE); return FALSE; }
+  bool add_at_head(Item *item) { DBUG_ASSERT(FALSE); return FALSE; }
+  bool add_at_head(List<Item> *nlist) { DBUG_ASSERT(FALSE); return FALSE; }
+  void copy_andor_arguments(THD *thd, Item_cond *item) { DBUG_ASSERT(FALSE); }
 };
 
 
diff --git a/sql/item_create.cc b/sql/item_create.cc
index 672e59986d5..7bddc29b812 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -82,7 +82,7 @@ public:
     @param thd The current thread
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd) = 0;
+  virtual Item *create_builder(THD *thd) = 0;
 
 protected:
   /** Constructor. */
@@ -107,7 +107,7 @@ public:
     @param arg1 The first argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1) = 0;
+  virtual Item *create_1_arg(THD *thd, Item *arg1) = 0;
 
 protected:
   /** Constructor. */
@@ -133,7 +133,7 @@ public:
     @param arg2 The second argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2) = 0;
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) = 0;
 
 protected:
   /** Constructor. */
@@ -160,7 +160,7 @@ public:
     @param arg3 The third argument of the function
     @return An item representing the function call
   */
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3) = 0;
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3) = 0;
 
 protected:
   /** Constructor. */
@@ -177,8 +177,8 @@ protected:
 class Create_sp_func : public Create_qfunc
 {
 public:
-  virtual Item *create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list);
+  virtual Item *create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name, List<Item> *item_list);
 
   static Create_sp_func s_singleton;
 
@@ -223,7 +223,7 @@ protected:
 class Create_func_abs : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_abs s_singleton;
 
@@ -236,7 +236,7 @@ protected:
 class Create_func_acos : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_acos s_singleton;
 
@@ -249,7 +249,7 @@ protected:
 class Create_func_addtime : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_addtime s_singleton;
 
@@ -262,7 +262,7 @@ protected:
 class Create_func_aes_encrypt : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_aes_encrypt s_singleton;
 
@@ -275,7 +275,7 @@ protected:
 class Create_func_aes_decrypt : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_aes_decrypt s_singleton;
 
@@ -289,7 +289,7 @@ protected:
 class Create_func_area : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_area s_singleton;
 
@@ -304,7 +304,7 @@ protected:
 class Create_func_as_wkb : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_as_wkb s_singleton;
 
@@ -319,7 +319,7 @@ protected:
 class Create_func_as_wkt : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_as_wkt s_singleton;
 
@@ -333,7 +333,7 @@ protected:
 class Create_func_asin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_asin s_singleton;
 
@@ -359,7 +359,7 @@ protected:
 class Create_func_benchmark : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_benchmark s_singleton;
 
@@ -372,7 +372,7 @@ protected:
 class Create_func_bin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bin s_singleton;
 
@@ -385,7 +385,7 @@ protected:
 class Create_func_bit_count : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bit_count s_singleton;
 
@@ -398,7 +398,7 @@ protected:
 class Create_func_bit_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_bit_length s_singleton;
 
@@ -411,7 +411,7 @@ protected:
 class Create_func_ceiling : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ceiling s_singleton;
 
@@ -425,7 +425,7 @@ protected:
 class Create_func_centroid : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_centroid s_singleton;
 
@@ -439,7 +439,7 @@ protected:
 class Create_func_char_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_char_length s_singleton;
 
@@ -452,7 +452,7 @@ protected:
 class Create_func_coercibility : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_coercibility s_singleton;
 
@@ -465,7 +465,7 @@ protected:
 class Create_func_compress : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_compress s_singleton;
 
@@ -504,7 +504,7 @@ protected:
 class Create_func_connection_id : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_connection_id s_singleton;
 
@@ -518,7 +518,7 @@ protected:
 class Create_func_contains : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_contains s_singleton;
 
@@ -532,7 +532,7 @@ protected:
 class Create_func_conv : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_conv s_singleton;
 
@@ -545,7 +545,7 @@ protected:
 class Create_func_convert_tz : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_convert_tz s_singleton;
 
@@ -558,7 +558,7 @@ protected:
 class Create_func_cos : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_cos s_singleton;
 
@@ -571,7 +571,7 @@ protected:
 class Create_func_cot : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_cot s_singleton;
 
@@ -584,7 +584,7 @@ protected:
 class Create_func_crc32 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_crc32 s_singleton;
 
@@ -598,7 +598,7 @@ protected:
 class Create_func_crosses : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_crosses s_singleton;
 
@@ -612,7 +612,7 @@ protected:
 class Create_func_date_format : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_date_format s_singleton;
 
@@ -625,7 +625,7 @@ protected:
 class Create_func_datediff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_datediff s_singleton;
 
@@ -638,7 +638,7 @@ protected:
 class Create_func_dayname : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayname s_singleton;
 
@@ -651,7 +651,7 @@ protected:
 class Create_func_dayofmonth : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofmonth s_singleton;
 
@@ -664,7 +664,7 @@ protected:
 class Create_func_dayofweek : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofweek s_singleton;
 
@@ -677,7 +677,7 @@ protected:
 class Create_func_dayofyear : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dayofyear s_singleton;
 
@@ -690,7 +690,7 @@ protected:
 class Create_func_decode : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_decode s_singleton;
 
@@ -703,7 +703,7 @@ protected:
 class Create_func_degrees : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_degrees s_singleton;
 
@@ -743,7 +743,7 @@ protected:
 class Create_func_dimension : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_dimension s_singleton;
 
@@ -758,7 +758,7 @@ protected:
 class Create_func_disjoint : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_disjoint s_singleton;
 
@@ -785,7 +785,7 @@ protected:
 class Create_func_encode : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_encode s_singleton;
 
@@ -812,7 +812,7 @@ protected:
 class Create_func_endpoint : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_endpoint s_singleton;
 
@@ -827,7 +827,7 @@ protected:
 class Create_func_envelope : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_envelope s_singleton;
 
@@ -842,7 +842,7 @@ protected:
 class Create_func_equals : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_equals s_singleton;
 
@@ -856,7 +856,7 @@ protected:
 class Create_func_exp : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_exp s_singleton;
 
@@ -883,7 +883,7 @@ protected:
 class Create_func_exteriorring : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_exteriorring s_singleton;
 
@@ -910,7 +910,7 @@ protected:
 class Create_func_find_in_set : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_find_in_set s_singleton;
 
@@ -923,7 +923,7 @@ protected:
 class Create_func_floor : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_floor s_singleton;
 
@@ -949,7 +949,7 @@ protected:
 class Create_func_found_rows : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_found_rows s_singleton;
 
@@ -962,7 +962,7 @@ protected:
 class Create_func_from_days : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_from_days s_singleton;
 
@@ -1019,7 +1019,7 @@ protected:
 class Create_func_geometry_type : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_geometry_type s_singleton;
 
@@ -1034,7 +1034,7 @@ protected:
 class Create_func_geometryn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_geometryn s_singleton;
 
@@ -1048,7 +1048,7 @@ protected:
 class Create_func_get_lock : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_get_lock s_singleton;
 
@@ -1062,7 +1062,7 @@ protected:
 class Create_func_glength : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_glength s_singleton;
 
@@ -1089,7 +1089,7 @@ protected:
 class Create_func_hex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_hex s_singleton;
 
@@ -1102,7 +1102,7 @@ protected:
 class Create_func_ifnull : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_ifnull s_singleton;
 
@@ -1115,7 +1115,7 @@ protected:
 class Create_func_inet_ntoa : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_inet_ntoa s_singleton;
 
@@ -1128,7 +1128,7 @@ protected:
 class Create_func_inet_aton : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_inet_aton s_singleton;
 
@@ -1141,7 +1141,7 @@ protected:
 class Create_func_instr : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_instr s_singleton;
 
@@ -1155,7 +1155,7 @@ protected:
 class Create_func_interiorringn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_interiorringn s_singleton;
 
@@ -1170,7 +1170,7 @@ protected:
 class Create_func_intersects : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_intersects s_singleton;
 
@@ -1184,7 +1184,7 @@ protected:
 class Create_func_is_free_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_is_free_lock s_singleton;
 
@@ -1197,7 +1197,7 @@ protected:
 class Create_func_is_used_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_is_used_lock s_singleton;
 
@@ -1211,7 +1211,7 @@ protected:
 class Create_func_isclosed : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isclosed s_singleton;
 
@@ -1226,7 +1226,7 @@ protected:
 class Create_func_isempty : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isempty s_singleton;
 
@@ -1240,7 +1240,7 @@ protected:
 class Create_func_isnull : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_isnull s_singleton;
 
@@ -1254,7 +1254,7 @@ protected:
 class Create_func_issimple : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_issimple s_singleton;
 
@@ -1268,7 +1268,7 @@ protected:
 class Create_func_last_day : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_last_day s_singleton;
 
@@ -1294,7 +1294,7 @@ protected:
 class Create_func_lcase : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_lcase s_singleton;
 
@@ -1320,7 +1320,7 @@ protected:
 class Create_func_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_length s_singleton;
 
@@ -1333,7 +1333,7 @@ protected:
 class Create_func_ln : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ln s_singleton;
 
@@ -1346,7 +1346,7 @@ protected:
 class Create_func_load_file : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_load_file s_singleton;
 
@@ -1385,7 +1385,7 @@ protected:
 class Create_func_log10 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_log10 s_singleton;
 
@@ -1398,7 +1398,7 @@ protected:
 class Create_func_log2 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_log2 s_singleton;
 
@@ -1411,7 +1411,7 @@ protected:
 class Create_func_lpad : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_lpad s_singleton;
 
@@ -1424,7 +1424,7 @@ protected:
 class Create_func_ltrim : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ltrim s_singleton;
 
@@ -1437,7 +1437,7 @@ protected:
 class Create_func_makedate : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_makedate s_singleton;
 
@@ -1450,7 +1450,7 @@ protected:
 class Create_func_maketime : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_maketime s_singleton;
 
@@ -1489,7 +1489,7 @@ protected:
 class Create_func_md5 : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_md5 s_singleton;
 
@@ -1502,7 +1502,7 @@ protected:
 class Create_func_monthname : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_monthname s_singleton;
 
@@ -1515,7 +1515,7 @@ protected:
 class Create_func_name_const : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_name_const s_singleton;
 
@@ -1528,7 +1528,7 @@ protected:
 class Create_func_nullif : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_nullif s_singleton;
 
@@ -1542,7 +1542,7 @@ protected:
 class Create_func_numgeometries : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numgeometries s_singleton;
 
@@ -1557,7 +1557,7 @@ protected:
 class Create_func_numinteriorring : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numinteriorring s_singleton;
 
@@ -1572,7 +1572,7 @@ protected:
 class Create_func_numpoints : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_numpoints s_singleton;
 
@@ -1586,7 +1586,7 @@ protected:
 class Create_func_oct : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_oct s_singleton;
 
@@ -1599,7 +1599,7 @@ protected:
 class Create_func_ord : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ord s_singleton;
 
@@ -1613,7 +1613,7 @@ protected:
 class Create_func_overlaps : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_overlaps s_singleton;
 
@@ -1627,7 +1627,7 @@ protected:
 class Create_func_period_add : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_period_add s_singleton;
 
@@ -1640,7 +1640,7 @@ protected:
 class Create_func_period_diff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_period_diff s_singleton;
 
@@ -1653,7 +1653,7 @@ protected:
 class Create_func_pi : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_pi s_singleton;
 
@@ -1667,7 +1667,7 @@ protected:
 class Create_func_pointn : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_pointn s_singleton;
 
@@ -1681,7 +1681,7 @@ protected:
 class Create_func_pow : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_pow s_singleton;
 
@@ -1694,7 +1694,7 @@ protected:
 class Create_func_quote : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_quote s_singleton;
 
@@ -1707,7 +1707,7 @@ protected:
 class Create_func_radians : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_radians s_singleton;
 
@@ -1733,7 +1733,7 @@ protected:
 class Create_func_release_lock : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_release_lock s_singleton;
 
@@ -1746,7 +1746,7 @@ protected:
 class Create_func_reverse : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_reverse s_singleton;
 
@@ -1772,7 +1772,7 @@ protected:
 class Create_func_row_count : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_row_count s_singleton;
 
@@ -1785,7 +1785,7 @@ protected:
 class Create_func_rpad : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_rpad s_singleton;
 
@@ -1798,7 +1798,7 @@ protected:
 class Create_func_rtrim : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_rtrim s_singleton;
 
@@ -1811,7 +1811,7 @@ protected:
 class Create_func_sec_to_time : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sec_to_time s_singleton;
 
@@ -1824,7 +1824,7 @@ protected:
 class Create_func_sha : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sha s_singleton;
 
@@ -1850,7 +1850,7 @@ protected:
 class Create_func_sign : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sign s_singleton;
 
@@ -1863,7 +1863,7 @@ protected:
 class Create_func_sin : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sin s_singleton;
 
@@ -1876,7 +1876,7 @@ protected:
 class Create_func_sleep : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sleep s_singleton;
 
@@ -1889,7 +1889,7 @@ protected:
 class Create_func_soundex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_soundex s_singleton;
 
@@ -1902,7 +1902,7 @@ protected:
 class Create_func_space : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_space s_singleton;
 
@@ -1915,7 +1915,7 @@ protected:
 class Create_func_sqrt : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_sqrt s_singleton;
 
@@ -1929,7 +1929,7 @@ protected:
 class Create_func_srid : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_srid s_singleton;
 
@@ -1944,7 +1944,7 @@ protected:
 class Create_func_startpoint : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_startpoint s_singleton;
 
@@ -1958,7 +1958,7 @@ protected:
 class Create_func_str_to_date : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_str_to_date s_singleton;
 
@@ -1971,7 +1971,7 @@ protected:
 class Create_func_strcmp : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_strcmp s_singleton;
 
@@ -1984,7 +1984,7 @@ protected:
 class Create_func_substr_index : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_substr_index s_singleton;
 
@@ -1997,7 +1997,7 @@ protected:
 class Create_func_subtime : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_subtime s_singleton;
 
@@ -2010,7 +2010,7 @@ protected:
 class Create_func_tan : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_tan s_singleton;
 
@@ -2023,7 +2023,7 @@ protected:
 class Create_func_time_format : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_time_format s_singleton;
 
@@ -2036,7 +2036,7 @@ protected:
 class Create_func_time_to_sec : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_time_to_sec s_singleton;
 
@@ -2049,7 +2049,7 @@ protected:
 class Create_func_timediff : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_timediff s_singleton;
 
@@ -2062,7 +2062,7 @@ protected:
 class Create_func_to_days : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_to_days s_singleton;
 
@@ -2088,7 +2088,7 @@ protected:
 class Create_func_touches : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_touches s_singleton;
 
@@ -2102,7 +2102,7 @@ protected:
 class Create_func_ucase : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_ucase s_singleton;
 
@@ -2115,7 +2115,7 @@ protected:
 class Create_func_uncompress : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_uncompress s_singleton;
 
@@ -2128,7 +2128,7 @@ protected:
 class Create_func_uncompressed_length : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_uncompressed_length s_singleton;
 
@@ -2141,7 +2141,7 @@ protected:
 class Create_func_unhex : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_unhex s_singleton;
 
@@ -2167,7 +2167,7 @@ protected:
 class Create_func_uuid : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_uuid s_singleton;
 
@@ -2180,7 +2180,7 @@ protected:
 class Create_func_uuid_short : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_uuid_short s_singleton;
 
@@ -2193,7 +2193,7 @@ protected:
 class Create_func_version : public Create_func_arg0
 {
 public:
-  virtual Item *create(THD *thd);
+  virtual Item *create_builder(THD *thd);
 
   static Create_func_version s_singleton;
 
@@ -2206,7 +2206,7 @@ protected:
 class Create_func_weekday : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_weekday s_singleton;
 
@@ -2219,7 +2219,7 @@ protected:
 class Create_func_weekofyear : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_weekofyear s_singleton;
 
@@ -2233,7 +2233,7 @@ protected:
 class Create_func_within : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_within s_singleton;
 
@@ -2248,7 +2248,7 @@ protected:
 class Create_func_x : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_x s_singleton;
 
@@ -2262,7 +2262,7 @@ protected:
 class Create_func_xml_extractvalue : public Create_func_arg2
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2);
+  virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2);
 
   static Create_func_xml_extractvalue s_singleton;
 
@@ -2275,7 +2275,7 @@ protected:
 class Create_func_xml_update : public Create_func_arg3
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1, Item *arg2, Item *arg3);
+  virtual Item *create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3);
 
   static Create_func_xml_update s_singleton;
 
@@ -2289,7 +2289,7 @@ protected:
 class Create_func_y : public Create_func_arg1
 {
 public:
-  virtual Item *create(THD *thd, Item *arg1);
+  virtual Item *create_1_arg(THD *thd, Item *arg1);
 
   static Create_func_y s_singleton;
 
@@ -2384,7 +2384,7 @@ Create_qfunc::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
   if (thd->lex->copy_db_to(&db.str, &db.length))
     return NULL;
 
-  return create(thd, db, name, false, item_list);
+  return create_with_db(thd, db, name, false, item_list);
 }
 
 
@@ -2502,8 +2502,8 @@ Create_udf_func::create(THD *thd, udf_func *udf, List<Item> *item_list)
 Create_sp_func Create_sp_func::s_singleton;
 
 Item*
-Create_sp_func::create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list)
+Create_sp_func::create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name, List<Item> *item_list)
 {
   int arg_count= 0;
   Item *func= NULL;
@@ -2570,7 +2570,7 @@ Create_func_arg0::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd);
+  return create_builder(thd);
 }
 
 
@@ -2596,7 +2596,7 @@ Create_func_arg1::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1);
+  return create_1_arg(thd, param_1);
 }
 
 
@@ -2624,7 +2624,7 @@ Create_func_arg2::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1, param_2);
+  return create_2_arg(thd, param_1, param_2);
 }
 
 
@@ -2654,14 +2654,14 @@ Create_func_arg3::create_func(THD *thd, LEX_STRING name, List<Item> *item_list)
     return NULL;
   }
 
-  return create(thd, param_1, param_2, param_3);
+  return create_3_arg(thd, param_1, param_2, param_3);
 }
 
 
 Create_func_abs Create_func_abs::s_singleton;
 
 Item*
-Create_func_abs::create(THD *thd, Item *arg1)
+Create_func_abs::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_abs(arg1);
 }
@@ -2670,7 +2670,7 @@ Create_func_abs::create(THD *thd, Item *arg1)
 Create_func_acos Create_func_acos::s_singleton;
 
 Item*
-Create_func_acos::create(THD *thd, Item *arg1)
+Create_func_acos::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_acos(arg1);
 }
@@ -2679,7 +2679,7 @@ Create_func_acos::create(THD *thd, Item *arg1)
 Create_func_addtime Create_func_addtime::s_singleton;
 
 Item*
-Create_func_addtime::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_addtime::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_add_time(arg1, arg2, 0, 0);
 }
@@ -2688,7 +2688,7 @@ Create_func_addtime::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_aes_encrypt Create_func_aes_encrypt::s_singleton;
 
 Item*
-Create_func_aes_encrypt::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_aes_encrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_aes_encrypt(arg1, arg2);
 }
@@ -2697,7 +2697,7 @@ Create_func_aes_encrypt::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_aes_decrypt Create_func_aes_decrypt::s_singleton;
 
 Item*
-Create_func_aes_decrypt::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_aes_decrypt::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_aes_decrypt(arg1, arg2);
 }
@@ -2707,7 +2707,7 @@ Create_func_aes_decrypt::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_area Create_func_area::s_singleton;
 
 Item*
-Create_func_area::create(THD *thd, Item *arg1)
+Create_func_area::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_area(arg1);
 }
@@ -2718,7 +2718,7 @@ Create_func_area::create(THD *thd, Item *arg1)
 Create_func_as_wkb Create_func_as_wkb::s_singleton;
 
 Item*
-Create_func_as_wkb::create(THD *thd, Item *arg1)
+Create_func_as_wkb::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_as_wkb(arg1);
 }
@@ -2729,7 +2729,7 @@ Create_func_as_wkb::create(THD *thd, Item *arg1)
 Create_func_as_wkt Create_func_as_wkt::s_singleton;
 
 Item*
-Create_func_as_wkt::create(THD *thd, Item *arg1)
+Create_func_as_wkt::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_as_wkt(arg1);
 }
@@ -2739,7 +2739,7 @@ Create_func_as_wkt::create(THD *thd, Item *arg1)
 Create_func_asin Create_func_asin::s_singleton;
 
 Item*
-Create_func_asin::create(THD *thd, Item *arg1)
+Create_func_asin::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_asin(arg1);
 }
@@ -2785,7 +2785,7 @@ Create_func_atan::create_native(THD *thd, LEX_STRING name,
 Create_func_benchmark Create_func_benchmark::s_singleton;
 
 Item*
-Create_func_benchmark::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_benchmark::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
   return new (thd->mem_root) Item_func_benchmark(arg1, arg2);
@@ -2795,7 +2795,7 @@ Create_func_benchmark::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_bin Create_func_bin::s_singleton;
 
 Item*
-Create_func_bin::create(THD *thd, Item *arg1)
+Create_func_bin::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i10= new (thd->mem_root) Item_int((int32) 10,2);
   Item *i2= new (thd->mem_root) Item_int((int32) 2,1);
@@ -2806,7 +2806,7 @@ Create_func_bin::create(THD *thd, Item *arg1)
 Create_func_bit_count Create_func_bit_count::s_singleton;
 
 Item*
-Create_func_bit_count::create(THD *thd, Item *arg1)
+Create_func_bit_count::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_bit_count(arg1);
 }
@@ -2815,7 +2815,7 @@ Create_func_bit_count::create(THD *thd, Item *arg1)
 Create_func_bit_length Create_func_bit_length::s_singleton;
 
 Item*
-Create_func_bit_length::create(THD *thd, Item *arg1)
+Create_func_bit_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_bit_length(arg1);
 }
@@ -2824,7 +2824,7 @@ Create_func_bit_length::create(THD *thd, Item *arg1)
 Create_func_ceiling Create_func_ceiling::s_singleton;
 
 Item*
-Create_func_ceiling::create(THD *thd, Item *arg1)
+Create_func_ceiling::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ceiling(arg1);
 }
@@ -2834,7 +2834,7 @@ Create_func_ceiling::create(THD *thd, Item *arg1)
 Create_func_centroid Create_func_centroid::s_singleton;
 
 Item*
-Create_func_centroid::create(THD *thd, Item *arg1)
+Create_func_centroid::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_centroid(arg1);
 }
@@ -2844,7 +2844,7 @@ Create_func_centroid::create(THD *thd, Item *arg1)
 Create_func_char_length Create_func_char_length::s_singleton;
 
 Item*
-Create_func_char_length::create(THD *thd, Item *arg1)
+Create_func_char_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_char_length(arg1);
 }
@@ -2853,7 +2853,7 @@ Create_func_char_length::create(THD *thd, Item *arg1)
 Create_func_coercibility Create_func_coercibility::s_singleton;
 
 Item*
-Create_func_coercibility::create(THD *thd, Item *arg1)
+Create_func_coercibility::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_coercibility(arg1);
 }
@@ -2905,7 +2905,7 @@ Create_func_concat_ws::create_native(THD *thd, LEX_STRING name,
 Create_func_compress Create_func_compress::s_singleton;
 
 Item*
-Create_func_compress::create(THD *thd, Item *arg1)
+Create_func_compress::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_compress(arg1);
 }
@@ -2914,7 +2914,7 @@ Create_func_compress::create(THD *thd, Item *arg1)
 Create_func_connection_id Create_func_connection_id::s_singleton;
 
 Item*
-Create_func_connection_id::create(THD *thd)
+Create_func_connection_id::create_builder(THD *thd)
 {
   thd->lex->safe_to_cache_query= 0;
   return new (thd->mem_root) Item_func_connection_id();
@@ -2925,7 +2925,7 @@ Create_func_connection_id::create(THD *thd)
 Create_func_contains Create_func_contains::s_singleton;
 
 Item*
-Create_func_contains::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_contains::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_CONTAINS_FUNC);
@@ -2936,7 +2936,7 @@ Create_func_contains::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_conv Create_func_conv::s_singleton;
 
 Item*
-Create_func_conv::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_conv::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_conv(arg1, arg2, arg3);
 }
@@ -2945,7 +2945,7 @@ Create_func_conv::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_convert_tz Create_func_convert_tz::s_singleton;
 
 Item*
-Create_func_convert_tz::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_convert_tz::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_convert_tz(arg1, arg2, arg3);
 }
@@ -2954,7 +2954,7 @@ Create_func_convert_tz::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_cos Create_func_cos::s_singleton;
 
 Item*
-Create_func_cos::create(THD *thd, Item *arg1)
+Create_func_cos::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_cos(arg1);
 }
@@ -2963,7 +2963,7 @@ Create_func_cos::create(THD *thd, Item *arg1)
 Create_func_cot Create_func_cot::s_singleton;
 
 Item*
-Create_func_cot::create(THD *thd, Item *arg1)
+Create_func_cot::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_cot(arg1);
 }
@@ -2972,7 +2972,7 @@ Create_func_cot::create(THD *thd, Item *arg1)
 Create_func_crc32 Create_func_crc32::s_singleton;
 
 Item*
-Create_func_crc32::create(THD *thd, Item *arg1)
+Create_func_crc32::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_crc32(arg1);
 }
@@ -2982,7 +2982,7 @@ Create_func_crc32::create(THD *thd, Item *arg1)
 Create_func_crosses Create_func_crosses::s_singleton;
 
 Item*
-Create_func_crosses::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_crosses::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_CROSSES_FUNC);
@@ -2993,7 +2993,7 @@ Create_func_crosses::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_date_format Create_func_date_format::s_singleton;
 
 Item*
-Create_func_date_format::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_date_format::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_date_format(arg1, arg2, 0);
 }
@@ -3002,7 +3002,7 @@ Create_func_date_format::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_datediff Create_func_datediff::s_singleton;
 
 Item*
-Create_func_datediff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_datediff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   Item *i1= new (thd->mem_root) Item_func_to_days(arg1);
   Item *i2= new (thd->mem_root) Item_func_to_days(arg2);
@@ -3014,7 +3014,7 @@ Create_func_datediff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_dayname Create_func_dayname::s_singleton;
 
 Item*
-Create_func_dayname::create(THD *thd, Item *arg1)
+Create_func_dayname::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayname(arg1);
 }
@@ -3023,7 +3023,7 @@ Create_func_dayname::create(THD *thd, Item *arg1)
 Create_func_dayofmonth Create_func_dayofmonth::s_singleton;
 
 Item*
-Create_func_dayofmonth::create(THD *thd, Item *arg1)
+Create_func_dayofmonth::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayofmonth(arg1);
 }
@@ -3032,7 +3032,7 @@ Create_func_dayofmonth::create(THD *thd, Item *arg1)
 Create_func_dayofweek Create_func_dayofweek::s_singleton;
 
 Item*
-Create_func_dayofweek::create(THD *thd, Item *arg1)
+Create_func_dayofweek::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_weekday(arg1, 1);
 }
@@ -3041,7 +3041,7 @@ Create_func_dayofweek::create(THD *thd, Item *arg1)
 Create_func_dayofyear Create_func_dayofyear::s_singleton;
 
 Item*
-Create_func_dayofyear::create(THD *thd, Item *arg1)
+Create_func_dayofyear::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dayofyear(arg1);
 }
@@ -3050,7 +3050,7 @@ Create_func_dayofyear::create(THD *thd, Item *arg1)
 Create_func_decode Create_func_decode::s_singleton;
 
 Item*
-Create_func_decode::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_decode::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_decode(arg1, arg2);
 }
@@ -3059,7 +3059,7 @@ Create_func_decode::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_degrees Create_func_degrees::s_singleton;
 
 Item*
-Create_func_degrees::create(THD *thd, Item *arg1)
+Create_func_degrees::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_units((char*) "degrees", arg1,
                                              180/M_PI, 0.0);
@@ -3144,7 +3144,7 @@ Create_func_des_encrypt::create_native(THD *thd, LEX_STRING name,
 Create_func_dimension Create_func_dimension::s_singleton;
 
 Item*
-Create_func_dimension::create(THD *thd, Item *arg1)
+Create_func_dimension::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_dimension(arg1);
 }
@@ -3155,7 +3155,7 @@ Create_func_dimension::create(THD *thd, Item *arg1)
 Create_func_disjoint Create_func_disjoint::s_singleton;
 
 Item*
-Create_func_disjoint::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_disjoint::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_DISJOINT_FUNC);
@@ -3187,7 +3187,7 @@ Create_func_elt::create_native(THD *thd, LEX_STRING name,
 Create_func_encode Create_func_encode::s_singleton;
 
 Item*
-Create_func_encode::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_encode::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_encode(arg1, arg2);
 }
@@ -3235,7 +3235,7 @@ Create_func_encrypt::create_native(THD *thd, LEX_STRING name,
 Create_func_endpoint Create_func_endpoint::s_singleton;
 
 Item*
-Create_func_endpoint::create(THD *thd, Item *arg1)
+Create_func_endpoint::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_ENDPOINT);
@@ -3247,7 +3247,7 @@ Create_func_endpoint::create(THD *thd, Item *arg1)
 Create_func_envelope Create_func_envelope::s_singleton;
 
 Item*
-Create_func_envelope::create(THD *thd, Item *arg1)
+Create_func_envelope::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_envelope(arg1);
 }
@@ -3258,7 +3258,7 @@ Create_func_envelope::create(THD *thd, Item *arg1)
 Create_func_equals Create_func_equals::s_singleton;
 
 Item*
-Create_func_equals::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_equals::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_EQUALS_FUNC);
@@ -3269,7 +3269,7 @@ Create_func_equals::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_exp Create_func_exp::s_singleton;
 
 Item*
-Create_func_exp::create(THD *thd, Item *arg1)
+Create_func_exp::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_exp(arg1);
 }
@@ -3332,7 +3332,7 @@ Create_func_export_set::create_native(THD *thd, LEX_STRING name,
 Create_func_exteriorring Create_func_exteriorring::s_singleton;
 
 Item*
-Create_func_exteriorring::create(THD *thd, Item *arg1)
+Create_func_exteriorring::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_EXTERIORRING);
@@ -3364,7 +3364,7 @@ Create_func_field::create_native(THD *thd, LEX_STRING name,
 Create_func_find_in_set Create_func_find_in_set::s_singleton;
 
 Item*
-Create_func_find_in_set::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_find_in_set::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_find_in_set(arg1, arg2);
 }
@@ -3373,7 +3373,7 @@ Create_func_find_in_set::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_floor Create_func_floor::s_singleton;
 
 Item*
-Create_func_floor::create(THD *thd, Item *arg1)
+Create_func_floor::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_floor(arg1);
 }
@@ -3416,7 +3416,7 @@ Create_func_format::create_native(THD *thd, LEX_STRING name,
 Create_func_found_rows Create_func_found_rows::s_singleton;
 
 Item*
-Create_func_found_rows::create(THD *thd)
+Create_func_found_rows::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_found_rows::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -3428,7 +3428,7 @@ Create_func_found_rows::create(THD *thd)
 Create_func_from_days Create_func_from_days::s_singleton;
 
 Item*
-Create_func_from_days::create(THD *thd, Item *arg1)
+Create_func_from_days::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_from_days(arg1);
 }
@@ -3556,7 +3556,7 @@ Create_func_geometry_from_wkb::create_native(THD *thd, LEX_STRING name,
 Create_func_geometry_type Create_func_geometry_type::s_singleton;
 
 Item*
-Create_func_geometry_type::create(THD *thd, Item *arg1)
+Create_func_geometry_type::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_geometry_type(arg1);
 }
@@ -3567,7 +3567,7 @@ Create_func_geometry_type::create(THD *thd, Item *arg1)
 Create_func_geometryn Create_func_geometryn::s_singleton;
 
 Item*
-Create_func_geometryn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_geometryn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_GEOMETRYN);
@@ -3578,7 +3578,7 @@ Create_func_geometryn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_get_lock Create_func_get_lock::s_singleton;
 
 Item*
-Create_func_get_lock::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_get_lock::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3590,7 +3590,7 @@ Create_func_get_lock::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_glength Create_func_glength::s_singleton;
 
 Item*
-Create_func_glength::create(THD *thd, Item *arg1)
+Create_func_glength::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_glength(arg1);
 }
@@ -3621,7 +3621,7 @@ Create_func_greatest::create_native(THD *thd, LEX_STRING name,
 Create_func_hex Create_func_hex::s_singleton;
 
 Item*
-Create_func_hex::create(THD *thd, Item *arg1)
+Create_func_hex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_hex(arg1);
 }
@@ -3630,7 +3630,7 @@ Create_func_hex::create(THD *thd, Item *arg1)
 Create_func_ifnull Create_func_ifnull::s_singleton;
 
 Item*
-Create_func_ifnull::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_ifnull::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_ifnull(arg1, arg2);
 }
@@ -3639,7 +3639,7 @@ Create_func_ifnull::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_inet_ntoa Create_func_inet_ntoa::s_singleton;
 
 Item*
-Create_func_inet_ntoa::create(THD *thd, Item *arg1)
+Create_func_inet_ntoa::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_inet_ntoa(arg1);
 }
@@ -3648,7 +3648,7 @@ Create_func_inet_ntoa::create(THD *thd, Item *arg1)
 Create_func_inet_aton Create_func_inet_aton::s_singleton;
 
 Item*
-Create_func_inet_aton::create(THD *thd, Item *arg1)
+Create_func_inet_aton::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_inet_aton(arg1);
 }
@@ -3657,7 +3657,7 @@ Create_func_inet_aton::create(THD *thd, Item *arg1)
 Create_func_instr Create_func_instr::s_singleton;
 
 Item*
-Create_func_instr::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_locate(arg1, arg2);
 }
@@ -3667,7 +3667,7 @@ Create_func_instr::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_interiorringn Create_func_interiorringn::s_singleton;
 
 Item*
-Create_func_interiorringn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_interiorringn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_INTERIORRINGN);
@@ -3679,7 +3679,7 @@ Create_func_interiorringn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_intersects Create_func_intersects::s_singleton;
 
 Item*
-Create_func_intersects::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_intersects::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_INTERSECTS_FUNC);
@@ -3690,7 +3690,7 @@ Create_func_intersects::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_is_free_lock Create_func_is_free_lock::s_singleton;
 
 Item*
-Create_func_is_free_lock::create(THD *thd, Item *arg1)
+Create_func_is_free_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3701,7 +3701,7 @@ Create_func_is_free_lock::create(THD *thd, Item *arg1)
 Create_func_is_used_lock Create_func_is_used_lock::s_singleton;
 
 Item*
-Create_func_is_used_lock::create(THD *thd, Item *arg1)
+Create_func_is_used_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -3713,7 +3713,7 @@ Create_func_is_used_lock::create(THD *thd, Item *arg1)
 Create_func_isclosed Create_func_isclosed::s_singleton;
 
 Item*
-Create_func_isclosed::create(THD *thd, Item *arg1)
+Create_func_isclosed::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isclosed(arg1);
 }
@@ -3724,7 +3724,7 @@ Create_func_isclosed::create(THD *thd, Item *arg1)
 Create_func_isempty Create_func_isempty::s_singleton;
 
 Item*
-Create_func_isempty::create(THD *thd, Item *arg1)
+Create_func_isempty::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isempty(arg1);
 }
@@ -3734,7 +3734,7 @@ Create_func_isempty::create(THD *thd, Item *arg1)
 Create_func_isnull Create_func_isnull::s_singleton;
 
 Item*
-Create_func_isnull::create(THD *thd, Item *arg1)
+Create_func_isnull::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_isnull(arg1);
 }
@@ -3744,7 +3744,7 @@ Create_func_isnull::create(THD *thd, Item *arg1)
 Create_func_issimple Create_func_issimple::s_singleton;
 
 Item*
-Create_func_issimple::create(THD *thd, Item *arg1)
+Create_func_issimple::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_issimple(arg1);
 }
@@ -3754,7 +3754,7 @@ Create_func_issimple::create(THD *thd, Item *arg1)
 Create_func_last_day Create_func_last_day::s_singleton;
 
 Item*
-Create_func_last_day::create(THD *thd, Item *arg1)
+Create_func_last_day::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_last_day(arg1);
 }
@@ -3800,7 +3800,7 @@ Create_func_last_insert_id::create_native(THD *thd, LEX_STRING name,
 Create_func_lcase Create_func_lcase::s_singleton;
 
 Item*
-Create_func_lcase::create(THD *thd, Item *arg1)
+Create_func_lcase::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_lcase(arg1);
 }
@@ -3830,7 +3830,7 @@ Create_func_least::create_native(THD *thd, LEX_STRING name,
 Create_func_length Create_func_length::s_singleton;
 
 Item*
-Create_func_length::create(THD *thd, Item *arg1)
+Create_func_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_length(arg1);
 }
@@ -3839,7 +3839,7 @@ Create_func_length::create(THD *thd, Item *arg1)
 Create_func_ln Create_func_ln::s_singleton;
 
 Item*
-Create_func_ln::create(THD *thd, Item *arg1)
+Create_func_ln::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ln(arg1);
 }
@@ -3848,7 +3848,7 @@ Create_func_ln::create(THD *thd, Item *arg1)
 Create_func_load_file Create_func_load_file::s_singleton;
 
 Item*
-Create_func_load_file::create(THD *thd, Item *arg1)
+Create_func_load_file::create_1_arg(THD *thd, Item *arg1)
 {
   DBUG_ENTER("Create_func_load_file::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -3938,7 +3938,7 @@ Create_func_log::create_native(THD *thd, LEX_STRING name,
 Create_func_log10 Create_func_log10::s_singleton;
 
 Item*
-Create_func_log10::create(THD *thd, Item *arg1)
+Create_func_log10::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_log10(arg1);
 }
@@ -3947,7 +3947,7 @@ Create_func_log10::create(THD *thd, Item *arg1)
 Create_func_log2 Create_func_log2::s_singleton;
 
 Item*
-Create_func_log2::create(THD *thd, Item *arg1)
+Create_func_log2::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_log2(arg1);
 }
@@ -3956,7 +3956,7 @@ Create_func_log2::create(THD *thd, Item *arg1)
 Create_func_lpad Create_func_lpad::s_singleton;
 
 Item*
-Create_func_lpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_lpad::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_lpad(arg1, arg2, arg3);
 }
@@ -3965,7 +3965,7 @@ Create_func_lpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_ltrim Create_func_ltrim::s_singleton;
 
 Item*
-Create_func_ltrim::create(THD *thd, Item *arg1)
+Create_func_ltrim::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ltrim(arg1);
 }
@@ -3974,7 +3974,7 @@ Create_func_ltrim::create(THD *thd, Item *arg1)
 Create_func_makedate Create_func_makedate::s_singleton;
 
 Item*
-Create_func_makedate::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_makedate::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_makedate(arg1, arg2);
 }
@@ -3983,7 +3983,7 @@ Create_func_makedate::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_maketime Create_func_maketime::s_singleton;
 
 Item*
-Create_func_maketime::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_maketime::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_maketime(arg1, arg2, arg3);
 }
@@ -4058,7 +4058,7 @@ Create_func_master_pos_wait::create_native(THD *thd, LEX_STRING name,
 Create_func_md5 Create_func_md5::s_singleton;
 
 Item*
-Create_func_md5::create(THD *thd, Item *arg1)
+Create_func_md5::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_md5(arg1);
 }
@@ -4067,7 +4067,7 @@ Create_func_md5::create(THD *thd, Item *arg1)
 Create_func_monthname Create_func_monthname::s_singleton;
 
 Item*
-Create_func_monthname::create(THD *thd, Item *arg1)
+Create_func_monthname::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_monthname(arg1);
 }
@@ -4076,7 +4076,7 @@ Create_func_monthname::create(THD *thd, Item *arg1)
 Create_func_name_const Create_func_name_const::s_singleton;
 
 Item*
-Create_func_name_const::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_name_const::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_name_const(arg1, arg2);
 }
@@ -4085,7 +4085,7 @@ Create_func_name_const::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_nullif Create_func_nullif::s_singleton;
 
 Item*
-Create_func_nullif::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_nullif::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_nullif(arg1, arg2);
 }
@@ -4095,7 +4095,7 @@ Create_func_nullif::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_numgeometries Create_func_numgeometries::s_singleton;
 
 Item*
-Create_func_numgeometries::create(THD *thd, Item *arg1)
+Create_func_numgeometries::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numgeometries(arg1);
 }
@@ -4106,7 +4106,7 @@ Create_func_numgeometries::create(THD *thd, Item *arg1)
 Create_func_numinteriorring Create_func_numinteriorring::s_singleton;
 
 Item*
-Create_func_numinteriorring::create(THD *thd, Item *arg1)
+Create_func_numinteriorring::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numinteriorring(arg1);
 }
@@ -4117,7 +4117,7 @@ Create_func_numinteriorring::create(THD *thd, Item *arg1)
 Create_func_numpoints Create_func_numpoints::s_singleton;
 
 Item*
-Create_func_numpoints::create(THD *thd, Item *arg1)
+Create_func_numpoints::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_numpoints(arg1);
 }
@@ -4127,7 +4127,7 @@ Create_func_numpoints::create(THD *thd, Item *arg1)
 Create_func_oct Create_func_oct::s_singleton;
 
 Item*
-Create_func_oct::create(THD *thd, Item *arg1)
+Create_func_oct::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i10= new (thd->mem_root) Item_int((int32) 10,2);
   Item *i8= new (thd->mem_root) Item_int((int32) 8,1);
@@ -4138,7 +4138,7 @@ Create_func_oct::create(THD *thd, Item *arg1)
 Create_func_ord Create_func_ord::s_singleton;
 
 Item*
-Create_func_ord::create(THD *thd, Item *arg1)
+Create_func_ord::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ord(arg1);
 }
@@ -4148,7 +4148,7 @@ Create_func_ord::create(THD *thd, Item *arg1)
 Create_func_overlaps Create_func_overlaps::s_singleton;
 
 Item*
-Create_func_overlaps::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_overlaps::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_OVERLAPS_FUNC);
@@ -4159,7 +4159,7 @@ Create_func_overlaps::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_period_add Create_func_period_add::s_singleton;
 
 Item*
-Create_func_period_add::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_period_add::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_period_add(arg1, arg2);
 }
@@ -4168,7 +4168,7 @@ Create_func_period_add::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_period_diff Create_func_period_diff::s_singleton;
 
 Item*
-Create_func_period_diff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_period_diff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_period_diff(arg1, arg2);
 }
@@ -4177,7 +4177,7 @@ Create_func_period_diff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_pi Create_func_pi::s_singleton;
 
 Item*
-Create_func_pi::create(THD *thd)
+Create_func_pi::create_builder(THD *thd)
 {
   return new (thd->mem_root) Item_static_float_func("pi()", M_PI, 6, 8);
 }
@@ -4187,7 +4187,7 @@ Create_func_pi::create(THD *thd)
 Create_func_pointn Create_func_pointn::s_singleton;
 
 Item*
-Create_func_pointn::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_pointn::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_decomp_n(arg1, arg2,
                                                         Item_func::SP_POINTN);
@@ -4198,7 +4198,7 @@ Create_func_pointn::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_pow Create_func_pow::s_singleton;
 
 Item*
-Create_func_pow::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_pow::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_pow(arg1, arg2);
 }
@@ -4207,7 +4207,7 @@ Create_func_pow::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_quote Create_func_quote::s_singleton;
 
 Item*
-Create_func_quote::create(THD *thd, Item *arg1)
+Create_func_quote::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_quote(arg1);
 }
@@ -4216,7 +4216,7 @@ Create_func_quote::create(THD *thd, Item *arg1)
 Create_func_radians Create_func_radians::s_singleton;
 
 Item*
-Create_func_radians::create(THD *thd, Item *arg1)
+Create_func_radians::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_units((char*) "radians", arg1,
                                              M_PI/180, 0.0);
@@ -4242,8 +4242,11 @@ Create_func_rand::create_native(THD *thd, LEX_STRING name,
     into a table, the order in which the rows are modified may differ
     between master and slave, because the order is undefined.  Hence,
     the statement is unsafe to log in statement format.
+
+    For normal INSERT's this is howevever safe
   */
-  thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
+  if (thd->lex->sql_command != SQLCOM_INSERT)
+    thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
 
   switch (arg_count) {
   case 0:
@@ -4273,7 +4276,7 @@ Create_func_rand::create_native(THD *thd, LEX_STRING name,
 Create_func_release_lock Create_func_release_lock::s_singleton;
 
 Item*
-Create_func_release_lock::create(THD *thd, Item *arg1)
+Create_func_release_lock::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -4284,7 +4287,7 @@ Create_func_release_lock::create(THD *thd, Item *arg1)
 Create_func_reverse Create_func_reverse::s_singleton;
 
 Item*
-Create_func_reverse::create(THD *thd, Item *arg1)
+Create_func_reverse::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_reverse(arg1);
 }
@@ -4331,7 +4334,7 @@ Create_func_round::create_native(THD *thd, LEX_STRING name,
 Create_func_row_count Create_func_row_count::s_singleton;
 
 Item*
-Create_func_row_count::create(THD *thd)
+Create_func_row_count::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_row_count::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4343,7 +4346,7 @@ Create_func_row_count::create(THD *thd)
 Create_func_rpad Create_func_rpad::s_singleton;
 
 Item*
-Create_func_rpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_rpad::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_rpad(arg1, arg2, arg3);
 }
@@ -4352,7 +4355,7 @@ Create_func_rpad::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_rtrim Create_func_rtrim::s_singleton;
 
 Item*
-Create_func_rtrim::create(THD *thd, Item *arg1)
+Create_func_rtrim::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_rtrim(arg1);
 }
@@ -4361,7 +4364,7 @@ Create_func_rtrim::create(THD *thd, Item *arg1)
 Create_func_sec_to_time Create_func_sec_to_time::s_singleton;
 
 Item*
-Create_func_sec_to_time::create(THD *thd, Item *arg1)
+Create_func_sec_to_time::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sec_to_time(arg1);
 }
@@ -4370,7 +4373,7 @@ Create_func_sec_to_time::create(THD *thd, Item *arg1)
 Create_func_sha Create_func_sha::s_singleton;
 
 Item*
-Create_func_sha::create(THD *thd, Item *arg1)
+Create_func_sha::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sha(arg1);
 }
@@ -4388,7 +4391,7 @@ Create_func_sha2::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_sign Create_func_sign::s_singleton;
 
 Item*
-Create_func_sign::create(THD *thd, Item *arg1)
+Create_func_sign::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sign(arg1);
 }
@@ -4397,7 +4400,7 @@ Create_func_sign::create(THD *thd, Item *arg1)
 Create_func_sin Create_func_sin::s_singleton;
 
 Item*
-Create_func_sin::create(THD *thd, Item *arg1)
+Create_func_sin::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sin(arg1);
 }
@@ -4406,7 +4409,7 @@ Create_func_sin::create(THD *thd, Item *arg1)
 Create_func_sleep Create_func_sleep::s_singleton;
 
 Item*
-Create_func_sleep::create(THD *thd, Item *arg1)
+Create_func_sleep::create_1_arg(THD *thd, Item *arg1)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT);
@@ -4417,7 +4420,7 @@ Create_func_sleep::create(THD *thd, Item *arg1)
 Create_func_soundex Create_func_soundex::s_singleton;
 
 Item*
-Create_func_soundex::create(THD *thd, Item *arg1)
+Create_func_soundex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_soundex(arg1);
 }
@@ -4426,7 +4429,7 @@ Create_func_soundex::create(THD *thd, Item *arg1)
 Create_func_space Create_func_space::s_singleton;
 
 Item*
-Create_func_space::create(THD *thd, Item *arg1)
+Create_func_space::create_1_arg(THD *thd, Item *arg1)
 {
   /**
     TODO: Fix Bug#23637
@@ -4454,7 +4457,7 @@ Create_func_space::create(THD *thd, Item *arg1)
 Create_func_sqrt Create_func_sqrt::s_singleton;
 
 Item*
-Create_func_sqrt::create(THD *thd, Item *arg1)
+Create_func_sqrt::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_sqrt(arg1);
 }
@@ -4464,7 +4467,7 @@ Create_func_sqrt::create(THD *thd, Item *arg1)
 Create_func_srid Create_func_srid::s_singleton;
 
 Item*
-Create_func_srid::create(THD *thd, Item *arg1)
+Create_func_srid::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_srid(arg1);
 }
@@ -4475,7 +4478,7 @@ Create_func_srid::create(THD *thd, Item *arg1)
 Create_func_startpoint Create_func_startpoint::s_singleton;
 
 Item*
-Create_func_startpoint::create(THD *thd, Item *arg1)
+Create_func_startpoint::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_spatial_decomp(arg1,
                                                       Item_func::SP_STARTPOINT);
@@ -4486,7 +4489,7 @@ Create_func_startpoint::create(THD *thd, Item *arg1)
 Create_func_str_to_date Create_func_str_to_date::s_singleton;
 
 Item*
-Create_func_str_to_date::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_str_to_date::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_str_to_date(arg1, arg2);
 }
@@ -4495,7 +4498,7 @@ Create_func_str_to_date::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_strcmp Create_func_strcmp::s_singleton;
 
 Item*
-Create_func_strcmp::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_strcmp::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_strcmp(arg1, arg2);
 }
@@ -4504,7 +4507,7 @@ Create_func_strcmp::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_substr_index Create_func_substr_index::s_singleton;
 
 Item*
-Create_func_substr_index::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_substr_index::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_substr_index(arg1, arg2, arg3);
 }
@@ -4513,7 +4516,7 @@ Create_func_substr_index::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_subtime Create_func_subtime::s_singleton;
 
 Item*
-Create_func_subtime::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_subtime::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_add_time(arg1, arg2, 0, 1);
 }
@@ -4522,7 +4525,7 @@ Create_func_subtime::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_tan Create_func_tan::s_singleton;
 
 Item*
-Create_func_tan::create(THD *thd, Item *arg1)
+Create_func_tan::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_tan(arg1);
 }
@@ -4531,7 +4534,7 @@ Create_func_tan::create(THD *thd, Item *arg1)
 Create_func_time_format Create_func_time_format::s_singleton;
 
 Item*
-Create_func_time_format::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_time_format::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_date_format(arg1, arg2, 1);
 }
@@ -4540,7 +4543,7 @@ Create_func_time_format::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_time_to_sec Create_func_time_to_sec::s_singleton;
 
 Item*
-Create_func_time_to_sec::create(THD *thd, Item *arg1)
+Create_func_time_to_sec::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_time_to_sec(arg1);
 }
@@ -4549,7 +4552,7 @@ Create_func_time_to_sec::create(THD *thd, Item *arg1)
 Create_func_timediff Create_func_timediff::s_singleton;
 
 Item*
-Create_func_timediff::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_timediff::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_timediff(arg1, arg2);
 }
@@ -4558,7 +4561,7 @@ Create_func_timediff::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_to_days Create_func_to_days::s_singleton;
 
 Item*
-Create_func_to_days::create(THD *thd, Item *arg1)
+Create_func_to_days::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_to_days(arg1);
 }
@@ -4577,7 +4580,7 @@ Create_func_to_seconds::create(THD *thd, Item *arg1)
 Create_func_touches Create_func_touches::s_singleton;
 
 Item*
-Create_func_touches::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_touches::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_TOUCHES_FUNC);
@@ -4588,7 +4591,7 @@ Create_func_touches::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_ucase Create_func_ucase::s_singleton;
 
 Item*
-Create_func_ucase::create(THD *thd, Item *arg1)
+Create_func_ucase::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_ucase(arg1);
 }
@@ -4597,7 +4600,7 @@ Create_func_ucase::create(THD *thd, Item *arg1)
 Create_func_uncompress Create_func_uncompress::s_singleton;
 
 Item*
-Create_func_uncompress::create(THD *thd, Item *arg1)
+Create_func_uncompress::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_uncompress(arg1);
 }
@@ -4606,7 +4609,7 @@ Create_func_uncompress::create(THD *thd, Item *arg1)
 Create_func_uncompressed_length Create_func_uncompressed_length::s_singleton;
 
 Item*
-Create_func_uncompressed_length::create(THD *thd, Item *arg1)
+Create_func_uncompressed_length::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_uncompressed_length(arg1);
 }
@@ -4615,7 +4618,7 @@ Create_func_uncompressed_length::create(THD *thd, Item *arg1)
 Create_func_unhex Create_func_unhex::s_singleton;
 
 Item*
-Create_func_unhex::create(THD *thd, Item *arg1)
+Create_func_unhex::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_unhex(arg1);
 }
@@ -4660,7 +4663,7 @@ Create_func_unix_timestamp::create_native(THD *thd, LEX_STRING name,
 Create_func_uuid Create_func_uuid::s_singleton;
 
 Item*
-Create_func_uuid::create(THD *thd)
+Create_func_uuid::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_uuid::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4672,7 +4675,7 @@ Create_func_uuid::create(THD *thd)
 Create_func_uuid_short Create_func_uuid_short::s_singleton;
 
 Item*
-Create_func_uuid_short::create(THD *thd)
+Create_func_uuid_short::create_builder(THD *thd)
 {
   DBUG_ENTER("Create_func_uuid_short::create");
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
@@ -4684,7 +4687,7 @@ Create_func_uuid_short::create(THD *thd)
 Create_func_version Create_func_version::s_singleton;
 
 Item*
-Create_func_version::create(THD *thd)
+Create_func_version::create_builder(THD *thd)
 {
   thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION);
   return new (thd->mem_root) Item_static_string_func("version()",
@@ -4698,7 +4701,7 @@ Create_func_version::create(THD *thd)
 Create_func_weekday Create_func_weekday::s_singleton;
 
 Item*
-Create_func_weekday::create(THD *thd, Item *arg1)
+Create_func_weekday::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_weekday(arg1, 0);
 }
@@ -4707,7 +4710,7 @@ Create_func_weekday::create(THD *thd, Item *arg1)
 Create_func_weekofyear Create_func_weekofyear::s_singleton;
 
 Item*
-Create_func_weekofyear::create(THD *thd, Item *arg1)
+Create_func_weekofyear::create_1_arg(THD *thd, Item *arg1)
 {
   Item *i1= new (thd->mem_root) Item_int((char*) "0", 3, 1);
   return new (thd->mem_root) Item_func_week(arg1, i1);
@@ -4718,7 +4721,7 @@ Create_func_weekofyear::create(THD *thd, Item *arg1)
 Create_func_within Create_func_within::s_singleton;
 
 Item*
-Create_func_within::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_within::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_spatial_rel(arg1, arg2,
                                                    Item_func::SP_WITHIN_FUNC);
@@ -4730,7 +4733,7 @@ Create_func_within::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_x Create_func_x::s_singleton;
 
 Item*
-Create_func_x::create(THD *thd, Item *arg1)
+Create_func_x::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_x(arg1);
 }
@@ -4740,7 +4743,7 @@ Create_func_x::create(THD *thd, Item *arg1)
 Create_func_xml_extractvalue Create_func_xml_extractvalue::s_singleton;
 
 Item*
-Create_func_xml_extractvalue::create(THD *thd, Item *arg1, Item *arg2)
+Create_func_xml_extractvalue::create_2_arg(THD *thd, Item *arg1, Item *arg2)
 {
   return new (thd->mem_root) Item_func_xml_extractvalue(arg1, arg2);
 }
@@ -4749,7 +4752,7 @@ Create_func_xml_extractvalue::create(THD *thd, Item *arg1, Item *arg2)
 Create_func_xml_update Create_func_xml_update::s_singleton;
 
 Item*
-Create_func_xml_update::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
+Create_func_xml_update::create_3_arg(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 {
   return new (thd->mem_root) Item_func_xml_update(arg1, arg2, arg3);
 }
@@ -4759,7 +4762,7 @@ Create_func_xml_update::create(THD *thd, Item *arg1, Item *arg2, Item *arg3)
 Create_func_y Create_func_y::s_singleton;
 
 Item*
-Create_func_y::create(THD *thd, Item *arg1)
+Create_func_y::create_1_arg(THD *thd, Item *arg1)
 {
   return new (thd->mem_root) Item_func_y(arg1);
 }
diff --git a/sql/item_create.h b/sql/item_create.h
index fc21e0a4d33..3ef4b0efbf3 100644
--- a/sql/item_create.h
+++ b/sql/item_create.h
@@ -93,8 +93,9 @@ public:
     @param item_list The list of arguments to the function, can be NULL
     @return An item representing the parsed function call
   */
-  virtual Item* create(THD *thd, LEX_STRING db, LEX_STRING name,
-                       bool use_explicit_name, List<Item> *item_list) = 0;
+  virtual Item *create_with_db(THD *thd, LEX_STRING db, LEX_STRING name,
+                               bool use_explicit_name,
+                               List<Item> *item_list) = 0;
 
 protected:
   /** Constructor. */
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 7906ed71bc7..65585378f6a 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -173,7 +173,9 @@ Item_func::fix_fields(THD *thd, Item **ref)
 {
   DBUG_ASSERT(fixed == 0);
   Item **arg,**arg_end;
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
   uchar buff[STACK_BUFF_ALLOC];			// Max argument in function
+  thd->thd_marker.emb_on_expr_nest= NULL;
 
   used_tables_cache= not_null_tables_cache= 0;
   const_item_cache=1;
@@ -220,10 +222,33 @@ Item_func::fix_fields(THD *thd, Item **ref)
   if (thd->is_error()) // An error inside fix_length_and_dec occured
     return TRUE;
   fixed= 1;
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
   return FALSE;
 }
 
 
+void Item_func::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  Item **arg,**arg_end;
+
+  used_tables_cache= not_null_tables_cache= 0;
+  const_item_cache=1;
+
+  if (arg_count)
+  {
+    for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++)
+    {
+      (*arg)->fix_after_pullout(new_parent, arg);
+      Item *item= *arg;
+
+      used_tables_cache|=     item->used_tables();
+      not_null_tables_cache|= item->not_null_tables();
+      const_item_cache&=      item->const_item();
+    }
+  }
+}
+
+
 bool Item_func::walk(Item_processor processor, bool walk_subquery,
                      uchar *argument)
 {
@@ -472,7 +497,6 @@ Field *Item_func::tmp_table_field(TABLE *table)
     break;
   case STRING_RESULT:
     return make_string_field(table);
-    break;
   case DECIMAL_RESULT:
     field= Field_new_decimal::create_from_item(this);
     break;
@@ -488,12 +512,12 @@ Field *Item_func::tmp_table_field(TABLE *table)
   return field;
 }
 
-
+/*
 bool Item_func::is_expensive_processor(uchar *arg)
 {
   return is_expensive();
 }
-
+*/
 
 my_decimal *Item_func::val_decimal(my_decimal *decimal_value)
 {
@@ -2327,10 +2351,12 @@ double Item_func_round::real_op()
 {
   double value= args[0]->val_real();
 
-  if (!(null_value= args[0]->null_value || args[1]->null_value))
-    return my_double_round(value, args[1]->val_int(), args[1]->unsigned_flag,
-                           truncate);
-
+  if (!(null_value= args[0]->null_value))
+  {
+    longlong dec= args[1]->val_int();
+    if (!(null_value= args[1]->null_value))
+      return my_double_round(value, dec, args[1]->unsigned_flag, truncate);
+  }
   return 0.0;
 }
 
@@ -2403,7 +2429,7 @@ void Item_func_rand::seed_random(Item *arg)
     args[0] is a constant.
   */
   uint32 tmp= (uint32) arg->val_int();
-  randominit(rand, (uint32) (tmp*0x10001L+55555555L),
+  my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L),
              (uint32) (tmp*0x10000001L));
 }
 
@@ -2423,7 +2449,7 @@ bool Item_func_rand::fix_fields(THD *thd,Item **ref)
       No need to send a Rand log event if seed was given eg: RAND(seed),
       as it will be replicated in the query as such.
     */
-    if (!rand && !(rand= (struct rand_struct*)
+    if (!rand && !(rand= (struct my_rnd_struct*)
                    thd->stmt_arena->alloc(sizeof(*rand))))
       return TRUE;
   }
@@ -3024,8 +3050,7 @@ longlong Item_func_find_in_set::val_int()
   }
   null_value=0;
 
-  int diff;
-  if ((diff=buffer->length() - find->length()) >= 0)
+  if ((int) (buffer->length() - find->length()) >= 0)
   {
     my_wc_t wc= 0;
     CHARSET_INFO *cs= cmp_collation.collation;
@@ -3424,11 +3449,15 @@ void Item_udf_func::print(String *str, enum_query_type query_type)
 
 double Item_func_udf_float::val_real()
 {
+  double res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_float::val");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val(&null_value));
+  res= udf.val(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3445,9 +3474,13 @@ String *Item_func_udf_float::val_str(String *str)
 
 longlong Item_func_udf_int::val_int()
 {
+  longlong res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_int::val_int");
-  DBUG_RETURN(udf.val_int(&null_value));
+  res= udf.val_int(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3464,8 +3497,10 @@ String *Item_func_udf_int::val_str(String *str)
 
 longlong Item_func_udf_decimal::val_int()
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
   longlong result;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0;
   my_decimal2int(E_DEC_FATAL_ERROR, dec, unsigned_flag, &result);
@@ -3475,8 +3510,10 @@ longlong Item_func_udf_decimal::val_int()
 
 double Item_func_udf_decimal::val_real()
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
   double result;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0.0;
   my_decimal2double(E_DEC_FATAL_ERROR, dec, &result);
@@ -3486,18 +3523,24 @@ double Item_func_udf_decimal::val_real()
 
 my_decimal *Item_func_udf_decimal::val_decimal(my_decimal *dec_buf)
 {
+  my_decimal *res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_decimal::val_decimal");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
                      args[0]->result_type(), arg_count));
 
-  DBUG_RETURN(udf.val_decimal(&null_value, dec_buf));
+  res= udf.val_decimal(&tmp_null_value, dec_buf);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
 String *Item_func_udf_decimal::val_str(String *str)
 {
-  my_decimal dec_buf, *dec= udf.val_decimal(&null_value, &dec_buf);
+  my_bool tmp_null_value;
+  my_decimal dec_buf, *dec= udf.val_decimal(&tmp_null_value, &dec_buf);
+  null_value= tmp_null_value;
   if (null_value)
     return 0;
   if (str->length() < DECIMAL_MAX_STR_LENGTH)
@@ -4201,10 +4244,30 @@ bool Item_func_set_user_var::register_field_in_read_map(uchar *arg)
     TABLE *table= (TABLE *) arg;
     if (result_field->table == table || !table)
       bitmap_set_bit(result_field->table->read_set, result_field->field_index);
+    if (result_field->vcol_info)
+      return result_field->vcol_info->
+               expr_item->walk(&Item::register_field_in_read_map, 1, arg);
   }
   return 0;
 }
 
+/*
+  Mark field in bitmap supplied as *arg
+
+*/
+
+bool Item_func_set_user_var::register_field_in_bitmap(uchar *arg)
+{
+  MY_BITMAP *bitmap = (MY_BITMAP *) arg;
+  DBUG_ASSERT(bitmap);
+  if (result_field)
+  {
+    if (!bitmap)
+      return 1;
+    bitmap_set_bit(bitmap, result_field->field_index);
+  }
+  return 0;
+}
 
 /**
   Set value to user variable.
@@ -4310,7 +4373,7 @@ Item_func_set_user_var::update_hash(void *ptr, uint length,
 
 /** Get the value of a variable as a double. */
 
-double user_var_entry::val_real(my_bool *null_value)
+double user_var_entry::val_real(bool *null_value)
 {
   if ((*null_value= (value == 0)))
     return 0.0;
@@ -4329,7 +4392,8 @@ double user_var_entry::val_real(my_bool *null_value)
   case STRING_RESULT:
     return my_atof(value);                      // This is null terminated
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return 0.0;					// Impossible
@@ -4338,7 +4402,7 @@ double user_var_entry::val_real(my_bool *null_value)
 
 /** Get the value of a variable as an integer. */
 
-longlong user_var_entry::val_int(my_bool *null_value) const
+longlong user_var_entry::val_int(bool *null_value) const
 {
   if ((*null_value= (value == 0)))
     return LL(0);
@@ -4360,7 +4424,8 @@ longlong user_var_entry::val_int(my_bool *null_value) const
     return my_strtoll10(value, (char**) 0, &error);// String is null terminated
   }
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return LL(0);					// Impossible
@@ -4369,7 +4434,7 @@ longlong user_var_entry::val_int(my_bool *null_value) const
 
 /** Get the value of a variable as a string. */
 
-String *user_var_entry::val_str(my_bool *null_value, String *str,
+String *user_var_entry::val_str(bool *null_value, String *str,
 				uint decimals)
 {
   if ((*null_value= (value == 0)))
@@ -4391,8 +4456,10 @@ String *user_var_entry::val_str(my_bool *null_value, String *str,
   case STRING_RESULT:
     if (str->copy(value, length, collation.collation))
       str= 0;					// EOM error
+    break;
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return(str);
@@ -4400,7 +4467,7 @@ String *user_var_entry::val_str(my_bool *null_value, String *str,
 
 /** Get the value of a variable as a decimal. */
 
-my_decimal *user_var_entry::val_decimal(my_bool *null_value, my_decimal *val)
+my_decimal *user_var_entry::val_decimal(bool *null_value, my_decimal *val)
 {
   if ((*null_value= (value == 0)))
     return 0;
@@ -4419,7 +4486,8 @@ my_decimal *user_var_entry::val_decimal(my_bool *null_value, my_decimal *val)
     str2my_decimal(E_DEC_FATAL_ERROR, value, length, collation.collation, val);
     break;
   case ROW_RESULT:
-    DBUG_ASSERT(1);				// Impossible
+  case IMPOSSIBLE_RESULT:
+    DBUG_ASSERT(0);				// Impossible
     break;
   }
   return(val);
@@ -4768,7 +4836,7 @@ int Item_func_set_user_var::save_in_field(Field *field, bool no_conversions,
 
   if (result_type() == STRING_RESULT ||
       (result_type() == REAL_RESULT &&
-      field->result_type() == STRING_RESULT))
+       field->result_type() == STRING_RESULT))
   {
     String *result;
     CHARSET_INFO *cs= collation.collation;
diff --git a/sql/item_func.h b/sql/item_func.h
index 15730516c71..5156d4d238b 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -120,6 +120,7 @@ public:
   // Constructor used for Item_cond_and/or (see Item comment)
   Item_func(THD *thd, Item_func *item);
   bool fix_fields(THD *, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   table_map used_tables() const;
   table_map not_null_tables() const;
   void update_used_tables();
@@ -185,8 +186,8 @@ public:
                 Item_transformer transformer, uchar *arg_t);
   void traverse_cond(Cond_traverser traverser,
                      void * arg, traverse_order order);
-  bool is_expensive_processor(uchar *arg);
-  virtual bool is_expensive() { return 0; }
+ // bool is_expensive_processor(uchar *arg);
+ // virtual bool is_expensive() { return 0; }
   inline void raise_numeric_overflow(const char *type_name)
   {
     char buf[256];
@@ -265,6 +266,21 @@ public:
   {
     return functype() == *(Functype *) arg;
   }
+
+  void no_rows_in_result()
+  {
+    bool_func_call_args info;
+    info.original_func_item= this;
+    info.bool_function= &Item::no_rows_in_result;
+    walk(&Item::call_bool_func_processor, FALSE, (uchar*) &info);
+  }
+  void restore_to_before_no_rows_in_result()
+  {
+    bool_func_call_args info;
+    info.original_func_item= this;
+    info.bool_function= &Item::restore_to_before_no_rows_in_result;
+    walk(&Item::call_bool_func_processor, FALSE, (uchar*) &info);
+  }
 };
 
 
@@ -409,6 +425,7 @@ public:
   void fix_length_and_dec();
   bool fix_fields(THD *thd, Item **ref);
   longlong val_int() { DBUG_ASSERT(fixed == 1); return value; }
+  bool check_vcol_func_processor(uchar *int_arg) { return TRUE;}
 };
 
 
@@ -471,6 +488,7 @@ public:
   Item_func_additive_op(Item *a,Item *b) :Item_num_op(a,b) {}
   void result_precision();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -506,6 +524,7 @@ public:
   my_decimal *decimal_op(my_decimal *);
   void result_precision();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -538,6 +557,7 @@ public:
   }
 
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -552,6 +572,7 @@ public:
   void result_precision();
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -568,6 +589,7 @@ public:
   void fix_num_length_and_dec();
   uint decimal_precision() const { return args[0]->decimal_precision(); }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -581,6 +603,7 @@ public:
   const char *func_name() const { return "abs"; }
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 // A class to handle logarithmic and trigonometric functions
@@ -744,6 +767,7 @@ public:
   double real_op();
   my_decimal *decimal_op(my_decimal *);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -756,6 +780,7 @@ public:
   double real_op();
   my_decimal *decimal_op(my_decimal *);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 /* This handles round and truncate */
@@ -776,7 +801,7 @@ public:
 
 class Item_func_rand :public Item_real_func
 {
-  struct rand_struct *rand;
+  struct my_rnd_struct *rand;
   bool first_eval; // TRUE if val_real() is called 1st time
 public:
   Item_func_rand(Item *a) :Item_real_func(a), rand(0), first_eval(TRUE) {}
@@ -787,6 +812,10 @@ public:
   void update_used_tables();
   bool fix_fields(THD *thd, Item **ref);
   void cleanup() { first_eval= TRUE; Item_real_func::cleanup(); }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 private:
   void seed_random (Item * val);  
 };
@@ -1069,6 +1098,10 @@ public:
       max_length= args[0]->max_length;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1082,6 +1115,10 @@ public:
   const char *func_name() const { return "benchmark"; }
   void fix_length_and_dec() { max_length=1; maybe_null=0; }
   virtual void print(String *str, enum_query_type query_type);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1097,6 +1134,10 @@ public:
     used_tables_cache|= RAND_TABLE_BIT;
   }
   longlong val_int();
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1107,6 +1148,7 @@ class Item_udf_func :public Item_func
 {
 protected:
   udf_handler udf;
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
 
 public:
   Item_udf_func(udf_func *udf_arg)
@@ -1346,6 +1388,10 @@ class Item_func_get_lock :public Item_int_func
   longlong val_int();
   const char *func_name() const { return "get_lock"; }
   void fix_length_and_dec() { max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 class Item_func_release_lock :public Item_int_func
@@ -1356,6 +1402,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "release_lock"; }
   void fix_length_and_dec() { max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 /* replication functions */
@@ -1369,6 +1419,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "master_pos_wait"; }
   void fix_length_and_dec() { max_length=21; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1441,6 +1495,7 @@ public:
   }
   void save_org_in_field(Field *field) { (void)save_in_field(field, 1, 0); }
   bool register_field_in_read_map(uchar *arg);
+  bool register_field_in_bitmap(uchar *arg);
   bool set_entry(THD *thd, bool create_if_not_exists);
   void cleanup();
 };
@@ -1529,7 +1584,7 @@ class Item_func_get_system_var :public Item_func
   longlong cached_llval;
   double cached_dval;
   String cached_strval;
-  my_bool cached_null_value;
+  bool cached_null_value;
   query_id_t used_query_id;
   uchar cache_present;
 
@@ -1576,7 +1631,6 @@ public:
 
 
 /* for fulltext search */
-#include <ft_global.h>
 
 class Item_func_match :public Item_real_func
 {
@@ -1617,6 +1671,11 @@ public:
 
   bool fix_index();
   void init_search(bool no_order);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    /* TODO: consider adding in support for the MATCH-based virtual columns */
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1636,6 +1695,17 @@ public:
   longlong val_int();
   const char *func_name() const { return "is_free_lock"; }
   void fix_length_and_dec() { decimals=0; max_length=1; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+#if 0
+    DBUG_ENTER("Item_func_is_free_lock::check_vcol_func_processor");
+    DBUG_PRINT("info",
+      ("check_vcol_func_processor returns TRUE: unsupported function"));
+    DBUG_RETURN(TRUE);
+#else
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+#endif
+  }
 };
 
 class Item_func_is_used_lock :public Item_int_func
@@ -1646,6 +1716,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "is_used_lock"; }
   void fix_length_and_dec() { decimals=0; max_length=10; maybe_null=1;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 /* For type casts */
@@ -1665,6 +1739,11 @@ public:
   longlong val_int();
   const char *func_name() const { return "row_count"; }
   void fix_length_and_dec() { decimals= 0; maybe_null=0; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1694,6 +1773,9 @@ private:
   bool execute();
   bool execute_impl(THD *thd);
   bool init_result_field(THD *thd);
+
+protected:
+  bool is_expensive_processor(uchar *arg) { return TRUE; }
   
 public:
 
@@ -1773,6 +1855,10 @@ public:
   {
     return sp_result_field;
   }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1783,6 +1869,10 @@ public:
   longlong val_int();
   const char *func_name() const { return "found_rows"; }
   void fix_length_and_dec() { decimals= 0; maybe_null=0; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -1797,6 +1887,10 @@ public:
   void fix_length_and_dec()
   { max_length= 21; unsigned_flag=1; }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 Item *get_system_var(THD *thd, enum_var_type var_type, LEX_STRING name,
diff --git a/sql/item_row.cc b/sql/item_row.cc
index 2fbf0c12499..27b846cd15b 100644
--- a/sql/item_row.cc
+++ b/sql/item_row.cc
@@ -138,6 +138,20 @@ void Item_row::update_used_tables()
   }
 }
 
+
+void Item_row::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  used_tables_cache= 0;
+  const_item_cache= 1;
+  for (uint i= 0; i < arg_count; i++)
+  {
+    items[i]->fix_after_pullout(new_parent, &items[i]);
+    used_tables_cache|= items[i]->used_tables();
+    const_item_cache&= items[i]->const_item();
+  }
+}
+
+
 bool Item_row::check_cols(uint c)
 {
   if (c != arg_count)
diff --git a/sql/item_row.h b/sql/item_row.h
index 87988e718ca..63028e3bd7d 100644
--- a/sql/item_row.h
+++ b/sql/item_row.h
@@ -63,6 +63,7 @@ public:
     return 0;
   };
   bool fix_fields(THD *thd, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void cleanup();
   void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields);
   table_map used_tables() const { return used_tables_cache; };
@@ -81,6 +82,7 @@ public:
   bool check_cols(uint c);
   bool null_inside() { return with_null; };
   void bring_value();
+  bool check_vcol_func_processor(uchar *int_arg) {return FALSE; } 
 };
 
 #endif /* ITEM_ROW_INCLUDED */
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 6d3514bf356..8e8bb6c7f09 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -687,7 +687,8 @@ String *Item_func_des_encrypt::val_str(String *str)
 
   tail= 8 - (res_length % 8);                   // 1..8 marking extra length
   res_length+=tail;
-  tmp_arg.realloc(res_length);
+  if (tmp_arg.realloc(res_length))
+    goto error;
   tmp_arg.length(0);
   tmp_arg.append(res->ptr(), res->length());
   code= ER_OUT_OF_RESOURCES;
@@ -2897,7 +2898,7 @@ String *Item_func_conv::val_str(String *str)
                                    from_base, &endptr, &err);
   }
 
-  ptr= longlong2str(dec, ans, to_base);
+  ptr= longlong2str(dec, ans, to_base, 1);
   if (str->copy(ans, (uint32) (ptr-ans), default_charset()))
     return &my_empty_string;
   return str;
@@ -3064,7 +3065,7 @@ String *Item_func_hex::val_str_ascii(String *str)
 
     if ((null_value= args[0]->null_value))
       return 0;
-    ptr= longlong2str(dec,ans,16);
+    ptr= longlong2str(dec,ans,16,1);
     if (str->copy(ans,(uint32) (ptr-ans), &my_charset_numeric))
       return &my_empty_string;			// End of memory
     return str;
@@ -3473,7 +3474,7 @@ longlong Item_func_crc32::val_int()
 String *Item_func_compress::val_str(String *str)
 {
   int err= Z_OK, code;
-  ulong new_size;
+  size_t new_size;
   String *res;
   Byte *body;
   char *tmp, *last_char;
@@ -3509,8 +3510,8 @@ String *Item_func_compress::val_str(String *str)
   body= ((Byte*)buffer.ptr()) + 4;
 
   // As far as we have checked res->is_empty() we can use ptr()
-  if ((err= compress(body, &new_size,
-		     (const Bytef*)res->ptr(), res->length())) != Z_OK)
+  if ((err= my_compress_buffer(body, &new_size, (const uchar *)res->ptr(),
+                               res->length())) != Z_OK)
   {
     code= err==Z_MEM_ERROR ? ER_ZLIB_Z_MEM_ERROR : ER_ZLIB_Z_BUF_ERROR;
     push_warning(current_thd,MYSQL_ERROR::WARN_LEVEL_WARN,code,ER(code));
@@ -3587,156 +3588,17 @@ err:
 }
 #endif
 
-/*
-  UUID, as in
-    DCE 1.1: Remote Procedure Call,
-    Open Group Technical Standard Document Number C706, October 1997,
-    (supersedes C309 DCE: Remote Procedure Call 8/1994,
-    which was basis for ISO/IEC 11578:1996 specification)
-*/
-
-static struct rand_struct uuid_rand;
-static uint nanoseq;
-static ulonglong uuid_time=0;
-static char clock_seq_and_node_str[]="-0000-000000000000";
-
-/**
-  number of 100-nanosecond intervals between
-  1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00.
-*/
-#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * \
-                          1000 * 1000 * 10)
-
-#define UUID_VERSION      0x1000
-#define UUID_VARIANT      0x8000
-
-static void tohex(char *to, uint from, uint len)
-{
-  to+= len;
-  while (len--)
-  {
-    *--to= _dig_vec_lower[from & 15];
-    from >>= 4;
-  }
-}
-
-static void set_clock_seq_str()
-{
-  uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT;
-  tohex(clock_seq_and_node_str+1, clock_seq, 4);
-  nanoseq= 0;
-}
 
 String *Item_func_uuid::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  char *s;
-  THD *thd= current_thd;
-
-  mysql_mutex_lock(&LOCK_uuid_generator);
-  if (! uuid_time) /* first UUID() call. initializing data */
-  {
-    ulong tmp=sql_rnd_with_mutex();
-    uchar mac[6];
-    int i;
-    if (my_gethwaddr(mac))
-    {
-      /* purecov: begin inspected */
-      /*
-        generating random "hardware addr"
-        and because specs explicitly specify that it should NOT correlate
-        with a clock_seq value (initialized random below), we use a separate
-        randominit() here
-      */
-      randominit(&uuid_rand, tmp + (ulong) thd, tmp + (ulong)global_query_id);
-      for (i=0; i < (int)sizeof(mac); i++)
-        mac[i]=(uchar)(my_rnd(&uuid_rand)*255);
-      /* purecov: end */    
-    }
-    s=clock_seq_and_node_str+sizeof(clock_seq_and_node_str)-1;
-    for (i=sizeof(mac)-1 ; i>=0 ; i--)
-    {
-      *--s=_dig_vec_lower[mac[i] & 15];
-      *--s=_dig_vec_lower[mac[i] >> 4];
-    }
-    randominit(&uuid_rand, tmp + (ulong) server_start_time,
-	       tmp + (ulong) thd->status_var.bytes_sent);
-    set_clock_seq_str();
-  }
-
-  ulonglong tv= my_getsystime() + UUID_TIME_OFFSET + nanoseq;
-
-  if (likely(tv > uuid_time))
-  {
-    /*
-      Current time is ahead of last timestamp, as it should be.
-      If we "borrowed time", give it back, just as long as we
-      stay ahead of the previous timestamp.
-    */
-    if (nanoseq)
-    {
-      DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0));
-      /*
-        -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
-      */
-      ulong delta= min(nanoseq, (ulong) (tv - uuid_time -1));
-      tv-= delta;
-      nanoseq-= delta;
-    }
-  }
-  else
-  {
-    if (unlikely(tv == uuid_time))
-    {
-      /*
-        For low-res system clocks. If several requests for UUIDs
-        end up on the same tick, we add a nano-second to make them
-        different.
-        ( current_timestamp + nanoseq * calls_in_this_period )
-        may end up > next_timestamp; this is OK. Nonetheless, we'll
-        try to unwind nanoseq when we get a chance to.
-        If nanoseq overflows, we'll start over with a new numberspace
-        (so the if() below is needed so we can avoid the ++tv and thus
-        match the follow-up if() if nanoseq overflows!).
-      */
-      if (likely(++nanoseq))
-        ++tv;
-    }
+  uchar guid[MY_UUID_SIZE];
 
-    if (unlikely(tv <= uuid_time))
-    {
-      /*
-        If the admin changes the system clock (or due to Daylight
-        Saving Time), the system clock may be turned *back* so we
-        go through a period once more for which we already gave out
-        UUIDs.  To avoid duplicate UUIDs despite potentially identical
-        times, we make a new random component.
-        We also come here if the nanoseq "borrowing" overflows.
-        In either case, we throw away any nanoseq borrowing since it's
-        irrelevant in the new numberspace.
-      */
-      set_clock_seq_str();
-      tv= my_getsystime() + UUID_TIME_OFFSET;
-      nanoseq= 0;
-      DBUG_PRINT("uuid",("making new numberspace"));
-    }
-  }
-
-  uuid_time=tv;
-  mysql_mutex_unlock(&LOCK_uuid_generator);
-
-  uint32 time_low=            (uint32) (tv & 0xFFFFFFFF);
-  uint16 time_mid=            (uint16) ((tv >> 32) & 0xFFFF);
-  uint16 time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION);
-
-  str->realloc(UUID_LENGTH+1);
-  str->length(UUID_LENGTH);
+  str->realloc(MY_UUID_STRING_LENGTH+1);
+  str->length(MY_UUID_STRING_LENGTH);
   str->set_charset(system_charset_info);
-  s=(char *) str->ptr();
-  s[8]=s[13]='-';
-  tohex(s, time_low, 8);
-  tohex(s+9, time_mid, 4);
-  tohex(s+14, time_hi_and_version, 4);
-  strmov(s+18, clock_seq_and_node_str);
+  my_uuid(guid);
+  my_uuid2str(guid, (char *)str->ptr());
+
   return str;
 }
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 5dcef2e671f..8c85d1cb8fd 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -377,6 +377,10 @@ public:
   String *val_str(String *);
   void fix_length_and_dec() { maybe_null=1; max_length = 13; }
   const char *func_name() const { return "encrypt"; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 #include "sql_crypt.h"
@@ -425,6 +429,11 @@ public:
     call
   */
   virtual const char *fully_qualified_func_name() const = 0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(
+                                           fully_qualified_func_name());
+  }
 };
 
 
@@ -693,6 +702,10 @@ public:
     maybe_null=1;
     max_length=MAX_BLOB_WIDTH;
   }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -874,7 +887,7 @@ public:
   String *val_str(String *) ZLIB_DEPENDED_FUNCTION
 };
 
-#define UUID_LENGTH (8+1+4+1+4+1+4+1+12)
+
 class Item_func_uuid: public Item_str_func
 {
 public:
@@ -883,10 +896,14 @@ public:
   {
     collation.set(system_charset_info,
                   DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
-    fix_char_length(UUID_LENGTH);
+    fix_char_length(MY_UUID_STRING_LENGTH);
   }
   const char *func_name() const{ return "uuid"; }
   String *val_str(String *);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 extern String my_empty_string;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 5cf585e1a56..53950b70e69 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -39,20 +39,18 @@
 #include "sql_select.h"
 #include "sql_parse.h"                          // check_stack_overrun
 
-inline Item * and_items(Item* cond, Item *item)
-{
-  return (cond? (new Item_cond_and(cond, item)) : item);
-}
 
 Item_subselect::Item_subselect():
   Item_result_field(), value_assigned(0), thd(0), substitution(0),
-  engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
-  const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE)
+  expr_cache(0), engine(0), old_engine(0), used_tables_cache(0),
+  have_to_be_excluded(0), const_item_cache(1), inside_first_fix_fields(0),
+  done_first_fix_fields(FALSE), eliminated(FALSE), engine_changed(0),
+  changed(0), is_correlated(FALSE)
 {
   with_subselect= 1;
   reset();
   /*
-    item value is NULL if select_subselect not changed this value
+    Item value is NULL if select_result_interceptor didn't change this value
     (i.e. some rows will be found returned)
   */
   null_value= TRUE;
@@ -60,7 +58,7 @@ Item_subselect::Item_subselect():
 
 
 void Item_subselect::init(st_select_lex *select_lex,
-			  select_subselect *result)
+			  select_result_interceptor *result)
 {
   /*
     Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(),
@@ -102,6 +100,8 @@ void Item_subselect::init(st_select_lex *select_lex,
     SELECT_LEX *upper= unit->outer_select();
     if (upper->parsing_place == IN_HAVING)
       upper->subquery_in_having= 1;
+    /* The subquery is an expression cache candidate */
+    upper->expr_cache_may_be_used[upper->parsing_place]= TRUE;
   }
   DBUG_VOID_RETURN;
 }
@@ -125,8 +125,10 @@ void Item_subselect::cleanup()
   }
   if (engine)
     engine->cleanup();
+  depends_on.empty();
   reset();
   value_assigned= 0;
+  expr_cache= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -153,6 +155,22 @@ void Item_singlerow_subselect::cleanup()
   DBUG_VOID_RETURN;
 }
 
+
+void Item_in_subselect::cleanup()
+{
+  DBUG_ENTER("Item_in_subselect::cleanup");
+  if (left_expr_cache)
+  {
+    left_expr_cache->delete_elements();
+    delete left_expr_cache;
+    left_expr_cache= NULL;
+  }
+  first_execution= TRUE;
+  is_constant= FALSE;
+  Item_subselect::cleanup();
+  DBUG_VOID_RETURN;
+}
+
 Item_subselect::~Item_subselect()
 {
   delete engine;
@@ -174,19 +192,42 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
 
   DBUG_ASSERT(fixed == 0);
   engine->set_thd((thd= thd_param));
+  if (!done_first_fix_fields)
+  {
+    done_first_fix_fields= TRUE;
+    inside_first_fix_fields= TRUE;
+    upper_refs.empty();
+    /*
+      psergey-todo: remove _first_fix_fields calls, we need changes on every
+      execution
+    */
+  }
+
+  eliminated= FALSE;
+  parent_select= thd_param->lex->current_select;
 
   if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res))
     return TRUE;
+  
 
   if (!(res= engine->prepare()))
   {
     // all transformation is done (used by prepared statements)
     changed= 1;
+  inside_first_fix_fields= FALSE;
 
+
+    // all transformation is done (used by prepared statements)
+    changed= 1;
+
+    /*
+      Substitute the current item with an Item_in_optimizer that was
+      created by Item_in_subselect::select_in_like_transformer and
+      call fix_fields for the substituted item which in turn calls
+      engine->prepare for the subquery predicate.
+    */
     if (substitution)
     {
-      int ret= 0;
-
       // did we changed top item of WHERE condition
       if (unit->outer_select()->where == (*ref))
 	unit->outer_select()->where= substitution; // correct WHERE for PS
@@ -200,20 +241,21 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
       substitution= 0;
       thd->where= "checking transformed subquery";
       if (!(*ref)->fixed)
-	ret= (*ref)->fix_fields(thd, ref);
-      thd->where= save_where;
-      return ret;
+	res= (*ref)->fix_fields(thd, ref);
+      goto end;
+//psergey-merge:  done_first_fix_fields= FALSE;
     }
     // Is it one field subselect?
     if (engine->cols() > max_columns)
     {
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      return TRUE;
+//psergey-merge:  done_first_fix_fields= FALSE;
+      goto end;
     }
     fix_length_and_dec();
   }
   else
-    goto err;
+    goto end;
   
   if ((uncacheable= engine->uncacheable()))
   {
@@ -223,12 +265,170 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
   }
   fixed= 1;
 
-err:
+end:
+  done_first_fix_fields= FALSE;
   thd->where= save_where;
   return res;
 }
 
 
+bool Item_subselect::enumerate_field_refs_processor(uchar *arg)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  while ((upper= it++))
+  {
+    if (upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+bool Item_subselect::mark_as_eliminated_processor(uchar *arg)
+{
+  eliminated= TRUE;
+  return FALSE;
+}
+
+
+bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, 
+                                       Item *item)
+{
+  if (inside_first_fix_fields)
+  {
+    is_correlated= TRUE;
+    Ref_to_outside *upper;
+    if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside()))
+      return TRUE;
+    upper->select= select;
+    upper->item= item;
+    if (upper_refs.push_back(upper, thd->stmt_arena->mem_root))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Adjust attributes after our parent select has been merged into grandparent
+
+  DESCRIPTION
+    Subquery is a composite object which may be correlated, that is, it may
+    have
+    1. references to tables of the parent select (i.e. one that has the clause
+      with the subquery predicate)
+    2. references to tables of the grandparent select
+    3. references to tables of further ancestors.
+    
+    Before the pullout, this item indicates:
+    - #1 with table bits in used_tables()
+    - #2 and #3 with OUTER_REF_TABLE_BIT.
+
+    After parent has been merged with grandparent:
+    - references to parent and grandparent tables should be indicated with 
+      table bits.
+    - references to greatgrandparent and further ancestors - with
+      OUTER_REF_TABLE_BIT.
+*/
+
+void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  recalc_used_tables(new_parent, TRUE);
+  parent_select= new_parent;
+}
+
+
+class Field_fixer: public Field_enumerator
+{
+public:
+  table_map used_tables; /* Collect used_tables here */
+  st_select_lex *new_parent; /* Select we're in */
+  virtual void visit_field(Item_field *item)
+  {
+    //for (TABLE_LIST *tbl= new_parent->leaf_tables; tbl; tbl= tbl->next_local)
+    //{
+    //  if (tbl->table == field->table)
+    //  {
+        used_tables|= item->field->table->map;
+    //    return;
+    //  }
+    //}
+    //used_tables |= OUTER_REF_TABLE_BIT;
+  }
+};
+
+
+/*
+  Recalculate used_tables_cache 
+*/
+
+void Item_subselect::recalc_used_tables(st_select_lex *new_parent, 
+                                        bool after_pullout)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  used_tables_cache= 0;
+  while ((upper= it++))
+  {
+    bool found= FALSE;
+    /*
+      Check if
+        1. the upper reference refers to the new immediate parent select, or
+        2. one of the further ancestors.
+
+      We rely on the fact that the tree of selects is modified by some kind of
+      'flattening', i.e. a process where child selects are merged into their
+      parents.
+      The merged selects are removed from the select tree but keep pointers to
+      their parents.
+    */
+    for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select())
+    {
+      /* 
+        If we've reached the new parent select by walking upwards from
+        reference's original select, this means that the reference is now 
+        referring to the direct parent:
+      */
+      if (sel == new_parent)
+      {
+        found= TRUE;
+        /* 
+          upper->item may be NULL when we've referred to a grouping function,
+          in which case we don't care about what it's table_map really is,
+          because item->with_sum_func==1 will ensure correct placement of the
+          item.
+        */
+        if (upper->item)
+        {
+          // Now, iterate over fields and collect used_tables() attribute:
+          Field_fixer fixer;
+          fixer.used_tables= 0;
+          fixer.new_parent= new_parent;
+          upper->item->walk(&Item::enumerate_field_refs_processor, FALSE,
+                            (uchar*)&fixer);
+          used_tables_cache |= fixer.used_tables;
+          /*
+          if (after_pullout)
+            upper->item->fix_after_pullout(new_parent, &(upper->item));
+          upper->item->update_used_tables();
+          used_tables_cache |= upper->item->used_tables();
+          */
+        }
+      }
+    }
+    if (!found)
+      used_tables_cache|= OUTER_REF_TABLE_BIT;
+  }
+  /* 
+    Don't update const_tables_cache yet as we don't yet know which of the
+    parent's tables are constant. Parent will call update_used_tables() after
+    he has done const table detection, and that will be our chance to update
+    const_tables_cache.
+  */
+}
+
 bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
                           uchar *argument)
 {
@@ -246,6 +446,7 @@ bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
       if (lex->having && (lex->having)->walk(processor, walk_subquery,
                                              argument))
         return 1;
+      /* TODO: why does this walk WHERE/HAVING but not ON expressions of outer joins? */
 
       while ((item=li++))
       {
@@ -295,6 +496,97 @@ bool Item_subselect::exec()
   return (res);
 }
 
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_subselect::expr_cache_is_needed(THD *thd)
+{
+  return (depends_on.elements &&
+          engine->cols() == 1 &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/**
+  Check if an expression cache is needed for this subquery
+
+  @param thd             Thread handle
+
+  @details
+  The function checks whether a cache is needed for a subquery and whether
+  the result of the subquery can be put in cache.
+
+  @note
+  This method allows many columns in the subquery because it is supported by
+  Item_in optimizer and result of the IN subquery will be scalar in this
+  case.
+
+  @retval TRUE  cache is needed
+  @retval FALSE otherwise
+*/
+
+bool Item_in_subselect::expr_cache_is_needed(THD *thd)
+{
+  return (depends_on.elements &&
+          optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+          !(engine->uncacheable() & (UNCACHEABLE_RAND |
+                                     UNCACHEABLE_SIDEEFFECT)));
+}
+
+
+/*
+  Compute the IN predicate if the left operand's cache changed.
+*/
+
+bool Item_in_subselect::exec()
+{
+  DBUG_ENTER("Item_in_subselect::exec");
+  /*
+    Initialize the cache of the left predicate operand. This has to be done as
+    late as now, because Cached_item directly contains a resolved field (not
+    an item, and in some cases (when temp tables are created), these fields
+    end up pointing to the wrong field. One solution is to change Cached_item
+    to not resolve its field upon creation, but to resolve it dynamically
+    from a given Item_ref object.
+    TODO: the cache should be applied conditionally based on:
+    - rules - e.g. only if the left operand is known to be ordered, and/or
+    - on a cost-based basis, that takes into account the cost of a cache
+      lookup, the cache hit rate, and the savings per cache hit.
+  */
+  if (!left_expr_cache && exec_method == MATERIALIZATION)
+    init_left_expr_cache();
+
+  /*
+    If the new left operand is already in the cache, reuse the old result.
+    Use the cached result only if this is not the first execution of IN
+    because the cache is not valid for the first execution.
+  */
+  if (!first_execution && left_expr_cache &&
+      test_if_item_cache_changed(*left_expr_cache) < 0)
+    DBUG_RETURN(FALSE);
+
+  /*
+    The exec() method below updates item::value, and item::null_value, thus if
+    we don't call it, the next call to item::val_int() will return whatever
+    result was computed by its previous call.
+  */
+  DBUG_RETURN(Item_subselect::exec());
+}
+
+
 Item::Type Item_subselect::type() const
 {
   return SUBSELECT_ITEM;
@@ -327,6 +619,7 @@ Item *Item_subselect::get_tmp_table_item(THD *thd_arg)
 
 void Item_subselect::update_used_tables()
 {
+  recalc_used_tables(parent_select, FALSE);
   if (!engine->uncacheable())
   {
     // did all used tables become static?
@@ -435,6 +728,7 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type)
 
 void Item_singlerow_subselect::reset()
 {
+  eliminated= FALSE;
   null_value= TRUE;
   if (value)
     value->null_value= TRUE;
@@ -454,8 +748,9 @@ void Item_singlerow_subselect::reset()
 Item_subselect::trans_res
 Item_singlerow_subselect::select_transformer(JOIN *join)
 {
+  DBUG_ENTER("Item_singlerow_subselect::select_transformer");
   if (changed)
-    return RES_OK;
+    DBUG_RETURN(RES_OK);
 
   SELECT_LEX *select_lex= join->select_lex;
   Query_arena *arena= thd->stmt_arena;
@@ -498,15 +793,18 @@ Item_singlerow_subselect::select_transformer(JOIN *join)
     */
     substitution->walk(&Item::remove_dependence_processor, 0,
 		       (uchar *) select_lex->outer_select());
-    return RES_REDUCE;
+    DBUG_RETURN(RES_REDUCE);
   }
-  return RES_OK;
+  DBUG_RETURN(RES_OK);
 }
 
 
 void Item_singlerow_subselect::store(uint i, Item *item)
 {
   row[i]->store(item);
+  //psergey-merge: can do without that: row[i]->cache_value();
+  //psergey-backport-timours: ^ really, without that ^ 
+  //psergey-try-merge-again:
   row[i]->cache_value();
 }
 
@@ -547,6 +845,40 @@ void Item_singlerow_subselect::fix_length_and_dec()
     maybe_null= engine->may_be_null();
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_singlerow_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_singlerow_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd, depends_on)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 uint Item_singlerow_subselect::cols()
 {
   return engine->cols();
@@ -693,8 +1025,9 @@ bool Item_in_subselect::test_limit(st_select_lex_unit *unit_arg)
 
 Item_in_subselect::Item_in_subselect(Item * left_exp,
 				     st_select_lex *select_lex):
-  Item_exists_subselect(), optimizer(0), transformed(0),
-  pushed_cond_guards(NULL), upper_item(0)
+  Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
+  is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL),
+  exec_method(NOT_TRANSFORMED), upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
@@ -708,13 +1041,18 @@ Item_in_subselect::Item_in_subselect(Item * left_exp,
   DBUG_VOID_RETURN;
 }
 
+int Item_in_subselect::get_identifier()
+{
+  return engine->get_identifier();
+}
+
 Item_allany_subselect::Item_allany_subselect(Item * left_exp,
                                              chooser_compare_func_creator fc,
 					     st_select_lex *select_lex,
 					     bool all_arg)
   :Item_in_subselect(), func_creator(fc), all(all_arg)
 {
-  DBUG_ENTER("Item_in_subselect::Item_in_subselect");
+  DBUG_ENTER("Item_allany_subselect::Item_allany_subselect");
   left_expr= left_exp;
   func= func_creator(all_arg);
   init(select_lex, new select_exists_subselect(this));
@@ -736,6 +1074,40 @@ void Item_exists_subselect::fix_length_and_dec()
   unit->global_parameters->select_limit= new Item_int((int32) 1);
 }
 
+
+/**
+  Add an expression cache for this subquery if it is needed
+
+  @param thd_arg         Thread handle
+
+  @details
+  The function checks whether an expression cache is needed for this item
+  and if if so wraps the item into an item of the class
+  Item_exp_cache_wrapper with an appropriate expression cache set up there.
+
+  @note
+  used from Item::transform()
+
+  @return
+  new wrapper item if an expression cache is needed,
+  this item - otherwise
+*/
+
+Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg)
+{
+  THD *thd= (THD*) thd_arg;
+  DBUG_ENTER("Item_exists_subselect::expr_cache_insert_transformer");
+
+  if (expr_cache)
+    DBUG_RETURN(expr_cache);
+
+  if (substype() == EXISTS_SUBS && expr_cache_is_needed(thd) &&
+      (expr_cache= set_expr_cache(thd, depends_on)))
+    DBUG_RETURN(expr_cache);
+  DBUG_RETURN(this);
+}
+
+
 double Item_exists_subselect::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -885,6 +1257,8 @@ bool Item_in_subselect::val_bool()
 {
   DBUG_ASSERT(fixed == 1);
   null_value= was_null= FALSE;
+  if (is_constant)
+    return value;
   if (exec())
   {
     reset();
@@ -960,10 +1334,10 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
           HAVING trigcond(<is_not_null_test>(ie))
 
   RETURN
-    RES_OK     - OK, either subquery was transformed, or appopriate
-                 predicates where injected into it.
-    RES_REDUCE - The subquery was reduced to non-subquery
-    RES_ERROR  - Error
+    RES_OK     Either subquery was transformed, or appopriate
+                       predicates where injected into it.
+    RES_REDUCE The subquery was reduced to non-subquery
+    RES_ERROR  Error
 */
 
 Item_subselect::trans_res
@@ -977,6 +1351,7 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     Check that the right part of the subselect contains no more than one
     column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
   */
+  // psergey: duplicated_subselect_card_check
   if (select_lex->item_list.elements > 1)
   {
     my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
@@ -1071,9 +1446,9 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     SELECT_LEX_UNIT *master_unit= select_lex->master_unit();
     substitution= optimizer;
 
-    SELECT_LEX *current= thd->lex->current_select, *up;
+    SELECT_LEX *current= thd->lex->current_select;
 
-    thd->lex->current_select= up= current->return_after_parsing();
+    thd->lex->current_select= current->return_after_parsing();
     //optimizer never use Item **ref => we can pass 0 as parameter
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
@@ -1082,6 +1457,9 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     }
     thd->lex->current_select= current;
 
+    /* We will refer to upper level cache array => we have to save it for SP */
+    optimizer->keep_top_level_cache();
+
     /*
       As far as  Item_ref_in_optimizer do not substitute itself on fix_fields
       we can use same item for all selects.
@@ -1092,7 +1470,9 @@ Item_in_subselect::single_value_transformer(JOIN *join,
 			      (char *)in_left_expr_name);
 
     master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
+    //psergey: placed then removed: select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
   }
+
   if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
   {
     if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool))))
@@ -1100,6 +1480,60 @@ Item_in_subselect::single_value_transformer(JOIN *join,
     pushed_cond_guards[0]= TRUE;
   }
 
+  /*
+    If this IN predicate can be computed via materialization, do not
+    perform the IN -> EXISTS transformation.
+  */
+  if (exec_method == MATERIALIZATION)
+    DBUG_RETURN(RES_OK);
+
+  /* Perform the IN=>EXISTS transformation. */
+  DBUG_RETURN(single_value_in_to_exists_transformer(join, func));
+}
+
+
+/**
+  Transofrm an IN predicate into EXISTS via predicate injection.
+
+  @details The transformation injects additional predicates into the subquery
+  (and makes the subquery correlated) as follows.
+
+  - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+    SELECT ie FROM ...  HAVING subq_having AND 
+                               trigcond(oe $cmp$ ref_or_null_helper<ie>)
+                                   
+    the addition is wrapped into trigger only when we want to distinguish
+    between NULL and FALSE results.
+
+  - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
+    following:
+
+    = If we don't need to distinguish between NULL and FALSE subquery:
+        
+      SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
+
+    = If we need to distinguish between those:
+
+      SELECT 1 FROM ...
+        WHERE  subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
+        HAVING trigcond(<is_not_null_test>(ie))
+
+    @param join  Join object of the subquery (i.e. 'child' join).
+    @param func  Subquery comparison creator
+
+    @retval RES_OK     Either subquery was transformed, or appopriate
+                       predicates where injected into it.
+    @retval RES_REDUCE The subquery was reduced to non-subquery
+    @retval RES_ERROR  Error
+*/
+
+Item_subselect::trans_res
+Item_in_subselect::single_value_in_to_exists_transformer(JOIN * join, Comp_creator *func)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  DBUG_ENTER("Item_in_subselect::single_value_in_to_exists_transformer");
+
   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
   if (join->having || select_lex->with_sum_func ||
       select_lex->group_list.elements)
@@ -1279,27 +1713,29 @@ Item_subselect::trans_res
 Item_in_subselect::row_value_transformer(JOIN *join)
 {
   SELECT_LEX *select_lex= join->select_lex;
-  Item *having_item= 0;
   uint cols_num= left_expr->cols();
-  bool is_having_used= (join->having || select_lex->with_sum_func ||
-                        select_lex->group_list.first ||
-                        !select_lex->table_list.elements);
+
   DBUG_ENTER("Item_in_subselect::row_value_transformer");
 
-  if (select_lex->item_list.elements != left_expr->cols())
+  // psergey: duplicated_subselect_card_check
+  if (select_lex->item_list.elements != cols_num)
   {
-    my_error(ER_OPERAND_COLUMNS, MYF(0), left_expr->cols());
+    my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num);
     DBUG_RETURN(RES_ERROR);
   }
 
+  /*
+    Wrap the current IN predicate in an Item_in_optimizer. The actual
+    substitution in the Item tree takes place in Item_subselect::fix_fields.
+  */
   if (!substitution)
   {
     //first call for this unit
     SELECT_LEX_UNIT *master_unit= select_lex->master_unit();
     substitution= optimizer;
 
-    SELECT_LEX *current= thd->lex->current_select, *up;
-    thd->lex->current_select= up= current->return_after_parsing();
+    SELECT_LEX *current= thd->lex->current_select;
+    thd->lex->current_select= current->return_after_parsing();
     //optimizer never use Item **ref => we can pass 0 as parameter
     if (!optimizer || optimizer->fix_left(thd, 0))
     {
@@ -1323,6 +1759,48 @@ Item_in_subselect::row_value_transformer(JOIN *join)
     }
   }
 
+  /*
+    If this IN predicate can be computed via materialization, do not
+    perform the IN -> EXISTS transformation.
+  */
+  if (exec_method == MATERIALIZATION)
+    DBUG_RETURN(RES_OK);
+
+  /* Perform the IN=>EXISTS transformation. */
+  DBUG_RETURN(row_value_in_to_exists_transformer(join));
+}
+
+
+/**
+  Tranform a (possibly non-correlated) IN subquery into a correlated EXISTS.
+
+  @todo
+  The IF-ELSE below can be refactored so that there is no duplication of the
+  statements that create the new conditions. For this we have to invert the IF
+  and the FOR statements as this:
+  for (each left operand)
+    create the equi-join condition
+    if (is_having_used || !abort_on_null)
+      create the "is null" and is_not_null_test items
+    if (is_having_used)
+      add the equi-join and the null tests to HAVING
+    else
+      add the equi-join and the "is null" to WHERE
+      add the is_not_null_test to HAVING
+*/
+
+Item_subselect::trans_res
+Item_in_subselect::row_value_in_to_exists_transformer(JOIN * join)
+{
+  SELECT_LEX *select_lex= join->select_lex;
+  Item *having_item= 0;
+  uint cols_num= left_expr->cols();
+  bool is_having_used= (join->having || select_lex->with_sum_func ||
+                        select_lex->group_list.first ||
+                        !select_lex->table_list.elements);
+
+  DBUG_ENTER("Item_in_subselect::row_value_in_to_exists_transformer");
+
   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
   if (is_having_used)
   {
@@ -1545,7 +2023,7 @@ Item_subselect::trans_res
 Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
 {
   Query_arena *arena, backup;
-  SELECT_LEX *current= thd->lex->current_select, *up;
+  SELECT_LEX *current= thd->lex->current_select;
   const char *save_where= thd->where;
   Item_subselect::trans_res res= RES_ERROR;
   bool result;
@@ -1566,9 +2044,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
   }
 
   if (changed)
-  {
     DBUG_RETURN(RES_OK);
-  }
 
   thd->where= "IN/ALL/ANY subquery";
 
@@ -1576,6 +2052,8 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
     In some optimisation cases we will not need this Item_in_optimizer
     object, but we can't know it here, but here we need address correct
     reference on left expresion.
+
+    //psergey: he means degenerate cases like "... IN (SELECT 1)"
   */
   if (!optimizer)
   {
@@ -1587,7 +2065,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
       goto err;
   }
 
-  thd->lex->current_select= up= current->return_after_parsing();
+  thd->lex->current_select= current->return_after_parsing();
   result= (!left_expr->fixed &&
            left_expr->fix_fields(thd, optimizer->arguments()));
   /* fix_fields can change reference to left_expr, we need reassign it */
@@ -1597,7 +2075,12 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
   if (result)
     goto err;
 
-  transformed= 1;
+  /*
+    If we didn't choose an execution method up to this point, we choose
+    the IN=>EXISTS transformation.
+  */
+  if (exec_method == NOT_TRANSFORMED)
+    exec_method= IN_TO_EXISTS;
   arena= thd->activate_stmt_arena_if_needed(&backup);
 
   /*
@@ -1631,7 +2114,7 @@ err:
 
 void Item_in_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (exec_method == IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1644,28 +2127,250 @@ void Item_in_subselect::print(String *str, enum_query_type query_type)
 
 bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
 {
-  bool result = 0;
-  
-  if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed)
-    result = left_expr->fix_fields(thd_arg, &left_expr);
+  uint outer_cols_num;
+  List<Item> *inner_cols;
+
+  if (exec_method == SEMI_JOIN)
+    return !( (*ref)= new Item_int(1));
+
+  /*
+    Check if the outer and inner IN operands match in those cases when we
+    will not perform IN=>EXISTS transformation. Currently this is when we
+    use subquery materialization.
+
+    The condition below is true when this method was called recursively from
+    inside JOIN::prepare for the JOIN object created by the call chain
+    Item_subselect::fix_fields -> subselect_single_select_engine::prepare,
+    which creates a JOIN object for the subquery and calls JOIN::prepare for
+    the JOIN of the subquery.
+    Notice that in some cases, this doesn't happen, and the check_cols()
+    test for each Item happens later in
+    Item_in_subselect::row_value_in_to_exists_transformer.
+    The reason for this mess is that our JOIN::prepare phase works top-down
+    instead of bottom-up, so we first do name resoluton and semantic checks
+    for the outer selects, then for the inner.
+  */
+  if (engine &&
+      engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE &&
+      ((subselect_single_select_engine*)engine)->join)
+  {
+    outer_cols_num= left_expr->cols();
+
+    if (unit->is_union())
+      inner_cols= &(unit->types);
+    else
+      inner_cols= &(unit->first_select()->item_list);
+    if (outer_cols_num != inner_cols->elements)
+    {
+      my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num);
+      return TRUE;
+    }
+    if (outer_cols_num > 1)
+    {
+      List_iterator<Item> inner_col_it(*inner_cols);
+      Item *inner_col;
+      for (uint i= 0; i < outer_cols_num; i++)
+      {
+        inner_col= inner_col_it++;
+        if (inner_col->check_cols(left_expr->element_index(i)->cols()))
+          return TRUE;
+      }
+    }
+  }
+
+  if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed &&
+      left_expr->fix_fields(thd_arg, &left_expr))
+    return TRUE;
+  if (Item_subselect::fix_fields(thd_arg, ref))
+    return TRUE;
+
+  fixed= TRUE;
+
+  return FALSE;
+}
+
+
+void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  left_expr->fix_after_pullout(new_parent, &left_expr);
+  Item_subselect::fix_after_pullout(new_parent, ref);
+}
+
+void Item_in_subselect::update_used_tables()
+{
+  Item_subselect::update_used_tables();
+  left_expr->update_used_tables();
+  used_tables_cache |= left_expr->used_tables();
+}
+
+/**
+  Try to create an engine to compute the subselect via materialization,
+  and if this fails, revert to execution via the IN=>EXISTS transformation.
+
+  @details
+    The purpose of this method is to hide the implementation details
+    of this Item's execution. The method creates a new engine for
+    materialized execution, and initializes the engine.
+
+    If this initialization fails
+    - either because it wasn't possible to create the needed temporary table
+      and its index,
+    - or because of a memory allocation error,
+    then we revert back to execution via the IN=>EXISTS tranformation.
+
+    The initialization of the new engine is divided in two parts - a permanent
+    one that lives across prepared statements, and one that is repeated for each
+    execution.
+
+  @returns
+    @retval TRUE  memory allocation error occurred
+    @retval FALSE an execution method was chosen successfully
+*/
+
+bool Item_in_subselect::setup_engine()
+{
+  subselect_hash_sj_engine *new_engine= NULL;
+  bool res= FALSE;
+
+  DBUG_ENTER("Item_in_subselect::setup_engine");
+
+  if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE)
+  {
+    /* Create/initialize objects in permanent memory. */
+    subselect_single_select_engine *old_engine;
+    Query_arena *arena= thd->stmt_arena, backup;
+
+    old_engine= (subselect_single_select_engine*) engine;
+
+    if (arena->is_conventional())
+      arena= 0;
+    else
+      thd->set_n_backup_active_arena(arena, &backup);
+
+    if (!(new_engine= new subselect_hash_sj_engine(thd, this,
+                                                   old_engine)) ||
+        new_engine->init_permanent(unit->get_unit_column_types()))
+    {
+      Item_subselect::trans_res trans_res;
+      /*
+        If for some reason we cannot use materialization for this IN predicate,
+        delete all materialization-related objects, and apply the IN=>EXISTS
+        transformation.
+      */
+      delete new_engine;
+      new_engine= NULL;
+      exec_method= NOT_TRANSFORMED;
+      if (left_expr->cols() == 1)
+        trans_res= single_value_in_to_exists_transformer(old_engine->join,
+                                                         &eq_creator);
+      else
+        trans_res= row_value_in_to_exists_transformer(old_engine->join);
+      res= (trans_res != Item_subselect::RES_OK);
+    }
+    if (new_engine)
+      engine= new_engine;
+
+    if (arena)
+      thd->restore_active_arena(arena, &backup);
+  }
+  else
+  {
+    DBUG_ASSERT(engine->engine_type() == subselect_engine::HASH_SJ_ENGINE);
+    new_engine= (subselect_hash_sj_engine*) engine;
+  }
+
+  /* Initilizations done in runtime memory, repeated for each execution. */
+  if (new_engine)
+  {
+    /*
+      Reset the LIMIT 1 set in Item_exists_subselect::fix_length_and_dec.
+      TODO:
+      Currently we set the subquery LIMIT to infinity, and this is correct
+      because we forbid at parse time LIMIT inside IN subqueries (see
+      Item_in_subselect::test_limit). However, once we allow this, here
+      we should set the correct limit if given in the query.
+    */
+    unit->global_parameters->select_limit= NULL;
+    if ((res= new_engine->init_runtime()))
+      DBUG_RETURN(res);
+  }
 
-  return result || Item_subselect::fix_fields(thd_arg, ref);
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Initialize the cache of the left operand of the IN predicate.
+
+  @note This method has the same purpose as alloc_group_fields(),
+  but it takes a different kind of collection of items, and the
+  list we push to is dynamically allocated.
+
+  @retval TRUE  if a memory allocation error occurred or the cache is
+                not applicable to the current query
+  @retval FALSE if success
+*/
+
+bool Item_in_subselect::init_left_expr_cache()
+{
+  JOIN *outer_join;
+
+  outer_join= unit->outer_select()->join;
+  /*
+    An IN predicate might be evaluated in a query for which all tables have
+    been optimzied away.
+  */ 
+  if (!outer_join || !outer_join->tables || !outer_join->tables_list)
+    return TRUE;
+
+  if (!(left_expr_cache= new List<Cached_item>))
+    return TRUE;
+
+  for (uint i= 0; i < left_expr->cols(); i++)
+  {
+    Cached_item *cur_item_cache= new_Cached_item(thd,
+                                                 left_expr->element_index(i),
+                                                 FALSE);
+    if (!cur_item_cache || left_expr_cache->push_front(cur_item_cache))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Callback to test if an IN predicate is expensive.
+
+  @details
+    IN predicates are considered expensive only if they will be executed via
+    materialization. The return value affects the behavior of
+    make_cond_for_table() in such a way that it is unchanged when we use
+    the IN=>EXISTS transformation to compute IN.
+
+  @retval TRUE  if the predicate is expensive
+  @retval FALSE otherwise
+*/
+
+bool Item_in_subselect::is_expensive_processor(uchar *arg)
+{
+  return exec_method == MATERIALIZATION;
 }
 
 
 Item_subselect::trans_res
 Item_allany_subselect::select_transformer(JOIN *join)
 {
-  transformed= 1;
+  DBUG_ENTER("Item_allany_subselect::select_transformer");
+  exec_method= IN_TO_EXISTS;
   if (upper_item)
     upper_item->show= 1;
-  return select_in_like_transformer(join, func);
+  DBUG_RETURN(select_in_like_transformer(join, func));
 }
 
 
 void Item_allany_subselect::print(String *str, enum_query_type query_type)
 {
-  if (transformed)
+  if (exec_method == IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -1688,20 +2393,23 @@ void subselect_engine::set_thd(THD *thd_arg)
 
 subselect_single_select_engine::
 subselect_single_select_engine(st_select_lex *select,
-			       select_subselect *result_arg,
+			       select_result_interceptor *result_arg,
 			       Item_subselect *item_arg)
   :subselect_engine(item_arg, result_arg),
-   prepared(0), optimized(0), executed(0),
-   select_lex(select), join(0)
+   prepared(0), executed(0), select_lex(select), join(0)
 {
   select_lex->master_unit()->item= item_arg;
 }
 
+int subselect_single_select_engine::get_identifier()
+{
+  return select_lex->select_number; 
+}
 
 void subselect_single_select_engine::cleanup()
 {
   DBUG_ENTER("subselect_single_select_engine::cleanup");
-  prepared= optimized= executed= 0;
+  prepared= executed= 0;
   join= 0;
   result->cleanup();
   DBUG_VOID_RETURN;
@@ -1744,19 +2452,19 @@ bool subselect_union_engine::no_rows()
   return test(!unit->fake_select_lex->join->send_records);
 }
 
+
 void subselect_uniquesubquery_engine::cleanup()
 {
   DBUG_ENTER("subselect_uniquesubquery_engine::cleanup");
-  /*
-    subselect_uniquesubquery_engine have not 'result' assigbed, so we do not
-    cleanup() it
-  */
+  /* Tell handler we don't need the index anymore */
+  if (tab->table->file->inited)
+    tab->table->file->ha_index_end();
   DBUG_VOID_RETURN;
 }
 
 
 subselect_union_engine::subselect_union_engine(st_select_lex_unit *u,
-					       select_subselect *result_arg,
+					       select_result_interceptor *result_arg,
 					       Item_subselect *item_arg)
   :subselect_engine(item_arg, result_arg)
 {
@@ -1765,10 +2473,40 @@ subselect_union_engine::subselect_union_engine(st_select_lex_unit *u,
 }
 
 
+/**
+  Create and prepare the JOIN object that represents the query execution
+  plan for the subquery.
+
+  @details
+  This method is called from Item_subselect::fix_fields. For prepared
+  statements it is called both during the PREPARE and EXECUTE phases in the
+  following ways:
+  - During PREPARE the optimizer needs some properties
+    (join->fields_list.elements) of the JOIN to proceed with preparation of
+    the remaining query (namely to complete ::fix_fields for the subselect
+    related classes. In the end of PREPARE the JOIN is deleted.
+  - When we EXECUTE the query, Item_subselect::fix_fields is called again, and
+    the JOIN object is re-created again, prepared and executed. In the end of
+    execution it is deleted.
+  In all cases the JOIN is created in runtime memory (not in the permanent
+  memory root).
+
+  @todo
+  Re-check what properties of 'join' are needed during prepare, and see if
+  we can avoid creating a JOIN during JOIN::prepare of the outer join.
+
+  @retval 0  if success
+  @retval 1  if error
+*/
+
 int subselect_single_select_engine::prepare()
 {
   if (prepared)
     return 0;
+  if (select_lex->join)
+  {
+    select_lex->cleanup();
+  }
   join= new JOIN(thd, select_lex->item_list,
 		 select_lex->options | SELECT_NO_UNLOCK, result);
   if (!join || !result)
@@ -1799,8 +2537,8 @@ int subselect_union_engine::prepare()
 
 int subselect_uniquesubquery_engine::prepare()
 {
-  //this never should be called
-  DBUG_ASSERT(0);
+  /* Should never be called. */
+  DBUG_ASSERT(FALSE);
   return 1;
 }
 
@@ -1847,7 +2585,7 @@ void subselect_engine::set_row(List<Item> &item_list, Item_cache **row)
     if (!(row[i]= Item_cache::get_cache(sel_item)))
       return;
     row[i]->setup(sel_item);
-    row[i]->store(sel_item);
+ //psergey-backport-timours:   row[i]->store(sel_item);
   }
   if (item_list.elements > 1)
     res_type= ROW_RESULT;
@@ -1896,11 +2634,10 @@ int subselect_single_select_engine::exec()
   char const *save_where= thd->where;
   SELECT_LEX *save_select= thd->lex->current_select;
   thd->lex->current_select= select_lex;
-  if (!optimized)
+  if (!join->optimized)
   {
     SELECT_LEX_UNIT *unit= select_lex->master_unit();
 
-    optimized= 1;
     unit->set_limit(unit->global_parameters);
     if (join->optimize())
     {
@@ -2043,21 +2780,31 @@ int subselect_uniquesubquery_engine::scan_table()
   if (table->file->inited)
     table->file->ha_index_end();
  
-  table->file->ha_rnd_init(1);
+  if (table->file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
   table->file->extra_opt(HA_EXTRA_CACHE,
                          current_thd->variables.read_buff_size);
   table->null_row= 0;
   for (;;)
   {
-    error=table->file->rnd_next(table->record[0]);
-    if (error && error != HA_ERR_END_OF_FILE)
-    {
-      error= report_error(table, error);
-      break;
+    error=table->file->ha_rnd_next(table->record[0]);
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(table, error);
+        break;
+      }
     }
-    /* No more rows */
-    if (table->status)
-      break;
 
     if (!cond || cond->val_int())
     {
@@ -2168,6 +2915,56 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
 
 
 /*
+  @retval  1  A NULL was found in the outer reference, index lookup is
+              not applicable, the outer ref is unsusable as a lookup key,
+              use some other method to find a match.
+  @retval  0  The outer ref was copied into an index lookup key.
+  @retval -1  The outer ref cannot possibly match any row, IN is FALSE.
+*/
+/* TIMOUR: this method is a variant of copy_ref_key(), needs refactoring. */
+
+int subselect_uniquesubquery_engine::copy_ref_key_simple()
+{
+  for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
+  {
+    enum store_key::store_key_result store_res;
+    store_res= (*copy)->copy();
+    tab->ref.key_err= store_res;
+
+    /*
+      When there is a NULL part in the key we don't need to make index
+      lookup for such key thus we don't need to copy whole key.
+      If we later should do a sequential scan return OK. Fail otherwise.
+
+      See also the comment for the subselect_uniquesubquery_engine::exec()
+      function.
+    */
+    null_keypart= (*copy)->null_key;
+    if (null_keypart)
+      return 1;
+
+    /*
+      Check if the error is equal to STORE_KEY_FATAL. This is not expressed 
+      using the store_key::store_key_result enum because ref.key_err is a 
+      boolean and we want to detect both TRUE and STORE_KEY_FATAL from the 
+      space of the union of the values of [TRUE, FALSE] and 
+      store_key::store_key_result.  
+      TODO: fix the variable an return types.
+    */
+    if (store_res == store_key::STORE_KEY_FATAL)
+    {
+      /*
+       Error converting the left IN operand to the column type of the right
+       IN operand. 
+      */
+      return -1;
+    }
+  }
+  return 0;
+}
+
+
+/*
   Execute subselect
 
   SYNOPSIS
@@ -2207,7 +3004,13 @@ int subselect_uniquesubquery_engine::exec()
  
   /* TODO: change to use of 'full_scan' here? */
   if (copy_ref_key())
+  {
+    /*
+      TIMOUR: copy_ref_key() == 1 means NULL result, not error, why return 1?
+      Check who reiles on this result.
+    */
     DBUG_RETURN(1);
+  }
   if (table->status)
   {
     /* 
@@ -2223,10 +3026,11 @@ int subselect_uniquesubquery_engine::exec()
  
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 0);
-  error= table->file->index_read_map(table->record[0],
-                                     tab->ref.key_buff,
-                                     make_prev_keypart_map(tab->ref.key_parts),
-                                     HA_READ_KEY_EXACT);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
   if (error &&
       error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
     error= report_error(table, error);
@@ -2247,10 +3051,51 @@ int subselect_uniquesubquery_engine::exec()
 }
 
 
+/*
+  TIMOUR: write comment
+*/
+
+int subselect_uniquesubquery_engine::index_lookup()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
+  int error;
+  TABLE *table= tab->table;
+ 
+  if (!table->file->inited)
+    table->file->ha_index_init(tab->ref.key, 0);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
+  DBUG_PRINT("info", ("lookup result: %i", error));
+
+  if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+  {
+    /*
+      TIMOUR: I don't understand at all when do we need to call report_error.
+      In most places where we access an index, we don't do this. Why here?
+    */
+    error= report_error(table, error);
+    DBUG_RETURN(error);
+  }
+
+  table->null_row= 0;
+  if (!error && (!cond || cond->val_int()))
+    ((Item_in_subselect *) item)->value= 1;
+  else
+    ((Item_in_subselect *) item)->value= 0;
+
+  DBUG_RETURN(0);
+}
+
+
+
 subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
 {
   /* Tell handler we don't need the index anymore */
-  tab->table->file->ha_index_end();
+  //psergey-merge-todo: the following was gone in 6.0:
+ //psergey-merge: don't need this after all: tab->table->file->ha_index_end();
 }
 
 
@@ -2258,7 +3103,7 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
   Index-lookup subselect 'engine' - run the subquery
 
   SYNOPSIS
-    subselect_uniquesubquery_engine:exec()
+    subselect_indexsubquery_engine:exec()
       full_scan 
 
   DESCRIPTION
@@ -2344,10 +3189,11 @@ int subselect_indexsubquery_engine::exec()
 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 1);
-  error= table->file->index_read_map(table->record[0],
-                                     tab->ref.key_buff,
-                                     make_prev_keypart_map(tab->ref.key_parts),
-                                     HA_READ_KEY_EXACT);
+  error= table->file->ha_index_read_map(table->record[0],
+                                        tab->ref.key_buff,
+                                        make_prev_keypart_map(tab->
+                                                              ref.key_parts),
+                                        HA_READ_KEY_EXACT);
   if (error &&
       error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
     error= report_error(table, error);
@@ -2368,9 +3214,9 @@ int subselect_indexsubquery_engine::exec()
             ((Item_in_subselect *) item)->value= 1;
           break;
         }
-        error= table->file->index_next_same(table->record[0],
-                                            tab->ref.key_buff,
-                                            tab->ref.key_length);
+        error= table->file->ha_index_next_same(table->record[0],
+                                               tab->ref.key_buff,
+                                               tab->ref.key_length);
         if (error && error != HA_ERR_END_OF_FILE)
         {
           error= report_error(table, error);
@@ -2395,8 +3241,10 @@ int subselect_indexsubquery_engine::exec()
 
 uint subselect_single_select_engine::cols()
 {
-  DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
-  return select_lex->join->fields_list.elements;
+  //psergey-sj-backport: the following assert was gone in 6.0:
+  //DBUG_ASSERT(select_lex->join != 0); // should be called after fix_fields()
+  //return select_lex->join->fields_list.elements;
+  return select_lex->item_list.elements;
 }
 
 
@@ -2478,10 +3326,20 @@ void subselect_union_engine::print(String *str, enum_query_type query_type)
 void subselect_uniquesubquery_engine::print(String *str,
                                             enum_query_type query_type)
 {
+  char *table_name= tab->table->s->table_name.str;
   str->append(STRING_WITH_LEN("<primary_index_lookup>("));
   tab->ref.items[0]->print(str, query_type);
   str->append(STRING_WITH_LEN(" in "));
-  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  if (tab->table->s->table_category == TABLE_CATEGORY_TEMPORARY)
+  {
+    /*
+      Temporary tables' names change across runs, so they can't be used for
+      EXPLAIN EXTENDED.
+    */
+    str->append(STRING_WITH_LEN("<temporary table>"));
+  }
+  else
+    str->append(table_name, tab->table->s->table_name.length);
   KEY *key_info= tab->table->key_info+ tab->ref.key;
   str->append(STRING_WITH_LEN(" on "));
   str->append(key_info->name);
@@ -2493,6 +3351,29 @@ void subselect_uniquesubquery_engine::print(String *str,
   str->append(')');
 }
 
+/*
+TODO:
+The above ::print method should be changed as below. Do it after
+all other tests pass.
+
+void subselect_uniquesubquery_engine::print(String *str)
+{
+  KEY *key_info= tab->table->key_info + tab->ref.key;
+  str->append(STRING_WITH_LEN("<primary_index_lookup>("));
+  for (uint i= 0; i < key_info->key_parts; i++)
+    tab->ref.items[i]->print(str);
+  str->append(STRING_WITH_LEN(" in "));
+  str->append(tab->table->s->table_name.str, tab->table->s->table_name.length);
+  str->append(STRING_WITH_LEN(" on "));
+  str->append(key_info->name);
+  if (cond)
+  {
+    str->append(STRING_WITH_LEN(" where "));
+    cond->print(str);
+  }
+  str->append(')');
+}
+*/
 
 void subselect_indexsubquery_engine::print(String *str,
                                            enum_query_type query_type)
@@ -2532,7 +3413,7 @@ void subselect_indexsubquery_engine::print(String *str,
 */
 
 bool subselect_single_select_engine::change_result(Item_subselect *si,
-                                                 select_subselect *res)
+                                                 select_result_interceptor *res)
 {
   item= si;
   result= res;
@@ -2553,7 +3434,7 @@ bool subselect_single_select_engine::change_result(Item_subselect *si,
 */
 
 bool subselect_union_engine::change_result(Item_subselect *si,
-                                         select_subselect *res)
+                                           select_result_interceptor *res)
 {
   item= si;
   int rc= unit->change_result(res, result);
@@ -2575,7 +3456,7 @@ bool subselect_union_engine::change_result(Item_subselect *si,
 */
 
 bool subselect_uniquesubquery_engine::change_result(Item_subselect *si,
-                                                  select_subselect *res)
+                                                    select_result_interceptor *res)
 {
   DBUG_ASSERT(0);
   return TRUE;
@@ -2643,5 +3524,1721 @@ bool subselect_union_engine::no_tables()
 bool subselect_uniquesubquery_engine::no_tables()
 {
   /* returning value is correct, but this method should never be called */
+  DBUG_ASSERT(FALSE);
+  return 0;
+}
+
+
+/******************************************************************************
+  WL#1110 - Implementation of class subselect_hash_sj_engine
+******************************************************************************/
+
+
+/**
+  Check if an IN predicate should be executed via partial matching using
+  only schema information.
+
+  @details
+  This test essentially has three results:
+  - partial matching is applicable, but cannot be executed due to a
+    limitation in the total number of indexes, as a result we can't
+    use subquery materialization at all.
+  - partial matching is either applicable or not, and this can be
+    determined by looking at 'this->max_keys'.
+  If max_keys > 1, then we need partial matching because there are
+  more indexes than just the one we use during materialization to
+  remove duplicates.
+
+  @note
+  TIMOUR: The schema-based analysis for partial matching can be done once for
+  prepared statement and remembered. It is done here to remove the need to
+  save/restore all related variables between each re-execution, thus making
+  the code simpler.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_schema()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  if (item_in->is_top_level_item())
+    return COMPLETE_MATCH;
+  else
+  {
+    List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
+    Item *outer_col, *inner_col;
+
+    for (uint i= 0; i < item_in->left_expr->cols(); i++)
+    {
+      outer_col= item_in->left_expr->element_index(i);
+      inner_col= inner_col_it++;
+
+      if (!inner_col->maybe_null && !outer_col->maybe_null)
+        bitmap_set_bit(&non_null_key_parts, i);
+      else
+      {
+        bitmap_set_bit(&partial_match_key_parts, i);
+        ++count_partial_match_columns;
+      }
+    }
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (count_partial_match_columns)
+    return PARTIAL_MATCH;
+  return COMPLETE_MATCH;
+}
+
+
+/**
+  Test whether an IN predicate must be computed via partial matching
+  based on the NULL statistics for each column of a materialized subquery.
+
+  @details The procedure analyzes column NULL statistics, updates the
+  matching type of columns that cannot be NULL or that contain only NULLs.
+  Based on this, the procedure determines the final execution strategy for
+  the [NOT] IN predicate.
+
+  @retval PARTIAL_MATCH  if a partial match should be used
+  @retval COMPLETE_MATCH if a complete match (index lookup) should be used
+*/
+
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_data()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  Item *outer_col;
+
+  /*
+    If we already determined that a complete match is enough based on schema
+    information, nothing can be better.
+  */
+  if (strategy == COMPLETE_MATCH)
+    return COMPLETE_MATCH;
+
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    if (!bitmap_is_set(&partial_match_key_parts, i))
+      continue;
+    outer_col= item_in->left_expr->element_index(i);
+    /*
+      If column 'i' doesn't contain NULLs, and the corresponding outer reference
+      cannot have a NULL value, then 'i' is a non-nullable column.
+    */
+    if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null)
+    {
+      bitmap_clear_bit(&partial_match_key_parts, i);
+      bitmap_set_bit(&non_null_key_parts, i);
+      --count_partial_match_columns;
+    }
+    if (result_sink->get_null_count_of_col(i) ==
+               tmp_table->file->stats.records)
+      ++count_null_only_columns;
+  }
+
+  /* If no column contains NULLs use regular hash index lookups. */
+  if (!count_partial_match_columns)
+    return COMPLETE_MATCH;
+  return PARTIAL_MATCH;
+}
+
+
+void
+subselect_hash_sj_engine::choose_partial_match_strategy(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  size_t pm_buff_size;
+
+  DBUG_ASSERT(strategy == PARTIAL_MATCH);
+  /*
+    Choose according to global optimizer switch. If only one of the switches is
+    'ON', then the remaining strategy is the only possible one. The only cases
+    when this will be overriden is when the total size of all buffers for the
+    merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
+    or if there isn't enough physical memory to allocate the buffers.
+  */
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+       optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_SCAN;
+  else if
+     ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+      !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+    strategy= PARTIAL_MATCH_MERGE;
+
+  /*
+    If both switches are ON, or both are OFF, we interpret that as "let the
+    optimizer decide". Perform a cost based choice between the two partial
+    matching strategies.
+  */
+  /*
+    TIMOUR: the above interpretation of the switch values could be changed to:
+    - if both are ON - let the optimizer decide,
+    - if both are OFF - do not use partial matching, therefore do not use
+      materialization in non-top-level predicates.
+    The problem with this is that we know for sure if we need partial matching
+    only after the subquery is materialized, and this is too late to revert to
+    the IN=>EXISTS strategy.
+  */
+  if (strategy == PARTIAL_MATCH)
+  {
+    /*
+      TIMOUR: Currently we use a super simplistic measure. This will be
+      addressed in a separate task.
+    */
+    if (tmp_table->file->stats.records < 100)
+      strategy= PARTIAL_MATCH_SCAN;
+    else
+      strategy= PARTIAL_MATCH_MERGE;
+  }
+
+  /* Check if there is enough memory for the rowid merge strategy. */
+  if (strategy == PARTIAL_MATCH_MERGE)
+  {
+    pm_buff_size= rowid_merge_buff_size(has_non_null_key,
+                                        has_covering_null_row,
+                                        partial_match_key_parts);
+    if (pm_buff_size > thd->variables.rowid_merge_buff_size)
+      strategy= PARTIAL_MATCH_SCAN;
+  }
+}
+
+
+/*
+  Compute the memory size of all buffers proportional to the number of rows
+  in tmp_table.
+
+  @details
+  If the result is bigger than thd->variables.rowid_merge_buff_size, partial
+  matching via merging is not applicable.
+*/
+
+size_t subselect_hash_sj_engine::rowid_merge_buff_size(
+  bool has_non_null_key, bool has_covering_null_row,
+  MY_BITMAP *partial_match_key_parts)
+{
+  size_t buff_size; /* Total size of all buffers used by partial matching. */
+  ha_rows row_count= tmp_table->file->stats.records;
+  uint rowid_length= tmp_table->file->ref_length;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+
+  /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
+  buff_size= row_count * rowid_length * sizeof(uchar);
+
+  if (has_non_null_key)
+  {
+    /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
+    buff_size+= row_count * sizeof(rownum_t);
+  }
+
+  if (!has_covering_null_row)
+  {
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      if (!bitmap_is_set(partial_match_key_parts, i) ||
+          result_sink->get_null_count_of_col(i) == row_count)
+        continue; /* In these cases we wouldn't construct Ordered keys. */
+
+      /* Add the size of Ordered_key::key_buff */
+      buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
+                         sizeof(rownum_t);
+      /* Add the size of Ordered_key::null_key */
+      buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i));
+    }
+  }
+
+  return buff_size;
+}
+
+
+/*
+  Initialize a MY_BITMAP with a buffer allocated on the current
+  memory root.
+  TIMOUR: move to bitmap C file?
+*/
+
+static my_bool
+bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root)
+{
+  my_bitmap_map *bitmap_buf;
+
+  if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root,
+                                                bitmap_buffer_size(n_bits))) ||
+      bitmap_init(map, bitmap_buf, n_bits, FALSE))
+    return TRUE;
+  bitmap_clear_all(map);
+  return FALSE;
+}
+
+
+/**
+  Create all structures needed for IN execution that can live between PS
+  reexecution.
+
+  @param tmp_columns the items that produce the data for the temp table
+
+  @details
+  - Create a temporary table to store the result of the IN subquery. The
+    temporary table has one hash index on all its columns.
+  - Create a new result sink that sends the result stream of the subquery to
+    the temporary table,
+
+  @notice:
+    Currently Item_subselect::init() already chooses and creates at parse
+    time an engine with a corresponding JOIN to execute the subquery.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
+{
+  /* Options to create_tmp_table. */
+  ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
+                             /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
+
+  DBUG_ENTER("subselect_hash_sj_engine::init_permanent");
+
+  if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
+                            thd->mem_root) ||
+      bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements,
+                            thd->mem_root))
+    DBUG_RETURN(TRUE);
+
+  /*
+    Create and initialize a select result interceptor that stores the
+    result stream in a temporary table. The temporary table itself is
+    managed (created/filled/etc) internally by the interceptor.
+  */
+/*
+  TIMOUR:
+  Select a more efficient result sink when we know there is no need to collect
+  data statistics.
+
+  if (strategy == COMPLETE_MATCH)
+  {
+    if (!(result= new select_union))
+      DBUG_RETURN(TRUE);
+  }
+  else if (strategy == PARTIAL_MATCH)
+  {
+  if (!(result= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+  }
+*/
+  if (!(result= new select_materialize_with_stats))
+    DBUG_RETURN(TRUE);
+
+  if (((select_union*) result)->create_result_table(
+                         thd, tmp_columns, TRUE, tmp_create_options,
+                         "materialized subselect", TRUE))
+    DBUG_RETURN(TRUE);
+
+  tmp_table= ((select_union*) result)->table;
+
+  /*
+    If the subquery has blobs, or the total key lenght is bigger than
+    some length, or the total number of key parts is more than the
+    allowed maximum (currently MAX_REF_PARTS == 16), then the created
+    index cannot be used for lookups and we can't use hash semi
+    join. If this is the case, delete the temporary table since it
+    will not be used, and tell the caller we failed to initialize the
+    engine.
+  */
+  if (tmp_table->s->keys == 0)
+  {
+    DBUG_ASSERT(
+      tmp_table->s->uniques ||
+      tmp_table->key_info->key_length >= tmp_table->file->max_key_length() ||
+      tmp_table->key_info->key_parts > tmp_table->file->max_key_parts());
+    free_tmp_table(thd, tmp_table);
+    tmp_table= NULL;
+    delete result;
+    result= NULL;
+    DBUG_RETURN(TRUE);
+  }
+
+  /*
+    Make sure there is only one index on the temp table, and it doesn't have
+    the extra key part created when s->uniques > 0.
+  */
+  DBUG_ASSERT(tmp_table->s->keys == 1 &&
+              ((Item_in_subselect *) item)->left_expr->cols() ==
+              tmp_table->key_info->key_parts);
+
+  if (make_semi_join_conds() ||
+      /* A unique_engine is used both for complete and partial matching. */
+      !(lookup_engine= make_unique_engine()))
+    DBUG_RETURN(TRUE);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Create an artificial condition to post-filter those rows matched by index
+  lookups that cannot be distinguished by the index lookup procedure.
+
+  @notes
+  The need for post-filtering may occur e.g. because of
+  truncation. Prepared statements execution requires that fix_fields is
+  called for every execution. In order to call fix_fields we need to
+  create a Name_resolution_context and a corresponding TABLE_LIST for
+  the temporary table for the subquery, so that all column references
+  to the materialized subquery table can be resolved correctly.
+
+  @returns
+    @retval TRUE  memory allocation error occurred
+    @retval FALSE the conditions were created and resolved (fixed)
+*/
+
+bool subselect_hash_sj_engine::make_semi_join_conds()
+{
+  /*
+    Table reference for tmp_table that is used to resolve column references
+    (Item_fields) to columns in tmp_table.
+  */
+  TABLE_LIST *tmp_table_ref;
+  /* Name resolution context for all tmp_table columns created below. */
+  Name_resolution_context *context;
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds");
+  DBUG_ASSERT(semi_join_conds == NULL);
+
+  if (!(semi_join_conds= new Item_cond_and))
+    DBUG_RETURN(TRUE);
+
+  if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST))))
+    DBUG_RETURN(TRUE);
+
+  tmp_table_ref->init_one_table("", "materialized subselect", TL_READ);
+  tmp_table_ref->table= tmp_table;
+
+  context= new Name_resolution_context;
+  context->init();
+  context->first_name_resolution_table=
+    context->last_name_resolution_table= tmp_table_ref;
+  
+  for (uint i= 0; i < item_in->left_expr->cols(); i++)
+  {
+    Item_func_eq *eq_cond; /* New equi-join condition for the current column. */
+    /* Item for the corresponding field from the materialized temp table. */
+    Item_field *right_col_item;
+
+    if (!(right_col_item= new Item_field(thd, context, tmp_table->field[i])) ||
+        !(eq_cond= new Item_func_eq(item_in->left_expr->element_index(i),
+                                    right_col_item)) ||
+        (((Item_cond_and*)semi_join_conds)->add(eq_cond)))
+    {
+      delete semi_join_conds;
+      semi_join_conds= NULL;
+      DBUG_RETURN(TRUE);
+    }
+  }
+  if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    DBUG_RETURN(TRUE);
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Create a new uniquesubquery engine for the execution of an IN predicate.
+
+  @details
+  Create and initialize a new JOIN_TAB, and Table_ref objects to perform
+  lookups into the indexed temporary table.
+
+  @retval A new subselect_hash_sj_engine object
+  @retval NULL if a memory allocation error occurs
+*/
+
+subselect_uniquesubquery_engine*
+subselect_hash_sj_engine::make_unique_engine()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  Item_iterator_row it(item_in->left_expr);
+  /* The only index on the temporary table. */
+  KEY *tmp_key= tmp_table->key_info;
+  JOIN_TAB *tab;
+
+  DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");
+
+  /*
+    Create and initialize the JOIN_TAB that represents an index lookup
+    plan operator into the materialized subquery result. Notice that:
+    - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and
+    - here we initialize only those members that are used by
+      subselect_uniquesubquery_engine, so these objects are incomplete.
+  */
+  if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
+    DBUG_RETURN(NULL);
+
+  tab->table= tmp_table;
+  tab->ref.tmp_table_index_lookup_init(thd, tmp_key, it, FALSE);
+
+  DBUG_RETURN(new subselect_uniquesubquery_engine(thd, tab, item,
+                                                  semi_join_conds));
+}
+
+
+/**
+  Initialize members of the engine that need to be re-initilized at each
+  execution.
+
+  @retval TRUE  if a memory allocation error occurred
+  @retval FALSE if success
+*/
+
+bool subselect_hash_sj_engine::init_runtime()
+{
+  /*
+    Create and optimize the JOIN that will be used to materialize
+    the subquery if not yet created.
+  */
+  materialize_engine->prepare();
+  /*
+    Repeat name resolution for 'cond' since cond is not part of any
+    clause of the query, and it is not 'fixed' during JOIN::prepare.
+  */
+  if (semi_join_conds && !semi_join_conds->fixed &&
+      semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+    return TRUE;
+  /* Let our engine reuse this query plan for materialization. */
+  materialize_join= materialize_engine->join;
+  materialize_join->change_result(result);
+  return FALSE;
+}
+
+
+subselect_hash_sj_engine::~subselect_hash_sj_engine()
+{
+  delete lookup_engine;
+  delete result;
+  if (tmp_table)
+    free_tmp_table(thd, tmp_table);
+}
+
+
+/**
+  Cleanup performed after each PS execution.
+
+  @details
+  Called in the end of JOIN::prepare for PS from Item_subselect::cleanup.
+*/
+
+void subselect_hash_sj_engine::cleanup()
+{
+  enum_engine_type lookup_engine_type= lookup_engine->engine_type();
+  is_materialized= FALSE;
+  bitmap_clear_all(&non_null_key_parts);
+  bitmap_clear_all(&partial_match_key_parts);
+  count_partial_match_columns= 0;
+  count_null_only_columns= 0;
+  strategy= UNDEFINED;
+  materialize_engine->cleanup();
+  if (lookup_engine_type == TABLE_SCAN_ENGINE ||
+      lookup_engine_type == ROWID_MERGE_ENGINE)
+  {
+    subselect_engine *inner_lookup_engine;
+    inner_lookup_engine=
+      ((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
+    /*
+      Partial match engines are recreated for each PS execution inside
+      subselect_hash_sj_engine::exec().
+    */
+    delete lookup_engine;
+    lookup_engine= inner_lookup_engine;
+  }
+  DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
+  lookup_engine->cleanup();
+  result->cleanup(); /* Resets the temp table as well. */
+}
+
+
+/**
+  Execute a subquery IN predicate via materialization.
+
+  @details
+  If needed materialize the subquery into a temporary table, then
+  copmpute the predicate via a lookup into this table.
+
+  @retval TRUE  if error
+  @retval FALSE otherwise
+*/
+
+int subselect_hash_sj_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  SELECT_LEX *save_select= thd->lex->current_select;
+  subselect_partial_match_engine *pm_engine= NULL;
+  int res= 0;
+
+  DBUG_ENTER("subselect_hash_sj_engine::exec");
+
+  /*
+    Optimize and materialize the subquery during the first execution of
+    the subquery predicate.
+  */
+  thd->lex->current_select= materialize_engine->select_lex;
+  if ((res= materialize_join->optimize()))
+    goto err; /* purecov: inspected */
+  DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
+  materialize_join->exec();
+  if ((res= test(materialize_join->error || thd->is_fatal_error)))
+    goto err;
+
+  /*
+    TODO:
+    - Unlock all subquery tables as we don't need them. To implement this
+      we need to add new functionality to JOIN::join_free that can unlock
+      all tables in a subquery (and all its subqueries).
+    - The temp table used for grouping in the subquery can be freed
+      immediately after materialization (yet it's done together with
+      unlocking).
+  */
+  is_materialized= TRUE;
+  /*
+    If the subquery returned no rows, the temporary table is empty, so we know
+    directly that the result of IN is FALSE. We first update the table
+    statistics, then we test if the temporary table for the query result is
+    empty.
+  */
+  tmp_table->file->info(HA_STATUS_VARIABLE);
+  if (!tmp_table->file->stats.records)
+  {
+    item_in->value= FALSE;
+    /* The value of IN will not change during this execution. */
+    item_in->is_constant= TRUE;
+    item_in->set_first_execution();
+    /* TIMOUR: check if we need this: item_in->null_value= FALSE; */
+    DBUG_RETURN(FALSE);
+  }
+
+  /*
+    TIMOUR: The schema-based analysis for partial matching can be done once for
+    prepared statement and remembered. It is done here to remove the need to
+    save/restore all related variables between each re-execution, thus making
+    the code simpler.
+  */
+  strategy= get_strategy_using_schema();
+  /* This call may discover that we don't need partial matching at all. */
+  strategy= get_strategy_using_data();
+  if (strategy == PARTIAL_MATCH)
+  {
+    uint count_pm_keys; /* Total number of keys needed for partial matching. */
+    MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */
+    uint covering_null_row_width;
+    select_materialize_with_stats *result_sink=
+      (select_materialize_with_stats *) result;
+
+    nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ?
+                  &non_null_key_parts : NULL;
+
+    if (result_sink->get_max_nulls_in_row() ==
+        tmp_table->s->fields -
+        (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0))
+      covering_null_row_width= result_sink->get_max_nulls_in_row();
+    else
+      covering_null_row_width= 0;
+
+    if (covering_null_row_width)
+      count_pm_keys= nn_key_parts ? 1 : 0;
+    else
+      count_pm_keys= count_partial_match_columns - count_null_only_columns +
+        (nn_key_parts ? 1 : 0);
+
+    choose_partial_match_strategy(test(nn_key_parts),
+                                  test(covering_null_row_width),
+                                  &partial_match_key_parts);
+    DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
+                strategy == PARTIAL_MATCH_SCAN);
+    if (strategy == PARTIAL_MATCH_MERGE)
+    {
+      pm_engine=
+        new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
+                                         lookup_engine, tmp_table,
+                                         count_pm_keys,
+                                         covering_null_row_width,
+                                         item, result,
+                                         semi_join_conds->argument_list());
+      if (!pm_engine ||
+          ((subselect_rowid_merge_engine*) pm_engine)->
+            init(nn_key_parts, &partial_match_key_parts))
+      {
+        /*
+          The call to init() would fail if there was not enough memory to allocate
+          all buffers for the rowid merge strategy. In this case revert to table
+          scanning which doesn't need any big buffers.
+        */
+        delete pm_engine;
+        pm_engine= NULL;
+        strategy= PARTIAL_MATCH_SCAN;
+      }
+    }
+
+    if (strategy == PARTIAL_MATCH_SCAN)
+    {
+      if (!(pm_engine=
+            new subselect_table_scan_engine((subselect_uniquesubquery_engine*)
+                                            lookup_engine, tmp_table,
+                                            item, result,
+                                            semi_join_conds->argument_list(),
+                                            covering_null_row_width)))
+      {
+        /* This is an irrecoverable error. */
+        res= 1;
+        goto err;
+      }
+    }
+  }
+
+  if (pm_engine)
+    lookup_engine= pm_engine;
+  item_in->change_engine(lookup_engine);
+
+err:
+  thd->lex->current_select= save_select;
+  DBUG_RETURN(res);
+}
+
+
+/**
+  Print the state of this engine into a string for debugging and views.
+*/
+
+void subselect_hash_sj_engine::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN(" <materialize> ("));
+  materialize_engine->print(str, query_type);
+  str->append(STRING_WITH_LEN(" ), "));
+
+  if (lookup_engine)
+    lookup_engine->print(str, query_type);
+  else
+    str->append(STRING_WITH_LEN(
+           "<engine selected at execution time>"
+         ));
+}
+
+void subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row)
+{
+  DBUG_ASSERT(FALSE);
+}
+
+void subselect_hash_sj_engine::exclude()
+{
+  DBUG_ASSERT(FALSE);
+}
+
+bool subselect_hash_sj_engine::no_tables()
+{
+  DBUG_ASSERT(FALSE);
+  return FALSE;
+}
+
+bool subselect_hash_sj_engine::change_result(Item_subselect *si,
+                                             select_result_interceptor *res)
+{
+  DBUG_ASSERT(FALSE);
+  return TRUE;
+}
+
+
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+                         ha_rows null_count_arg, ha_rows min_null_row_arg,
+                         ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+  : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
+    row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
+{
+  DBUG_ASSERT(tbl->file->stats.records > null_count);
+  key_buff_elements= tbl->file->stats.records - null_count;
+  cur_key_idx= HA_POS_ERROR;
+
+  DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) ||
+              (!null_count && !min_null_row_arg && !max_null_row_arg));
+  if (null_count)
+  {
+    /* The counters are 1-based, for key access we need 0-based indexes. */
+    min_null_row= min_null_row_arg - 1;
+    max_null_row= max_null_row_arg - 1;
+  }
+  else
+    min_null_row= max_null_row= 0;
+}
+
+
+Ordered_key::~Ordered_key()
+{
+  my_free((char*) key_buff, MYF(0));
+  bitmap_free(&null_key);
+}
+
+
+/*
+  Cleanup that needs to be done for each PS (re)execution.
+*/
+
+void Ordered_key::cleanup()
+{
+  /*
+    Currently these keys are recreated for each PS re-execution, thus
+    there is nothing to cleanup, the whole object goes away after execution
+    is over. All handler related initialization/deinitialization is done by
+    the parent subselect_rowid_merge_engine object.
+  */
+}
+
+
+/*
+  Initialize a multi-column index.
+*/
+
+bool Ordered_key::init(MY_BITMAP *columns_to_index)
+{
+  THD *thd= tbl->in_use;
+  uint cur_key_col= 0;
+  Item_field *cur_tmp_field;
+  Item_func_lt *fn_less_than;
+
+  key_column_count= bitmap_bits_set(columns_to_index);
+
+  // TIMOUR: check for mem allocation err, revert to scan
+
+  key_columns= (Item_field**) thd->alloc(key_column_count *
+                                         sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(key_column_count *
+                                            sizeof(Item_func_lt*));
+
+  for (uint i= 0; i < columns_to_index->n_bits; i++)
+  {
+    if (!bitmap_is_set(columns_to_index, i))
+      continue;
+    cur_tmp_field= new Item_field(tbl->field[i]);
+    /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+    fn_less_than= new Item_func_lt(cur_tmp_field,
+                                   search_key->element_index(i));
+    fn_less_than->fix_fields(thd, (Item**) &fn_less_than);
+    key_columns[cur_key_col]= cur_tmp_field;
+    compare_pred[cur_key_col]= fn_less_than;
+    ++cur_key_col;
+  }
+
+  if (alloc_keys_buffers())
+  {
+    /* TIMOUR revert to partial match via table scan. */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Initialize a single-column index.
+*/
+
+bool Ordered_key::init(int col_idx)
+{
+  THD *thd= tbl->in_use;
+
+  key_column_count= 1;
+
+  // TIMOUR: check for mem allocation err, revert to scan
+
+  key_columns= (Item_field**) thd->alloc(sizeof(Item_field*));
+  compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*));
+
+  key_columns[0]= new Item_field(tbl->field[col_idx]);
+  /* Create the predicate (tmp_column[i] < outer_ref[i]). */
+  compare_pred[0]= new Item_func_lt(key_columns[0],
+                                    search_key->element_index(col_idx));
+  compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]);
+
+  if (alloc_keys_buffers())
+  {
+    /* TIMOUR revert to partial match via table scan. */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Allocate the buffers for both the row number, and the NULL-bitmap indexes.
+*/
+
+bool Ordered_key::alloc_keys_buffers()
+{
+  DBUG_ASSERT(key_buff_elements > 0);
+
+  if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t),
+                                        MYF(MY_WME))))
+    return TRUE;
+
+  /*
+    TIMOUR: it is enough to create bitmaps with size
+    (max_null_row - min_null_row), and then use min_null_row as
+    lookup offset.
+  */
+  /* Notice that max_null_row is max array index, we need count, so +1. */
+  if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE))
+    return TRUE;
+
+  cur_key_idx= HA_POS_ERROR;
+
+  return FALSE;
+}
+
+
+/*
+  Quick sort comparison function that compares two rows of the same table
+  indentfied with their row numbers.
+
+  @retval -1
+  @retval  0
+  @retval +1
+*/
+
+int
+Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b)
+{
+  uchar *rowid_a, *rowid_b;
+  int error, cmp_res;
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tbl->file->ref_length;
+
+  if (a == b)
+    return 0;
+  /* Get the corresponding rowids. */
+  rowid_a= row_num_to_rowid + a * rowid_length;
+  rowid_b= row_num_to_rowid + b * rowid_length;
+  /* Fetch the rows for comparison. */
+  error= tbl->file->ha_rnd_pos(tbl->record[0], rowid_a);
+  DBUG_ASSERT(!error);
+  error= tbl->file->ha_rnd_pos(tbl->record[1], rowid_b);
+  DBUG_ASSERT(!error);
+  /*
+    Compare the two rows by the corresponding values of the indexed
+    columns.
+  */
+  for (uint i= 0; i < key_column_count; i++)
+  {
+    Field *cur_field= key_columns[i]->field;
+    if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length)))
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+int
+Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b)
+{
+  /* The result of comparing the two keys according to their row data. */
+  int cmp_row_res= key->cmp_keys_by_row_data(*a, *b);
+  if (cmp_row_res)
+    return cmp_row_res;
+  return (*a < *b) ? -1 : (*a > *b) ? 1 : 0;
+}
+
+
+void Ordered_key::sort_keys()
+{
+  my_qsort2(key_buff, key_buff_elements, sizeof(rownum_t),
+            (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this);
+  /* Invalidate the current row position. */
+  cur_key_idx= HA_POS_ERROR;
+}
+
+
+/*
+  The fraction of rows that do not contain NULL in the columns indexed by
+  this key.
+
+  @retval  1  if there are no NULLs
+  @retval  0  if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+  /* We should not be processing empty tables. */
+  DBUG_ASSERT(tbl->file->stats.records);
+  return (1 - (double) null_count / (double) tbl->file->stats.records);
+}
+
+
+/*
+  Compare the value(s) of the current key in 'search_key' with the
+  data of the current table record.
+
+  @notes The comparison result follows from the way compare_pred
+  is created in Ordered_key::init. Currently compare_pred compares
+  a field in of the current row with the corresponding Item that
+  contains the search key.
+
+  @param row_num  Number of the row (not index in the key_buff array)
+
+  @retval -1  if (current row  < search_key)
+  @retval  0  if (current row == search_key)
+  @retval +1  if (current row  > search_key)
+*/
+
+int Ordered_key::cmp_key_with_search_key(rownum_t row_num)
+{
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tbl->file->ref_length;
+  uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length;
+  int error, cmp_res;
+
+  error= tbl->file->ha_rnd_pos(tbl->record[0], cur_rowid);
+  DBUG_ASSERT(!error);
+
+  for (uint i= 0; i < key_column_count; i++)
+  {
+    cmp_res= compare_pred[i]->get_comparator()->compare();
+    /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */
+    DBUG_ASSERT(!compare_pred[i]->null_value);
+    if (cmp_res)
+      return (cmp_res > 0 ? 1 : -1);
+  }
+  return 0;
+}
+
+
+/*
+  Find a key in a sorted array of keys via binary search.
+
+  see create_subq_in_equalities()
+*/
+
+bool Ordered_key::lookup()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  ha_rows lo= 0;
+  ha_rows hi= key_buff_elements - 1;
+  ha_rows mid;
+  int cmp_res;
+
+  while (lo <= hi)
+  {
+    mid= lo + (hi - lo) / 2;
+    cmp_res= cmp_key_with_search_key(key_buff[mid]);
+    /*
+      In order to find the minimum match, check if the pevious element is
+      equal or smaller than the found one. If equal, we need to search further
+      to the left.
+    */
+    if (!cmp_res && mid > 0)
+      cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0;
+
+    if (cmp_res == -1)
+    {
+      /* row[mid] < search_key */
+      lo= mid + 1;
+    }
+    else if (cmp_res == 1)
+    {
+      /* row[mid] > search_key */
+      if (!mid)
+        goto not_found;
+      hi= mid - 1;
+    }
+    else
+    {
+      /* row[mid] == search_key */
+      cur_key_idx= mid;
+      return TRUE;
+    }
+  }
+not_found:
+  cur_key_idx= HA_POS_ERROR;
+  return FALSE;
+}
+
+
+/*
+  Move the current index pointer to the next key with the same column
+  values as the current key. Since the index is sorted, all such keys
+  are contiguous.
+*/
+
+bool Ordered_key::next_same()
+{
+  DBUG_ASSERT(key_buff_elements);
+
+  if (cur_key_idx < key_buff_elements - 1)
+  {
+    /*
+      TIMOUR:
+      The below is quite inefficient, since as a result we will fetch every
+      row (except the last one) twice. There must be a more efficient way,
+      e.g. swapping record[0] and record[1], and reading only the new record.
+    */
+    if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1]))
+    {
+      ++cur_key_idx;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+void Ordered_key::print(String *str)
+{
+  uint i;
+  str->append("{idx=");
+  str->qs_append(keyid);
+  str->append(", (");
+  for (i= 0; i < key_column_count - 1; i++)
+  {
+    str->append(key_columns[i]->field->field_name);
+    str->append(", ");
+  }
+  str->append(key_columns[i]->field->field_name);
+  str->append("), ");
+
+  str->append("null_bitmap: (bits=");
+  str->qs_append(null_key.n_bits);
+  str->append(", nulls= ");
+  str->qs_append((double)null_count);
+  str->append(", min_null= ");
+  str->qs_append((double)min_null_row);
+  str->append(", max_null= ");
+  str->qs_append((double)max_null_row);
+  str->append("), ");
+
+  str->append('}');
+}
+
+
+subselect_partial_match_engine::subselect_partial_match_engine(
+  subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg, Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  uint covering_null_row_width_arg)
+  :subselect_engine(item_arg, result_arg),
+   tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+   equi_join_conds(equi_join_conds_arg),
+   covering_null_row_width(covering_null_row_width_arg)
+{}
+
+
+int subselect_partial_match_engine::exec()
+{
+  Item_in_subselect *item_in= (Item_in_subselect *) item;
+  int res;
+
+  /* Try to find a matching row by index lookup. */
+  res= lookup_engine->copy_ref_key_simple();
+  if (res == -1)
+  {
+    /* The result is FALSE based on the outer reference. */
+    item_in->value= 0;
+    item_in->null_value= 0;
+    return 0;
+  }
+  else if (res == 0)
+  {
+    /* Search for a complete match. */
+    if ((res= lookup_engine->index_lookup()))
+    {
+      /* An error occured during lookup(). */
+      item_in->value= 0;
+      item_in->null_value= 0;
+      return res;
+    }
+    else if (item_in->value)
+    {
+      /*
+        A complete match was found, the result of IN is TRUE.
+        Notice: (this->item == lookup_engine->item)
+      */
+      return 0;
+    }
+  }
+
+  if (covering_null_row_width == tmp_table->s->fields)
+  {
+    /*
+      If there is a NULL-only row that coveres all columns the result of IN
+      is UNKNOWN. 
+    */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+    return 0;
+  }
+
+  /*
+    There is no complete match. Look for a partial match (UNKNOWN result), or
+    no match (FALSE).
+  */
+  if (tmp_table->file->inited)
+    tmp_table->file->ha_index_end();
+
+  if (partial_match())
+  {
+    /* The result of IN is UNKNOWN. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 1;
+    item_in->null_value= 1;
+  }
+  else
+  {
+    /* The result of IN is FALSE. */
+    item_in->value= 0;
+    /*
+      TIMOUR: which one is the right way to propagate an UNKNOWN result?
+      Should we also set empty_result_set= FALSE; ???
+    */
+    //item_in->was_null= 0;
+    item_in->null_value= 0;
+  }
+
   return 0;
 }
+
+
+void subselect_partial_match_engine::print(String *str,
+                                           enum_query_type query_type)
+{
+  /*
+    Should never be called as the actual engine cannot be known at query
+    optimization time.
+    DBUG_ASSERT(FALSE);
+  */
+}
+
+
+/*
+  @param non_null_key_parts  
+  @param partial_match_key_parts  A union of all single-column NULL key parts.
+  @param count_partial_match_columns Number of NULL keyparts (set bits above).
+
+  @retval FALSE  the engine was initialized successfully
+  @retval TRUE   there was some (memory allocation) error during initialization,
+                 such errors should be interpreted as revert to other strategy
+*/
+
+bool
+subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts,
+                                   MY_BITMAP *partial_match_key_parts)
+{
+  /* The length in bytes of the rowids (positions) of tmp_table. */
+  uint rowid_length= tmp_table->file->ref_length;
+  ha_rows row_count= tmp_table->file->stats.records;
+  rownum_t cur_rownum= 0;
+  select_materialize_with_stats *result_sink=
+    (select_materialize_with_stats *) result;
+  uint cur_keyid= 0;
+  Item_in_subselect *item_in= (Item_in_subselect*) item;
+  int error;
+
+  if (keys_count == 0)
+  {
+    /* There is nothing to initialize, we will only do regular lookups. */
+    return FALSE;
+  }
+
+  DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width &&
+                                           keys_count == 1 &&
+                                           non_null_key_parts));
+  /*
+    Allocate buffers to hold the merged keys and the mapping between rowids and
+    row numbers.
+  */
+  if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count *
+                                               sizeof(Ordered_key*))) ||
+      !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length *
+                                             sizeof(uchar), MYF(MY_WME))))
+    return TRUE;
+
+  /* Create the only non-NULL key if there is any. */
+  if (non_null_key_parts)
+  {
+    non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
+                                  0, 0, 0, row_num_to_rowid);
+    if (non_null_key->init(non_null_key_parts))
+      return TRUE;
+    merge_keys[cur_keyid]= non_null_key;
+    merge_keys[cur_keyid]->first();
+    ++cur_keyid;
+  }
+
+  /*
+    If there is a covering NULL row, the only key that is needed is the
+    only non-NULL key that is already created above. We create keys on
+    NULL-able columns only if there is no covering NULL row.
+  */
+  if (!covering_null_row_width)
+  {
+    if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) ||
+        bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) ||
+        bitmap_init_memroot(&null_only_columns, keys_count, thd->mem_root))
+      return TRUE;
+
+    /*
+      Create one single-column NULL-key for each column in
+      partial_match_key_parts.
+    */
+    for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+    {
+      if (!bitmap_is_set(partial_match_key_parts, i))
+        continue;
+
+      if (result_sink->get_null_count_of_col(i) == row_count)
+      {
+        bitmap_set_bit(&null_only_columns, cur_keyid);
+        continue;
+      }
+      else
+      {
+        merge_keys[cur_keyid]= new Ordered_key(
+                                     cur_keyid, tmp_table,
+                                     item_in->left_expr->element_index(i),
+                                     result_sink->get_null_count_of_col(i),
+                                     result_sink->get_min_null_of_col(i),
+                                     result_sink->get_max_null_of_col(i),
+                                     row_num_to_rowid);
+        if (merge_keys[cur_keyid]->init(i))
+          return TRUE;
+        merge_keys[cur_keyid]->first();
+      }
+      ++cur_keyid;
+    }
+  }
+  DBUG_ASSERT(cur_keyid == keys_count);
+
+  /* Populate the indexes with data from the temporary table. */
+  if (tmp_table->file->ha_rnd_init_with_error(1))
+    return TRUE;
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
+  tmp_table->null_row= 0;
+  while (TRUE)
+  {
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error == HA_ERR_RECORD_DELETED)
+    {
+      /* We get this for duplicate records that should not be in tmp_table. */
+      continue;
+    }
+    /*
+      This is a temp table that we fully own, there should be no other
+      cause to stop the iteration than EOF.
+    */
+    DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE);
+    if (error == HA_ERR_END_OF_FILE)
+    {
+      DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records);
+      break;
+    }
+
+    /*
+      Save the position of this record in the row_num -> rowid mapping.
+    */
+    tmp_table->file->position(tmp_table->record[0]);
+    memcpy(row_num_to_rowid + cur_rownum * rowid_length,
+           tmp_table->file->ref, rowid_length);
+
+    /* Add the current row number to the corresponding keys. */
+    if (non_null_key)
+    {
+      /* By definition there are no NULLs in the non-NULL key. */
+      non_null_key->add_key(cur_rownum);
+    }
+
+    for (uint i= (non_null_key ? 1 : 0); i < keys_count; i++)
+    {
+      /*
+        Check if the first and only indexed column contains NULL in the curent
+        row, and add the row number to the corresponding key.
+      */
+      if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null())
+        merge_keys[i]->set_null(cur_rownum);
+      else
+        merge_keys[i]->add_key(cur_rownum);
+    }
+    ++cur_rownum;
+  }
+
+  tmp_table->file->ha_rnd_end();
+
+  /* Sort all the keys by their NULL selectivity. */
+  my_qsort(merge_keys, keys_count, sizeof(Ordered_key*),
+           (qsort_cmp) cmp_keys_by_null_selectivity);
+
+  /* Sort the keys in each of the indexes. */
+  for (uint i= 0; i < keys_count; i++)
+    merge_keys[i]->sort_keys();
+
+  if (init_queue(&pq, keys_count, 0, FALSE,
+                 subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL,
+                 0, 0))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
+{
+  /* None of the resources below is allocated if there are no ordered keys. */
+  if (keys_count)
+  {
+    my_free((char*) row_num_to_rowid, MYF(0));
+    for (uint i= 0; i < keys_count; i++)
+      delete merge_keys[i];
+    delete_queue(&pq);
+    if (tmp_table->file->inited == handler::RND)
+      tmp_table->file->ha_rnd_end();
+  }
+}
+
+
+void subselect_rowid_merge_engine::cleanup()
+{
+}
+
+
+/*
+  Quick sort comparison function to compare keys in order of decreasing bitmap
+  selectivity, so that the most selective keys come first.
+
+  @param  k1 first key to compare
+  @param  k2 second key to compare
+
+  @retval  1  if k1 is less selective than k2
+  @retval  0  if k1 is equally selective as k2
+  @retval -1  if k1 is more selective than k2
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+                                                           Ordered_key **k2)
+{
+  double k1_sel= (*k1)->null_selectivity();
+  double k2_sel= (*k2)->null_selectivity();
+  if (k1_sel < k2_sel)
+    return 1;
+  if (k1_sel > k2_sel)
+    return -1;
+  return 0;
+}
+
+
+/*
+*/
+
+int
+subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg,
+                                                     uchar *k1, uchar *k2)
+{
+  rownum_t r1= ((Ordered_key*) k1)->current();
+  rownum_t r2= ((Ordered_key*) k2)->current();
+
+  return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0;
+}
+
+
+/*
+  Check if certain table row contains a NULL in all columns for which there is
+  no match in the corresponding value index.
+
+  @retval TRUE if a NULL row exists
+  @retval FALSE otherwise
+*/
+
+bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
+{
+  Ordered_key *cur_key;
+  uint cur_id;
+  for (uint i = 0; i < keys_count; i++)
+  {
+    cur_key= merge_keys[i];
+    cur_id= cur_key->get_keyid();
+    if (bitmap_is_set(&matching_keys, cur_id))
+    {
+      /*
+        The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num',
+        thus we skip it as it can't possibly match a NULL.
+      */
+      continue;
+    }
+    if (!cur_key->is_null(row_num))
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/*
+  @retval TRUE  there is a partial match (UNKNOWN)
+  @retval FALSE  there is no match at all (FALSE)
+*/
+
+bool subselect_rowid_merge_engine::partial_match()
+{
+  Ordered_key *min_key; /* Key that contains the current minimum position. */
+  rownum_t min_row_num; /* Current row number of min_key. */
+  Ordered_key *cur_key;
+  rownum_t cur_row_num;
+  uint count_nulls_in_search_key= 0;
+  bool res= FALSE;
+
+  /* If there is a non-NULL key, it must be the first key in the keys array. */
+  DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
+  /* The prioryty queue for keys must be empty. */
+  DBUG_ASSERT(!pq.elements);
+
+  /* All data accesses during execution are via handler::ha_rnd_pos() */
+  if (tmp_table->file->ha_rnd_init_with_error(0))
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /* Check if there is a match for the columns of the only non-NULL key. */
+  if (non_null_key && !non_null_key->lookup())
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  /*
+    If there is a NULL (sub)row that covers all NULL-able columns,
+    then there is a guranteed partial match, and we don't need to search
+    for the matching row.
+   */
+  if (covering_null_row_width)
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  if (non_null_key)
+    queue_insert(&pq, (uchar *) non_null_key);
+  /*
+    Do not add the non_null_key, since it was already processed above.
+  */
+  bitmap_clear_all(&matching_outer_cols);
+  for (uint i= test(non_null_key); i < keys_count; i++)
+  {
+    DBUG_ASSERT(merge_keys[i]->get_column_count() == 1);
+    if (merge_keys[i]->get_search_key(0)->is_null())
+    {
+      ++count_nulls_in_search_key;
+      bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
+    }
+    else if (merge_keys[i]->lookup())
+      queue_insert(&pq, (uchar *) merge_keys[i]);
+  }
+
+  /*
+    If the outer reference consists of only NULLs, or if it has NULLs in all
+    nullable columns, the result is UNKNOWN.
+  */
+  if (count_nulls_in_search_key ==
+      ((Item_in_subselect *) item)->left_expr->cols() -
+      (non_null_key ? non_null_key->get_column_count() : 0))
+  {
+    res= TRUE;
+    goto end;
+  }
+
+  /*
+    If there is no NULL (sub)row that covers all NULL columns, and there is no
+    single match for any of the NULL columns, the result is FALSE.
+  */
+  if (pq.elements - test(non_null_key) == 0)
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  DBUG_ASSERT(pq.elements);
+
+  min_key= (Ordered_key*) queue_remove_top(&pq);
+  min_row_num= min_key->current();
+  bitmap_copy(&matching_keys, &null_only_columns);
+  bitmap_set_bit(&matching_keys, min_key->get_keyid());
+  bitmap_union(&matching_keys, &matching_outer_cols);
+  if (min_key->next_same())
+    queue_insert(&pq, (uchar *) min_key);
+
+  if (pq.elements == 0)
+  {
+    /*
+      Check the only matching row of the only key min_key for NULL matches
+      in the other columns.
+    */
+    res= test_null_row(min_row_num);
+    goto end;
+  }
+
+  while (TRUE)
+  {
+    cur_key= (Ordered_key*) queue_remove_top(&pq);
+    cur_row_num= cur_key->current();
+
+    if (cur_row_num == min_row_num)
+      bitmap_set_bit(&matching_keys, cur_key->get_keyid());
+    else
+    {
+      /* Follows from the correct use of priority queue. */
+      DBUG_ASSERT(cur_row_num > min_row_num);
+      if (test_null_row(min_row_num))
+      {
+        res= TRUE;
+        goto end;
+      }
+      else
+      {
+        min_key= cur_key;
+        min_row_num= cur_row_num;
+        bitmap_copy(&matching_keys, &null_only_columns);
+        bitmap_set_bit(&matching_keys, min_key->get_keyid());
+        bitmap_union(&matching_keys, &matching_outer_cols);
+      }
+    }
+
+    if (cur_key->next_same())
+      queue_insert(&pq, (uchar *) cur_key);
+
+    if (pq.elements == 0)
+    {
+      /* Check the last row of the last column in PQ for NULL matches. */
+      res= test_null_row(min_row_num);
+      goto end;
+    }
+  }
+
+  /* We should never get here - all branches must be handled explicitly above. */
+  DBUG_ASSERT(FALSE);
+
+end:
+  queue_remove_all(&pq);
+  tmp_table->file->ha_rnd_end();
+  return res;
+}
+
+
+subselect_table_scan_engine::subselect_table_scan_engine(
+  subselect_uniquesubquery_engine *engine_arg,
+  TABLE *tmp_table_arg,
+  Item_subselect *item_arg,
+  select_result_interceptor *result_arg,
+  List<Item> *equi_join_conds_arg,
+  uint covering_null_row_width_arg)
+  :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
+                                  result_arg, equi_join_conds_arg,
+                                  covering_null_row_width_arg)
+{}
+
+
+/*
+  TIMOUR:
+  This method is based on subselect_uniquesubquery_engine::scan_table().
+  Consider refactoring somehow, 80% of the code is the same.
+
+  for each row_i in tmp_table
+  {
+    count_matches= 0;
+    for each row element row_i[j]
+    {
+      if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
+        ++count_matches;
+    }
+    if (count_matches == outer_ref.elements)
+      return TRUE
+  }
+  return FALSE
+*/
+
+bool subselect_table_scan_engine::partial_match()
+{
+  List_iterator_fast<Item> equality_it(*equi_join_conds);
+  Item *cur_eq;
+  uint count_matches;
+  int error;
+  bool res;
+
+  if (tmp_table->file->ha_rnd_init_with_error(1))
+  {
+    res= FALSE;
+    goto end;
+  }
+
+  tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+                             current_thd->variables.read_buff_size);
+  /*
+  TIMOUR:
+  scan_table() also calls "table->null_row= 0;", why, do we need it?
+  */
+  for (;;)
+  {
+    error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+    if (error) {
+      if (error == HA_ERR_RECORD_DELETED)
+      {
+        error= 0;
+        continue;
+      }
+      if (error == HA_ERR_END_OF_FILE)
+      {
+        error= 0;
+        break;
+      }
+      else
+      {
+        error= report_error(tmp_table, error);
+        break;
+      }
+    }
+
+    equality_it.rewind();
+    count_matches= 0;
+    while ((cur_eq= equality_it++))
+    {
+      DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
+                  ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
+      if (!cur_eq->val_int() && !cur_eq->null_value)
+        break;
+      ++count_matches;
+    }
+    if (count_matches == tmp_table->s->fields)
+    {
+      res= TRUE; /* Found a matching row. */
+      goto end;
+    }
+  }
+
+  res= FALSE;
+end:
+  tmp_table->file->ha_rnd_end();
+  return res;
+}
+
+
+void subselect_table_scan_engine::cleanup()
+{
+}
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index d8d18fd8ef6..88bd057b7f4 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -25,8 +25,9 @@
 class st_select_lex;
 class st_select_lex_unit;
 class JOIN;
-class select_subselect;
+class select_result_interceptor;
 class subselect_engine;
+class subselect_hash_sj_engine;
 class Item_bool_func2;
 class Comp_creator;
 
@@ -37,19 +38,34 @@ typedef class st_select_lex SELECT_LEX;
   including this file.
 */
 typedef Comp_creator* (*chooser_compare_func_creator)(bool invert);
+class Cached_item;
 
 /* base class for subselects */
 
 class Item_subselect :public Item_result_field
 {
-  my_bool value_assigned; /* value already assigned to subselect */
+  bool value_assigned; 		/* value already assigned to subselect */
 protected:
   /* thread handler, will be assigned in fix_fields only */
   THD *thd;
-  /* substitution instead of subselect in case of optimization */
+  /* 
+    Used inside Item_subselect::fix_fields() according to this scenario:
+      > Item_subselect::fix_fields
+        > engine->prepare
+          > child_join->prepare
+            (Here we realize we need to do the rewrite and set
+             substitution= some new Item, eg. Item_in_optimizer )
+          < child_join->prepare
+        < engine->prepare
+        *ref= substitution;
+      < Item_subselect::fix_fields
+  */
   Item *substitution;
+public:
   /* unit of subquery */
   st_select_lex_unit *unit;
+protected:
+  Item *expr_cache;
   /* engine that perform execution of subselect (single select or union) */
   subselect_engine *engine;
   /* old engine if engine was changed */
@@ -64,8 +80,42 @@ protected:
   bool have_to_be_excluded;
   /* cache of constant state */
   bool const_item_cache;
-
+  
+  bool inside_first_fix_fields;
+  bool done_first_fix_fields;
 public:
+  /* A reference from inside subquery predicate to somewhere outside of it */
+  class Ref_to_outside : public Sql_alloc
+  {
+  public:
+    st_select_lex *select; /* Select where the reference is pointing to */
+    /* 
+      What is being referred. This may be NULL when we're referring to an
+      aggregate function.
+    */ 
+    Item *item; 
+  };
+  /*
+    References from within this subquery to somewhere outside of it (i.e. to
+    parent select, grandparent select, etc)
+  */
+  List<Ref_to_outside> upper_refs;
+  st_select_lex *parent_select;
+
+  /**
+     List of references on items subquery depends on (externally resolved);
+
+     @note We can't store direct links on Items because it could be
+           substituted with other item (for example for grouping).
+   */
+  List<Item*> depends_on;
+
+  /*
+   TRUE<=>Table Elimination has made it redundant to evaluate this select
+          (and so it is not part of QEP, etc)
+  */
+  bool eliminated;
+  
   /* changed engine indicator */
   bool engine_changed;
   /* subquery is transformed */
@@ -83,17 +133,18 @@ public:
   virtual subs_type substype() { return UNKNOWN_SUBS; }
 
   /*
-     We need this method, because some compilers do not allow 'this'
-     pointer in constructor initialization list, but we need pass pointer
-     to subselect Item class to select_subselect classes constructor.
+    We need this method, because some compilers do not allow 'this'
+    pointer in constructor initialization list, but we need to pass a pointer
+    to subselect Item class to select_result_interceptor's constructor.
   */
   virtual void init (st_select_lex *select_lex,
-		     select_subselect *result);
+		     select_result_interceptor *result);
 
   ~Item_subselect();
   void cleanup();
   virtual void reset()
   {
+    eliminated= FALSE;
     null_value= 1;
   }
   virtual trans_res select_transformer(JOIN *join);
@@ -106,6 +157,9 @@ public:
     return null_value;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
   virtual bool exec();
   virtual void fix_length_and_dec();
   table_map used_tables() const;
@@ -138,6 +192,12 @@ public:
   virtual void reset_value_registration() {}
   enum_parsing_place place() { return parsing_place; }
   bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
+  bool mark_as_eliminated_processor(uchar *arg);
+  bool enumerate_field_refs_processor(uchar *arg);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("subselect");
+  }
   Item *safe_charset_converter(CHARSET_INFO *tocs);
 
   /**
@@ -146,8 +206,9 @@ public:
   */
   st_select_lex* get_select_lex();
   const char *func_name() const { DBUG_ASSERT(0); return "subselect"; }
+  virtual bool expr_cache_is_needed(THD *);
 
-  friend class select_subselect;
+  friend class select_result_interceptor;
   friend class Item_in_optimizer;
   friend bool Item_field::fix_fields(THD *, Item **);
   friend int  Item_field::fix_outer_field(THD *, Field **, Item **);
@@ -155,6 +216,8 @@ public:
   friend void mark_select_range_as_dependent(THD*,
                                              st_select_lex*, st_select_lex*,
                                              Field*, Item*, Item_ident*);
+  friend bool convert_join_subqueries_to_semijoins(JOIN *join);
+
 };
 
 /* single value subselect */
@@ -166,7 +229,8 @@ protected:
   Item_cache *value, **row;
 public:
   Item_singlerow_subselect(st_select_lex *select_lex);
-  Item_singlerow_subselect() :Item_subselect(), value(0), row (0) {}
+  Item_singlerow_subselect() :Item_subselect(), value(0), row (0)
+  {}
 
   void cleanup();
   subs_type substype() { return SINGLEROW_SUBS; }
@@ -204,6 +268,8 @@ public:
   */
   st_select_lex* invalidate_and_restore_select_lex();
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_singlerow_subselect;
 };
 
@@ -238,6 +304,7 @@ public:
   subs_type substype() { return EXISTS_SUBS; }
   void reset() 
   {
+    eliminated= FALSE;
     value= 0;
   }
 
@@ -250,20 +317,23 @@ public:
   void fix_length_and_dec();
   virtual void print(String *str, enum_query_type query_type);
 
+  Item* expr_cache_insert_transformer(uchar *thd_arg);
+
   friend class select_exists_subselect;
   friend class subselect_uniquesubquery_engine;
   friend class subselect_indexsubquery_engine;
 };
 
 
-/*
-  IN subselect: this represents "left_exr IN (SELECT ...)"
+/**
+  Representation of IN subquery predicates of the form
+  "left_expr IN (SELECT ...)".
 
+  @details
   This class has: 
-   - (as a descendant of Item_subselect) a "subquery execution engine" which 
-      allows it to evaluate subqueries. (and this class participates in
-      execution by having was_null variable where part of execution result
-      is stored.
+   - A "subquery execution engine" (as a subclass of Item_subselect) that allows
+     it to evaluate subqueries. (and this class participates in execution by
+     having was_null variable where part of execution result is stored.
    - Transformation methods (todo: more on this).
 
   This class is not used directly, it is "wrapped" into Item_in_optimizer
@@ -272,8 +342,22 @@ public:
 
 class Item_in_subselect :public Item_exists_subselect
 {
-protected:
+public:
   Item *left_expr;
+protected:
+  /*
+    Cache of the left operand of the subquery predicate. Allocated in the
+    runtime memory root, for each execution, thus need not be freed.
+  */
+  List<Cached_item> *left_expr_cache;
+  bool first_execution;
+  /*
+    Set to TRUE if at query execution time we determine that this item's
+    value is a constant during this execution. We need this member because
+    it is not possible to substitute 'this' with a constant item.
+  */
+  bool is_constant;
+
   /*
     expr & optimizer used in subselect rewriting to store Item for
     all JOIN in UNION
@@ -282,10 +366,48 @@ protected:
   Item_in_optimizer *optimizer;
   bool was_null;
   bool abort_on_null;
-  bool transformed;
 public:
   /* Used to trigger on/off conditions that were pushed down to subselect */
   bool *pushed_cond_guards;
+  
+  /* Priority of this predicate in the convert-to-semi-join-nest process. */
+  int sj_convert_priority;
+  /*
+    Used by subquery optimizations to keep track about in which clause this
+    subquery predicate is located: 
+      (TABLE_LIST*) 1   - the predicate is an AND-part of the WHERE
+      join nest pointer - the predicate is an AND-part of ON expression
+                          of a join nest   
+      NULL              - for all other locations
+    See also THD::emb_on_expr_nest.
+  */
+  TABLE_LIST *emb_on_expr_nest;
+  /* 
+    Location of the subquery predicate. It is either
+     - pointer to join nest if the subquery predicate is in the ON expression
+     - (TABLE_LIST*)1 if the predicate is in the WHERE.
+  */
+  TABLE_LIST *expr_join_nest;
+  /*
+    Types of left_expr and subquery's select list allow to perform subquery
+    materialization. Currently, we set this to FALSE when it as well could
+    be TRUE. This is to be properly addressed with fix for BUG#36752.
+  */
+  bool types_allow_materialization;
+
+  /* 
+    Same as above, but they also allow to scan the materialized table. 
+  */
+  bool sjm_scan_allowed;
+
+  /* The method chosen to execute the IN predicate.  */
+  enum enum_exec_method {
+    NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
+    SEMI_JOIN,   /* IN was converted to semi-join nest and should be removed. */
+    IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
+    MATERIALIZATION /* IN will be executed via subquery materialization. */
+  };
+  enum_exec_method exec_method;
 
   bool *get_cond_guard(int i)
   {
@@ -302,13 +424,15 @@ public:
 
   Item_in_subselect(Item * left_expr, st_select_lex *select_lex);
   Item_in_subselect()
-    :Item_exists_subselect(), optimizer(0), abort_on_null(0), transformed(0),
-     pushed_cond_guards(NULL), upper_item(0)
+    :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
+    is_constant(FALSE), optimizer(0), abort_on_null(0),
+    pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0)
   {}
-
+  void cleanup();
   subs_type substype() { return IN_SUBS; }
   void reset() 
   {
+    eliminated= FALSE;
     value= 0;
     null_value= 0;
     was_null= 0;
@@ -317,6 +441,10 @@ public:
   trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
   trans_res single_value_transformer(JOIN *join, Comp_creator *func);
   trans_res row_value_transformer(JOIN * join);
+  trans_res single_value_in_to_exists_transformer(JOIN * join,
+                                                  Comp_creator *func);
+  trans_res row_value_in_to_exists_transformer(JOIN * join);
+  virtual bool exec();
   longlong val_int();
   double val_real();
   String *val_str(String*);
@@ -328,10 +456,26 @@ public:
   bool test_limit(st_select_lex_unit *unit);
   virtual void print(String *str, enum_query_type query_type);
   bool fix_fields(THD *thd, Item **ref);
-
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void update_used_tables();
+  bool setup_engine();
+  bool init_left_expr_cache();
+  /* Inform 'this' that it was computed, and contains a valid result. */
+  void set_first_execution() { if (first_execution) first_execution= FALSE; }
+  bool is_expensive_processor(uchar *arg);
+  bool expr_cache_is_needed(THD *thd);
+  
+  /* 
+    Return the identifier that we could use to identify the subquery for the
+    user.
+  */
+  int get_identifier();
   friend class Item_ref_null_helper;
   friend class Item_is_not_null_test;
+  friend class Item_in_optimizer;
   friend class subselect_indexsubquery_engine;
+  friend class subselect_hash_sj_engine;
+  friend class subselect_partial_match_engine;
 };
 
 
@@ -356,7 +500,7 @@ public:
 class subselect_engine: public Sql_alloc
 {
 protected:
-  select_subselect *result; /* results storage class */
+  select_result_interceptor *result; /* results storage class */
   THD *thd; /* pointer to current THD */
   Item_subselect *item; /* item, that use this engine */
   enum Item_result res_type; /* type of results */
@@ -364,7 +508,12 @@ protected:
   bool maybe_null; /* may be null (first item in select) */
 public:
 
-  subselect_engine(Item_subselect *si, select_subselect *res)
+  enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
+                         UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
+                         INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
+                         ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
+
+  subselect_engine(Item_subselect *si, select_result_interceptor *res)
     :thd(0)
   {
     result= res;
@@ -414,12 +563,14 @@ public:
   virtual table_map upper_select_const_tables()= 0;
   static table_map calc_const_tables(TABLE_LIST *);
   virtual void print(String *str, enum_query_type query_type)= 0;
-  virtual bool change_result(Item_subselect *si, select_subselect *result)= 0;
+  virtual bool change_result(Item_subselect *si,
+                             select_result_interceptor *result)= 0;
   virtual bool no_tables()= 0;
   virtual bool is_executed() const { return FALSE; }
   /* Check if subquery produced any rows during last query execution */
   virtual bool no_rows() = 0;
-
+  virtual enum_engine_type engine_type() { return ABSTRACT_ENGINE; }
+  virtual int get_identifier() { DBUG_ASSERT(0); return 0; }
 protected:
   void set_row(List<Item> &item_list, Item_cache **row);
 };
@@ -427,14 +578,14 @@ protected:
 
 class subselect_single_select_engine: public subselect_engine
 {
-  my_bool prepared; /* simple subselect is prepared */
-  my_bool optimized; /* simple subselect is optimized */
-  my_bool executed; /* simple subselect is executed */
+  bool prepared; /* simple subselect is prepared */
+  bool optimized; /* simple subselect is optimized */
+  bool executed; /* simple subselect is executed */
   st_select_lex *select_lex; /* corresponding select_lex */
   JOIN * join; /* corresponding JOIN structure */
 public:
   subselect_single_select_engine(st_select_lex *select,
-				 select_subselect *result,
+				 select_result_interceptor *result,
 				 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -445,11 +596,16 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si, select_result_interceptor *result);
   bool no_tables();
   bool may_be_null();
   bool is_executed() const { return executed; }
   bool no_rows();
+  virtual enum_engine_type engine_type() { return SINGLE_SELECT_ENGINE; }
+  int get_identifier();
+
+  friend class subselect_hash_sj_engine;
+  friend class Item_in_subselect;
 };
 
 
@@ -458,7 +614,7 @@ class subselect_union_engine: public subselect_engine
   st_select_lex_unit *unit;  /* corresponding unit structure */
 public:
   subselect_union_engine(st_select_lex_unit *u,
-			 select_subselect *result,
+			 select_result_interceptor *result,
 			 Item_subselect *item);
   void cleanup();
   int prepare();
@@ -469,10 +625,11 @@ public:
   void exclude();
   table_map upper_select_const_tables();
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si, select_result_interceptor *result);
   bool no_tables();
   bool is_executed() const;
   bool no_rows();
+  virtual enum_engine_type engine_type() { return UNION_ENGINE; }
 };
 
 
@@ -526,11 +683,14 @@ public:
   void exclude();
   table_map upper_select_const_tables() { return 0; }
   virtual void print (String *str, enum_query_type query_type);
-  bool change_result(Item_subselect *si, select_subselect *result);
+  bool change_result(Item_subselect *si, select_result_interceptor *result);
   bool no_tables();
+  int index_lookup(); /* TIMOUR: this method needs refactoring. */
   int scan_table();
   bool copy_ref_key();
+  int copy_ref_key_simple();  /* TIMOUR: this method needs refactoring. */
   bool no_rows() { return empty_result_set; }
+  virtual enum_engine_type engine_type() { return UNIQUESUBQUERY_ENGINE; }
 };
 
 
@@ -580,6 +740,7 @@ public:
   {}
   int exec();
   virtual void print (String *str, enum_query_type query_type);
+  virtual enum_engine_type engine_type() { return INDEXSUBQUERY_ENGINE; }
 };
 
 /*
@@ -597,9 +758,447 @@ inline bool Item_subselect::is_evaluated() const
   return engine->is_executed();
 }
 
+
 inline bool Item_subselect::is_uncacheable() const
 {
   return engine->uncacheable();
 }
 
+/**
+  Compute an IN predicate via a hash semi-join. This class is responsible for
+  the materialization of the subquery, and the selection of the correct and
+  optimal execution method (e.g. direct index lookup, or partial matching) for
+  the IN predicate.
+*/
+
+class subselect_hash_sj_engine : public subselect_engine
+{
+protected:
+  /* The table into which the subquery is materialized. */
+  TABLE *tmp_table;
+  /* TRUE if the subquery was materialized into a temp table. */
+  bool is_materialized;
+  /*
+    The old engine already chosen at parse time and stored in permanent memory.
+    Through this member we can re-create and re-prepare materialize_join for
+    each execution of a prepared statement. We also reuse the functionality
+    of subselect_single_select_engine::[prepare | cols].
+  */
+  subselect_single_select_engine *materialize_engine;
+  /* The engine used to compute the IN predicate. */
+  subselect_engine *lookup_engine;
+  /*
+    QEP to execute the subquery and materialize its result into a
+    temporary table. Created during the first call to exec().
+  */
+  JOIN *materialize_join;
+
+  /* Keyparts of the only non-NULL composite index in a rowid merge. */
+  MY_BITMAP non_null_key_parts;
+  /* Keyparts of the single column indexes with NULL, one keypart per index. */
+  MY_BITMAP partial_match_key_parts;
+  uint count_partial_match_columns;
+  uint count_null_only_columns;
+  /*
+    A conjunction of all the equality condtions between all pairs of expressions
+    that are arguments of an IN predicate. We need these to post-filter some
+    IN results because index lookups sometimes match values that are actually
+    not equal to the search key in SQL terms.
+ */
+  Item_cond_and *semi_join_conds;
+  /* Possible execution strategies that can be used to compute hash semi-join.*/
+  enum exec_strategy {
+    UNDEFINED,
+    COMPLETE_MATCH, /* Use regular index lookups. */
+    PARTIAL_MATCH,  /* Use some partial matching strategy. */
+    PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
+    PARTIAL_MATCH_SCAN,  /* Use partial matching through table scan. */
+    IMPOSSIBLE      /* Subquery materialization is not applicable. */
+  };
+  /* The chosen execution strategy. Computed after materialization. */
+  exec_strategy strategy;
+protected:
+  exec_strategy get_strategy_using_schema();
+  exec_strategy get_strategy_using_data();
+  size_t rowid_merge_buff_size(bool has_non_null_key,
+                               bool has_covering_null_row,
+                               MY_BITMAP *partial_match_key_parts);
+  void choose_partial_match_strategy(bool has_non_null_key,
+                                     bool has_covering_null_row,
+                                     MY_BITMAP *partial_match_key_parts);
+  bool make_semi_join_conds();
+  subselect_uniquesubquery_engine* make_unique_engine();
+
+public:
+  subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
+                           subselect_single_select_engine *old_engine)
+    :subselect_engine(in_predicate, NULL), tmp_table(NULL),
+    is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL),
+    materialize_join(NULL), count_partial_match_columns(0),
+    count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED)
+  {
+    set_thd(thd);
+  }
+  ~subselect_hash_sj_engine();
+
+  bool init_permanent(List<Item> *tmp_columns);
+  bool init_runtime();
+  void cleanup();
+  int prepare() { return 0; } /* Override virtual function in base class. */
+  int exec();
+  virtual void print(String *str, enum_query_type query_type);
+  uint cols()
+  {
+    return materialize_engine->cols();
+  }
+  uint8 uncacheable() { return materialize_engine->uncacheable(); }
+  table_map upper_select_const_tables() { return 0; }
+  bool no_rows() { return !tmp_table->file->stats.records; }
+  virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
+  /*
+    TODO: factor out all these methods in a base subselect_index_engine class
+    because all of them have dummy implementations and should never be called.
+  */
+  void fix_length_and_dec(Item_cache** row);//=>base class
+  void exclude(); //=>base class
+  //=>base class
+  bool change_result(Item_subselect *si, select_result_interceptor *result);
+  bool no_tables();//=>base class
+};
+
+
+/*
+  Distinguish the type od (0-based) row numbers from the type of the index into
+  an array of row numbers.
+*/
+typedef ha_rows rownum_t;
+
+
+/*
+  An Ordered_key is an in-memory table index that allows O(log(N)) time
+  lookups of a multi-part key.
+
+  If the index is over a single column, then this column may contain NULLs, and
+  the NULLs are stored and tested separately for NULL in O(1) via is_null().
+  Multi-part indexes assume that the indexed columns do not contain NULLs.
+
+  TODO:
+  = Due to the unnatural assymetry between single and multi-part indexes, it
+    makes sense to somehow refactor or extend the class.
+
+  = This class can be refactored into a base abstract interface, and two
+    subclasses:
+    - one to represent single-column indexes, and
+    - another to represent multi-column indexes.
+    Such separation would allow slightly more efficient implementation of
+    the single-column indexes.
+  = The current design requires such indexes to be fully recreated for each
+    PS (re)execution, however most of the comprising objects can be reused.
+*/
+
+class Ordered_key : public Sql_alloc
+{
+protected:
+  /*
+    Index of the key in an array of keys. This index allows to
+    construct (sub)sets of keys represented by bitmaps.
+  */
+  uint keyid;
+  /* The table being indexed. */
+  TABLE *tbl;
+  /* The columns being indexed. */
+  Item_field **key_columns;
+  /* Number of elements in 'key_columns' (number of key parts). */
+  uint key_column_count;
+  /*
+    An expression, or sequence of expressions that forms the search key.
+    The search key is a sequence when it is Item_row. Each element of the
+    sequence is accessible via Item::element_index(int i).
+  */
+  Item *search_key;
+
+/* Value index related members. */
+  /*
+    The actual value index, consists of a sorted sequence of row numbers.
+  */
+  rownum_t *key_buff;
+  /* Number of elements in key_buff. */
+  ha_rows key_buff_elements;
+  /* Current element in 'key_buff'. */
+  ha_rows cur_key_idx;
+  /*
+    Mapping from row numbers to row ids. The element row_num_to_rowid[i]
+    contains a buffer with the rowid for the row numbered 'i'.
+    The memory for this member is not maintanined by this class because
+    all Ordered_key indexes of the same table share the same mapping.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A sequence of predicates to compare the search key with the corresponding
+    columns of a table row from the index.
+  */
+  Item_func_lt **compare_pred;
+
+/* Null index related members. */
+  MY_BITMAP null_key;
+  /* Count of NULLs per column. */
+  ha_rows null_count;
+  /* The row number that contains the first NULL in a column. */
+  ha_rows min_null_row;
+  /* The row number that contains the last NULL in a column. */
+  ha_rows max_null_row;
+
+protected:
+  bool alloc_keys_buffers();
+  /*
+    Quick sort comparison function that compares two rows of the same table
+    indentfied with their row numbers.
+  */
+  int cmp_keys_by_row_data(rownum_t a, rownum_t b);
+  static int cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+                                             rownum_t* a, rownum_t* b);
+
+  int cmp_key_with_search_key(rownum_t row_num);
+
+public:
+  Ordered_key(uint keyid_arg, TABLE *tbl_arg,
+              Item *search_key_arg, ha_rows null_count_arg,
+              ha_rows min_null_row_arg, ha_rows max_null_row_arg,
+              uchar *row_num_to_rowid_arg);
+  ~Ordered_key();
+  void cleanup();
+  /* Initialize a multi-column index. */
+  bool init(MY_BITMAP *columns_to_index);
+  /* Initialize a single-column index. */
+  bool init(int col_idx);
+
+  uint get_column_count() { return key_column_count; }
+  uint get_keyid() { return keyid; }
+  uint get_field_idx(uint i)
+  {
+    DBUG_ASSERT(i < key_column_count);
+    return key_columns[i]->field->field_index;
+  }
+  /*
+    Get the search key element that corresponds to the i-th key part of this
+    index.
+  */
+  Item *get_search_key(uint i)
+  {
+    return search_key->element_index(key_columns[i]->field->field_index);
+  }
+  void add_key(rownum_t row_num)
+  {
+    /* The caller must know how many elements to add. */
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    key_buff[cur_key_idx]= row_num;
+    ++cur_key_idx;
+  }
+
+  void sort_keys();
+  double null_selectivity();
+
+  /*
+    Position the current element at the first row that matches the key.
+    The key itself is propagated by evaluating the current value(s) of
+    this->search_key.
+  */
+  bool lookup();
+  /* Move the current index cursor to the first key. */
+  void first()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    cur_key_idx= 0;
+  }
+  /* TODO */
+  bool next_same();
+  /* Move the current index cursor to the next key. */
+  bool next()
+  {
+    DBUG_ASSERT(key_buff_elements);
+    if (cur_key_idx < key_buff_elements - 1)
+    {
+      ++cur_key_idx;
+      return TRUE;
+    }
+    return FALSE;
+  };
+  /* Return the current index element. */
+  rownum_t current()
+  {
+    DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+    return key_buff[cur_key_idx];
+  }
+
+  void set_null(rownum_t row_num)
+  {
+    bitmap_set_bit(&null_key, row_num);
+  }
+  bool is_null(rownum_t row_num)
+  {
+    /*
+      Indexes consisting of only NULLs do not have a bitmap buffer at all.
+      Their only initialized member is 'n_bits', which is equal to the number
+      of temp table rows.
+    */
+    if (null_count == tbl->file->stats.records)
+    {
+      DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits);
+      return TRUE;
+    }
+    if (row_num > max_null_row || row_num < min_null_row)
+      return FALSE;
+    return bitmap_is_set(&null_key, row_num);
+  }
+  void print(String *str);
+};
+
+
+class subselect_partial_match_engine : public subselect_engine
+{
+protected:
+  /* The temporary table that contains a materialized subquery. */
+  TABLE *tmp_table;
+  /*
+    The engine used to check whether an IN predicate is TRUE or not. If not
+    TRUE, then subselect_rowid_merge_engine further distinguishes between
+    FALSE and UNKNOWN.
+  */
+  subselect_uniquesubquery_engine *lookup_engine;
+  /* A list of equalities between each pair of IN operands. */
+  List<Item> *equi_join_conds;
+  /*
+    If there is a row, such that all its NULL-able components are NULL, this
+    member is set to the number of covered columns. If there is no covering
+    row, then this is 0.
+  */
+  uint covering_null_row_width;
+protected:
+  virtual bool partial_match()= 0;
+public:
+  subselect_partial_match_engine(subselect_uniquesubquery_engine *engine_arg,
+                                 TABLE *tmp_table_arg, Item_subselect *item_arg,
+                                 select_result_interceptor *result_arg,
+                                 List<Item> *equi_join_conds_arg,
+                                 uint covering_null_row_width_arg);
+  int prepare() { return 0; }
+  int exec();
+  void fix_length_and_dec(Item_cache**) {}
+  uint cols() { /* TODO: what is the correct value? */ return 1; }
+  uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+  void exclude() {}
+  table_map upper_select_const_tables() { return 0; }
+  bool change_result(Item_subselect*, select_result_interceptor*)
+  { DBUG_ASSERT(FALSE); return false; }
+  bool no_tables() { return false; }
+  bool no_rows()
+  {
+    /*
+      TODO: It is completely unclear what is the semantics of this
+      method. The current result is computed so that the call to no_rows()
+      from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
+      correctly.
+    */
+    return !(((Item_in_subselect *) item)->null_value);
+  }
+  void print(String*, enum_query_type);
+
+  friend void subselect_hash_sj_engine::cleanup();
+};
+
+
+class subselect_rowid_merge_engine: public subselect_partial_match_engine
+{
+protected:
+  /*
+    Mapping from row numbers to row ids. The rowids are stored sequentially
+    in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
+  */
+  uchar *row_num_to_rowid;
+  /*
+    A subset of all the keys for which there is a match for the same row.
+    Used during execution. Computed for each outer reference
+  */
+  MY_BITMAP matching_keys;
+  /*
+    The columns of the outer reference that are NULL. Computed for each
+    outer reference.
+  */
+  MY_BITMAP matching_outer_cols;
+  /*
+    Columns that consist of only NULLs. Such columns match any value.
+    Computed once per query execution.
+  */
+  MY_BITMAP null_only_columns;
+  /*
+    Indexes of row numbers, sorted by <column_value, row_number>. If an
+    index may contain NULLs, the NULLs are stored efficiently in a bitmap.
+
+    The indexes are sorted by the selectivity of their NULL sub-indexes, the
+    one with the fewer NULLs is first. Thus, if there is any index on
+    non-NULL columns, it is contained in keys[0].
+  */
+  Ordered_key **merge_keys;
+  /* The number of elements in keys. */
+  uint keys_count;
+  /*
+    An index on all non-NULL columns of 'tmp_table'. The index has the
+    logical form: <[v_i1 | ... | v_ik], rownum>. It allows to find the row
+    number where the columns c_i1,...,c1_k contain the values v_i1,...,v_ik.
+    If such an index exists, it is always the first element of 'keys'.
+  */
+  Ordered_key *non_null_key;
+  /*
+    Priority queue of Ordered_key indexes, one per NULLable column.
+    This queue is used by the partial match algorithm in method exec().
+  */
+  QUEUE pq;
+protected:
+  /*
+    Comparison function to compare keys in order of decreasing bitmap
+    selectivity.
+  */
+  static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
+  /*
+    Comparison function used by the priority queue pq, the 'smaller' key
+    is the one with the smaller current row number.
+  */
+  static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2);
+
+  bool test_null_row(rownum_t row_num);
+  bool partial_match();
+public:
+  subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg,
+                               TABLE *tmp_table_arg, uint keys_count_arg,
+                               uint covering_null_row_width_arg,
+                               Item_subselect *item_arg,
+                               select_result_interceptor *result_arg,
+                               List<Item> *equi_join_conds_arg)
+    :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
+                                    result_arg, equi_join_conds_arg,
+                                    covering_null_row_width_arg),
+    keys_count(keys_count_arg), non_null_key(NULL)
+  {
+    thd= lookup_engine->get_thd();
+  }
+  ~subselect_rowid_merge_engine();
+  bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; }
+};
+
+
+class subselect_table_scan_engine: public subselect_partial_match_engine
+{
+protected:
+  bool partial_match();
+public:
+  subselect_table_scan_engine(subselect_uniquesubquery_engine *engine_arg,
+                              TABLE *tmp_table_arg, Item_subselect *item_arg,
+                              select_result_interceptor *result_arg,
+                              List<Item> *equi_join_conds_arg,
+                              uint covering_null_row_width_arg);
+  void cleanup();
+  virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; }
+};
 #endif /* ITEM_SUBSELECT_INCLUDED */
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index b6e5d6d3182..a9c77e2fea5 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -331,6 +331,7 @@ bool Item_sum::register_sum_func(THD *thd, Item **ref)
   if (aggr_level >= 0)
   {
     ref_by= ref;
+    thd->lex->current_select->register_dependency_item(aggr_sel, ref);
     /* Add the object to the list of registered objects assigned to aggr_sel */
     if (!aggr_sel->inner_sum_func_list)
       next= this;
@@ -362,7 +363,7 @@ bool Item_sum::register_sum_func(THD *thd, Item **ref)
          sl= sl->master_unit()->outer_select() )
       sl->master_unit()->item->with_sum_func= 1;
   }
-  thd->lex->current_select->mark_as_dependent(aggr_sel);
+  thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL);
   return FALSE;
 }
 
@@ -538,11 +539,6 @@ void Item_sum::update_used_tables ()
       args[i]->update_used_tables();
       used_tables_cache|= args[i]->used_tables();
     }
-
-    used_tables_cache&= PSEUDO_TABLE_BITS;
-
-    /* the aggregate function is aggregated into its local context */
-    used_tables_cache |=  (1 << aggr_sel->join->tables) - 1;
   }
 }
 
@@ -1944,8 +1940,22 @@ void Item_sum_hybrid::cleanup()
 
 void Item_sum_hybrid::no_rows_in_result()
 {
-  was_values= FALSE;
-  clear();
+  /* We may be called here twice in case of ref field in function */
+  if (was_values)
+  {
+    was_values= FALSE;
+    was_null_value= value->null_value;
+    clear();
+  }
+}
+
+void Item_sum_hybrid::restore_to_before_no_rows_in_result()
+{
+  if (!was_values)
+  {
+    was_values= TRUE;
+    null_value= value->null_value= was_null_value;
+  }
 }
 
 
@@ -2624,8 +2634,10 @@ void Item_udf_sum::clear()
 
 bool Item_udf_sum::add()
 {
+  my_bool tmp_null_value;
   DBUG_ENTER("Item_udf_sum::add");
-  udf.add(&null_value);
+  udf.add(&tmp_null_value);
+  null_value= tmp_null_value;
   DBUG_RETURN(0);
 }
 
@@ -2661,11 +2673,15 @@ Item *Item_sum_udf_float::copy_or_same(THD* thd)
 
 double Item_sum_udf_float::val_real()
 {
+  my_bool tmp_null_value;
+  double res;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_sum_udf_float::val");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val(&null_value));
+  res= udf.val(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -2701,12 +2717,16 @@ longlong Item_sum_udf_decimal::val_int()
 
 my_decimal *Item_sum_udf_decimal::val_decimal(my_decimal *dec_buf)
 {
+  my_decimal *res;
+  my_bool tmp_null_value;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_func_udf_decimal::val_decimal");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
                      args[0]->result_type(), arg_count));
 
-  DBUG_RETURN(udf.val_decimal(&null_value, dec_buf));
+  res= udf.val_decimal(&tmp_null_value, dec_buf);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -2723,11 +2743,15 @@ Item *Item_sum_udf_int::copy_or_same(THD* thd)
 
 longlong Item_sum_udf_int::val_int()
 {
+  my_bool tmp_null_value;
+  longlong res;
   DBUG_ASSERT(fixed == 1);
   DBUG_ENTER("Item_sum_udf_int::val_int");
   DBUG_PRINT("info",("result_type: %d  arg_count: %d",
 		     args[0]->result_type(), arg_count));
-  DBUG_RETURN(udf.val_int(&null_value));
+  res= udf.val_int(&tmp_null_value);
+  null_value= tmp_null_value;
+  DBUG_RETURN(res);
 }
 
 
@@ -3060,10 +3084,10 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
     object being copied.
   */
   ORDER *tmp;
-  if (!(order= (ORDER **) thd->alloc(sizeof(ORDER *) * arg_count_order +
+  if (!(tmp= (ORDER *) thd->alloc(sizeof(ORDER *) * arg_count_order +
                                      sizeof(ORDER) * arg_count_order)))
     return;
-  tmp= (ORDER *)(order + arg_count_order);
+  order= (ORDER **)(tmp + arg_count_order);
   for (uint i= 0; i < arg_count_order; i++, tmp++)
   {
     memcpy(tmp, item->order[i], sizeof(ORDER));
@@ -3072,7 +3096,6 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
 }
 
 
-
 void Item_func_group_concat::cleanup()
 {
   DBUG_ENTER("Item_func_group_concat::cleanup");
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 06a8c2c58a4..9e017aa98d5 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -366,6 +366,12 @@ protected:
   */
   Item **orig_args, *tmp_orig_args[2];
   table_map used_tables_cache;
+  
+  /*
+    TRUE <=> We've managed to calculate the value of this Item in
+    opt_sum_query(), hence it can be considered constant at all subsequent
+    steps.
+  */
   bool forced_const;
   static ulonglong ram_limitation(THD *thd);
 
@@ -418,6 +424,15 @@ public:
   virtual void fix_length_and_dec() { maybe_null=1; null_value=1; }
   virtual Item *result_item(Field *field)
     { return new Item_field(field); }
+  /*
+    Return bitmap of tables that are needed to evaluate the item.
+
+    The implementation takes into account the used strategy: items resolved
+    at optimization phase will report 0.
+    Items that depend on the number of join output records, but not columns
+    of any particular table (like COUNT(*)) will report 0 from used_tables(),
+    but will still return false from const_item().
+  */
   table_map used_tables() const { return used_tables_cache; }
   void update_used_tables ();
   bool is_null() { return null_value; }
@@ -510,6 +525,10 @@ public:
   virtual bool setup(THD *thd) { return false; }
 
   virtual void cleanup();
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name()); 
+  }
 };
 
 
@@ -786,6 +805,10 @@ public:
   }
   void fix_length_and_dec() {}
   enum Item_result result_type () const { return hybrid_type; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("avg_field");
+  }
   const char *func_name() const { DBUG_ASSERT(0); return "avg_field"; }
 };
 
@@ -863,6 +886,10 @@ public:
   }
   void fix_length_and_dec() {}
   enum Item_result result_type () const { return hybrid_type; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("var_field");
+  }
   const char *func_name() const { DBUG_ASSERT(0); return "variance_field"; }
 };
 
@@ -976,6 +1003,7 @@ protected:
   enum_field_types hybrid_field_type;
   int cmp_sign;
   bool was_values;  // Set if we have found at least one row (for max/min only)
+  bool was_null_value;
 
   public:
   Item_sum_hybrid(Item *item_par,int sign)
@@ -1007,6 +1035,7 @@ protected:
   void cleanup();
   bool any_value() { return was_values; }
   void no_rows_in_result();
+  void restore_to_before_no_rows_in_result();
   Field *create_tmp_field(bool group, TABLE *table,
 			  uint convert_blob_length);
   /*
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 24cf4da0a95..6a5260ac235 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -559,8 +559,9 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
       %U,%u should be used with %Y and not %X or %x
     */
     if ((strict_week_number &&
-        (strict_week_number_year < 0 ||
-         strict_week_number_year_type != sunday_first_n_first_week_non_iso)) ||
+         (strict_week_number_year < 0 ||
+          strict_week_number_year_type !=
+          sunday_first_n_first_week_non_iso)) ||
         (!strict_week_number && strict_week_number_year >= 0))
       goto err;
 
@@ -775,11 +776,13 @@ bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	str->append(hours_i < 12 ? "AM" : "PM",2);
 	break;
       case 'r':
-	length= sprintf(intbuff, ((l_time->hour % 24) < 12) ?
-                        "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM",
-		        (l_time->hour+11)%12+1,
-		        l_time->minute,
-		        l_time->second);
+	length= my_sprintf(intbuff, 
+		   (intbuff, 
+		    ((l_time->hour % 24) < 12) ?
+                    "%02d:%02d:%02d AM" : "%02d:%02d:%02d PM",
+		    (l_time->hour+11)%12+1,
+		    l_time->minute,
+		    l_time->second));
 	str->append(intbuff, length);
 	break;
       case 'S':
@@ -788,8 +791,12 @@ bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	str->append_with_prefill(intbuff, length, 2, '0');
 	break;
       case 'T':
-	length= sprintf(intbuff,  "%02d:%02d:%02d",
-                        l_time->hour, l_time->minute, l_time->second);
+	length= my_sprintf(intbuff, 
+		   (intbuff, 
+		    "%02d:%02d:%02d", 
+		    l_time->hour, 
+		    l_time->minute,
+		    l_time->second));
 	str->append(intbuff, length);
 	break;
       case 'U':
@@ -3051,7 +3058,7 @@ String *Item_func_maketime::val_str(String *str)
     char buf[28];
     char *ptr= longlong10_to_str(hour, buf, args[0]->unsigned_flag ? 10 : -10);
     int len = (int)(ptr - buf) +
-      sprintf(ptr, ":%02u:%02u", (uint) minute, (uint) second);
+      my_sprintf(ptr, (ptr, ":%02u:%02u", (uint)minute, (uint)second));
     make_truncated_value_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                                  buf, len, MYSQL_TIMESTAMP_TIME,
                                  NullS);
diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h
index 27dfd548f73..c4c8fe39933 100644
--- a/sql/item_timefunc.h
+++ b/sql/item_timefunc.h
@@ -75,6 +75,7 @@ public:
   enum_monotonicity_info get_monotonicity_info() const;
   longlong val_int_endpoint(bool left_endp, bool *incl_endp);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -118,6 +119,7 @@ public:
     maybe_null=1; 
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -142,6 +144,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -155,6 +158,7 @@ public:
   enum Item_result result_type () const { return STRING_RESULT; }
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *int_arg) {return FALSE;}
 };
 
 
@@ -171,6 +175,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -187,6 +192,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -203,6 +209,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -219,6 +226,7 @@ public:
      maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -235,6 +243,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -265,6 +274,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -283,6 +293,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -312,6 +323,7 @@ public:
     maybe_null= 1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 class Item_func_dayname :public Item_func_weekday
@@ -324,6 +336,7 @@ class Item_func_dayname :public Item_func_weekday
   enum Item_result result_type () const { return STRING_RESULT; }
   void fix_length_and_dec();
   bool check_partition_func_processor(uchar *int_arg) {return TRUE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -350,6 +363,14 @@ public:
     decimals=0;
     max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
   }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    /*
+      TODO: Allow UNIX_TIMESTAMP called with an argument to be a part
+      of the expression for a virtual column
+    */
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -365,6 +386,7 @@ public:
     max_length=10*MY_CHARSET_BIN_MB_MAXLEN;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -487,6 +509,10 @@ public:
   */
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
   bool result_as_longlong() { return TRUE; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -523,6 +549,10 @@ public:
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -563,6 +593,10 @@ public:
   void fix_length_and_dec();
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   virtual void store_now_in_TIME(MYSQL_TIME *now_time)=0;
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
@@ -620,6 +654,7 @@ public:
   const char *func_name() const { return "from_days"; }
   bool get_date(MYSQL_TIME *res, uint fuzzy_date);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -745,6 +780,7 @@ class Item_extract :public Item_int_func
   bool eq(const Item *item, bool binary_cmp) const;
   virtual void print(String *str, enum_query_type query_type);
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
@@ -988,6 +1024,7 @@ public:
     maybe_null=1;
   }
   bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
+  bool check_vcol_func_processor(uchar *int_arg) { return FALSE;}
 };
 
 
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index f124c37f0eb..792008ba35e 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -228,6 +228,11 @@ public:
     collation.collation= pxml->charset();
   }
   const char *func_name() const { return "nodeset"; }
+  bool check_vcol_func_processor(uchar *int_arg)
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
+
 };
 
 
@@ -534,6 +539,10 @@ public:
   enum Type type() const { return XPATH_NODESET_CMP; };
   const char *func_name() const { return "xpath_nodeset_to_const_comparator"; }
   bool is_bool_func() { return 1; }
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 
   longlong val_int()
   {
@@ -2040,8 +2049,8 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath)
     Item *prev= xpath->item;
     if (!my_xpath_parse_AndExpr(xpath))
     {
-      return 0;
       xpath->error= 1;
+      return 0;
     }
     xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
                                   nodeset2bool(xpath, xpath->item));
diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h
index 6373bde0aab..38dd0a5e1d1 100644
--- a/sql/item_xmlfunc.h
+++ b/sql/item_xmlfunc.h
@@ -43,6 +43,10 @@ public:
   }
   void fix_length_and_dec();
   String *parse_xml(String *raw_xml, String *parsed_xml_buf);
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor(func_name());
+  }
 };
 
 
diff --git a/sql/key.cc b/sql/key.cc
index 582334620ad..8c7eab2a688 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -281,8 +281,7 @@ bool key_cmp_if_same(TABLE *table,const uchar *key,uint idx,uint key_length)
       key++;
       store_length--;
     }
-    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART |
-                                   HA_BIT_PART))
+    if (!(key_part->key_part_flag & HA_CAN_MEMCMP))
     {
       if (key_part->field->key_cmp(key, key_part->length))
 	return 1;
diff --git a/sql/lex.h b/sql/lex.h
index fbedddc6941..b1c23e81748 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -65,6 +65,7 @@ static SYMBOL symbols[] = {
   { "ALL",		SYM(ALL)},
   { "ALGORITHM",	SYM(ALGORITHM_SYM)},
   { "ALTER",		SYM(ALTER)},
+  { "ALWAYS",           SYM(ALWAYS_SYM)},
   { "ANALYZE",		SYM(ANALYZE_SYM)},
   { "AND",		SYM(AND_SYM)},
   { "ANY",              SYM(ANY_SYM)},
@@ -111,6 +112,7 @@ static SYMBOL symbols[] = {
   { "CIPHER",		SYM(CIPHER_SYM)},
   { "CLASS_ORIGIN",     SYM(CLASS_ORIGIN_SYM)},
   { "CLIENT",		SYM(CLIENT_SYM)},
+  { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)},
   { "CLOSE",		SYM(CLOSE_SYM)},
   { "COALESCE",		SYM(COALESCE)},
   { "CODE",             SYM(CODE_SYM)},
@@ -231,6 +233,7 @@ static SYMBOL symbols[] = {
   { "FULLTEXT",		SYM(FULLTEXT_SYM)},
   { "FUNCTION",		SYM(FUNCTION_SYM)},
   { "GENERAL",          SYM(GENERAL)},
+  { "GENERATED",        SYM(GENERATED_SYM)},
   { "GEOMETRY",		SYM(GEOMETRY_SYM)},
   { "GEOMETRYCOLLECTION",SYM(GEOMETRYCOLLECTION)},
   { "GET_FORMAT",       SYM(GET_FORMAT)},
@@ -257,6 +260,7 @@ static SYMBOL symbols[] = {
   { "IN",		SYM(IN_SYM)},
   { "INDEX",		SYM(INDEX_SYM)},
   { "INDEXES",		SYM(INDEXES)},
+  { "INDEX_STATISTICS",	SYM(INDEX_STATS_SYM)},
   { "INFILE",		SYM(INFILE)},
   { "INITIAL_SIZE",	SYM(INITIAL_SIZE_SYM)},
   { "INNER",		SYM(INNER_SYM)},
@@ -398,13 +402,16 @@ static SYMBOL symbols[] = {
   { "OUTFILE",		SYM(OUTFILE)},
   { "OWNER",		SYM(OWNER_SYM)},
   { "PACK_KEYS",	SYM(PACK_KEYS_SYM)},
-  { "PARSER",           SYM(PARSER_SYM)},
   { "PAGE",	        SYM(PAGE_SYM)},
+  { "PAGE_CHECKSUM",	SYM(PAGE_CHECKSUM_SYM)},
+  { "PARSER",           SYM(PARSER_SYM)},
+  { "PARSE_VCOL_EXPR",  SYM(PARSE_VCOL_EXPR_SYM)},
   { "PARTIAL",		SYM(PARTIAL)},
   { "PARTITION",        SYM(PARTITION_SYM)},
   { "PARTITIONING",     SYM(PARTITIONING_SYM)},
   { "PARTITIONS",       SYM(PARTITIONS_SYM)},
   { "PASSWORD",		SYM(PASSWORD)},
+  { "PERSISTENT",	SYM(PERSISTENT_SYM)},
   { "PHASE",            SYM(PHASE_SYM)},
   { "PLUGIN",           SYM(PLUGIN_SYM)},
   { "PLUGINS",          SYM(PLUGINS_SYM)},
@@ -545,6 +552,7 @@ static SYMBOL symbols[] = {
   { "TABLE_NAME",       SYM(TABLE_NAME_SYM)},
   { "TABLES",		SYM(TABLES)},
   { "TABLESPACE",	        SYM(TABLESPACE)},
+  { "TABLE_STATISTICS",	SYM(TABLE_STATS_SYM)},
   { "TABLE_CHECKSUM",	SYM(TABLE_CHECKSUM_SYM)},
   { "TEMPORARY",	SYM(TEMPORARY)},
   { "TEMPTABLE",	SYM(TEMPTABLE_SYM)},
@@ -562,6 +570,7 @@ static SYMBOL symbols[] = {
   { "TO",		SYM(TO_SYM)},
   { "TRAILING",		SYM(TRAILING)},
   { "TRANSACTION",	SYM(TRANSACTION_SYM)},
+  { "TRANSACTIONAL",	SYM(TRANSACTIONAL_SYM)},
   { "TRIGGER",          SYM(TRIGGER_SYM)},
   { "TRIGGERS",         SYM(TRIGGERS_SYM)},
   { "TRUE",		SYM(TRUE_SYM)},
@@ -587,6 +596,7 @@ static SYMBOL symbols[] = {
   { "USE",		SYM(USE_SYM)},
   { "USER",		SYM(USER)},
   { "USER_RESOURCES",	SYM(RESOURCES)},
+  { "USER_STATISTICS",	SYM(USER_STATS_SYM)},
   { "USE_FRM",		SYM(USE_FRM)},
   { "USING",		SYM(USING)},
   { "UTC_DATE",         SYM(UTC_DATE_SYM)},
@@ -599,13 +609,15 @@ static SYMBOL symbols[] = {
   { "VARCHARACTER",	SYM(VARCHAR)},
   { "VARIABLES",	SYM(VARIABLES)},
   { "VARYING",		SYM(VARYING)},
+  { "VIA",              SYM(VIA_SYM)},
+  { "VIEW",		SYM(VIEW_SYM)},
+  { "VIRTUAL",          SYM(VIRTUAL_SYM)},
   { "WAIT",		SYM(WAIT_SYM)},
   { "WARNINGS",		SYM(WARNINGS)},
   { "WEEK",		SYM(WEEK_SYM)},
   { "WHEN",		SYM(WHEN_SYM)},
   { "WHERE",		SYM(WHERE)},
   { "WHILE",            SYM(WHILE_SYM)},
-  { "VIEW",		SYM(VIEW_SYM)},
   { "WITH",		SYM(WITH)},
   { "WORK",		SYM(WORK_SYM)},
   { "WRAPPER",		SYM(WRAPPER_SYM)},
diff --git a/sql/lock.cc b/sql/lock.cc
index 0181a544824..66b59fa5466 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -34,8 +34,8 @@
       This is followed by a call to thr_multi_lock() for all tables.
 
   - When statement is done, we call mysql_unlock_tables().
-    This will call thr_multi_unlock() followed by
-    table_handler->external_lock(thd, F_UNLCK) for each table.
+    table_handler->external_lock(thd, F_UNLCK) followed by
+    thr_multi_unlock() for each table.
 
   - Note that mysql_unlock_tables() may be called several times as
     MySQL in some cases can free some tables earlier than others.
@@ -293,13 +293,13 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
   if (lock_tables_check(thd, tables, count, flags))
     DBUG_RETURN(NULL);
 
-  if (! (sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS)))
+  if (! (sql_lock= get_lock_data(thd, tables, count, GET_LOCK_STORE_LOCKS)) ||
+        ! sql_lock->table_count)
     DBUG_RETURN(NULL);
 
   thd_proc_info(thd, "System lock");
   DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
-  if (sql_lock->table_count && lock_external(thd, sql_lock->table,
-                                             sql_lock->table_count))
+  if (lock_external(thd, sql_lock->table, sql_lock->table_count))
   {
     /* Clear the lock type of all lock data to avoid reusage. */
     reset_lock_data_and_free(&sql_lock);
@@ -316,8 +316,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
                                                    &thd->lock_info, timeout)];
   if (rc)
   {
-    if (sql_lock->table_count)
-      (void) unlock_external(thd, sql_lock->table, sql_lock->table_count);
+    (void) unlock_external(thd, sql_lock->table, sql_lock->table_count);
     reset_lock_data_and_free(&sql_lock);
     if (! thd->killed)
       my_error(rc, MYF(0));
@@ -383,7 +382,7 @@ void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
   if (sql_lock->lock_count)
     thr_multi_unlock(sql_lock->locks,sql_lock->lock_count);
   if (sql_lock->table_count)
-    (void) unlock_external(thd,sql_lock->table,sql_lock->table_count);
+    unlock_external(thd,sql_lock->table,sql_lock->table_count);
   my_free(sql_lock);
   DBUG_VOID_RETURN;
 }
@@ -411,25 +410,8 @@ void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock)
   uint i,found;
   DBUG_ENTER("mysql_unlock_read_tables");
 
-  /* Move all write locks first */
-  THR_LOCK_DATA **lock=sql_lock->locks;
-  for (i=found=0 ; i < sql_lock->lock_count ; i++)
-  {
-    if (sql_lock->locks[i]->type > TL_WRITE_ALLOW_WRITE)
-    {
-      swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]);
-      lock++;
-      found++;
-    }
-  }
-  /* unlock the read locked tables */
-  if (i != found)
-  {
-    thr_multi_unlock(lock,i-found);
-    sql_lock->lock_count= found;
-  }
+  /* Call external lock for all tables to be unlocked */
 
-  /* Then do the same for the external locks */
   /* Move all write locked tables first */
   TABLE **table=sql_lock->table;
   for (i=found=0 ; i < sql_lock->table_count ; i++)
@@ -448,6 +430,27 @@ void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock)
     (void) unlock_external(thd,table,i-found);
     sql_lock->table_count=found;
   }
+
+  /* Call thr_unlock() for all tables to be unlocked */
+
+  /* Move all write locks first */
+  THR_LOCK_DATA **lock=sql_lock->locks;
+  for (i=found=0 ; i < sql_lock->lock_count ; i++)
+  {
+    if (sql_lock->locks[i]->type >= TL_WRITE_ALLOW_WRITE)
+    {
+      swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]);
+      lock++;
+      found++;
+    }
+  }
+  /* unlock the read locked tables */
+  if (i != found)
+  {
+    thr_multi_unlock(lock, i-found, 0);
+    sql_lock->lock_count= found;
+  }
+
   /* Fix the lock positions in TABLE */
   table= sql_lock->table;
   found= 0;
@@ -543,8 +546,21 @@ void mysql_lock_abort(THD *thd, TABLE *table, bool upgrade_lock)
 
   if ((locked= get_lock_data(thd, &table, 1, GET_LOCK_UNLOCK)))
   {
-    for (uint i=0; i < locked->lock_count; i++)
-      thr_abort_locks(locked->locks[i]->lock, upgrade_lock);
+    if (table->children_attached)
+    {
+      /*
+        Don't abort locks for underlying tables just because merge table
+        is deleted. Doing would cause anyone accessing these tables to
+        spin in open_table/close_table forever until lock is released.
+      */
+      thr_multi_unlock(locked->locks, locked->lock_count,
+                       THR_UNLOCK_UPDATE_STATUS);
+    }
+    else
+    {
+      for (uint i=0; i < locked->lock_count; i++)
+        thr_abort_locks(locked->locks[i]->lock, upgrade_lock);
+    }
     my_free(locked);
   }
   DBUG_VOID_RETURN;
@@ -583,21 +599,36 @@ bool mysql_lock_abort_for_thread(THD *thd, TABLE *table)
 }
 
 
+/**
+  Merge two thr_lock:s
+  mysql_lock_merge()
+
+  @param a	Original locks
+  @param b	New locks
+
+  @retval	New lock structure that contains a and b
+
+  @note
+  a and b are freed with my_free()
+*/
+
 MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b)
 {
   MYSQL_LOCK *sql_lock;
   TABLE **table, **end_table;
   DBUG_ENTER("mysql_lock_merge");
+  DBUG_PRINT("enter", ("a->lock_count: %u  b->lock_count: %u",
+                       a->lock_count, b->lock_count));
 
   if (!(sql_lock= (MYSQL_LOCK*)
 	my_malloc(sizeof(*sql_lock)+
-		  sizeof(THR_LOCK_DATA*)*(a->lock_count+b->lock_count)+
+		  sizeof(THR_LOCK_DATA*)*((a->lock_count+b->lock_count)*2) +
 		  sizeof(TABLE*)*(a->table_count+b->table_count),MYF(MY_WME))))
     DBUG_RETURN(0);				// Fatal error
   sql_lock->lock_count=a->lock_count+b->lock_count;
   sql_lock->table_count=a->table_count+b->table_count;
   sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1);
-  sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count);
+  sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2);
   memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks));
   memcpy(sql_lock->locks+a->lock_count,b->locks,
 	 b->lock_count*sizeof(*b->locks));
@@ -618,6 +649,18 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b)
     (*table)->lock_data_start+= a->lock_count;
   }
 
+  /*
+    Ensure that locks of the same tables share same data structures if we
+    reopen a table that is already open. This can happen for example with
+    MERGE tables.
+  */
+
+  /* Copy the lock data array. thr_merge_lock() reorders its content */
+  memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
+         sql_lock->lock_count * sizeof(*sql_lock->locks));
+  thr_merge_locks(sql_lock->locks + sql_lock->lock_count,
+                  a->lock_count, b->lock_count);
+
   /* Delete old, not needed locks */
   my_free(a);
   my_free(b);
@@ -687,7 +730,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
 
   /*
     Allocating twice the number of pointers for lock data for use in
-    thr_mulit_lock(). This function reorders the lock data, but cannot
+    thr_multi_lock(). This function reorders the lock data, but cannot
     update the table values. So the second part of the array is copied
     from the first part immediately before calling thr_multi_lock().
   */
diff --git a/sql/log.cc b/sql/log.cc
index 0e15c3b8e79..114cb3236b6 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -483,6 +483,7 @@ bool Log_to_csv_event_handler::
   Open_tables_backup open_tables_backup;
   ulonglong save_thd_options;
   bool save_time_zone_used;
+  DBUG_ENTER("log_general");
 
   /*
     CSV uses TIME_to_timestamp() internally if table needs to be repaired
@@ -518,7 +519,7 @@ bool Log_to_csv_event_handler::
   need_close= TRUE;
 
   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
-      table->file->ha_rnd_init(0))
+      table->file->ha_rnd_init_with_error(0))
     goto err;
 
   need_rnd_end= TRUE;
@@ -596,7 +597,7 @@ err:
 
   thd->variables.option_bits= save_thd_options;
   thd->time_zone_used= save_time_zone_used;
-  return result;
+  DBUG_RETURN(result);
 }
 
 
@@ -664,7 +665,7 @@ bool Log_to_csv_event_handler::
   need_close= TRUE;
 
   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
-      table->file->ha_rnd_init(0))
+      table->file->ha_rnd_init_with_error(0))
     goto err;
 
   need_rnd_end= TRUE;
@@ -920,6 +921,13 @@ void Log_to_file_event_handler::flush()
     mysql_slow_log.reopen_file();
 }
 
+void Log_to_file_event_handler::flush_slow_log()
+{
+  /* reopen slow log file */
+  if (opt_slow_log)
+    mysql_slow_log.reopen_file();
+}
+
 /*
   Log error with all enabled log event handlers
 
@@ -1015,8 +1023,6 @@ void LOGGER::init_log_tables()
 
 bool LOGGER::flush_logs(THD *thd)
 {
-  int rc= 0;
-
   /*
     Now we lock logger, as nobody should be able to use logging routines while
     log tables are closed
@@ -1028,7 +1034,25 @@ bool LOGGER::flush_logs(THD *thd)
 
   /* end of log flush */
   logger.unlock();
-  return rc;
+  return 0;
+}
+
+
+#error remove percona's flush log implementation
+bool LOGGER::flush_slow_log(THD *thd)
+{
+  /*
+    Now we lock logger, as nobody should be able to use logging routines while
+    log tables are closed
+  */
+  logger.lock_exclusive();
+
+  /* reopen log files */
+  file_log_handler->flush_slow_log();
+
+  /* end of log flush */
+  logger.unlock();
+  return 0;
 }
 
 
@@ -1131,7 +1155,7 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
     /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
     user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
                              sctx->priv_user ? sctx->priv_user : "", "[",
-                             sctx->user ? sctx->user : "", "] @ ",
+                             sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
                              sctx->host ? sctx->host : "", " [",
                              sctx->ip ? sctx->ip : "", "]", NullS) -
                     user_host_buff);
@@ -1154,6 +1178,17 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length,
       query_length= command_name[thd->command].length;
     }
 
+    if (!query_length) 
+    {
+      /*
+        Not a real query; Reset counts for slow query logging
+        (QQ: Wonder if this is really needed)
+      */
+      thd->sent_row_count= thd->examined_row_count= 0;
+      thd->query_plan_flags= QPLAN_INIT;
+      thd->query_plan_fsort_passes= 0;
+    }
+
     for (current_handler= slow_log_handler_list; *current_handler ;)
       error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
                                             user_host_buff, user_host_len,
@@ -1351,6 +1386,7 @@ void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
 {
   my_bool *tmp_opt= 0;
   MYSQL_LOG *file_log;
+  LINT_INIT(file_log);
 
   switch (log_type) {
   case QUERY_LOG_SLOW:
@@ -1874,6 +1910,7 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd)
   DBUG_RETURN(checked);
 }
 
+
 /**
   @note
   How do we handle this (unlikely but legal) case:
@@ -1947,17 +1984,17 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
 
 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
 {
-  char magic[4];
+  uchar magic[4];
   DBUG_ASSERT(my_b_tell(log) == 0);
 
-  if (my_b_read(log, (uchar*) magic, sizeof(magic)))
+  if (my_b_read(log, magic, sizeof(magic)))
   {
     *errmsg = "I/O error reading the header from the binary log";
     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
 		    log->error);
     return 1;
   }
-  if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
+  if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
   {
     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
     return 1;
@@ -2577,19 +2614,39 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
       if (my_b_write(&log_file, (uchar*) "\n", 1))
         tmp_errno= errno;
     }
+    
     /* For slow query log */
     sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
     sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
     if (my_b_printf(&log_file,
-                    "# Query_time: %s  Lock_time: %s"
-                    " Rows_sent: %lu  Rows_examined: %lu\n",
+                    "# Thread_id: %lu  Schema: %s  QC_hit: %s\n" \
+                    "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu\n",
+                    (ulong) thd->thread_id, (thd->db ? thd->db : ""),
+                    ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
                     query_time_buff, lock_time_buff,
                     (ulong) thd->sent_row_count,
-                    (ulong) thd->examined_row_count) == (uint) -1)
+                    (ulong) thd->examined_row_count) == (size_t) -1)
       tmp_errno= errno;
+     if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
+         (thd->query_plan_flags &
+          (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
+           QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK)) &&
+         my_b_printf(&log_file,
+                     "# Full_scan: %s  Full_join: %s  "
+                     "Tmp_table: %s  Tmp_table_on_disk: %s\n"
+                     "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu\n",
+                     ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
+                     ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
+                      "Yes" : "No"),
+                     thd->query_plan_fsort_passes) == (size_t) -1)
+       tmp_errno= errno;
     if (thd->db && strcmp(thd->db, db))
     {						// Database changed
-      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
+      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (size_t) -1)
         tmp_errno= errno;
       strmov(db,thd->db);
     }
@@ -2881,7 +2938,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
 	an extension for the binary log files.
 	In this case we write a standard header to it.
       */
-      if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
+      if (my_b_safe_write(&log_file, BINLOG_MAGIC,
 			  BIN_LOG_HEADER_SIZE))
         goto err;
       bytes_written+= BIN_LOG_HEADER_SIZE;
@@ -4675,6 +4732,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
   */
   if (likely(is_open()))
   {
+    my_off_t my_org_b_tell;
 #ifdef HAVE_REPLICATION
     /*
       In the future we need to add to the following if tests like
@@ -4682,7 +4740,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
       binlog_[wild_]{do|ignore}_table?" (WL#1049)"
     */
     const char *local_db= event_info->get_db();
-    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
+    if ((!(thd->variables.option_bits & OPTION_BIN_LOG)) ||
 	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
          thd->lex->sql_command != SQLCOM_SAVEPOINT &&
          !binlog_filter->db_ok(local_db)))
@@ -4690,6 +4748,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
 #endif /* HAVE_REPLICATION */
 
     IO_CACHE *file= NULL;
+    my_org_b_tell= my_b_tell(file);
 
     if (event_info->use_direct_logging())
     {
@@ -4723,7 +4782,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
        of the SQL command. If row-based binlogging, Insert_id, Rand
        and other kind of "setting context" events are not needed.
     */
-    if (thd)
     {
       if (!thd->is_current_stmt_binlog_format_row())
       {
@@ -4784,6 +4842,9 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
         DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
       goto err;
 
+    status_var_add(thd->status_var.binlog_bytes_written,
+                   my_b_tell(file) - my_org_b_tell);
+
     error= 0;
 
 err:
@@ -4937,6 +4998,7 @@ uint MYSQL_BIN_LOG::next_file_id()
 
   SYNOPSIS
     write_cache()
+    thd      Current_thread
     cache    Cache to write to the binary log
     lock_log True if the LOCK_log mutex should be aquired, false otherwise
     sync_log True if the log should be flushed and synced
@@ -4946,7 +5008,8 @@ uint MYSQL_BIN_LOG::next_file_id()
     be reset as a READ_CACHE to be able to read the contents from it.
  */
 
-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
+                               bool sync_log)
 {
   Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
 
@@ -4993,6 +5056,7 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
       /* write the first half of the split header */
       if (my_b_write(&log_file, header, carry))
         return ER_ERROR_ON_WRITE;
+      status_var_add(thd->status_var.binlog_bytes_written, carry);
 
       /*
         copy fixed second half of header to cache so the correct
@@ -5061,6 +5125,8 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
     /* Write data to the binary log file */
     if (my_b_write(&log_file, cache->read_pos, length))
       return ER_ERROR_ON_WRITE;
+    status_var_add(thd->status_var.binlog_bytes_written, length);
+
     cache->read_pos=cache->read_end;		// Mark buffer used up
   } while ((length= my_b_fill(cache)));
 
@@ -5112,6 +5178,7 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
   if (lock)
     mysql_mutex_lock(&LOCK_log);
   error= ev.write(&log_file);
+  status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
   if (lock)
   {
     if (!error && !(error= flush_and_sync(0)))
@@ -5171,20 +5238,29 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
       Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
       if (qinfo.write(&log_file))
         goto err;
+
+      status_var_add(thd->status_var.binlog_bytes_written, qinfo.data_written);
+
       DBUG_EXECUTE_IF("crash_before_writing_xid",
                       {
-                        if ((write_error= write_cache(cache, false, true)))
+                        if ((write_error= write_cache(thd, cache, FALSE,
+                                                      TRUE)))
                           DBUG_PRINT("info", ("error writing binlog cache: %d",
                                                write_error));
                         DBUG_PRINT("info", ("crashing before writing xid"));
                         DBUG_ABORT();
                       });
 
-      if ((write_error= write_cache(cache, false, false)))
+      if ((write_error= write_cache(thd, cache, FALSE, FALSE)))
         goto err;
 
-      if (commit_event && commit_event->write(&log_file))
-        goto err;
+      if (commit_event)
+      {
+        if (commit_event->write(&log_file))
+          goto err;
+        status_var_add(thd->status_var.binlog_bytes_written,
+                       commit_event->data_written);
+      }
 
       if (incident && write_incident(thd, FALSE))
         goto err;
@@ -5704,39 +5780,39 @@ void sql_print_information(const char *format, ...)
 /********* transaction coordinator log for 2pc - mmap() based solution *******/
 
 /*
-  the log consists of a file, mmapped to a memory.
-  file is divided on pages of tc_log_page_size size.
-  (usable size of the first page is smaller because of log header)
-  there's PAGE control structure for each page
-  each page (or rather PAGE control structure) can be in one of three
-  states - active, syncing, pool.
-  there could be only one page in active or syncing states,
-  but many in pool - pool is fifo queue.
-  usual lifecycle of a page is pool->active->syncing->pool
-  "active" page - is a page where new xid's are logged.
-  the page stays active as long as syncing slot is taken.
-  "syncing" page is being synced to disk. no new xid can be added to it.
-  when the sync is done the page is moved to a pool and an active page
+  the log consists of a file, mapped to memory.
+  file is divided into pages of tc_log_page_size size.
+  (usable size of the first page is smaller because of the log header)
+  there is a PAGE control structure for each page
+  each page (or rather its PAGE control structure) can be in one of
+  the three states - active, syncing, pool.
+  there could be only one page in the active or syncing state,
+  but many in pool - pool is a fifo queue.
+  the usual lifecycle of a page is pool->active->syncing->pool.
+  the "active" page is a page where new xid's are logged.
+  the page stays active as long as the syncing slot is taken.
+  the "syncing" page is being synced to disk. no new xid can be added to it.
+  when the syncing is done the page is moved to a pool and an active page
   becomes "syncing".
 
   the result of such an architecture is a natural "commit grouping" -
   If commits are coming faster than the system can sync, they do not
-  stall. Instead, all commit that came since the last sync are
-  logged to the same page, and they all are synced with the next -
+  stall. Instead, all commits that came since the last sync are
+  logged to the same "active" page, and they all are synced with the next -
   one - sync. Thus, thought individual commits are delayed, throughput
   is not decreasing.
 
-  when a xid is added to an active page, the thread of this xid waits
+  when an xid is added to an active page, the thread of this xid waits
   for a page's condition until the page is synced. when syncing slot
   becomes vacant one of these waiters is awaken to take care of syncing.
   it syncs the page and signals all waiters that the page is synced.
   PAGE::waiters is used to count these waiters, and a page may never
   become active again until waiters==0 (that is all waiters from the
-  previous sync have noticed the sync was completed)
+  previous sync have noticed that the sync was completed)
 
   note, that the page becomes "dirty" and has to be synced only when a
   new xid is added into it. Removing a xid from a page does not make it
-  dirty - we don't sync removals to disk.
+  dirty - we don't sync xid removals to disk.
 */
 
 ulong tc_log_page_waits= 0;
@@ -5745,7 +5821,7 @@ ulong tc_log_page_waits= 0;
 
 #define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)
 
-static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
+static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};
 
 ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
 ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
@@ -5803,7 +5879,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
   inited=2;
 
   npages=(uint)file_length/tc_log_page_size;
-  DBUG_ASSERT(npages >= 3);             // to guarantee non-empty pool
+  if (npages < 3)             // to guarantee non-empty pool
+    goto err;
   if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
     goto err;
   inited=3;
@@ -5814,9 +5891,9 @@ int TC_LOG_MMAP::open(const char *opt_name)
     pg->state=POOL;
     mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
     mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
-    pg->start=(my_xid *)(data + i*tc_log_page_size);
-    pg->end=(my_xid *)(pg->start + tc_log_page_size);
+    pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
     pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
+    pg->end=pg->start + pg->size;
   }
   pages[0].size=pages[0].free=
                 (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
@@ -5860,7 +5937,7 @@ err:
     -# if there're waiters - take the one with the most free space.
 
   @todo
-    TODO page merging. try to allocate adjacent page first,
+    page merging. try to allocate adjacent page first,
     so that they can be flushed both in one sync
 */
 
@@ -5869,8 +5946,7 @@ void TC_LOG_MMAP::get_active_from_pool()
   PAGE **p, **best_p=0;
   int best_free;
 
-  if (syncing)
-    mysql_mutex_lock(&LOCK_pool);
+  mysql_mutex_lock(&LOCK_pool);
 
   do
   {
@@ -5890,20 +5966,21 @@ void TC_LOG_MMAP::get_active_from_pool()
   }
   while ((*best_p == 0 || best_free == 0) && overflow());
 
+  safe_mutex_assert_owner(&LOCK_active);
   active=*best_p;
-  if (active->free == active->size) // we've chosen an empty page
-  {
-    tc_log_cur_pages_used++;
-    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
-  }
 
   if ((*best_p)->next)              // unlink the page from the pool
     *best_p=(*best_p)->next;
   else
     pool_last=*best_p;
+  mysql_mutex_unlock(&LOCK_pool);
 
-  if (syncing)
-    mysql_mutex_unlock(&LOCK_pool);
+  mysql_mutex_lock(&active->lock);
+  if (active->free == active->size) // we've chosen an empty page
+  {
+    tc_log_cur_pages_used++;
+    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
+  }
 }
 
 /**
@@ -5958,7 +6035,7 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   mysql_mutex_lock(&LOCK_active);
 
   /*
-    if active page is full - just wait...
+    if the active page is full - just wait...
     frankly speaking, active->free here accessed outside of mutex
     protection, but it's safe, because it only means we may miss an
     unlog() for the active page, and we're not waiting for it here -
@@ -5970,9 +6047,17 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   /* no active page ? take one from the pool */
   if (active == 0)
     get_active_from_pool();
+  else
+    mysql_mutex_lock(&active->lock);
 
   p=active;
-  mysql_mutex_lock(&p->lock);
+
+  /*
+    p->free is always > 0 here because to decrease it one needs
+    to take p->lock and before it one needs to take LOCK_active.
+    But checked that active->free > 0 under LOCK_active and
+    haven't release it ever since
+  */
 
   /* searching for an empty slot */
   while (*p->ptr)
@@ -5986,38 +6071,51 @@ int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
   *p->ptr++= xid;
   p->free--;
   p->state= DIRTY;
-
-  /* to sync or not to sync - this is the question */
-  mysql_mutex_unlock(&LOCK_active);
-  mysql_mutex_lock(&LOCK_sync);
   mysql_mutex_unlock(&p->lock);
 
+  mysql_mutex_lock(&LOCK_sync);
   if (syncing)
   {                                          // somebody's syncing. let's wait
+    mysql_mutex_unlock(&LOCK_active);
+    mysql_mutex_lock(&p->lock);
     p->waiters++;
-    /*
-      note - it must be while (), not do ... while () here
-      as p->state may be not DIRTY when we come here
-    */
-    while (p->state == DIRTY && syncing)
+    for (;;)
+    {
+      int not_dirty = p->state != DIRTY;
+      mysql_mutex_unlock(&p->lock);
+      if (not_dirty || !syncing)
+        break;
       mysql_cond_wait(&p->cond, &LOCK_sync);
+      mysql_mutex_lock(&p->lock);
+    }
     p->waiters--;
     err= p->state == ERROR;
     if (p->state != DIRTY)                   // page was synced
     {
-      if (p->waiters == 0)
-        mysql_cond_signal(&COND_pool);       // in case somebody's waiting
       mysql_mutex_unlock(&LOCK_sync);
+      if (p->waiters == 0)
+        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
+      mysql_mutex_unlock(&p->lock);
       goto done;                             // we're done
     }
-  }                                          // page was not synced! do it now
-  DBUG_ASSERT(active == p && syncing == 0);
-  mysql_mutex_lock(&LOCK_active);
-  syncing=p;                                 // place is vacant - take it
-  active=0;                                  // page is not active anymore
-  mysql_cond_broadcast(&COND_active);        // in case somebody's waiting
-  mysql_mutex_unlock(&LOCK_active);
-  mysql_mutex_unlock(&LOCK_sync);
+    DBUG_ASSERT(!syncing);
+    mysql_mutex_unlock(&p->lock);
+    syncing = p;
+    mysql_mutex_unlock(&LOCK_sync);
+
+    mysql_mutex_lock(&LOCK_active);
+    active=0;                                  // page is not active anymore
+    mysql_cond_broadcast(&COND_active);
+    mysql_mutex_unlock(&LOCK_active);
+  }
+  else
+  {
+    syncing = p;                               // place is vacant - take it
+    mysql_mutex_unlock(&LOCK_sync);
+    active = 0;                                // page is not active anymore
+    mysql_cond_broadcast(&COND_active);
+    mysql_mutex_unlock(&LOCK_active);
+  }
   err= sync();
 
 done:
@@ -6034,7 +6132,7 @@ int TC_LOG_MMAP::sync()
     sit down and relax - this can take a while...
     note - no locks are held at this point
   */
-  err= my_msync(fd, syncing->start, 1, MS_SYNC);
+  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);
 
   /* page is synced. let's move it to the pool */
   mysql_mutex_lock(&LOCK_pool);
@@ -6042,14 +6140,23 @@ int TC_LOG_MMAP::sync()
   pool_last=syncing;
   syncing->next=0;
   syncing->state= err ? ERROR : POOL;
-  mysql_cond_broadcast(&syncing->cond);      // signal "sync done"
-  mysql_cond_signal(&COND_pool);             // in case somebody's waiting
+  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
   mysql_mutex_unlock(&LOCK_pool);
 
   /* marking 'syncing' slot free */
   mysql_mutex_lock(&LOCK_sync);
+  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
   syncing=0;
-  mysql_cond_signal(&active->cond);        // wake up a new syncer
+  /*
+    we check the "active" pointer without LOCK_active. Still, it's safe -
+    "active" can change from NULL to not NULL any time, but it
+    will take LOCK_sync before waiting on active->cond. That is, it can never
+    miss a signal.
+    And "active" can change to NULL only by the syncing thread
+    (the thread that will send a signal below)
+  */
+  if (active)
+    mysql_cond_signal(&active->cond);      // wake up a new syncer
   mysql_mutex_unlock(&LOCK_sync);
   return err;
 }
@@ -6066,13 +6173,13 @@ void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
 
   DBUG_ASSERT(*x == xid);
   DBUG_ASSERT(x >= p->start && x < p->end);
-  *x=0;
 
   mysql_mutex_lock(&p->lock);
+  *x=0;
   p->free++;
   DBUG_ASSERT(p->free <= p->size);
   set_if_smaller(p->ptr, x);
-  if (p->free == p->size)               // the page is completely empty
+  if (p->free == p->size)              // the page is completely empty
     statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
   if (p->waiters == 0)                 // the page is in pool and ready to rock
     mysql_cond_signal(&COND_pool);     // ping ... for overflow()
@@ -6115,7 +6222,7 @@ int TC_LOG_MMAP::recover()
   HASH xids;
   PAGE *p=pages, *end_p=pages+npages;
 
-  if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
+  if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
   {
     sql_print_error("Bad magic header in tc log");
     goto err1;
@@ -6420,3 +6527,20 @@ mysql_declare_plugin(binlog)
   NULL                        /* config options                  */
 }
 mysql_declare_plugin_end;
+maria_declare_plugin(binlog)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &binlog_storage_engine,
+  "binlog",
+  "MySQL AB",
+  "This is a pseudo storage engine to represent the binlog in a transaction",
+  PLUGIN_LICENSE_GPL,
+  binlog_init, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100 /* 1.0 */,
+  NULL,                       /* status variables                */
+  NULL,                       /* system variables                */
+  "1.0",                      /* string version */
+  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
+}
+maria_declare_plugin_end;
diff --git a/sql/log.h b/sql/log.h
index 5df05e7ff90..aeecc80a928 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -396,9 +396,9 @@ public:
 
   bool write(Log_event* event_info); // binary log write
   bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
-
   bool write_incident(THD *thd, bool lock);
-  int  write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+  int  write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
+                   bool flush_and_sync);
   void set_write_error(THD *thd);
   bool check_write_error(THD *thd);
 
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 16290c58685..d45fa1e1ba7 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -840,10 +840,9 @@ Log_event::do_shall_skip(Relay_log_info *rli)
   if ((server_id == ::server_id && !rli->replicate_same_server_id) ||
       (rli->slave_skip_counter == 1 && rli->is_in_group()))
     return EVENT_SKIP_IGNORE;
-  else if (rli->slave_skip_counter > 0)
+  if (rli->slave_skip_counter > 0)
     return EVENT_SKIP_COUNT;
-  else
-    return EVENT_SKIP_NOT;
+  return EVENT_SKIP_NOT;
 }
 
 
@@ -1140,7 +1139,7 @@ failed my_b_read"));
     goto err;
   }
   if ((res= read_log_event(buf, data_len, &error, description_event)))
-    res->register_temp_buf(buf);
+    res->register_temp_buf(buf, TRUE);
 
 err:
   UNLOCK_MUTEX;
@@ -1655,11 +1654,11 @@ log_event_print_value(IO_CACHE *file, const uchar *ptr,
       int i, end;
       char buff[512], *pos;
       pos= buff;
-      pos+= sprintf(buff, "%s", dec.sign() ? "-" : "");
+      pos+= my_sprintf(buff, (buff, "%s", dec.sign() ? "-" : ""));
       end= ROUND_UP(dec.frac) + ROUND_UP(dec.intg)-1;
       for (i=0; i < end; i++)
-        pos+= sprintf(pos, "%09d.", dec.buf[i]);
-      pos+= sprintf(pos, "%09d", dec.buf[i]);
+        pos+= my_sprintf(pos, (pos, "%09d.", dec.buf[i]));
+      pos+= my_sprintf(pos, (pos, "%09d", dec.buf[i]));
       my_b_printf(file, "%s", buff);
       my_snprintf(typestr, typestr_length, "DECIMAL(%d,%d)",
                   precision, decimals);
@@ -1708,13 +1707,13 @@ log_event_print_value(IO_CACHE *file, const uchar *ptr,
 
   case MYSQL_TYPE_DATETIME:
     {
-      size_t d, t;
+      ulong d, t;
       uint64 i64= uint8korr(ptr); /* YYYYMMDDhhmmss */
-      d= i64 / 1000000;
-      t= i64 % 1000000;
+      d= (ulong) (i64 / 1000000);
+      t= (ulong) (i64 % 1000000);
       my_b_printf(file, "%04d-%02d-%02d %02d:%02d:%02d",
-                  d / 10000, (d % 10000) / 100, d % 100,
-                  t / 10000, (t % 10000) / 100, t % 100);
+                  (int) (d / 10000), (int) (d % 10000) / 100, (int) (d % 100),
+                  (int) (t / 10000), (int) (t % 10000) / 100, (int) t % 100);
       my_snprintf(typestr, typestr_length, "DATETIME");
       return 8;
     }
@@ -2165,7 +2164,7 @@ static void write_str_with_code_and_len(uchar **dst, const char *src,
   */
   DBUG_ASSERT(len <= 255);
   DBUG_ASSERT(src);
-  *((*dst)++)= code;
+  *((*dst)++)= (uchar) code;
   *((*dst)++)= (uchar) len;
   bmove(*dst, src, len);
   (*dst)+= len;
@@ -4755,7 +4754,7 @@ int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli,
   */
   lex_start(thd);
   thd->lex->local_file= local_fname;
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, 0);
 
   if (!use_rli_only_for_errors)
   {
@@ -5628,8 +5627,9 @@ void User_var_log_event::pack_info(Protocol* protocol)
       }
       break;
     case ROW_RESULT:
+    case IMPOSSIBLE_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return;
     }
   }
@@ -5743,8 +5743,9 @@ bool User_var_log_event::write(IO_CACHE* file)
       pos= (uchar*) val;
       break;
     case ROW_RESULT:
+    case IMPOSSIBLE_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return 0;
     }
     int4store(buf1 + 2 + UV_CHARSET_NUMBER_SIZE, val_len);
@@ -5864,7 +5865,7 @@ void User_var_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info)
       break;
     case ROW_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return;
     }
   }
@@ -5926,8 +5927,9 @@ int User_var_log_event::do_apply_event(Relay_log_info const *rli)
       it= new Item_string(val, val_len, charset);
       break;
     case ROW_RESULT:
+    case IMPOSSIBLE_RESULT:
     default:
-      DBUG_ASSERT(1);
+      DBUG_ASSERT(0);
       return 0;
     }
   }
@@ -6368,7 +6370,7 @@ void Create_file_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
 void Create_file_log_event::pack_info(Protocol *protocol)
 {
-  char buf[NAME_LEN*2 + 30 + 21*2], *pos;
+  char buf[SAFE_NAME_LEN*2 + 30 + 21*2], *pos;
   pos= strmov(buf, "db=");
   memcpy(pos, db, db_len);
   pos= strmov(pos + db_len, ";table=");
@@ -6554,9 +6556,10 @@ void Append_block_log_event::print(FILE* file,
 void Append_block_log_event::pack_info(Protocol *protocol)
 {
   char buf[256];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u;block_len=%u",
-                      file_id, block_len);
+  uint length;
+  length= (uint) my_sprintf(buf,
+			    (buf, ";file_id=%u;block_len=%u", file_id,
+			     block_len));
   protocol->store(buf, length, &my_charset_bin);
 }
 
@@ -6591,7 +6594,7 @@ int Append_block_log_event::do_apply_event(Relay_log_info const *rli)
       as the present method does not call mysql_parse().
     */
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, 0);
     /* old copy may exist already */
     mysql_file_delete(key_file_log_event_data, fname, MYF(0));
     if ((fd= mysql_file_create(key_file_log_event_data,
@@ -6711,9 +6714,9 @@ void Delete_file_log_event::print(FILE* file,
 void Delete_file_log_event::pack_info(Protocol *protocol)
 {
   char buf[64];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u", (uint) file_id);
-  protocol->store(buf, length, &my_charset_bin);
+  uint length;
+  length= (uint) my_sprintf(buf, (buf, ";file_id=%u", (uint) file_id));
+  protocol->store(buf, (int32) length, &my_charset_bin);
 }
 #endif
 
@@ -6809,9 +6812,9 @@ void Execute_load_log_event::print(FILE* file,
 void Execute_load_log_event::pack_info(Protocol *protocol)
 {
   char buf[64];
-  size_t length;
-  length= my_snprintf(buf, sizeof(buf), ";file_id=%u", (uint) file_id);
-  protocol->store(buf, length, &my_charset_bin);
+  uint length;
+  length= (uint) my_sprintf(buf, (buf, ";file_id=%u", (uint) file_id));
+  protocol->store(buf, (int32) length, &my_charset_bin);
 }
 
 
@@ -7445,7 +7448,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("row_data", row_data, min(length, 32));
 #endif
 
@@ -7537,7 +7540,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
       we need to do any changes to that value after this function.
     */
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, 0);
     /*
       The current statement is just about to begin and 
       has not yet modified anything. Note, all.modified is reset
@@ -8242,7 +8245,7 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("event buffer", (uchar*) buf, event_len);
 #endif
 
@@ -8330,6 +8333,111 @@ Table_map_log_event::~Table_map_log_event()
   my_free(m_memory);
 }
 
+
+#ifdef MYSQL_CLIENT
+
+/*
+  Rewrite database name for the event to name specified by new_db
+  SYNOPSIS
+    new_db   Database name to change to
+    new_len  Length
+    desc     Event describing binlog that we're writing to.
+
+  DESCRIPTION
+    Reset db name. This function assumes that temp_buf member contains event
+    representation taken from a binary log. It resets m_dbnam and m_dblen and
+    rewrites temp_buf with new db name.
+
+  RETURN 
+    0     - Success
+    other - Error
+*/
+
+int Table_map_log_event::rewrite_db(const char* new_db, size_t new_len,
+                                    const Format_description_log_event* desc)
+{
+  DBUG_ENTER("Table_map_log_event::rewrite_db");
+  DBUG_ASSERT(temp_buf);
+
+  uint header_len= min(desc->common_header_len,
+                       LOG_EVENT_MINIMAL_HEADER_LEN) + TABLE_MAP_HEADER_LEN;
+  int len_diff;
+
+  if (!(len_diff= new_len - m_dblen))
+  {
+    memcpy((void*) (temp_buf + header_len + 1), new_db, m_dblen + 1);
+    memcpy((void*) m_dbnam, new_db, m_dblen + 1);
+    DBUG_RETURN(0);
+  }
+
+  // Create new temp_buf
+  ulong event_cur_len= uint4korr(temp_buf + EVENT_LEN_OFFSET);
+  ulong event_new_len= event_cur_len + len_diff;
+  char* new_temp_buf= (char*) my_malloc(event_new_len, MYF(MY_WME));
+
+  if (!new_temp_buf)
+  {
+    sql_print_error("Table_map_log_event::rewrite_db: "
+                    "failed to allocate new temp_buf (%d bytes required)",
+                    event_new_len);
+    DBUG_RETURN(-1);
+  }
+
+  // Rewrite temp_buf
+  char* ptr= new_temp_buf;
+  ulong cnt= 0;
+
+  // Copy header and change event length
+  memcpy(ptr, temp_buf, header_len);
+  int4store(ptr + EVENT_LEN_OFFSET, event_new_len);
+  ptr += header_len;
+  cnt += header_len;
+
+  // Write new db name length and new name
+  *ptr++ = new_len;
+  memcpy(ptr, new_db, new_len + 1);
+  ptr += new_len + 1;
+  cnt += m_dblen + 2;
+
+  // Copy rest part
+  memcpy(ptr, temp_buf + cnt, event_cur_len - cnt);
+
+  // Reregister temp buf
+  free_temp_buf();
+  register_temp_buf(new_temp_buf, TRUE);
+
+  // Reset m_dbnam and m_dblen members
+  m_dblen= new_len;
+
+  // m_dbnam resides in m_memory together with m_tblnam and m_coltype
+  uchar* memory= m_memory;
+  char const* tblnam= m_tblnam;
+  uchar* coltype= m_coltype;
+
+  m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
+                                     &m_dbnam, (uint) m_dblen + 1,
+                                     &m_tblnam, (uint) m_tbllen + 1,
+                                     &m_coltype, (uint) m_colcnt,
+                                     NullS);
+
+  if (!m_memory)
+  {
+    sql_print_error("Table_map_log_event::rewrite_db: "
+                    "failed to allocate new m_memory (%d + %d + %d bytes required)",
+                    m_dblen + 1, m_tbllen + 1, m_colcnt);
+    DBUG_RETURN(-1);
+  }
+
+  memcpy((void*)m_dbnam, new_db, m_dblen + 1);
+  memcpy((void*)m_tblnam, tblnam, m_tbllen + 1);
+  memcpy(m_coltype, coltype, m_colcnt);
+
+  my_free(memory, MYF(MY_WME));
+  DBUG_RETURN(0);
+}
+#endif /* MYSQL_CLIENT */
+
+
 /*
   Return value is an error code, one of:
 
@@ -8805,7 +8913,7 @@ Rows_log_event::write_row(const Relay_log_info *const rli,
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
       DBUG_PRINT("info",("Locating offending record using rnd_pos()"));
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -8837,10 +8945,10 @@ Rows_log_event::write_row(const Relay_log_info *const rli,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info",("index_read_idx() returns %s", HA_ERR(error)));
@@ -9062,10 +9170,10 @@ record_compare_exit:
 /**
   Locate the current row in event's table.
 
-  The current row is pointed by @c m_curr_row. Member @c m_width tells how many 
-  columns are there in the row (this can be differnet from the number of columns 
-  in the table). It is assumed that event's table is already open and pointed 
-  by @c m_table.
+  The current row is pointed by @c m_curr_row. Member @c m_width tells
+  how many columns are there in the row (this can be differnet from
+  the number of columns in the table). It is assumed that event's
+  table is already open and pointed by @c m_table.
 
   If a corresponding record is found in the table it is stored in 
   @c m_table->record[0]. Note that when record is located based on a primary 
@@ -9126,13 +9234,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0],
+              table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     DBUG_PRINT("info",("locating record using primary key (position)"));
-    int error= table->file->rnd_pos_by_record(table->record[0]);
+    int error= table->file->ha_rnd_pos_by_record(table->record[0]);
     if (error)
     {
       DBUG_PRINT("info",("rnd_pos returns error %d",error));
@@ -9178,7 +9287,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       Don't print debug messages when running valgrind since they can
       trigger false warnings.
      */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("key data", m_key, table->key_info->key_length);
 #endif
 
@@ -9192,9 +9301,9 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
       table->record[0][table->s->null_bytes - 1]|=
         256U - (1U << table->s->last_null_bit_pos);
 
-    if ((error= table->file->index_read_map(table->record[0], m_key, 
-                                            HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[0], m_key, 
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       DBUG_PRINT("info",("no record matching the key found in the table"));
       if (error == HA_ERR_RECORD_DELETED)
@@ -9208,7 +9317,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_PRINT("info",("found first matching record")); 
     DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
 #endif
@@ -9283,7 +9392,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[0])))
+      while ((error= table->file->ha_index_next(table->record[0])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -9307,11 +9416,10 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     int restart_count= 0; // Number of times scanning has restarted from top
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
     {
       DBUG_PRINT("info",("error initializing table scan"
                          " (ha_rnd_init returns %d)",error));
-      table->file->print_error(error, MYF(0));
       goto err;
     }
 
@@ -9319,7 +9427,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[0]);
+      error= table->file->ha_rnd_next(table->record[0]);
 
       DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
       switch (error) {
@@ -9336,7 +9444,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
 
       case HA_ERR_END_OF_FILE:
         if (++restart_count < 2)
-          table->file->ha_rnd_init(1);
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+          {
+            error= error2;
+            goto err;
+          }
+        }
         break;
 
       default:
@@ -9602,7 +9717,7 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
     Now we have the right row to update.  The old row (the one we're
     looking for) is in record[1] and the new row is in record[0].
   */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   /*
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
@@ -9680,7 +9795,6 @@ Incident_log_event::description() const
   };
 
   DBUG_PRINT("info", ("m_incident: %d", m_incident));
-
   return description[m_incident];
 }
 
@@ -9776,7 +9890,6 @@ st_print_event_info::st_print_event_info()
 }
 #endif
 
-
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
 Heartbeat_log_event::Heartbeat_log_event(const char* buf, uint event_len,
                     const Format_description_log_event* description_event)
@@ -9788,3 +9901,28 @@ Heartbeat_log_event::Heartbeat_log_event(const char* buf, uint event_len,
   log_ident= buf + header_size;
 }
 #endif
+
+#if defined(MYSQL_SERVER)
+/*
+  Access to the current replication position.
+
+  There is a dummy replacement for this in the embedded library that returns
+  FALSE; this is used by XtraDB to allow it to access replication stuff while
+  still being able to use the same plugin in both stand-alone and embedded.
+*/
+bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos,
+                           const char **group_relay_log_name,
+                           ulonglong *relay_log_pos)
+{
+#if defined(EMBEDDED_LIBRARY) || !defined(HAVE_REPLICATION)
+  return FALSE;
+#else
+  const Relay_log_info *rli= &(active_mi->rli);
+  *log_file_name= rli->group_master_log_name;
+  *log_pos= rli->group_master_log_pos +
+    (rli->future_event_relay_log_pos - rli->group_relay_log_pos);
+  *group_relay_log_name= rli->group_relay_log_name;
+  *relay_log_pos= rli->future_event_relay_log_pos;
+  return TRUE;
+#endif
+}
diff --git a/sql/log_event.h b/sql/log_event.h
index 5d7250d8ebd..d900ddd36e6 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -404,7 +404,7 @@ struct sql_ex_info
 #define ELQ_DUP_HANDLING_OFFSET ELQ_FILE_ID_OFFSET + 12
 
 /* 4 bytes which all binlogs should begin with */
-#define BINLOG_MAGIC        "\xfe\x62\x69\x6e"
+#define BINLOG_MAGIC        (const uchar*) "\xfe\x62\x69\x6e"
 
 /*
   The 2 flags below were useless :
@@ -930,6 +930,13 @@ public:
      event's type, and its content is distributed in the event-specific fields.
   */
   char *temp_buf;
+  
+  /*
+    TRUE <=> this event 'owns' temp_buf and should call my_free() when done
+    with it
+  */
+  bool event_owns_temp_buf;
+
   /*
     Timestamp on the master(for debugging and replication of
     NOW()/TIMESTAMP).  It is important for queries and LOAD DATA
@@ -1086,12 +1093,17 @@ public:
   Log_event(const char* buf, const Format_description_log_event
             *description_event);
   virtual ~Log_event() { free_temp_buf();}
-  void register_temp_buf(char* buf) { temp_buf = buf; }
+  void register_temp_buf(char* buf, bool must_free) 
+  { 
+    temp_buf= buf; 
+    event_owns_temp_buf= must_free;
+  }
   void free_temp_buf()
   {
     if (temp_buf)
     {
-      my_free(temp_buf);
+      if (event_owns_temp_buf)
+        my_free(temp_buf);
       temp_buf = 0;
     }
   }
@@ -1426,7 +1438,7 @@ protected:
     MODE_PIPES_AS_CONCAT==0x2
     MODE_ANSI_QUOTES==0x4
     MODE_IGNORE_SPACE==0x8
-    MODE_NOT_USED==0x10
+    MODE_IGNORE_BAD_TABLE_OPTIONS==0x10
     MODE_ONLY_FULL_GROUP_BY==0x20
     MODE_NO_UNSIGNED_SUBTRACTION==0x40
     MODE_NO_DIR_IN_CREATE==0x80
@@ -3431,6 +3443,8 @@ public:
     return new table_def(m_coltype, m_colcnt, m_field_metadata,
                          m_field_metadata_size, m_null_bits, m_flags);
   }
+  int rewrite_db(const char* new_name, size_t new_name_len,
+                 const Format_description_log_event*);
 #endif
   ulong get_table_id() const        { return m_table_id; }
   const char *get_table_name() const { return m_tblnam; }
@@ -4085,6 +4099,10 @@ private:
 int append_query_string(CHARSET_INFO *csinfo,
                         String const *from, String *to);
 
+bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos,
+                           const char **group_relay_log_name,
+                           ulonglong *relay_log_pos);
+
 /**
   @} (end of group Replication)
 */
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index 88df4970816..c8a5bf01a74 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -73,7 +73,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
       we need to do any changes to that value after this function.
     */
     lex_start(ev_thd);
-    mysql_reset_thd_for_next_command(ev_thd);
+    mysql_reset_thd_for_next_command(ev_thd, 0);
 
     /*
       This is a row injection, so we flag the "statement" as
@@ -545,7 +545,7 @@ replace_record(THD *thd, TABLE *table,
      */
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -571,10 +571,10 @@ replace_record(THD *thd, TABLE *table,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info", ("index_read_idx() returns error %d", error));
@@ -686,13 +686,13 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     table->file->position(table->record[0]);
-    int error= table->file->rnd_pos(table->record[0], table->file->ref);
+    int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
     /*
       rnd_pos() returns the record in table->record[0], so we have to
       move it to table->record[1].
@@ -716,7 +716,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
     DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
 #endif
@@ -730,8 +730,9 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     my_ptrdiff_t const pos=
       table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0;
     table->record[1][pos]= 0xFF;
-    if ((error= table->file->index_read_map(table->record[1], key, HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[1], key,
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       table->file->print_error(error, MYF(0));
       table->file->ha_index_end();
@@ -742,7 +743,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
     DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
 #endif
@@ -785,7 +786,7 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[1])))
+      while ((error= table->file->ha_index_next(table->record[1])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -807,14 +808,14 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
     int error;
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
       return error;
 
     /* Continue until we find the right record or have made a full loop */
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[1]);
+      error= table->file->ha_rnd_next(table->record[1]);
 
       DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
       DBUG_DUMP("record[1]", table->record[1], table->s->reclength);
@@ -831,15 +832,19 @@ static int find_and_fetch_row(TABLE *table, uchar *key)
         goto restart_rnd_next;
 
       case HA_ERR_END_OF_FILE:
-  if (++restart_count < 2)
-    table->file->ha_rnd_init(1);
-  break;
+        if (++restart_count < 2)
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+            DBUG_RETURN(error2);
+        }
+        break;
 
       default:
-  table->file->print_error(error, MYF(0));
+        table->file->print_error(error, MYF(0));
         DBUG_PRINT("info", ("Record not found"));
         table->file->ha_rnd_end();
-  DBUG_RETURN(error);
+        DBUG_RETURN(error);
       }
     }
     while (restart_count < 2 && record_compare(table));
@@ -1396,7 +1401,7 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("row_data", row_data, min(length, 32));
 #endif
 
@@ -2051,7 +2056,7 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli,
     if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
       DBUG_PRINT("info",("Locating offending record using rnd_pos()"));
-      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
       if (error)
       {
         DBUG_PRINT("info",("rnd_pos() returns error %d",error));
@@ -2083,10 +2088,10 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli,
 
       key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
                0);
-      error= table->file->index_read_idx_map(table->record[1], keynum,
-                                             (const uchar*)key.get(),
-                                             HA_WHOLE_KEY,
-                                             HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
       if (error)
       {
         DBUG_PRINT("info",("index_read_idx() returns error %d", error));
@@ -2237,13 +2242,13 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       length. Something along these lines should work:
 
       ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+              int error= table->file->ha_rnd_pos(table->record[0], table->file->ref);
       ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
                                  table->s->reclength) == 0);
 
     */
     DBUG_PRINT("info",("locating record using primary key (position)"));
-    int error= table->file->rnd_pos_by_record(table->record[0]);
+    int error= table->file->ha_rnd_pos_by_record(table->record[0]);
     if (error)
     {
       DBUG_PRINT("info",("rnd_pos returns error %d",error));
@@ -2289,7 +2294,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       Don't print debug messages when running valgrind since they can
       trigger false warnings.
      */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_DUMP("key data", m_key, table->key_info->key_length);
 #endif
 
@@ -2303,9 +2308,9 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
       table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0;
     table->record[0][pos]= 0xFF;
     
-    if ((error= table->file->index_read_map(table->record[0], m_key, 
-                                            HA_WHOLE_KEY,
-                                            HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_map(table->record[0], m_key, 
+                                               HA_WHOLE_KEY,
+                                               HA_READ_KEY_EXACT)))
     {
       DBUG_PRINT("info",("no record matching the key found in the table"));
       if (error == HA_ERR_RECORD_DELETED)
@@ -2319,7 +2324,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
     DBUG_PRINT("info",("found first matching record")); 
     DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
 #endif
@@ -2394,7 +2399,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
           256U - (1U << table->s->last_null_bit_pos);
       }
 
-      while ((error= table->file->index_next(table->record[0])))
+      while ((error= table->file->ha_index_next(table->record[0])))
       {
         /* We just skip records that has already been deleted */
         if (error == HA_ERR_RECORD_DELETED)
@@ -2418,11 +2423,10 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     int restart_count= 0; // Number of times scanning has restarted from top
 
     /* We don't have a key: search the table using rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    if ((error= table->file->ha_rnd_init_with_error(1)))
     {
       DBUG_PRINT("info",("error initializing table scan"
                          " (ha_rnd_init returns %d)",error));
-      table->file->print_error(error, MYF(0));
       DBUG_RETURN(error);
     }
 
@@ -2430,7 +2434,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
     do
     {
   restart_rnd_next:
-      error= table->file->rnd_next(table->record[0]);
+      error= table->file->ha_rnd_next(table->record[0]);
 
       switch (error) {
 
@@ -2442,7 +2446,11 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli)
 
       case HA_ERR_END_OF_FILE:
         if (++restart_count < 2)
-          table->file->ha_rnd_init(1);
+        {
+          int error2;
+          if ((error2= table->file->ha_rnd_init_with_error(1)))
+            DBUG_RETURN(error2);
+        }
         break;
 
       default:
@@ -2841,7 +2849,7 @@ Update_rows_log_event_old::do_exec_row(const Relay_log_info *const rli)
     Now we have the right row to update.  The old row (the one we're
     looking for) is in record[1] and the new row is in record[0].
   */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   /*
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
diff --git a/sql/log_slow.h b/sql/log_slow.h
new file mode 100644
index 00000000000..5559c002fde
--- /dev/null
+++ b/sql/log_slow.h
@@ -0,0 +1,107 @@
+/* Copyright (C) 2009 Monty Program Ab
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 or later of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/* Defining what to log to slow log */
+
+#define LOG_SLOW_VERBOSITY_INIT           0
+#define LOG_SLOW_VERBOSITY_INNODB         1 << 0
+#define LOG_SLOW_VERBOSITY_QUERY_PLAN     1 << 1
+
+#ifdef DEFINE_VARIABLES_LOG_SLOW
+
+/* Names here must be in same order as the bit's above */
+static const char *log_slow_verbosity_names[]=
+{
+  "innodb","query_plan",
+  NullS
+};
+
+static const unsigned int log_slow_verbosity_names_len[]=
+{
+  sizeof("innodb") -1,
+  sizeof("query_plan")-1
+};
+
+TYPELIB log_slow_verbosity_typelib=
+{  array_elements(log_slow_verbosity_names)-1,"", log_slow_verbosity_names,
+   (unsigned int *) log_slow_verbosity_names_len };
+
+#else
+extern TYPELIB log_slow_verbosity_typelib;
+#endif /* DEFINE_VARIABLES_LOG_SLOW */
+
+/* Defines for what kind of query plan was used and what to log */
+
+/*
+  We init the used query plan with a bit that is alwyas set and all 'no' bits
+  to enable easy testing of what to log in sql_log.cc
+*/
+#define QPLAN_INIT            (QPLAN_ALWAYS_SET | QPLAN_QC_NO)
+
+#define QPLAN_ADMIN           1 << 0
+#define QPLAN_FILESORT        1 << 1
+#define QPLAN_FILESORT_DISK   1 << 2
+#define QPLAN_FULL_JOIN       1 << 3
+#define QPLAN_FULL_SCAN       1 << 4
+#define QPLAN_QC              1 << 5
+#define QPLAN_QC_NO           1 << 6
+#define QPLAN_TMP_DISK        1 << 7
+#define QPLAN_TMP_TABLE       1 << 8
+/* ... */
+#define QPLAN_MAX             ((ulong) 1) << 31 /* reserved as placeholder */
+#define QPLAN_ALWAYS_SET      QPLAN_MAX
+#define QPLAN_VISIBLE_MASK    (~(QPLAN_ALWAYS_SET))
+
+#ifdef DEFINE_VARIABLES_LOG_SLOW
+/* Names here must be in same order as the bit's above */
+static const char *log_slow_filter_names[]= 
+{
+  "admin",
+  "filesort",
+  "filesort_on_disk",
+  "full_join",
+  "full_scan",
+  "query_cache",
+  "query_cache_miss",
+  "tmp_table",
+  "tmp_table_on_disk",
+  NullS
+};
+
+static const unsigned int log_slow_filter_names_len[]=
+{
+  sizeof("admin")-1,
+  sizeof("filesort")-1,
+  sizeof("filesort_on_disk")-1,
+  sizeof("full_join")-1,
+  sizeof("full_scan")-1,
+  sizeof("query_cache")-1,
+  sizeof("query_cache_miss")-1,
+  sizeof("tmp_table")-1,
+  sizeof("tmp_table_on_disk")-1
+};
+
+TYPELIB log_slow_filter_typelib=
+{  array_elements(log_slow_filter_names)-1,"", log_slow_filter_names,
+   (unsigned int *) log_slow_filter_names_len };
+
+#else
+extern TYPELIB log_slow_filter_typelib;
+#endif /* DEFINE_VARIABLES_LOG_SLOW */
+
+static inline ulong fix_log_slow_filter(ulong org_filter)
+{
+  return org_filter ? org_filter : QPLAN_ALWAYS_SET;
+}
diff --git a/sql/mf_iocache.cc b/sql/mf_iocache.cc
index d20d93ff0b9..af0cb660713 100644
--- a/sql/mf_iocache.cc
+++ b/sql/mf_iocache.cc
@@ -86,6 +86,12 @@ int _my_b_net_read(register IO_CACHE *info, uchar *Buffer,
 }
 
 } /* extern "C" */
+
+#elif defined(__WIN__)
+
+// Remove linker warning 4221 about empty file
+namespace { char dummy; };
+
 #endif /* HAVE_REPLICATION */
 
 
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
new file mode 100644
index 00000000000..53699c47688
--- /dev/null
+++ b/sql/multi_range_read.cc
@@ -0,0 +1,944 @@
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/****************************************************************************
+ * Default MRR implementation (MRR to non-MRR converter)
+ ***************************************************************************/
+
+/**
+  Get cost and other information about MRR scan over a known list of ranges
+
+  Calculate estimated cost and other information about an MRR scan for given
+  sequence of ranges.
+
+  @param keyno           Index number
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
+                         can't efficiently determine it
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that is expected to be actually
+                              used, or 0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @note
+    This method (or an overriding one in a derived class) must check for
+    thd->killed and return HA_POS_ERROR if it is not zero. This is required
+    for a user to be able to interrupt the calculation by killing the
+    connection/query.
+
+  @retval
+    HA_POS_ERROR  Error or the engine is unable to perform the requested
+                  scan. Values of OUT parameters are undefined.
+  @retval
+    other         OK, *cost contains cost of the scan, *bufsz and *flags
+                  contain scan parameters.
+*/
+
+ha_rows 
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges_arg,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  KEY_MULTI_RANGE range;
+  range_seq_t seq_it;
+  ha_rows rows, total_rows= 0;
+  uint n_ranges=0;
+  THD *thd= current_thd;
+  
+  /* Default MRR implementation doesn't need buffer */
+  *bufsz= 0;
+
+  seq_it= seq->init(seq_init_param, n_ranges, *flags);
+  while (!seq->next(seq_it, &range))
+  {
+    if (unlikely(thd->killed != 0))
+      return HA_POS_ERROR;
+    
+    n_ranges++;
+    key_range *min_endp, *max_endp;
+    if (range.range_flag & GEOM_FLAG)
+    {
+      /* In this case tmp_min_flag contains the handler-read-function */
+      range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
+      min_endp= &range.start_key;
+      max_endp= NULL;
+    }
+    else
+    {
+      min_endp= range.start_key.length? &range.start_key : NULL;
+      max_endp= range.end_key.length? &range.end_key : NULL;
+    }
+    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
+      rows= 1; /* there can be at most one row */
+    else
+    {
+      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
+                                                        max_endp)))
+      {
+        /* Can't scan one range => can't do MRR scan at all */
+        total_rows= HA_POS_ERROR;
+        break;
+      }
+    }
+    total_rows += rows;
+  }
+  
+  if (total_rows != HA_POS_ERROR)
+  {
+    /* The following calculation is the same as in multi_range_read_info(): */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    cost->zero();
+    cost->avg_io_cost= 1; /* assume random seeks */
+    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
+      cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
+    else
+      cost->io_count= read_time(keyno, n_ranges, total_rows);
+    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+  }
+  return total_rows;
+}
+
+
+/**
+  Get cost and other information about MRR scan over some sequence of ranges
+
+  Calculate estimated cost and other information about an MRR scan for some
+  sequence of ranges.
+
+  The ranges themselves will be known only at execution phase. When this
+  function is called we only know number of ranges and a (rough) E(#records)
+  within those ranges.
+
+  Currently this function is only called for "n-keypart singlepoint" ranges,
+  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
+
+  The flags parameter is a combination of those flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+  @param keyno           Index number
+  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
+                         range sequence.
+  @param n_rows          Estimated total number of records contained within all
+                         of the ranges
+  @param bufsz    INOUT  IN:  Size of the buffer available for use
+                         OUT: Size of the buffer that will be actually used, or
+                              0 if buffer is not needed.
+  @param flags    INOUT  A combination of HA_MRR_* flags
+  @param cost     OUT    Estimated cost of MRR access
+
+  @retval
+    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+          parameters.
+  @retval
+    other Error or can't perform the requested scan
+*/
+
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+                                       uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  *bufsz= 0; /* Default implementation doesn't need a buffer */
+
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  cost->zero();
+  cost->avg_io_cost= 1; /* assume random seeks */
+
+  /* Produce the same cost as non-MRR code does */
+  if (*flags & HA_MRR_INDEX_ONLY)
+    cost->io_count= keyread_time(keyno, n_ranges, n_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, n_rows);
+  return 0;
+}
+
+
+/**
+  Initialize the MRR scan
+
+  Initialize the MRR scan. This function may do heavyweight scan 
+  initialization like row prefetching/sorting/etc (NOTE: but better not do
+  it here as we may not need it, e.g. if we never satisfy WHERE clause on
+  previous tables. For many implementations it would be natural to do such
+  initializations in the first multi_read_range_next() call)
+
+  mode is a combination of the following flags: HA_MRR_SORTED,
+  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
+
+  @param seq             Range sequence to be traversed
+  @param seq_init_param  First parameter for seq->init()
+  @param n_ranges        Number of ranges in the sequence
+  @param mode            Flags, see the description section for the details
+  @param buf             INOUT: memory buffer to be used
+
+  @note
+    One must have called index_init() before calling this function. Several
+    multi_range_read_init() calls may be made in course of one query.
+
+    Until WL#2623 is done (see its text, section 3.2), the following will 
+    also hold:
+    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
+    This property will only be used by NDB handler until WL#2623 is done.
+     
+    Buffer memory management is done according to the following scenario:
+    The caller allocates the buffer and provides it to the callee by filling
+    the members of HANDLER_BUFFER structure.
+    The callee consumes all or some fraction of the provided buffer space, and
+    sets the HANDLER_BUFFER members accordingly.
+    The callee may use the buffer memory until the next multi_range_read_init()
+    call is made, all records have been read, or until index_end() call is
+    made, whichever comes first.
+
+  @retval 0  OK
+  @retval 1  Error
+*/
+
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  DBUG_ENTER("handler::multi_range_read_init");
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  mrr_funcs= *seq_funcs;
+  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
+  mrr_have_range= FALSE;
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Get next record in MRR scan
+
+  Default MRR implementation: read the next record
+
+  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
+                          Otherwise, the opaque value associated with the range
+                          that contains the returned record.
+
+  @retval 0      OK
+  @retval other  Error code
+*/
+
+int handler::multi_range_read_next(char **range_info)
+{
+  int UNINIT_VAR(result);
+  int range_res;
+  DBUG_ENTER("handler::multi_range_read_next");
+
+  if (!mrr_have_range)
+  {
+    mrr_have_range= TRUE;
+    goto start;
+  }
+
+  do
+  {
+    /* Save a call if there can be only one row in range. */
+    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
+    {
+      result= read_range_next();
+      /* On success or non-EOF errors jump to the end. */
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+    else
+    {
+      if (was_semi_consistent_read())
+        goto scan_it_again;
+      /*
+        We need to set this for the last range only, but checking this
+        condition is more expensive than just setting the result code.
+      */
+      result= HA_ERR_END_OF_FILE;
+    }
+
+start:
+    /* Try the next range(s) until one matches a record. */
+    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
+    {
+scan_it_again:
+      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
+                                 &mrr_cur_range.start_key : 0,
+                               mrr_cur_range.end_key.keypart_map ?
+                                 &mrr_cur_range.end_key : 0,
+                               test(mrr_cur_range.range_flag & EQ_RANGE),
+                               mrr_is_output_sorted);
+      if (result != HA_ERR_END_OF_FILE)
+        break;
+    }
+  }
+  while ((result == HA_ERR_END_OF_FILE) && !range_res);
+
+  *range_info= mrr_cur_range.ptr;
+  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
+  DBUG_RETURN(result);
+}
+
+/****************************************************************************
+ * DS-MRR implementation 
+ ***************************************************************************/
+
+/**
+  DS-MRR: Initialize and start MRR scan
+
+  Initialize and start the MRR scan. Depending on the mode parameter, this
+  may use default or DS-MRR implementation.
+
+  @param h               Table handler to be used
+  @param key             Index to be used
+  @param seq_funcs       Interval sequence enumeration functions
+  @param seq_init_param  Interval sequence enumeration parameter
+  @param n_ranges        Number of ranges in the sequence.
+  @param mode            HA_MRR_* modes to use
+  @param buf             INOUT Buffer to use
+
+  @retval 0     Ok, Scan started.
+  @retval other Error
+*/
+
+int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                           void *seq_init_param, uint n_ranges, uint mode,
+                           HANDLER_BUFFER *buf)
+{
+  uint elem_size;
+  Item *pushed_cond= NULL;
+  handler *new_h2= 0;
+  DBUG_ENTER("DsMrr_impl::dsmrr_init");
+
+  /*
+    index_merge may invoke a scan on an object for which dsmrr_info[_const]
+    has not been called, so set the owner handler here as well.
+  */
+  h= h_arg;
+  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
+  {
+    use_default_impl= TRUE;
+    const int retval=
+      h->handler::multi_range_read_init(seq_funcs, seq_init_param,
+                                        n_ranges, mode, buf);
+    DBUG_RETURN(retval);
+  }
+  rowids_buf= buf->buffer;
+
+  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+
+  if (is_mrr_assoc)
+    status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
+ 
+  rowids_buf_end= buf->buffer_end;
+  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+  rowids_buf_last= rowids_buf + 
+                      ((rowids_buf_end - rowids_buf)/ elem_size)*
+                      elem_size;
+  rowids_buf_end= rowids_buf_last;
+
+    /*
+    There can be two cases:
+    - This is the first call since index_init(), h2==NULL
+       Need to setup h2 then.
+    - This is not the first call, h2 is initalized and set up appropriately.
+       The caller might have called h->index_init(), need to switch h to
+       rnd_pos calls.
+  */
+  if (!h2)
+  {
+    /* Create a separate handler object to do rndpos() calls. */
+    THD *thd= current_thd;
+    /*
+      ::clone() takes up a lot of stack, especially on 64 bit platforms.
+      The constant 5 is an empiric result.
+    */
+    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
+      DBUG_RETURN(1);
+    DBUG_ASSERT(h->active_index != MAX_KEY);
+    uint mrr_keyno= h->active_index;
+
+    /* Create a separate handler object to do rndpos() calls. */
+    if (!(new_h2= h->clone(thd->mem_root)) || 
+        new_h2->ha_external_lock(thd, F_RDLCK))
+    {
+      delete new_h2;
+      DBUG_RETURN(1);
+    }
+
+    if (mrr_keyno == h->pushed_idx_cond_keyno)
+      pushed_cond= h->pushed_idx_cond;
+
+    /*
+      Caution: this call will invoke this->dsmrr_close(). Do not put the
+      created secondary table handler into this->h2 or it will delete it.
+    */
+    if (h->ha_index_end())
+    {
+      h2=new_h2;
+      goto error;
+    }
+
+    h2= new_h2; /* Ok, now can put it into h2 */
+    table->prepare_for_position();
+    h2->extra(HA_EXTRA_KEYREAD);
+  
+    if (h2->ha_index_init(mrr_keyno, FALSE))
+      goto error;
+
+    use_default_impl= FALSE;
+    if (pushed_cond)
+      h2->idx_cond_push(mrr_keyno, pushed_cond);
+  }
+  else
+  {
+    /* 
+      We get here when the access alternates betwen MRR scan(s) and non-MRR
+      scans.
+
+      Calling h->index_end() will invoke dsmrr_close() for this object,
+      which will delete h2. We need to keep it, so save put it away and dont
+      let it be deleted:
+    */
+    handler *save_h2= h2;
+    h2= NULL;
+    int res= (h->inited == handler::INDEX && h->ha_index_end());
+    h2= save_h2;
+    use_default_impl= FALSE;
+    if (res)
+      goto error;
+  }
+
+  if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+                                          mode, buf) || 
+      dsmrr_fill_buffer())
+  {
+    goto error;
+  }
+  /*
+    If the above call has scanned through all intervals in *seq, then
+    adjust *buf to indicate that the remaining buffer space will not be used.
+  */
+  if (dsmrr_eof) 
+    buf->end_of_used_area= rowids_buf_last;
+
+  /*
+     h->inited == INDEX may occur when 'range checked for each record' is
+     used.
+  */
+  if ((h->inited != handler::RND) && 
+      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) || 
+       (h->ha_rnd_init(FALSE))))
+      goto error;
+
+  use_default_impl= FALSE;
+  h->mrr_funcs= *seq_funcs;
+  
+  DBUG_RETURN(0);
+error:
+  h2->ha_index_or_rnd_end();
+  h2->ha_external_lock(current_thd, F_UNLCK);
+  h2->close();
+  delete h2;
+  h2= NULL;
+  DBUG_RETURN(1);
+}
+
+
+void DsMrr_impl::dsmrr_close()
+{
+  DBUG_ENTER("DsMrr_impl::dsmrr_close");
+  if (h2)
+  {
+    h2->ha_index_or_rnd_end();
+    h2->ha_external_lock(current_thd, F_UNLCK);
+    h2->close();
+    delete h2;
+    h2= NULL;
+  }
+  use_default_impl= TRUE;
+  DBUG_VOID_RETURN;
+}
+
+
+static int rowid_cmp(void *h, uchar *a, uchar *b)
+{
+  return ((handler*)h)->cmp_ref(a, b);
+}
+
+
+/**
+  DS-MRR: Fill the buffer with rowids and sort it by rowid
+
+  {This is an internal function of DiskSweep MRR implementation}
+  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
+  buffer. When the buffer is full or scan is completed, sort the buffer by 
+  rowid and return.
+  
+  The function assumes that rowids buffer is empty when it is invoked. 
+  
+  @param h  Table handler
+
+  @retval 0      OK, the next portion of rowids is in the buffer,
+                 properly ordered
+  @retval other  Error
+*/
+
+int DsMrr_impl::dsmrr_fill_buffer()
+{
+  char *range_info;
+  int res;
+  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
+
+  rowids_buf_cur= rowids_buf;
+  while ((rowids_buf_cur < rowids_buf_end) && 
+         !(res= h2->handler::multi_range_read_next(&range_info)))
+  {
+    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
+    if (h2->mrr_funcs.skip_index_tuple &&
+        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
+      continue;
+    
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    h2->position(table->record[0]);
+    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
+    rowids_buf_cur += h2->ref_length;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
+      rowids_buf_cur += sizeof(void*);
+    }
+  }
+
+  if (res && res != HA_ERR_END_OF_FILE)
+    DBUG_RETURN(res); 
+  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+
+  /* Sort the buffer contents by rowid */
+  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+  
+  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
+            (void*)h);
+  rowids_buf_last= rowids_buf_cur;
+  rowids_buf_cur=  rowids_buf;
+  DBUG_RETURN(0);
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_next() function
+*/
+
+int DsMrr_impl::dsmrr_next(char **range_info)
+{
+  int res;
+  uchar *cur_range_info= 0;
+  uchar *rowid;
+
+  if (use_default_impl)
+    return h->handler::multi_range_read_next(range_info);
+  
+  do
+  {
+    if (rowids_buf_cur == rowids_buf_last)
+    {
+      if (dsmrr_eof)
+      {
+        res= HA_ERR_END_OF_FILE;
+        goto end;
+      }
+      res= dsmrr_fill_buffer();
+      if (res)
+        goto end;
+    }
+   
+    /* return eof if there are no rowids in the buffer after re-fill attempt */
+    if (rowids_buf_cur == rowids_buf_last)
+    {
+      res= HA_ERR_END_OF_FILE;
+      goto end;
+    }
+    rowid= rowids_buf_cur;
+
+    if (is_mrr_assoc)
+      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+
+    rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
+    if (h2->mrr_funcs.skip_record &&
+	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
+      continue;
+    res= h->ha_rnd_pos(table->record[0], rowid);
+    break;
+  } while (true);
+ 
+  if (is_mrr_assoc)
+  {
+    memcpy(range_info, rowid + h->ref_length, sizeof(void*));
+  }
+end:
+  return res;
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_info() function
+*/
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+                               uint *bufsz, uint *flags, COST_VECT *cost)
+{  
+  ha_rows res;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
+                                         &def_flags, cost);
+  DBUG_ASSERT(!res);
+
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
+      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
+  {
+    /* Default implementation is choosen */
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return 0;
+}
+
+
+/**
+  DS-MRR Implementation: multi_range_read_info_const() function
+*/
+
+ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                 void *seq_init_param, uint n_ranges, 
+                                 uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  ha_rows rows;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
+                                                n_ranges, &def_bufsz, 
+                                                &def_flags, cost);
+  if (rows == HA_POS_ERROR)
+  {
+    /* Default implementation can't perform MRR scan => we can't either */
+    return rows;
+  }
+
+  /*
+    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
+    use the default MRR implementation (we need it for UPDATE/DELETE).
+    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
+  */
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
+      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return rows;
+}
+
+
+/**
+  Check if key has partially-covered columns
+
+  We can't use DS-MRR to perform range scans when the ranges are over
+  partially-covered keys, because we'll not have full key part values
+  (we'll have their prefixes from the index) and will not be able to check
+  if we've reached the end the range.
+
+  @param keyno  Key to check
+
+  @todo
+    Allow use of DS-MRR in cases where the index has partially-covered
+    components but they are not used for scanning.
+
+  @retval TRUE   Yes
+  @retval FALSE  No
+*/
+
+bool key_uses_partial_cols(TABLE *table, uint keyno)
+{
+  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
+  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
+  for (; kp != kp_end; kp++)
+  {
+    if (!kp->field->part_of_key.is_set(keyno))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+/**
+  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
+
+  Make the choice between using Default MRR implementation and DS-MRR.
+  This function contains common functionality factored out of dsmrr_info()
+  and dsmrr_info_const(). The function assumes that the default MRR
+  implementation's applicability requirements are satisfied.
+
+  @param keyno       Index number
+  @param rows        E(full rows to be retrieved)
+  @param flags  IN   MRR flags provided by the MRR user
+                OUT  If DS-MRR is choosen, flags of DS-MRR implementation
+                     else the value is not modified
+  @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
+                     else the value is not modified
+  @param cost   IN   Cost of default MRR implementation
+                OUT  If DS-MRR is choosen, cost of DS-MRR scan
+                     else the value is not modified
+
+  @retval TRUE   Default MRR implementation should be used
+  @retval FALSE  DS-MRR implementation should be used
+*/
+
+bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
+                                 uint *bufsz, COST_VECT *cost)
+{
+  COST_VECT dsmrr_cost;
+  bool res;
+  THD *thd= current_thd;
+  if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
+      (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
+       key_uses_partial_cols(table, keyno))
+  {
+    /* Use the default implementation */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    return TRUE;
+  }
+  
+  uint add_len= table->key_info[keyno].key_length + h->ref_length; 
+  *bufsz -= add_len;
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
+    return TRUE;
+  *bufsz += add_len;
+  
+  bool force_dsmrr;
+  /* 
+    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
+    DS-MRR and Default implementations cost. This allows one to force use of
+    DS-MRR whenever it is applicable without affecting other cost-based
+    choices.
+  */
+  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
+      dsmrr_cost.total_cost() > cost->total_cost())
+    dsmrr_cost= *cost;
+
+  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
+  {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
+    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
+    *cost= dsmrr_cost;
+    res= FALSE;
+  }
+  else
+  {
+    /* Use the default MRR implementation */
+    res= TRUE;
+  }
+  return res;
+}
+
+
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
+
+
+/**
+  Get cost of DS-MRR scan
+
+  @param keynr              Index to be used
+  @param rows               E(Number of rows to be scanned)
+  @param flags              Scan parameters (HA_MRR_* flags)
+  @param buffer_size INOUT  Buffer size
+  @param cost        OUT    The cost
+
+  @retval FALSE  OK
+  @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
+                 for even 1 rowid)
+*/
+
+bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                                         uint *buffer_size, COST_VECT *cost)
+{
+  ulong max_buff_entries, elem_size;
+  ha_rows rows_in_full_step, rows_in_last_step;
+  uint n_full_steps;
+  double index_read_cost;
+
+  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
+  max_buff_entries = *buffer_size / elem_size;
+
+  if (!max_buff_entries)
+    return TRUE; /* Buffer has not enough space for even 1 rowid */
+
+  /* Number of iterations we'll make with full buffer */
+  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
+  
+  /* 
+    Get numbers of rows we'll be processing in 
+     - non-last sweep, with full buffer 
+     - last iteration, with non-full buffer
+  */
+  rows_in_full_step= max_buff_entries;
+  rows_in_last_step= rows % max_buff_entries;
+  
+  /* Adjust buffer size if we expect to use only part of the buffer */
+  if (n_full_steps)
+  {
+    get_sort_and_sweep_cost(table, rows, cost);
+    cost->multiply(n_full_steps);
+  }
+  else
+  {
+    cost->zero();
+    *buffer_size= max(*buffer_size, 
+                      (size_t)(1.2*rows_in_last_step) * elem_size + 
+                      h->ref_length + table->key_info[keynr].key_length);
+  }
+  
+  COST_VECT last_step_cost;
+  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
+  cost->add(&last_step_cost);
+ 
+  if (n_full_steps != 0)
+    cost->mem_cost= *buffer_size;
+  else
+    cost->mem_cost= (double)rows_in_last_step * elem_size;
+  
+  /* Total cost of all index accesses */
+  index_read_cost= h->keyread_time(keynr, 1, (double)rows);
+  cost->add_io(index_read_cost, 1 /* Random seeks */);
+  return FALSE;
+}
+
+
+/* 
+  Get cost of one sort-and-sweep step
+
+  SYNOPSIS
+    get_sort_and_sweep_cost()
+      table       Table being accessed
+      nrows       Number of rows to be sorted and retrieved
+      cost   OUT  The cost
+
+  DESCRIPTION
+    Get cost of these operations:
+     - sort an array of #nrows ROWIDs using qsort
+     - read #nrows records from table in a sweep.
+*/
+
+static 
+void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
+{
+  if (nrows)
+  {
+    get_sweep_read_cost(table, nrows, FALSE, cost);
+    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
+    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+    if (cmp_op < 3)
+      cmp_op= 3;
+    cost->cpu_cost += cmp_op * log2(cmp_op);
+  }
+  else
+    cost->zero();
+}
+
+
+/**
+  Get cost of reading nrows table records in a "disk sweep"
+
+  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
+  for an ordered sequence of rowids.
+
+  We assume hard disk IO. The read is performed as follows:
+
+   1. The disk head is moved to the needed cylinder
+   2. The controller waits for the plate to rotate
+   3. The data is transferred
+
+  Time to do #3 is insignificant compared to #2+#1.
+
+  Time to move the disk head is proportional to head travel distance.
+
+  Time to wait for the plate to rotate depends on whether the disk head
+  was moved or not. 
+
+  If disk head wasn't moved, the wait time is proportional to distance
+  between the previous block and the block we're reading.
+
+  If the head was moved, we don't know how much we'll need to wait for the
+  plate to rotate. We assume the wait time to be a variate with a mean of
+  0.5 of full rotation time.
+
+  Our cost units are "random disk seeks". The cost of random disk seek is
+  actually not a constant, it depends one range of cylinders we're going
+  to access. We make it constant by introducing a fuzzy concept of "typical 
+  datafile length" (it's fuzzy as it's hard to tell whether it should
+  include index file, temp.tables etc). Then random seek cost is:
+
+    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
+
+  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
+
+  @param table             Table to be accessed
+  @param nrows             Number of rows to retrieve
+  @param interrupted       TRUE <=> Assume that the disk sweep will be
+                           interrupted by other disk IO. FALSE - otherwise.
+  @param cost         OUT  The cost.
+*/
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
+                         COST_VECT *cost)
+{
+  DBUG_ENTER("get_sweep_read_cost");
+
+  cost->zero();
+  if (table->file->primary_key_is_clustered())
+  {
+    cost->io_count= table->file->read_time(table->s->primary_key,
+                                           (uint) nrows, nrows);
+  }
+  else
+  {
+    double n_blocks=
+      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
+    double busy_blocks=
+      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
+    if (busy_blocks < 1.0)
+      busy_blocks= 1.0;
+
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
+                       busy_blocks));
+    cost->io_count= busy_blocks;
+
+    if (!interrupted)
+    {
+      /* Assume reading is done in one 'sweep' */
+      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
+                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+    }
+  }
+  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+  DBUG_VOID_RETURN;
+}
+
+
+/* **************************************************************************
+ * DS-MRR implementation ends
+ ***************************************************************************/
+
+
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
new file mode 100644
index 00000000000..90e2e4c93d6
--- /dev/null
+++ b/sql/multi_range_read.h
@@ -0,0 +1,70 @@
+/*
+  This file contains declarations for 
+   - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+*/
+
+/**
+  A Disk-Sweep MRR interface implementation
+
+  This implementation makes range (and, in the future, 'ref') scans to read
+  table rows in disk sweeps. 
+  
+  Currently it is used by MyISAM and InnoDB. Potentially it can be used with
+  any table handler that has non-clustered indexes and on-disk rows.
+*/
+
+class DsMrr_impl
+{
+public:
+  typedef void (handler::*range_check_toggle_func_t)(bool on);
+
+  DsMrr_impl()
+    : h2(NULL) {};
+  
+  /*
+    The "owner" handler object (the one that calls dsmrr_XXX functions.
+    It is used to retrieve full table rows by calling rnd_pos().
+  */
+  handler *h;
+  TABLE *table; /* Always equal to h->table */
+private:
+  /* Secondary handler object.  It is used for scanning the index */
+  handler *h2;
+
+  /* Buffer to store rowids, or (rowid, range_id) pairs */
+  uchar *rowids_buf;
+  uchar *rowids_buf_cur;   /* Current position when reading/writing */
+  uchar *rowids_buf_last;  /* When reading: end of used buffer space */
+  uchar *rowids_buf_end;   /* End of the buffer */
+
+  bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
+
+  /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
+  bool is_mrr_assoc;
+
+  bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
+public:
+  void init(handler *h_arg, TABLE *table_arg)
+  {
+    h= h_arg; 
+    table= table_arg;
+  }
+  int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, 
+                 uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+  void dsmrr_close();
+  int dsmrr_fill_buffer();
+  int dsmrr_next(char **range_info);
+
+  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
+                     uint *flags, COST_VECT *cost);
+
+  ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, 
+                            void *seq_init_param, uint n_ranges, uint *bufsz,
+                            uint *flags, COST_VECT *cost);
+private:
+  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
+                       COST_VECT *cost);
+  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
+                               uint *buffer_size, COST_VECT *cost);
+};
+
diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc
index 8a31cec6721..72505054902 100644
--- a/sql/my_decimal.cc
+++ b/sql/my_decimal.cc
@@ -41,7 +41,7 @@ int decimal_operation_results(int result)
   case E_DEC_TRUNCATED:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
 			WARN_DATA_TRUNCATED, ER(WARN_DATA_TRUNCATED),
-			"", (long)-1);
+			"", (ulong) 0);
     break;
   case E_DEC_OVERFLOW:
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -57,7 +57,7 @@ int decimal_operation_results(int result)
     push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
 			ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
 			ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
-			"decimal", "", "", (long)-1);
+			"decimal", "", "", (ulong) 0);
     break;
   case E_DEC_OOM:
     my_error(ER_OUT_OF_RESOURCES, MYF(0));
@@ -305,12 +305,12 @@ print_decimal(const my_decimal *dec)
   int i, end;
   char buff[512], *pos;
   pos= buff;
-  pos+= sprintf(buff, "Decimal: sign: %d  intg: %d  frac: %d  { ",
-                dec->sign(), dec->intg, dec->frac);
+  pos+= my_sprintf(buff, (buff, "Decimal: sign: %d  intg: %d  frac: %d  { ",
+                          dec->sign(), dec->intg, dec->frac));
   end= ROUND_UP(dec->frac)+ROUND_UP(dec->intg)-1;
   for (i=0; i < end; i++)
-    pos+= sprintf(pos, "%09d, ", dec->buf[i]);
-  pos+= sprintf(pos, "%09d }\n", dec->buf[i]);
+    pos+= my_sprintf(pos, (pos, "%09d, ", dec->buf[i]));
+  pos+= my_sprintf(pos, (pos, "%09d }\n", dec->buf[i]));
   fputs(buff, DBUG_FILE);
 }
 
diff --git a/sql/my_decimal.h b/sql/my_decimal.h
index abf4b178422..330ded0c272 100644
--- a/sql/my_decimal.h
+++ b/sql/my_decimal.h
@@ -106,7 +106,7 @@ public:
   {
     len= DECIMAL_BUFF_LENGTH;
     buf= buffer;
-#if !defined (HAVE_purify) && !defined(DBUG_OFF)
+#if !defined (HAVE_valgrind) && !defined(DBUG_OFF)
     /* Set buffer to 'random' value to find wrong buffer usage */
     for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++)
       buffer[i]= i;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index af98ed91adf..658ec2b1bd9 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -1,4 +1,5 @@
 /* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+   2009-2010 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -50,6 +51,9 @@
 #include "derror.h"       // init_errmessage
 #include "des_key_file.h" // load_des_key_file
 #include "sql_manager.h"  // stop_handle_manager, start_handle_manager
+
+#define DEFINE_VARIABLES_LOG_SLOW          // Declare variables in log_slow.h
+
 #include <m_ctype.h>
 #include <my_dir.h>
 #include <my_bit.h>
@@ -57,6 +61,7 @@
 #include "rpl_mi.h"
 #include "sql_repl.h"
 #include "rpl_filter.h"
+#include "client_settings.h"
 #include "repl_failsafe.h"
 #include <my_stacktrace.h>
 #include "mysqld_suffix.h"
@@ -65,6 +70,7 @@
 #include "sql_audit.h"
 #include "probes_mysql.h"
 #include "scheduler.h"
+#include <waiting_threads.h>
 #include "debug_sync.h"
 #include "sql_callback.h"
 
@@ -104,9 +110,9 @@
 char pstack_file_name[80];
 #endif /* __linux__ */
 
-/* We have HAVE_purify below as this speeds up the shutdown of MySQL */
+/* We have HAVE_valgrind below as this speeds up the shutdown of MySQL */
 
-#if defined(HAVE_DEC_3_2_THREADS) || defined(SIGNALS_DONT_BREAK_READ) || defined(HAVE_purify) && defined(__linux__)
+#if defined(HAVE_DEC_3_2_THREADS) || defined(SIGNALS_DONT_BREAK_READ) || defined(HAVE_valgrind) && defined(__linux__)
 #define HAVE_CLOSE_SERVER_SOCK 1
 #endif
 
@@ -195,6 +201,10 @@ typedef fp_except fp_except_t;
 # endif
 #endif
 
+#ifndef HAVE_FCNTL
+#define fcntl(X,Y,Z) 0
+#endif
+
 extern "C" my_bool reopen_fstreams(const char *filename,
                                    FILE *outstream, FILE *errstream);
 
@@ -283,6 +293,7 @@ static TYPELIB tc_heuristic_recover_typelib=
   tc_heuristic_recover_names, NULL
 };
 
+const int server_maturity= MariaDB_PLUGIN_MATURITY_UNKNOWN;
 const char *first_keyword= "first", *binary_keyword= "BINARY";
 const char *my_localhost= "localhost", *delayed_user= "DELAYED";
 
@@ -331,8 +342,8 @@ static bool volatile select_thread_in_use, signal_thread_in_use;
 static bool volatile ready_to_exit;
 static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0;
 static my_bool opt_short_log_format= 0;
+static my_bool opt_sync= 0;
 static uint kill_cached_threads, wake_thread;
-static ulong killed_threads;
 static ulong max_used_connections;
 static volatile ulong cached_thread_count= 0;
 static char *mysqld_user, *mysqld_chroot;
@@ -346,16 +357,17 @@ char *default_storage_engine;
 static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME;
 static I_List<THD> thread_cache;
 static bool binlog_format_used= false;
-
 LEX_STRING opt_init_connect, opt_init_slave;
-
 static mysql_cond_t COND_thread_cache, COND_flush_thread_cache;
 
+static ulong opt_my_crc_dbug_check;
+
 /* Global variables */
 
 bool opt_bin_log, opt_ignore_builtin_innodb= 0;
 my_bool opt_log, opt_slow_log;
 ulonglong log_output_options;
+my_bool opt_userstat_running;
 my_bool opt_log_queries_not_using_indexes= 0;
 bool opt_error_log= IF_WIN(1,0);
 bool opt_disable_networking=0, opt_skip_show_db=0;
@@ -386,7 +398,7 @@ bool in_bootstrap= FALSE;
    @brief 'grant_option' is used to indicate if privileges needs
    to be checked, in which case the lock, LOCK_grant, is used
    to protect access to the grant table.
-   @note This flag is dropped in 5.1 
+   @note This flag is dropped in 5.1
    @see grant_init()
  */
 bool volatile grant_option;
@@ -444,6 +456,7 @@ const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS};
 static bool calling_initgroups= FALSE; /**< Used in SIGSEGV handler. */
 #endif
 uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
+uint mysqld_extra_port;
 uint mysqld_port_timeout;
 ulong delay_key_write_options;
 uint protocol_version;
@@ -476,6 +489,8 @@ ulong delayed_insert_errors,flush_time;
 ulong specialflag=0;
 ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
 ulong max_connections, max_connect_errors;
+ulong extra_max_connections;
+ulonglong denied_connections;
 /**
   Limit of the total number of prepared statements in the server.
   Is necessary to protect the server against out-of-memory attacks.
@@ -615,6 +630,10 @@ mysql_mutex_t
   LOCK_global_system_variables,
   LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
   LOCK_connection_count, LOCK_error_messages;
+
+mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
+              LOCK_global_table_stats, LOCK_global_index_stats;
+
 /**
   The below lock protects access to two global server variables:
   max_prepared_stmt_count and prepared_stmt_count. These variables
@@ -841,13 +860,12 @@ C_MODE_END
 #endif /* !EMBEDDED_LIBRARY */
 #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
 
-static my_socket unix_sock,ip_sock;
-struct rand_struct sql_rand; ///< used by sql_class.cc:THD::THD()
+static my_socket unix_sock, base_ip_sock, extra_ip_sock;
+struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD()
 
 #ifndef EMBEDDED_LIBRARY
 struct passwd *user_info;
 static pthread_t select_thread;
-static uint thr_kill_signal;
 #endif
 
 /* OS specific variables */
@@ -886,7 +904,7 @@ bool mysqld_embedded=1;
 static my_bool plugins_are_initialized= FALSE;
 
 #ifndef DBUG_OFF
-static const char* default_dbug_option;
+static const char* default_dbug_option, *current_dbug_option;
 #endif
 #ifdef HAVE_LIBWRAP
 const char *libwrapName= NULL;
@@ -907,6 +925,8 @@ my_bool opt_use_ssl  = 0;
 char *opt_ssl_ca= NULL, *opt_ssl_capath= NULL, *opt_ssl_cert= NULL,
      *opt_ssl_cipher= NULL, *opt_ssl_key= NULL;
 
+scheduler_functions *thread_scheduler, *extra_thread_scheduler;
+
 #ifdef HAVE_OPENSSL
 #include <openssl/crypto.h>
 #ifndef HAVE_YASSL
@@ -932,7 +952,7 @@ struct st_VioSSLFd *ssl_acceptor_fd;
   Number of currently active user connections. The variable is protected by
   LOCK_connection_count.
 */
-uint connection_count= 0;
+uint connection_count= 0, extra_connection_count= 0;
 
 /* Function declarations */
 
@@ -961,6 +981,7 @@ pthread_handler_t handle_connections_shared_memory(void *arg);
 pthread_handler_t handle_slave(void *arg);
 static void clean_up(bool print_message);
 static int test_if_case_insensitive(const char *dir_name);
+static void register_mutex_order();
 
 #ifndef EMBEDDED_LIBRARY
 static void usage(void);
@@ -1016,7 +1037,7 @@ static void close_connections(void)
 	break;
     }
 #ifdef EXTRA_DEBUG
-    if (error != 0 && !count++)
+    if (error != 0 && error != ETIMEDOUT && !count++)
       sql_print_error("Got error %d from mysql_cond_timedwait", error);
 #endif
     close_server_sock();
@@ -1029,11 +1050,17 @@ static void close_connections(void)
   DBUG_PRINT("quit",("Closing sockets"));
   if (!opt_disable_networking )
   {
-    if (ip_sock != INVALID_SOCKET)
+    if (base_ip_sock != INVALID_SOCKET)
+    {
+      (void) shutdown(base_ip_sock, SHUT_RDWR);
+      (void) closesocket(base_ip_sock);
+      base_ip_sock= INVALID_SOCKET;
+    }
+    if (extra_ip_sock != INVALID_SOCKET)
     {
-      (void) shutdown(ip_sock, SHUT_RDWR);
-      (void) closesocket(ip_sock);
-      ip_sock= INVALID_SOCKET;
+      (void) shutdown(extra_ip_sock, SHUT_RDWR);
+      (void) closesocket(extra_ip_sock);
+      extra_ip_sock= INVALID_SOCKET;
     }
   }
 #ifdef _WIN32
@@ -1090,7 +1117,7 @@ static void close_connections(void)
       continue;
 
     tmp->killed= THD::KILL_CONNECTION;
-    MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp));
+    thread_scheduler.post_kill_notification(tmp);
     mysql_mutex_lock(&tmp->LOCK_thd_data);
     if (tmp->mysys_var)
     {
@@ -1101,6 +1128,25 @@ static void close_connections(void)
         mysql_mutex_lock(tmp->mysys_var->current_mutex);
         mysql_cond_broadcast(tmp->mysys_var->current_cond);
         mysql_mutex_unlock(tmp->mysys_var->current_mutex);
+#error search for mysys_var->mutex / mysys_var->current_mutex lock order
+#error or/add safe_mutex_assert every time
+#if 0
+=======
+        uint i;
+        for (i=0; i < 2; i++)
+        {
+          int ret= pthread_mutex_trylock(tmp->mysys_var->current_mutex);
+          pthread_cond_broadcast(tmp->mysys_var->current_cond);
+          if (!ret)
+          {
+            /* Thread has surely got the signal, unlock and abort */
+            pthread_mutex_unlock(tmp->mysys_var->current_mutex);
+            break;
+          }
+          sleep(1);
+        }
+>>>>>>> MERGE-SOURCE
+#endif
       }
       mysql_mutex_unlock(&tmp->mysys_var->mutex);
     }
@@ -1160,26 +1206,42 @@ static void close_connections(void)
 }
 
 
+#ifdef HAVE_CLOSE_SERVER_SOCK
+static void close_socket(my_socket sock, const char *info)
+{
+  DBUG_ENTER("close_socket");
+
+  if (sock != INVALID_SOCKET)
+  {
+    DBUG_PRINT("info", ("calling shutdown on %s socket", info));
+    (void) shutdown(sock, SHUT_RDWR);
+#if defined(__NETWARE__)
+    /*
+      The following code is disabled for normal systems as it causes MySQL
+      to hang on AIX 4.3 during shutdown
+    */
+    DBUG_PRINT("info", ("calling closesocket on %s socket", info));
+    (void) closesocket(tmp_sock);
+#endif
+  }
+  DBUG_VOID_RETURN;
+}
+#endif
+
+
 static void close_server_sock()
 {
 #ifdef HAVE_CLOSE_SERVER_SOCK
   DBUG_ENTER("close_server_sock");
-  my_socket tmp_sock;
-  tmp_sock=ip_sock;
-  if (tmp_sock != INVALID_SOCKET)
-  {
-    ip_sock=INVALID_SOCKET;
-    DBUG_PRINT("info",("calling shutdown on TCP/IP socket"));
-    (void) shutdown(tmp_sock, SHUT_RDWR);
-  }
-  tmp_sock=unix_sock;
-  if (tmp_sock != INVALID_SOCKET)
-  {
-    unix_sock=INVALID_SOCKET;
-    DBUG_PRINT("info",("calling shutdown on unix socket"));
-    (void) shutdown(tmp_sock, SHUT_RDWR);
+
+  close_socket(base_ip_sock, "TCP/IP");
+  close_socket(extra_ip_sock, "TCP/IP");
+  close_socket(unix_sock, "unix/IP");
+
+  if (unix_sock != INVALID_SOCKET)
     (void) unlink(mysqld_unix_port);
-  }
+  base_ip_sock= extra_ip_sock= unix_sock= INVALID_SOCKET;
+
   DBUG_VOID_RETURN;
 #endif
 }
@@ -1272,18 +1334,21 @@ static void __cdecl kill_server(int sig_ptr)
   else
     sql_print_error(ER_DEFAULT(ER_GOT_SIGNAL),my_progname,sig); /* purecov: inspected */
 
-#if defined(HAVE_SMEM) && defined(__WIN__)    
-  /*    
-   Send event to smem_event_connect_request for aborting    
-   */    
-  if (!SetEvent(smem_event_connect_request))    
-  {      
-	  DBUG_PRINT("error",
-		("Got error: %ld from SetEvent of smem_event_connect_request",
-		 GetLastError()));    
-  }
-#endif  
-  
+#if defined(HAVE_SMEM) && defined(__WIN__)
+  /*
+   Send event to smem_event_connect_request for aborting
+   */
+  if (opt_enable_shared_memory)
+  {
+    if (!SetEvent(smem_event_connect_request))
+    {
+      DBUG_PRINT("error",
+                 ("Got error: %ld from SetEvent of smem_event_connect_request",
+                  GetLastError()));
+    }
+  }
+#endif
+
   close_connections();
   if (sig != MYSQL_KILL_SIGNAL &&
       sig != 0)
@@ -1460,7 +1525,9 @@ void clean_up(bool print_message)
   table_def_free();
   mdl_destroy();
   key_caches.delete_elements((void (*)(const char*, uchar*)) free_key_cache);
+  wt_end();
   multi_keycache_free();
+  sp_cache_end();
   free_status_vars();
   end_thr_alarm(1);			/* Free allocated memory */
   my_free_open_file_info();
@@ -1470,6 +1537,10 @@ void clean_up(bool print_message)
   my_free(opt_bin_logname);
   bitmap_free(&temp_pool);
   free_max_user_conn();
+  free_global_user_stats();
+  free_global_client_stats();
+  free_global_table_stats();
+  free_global_index_stats();
 #ifdef HAVE_REPLICATION
   end_slave_list();
 #endif
@@ -1490,7 +1561,8 @@ void clean_up(bool print_message)
   if (print_message && my_default_lc_messages && server_start_time)
     sql_print_information(ER_DEFAULT(ER_SHUTDOWN_COMPLETE),my_progname);
   cleanup_errmsgs();
-  MYSQL_CALLBACK(thread_scheduler, end, ());
+  thread_scheduler.end();
+  mysql_library_end();
   finish_client_errs();
   (void) my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST); // finish server errs
   DBUG_PRINT("quit", ("Error messages freed"));
@@ -1538,6 +1610,7 @@ static void wait_for_signal_thread_to_end()
 
 static void clean_up_mutexes()
 {
+  DBUG_ENTER("clean_up_mutexes");
   mysql_rwlock_destroy(&LOCK_grant);
   mysql_mutex_destroy(&LOCK_thread_count);
   mysql_mutex_destroy(&LOCK_status);
@@ -1548,6 +1621,10 @@ static void clean_up_mutexes()
   mysql_mutex_destroy(&LOCK_crypt);
   mysql_mutex_destroy(&LOCK_user_conn);
   mysql_mutex_destroy(&LOCK_connection_count);
+  mysql_mutex_destroy(&LOCK_stats);
+  mysql_mutex_destroy(&LOCK_global_user_client_stats);
+  mysql_mutex_destroy(&LOCK_global_table_stats);
+  mysql_mutex_destroy(&LOCK_global_index_stats);
   Events::destroy_mutexes();
 #ifdef HAVE_OPENSSL
   mysql_mutex_destroy(&LOCK_des_key_file);
@@ -1575,10 +1652,39 @@ static void clean_up_mutexes()
   mysql_cond_destroy(&COND_thread_cache);
   mysql_cond_destroy(&COND_flush_thread_cache);
   mysql_cond_destroy(&COND_manager);
+  mysql_mutex_destroy(&LOCK_server_started);
+  mysql_cond_destroy(&COND_server_started);
+  DBUG_VOID_RETURN;
 }
 #endif /*EMBEDDED_LIBRARY*/
 
 
+/**
+   Register order of mutex for wrong mutex deadlock detector
+
+   By aquiring all mutex in order here, the mutex order detector in
+   mysys/thr_mutex.c, will give a warning on first wrong mutex usage!
+*/
+
+#ifdef SAFE_MUTEX
+#define always_in_that_order(A,B)               \
+  pthread_mutex_lock(A); pthread_mutex_lock(B); \
+  pthread_mutex_unlock(B); pthread_mutex_unlock(A)
+#else
+#define always_in_that_order(A,B)
+#endif
+
+static void register_mutex_order()
+{
+  /*
+    We must have LOCK_open before LOCK_global_system_variables because
+    LOCK_open is hold while sql_plugin.c::intern_sys_var_ptr() is called.
+  */
+  always_in_that_order(&LOCK_open, &LOCK_global_system_variables);
+}
+#undef always_in_that_order
+
+
 /****************************************************************************
 ** Init IP and UNIX socket
 ****************************************************************************/
@@ -1749,127 +1855,135 @@ static void set_root(const char *path)
 #endif
 }
 
+/**
+   Activate usage of a tcp port
+*/
 
-static void network_init(void)
+static my_socket activate_tcp_port(uint port)
 {
-#ifdef HAVE_SYS_UN_H
-  struct sockaddr_un	UNIXaddr;
-#endif
-  int	arg;
-  int   ret;
-  uint  waited;
-  uint  this_wait;
-  uint  retry;
-  char port_buf[NI_MAXSERV];
-  DBUG_ENTER("network_init");
-  LINT_INIT(ret);
-
-  if (MYSQL_CALLBACK_ELSE(thread_scheduler, init, (), 0))
-    unireg_abort(1);			/* purecov: inspected */
+  struct addrinfo *ai, *a;
+  struct addrinfo hints;
+  int error;
+  DBUG_PRINT("general",("IP Socket is %d",mysqld_port));
 
-  set_ports();
+  bzero(&hints, sizeof (hints));
+  hints.ai_flags= AI_PASSIVE;
+  hints.ai_socktype= SOCK_STREAM;
+  hints.ai_family= AF_UNSPEC;
 
-  if (mysqld_port != 0 && !opt_disable_networking && !opt_bootstrap)
+  my_snprintf(port_buf, NI_MAXSERV, "%d", mysqld_port);
+  error= getaddrinfo(my_bind_addr_str, port_buf, &hints, &ai);
+  if (error != 0)
   {
-    struct addrinfo *ai, *a;
-    struct addrinfo hints;
-    int error;
-    DBUG_PRINT("general",("IP Socket is %d",mysqld_port));
-
-    bzero(&hints, sizeof (hints));
-    hints.ai_flags= AI_PASSIVE;
-    hints.ai_socktype= SOCK_STREAM;
-    hints.ai_family= AF_UNSPEC;
-
-    my_snprintf(port_buf, NI_MAXSERV, "%d", mysqld_port);
-    error= getaddrinfo(my_bind_addr_str, port_buf, &hints, &ai);
-    if (error != 0)
-    {
-      DBUG_PRINT("error",("Got error: %d from getaddrinfo()", error));
-      sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
-      unireg_abort(1);				/* purecov: tested */
-    }
+    DBUG_PRINT("error",("Got error: %d from getaddrinfo()", error));
+    sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
+    unireg_abort(1);				/* purecov: tested */
+  }
 
-    for (a= ai; a != NULL; a= a->ai_next)
-    {
-      ip_sock= socket(a->ai_family, a->ai_socktype, a->ai_protocol);
-      if (ip_sock != INVALID_SOCKET)
-        break;
-    }
+  for (a= ai; a != NULL; a= a->ai_next)
+  {
+    ip_sock= socket(a->ai_family, a->ai_socktype, a->ai_protocol);
+    if (ip_sock != INVALID_SOCKET)
+      break;
+  }
 
-    if (ip_sock == INVALID_SOCKET)
-    {
-      DBUG_PRINT("error",("Got error: %d from socket()",socket_errno));
-      sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
-      unireg_abort(1);				/* purecov: tested */
-    }
+  if (ip_sock == INVALID_SOCKET)
+  {
+    DBUG_PRINT("error",("Got error: %d from socket()",socket_errno));
+    sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR));  /* purecov: tested */
+    unireg_abort(1);				/* purecov: tested */
+  }
 
 #ifndef __WIN__
-    /*
-      We should not use SO_REUSEADDR on windows as this would enable a
-      user to open two mysqld servers with the same TCP/IP port.
-    */
-    arg= 1;
-    (void) setsockopt(ip_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg,sizeof(arg));
+  /*
+    We should not use SO_REUSEADDR on windows as this would enable a
+    user to open two mysqld servers with the same TCP/IP port.
+  */
+  arg= 1;
+  (void) setsockopt(ip_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg,sizeof(arg));
 #endif /* __WIN__ */
 
 #ifdef IPV6_V6ONLY
-     /*
-       For interoperability with older clients, IPv6 socket should
-       listen on both IPv6 and IPv4 wildcard addresses.
-       Turn off IPV6_V6ONLY option.
+   /*
+     For interoperability with older clients, IPv6 socket should
+     listen on both IPv6 and IPv4 wildcard addresses.
+     Turn off IPV6_V6ONLY option.
 
-       NOTE: this will work starting from Windows Vista only.
-       On Windows XP dual stack is not available, so it will not
-       listen on the corresponding IPv4-address.
-     */
-    if (a->ai_family == AF_INET6)
-    {
-      arg= 0;
-      (void) setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, (char*)&arg,
-                sizeof(arg));
-    }
+     NOTE: this will work starting from Windows Vista only.
+     On Windows XP dual stack is not available, so it will not
+     listen on the corresponding IPv4-address.
+   */
+  if (a->ai_family == AF_INET6)
+  {
+    arg= 0;
+    (void) setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, (char*)&arg,
+              sizeof(arg));
+  }
 #endif
-    /*
-      Sometimes the port is not released fast enough when stopping and
-      restarting the server. This happens quite often with the test suite
-      on busy Linux systems. Retry to bind the address at these intervals:
-      Sleep intervals: 1, 2, 4,  6,  9, 13, 17, 22, ...
-      Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ...
-      Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#).
-    */
-    for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
-    {
-      if (((ret= bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) ||
-          (socket_errno != SOCKET_EADDRINUSE) ||
-          (waited >= mysqld_port_timeout))
-        break;
-      sql_print_information("Retrying bind on TCP/IP port %u", mysqld_port);
-      this_wait= retry * retry / 3 + 1;
-      sleep(this_wait);
-    }
-    freeaddrinfo(ai);
-    if (ret < 0)
-    {
-      DBUG_PRINT("error",("Got error: %d from bind",socket_errno));
-      sql_perror("Can't start server: Bind on TCP/IP port");
-      sql_print_error("Do you already have another mysqld server running on port: %d ?",mysqld_port);
-      unireg_abort(1);
-    }
-    if (listen(ip_sock,(int) back_log) < 0)
-    {
-      sql_perror("Can't start server: listen() on TCP/IP port");
-      sql_print_error("listen() on TCP/IP failed with error %d",
-		      socket_errno);
-      unireg_abort(1);
-    }
+  /*
+    Sometimes the port is not released fast enough when stopping and
+    restarting the server. This happens quite often with the test suite
+    on busy Linux systems. Retry to bind the address at these intervals:
+    Sleep intervals: 1, 2, 4,  6,  9, 13, 17, 22, ...
+    Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ...
+    Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#).
+  */
+  for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
+  {
+    if (((ret= bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) ||
+        (socket_errno != SOCKET_EADDRINUSE) ||
+        (waited >= mysqld_port_timeout))
+      break;
+    sql_print_information("Retrying bind on TCP/IP port %u", mysqld_port);
+    this_wait= retry * retry / 3 + 1;
+    sleep(this_wait);
   }
+  freeaddrinfo(ai);
+  if (ret < 0)
+  {
+    DBUG_PRINT("error",("Got error: %d from bind",socket_errno));
+    sql_perror("Can't start server: Bind on TCP/IP port");
+    sql_print_error("Do you already have another mysqld server running on port: %d ?",mysqld_port);
+    unireg_abort(1);
+  }
+  if (listen(ip_sock,(int) back_log) < 0)
+  {
+    sql_perror("Can't start server: listen() on TCP/IP port");
+    sql_print_error("listen() on TCP/IP failed with error %d",
+                    socket_errno);
+    unireg_abort(1);
+  }
+  DBUG_RETURN(ip_sock);
+}
 
-#ifdef _WIN32
+static void network_init(void)
+{
+#ifdef HAVE_SYS_UN_H
+  struct sockaddr_un	UNIXaddr;
+#endif
+  int	arg=1;
+  char port_buf[NI_MAXSERV];
+  DBUG_ENTER("network_init");
+
+  if (MYSQL_CALLBACK(thread_scheduler, init, ()))
+    unireg_abort(1);			/* purecov: inspected */
+
+  set_ports();
+
+  if (!opt_disable_networking && !opt_bootstrap)
+  {
+    if (mysqld_port)
+      base_ip_sock= activate_tcp_port(mysqld_port);
+    if (mysqld_extra_port)
+      extra_ip_sock= activate_tcp_port(mysqld_extra_port);
+  }
+
+#ifdef __NT__
   /* create named pipe */
   if (Service.IsNT() && mysqld_unix_port[0] && !opt_bootstrap &&
       opt_enable_named_pipe)
   {
+
     strxnmov(pipe_name, sizeof(pipe_name)-1, "\\\\.\\pipe\\",
 	     mysqld_unix_port, NullS);
     bzero((char*) &saPipeSecurity, sizeof(saPipeSecurity));
@@ -1935,11 +2049,10 @@ static void network_init(void)
     UNIXaddr.sun_family = AF_UNIX;
     strmov(UNIXaddr.sun_path, mysqld_unix_port);
     (void) unlink(mysqld_unix_port);
-    arg= 1;
     (void) setsockopt(unix_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg,
 		      sizeof(arg));
     umask(0);
-    if (bind(unix_sock, reinterpret_cast<struct sockaddr *>(&UNIXaddr),
+    if (bind(unix_sock, my_reinterpret_cast(struct sockaddr *) (&UNIXaddr),
 	     sizeof(UNIXaddr)) < 0)
     {
       sql_perror("Can't start server : Bind on unix socket"); /* purecov: tested */
@@ -1959,6 +2072,7 @@ static void network_init(void)
   DBUG_VOID_RETURN;
 }
 
+
 #endif /*!EMBEDDED_LIBRARY*/
 
 
@@ -2003,17 +2117,14 @@ void close_connection(THD *thd, uint errcode, bool lock)
 #endif /* EMBEDDED_LIBRARY */
 
 
-/** Called when a thread is aborted. */
+/** Called when mysqld is aborted with ^C */
 /* ARGSUSED */
-extern "C" sig_handler end_thread_signal(int sig __attribute__((unused)))
+extern "C" sig_handler end_mysqld_signal(int sig __attribute__((unused)))
 {
-  THD *thd=current_thd;
-  DBUG_ENTER("end_thread_signal");
-  if (thd && ! thd->bootstrap)
-  {
-    statistic_increment(killed_threads, &LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd,0)); /* purecov: inspected */
-  }
+  DBUG_ENTER("end_mysqld_signal");
+  /* Don't call kill_mysql() if signal thread is not running */
+  if (signal_thread_in_use)
+    kill_mysql();                          // Take down mysqld nicely
   DBUG_VOID_RETURN;				/* purecov: deadcode */
 }
 
@@ -2036,7 +2147,7 @@ void unlink_thd(THD *thd)
   thd->cleanup();
 
   mysql_mutex_lock(&LOCK_connection_count);
-  --connection_count;
+  (*thd->scheduler->connection_count)--;
   mysql_mutex_unlock(&LOCK_connection_count);
 
   mysql_mutex_lock(&LOCK_thread_count);
@@ -2146,6 +2257,8 @@ bool one_thread_per_connection_end(THD *thd, bool put_in_cache)
 {
   DBUG_ENTER("one_thread_per_connection_end");
   unlink_thd(thd);
+  /* Mark that current_thd is not valid anymore */
+  my_pthread_setspecific_ptr(THR_THD,  0);
   if (put_in_cache)
     put_in_cache= cache_thread();
   mysql_mutex_unlock(&LOCK_thread_count);
@@ -2243,8 +2356,8 @@ static BOOL WINAPI console_event_handler( DWORD type )
 #ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER
 #define DEBUGGER_ATTACH_TIMEOUT 120
 /*
-  Wait for debugger to attach and break into debugger. If debugger is not attached,
-  resume after timeout.
+  Wait for debugger to attach and break into debugger. If debugger is
+  not attached, resume after timeout.
 */
 static void wait_for_debugger(int timeout_sec)
 {
@@ -2374,6 +2487,9 @@ extern "C" sig_handler handle_segfault(int sig)
 {
   time_t curr_time;
   struct tm tm;
+#ifdef HAVE_STACKTRACE
+  THD *thd=current_thd;
+#endif
 
   /*
     Strictly speaking, one needs a mutex here
@@ -2392,13 +2508,13 @@ extern "C" sig_handler handle_segfault(int sig)
   curr_time= my_time(0);
   localtime_r(&curr_time, &tm);
 
-  fprintf(stderr,"\
-%02d%02d%02d %2d:%02d:%02d - mysqld got " SIGNAL_FMT " ;\n\
+  fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d ",
+          tm.tm_year % 100, tm.tm_mon+1, tm.tm_mday,
+          tm.tm_hour, tm.tm_min, tm.tm_sec);
+  fprintf(stderr,"[ERROR] mysqld got " SIGNAL_FMT " ;\n\
 This could be because you hit a bug. It is also possible that this binary\n\
 or one of the libraries it was linked against is corrupt, improperly built,\n\
 or misconfigured. This error can also be caused by malfunctioning hardware.\n",
-          tm.tm_year % 100, tm.tm_mon+1, tm.tm_mday,
-          tm.tm_hour, tm.tm_min, tm.tm_sec,
 	  sig);
   fprintf(stderr, "\
 We will try our best to scrape up some info that will hopefully help diagnose\n\
@@ -2408,16 +2524,16 @@ and this may fail.\n\n");
           (ulong) dflt_key_cache->key_cache_mem_size);
   fprintf(stderr, "read_buffer_size=%ld\n", (long) global_system_variables.read_buff_size);
   fprintf(stderr, "max_used_connections=%lu\n", max_used_connections);
-  fprintf(stderr, "max_threads=%u\n", thread_scheduler->max_threads);
+  fprintf(stderr, "max_threads=%u\n", thread_scheduler->max_threads +
+          (uint) extra_max_connections);
   fprintf(stderr, "thread_count=%u\n", thread_count);
-  fprintf(stderr, "connection_count=%u\n", connection_count);
   fprintf(stderr, "It is possible that mysqld could use up to \n\
 key_buffer_size + (read_buffer_size + sort_buffer_size)*max_threads = %lu K\n\
 bytes of memory\n", ((ulong) dflt_key_cache->key_cache_mem_size +
 		     (global_system_variables.read_buff_size +
 		      global_system_variables.sortbuff_size) *
-		     thread_scheduler->max_threads +
-                     max_connections * sizeof(THD)) / 1024);
+		     (thread_scheduler->max_threads + extra_max_connections) +
+                     (max_connections + extra_max_connections)* sizeof(THD)) / 1024);
   fprintf(stderr, "Hope that's ok; if not, decrease some variables in the equation.\n\n");
 
 #if defined(HAVE_LINUXTHREADS)
@@ -2433,7 +2549,6 @@ the thread stack. Please read http://dev.mysql.com/doc/mysql/en/linux.html\n\n",
 #endif /* HAVE_LINUXTHREADS */
 
 #ifdef HAVE_STACKTRACE
-  THD *thd=current_thd;
 
   if (!(test_flags & TEST_NO_STACKTRACE))
   {
@@ -2497,7 +2612,7 @@ You should either build a dynamically-linked binary, or force LinuxThreads\n\
 to be used with the LD_ASSUME_KERNEL environment variable. Please consult\n\
 the documentation for your distribution on how to do that.\n");
 #endif
-  
+
   if (locked_in_memory)
   {
     fprintf(stderr, "\n\
@@ -2518,9 +2633,13 @@ bugs.\n");
   }
 #endif
 
+end:
 #ifndef __WIN__
-  /* On Windows, do not terminate, but pass control to exception filter */
+  /* Terminate */
   exit(1);
+#else
+  /* On Windows, do not terminate, but pass control to exception filter */
+  ;
 #endif
 }
 
@@ -2570,7 +2689,7 @@ static void init_signals(void)
   {
     /* Change limits so that we will get a core file */
     STRUCT_RLIMIT rl;
-    rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
+    rl.rlim_cur = rl.rlim_max = (rlim_t) RLIM_INFINITY;
     if (setrlimit(RLIMIT_CORE, &rl) && global_system_variables.log_warnings)
       sql_print_warning("setrlimit could not change the size of core files to 'infinity';  We may not be able to generate a core file on signals");
   }
@@ -2599,12 +2718,13 @@ static void init_signals(void)
     sigaddset(&set,THR_SERVER_ALARM);
   if (test_flags & TEST_SIGINT)
   {
-    my_sigset(thr_kill_signal, end_thread_signal);
-    // May be SIGINT
-    sigdelset(&set, thr_kill_signal);
+    /* Allow SIGINT to break mysqld. This is for debugging with --gdb */
+    my_sigset(SIGINT, end_mysqld_signal);
+    sigdelset(&set, SIGINT);
   }
   else
     sigaddset(&set,SIGINT);
+
   sigprocmask(SIG_SETMASK,&set,NULL);
   pthread_sigmask(SIG_SETMASK,&set,NULL);
   DBUG_VOID_RETURN;
@@ -2663,12 +2783,13 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
     This should actually be '+ max_number_of_slaves' instead of +10,
     but the +10 should be quite safe.
   */
-  init_thr_alarm(thread_scheduler->max_threads +
+  init_thr_alarm(thread_scheduler->max_threads + extra_max_connections +
 		 global_system_variables.max_insert_delayed_threads + 10);
-  if (thd_lib_detected != THD_LIB_LT && (test_flags & TEST_SIGINT))
+  if (test_flags & TEST_SIGINT)
   {
-    (void) sigemptyset(&set);			// Setup up SIGINT for debug
-    (void) sigaddset(&set,SIGINT);		// For debugging
+    /* Allow SIGINT to break mysqld. This is for debugging with --gdb */
+    (void) sigemptyset(&set);
+    (void) sigaddset(&set,SIGINT);
     (void) pthread_sigmask(SIG_UNBLOCK,&set,NULL);
   }
   (void) sigemptyset(&set);			// Setup up SIGINT for debug
@@ -2812,6 +2933,9 @@ extern "C" void my_message_sql(uint error, const char *str, myf MyFlags);
 void my_message_sql(uint error, const char *str, myf MyFlags)
 {
   THD *thd= current_thd;
+  MYSQL_ERROR::enum_warning_level level;
+  sql_print_message_func func;
+
   DBUG_ENTER("my_message_sql");
   DBUG_PRINT("error", ("error: %u  message: '%s'", error, str));
 
@@ -2825,31 +2949,44 @@ void my_message_sql(uint error, const char *str, myf MyFlags)
     TODO:
     DBUG_ASSERT(error != 0);
   */
-
+#warning FIXME
   if (error == 0)
   {
     /* At least, prevent new abuse ... */
-    DBUG_ASSERT(strncmp(str, "MyISAM table", 12) == 0);
+    DBUG_ASSERT(strncmp(str, "MyISAM table", 12) == 0 ||
+                strncmp(str, "Aria table", 11) == 0);
     error= ER_UNKNOWN_ERROR;
   }
 
   mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_ERROR, error, str);
+  if (MyFlags & ME_JUST_INFO)
+  {
+    level= MYSQL_ERROR::WARN_LEVEL_NOTE;
+    func= sql_print_information;
+  }
+  else if (MyFlags & ME_JUST_WARNING)
+  {
+    level= MYSQL_ERROR::WARN_LEVEL_WARN;
+    func= sql_print_warning;
+  }
+  else
+  {
+    level= MYSQL_ERROR::WARN_LEVEL_ERROR;
+    func= sql_print_error;
+  }
 
   if (thd)
   {
     if (MyFlags & ME_FATALERROR)
       thd->is_fatal_error= 1;
-    (void) thd->raise_condition(error,
-                                NULL,
-                                MYSQL_ERROR::WARN_LEVEL_ERROR,
-                                str);
+    (void) thd->raise_condition(error, NULL, level, str);
   }
 
   /* When simulating OOM, skip writing to error log to avoid mtr errors */
   DBUG_EXECUTE_IF("simulate_out_of_memory", DBUG_VOID_RETURN;);
 
-  if (!thd || MyFlags & ME_NOREFRESH)
-    sql_print_error("%s: %s",my_progname,str); /* purecov: inspected */
+  if (!thd || (MyFlags & ME_NOREFRESH))
+    (*func)("%s: %s", my_progname_short, str); /* purecov: inspected */
   DBUG_VOID_RETURN;
 }
 
@@ -2892,7 +3029,9 @@ const char *load_default_groups[]= {
 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
 "mysql_cluster",
 #endif
-"mysqld","server", MYSQL_BASE_VERSION, 0, 0};
+"mysqld", "server", MYSQL_BASE_VERSION,
+"mariadb", MARIADB_BASE_VERSION,
+0, 0};
 
 #if defined(__WIN__) && !defined(EMBEDDED_LIBRARY)
 static const int load_default_groups_sz=
@@ -3014,6 +3153,7 @@ SHOW_VAR com_status_vars[]= {
   {"show_binlog_events",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
   {"show_binlogs",         (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
   {"show_charsets",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
+  {"show_client_statistics",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
   {"show_collations",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
   {"show_contributors",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
   {"show_create_db",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS},
@@ -3035,6 +3175,7 @@ SHOW_VAR com_status_vars[]= {
   {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
   {"show_grants",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
   {"show_keys",            (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
+  {"show_index_statistics",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INDEX_STATS]), SHOW_LONG_STATUS},
   {"show_master_status",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
   {"show_new_master",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS},
   {"show_open_tables",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS},
@@ -3052,9 +3193,11 @@ SHOW_VAR com_status_vars[]= {
   {"show_slave_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
   {"show_status",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
   {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+  {"show_table_statistics",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
   {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
   {"show_tables",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
   {"show_triggers",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
+  {"show_user_statistics",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
   {"show_variables",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
   {"show_warnings",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
   {"slave_start",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
@@ -3294,7 +3437,8 @@ static int init_common_variables()
     uint files, wanted_files, max_open_files;
 
     /* MyISAM requires two file handles per table. */
-    wanted_files= 10+max_connections+table_cache_size*2;
+    wanted_files= (10 + max_connections + extra_max_connections +
+                   table_cache_size*2);
     /*
       We are trying to allocate no less than max_connections*5 file
       handles (i.e. we are trying to set the limit so that they will
@@ -3305,7 +3449,8 @@ static int init_common_variables()
       can't get max_connections*5 but still got no less than was
       requested (value of wanted_files).
     */
-    max_open_files= max(max(wanted_files, max_connections*5),
+    max_open_files= max(max(wanted_files,
+                            (max_connections + extra_max_connections)*5),
                         open_files_limit);
     files= my_set_max_open_files(max_open_files);
 
@@ -3351,6 +3496,7 @@ static int init_common_variables()
   if (init_errmessage())	/* Read error messages from file */
     return 1;
   init_client_errs();
+  mysql_library_init(never,never,never); /* for replication */
   lex_init();
   if (item_create_init())
     return 1;
@@ -3407,7 +3553,7 @@ static int init_common_variables()
   global_system_variables.collation_connection=  default_charset_info;
   global_system_variables.character_set_results= default_charset_info;
   global_system_variables.character_set_client=  default_charset_info;
-  if (!(character_set_filesystem= 
+  if (!(character_set_filesystem=
         get_charset_by_csname(character_set_filesystem_name,
                               MY_CS_PRIMARY, MYF(MY_WME))))
     return 1;
@@ -3422,14 +3568,14 @@ static int init_common_variables()
   global_system_variables.lc_time_names= my_default_lc_time_names;
 
   /* check log options and issue warnings if needed */
-  if (opt_log && opt_logname && !(log_output_options & LOG_FILE) &&
-      !(log_output_options & LOG_NONE))
+  if (opt_log && opt_logname && *opt_logname &&
+      !(log_output_options & (LOG_FILE | LOG_NONE)))
     sql_print_warning("Although a path was specified for the "
                       "--log option, log tables are used. "
                       "To enable logging to files use the --log-output option.");
 
-  if (opt_slow_log && opt_slow_logname && !(log_output_options & LOG_FILE)
-      && !(log_output_options & LOG_NONE))
+  if (opt_slow_log && opt_slow_logname && *opt_slow_logname &&
+      !(log_output_options & (LOG_FILE | LOG_NONE)))
     sql_print_warning("Although a path was specified for the "
                       "--log-slow-queries option, log tables are used. "
                       "To enable logging to files use the --log-output=file option.");
@@ -3440,7 +3586,6 @@ static int init_common_variables()
     my_free(VAR); /* it could be an allocated empty string "" */ \
     VAR= my_strdup(ALT, MYF(0));                                \
   }
-
   FIX_LOG_VAR(opt_logname,
               make_default_log_name(buff, ".log"));
   FIX_LOG_VAR(opt_slow_logname,
@@ -3543,6 +3688,12 @@ static int init_thread_environment()
                    &LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_LOCK_connection_count,
                    &LOCK_connection_count, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(&LOCK_global_user_client_stats,
+              MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+
 #ifdef HAVE_OPENSSL
   mysql_mutex_init(key_LOCK_des_key_file,
                    &LOCK_des_key_file, MY_MUTEX_INIT_FAST);
@@ -3589,26 +3740,27 @@ static int init_thread_environment()
     sql_print_error("Can't create thread-keys");
     return 1;
   }
+  register_mutex_order();
   return 0;
 }
 
 
 #if defined(HAVE_OPENSSL) && !defined(HAVE_YASSL)
 static unsigned long openssl_id_function()
-{ 
+{
   return (unsigned long) pthread_self();
-} 
+}
 
 
 static openssl_lock_t *openssl_dynlock_create(const char *file, int line)
-{ 
+{
   openssl_lock_t *lock= new openssl_lock_t;
   mysql_rwlock_init(key_rwlock_openssl, &lock->lock);
   return lock;
 }
 
 
-static void openssl_dynlock_destroy(openssl_lock_t *lock, const char *file, 
+static void openssl_dynlock_destroy(openssl_lock_t *lock, const char *file,
 				    int line)
 {
   mysql_rwlock_destroy(&lock->lock);
@@ -3628,7 +3780,7 @@ static void openssl_lock_function(int mode, int n, const char *file, int line)
 }
 
 
-static void openssl_lock(int mode, openssl_lock_t *lock, const char *file, 
+static void openssl_lock(int mode, openssl_lock_t *lock, const char *file,
 			 int line)
 {
   int err;
@@ -3653,7 +3805,7 @@ static void openssl_lock(int mode, openssl_lock_t *lock, const char *file,
     sql_print_error("Fatal: OpenSSL interface problem (mode=0x%x)", mode);
     abort();
   }
-  if (err) 
+  if (err)
   {
     sql_print_error("Fatal: can't %s OpenSSL lock", what);
     abort();
@@ -3724,12 +3876,14 @@ static int init_server_components()
   query_cache_set_min_res_unit(query_cache_min_res_unit);
   query_cache_init();
   query_cache_resize(query_cache_size);
-  randominit(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
+  my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2);
   setup_fpu();
   init_thr_lock();
+  my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
 #ifdef HAVE_REPLICATION
   init_slave_list();
 #endif
+  wt_init();
 
   /* Setup logs */
 
@@ -3880,6 +4034,9 @@ a file name for --log-bin-index option", opt_binlog_index_name);
   /* call ha_init_key_cache() on all key caches to init them */
   process_key_caches(&ha_init_key_cache);
 
+  init_global_table_stats();
+  init_global_index_stats();
+
   /* Allow storage engine to give real error messages */
   if (ha_init_errors())
     DBUG_RETURN(1);
@@ -3924,8 +4081,7 @@ a file name for --log-bin-index option", opt_binlog_index_name);
 
     if (remaining_argc > 1)
     {
-      fprintf(stderr, "%s: Too many arguments (first extra is '%s').\n"
-              "Use --verbose --help to get a list of available options\n",
+      fprintf(stderr, "%s: Too many arguments (first extra is '%s').\n",
               my_progname, remaining_argv[1]);
       unireg_abort(1);
     }
@@ -3945,7 +4101,6 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     unireg_abort(1);
   }
 
-#ifdef WITH_CSV_STORAGE_ENGINE
   if (opt_bootstrap)
     log_output_options= LOG_FILE;
   else
@@ -3979,10 +4134,6 @@ a file name for --log-bin-index option", opt_binlog_index_name);
     logger.set_handlers(LOG_FILE, opt_slow_log ? log_output_options:LOG_NONE,
                         opt_log ? log_output_options:LOG_NONE);
   }
-#else
-  logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_FILE:LOG_NONE,
-                      opt_log ? LOG_FILE:LOG_NONE);
-#endif
 
   /*
     Set the default storage engine
@@ -4014,9 +4165,18 @@ a file name for --log-bin-index option", opt_binlog_index_name);
       Need to unlock as global_system_variables.table_plugin
       was acquired during plugin_init()
     */
+    mysql_mutex_lock(&LOCK_global_system_variables);
     plugin_unlock(0, global_system_variables.table_plugin);
     global_system_variables.table_plugin= plugin;
+    mysql_mutex_unlock(&LOCK_global_system_variables);
   }
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+  if (!ha_storage_engine_is_enabled(maria_hton) && !opt_bootstrap)
+  {
+    sql_print_error("Aria engine is not enabled or did not start. The Aria engine must be enabled to continue as mysqld was configured with --with-aria-tmp-tables");
+    unireg_abort(1);
+  }
+#endif
 
   tc_log= (total_ha_2pc > 1 ? (opt_bin_log  ?
                                (TC_LOG *) &mysql_bin_log :
@@ -4075,6 +4235,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
 
   init_max_user_conn();
   init_update_queries();
+  init_global_user_stats();
+  init_global_client_stats();
   DBUG_RETURN(0);
 }
 
@@ -4148,7 +4310,7 @@ static void handle_connections_methods()
       handler_count--;
     }
   }
-#endif 
+#endif
 
   while (handler_count > 0)
     mysql_cond_wait(&COND_handler_count, &LOCK_thread_count);
@@ -4326,13 +4488,6 @@ int mysqld_main(int argc, char **argv)
   my_init();                                   // init my_sys library & pthreads
   init_error_log_mutex();
 
-  /* Set signal used to kill MySQL */
-#if defined(SIGUSR2)
-  thr_kill_signal= thd_lib_detected == THD_LIB_LT ? SIGINT : SIGUSR2;
-#else
-  thr_kill_signal= SIGINT;
-#endif
-
   /* Initialize audit interface globals. Audit plugins are inited later. */
   mysql_audit_initialize();
 
@@ -4476,7 +4631,6 @@ int mysqld_main(int argc, char **argv)
     init signals & alarm
     After this we can't quit by a simple unireg_abort
   */
-  error_handler_hook= my_message_sql;
   start_signal_handler();				// Creates pidfile
 
   if (mysql_rm_tmp_tables() || acl_init(opt_noacl) ||
@@ -4548,7 +4702,13 @@ int mysqld_main(int argc, char **argv)
   {
     select_thread_in_use= 0;                    // Allow 'kill' to work
     bootstrap(mysql_stdin);
-    unireg_abort(bootstrap_error ? 1 : 0);
+    if (!kill_in_progress)
+      unireg_abort(bootstrap_error ? 1 : 0);
+    else
+    {
+      sleep(2);                                 // Wait for kill
+      exit(0);
+    }
   }
   if (opt_init_file && *opt_init_file)
   {
@@ -4582,7 +4742,7 @@ int mysqld_main(int argc, char **argv)
 #endif /* _WIN32 || HAVE_SMEM */
 
   /* (void) pthread_attr_destroy(&connection_attrib); */
-  
+
   DBUG_PRINT("quit",("Exiting main thread"));
 
 #ifndef __WIN__
@@ -4668,10 +4828,8 @@ static char *add_quoted_string(char *to, const char *from, char *to_end)
   @param file_path		Path to this program
   @param startup_option	Startup option to mysqld
 
-  @retval
-    0		option handled
-  @retval
-    1		Could not handle option
+  @retval 0	option handled
+  @retval 1	Could not handle option
 */
 
 static bool
@@ -4952,7 +5110,7 @@ void create_thread_to_handle_connection(THD *thd)
       mysql_mutex_unlock(&LOCK_thread_count);
 
       mysql_mutex_lock(&LOCK_connection_count);
-      --connection_count;
+      (*thd->scheduler->connection_count)--;
       mysql_mutex_unlock(&LOCK_connection_count);
 
       statistic_increment(aborted_connects,&LOCK_status);
@@ -4997,20 +5155,22 @@ static void create_new_thread(THD *thd)
 
   mysql_mutex_lock(&LOCK_connection_count);
 
-  if (connection_count >= max_connections + 1 || abort_loop)
+  if (*thd->scheduler->connection_count >=
+      *thd->scheduler->max_connections + 1|| abort_loop)
   {
     mysql_mutex_unlock(&LOCK_connection_count);
 
     DBUG_PRINT("error",("Too many connections"));
     close_connection(thd, ER_CON_COUNT_ERROR, 1);
+    statistic_increment(denied_connections, &LOCK_status);
     delete thd;
     DBUG_VOID_RETURN;
   }
 
-  ++connection_count;
+  ++*thd->scheduler->connection_count;
 
-  if (connection_count > max_used_connections)
-    max_used_connections= connection_count;
+  if (connection_count + extra_connection_count > max_used_connections)
+    max_used_connections= connection_count + extra_connection_count;
 
   mysql_mutex_unlock(&LOCK_connection_count);
 
@@ -5027,7 +5187,7 @@ static void create_new_thread(THD *thd)
 
   thread_count++;
 
-  MYSQL_CALLBACK(thread_scheduler, add_connection, (thd));
+  MYSQL_CALLBACK(thd->scheduler, add_connection, (thd));
 
   DBUG_VOID_RETURN;
 }
@@ -5039,10 +5199,11 @@ inline void kill_broken_server()
 {
   /* hack to get around signals ignored in syscalls for problem OS's */
   if (unix_sock == INVALID_SOCKET ||
-      (!opt_disable_networking && ip_sock == INVALID_SOCKET))
+      (!opt_disable_networking && base_ip_sock == INVALID_SOCKET))
   {
     select_thread_in_use = 0;
     /* The following call will never return */
+    DBUG_PRINT("general", ("killing server because socket is closed"));
     kill_server((void*) MYSQL_KILL_SIGNAL);
   }
 }
@@ -5062,46 +5223,42 @@ void handle_connections_sockets()
   THD *thd;
   struct sockaddr_storage cAddr;
   int ip_flags=0,socket_flags=0,flags=0,retval;
+  int extra_ip_flags=0;
   st_vio *vio_tmp;
 #ifdef HAVE_POLL
   int socket_count= 0;
-  struct pollfd fds[2]; // for ip_sock and unix_sock
+  struct pollfd fds[3]; // for ip_sock, unix_sock and extra_ip_sock
+#define setup_fds(X)                    \
+    fds[socket_count].fd= X;            \
+    fds[socket_count].events= POLLIN;   \
+    socket_count++
 #else
   fd_set readFDs,clientFDs;
-  uint max_used_connection= (uint) (max(ip_sock,unix_sock)+1);
+  uint max_used_connection;
+#define setup_fds(X)    FD_SET(X,&clientFDs)
+  FD_ZERO(&clientFDs);
 #endif
 
   DBUG_ENTER("handle_connections_sockets");
 
-#ifndef HAVE_POLL
-  FD_ZERO(&clientFDs);
-#endif
+  max_used_connection= (uint) (max(base_ip_sock, unix_sock));
+  max_used_connection= (uint) (max(extra_ip_sock, (int) max_used_connection));
+  max_used_connection++;
 
-  if (ip_sock != INVALID_SOCKET)
+  if (base_ip_sock != INVALID_SOCKET)
   {
-#ifdef HAVE_POLL
-    fds[socket_count].fd= ip_sock;
-    fds[socket_count].events= POLLIN;
-    socket_count++;
-#else
-    FD_SET(ip_sock,&clientFDs);
-#endif    
-#ifdef HAVE_FCNTL
-    ip_flags = fcntl(ip_sock, F_GETFL, 0);
-#endif
+    setup_fds(base_ip_sock);
+    ip_flags = fcntl(base_ip_sock, F_GETFL, 0);
+  }
+  if (extra_ip_sock != INVALID_SOCKET)
+  {
+    setup_fds(extra_ip_sock);
+    extra_ip_flags = fcntl(extra_ip_sock, F_GETFL, 0);
   }
 #ifdef HAVE_SYS_UN_H
-#ifdef HAVE_POLL
-  fds[socket_count].fd= unix_sock;
-  fds[socket_count].events= POLLIN;
-  socket_count++;
-#else
-  FD_SET(unix_sock,&clientFDs);
-#endif
-#ifdef HAVE_FCNTL
+  setup_fds(unix_sock);
   socket_flags=fcntl(unix_sock, F_GETFL, 0);
 #endif
-#endif
 
   DBUG_PRINT("general",("Waiting for connections."));
   MAYBE_BROKEN_SYSCALL;
@@ -5139,26 +5296,26 @@ void handle_connections_sockets()
       if (fds[i].revents & POLLIN)
       {
         sock= fds[i].fd;
-#ifdef HAVE_FCNTL
         flags= fcntl(sock, F_GETFL, 0);
-#else
-        flags= 0;
-#endif // HAVE_FCNTL
         break;
       }
     }
 #else  // HAVE_POLL
-#ifdef HAVE_SYS_UN_H
-    if (FD_ISSET(unix_sock,&readFDs))
+    if (FD_ISSET(base_ip_sock,&readFDs))
     {
-      sock = unix_sock;
-      flags= socket_flags;
+      sock=  base_ip_sock;
+      flags= ip_flags;
     }
     else
-#endif // HAVE_SYS_UN_H
+    if (FD_ISSET(extra_ip_sock,&readFDs))
     {
-      sock = ip_sock;
-      flags= ip_flags;
+      sock=  extra_ip_sock;
+      flags= extra_ip_flags;
+    }
+    else
+    {
+      sock = unix_sock;
+      flags= socket_flags;
     }
 #endif // HAVE_POLL
 
@@ -5205,7 +5362,7 @@ void handle_connections_sockets()
 
 #ifdef HAVE_LIBWRAP
     {
-      if (sock == ip_sock)
+      if (sock == base_ip_sock || sock == extra_ip_sock)
       {
 	struct request_info req;
 	signal(SIGCHLD, SIG_DFL);
@@ -5286,6 +5443,11 @@ void handle_connections_sockets()
     if (sock == unix_sock)
       thd->security_ctx->host=(char*) my_localhost;
 
+    if (sock == extra_ip_sock)
+    {
+      thd->extra_port= 1;
+      thd->scheduler= &extra_thread_scheduler;
+    }
     create_new_thread(thd);
   }
   DBUG_VOID_RETURN;
@@ -5820,7 +5982,7 @@ struct my_option my_long_options[]=
 #ifdef HAVE_MMAP
   {"log-tc-size", 0, "Size of transaction coordinator log.",
    &opt_tc_log_size, &opt_tc_log_size, 0, GET_ULONG,
-   REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, ULONG_MAX, 0,
+   REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, (longlong) ULONG_MAX, 0,
    TC_LOG_PAGE_SIZE, 0},
 #endif
   {"master-info-file", 0,
@@ -5920,6 +6082,9 @@ struct my_option my_long_options[]=
    "Show user and password in SHOW SLAVE HOSTS on this master.",
    &opt_show_slave_auth_info, &opt_show_slave_auth_info, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"skip-bdb", OPT_DEPRECATED_OPTION,
+   "Deprecated option; Exist only for compatiblity with old my.cnf files",
+   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
 #ifndef DISABLE_GRANT_OPTIONS
   {"skip-grant-tables", 0,
    "Start without grant tables. This gives all users FULL ACCESS to all tables.",
@@ -5928,8 +6093,6 @@ struct my_option my_long_options[]=
 #endif
   {"skip-host-cache", OPT_SKIP_HOST_CACHE, "Don't cache host names.", 0, 0, 0,
    GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
-  {"skip-new", OPT_SKIP_NEW, "Don't use new, possibly wrong routines.",
-   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"skip-slave-start", 0,
    "If set, slave is not autostarted.", &opt_skip_slave_start,
    &opt_skip_slave_start, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
@@ -5965,7 +6128,7 @@ struct my_option my_long_options[]=
      purify. These are not suppressed: instead we disable symlinks
      option if compiled with valgrind support.
    */
-   IF_PURIFY(0,1), 0, 0, 0, 0, 0},
+   IF_VALGRIND(0,1), 0, 0, 0, 0, 0},
   {"sysdate-is-now", 0,
    "Non-default option to alias SYSDATE() to NOW() to make it safe-replicable. "
    "Since 5.0, SYSDATE() returns a `dynamic' value different for different "
@@ -6316,7 +6479,7 @@ static int show_ssl_ctx_get_session_cache_mode(THD *thd, SHOW_VAR *var, char *bu
 }
 
 /*
-   Functions relying on SSL 
+   Functions relying on SSL
    Note: In the show_ssl_* functions, we need to check if we have a
          valid vio-object since this isn't always true, specifically
          when session_status or global_status is requested from
@@ -6410,6 +6573,46 @@ static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff)
 
 #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
 
+static int show_default_keycache(THD *thd, SHOW_VAR *var, char *buff)
+{
+  struct st_data {
+    KEY_CACHE_STATISTICS stats;
+    SHOW_VAR var[8];
+  } *data;
+  SHOW_VAR *v;
+
+  data=(st_data *)buff;
+  v= data->var;
+
+  var->type= SHOW_ARRAY;
+  var->value= (char*)v;
+
+  get_key_cache_statistics(dflt_key_cache, 0, &data->stats);
+
+#define set_one_keycache_var(X,Y)       \
+  v->name= X;                           \
+  v->type= SHOW_LONGLONG;               \
+  v->value= (char*)&data->stats.Y;      \
+  v++;
+
+  set_one_keycache_var("blocks_not_flushed", blocks_changed);
+  set_one_keycache_var("blocks_unused",      blocks_unused);
+  set_one_keycache_var("blocks_used",        blocks_used);
+  set_one_keycache_var("blocks_warm",        blocks_warm);
+  set_one_keycache_var("read_requests",      read_requests);
+  set_one_keycache_var("reads",              reads);
+  set_one_keycache_var("write_requests",     write_requests);
+  set_one_keycache_var("writes",             writes);
+
+  v->name= 0;
+
+  DBUG_ASSERT((char*)(v+1) <= buff + SHOW_VAR_FUNC_BUFF_SIZE);
+
+#undef set_one_keycache_var
+
+  return 0;
+}
+
 
 /*
   Variables shown by SHOW STATUS in alphabetical order
@@ -6418,19 +6621,24 @@ static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff)
 SHOW_VAR status_vars[]= {
   {"Aborted_clients",          (char*) &aborted_threads,        SHOW_LONG},
   {"Aborted_connects",         (char*) &aborted_connects,       SHOW_LONG},
+  {"Access_denied_errors",     (char*) offsetof(STATUS_VAR, access_denied_errors), SHOW_LONG_STATUS},
   {"Binlog_cache_disk_use",    (char*) &binlog_cache_disk_use,  SHOW_LONG},
   {"Binlog_cache_use",         (char*) &binlog_cache_use,       SHOW_LONG},
+  {"Busy_time",                (char*) offsetof(STATUS_VAR, busy_time), SHOW_DOUBLE_STATUS},
   {"Bytes_received",           (char*) offsetof(STATUS_VAR, bytes_received), SHOW_LONGLONG_STATUS},
   {"Bytes_sent",               (char*) offsetof(STATUS_VAR, bytes_sent), SHOW_LONGLONG_STATUS},
+  {"Binlog_bytes_written",     (char*) offsetof(STATUS_VAR, binlog_bytes_written), SHOW_LONGLONG_STATUS},
   {"Com",                      (char*) com_status_vars, SHOW_ARRAY},
   {"Compression",              (char*) &show_net_compression, SHOW_FUNC},
   {"Connections",              (char*) &thread_id,              SHOW_LONG_NOFLUSH},
+  {"Cpu_time",                 (char*) offsetof(STATUS_VAR, cpu_time), SHOW_DOUBLE_STATUS},
   {"Created_tmp_disk_tables",  (char*) offsetof(STATUS_VAR, created_tmp_disk_tables), SHOW_LONG_STATUS},
   {"Created_tmp_files",	       (char*) &my_tmp_file_created,	SHOW_LONG},
   {"Created_tmp_tables",       (char*) offsetof(STATUS_VAR, created_tmp_tables), SHOW_LONG_STATUS},
   {"Delayed_errors",           (char*) &delayed_insert_errors,  SHOW_LONG},
   {"Delayed_insert_threads",   (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH},
   {"Delayed_writes",           (char*) &delayed_insert_writes,  SHOW_LONG},
+  {"Empty_queries",            (char*) offsetof(STATUS_VAR, empty_queries), SHOW_LONG_STATUS},
   {"Flush_commands",           (char*) &refresh_version,        SHOW_LONG_NOFLUSH},
   {"Handler_commit",           (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS},
   {"Handler_delete",           (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
@@ -6448,13 +6656,7 @@ SHOW_VAR status_vars[]= {
   {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS},
   {"Handler_update",           (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS},
   {"Handler_write",            (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS},
-  {"Key_blocks_not_flushed",   (char*) offsetof(KEY_CACHE, global_blocks_changed), SHOW_KEY_CACHE_LONG},
-  {"Key_blocks_unused",        (char*) offsetof(KEY_CACHE, blocks_unused), SHOW_KEY_CACHE_LONG},
-  {"Key_blocks_used",          (char*) offsetof(KEY_CACHE, blocks_used), SHOW_KEY_CACHE_LONG},
-  {"Key_read_requests",        (char*) offsetof(KEY_CACHE, global_cache_r_requests), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_reads",                (char*) offsetof(KEY_CACHE, global_cache_read), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_write_requests",       (char*) offsetof(KEY_CACHE, global_cache_w_requests), SHOW_KEY_CACHE_LONGLONG},
-  {"Key_writes",               (char*) offsetof(KEY_CACHE, global_cache_write), SHOW_KEY_CACHE_LONGLONG},
+  {"Key",                      (char*) &show_default_keycache, SHOW_FUNC},
   {"Last_query_cost",          (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS},
   {"Max_used_connections",     (char*) &max_used_connections,  SHOW_LONG},
   {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use,    SHOW_LONG_NOFLUSH},
@@ -6466,6 +6668,8 @@ SHOW_VAR status_vars[]= {
   {"Opened_tables",            (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS},
   {"Opened_table_definitions", (char*) offsetof(STATUS_VAR, opened_shares), SHOW_LONG_STATUS},
   {"Prepared_stmt_count",      (char*) &show_prepared_stmt_count, SHOW_FUNC},
+  {"Rows_sent",                (char*) offsetof(STATUS_VAR, rows_sent), SHOW_LONG_STATUS},
+  {"Rows_read",                (char*) offsetof(STATUS_VAR, rows_read), SHOW_LONG_STATUS},
 #ifdef HAVE_QUERY_CACHE
   {"Qcache_free_blocks",       (char*) &query_cache.free_memory_blocks, SHOW_LONG_NOFLUSH},
   {"Qcache_free_memory",       (char*) &query_cache.free_memory, SHOW_LONG_NOFLUSH},
@@ -6526,6 +6730,13 @@ SHOW_VAR status_vars[]= {
   {"Ssl_version",              (char*) &show_ssl_get_version, SHOW_FUNC},
 #endif
 #endif /* HAVE_OPENSSL */
+  {"Syncs",                    (char*) &my_sync_count,          SHOW_LONG_NOFLUSH},
+  /*
+    Expression cache used only for caching subqueries now, so its statistic
+    variables we call subquery_cache*.
+  */
+  {"Subquery_cache_hit",       (char*) &subquery_cache_hit, SHOW_LONG},
+  {"Subquery_cache_miss",      (char*) &subquery_cache_miss, SHOW_LONG},
   {"Table_locks_immediate",    (char*) &locks_immediate,        SHOW_LONG},
   {"Table_locks_waited",       (char*) &locks_waited,           SHOW_LONG},
 #ifdef HAVE_MMAP
@@ -6678,7 +6889,10 @@ static int mysql_init_variables(void)
   /* Things reset to zero */
   opt_skip_slave_start= opt_reckless_slave = 0;
   mysql_home[0]= pidfile_name[0]= log_error_file[0]= 0;
+#if defined(HAVE_REALPATH) && !defined(HAVE_valgrind) && !defined(HAVE_BROKEN_REALPATH)
+  /*  We can only test for sub paths if my_symlink.c is using realpath */
   myisam_test_invalid_symlink= test_if_data_home_dir;
+#endif
   opt_log= opt_slow_log= 0;
   opt_bin_log= 0;
   opt_disable_networking= opt_skip_show_db=0;
@@ -6701,6 +6915,7 @@ static int mysql_init_variables(void)
   abort_loop= select_thread_in_use= signal_thread_in_use= 0;
   ready_to_exit= shutdown_in_progress= grant_option= 0;
   aborted_threads= aborted_connects= 0;
+  subquery_cache_miss= subquery_cache_hit= 0;
   delayed_insert_threads= delayed_insert_writes= delayed_rows_in_use= 0;
   delayed_insert_errors= thread_created= 0;
   specialflag= 0;
@@ -6726,7 +6941,7 @@ static int mysql_init_variables(void)
   character_set_filesystem= &my_charset_bin;
 
   opt_specialflag= SPECIAL_ENGLISH;
-  unix_sock= ip_sock= INVALID_SOCKET;
+  unix_sock= base_ip_sock= extra_ip_sock= INVALID_SOCKET;
   mysql_home_ptr= mysql_home;
   pidfile_name_ptr= pidfile_name;
   log_error_file_ptr= log_error_file;
@@ -6747,6 +6962,7 @@ static int mysql_init_variables(void)
     sql_print_error("Cannot allocate the keycache");
     return 1;
   }
+
   /* set key_cache_hash.default_value = dflt_key_cache */
   multi_keycache_init();
 
@@ -6770,8 +6986,10 @@ static int mysql_init_variables(void)
   
   /* Variables that depends on compile options */
 #ifndef DBUG_OFF
+#warning test that --disable-debug --debug works
   default_dbug_option=IF_WIN("d:t:i:O,\\mysqld.trace",
 			     "d:t:i:o,/tmp/mysqld.trace");
+  current_dbug_option= default_dbug_option;
 #endif
   opt_error_log= IF_WIN(1,0);
 #ifdef ENABLED_PROFILING
@@ -6866,12 +7084,27 @@ mysqld_get_one_option(int optid,
                       char *argument)
 {
   switch(optid) {
-  case '#':
 #ifndef DBUG_OFF
-    DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
-#endif
+  case '#':
+    if (!argument)
+      argument= (char*) default_dbug_option;
+    if (argument[0] == '0' && !argument[1])
+    {
+      DEBUGGER_OFF;
+      break;
+    }
+    DEBUGGER_ON;
+    if (argument[0] == '1' && !argument[1])
+      break;
+    DBUG_SET_INITIAL(argument);
     opt_endinfo=1;				/* unireg: memory allocation */
     break;
+#endif
+  case OPT_DEPRECATED_OPTION:
+    sql_print_warning("'%s' is deprecated. It does nothing and exists only "
+                      "for compatiblity with old my.cnf files.",
+                      opt->name);
+    break;
   case 'a':
     global_system_variables.sql_mode= MODE_ANSI;
     global_system_variables.tx_isolation= ISO_SERIALIZABLE;
@@ -6942,6 +7175,7 @@ mysqld_get_one_option(int optid,
   }
   case (int)OPT_REPLICATE_REWRITE_DB:
   {
+    /* See also OPT_REWRITE_DB handling in client/mysqlbinlog.cc */
     char* key = argument,*p, *val;
 
     if (!(p= strstr(argument, "->")))
@@ -7022,23 +7256,16 @@ mysqld_get_one_option(int optid,
     WARN_DEPRECATED(NULL, 7, 0, "--log-slow-queries", "'--slow-query-log'/'--slow-query-log-file'");
     opt_slow_log= 1;
     break;
-  case (int) OPT_SKIP_NEW:
-    opt_specialflag|= SPECIAL_NO_NEW_FUNC;
-    delay_key_write_options= DELAY_KEY_WRITE_NONE;
-    myisam_concurrent_insert=0;
-    myisam_recover_options= HA_RECOVER_OFF;
-    sp_automatic_privileges=0;
-    my_use_symdir=0;
-    ha_open_options&= ~(HA_OPEN_ABORT_IF_CRASHED | HA_OPEN_DELAY_KEY_WRITE);
-#ifdef HAVE_QUERY_CACHE
-    query_cache_size=0;
 #endif
     break;
   case (int) OPT_SAFE:
-    opt_specialflag|= SPECIAL_SAFE_MODE;
-    delay_key_write_options= DELAY_KEY_WRITE_NONE;
+    opt_specialflag|= SPECIAL_SAFE_MODE | SPECIAL_NO_NEW_FUNC;
+    delay_key_write_options= (uint) DELAY_KEY_WRITE_NONE;
     myisam_recover_options= HA_RECOVER_DEFAULT;
     ha_open_options&= ~(HA_OPEN_DELAY_KEY_WRITE);
+#ifdef HAVE_QUERY_CACHE
+    query_cache_size=0;
+#endif
     break;
   case (int) OPT_SKIP_PRIOR:
     opt_specialflag|= SPECIAL_NO_PRIOR;
@@ -7092,11 +7319,15 @@ mysqld_get_one_option(int optid,
     server_id_supplied = 1;
     break;
   case OPT_ONE_THREAD:
-    thread_handling= SCHEDULER_ONE_THREAD_PER_CONNECTION;
+    thread_handling= SCHEDULER_NO_THREADS;
     break;
   case OPT_LOWER_CASE_TABLE_NAMES:
     lower_case_table_names_used= 1;
     break;
+  case OPT_TEST_IGNORE_WRONG_OPTIONS:
+    /* Used for testing options */
+    opt_ignore_wrong_options= 1;
+    break;
 #if defined(ENABLED_DEBUG_SYNC)
   case OPT_DEBUG_SYNC_TIMEOUT:
     /*
@@ -7153,6 +7384,7 @@ mysql_getopt_value(const char *keyname, uint key_length,
   case OPT_KEY_CACHE_BLOCK_SIZE:
   case OPT_KEY_CACHE_DIVISION_LIMIT:
   case OPT_KEY_CACHE_AGE_THRESHOLD:
+  case OPT_KEY_CACHE_PARTITIONS:
   {
     KEY_CACHE *key_cache;
     if (!(key_cache= get_or_create_key_cache(keyname, key_length)))
@@ -7170,6 +7402,8 @@ mysql_getopt_value(const char *keyname, uint key_length,
       return &key_cache->param_division_limit;
     case OPT_KEY_CACHE_AGE_THRESHOLD:
       return &key_cache->param_age_threshold;
+    case OPT_KEY_CACHE_PARTITIONS:
+      return (uchar**) &key_cache->param_partitions;
     }
   }
   }
@@ -7265,7 +7499,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
   }
 
   if (opt_disable_networking)
-    mysqld_port= 0;
+    mysqld_port= mysqld_extra_port= 0;
 
   if (opt_skip_show_db)
     opt_specialflag|= SPECIAL_SKIP_SHOW_DB;
@@ -7310,6 +7544,8 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
   /* Set global MyISAM variables from delay_key_write_options */
   fix_delay_key_write(0, 0, OPT_GLOBAL);
 
+  global_system_variables.log_slow_filter=
+    fix_log_slow_filter(global_system_variables.log_slow_filter);
 #ifndef EMBEDDED_LIBRARY
   if (mysqld_chroot)
     set_root(mysqld_chroot);
@@ -7326,6 +7562,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
     In most cases the global variables will not be used
   */
   my_disable_locking= myisam_single_user= test(opt_external_locking == 0);
+  my_disable_sync= opt_sync == 0;
   my_default_record_cache_size=global_system_variables.read_buff_size;
 
   global_system_variables.long_query_time= (ulonglong)
@@ -7343,12 +7580,19 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
     return 1;
 
 #ifdef EMBEDDED_LIBRARY
-  one_thread_scheduler();
+  one_thread_scheduler(thread_scheduler);
+  one_thread_scheduler(extra_thread_scheduler);
 #else
   if (thread_handling <= SCHEDULER_ONE_THREAD_PER_CONNECTION)
-    one_thread_per_connection_scheduler();
-  else                  /* thread_handling == SCHEDULER_NO_THREADS) */
-    one_thread_scheduler();
+    one_thread_per_connection_scheduler(thread_scheduler, &max_connections,
+                                        &connection_count);
+  else if (thread_handling == SCHEDULER_NO_THREADS)
+    one_thread_scheduler(thread_scheduler);
+  else
+    pool_of_threads_scheduler(thread_scheduler);  /* purecov: tested */
+  one_thread_per_connection_scheduler(extra_thread_scheduler,
+                                      &extra_max_connections,
+                                      &extra_connection_count);
 #endif
 
   global_system_variables.engine_condition_pushdown=
@@ -7447,23 +7691,22 @@ bool is_secure_file_path(char *path)
     /*
       The supplied file path might have been a file and not a directory.
     */
-    int length= (int)dirname_length(path);
-    if (length >= FN_REFLEN)
-      return FALSE;
+    size_t length= dirname_length(path);        // Guaranteed to be < FN_REFLEN
     memcpy(buff2, path, length);
     buff2[length]= '\0';
     if (length == 0 || my_realpath(buff1, buff2, 0))
       return FALSE;
   }
   convert_dirname(buff2, buff1, NullS);
-  if (strncmp(opt_secure_file_priv, buff2, strlen(opt_secure_file_priv)))
-    return FALSE;
-  return TRUE;
+  return is_prefix(buff2, opt_secure_file_priv) ? TRUE : FALSE;
 }
 
+
 static int fix_paths(void)
 {
   char buff[FN_REFLEN],*pos;
+  DBUG_ENTER("fix_paths");
+
   convert_dirname(mysql_home,mysql_home,NullS);
   /* Resolve symlinks to allow 'mysql_home' to be a relative symlink */
   my_realpath(mysql_home,mysql_home,MYF(0));
@@ -7508,7 +7751,7 @@ static int fix_paths(void)
   charsets_dir=mysql_charsets_dir;
 
   if (init_tmpdir(&mysql_tmpdir_list, opt_mysql_tmpdir))
-    return 1;
+    DBUG_RETURN(1);
   if (!opt_mysql_tmpdir)
     opt_mysql_tmpdir= mysql_tmpdir;
 #ifdef HAVE_REPLICATION
@@ -7518,30 +7761,30 @@ static int fix_paths(void)
   /*
     Convert the secure-file-priv option to system format, allowing
     a quick strcmp to check if read or write is in an allowed dir
-   */
+  */
   if (opt_secure_file_priv)
   {
     if (*opt_secure_file_priv == 0)
     {
+      /* For easy check later */
+      my_free(opt_secure_file_priv, MYF(0));
       opt_secure_file_priv= 0;
     }
     else
     {
-      if (strlen(opt_secure_file_priv) >= FN_REFLEN)
-        opt_secure_file_priv[FN_REFLEN-1]= '\0';
+      char *secure_file_real_path;
       if (my_realpath(buff, opt_secure_file_priv, 0))
       {
         sql_print_warning("Failed to normalize the argument for --secure-file-priv.");
-        return 1;
+        DBUG_RETURN(1);
       }
-      char *secure_file_real_path= (char *)my_malloc(FN_REFLEN, MYF(MY_FAE));
+      secure_file_real_path= (char *)my_malloc(FN_REFLEN, MYF(MY_FAE));
       convert_dirname(secure_file_real_path, buff, NullS);
       my_free(opt_secure_file_priv);
       opt_secure_file_priv= secure_file_real_path;
     }
   }
-  
-  return 0;
+  DBUG_RETURN(0);
 }
 
 /**
@@ -7549,12 +7792,9 @@ static int fix_paths(void)
 
   @param dir_name			Directory to test
 
-  @retval
-    -1  Don't know (Test failed)
-  @retval
-    0   File system is case sensitive
-  @retval
-    1   File system is case insensitive
+  @retval -1  Don't know (Test failed)
+  @retval  0   File system is case sensitive
+  @retval  1   File system is case insensitive
 */
 
 static int test_if_case_insensitive(const char *dir_name)
@@ -7622,6 +7862,8 @@ void refresh_status(THD *thd)
 
   /* Reset thread's status variables */
   bzero((uchar*) &thd->status_var, sizeof(thd->status_var));
+  bzero((uchar*) &thd->org_status_var, sizeof(thd->org_status_var)); 
+  thd->start_bytes_received= 0;
 
   /* Reset some global variables */
   reset_status_vars();
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 6e81c240f7d..66acf086886 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -191,7 +191,7 @@ extern const char *in_left_expr_name, *in_additional_cond, *in_having_cond;
 extern SHOW_VAR status_vars[];
 extern struct system_variables max_system_variables;
 extern struct system_status_var global_status_var;
-extern struct rand_struct sql_rand;
+extern struct my_rnd_struct sql_rand;
 extern const char *opt_date_time_formats[];
 extern handlerton *partition_hton;
 extern handlerton *myisam_hton;
@@ -358,11 +358,13 @@ enum options_mysqld
   OPT_CONSOLE,
   OPT_DEBUG_SYNC_TIMEOUT,
   OPT_DELAY_KEY_WRITE_ALL,
+  OPT_DEPRECATED_OPTION,
   OPT_ISAM_LOG,
   OPT_KEY_BUFFER_SIZE,
   OPT_KEY_CACHE_AGE_THRESHOLD,
   OPT_KEY_CACHE_BLOCK_SIZE,
   OPT_KEY_CACHE_DIVISION_LIMIT,
+  OPT_KEY_CACHE_PARTITIONS,
   OPT_LOWER_CASE_TABLE_NAMES,
   OPT_ONE_THREAD,
   OPT_POOL_OF_THREADS,
@@ -377,7 +379,6 @@ enum options_mysqld
   OPT_SERVER_ID,
   OPT_SKIP_HOST_CACHE,
   OPT_SKIP_LOCK,
-  OPT_SKIP_NEW,
   OPT_SKIP_PRIOR,
   OPT_SKIP_RESOLVE,
   OPT_SKIP_STACK_TRACE,
@@ -502,4 +503,26 @@ inline THD *_current_thd(void)
 #endif
 #define current_thd _current_thd()
 
+/*
+  @todo remove, make it static in ha_maria.cc
+  currently it's needed for sql_select.cc
+*/
+extern handlerton *maria_hton;
+
+extern HASH global_user_stats;
+extern HASH global_client_stats;
+extern HASH global_table_stats;
+extern HASH global_index_stats;
+
+extern mysql_mutex_t LOCK_global_user_client_stats;
+extern mysql_mutex_t LOCK_global_table_stats;
+extern mysql_mutex_t LOCK_global_index_stats;
+extern mysql_mutex_t LOCK_stats;
+
+extern uint extra_connection_count;
+extern my_bool opt_userstat_running, debug_assert_if_crashed_table;
+extern uint mysqld_extra_port;
+extern ulong extra_max_connections;
+extern ulonglong denied_connections;
+extern ulong thread_created;
 #endif /* MYSQLD_INCLUDED */
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index 488e27296b6..a2f17075ea6 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -124,11 +124,9 @@ my_bool my_net_init(NET *net, Vio* vio)
   net->last_error[0]=0;
   net->compress=0; net->reading_or_writing=0;
   net->where_b = net->remain_in_buf=0;
+  net->net_skip_rest_factor= 0;
   net->last_errno=0;
   net->unused= 0;
-#if defined(MYSQL_SERVER) && !defined(EMBEDDED_LIBRARY)
-  net->skip_big_packet= FALSE;
-#endif
 
   if (vio != 0)					/* If real connection */
   {
@@ -222,7 +220,7 @@ my_bool net_realloc(NET *net, size_t length)
     -1   Don't know if data is ready or not
 */
 
-#if !defined(EMBEDDED_LIBRARY)
+#if !defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)
 
 static int net_data_is_ready(my_socket sd)
 {
@@ -264,34 +262,39 @@ static int net_data_is_ready(my_socket sd)
 #endif /* EMBEDDED_LIBRARY */
 
 /**
-  Remove unwanted characters from connection
-  and check if disconnected.
+   Intialize NET handler for new reads:
 
-    Read from socket until there is nothing more to read. Discard
-    what is read.
+   - Read from socket until there is nothing more to read. Discard
+     what is read.
+   - Initialize net for new net_read/net_write calls.
 
-    If there is anything when to read 'net_clear' is called this
-    normally indicates an error in the protocol.
+   If there is anything when to read 'net_clear' is called this
+   normally indicates an error in the protocol. Normally one should not
+   need to do clear the communication buffer. If one compiles without
+   -DUSE_NET_CLEAR then one wins one read call / query.
 
-    When connection is properly closed (for TCP it means with
-    a FIN packet), then select() considers a socket "ready to read",
-    in the sense that there's EOF to read, but read() returns 0.
+   When connection is properly closed (for TCP it means with
+   a FIN packet), then select() considers a socket "ready to read",
+   in the sense that there's EOF to read, but read() returns 0.
 
   @param net			NET handler
   @param clear_buffer           if <> 0, then clear all data from comm buff
 */
 
-void net_clear(NET *net, my_bool clear_buffer)
+void net_clear(NET *net, my_bool clear_buffer __attribute__((unused)))
 {
-#if !defined(EMBEDDED_LIBRARY)
-  size_t count;
-  int ready;
-#endif
   DBUG_ENTER("net_clear");
 
-#if !defined(EMBEDDED_LIBRARY)
+/*
+  We don't do a clear in case of not DBUG_OFF to catch bugs in the
+  protocol handling.
+*/
+
+#if (!defined(EMBEDDED_LIBRARY) && defined(DBUG_OFF)) || defined(USE_NET_CLEAR)
   if (clear_buffer)
   {
+    size_t count;
+    int ready;
     while ((ready= net_data_is_ready(net->vio->sd)) > 0)
     {
       /* The socket is ready */
@@ -762,6 +765,7 @@ static my_bool net_safe_read(NET *net, uchar *buff, size_t length,
 static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
 				ALARM *alarm_buff)
 {
+  longlong limit= net->max_packet_size*net->net_skip_rest_factor;
   uint32 old=remain;
   DBUG_ENTER("my_net_skip_rest");
   DBUG_PRINT("enter",("bytes_to_skip: %u", (uint) remain));
@@ -785,11 +789,15 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
 	DBUG_RETURN(1);
       update_statistics(thd_increment_bytes_received(length));
       remain -= (uint32) length;
+      limit-= length;
+      if (limit < 0)
+        DBUG_RETURN(1);
     }
     if (old != MAX_PACKET_LENGTH)
       break;
     if (net_safe_read(net, net->buff, NET_HEADER_SIZE, alarmed))
       DBUG_RETURN(1);
+    limit-= NET_HEADER_SIZE;
     old=remain= uint3korr(net->buff);
     net->pkt_nr++;
   }
@@ -939,7 +947,6 @@ my_real_read(NET *net, size_t *complen)
 		    (int) net->buff[net->where_b + 3],
 		    (uint) (uchar) net->pkt_nr);
             fflush(stderr);
-            DBUG_ASSERT(0);
 #endif
 	  }
 	  len= packet_error;
@@ -979,7 +986,6 @@ my_real_read(NET *net, size_t *complen)
 	  {
 #if defined(MYSQL_SERVER) && !defined(NO_ALARM)
 	    if (!net->compress &&
-                net->skip_big_packet &&
 		!my_net_skip_rest(net, (uint32) len, &alarmed, &alarm_buff))
 	      net->error= 3;		/* Successfully skiped packet */
 #endif
diff --git a/sql/opt_index_cond_pushdown.cc b/sql/opt_index_cond_pushdown.cc
new file mode 100644
index 00000000000..277343b81a5
--- /dev/null
+++ b/sql/opt_index_cond_pushdown.cc
@@ -0,0 +1,387 @@
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/****************************************************************************
+ * Index Condition Pushdown code starts
+ ***************************************************************************/
+/* 
+  Check if given expression uses only table fields covered by the given index
+
+  SYNOPSIS
+    uses_index_fields_only()
+      item           Expression to check
+      tbl            The table having the index
+      keyno          The index number
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Check if given expression only uses fields covered by index #keyno in the
+    table tbl. The expression can use any fields in any other tables.
+    
+    The expression is guaranteed not to be AND or OR - those constructs are 
+    handled outside of this function.
+
+  RETURN
+    TRUE   Yes
+    FALSE  No
+*/
+
+bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno, 
+                            bool other_tbls_ok)
+{
+  if (item->const_item())
+    return TRUE;
+
+  /* 
+    Don't push down the triggered conditions. Nested outer joins execution 
+    code may need to evaluate a condition several times (both triggered and
+    untriggered), and there is no way to put thi
+    TODO: Consider cloning the triggered condition and using the copies for:
+      1. push the first copy down, to have most restrictive index condition
+         possible
+      2. Put the second copy into tab->select_cond. 
+  */
+  if (item->type() == Item::FUNC_ITEM && 
+      ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
+    return FALSE;
+
+  if (!(item->used_tables() & tbl->map))
+    return other_tbls_ok;
+
+  Item::Type item_type= item->type();
+  switch (item_type) {
+  case Item::FUNC_ITEM:
+    {
+      /* This is a function, apply condition recursively to arguments */
+      Item_func *item_func= (Item_func*)item;
+      Item **child;
+      Item **item_end= (item_func->arguments()) + item_func->argument_count();
+      for (child= item_func->arguments(); child != item_end; child++)
+      {
+        if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::COND_ITEM:
+    {
+      /*
+        This is a AND/OR condition. Regular AND/OR clauses are handled by
+        make_cond_for_index() which will chop off the part that can be
+        checked with index. This code is for handling non-top-level AND/ORs,
+        e.g. func(x AND y).
+      */
+      List_iterator<Item> li(*((Item_cond*)item)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+        if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
+          return FALSE;
+      }
+      return TRUE;
+    }
+  case Item::FIELD_ITEM:
+    {
+      Item_field *item_field= (Item_field*)item;
+      if (item_field->field->table != tbl) 
+        return TRUE;
+      /*
+        The below is probably a repetition - the first part checks the
+        other two, but let's play it safe:
+      */
+      return item_field->field->part_of_key.is_set(keyno) &&
+             item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
+             item_field->field->type() != MYSQL_TYPE_BLOB;
+    }
+  case Item::REF_ITEM:
+    return uses_index_fields_only(item->real_item(), tbl, keyno,
+                                  other_tbls_ok);
+  default:
+    return FALSE; /* Play it safe, don't push unknown non-const items */
+  }
+}
+
+#define ICP_COND_USES_INDEX_ONLY 10
+
+/*
+  Get a part of the condition that can be checked using only index fields
+
+  SYNOPSIS
+    make_cond_for_index()
+      cond           The source condition
+      table          The table that is partially available
+      keyno          The index in the above table. Only fields covered by the index
+                     are available
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Get a part of the condition that can be checked when for the given table 
+    we have values only of fields covered by some index. The condition may
+    refer to other tables, it is assumed that we have values of all of their 
+    fields.
+
+    Example:
+      make_cond_for_index(
+         "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
+          t2, keyno(t2.key1)) 
+      will return
+        "cond(t1.field) AND cond(t2.key2)"
+
+  RETURN
+    Index condition, or NULL if no condition could be inferred.
+*/
+
+Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
+                          bool other_tbls_ok)
+{
+  if (!cond)
+    return NULL;
+  if (cond->type() == Item::COND_ITEM)
+  {
+    uint n_marked= 0;
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      table_map used_tables= 0;
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          used_tables|= fix->used_tables();
+        }
+        n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+        new_cond->used_tables_cache= used_tables;
+	return new_cond->argument_list()->head();
+      default:
+	new_cond->quick_fix_field();
+        new_cond->used_tables_cache= used_tables;
+	return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
+      }
+      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+        cond->marker= ICP_COND_USES_INDEX_ONLY;
+      new_cond->quick_fix_field();
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+    return (COND*) 0;
+  cond->marker= ICP_COND_USES_INDEX_ONLY;
+  return cond;
+}
+
+
+Item *make_cond_remainder(Item *cond, bool exclude_index)
+{
+  if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
+    return 0; /* Already checked */
+
+  if (cond->type() == Item::COND_ITEM)
+  {
+    table_map tbl_map= 0;
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, exclude_index);
+	if (fix)
+        {
+	  new_cond->argument_list()->push_back(fix);
+          tbl_map |= fix->used_tables();
+        }
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;
+      case 1:
+	return new_cond->argument_list()->head();
+      default:
+	new_cond->quick_fix_field();
+        ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+	return new_cond;
+      }
+    }
+    else /* It's OR */
+    {
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix= make_cond_remainder(item, FALSE);
+	if (!fix)
+	  return (COND*) 0;
+	new_cond->argument_list()->push_back(fix);
+        tbl_map |= fix->used_tables();
+      }
+      new_cond->quick_fix_field();
+      ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+  return cond;
+}
+
+
+/*
+  Try to extract and push the index condition
+
+  SYNOPSIS
+    push_index_cond()
+      tab            A join tab that has tab->table->file and its condition
+                     in tab->select_cond
+      keyno          Index for which extract and push the condition
+      other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
+
+  DESCRIPTION
+    Try to extract and push the index condition down to table handler
+*/
+
+void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok)
+{
+  DBUG_ENTER("push_index_cond");
+  Item *idx_cond;
+  bool do_index_cond_pushdown=
+    ((tab->table->file->index_flags(keyno, 0, 1) &
+      HA_DO_INDEX_COND_PUSHDOWN) &&
+     optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN));
+
+  /*
+    Do not try index condition pushdown on indexes which have partially-covered
+    columns. Unpacking from a column prefix into index tuple is not a supported 
+    operation in some engines, see e.g. MySQL BUG#42991.
+    TODO: a better solution would be not to consider partially-covered columns
+    as parts of the index and still produce/check index condition for
+    fully-covered index columns.
+  */
+  KEY *key_info= tab->table->key_info + keyno;
+  for (uint kp= 0; kp < key_info->key_parts; kp++)
+  {
+    if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG))
+    {
+      do_index_cond_pushdown= FALSE;
+      break;
+    }
+  }
+
+  if (do_index_cond_pushdown)
+  {
+    DBUG_EXECUTE("where",
+                 print_where(tab->select_cond, "full cond", QT_ORDINARY););
+
+    idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno,
+                                  other_tbls_ok);
+
+    DBUG_EXECUTE("where",
+                 print_where(idx_cond, "idx cond", QT_ORDINARY););
+
+    if (idx_cond)
+    {
+      Item *idx_remainder_cond= 0;
+      tab->pre_idx_push_select_cond= tab->select_cond;
+      /*
+        For BKA cache we store condition to special BKA cache field
+        because evaluation of the condition requires additional operations
+        before the evaluation. This condition is used in 
+        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
+      */
+      if (tab->use_join_cache &&
+          /*
+            if cache is used then the value is TRUE only 
+            for BKA[_UNIQUE] cache (see check_join_cache_usage func).
+            In this case other_tbls_ok is an equivalent of
+            cache->is_key_access().
+          */
+          other_tbls_ok &&
+          (idx_cond->used_tables() &
+           ~(tab->table->map | tab->join->const_table_map)))
+        tab->cache_idx_cond= idx_cond;
+      else
+        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
+
+      /*
+        Disable eq_ref's "lookup cache" if we've pushed down an index
+        condition. 
+        TODO: This check happens to work on current ICP implementations, but
+        there may exist a compliant implementation that will not work 
+        correctly with it. Sort this out when we stabilize the condition
+        pushdown APIs.
+      */
+      if (idx_remainder_cond != idx_cond)
+        tab->ref.disable_cache= TRUE;
+
+      Item *row_cond= make_cond_remainder(tab->select_cond, TRUE);
+
+      DBUG_EXECUTE("where",
+                   print_where(row_cond, "remainder cond", QT_ORDINARY););
+      
+      if (row_cond)
+      {
+        if (!idx_remainder_cond)
+          tab->select_cond= row_cond;
+        else
+        {
+          COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond);
+          tab->select_cond= new_cond;
+	  tab->select_cond->quick_fix_field();
+          ((Item_cond_and*)tab->select_cond)->used_tables_cache= 
+            row_cond->used_tables() | idx_remainder_cond->used_tables();
+        }
+      }
+      else
+        tab->select_cond= idx_remainder_cond;
+      if (tab->select)
+      {
+        DBUG_EXECUTE("where",
+                     print_where(tab->select->cond,
+                                 "select_cond",
+                                 QT_ORDINARY););
+
+        tab->select->cond= tab->select_cond;
+      }
+    }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index ffff9e3f6c8..829d97373a1 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -34,7 +34,7 @@
     
     The lists are returned in form of complicated structure of interlinked
     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
-    See check_quick_keys, find_used_partitions for examples of how to walk 
+    See quick_range_seq_next, find_used_partitions for examples of how to walk 
     this structure.
     All direct "users" of this module are located within this file, too.
 
@@ -59,6 +59,48 @@
 
   Record retrieval code for range/index_merge/groupby-min-max.
     Implementations of QUICK_*_SELECT classes.
+
+  KeyTupleFormat
+  ~~~~~~~~~~~~~~
+  The code in this file (and elsewhere) makes operations on key value tuples.
+  Those tuples are stored in the following format:
+  
+  The tuple is a sequence of key part values. The length of key part value
+  depends only on its type (and not depends on the what value is stored)
+  
+    KeyTuple: keypart1-data, keypart2-data, ...
+  
+  The value of each keypart is stored in the following format:
+  
+    keypart_data: [isnull_byte] keypart-value-bytes
+
+  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
+  be used to check this), then the first byte is a NULL indicator with the 
+  following valid values:
+    1  - keypart has NULL value.
+    0  - keypart has non-NULL value.
+
+  <questionable-statement> If isnull_byte==1 (NULL value), then the following
+  keypart->length bytes must be 0.
+  </questionable-statement>
+
+  keypart-value-bytes holds the value. Its format depends on the field type.
+  The length of keypart-value-bytes may or may not depend on the value being
+  stored. The default is that length is static and equal to 
+  KEY_PART_INFO::length.
+  
+  Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the 
+  value:
+  
+     keypart-value-bytes: value_length value_bytes
+
+  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
+
+  See key_copy() and key_restore() for code to move data between index tuple
+  and table record
+
+  CAUTION: the above description is only sergefp's understanding of the 
+           subject and may omit some details.
 */
 
 #ifdef USE_PRAGMA_IMPLEMENTATION
@@ -409,6 +451,7 @@ public:
   /* returns a number of keypart values (0 or 1) appended to the key buffer */
   int store_min(uint length, uchar **min_key,uint min_key_flag)
   {
+    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
     if ((min_flag & GEOM_FLAG) ||
         (!(min_flag & NO_MIN_RANGE) &&
 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
@@ -653,6 +696,11 @@ public:
   */
   bool using_real_indexes;
   
+  /*
+    Aggressively remove "scans" that do not have conditions on first
+    keyparts. Such scans are usable when doing partition pruning but not
+    regular range optimization.
+  */
   bool remove_jump_scans;
   
   /*
@@ -670,6 +718,7 @@ public:
 
   /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
   uint alloced_sel_args; 
+  bool force_default_mrr;
 };
 
 class PARAM : public RANGE_OPT_PARAM
@@ -697,6 +746,7 @@ public:
   uint8 first_null_comp; /* first null component if any, 0 - otherwise */
 };
 
+
 class TABLE_READ_PLAN;
   class TRP_RANGE;
   class TRP_ROR_INTERSECT;
@@ -715,15 +765,14 @@ static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
 
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
-static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree,
-                                  bool update_tbl_stats);
-static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
-                                uchar *min_key, uint min_key_flag, int,
-                                uchar *max_key, uint max_key_flag, int);
+static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                                  SEL_ARG *tree, bool update_tbl_stats, 
+                                  uint *mrr_flags, uint *bufsize,
+                                  COST_VECT *cost);
 
 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
-                                     SEL_ARG *key_tree,
-                                     MEM_ROOT *alloc = NULL);
+                                     SEL_ARG *key_tree, uint mrr_flags, 
+                                     uint mrr_buf_size, MEM_ROOT *alloc);
 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool index_read_must_be_used,
                                        bool update_tbl_stats,
@@ -742,8 +791,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 static
 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
                                           double read_time);
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr);
 
 #ifndef DBUG_OFF
 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -770,7 +817,9 @@ static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                              uint length);
 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
 
+#include "opt_range_mrr.cc"
 
 /*
   SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
@@ -1132,25 +1181,22 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 {}
 
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
-                                       bool no_alloc, MEM_ROOT *parent_alloc)
-  :dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),last_range(0)
+                                       bool no_alloc, MEM_ROOT *parent_alloc,
+                                       bool *create_error)
+  :doing_key_read(0),/*error(0),*/free_file(0),/*in_range(0),*/cur_range(NULL),last_range(0),dont_free(0)
 {
   my_bitmap_map *bitmap;
   DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
 
   in_ror_merged_scan= 0;
-  sorted= 0;
   index= key_nr;
   head=  table;
   key_part_info= head->key_info[index].key_part;
   my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);
 
   /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
-  multi_range_bufsiz= thd->variables.read_rnd_buff_size;
-  multi_range_count= thd->variables.multi_range_count;
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_size= thd->variables.mrr_buff_size;
+  mrr_buf_desc= NULL;
 
   if (!no_alloc && !parent_alloc)
   {
@@ -1165,12 +1211,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
   save_read_set= head->read_set;
   save_write_set= head->write_set;
 
-  /* Allocate a bitmap for used columns */
+  /* Allocate a bitmap for used columns (Q: why not on MEM_ROOT?) */
   if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size,
                                            MYF(MY_WME))))
   {
     column_bitmap.bitmap= 0;
-    error= 1;
+    *create_error= 1;
   }
   else
     bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
@@ -1178,6 +1224,20 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
 }
 
 
+void QUICK_RANGE_SELECT::need_sorted_output()
+{
+  if (!(mrr_flags & HA_MRR_SORTED))
+  {
+    /*
+      Native implementation can't produce sorted output. We'll have to
+      switch to default
+    */
+    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; 
+  }
+  mrr_flags |= HA_MRR_SORTED;
+}
+
+
 int QUICK_RANGE_SELECT::init()
 {
   DBUG_ENTER("QUICK_RANGE_SELECT::init");
@@ -1204,7 +1264,8 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
     if (file) 
     {
       range_end();
-      head->set_keyread(FALSE);
+      if (doing_key_read)
+        file->extra(HA_EXTRA_NO_KEYREAD);
       if (free_file)
       {
         DBUG_PRINT("info", ("Freeing separate handler 0x%lx (free: %d)", (long) file,
@@ -1219,8 +1280,7 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
     my_free(column_bitmap.bitmap);
   }
   head->column_bitmaps_set(save_read_set, save_write_set);
-  my_free(multi_range);
-  my_free(multi_range_buff);
+  my_free(mrr_buf_desc);
   DBUG_VOID_RETURN;
 }
 
@@ -1345,6 +1405,7 @@ int QUICK_ROR_INTERSECT_SELECT::init()
 int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
 {
   handler *save_file= file, *org_file;
+  my_bool org_key_read;
   THD *thd;
   DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
 
@@ -1404,12 +1465,17 @@ end:
     The now bitmap is stored in 'column_bitmap' which is used in ::get_next()
   */
   org_file= head->file;
+  org_key_read= head->key_read;
   head->file= file;
-  /* We don't have to set 'head->keyread' here as the 'file' is unique */
+  head->key_read= 0;
   if (!head->no_keyread)
+  {
+    doing_key_read= 1;
     head->mark_columns_used_by_index(index);
+  }
   head->prepare_for_position();
   head->file= org_file;
+  head->key_read= org_key_read;
   bitmap_copy(&column_bitmap, head->read_set);
   head->column_bitmaps_set(&column_bitmap, &column_bitmap);
 
@@ -1461,7 +1527,7 @@ int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
     quick->record= head->record[0];
   }
 
-  if (need_to_fetch_row && head->file->ha_rnd_init(1))
+  if (need_to_fetch_row && head->file->ha_rnd_init_with_error(1))
   {
     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
     DBUG_RETURN(1);
@@ -1577,7 +1643,7 @@ int QUICK_ROR_UNION_SELECT::init()
   DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
   if (init_queue(&queue, quick_selects.elements, 0,
                  FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
-                 (void*) this))
+                 (void*) this, 0, 0))
   {
     bzero(&queue, sizeof(QUEUE));
     DBUG_RETURN(1);
@@ -1636,7 +1702,7 @@ int QUICK_ROR_UNION_SELECT::reset()
     queue_insert(&queue, (uchar*)quick);
   }
 
-  if (head->file->ha_rnd_init(1))
+  if (head->file->ha_rnd_init_with_error(1))
   {
     DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
     DBUG_RETURN(1);
@@ -1908,9 +1974,11 @@ class TRP_RANGE : public TABLE_READ_PLAN
 public:
   SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
   uint     key_idx; /* key number in PARAM::key */
+  uint     mrr_flags; 
+  uint     mrr_buf_size;
 
-  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
-   : key(key_arg), key_idx(idx_arg)
+  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
+   : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
   {}
   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
 
@@ -1919,7 +1987,8 @@ public:
   {
     DBUG_ENTER("TRP_RANGE::make_quick");
     QUICK_RANGE_SELECT *quick;
-    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
+    if ((quick= get_quick_select(param, key_idx, key,  mrr_flags, 
+                                 mrr_buf_size, parent_alloc)))
     {
       quick->records= records;
       quick->read_time= read_cost;
@@ -2148,7 +2217,8 @@ static int fill_used_fields_bitmap(PARAM *param)
 
 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 				  table_map prev_tables,
-				  ha_rows limit, bool force_quick_range)
+				  ha_rows limit, bool force_quick_range, 
+                                  bool ordered_output)
 {
   uint idx;
   double scan_time;
@@ -2204,6 +2274,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     param.imerge_cost_buff_size= 0;
     param.using_real_indexes= TRUE;
     param.remove_jump_scans= TRUE;
+    param.force_default_mrr= ordered_output;
 
     thd->no_errors=1;				// Don't warn about NULL
     init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
@@ -2257,9 +2328,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     if (!head->covering_keys.is_clear_all())
     {
       int key_for_use= find_shortest_key(head, &head->covering_keys);
-      double key_read_time= (get_index_only_read_time(&param, records,
-                                                     key_for_use) +
-                             (double) records / TIME_FOR_COMPARE);
+      double key_read_time= head->file->keyread_time(key_for_use, 1, records) +
+                            (double) records / TIME_FOR_COMPARE;
       DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                            "read time %g", key_for_use, key_read_time));
       if (key_read_time < read_time)
@@ -3849,8 +3919,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
 
   DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
   if (imerge_too_expensive || (imerge_cost > read_time) ||
-      ((non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) &&
-      read_time != DBL_MAX))
+      ((non_cpk_scan_records+cpk_scan_records >=
+        param->table->file->stats.records) &&
+       read_time != DBL_MAX))
   {
     /*
       Bail out if it is obvious that both index_merge and ROR-union will be
@@ -4024,43 +4095,6 @@ skip_to_ror_scan:
   DBUG_RETURN(imerge_trp);
 }
 
-
-/*
-  Calculate cost of 'index only' scan for given index and number of records.
-
-  SYNOPSIS
-    get_index_only_read_time()
-      param    parameters structure
-      records  #of records to read
-      keynr    key to read
-
-  NOTES
-    It is assumed that we will read trough the whole key range and that all
-    key blocks are half full (normally things are much better). It is also
-    assumed that each time we read the next key from the index, the handler
-    performs a random seek, thus the cost is proportional to the number of
-    blocks read.
-
-  TODO:
-    Move this to handler->read_time() by adding a flag 'index-only-read' to
-    this call. The reason for doing this is that the current function doesn't
-    handle the case when the row is stored in the b-tree (like in innodb
-    clustered index)
-*/
-
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr)
-{
-  double read_time;
-  uint keys_per_block= (param->table->file->stats.block_size/2/
-			(param->table->key_info[keynr].key_length+
-			 param->table->file->ref_length) + 1);
-  read_time=((double) (records+keys_per_block-1)/
-             (double) keys_per_block);
-  return read_time;
-}
-
-
 typedef struct st_ror_scan_info
 {
   uint      idx;      /* # of used key in param->keys */
@@ -4137,8 +4171,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
   }
   ror_scan->index_read_cost=
-    get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
-                             ror_scan->keynr);
+    param->table->file->keyread_time(ror_scan->keynr, 1,
+                                     param->table->quick_rows[ror_scan->keynr]);
   DBUG_RETURN(ror_scan);
 }
 
@@ -4925,11 +4959,12 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                        bool update_tbl_stats,
                                        double read_time)
 {
-  int idx;
+  uint idx;
   SEL_ARG **key,**end, **key_to_read= NULL;
   ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
+  uint    UNINIT_VAR(best_mrr_flags),            /* protected by key_to_read */
+          UNINIT_VAR(best_buf_size);             /* protected by key_to_read */
   TRP_RANGE* read_plan= NULL;
-  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
   DBUG_ENTER("get_key_scans_params");
   /*
     Note that there may be trees that have type SEL_TREE::KEY but contain no
@@ -4940,64 +4975,40 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       "tree scans"););
   tree->ror_scans_map.clear_all();
   tree->n_ror_scans= 0;
-  for (idx= 0,key=tree->keys, end=key+param->keys;
-       key != end ;
-       key++,idx++)
+  for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
   {
-    ha_rows found_records;
-    double found_read_time;
     if (*key)
     {
+      ha_rows found_records;
+      COST_VECT cost;
+      double found_read_time;
+      uint mrr_flags, buf_size;
       uint keynr= param->real_keynr[idx];
       if ((*key)->type == SEL_ARG::MAYBE_KEY ||
           (*key)->maybe_flag)
         param->needed_reg->set_bit(keynr);
 
-      bool read_index_only= index_read_must_be_used ? TRUE :
-                            (bool) param->table->covering_keys.is_set(keynr);
+      bool read_index_only= index_read_must_be_used || 
+                            param->table->covering_keys.is_set(keynr);
+
+      found_records= check_quick_select(param, idx, read_index_only, *key,
+                                        update_tbl_stats, &mrr_flags,
+                                        &buf_size, &cost);
 
-      found_records= check_quick_select(param, idx, *key, update_tbl_stats);
-      if (param->is_ror_scan)
+      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
       {
         tree->n_ror_scans++;
         tree->ror_scans_map.set_bit(idx);
       }
-      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
-      if (found_records != HA_POS_ERROR && found_records > 2 &&
-          read_index_only &&
-          (param->table->file->index_flags(keynr, param->max_key_part,1) &
-           HA_KEYREAD_ONLY) &&
-          !(pk_is_clustered && keynr == param->table->s->primary_key))
-      {
-        /*
-          We can resolve this by only reading through this key. 
-          0.01 is added to avoid races between range and 'index' scan.
-        */
-        found_read_time= get_index_only_read_time(param,found_records,keynr) +
-                         cpu_cost + 0.01;
-      }
-      else
-      {
-        /*
-          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
-          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
-        */
-	found_read_time= param->table->file->read_time(keynr,
-                                                       param->range_count,
-                                                       found_records) +
-			 cpu_cost + 0.01;
-      }
-      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
-                         param->table->key_info[keynr].name, found_read_time,
-                         read_time));
-
-      if (read_time > found_read_time && found_records != HA_POS_ERROR)
+      if (found_records != HA_POS_ERROR &&
+          read_time > (found_read_time= cost.total_cost()))
       {
         read_time=    found_read_time;
         best_records= found_records;
         key_to_read=  key;
+        best_mrr_flags= mrr_flags;
+        best_buf_size=  buf_size;
       }
-
     }
   }
 
@@ -5006,11 +5017,13 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
   if (key_to_read)
   {
     idx= key_to_read - tree->keys;
-    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
+                                                    best_mrr_flags)))
     {
       read_plan->records= best_records;
       read_plan->is_ror= tree->ror_scans_map.is_set(idx);
       read_plan->read_cost= read_time;
+      read_plan->mrr_buf_size= best_buf_size;
       DBUG_PRINT("info",
                  ("Returning range plan for key %s, cost %g, records %lu",
                   param->table->key_info[param->real_keynr[idx]].name,
@@ -5073,7 +5086,9 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     for (; first_scan != last_scan;++first_scan)
     {
       if (!(quick= get_quick_select(param, (*first_scan)->idx,
-                                    (*first_scan)->sel_arg, alloc)) ||
+                                    (*first_scan)->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)) ||
           quick_intrsect->push_quick_back(quick))
       {
         delete quick_intrsect;
@@ -5083,7 +5098,9 @@ QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
     if (cpk_scan)
     {
       if (!(quick= get_quick_select(param, cpk_scan->idx,
-                                    cpk_scan->sel_arg, alloc)))
+                                    cpk_scan->sel_arg,
+                                    HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                    0, alloc)))
       {
         delete quick_intrsect;
         DBUG_RETURN(NULL);
@@ -5563,7 +5580,7 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
     DBUG_RETURN(tree);
   }
   /* Here when simple cond */
-  if (cond->const_item())
+  if (cond->const_item() && !cond->is_expensive())
   {
     /*
       During the cond->val_int() evaluation we can come across a subselect 
@@ -7487,329 +7504,126 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
     sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                       e_count, elements, (long unsigned int) this);
 }
-
 #endif
 
 /*
-  Calculate estimate of number records that will be retrieved by a range
-  scan on given index using given SEL_ARG intervals tree.
+  Calculate cost and E(#rows) for a given index and intervals tree 
+
   SYNOPSIS
-    check_quick_select
-      param  Parameter from test_quick_select
-      idx               Number of index to use in tree->keys
-      tree              Transformed selection condition, tree->keys[idx]
-                        holds the range tree to be used for scanning.
-      update_tbl_stats  If true, update table->quick_keys with information
+    check_quick_select()
+      param             Parameter from test_quick_select
+      idx               Number of index to use in PARAM::key SEL_TREE::key
+      index_only        TRUE  - assume only index tuples will be accessed
+                        FALSE - assume full table rows will be read
+      tree              Transformed selection condition, tree->key[idx] holds
+                        the intervals for the given index.
+      update_tbl_stats  TRUE <=> update table->quick_* with information
                         about range scan we've evaluated.
+      mrr_flags   INOUT MRR access flags
+      cost        OUT   Scan cost
 
   NOTES
     param->is_ror_scan is set to reflect if the key scan is a ROR (see
     is_key_scan_ror function for more info)
     param->table->quick_*, param->range_count (and maybe others) are
-    updated with data of given key scan, see check_quick_keys for details.
+    updated with data of given key scan, see quick_range_seq_next for details.
 
   RETURN
     Estimate # of records to be retrieved.
     HA_POS_ERROR if estimate calculation failed due to table handler problems.
-
 */
 
-static ha_rows
-check_quick_select(PARAM *param,uint idx,SEL_ARG *tree, bool update_tbl_stats)
-{
-  ha_rows records;
-  bool    cpk_scan;
-  uint key;
+static
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                           SEL_ARG *tree, bool update_tbl_stats, 
+                           uint *mrr_flags, uint *bufsize, COST_VECT *cost)
+{
+  SEL_ARG_RANGE_SEQ seq;
+  RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
+  handler *file= param->table->file;
+  ha_rows rows;
+  uint keynr= param->real_keynr[idx];
   DBUG_ENTER("check_quick_select");
+  
+  /* Handle cases when we don't have a valid non-empty list of range */
+  if (!tree)
+    DBUG_RETURN(HA_POS_ERROR);
+  if (tree->type == SEL_ARG::IMPOSSIBLE)
+    DBUG_RETURN(0L);
+  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
+    DBUG_RETURN(HA_POS_ERROR);
 
-  param->is_ror_scan= FALSE;
-  param->first_null_comp= 0;
+  seq.keyno= idx;
+  seq.real_keyno= keynr;
+  seq.param= param;
+  seq.start= tree;
 
-  if (!tree)
-    DBUG_RETURN(HA_POS_ERROR);			// Can't use it
-  param->max_key_part=0;
   param->range_count=0;
-  key= param->real_keynr[idx];
+  param->max_key_part=0;
 
-  if (tree->type == SEL_ARG::IMPOSSIBLE)
-    DBUG_RETURN(0L);				// Impossible select. return
-  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
-    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+  param->is_ror_scan= TRUE;
+  if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
+    param->is_ror_scan= FALSE;
+  
+  *mrr_flags= param->force_default_mrr? HA_MRR_USE_DEFAULT_IMPL: 0;
+  /*
+    Pass HA_MRR_SORTED to see if MRR implementation can handle sorting.
+  */
+  *mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
 
-  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
-  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
-  {
-    /* Records are not ordered by rowid for other types of indexes. */
-    cpk_scan= FALSE;
-  }
-  else
-  {
-    /*
-      Clustered PK scan is a special case, check_quick_keys doesn't recognize
-      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
-    */
-    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
-               param->table->file->primary_key_is_clustered());
-    param->is_ror_scan= !cpk_scan;
-  }
-  param->n_ranges= 0;
+  bool pk_is_clustered= file->primary_key_is_clustered();
+  if (index_only && 
+      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
+      !(pk_is_clustered && keynr == param->table->s->primary_key))
+     *mrr_flags |= HA_MRR_INDEX_ONLY;
+  
+  if (current_thd->lex->sql_command != SQLCOM_SELECT)
+    *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
 
-  records= check_quick_keys(param, idx, tree,
-                            param->min_key, 0, -1,
-                            param->max_key, 0, -1);
-  if (records != HA_POS_ERROR)
+  *bufsize= param->thd->variables.mrr_buff_size;
+  rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
+                                          bufsize, mrr_flags, cost);
+  if (rows != HA_POS_ERROR)
   {
+    param->table->quick_rows[keynr]=rows;
     if (update_tbl_stats)
     {
-      param->table->quick_keys.set_bit(key);
-      param->table->quick_key_parts[key]=param->max_key_part+1;
-      param->table->quick_n_ranges[key]= param->n_ranges;
+      param->table->quick_keys.set_bit(keynr);
+      param->table->quick_key_parts[keynr]=param->max_key_part+1;
+      param->table->quick_n_ranges[keynr]= param->range_count;
       param->table->quick_condition_rows=
-        min(param->table->quick_condition_rows, records);
+        min(param->table->quick_condition_rows, rows);
     }
-    /*
-      Need to save quick_rows in any case as it is used when calculating
-      cost of ROR intersection:
-    */
-    param->table->quick_rows[key]=records;
-    if (cpk_scan)
-      param->is_ror_scan= TRUE;
   }
-  if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
-    param->is_ror_scan= FALSE;
-  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
-  DBUG_RETURN(records);
-}
-
-
-/*
-  Recursively calculate estimate of # rows that will be retrieved by
-  key scan on key idx.
-  SYNOPSIS
-    check_quick_keys()
-      param         Parameter from test_quick select function.
-      idx           Number of key to use in PARAM::keys in list of used keys
-                    (param->real_keynr[idx] holds the key number in table)
-      key_tree      SEL_ARG tree being examined.
-      min_key       Buffer with partial min key value tuple
-      min_key_flag
-      max_key       Buffer with partial max key value tuple
-      max_key_flag
-
-  NOTES
-    The function does the recursive descent on the tree via SEL_ARG::left,
-    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
-    are calculated using records_in_range calls at the leaf nodes and then
-    summed.
-
-    param->min_key and param->max_key are used to hold prefixes of key value
-    tuples.
-
-    The side effects are:
-
-    param->max_key_part is updated to hold the maximum number of key parts used
-      in scan minus 1.
-
-    param->range_count is incremented if the function finds a range that
-      wasn't counted by the caller.
-
-    param->is_ror_scan is cleared if the function detects that the key scan is
-      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
-      function for description of which key scans are ROR scans)
-
-  RETURN
-    #records      E(#records) for given subtree
-    HA_POS_ERROR  if subtree cannot be used for record retrieval
-
-*/
-
-static ha_rows
-check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
-		 uchar *min_key, uint min_key_flag, int min_keypart,
-                 uchar *max_key, uint max_key_flag, int max_keypart)
-{
-  ha_rows records=0, tmp;
-  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
-  uint tmp_min_keypart= min_keypart, tmp_max_keypart= max_keypart;
-  uchar *tmp_min_key, *tmp_max_key;
-  uint8 save_first_null_comp= param->first_null_comp;
-
-  param->max_key_part=max(param->max_key_part,key_tree->part);
-  if (key_tree->left != &null_element)
+  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
+  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
+  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
   {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      All scans are non-ROR scans for those index types.
+      TODO: Don't have this logic here, make table engines return 
+      appropriate flags instead.
     */
     param->is_ror_scan= FALSE;
-    records=check_quick_keys(param, idx, key_tree->left,
-                             min_key, min_key_flag, min_keypart,
-			     max_key, max_key_flag, max_keypart);
-    if (records == HA_POS_ERROR)			// Impossible
-      return records;
-  }
-
-  tmp_min_key= min_key;
-  tmp_max_key= max_key;
-  tmp_min_keypart+= key_tree->store_min(param->key[idx][key_tree->part].store_length,
-                                        &tmp_min_key, min_key_flag);
-  tmp_max_keypart+= key_tree->store_max(param->key[idx][key_tree->part].store_length,
-                                        &tmp_max_key, max_key_flag);
-  min_key_length= (uint) (tmp_min_key - param->min_key);
-  max_key_length= (uint) (tmp_max_key - param->max_key);
-
-  if (param->is_ror_scan)
-  {
-    /*
-      If the index doesn't cover entire key, mark the scan as non-ROR scan.
-      Actually we're cutting off some ROR scans here.
-    */
-    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
-                    key_part[key_tree->part].fieldnr - 1;
-    if (param->table->field[fieldnr]->key_length() !=
-        param->key[idx][key_tree->part].length)
-      param->is_ror_scan= FALSE;
-  }
-
-  if (!param->first_null_comp && key_tree->is_null_interval())
-    param->first_null_comp= key_tree->part+1;
-
-  if (key_tree->next_key_part &&
-      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE &&
-      key_tree->next_key_part->part == key_tree->part+1)
-  {						// const key as prefix
-    if (min_key_length == max_key_length &&
-	!memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-	!key_tree->min_flag && !key_tree->max_flag)
-    {
-      tmp=check_quick_keys(param,idx,key_tree->next_key_part, tmp_min_key,
-                           min_key_flag | key_tree->min_flag, tmp_min_keypart,
-                           tmp_max_key, max_key_flag | key_tree->max_flag,
-                           tmp_max_keypart);
-      goto end;					// Ugly, but efficient
-    }
-    else
-    {
-      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
-      param->is_ror_scan= FALSE;
-    }
-
-    tmp_min_flag=key_tree->min_flag;
-    tmp_max_flag=key_tree->max_flag;
-    if (!tmp_min_flag)
-      tmp_min_keypart+=
-      key_tree->next_key_part->store_min_key(param->key[idx],
-                                             &tmp_min_key,
-                                             &tmp_min_flag,
-                                             MAX_KEY);
-    if (!tmp_max_flag)
-      tmp_max_keypart+=
-      key_tree->next_key_part->store_max_key(param->key[idx],
-                                             &tmp_max_key,
-                                             &tmp_max_flag,
-                                             MAX_KEY);
-    min_key_length= (uint) (tmp_min_key - param->min_key);
-    max_key_length= (uint) (tmp_max_key - param->max_key);
   }
-  else
+  else if (param->table->s->primary_key == keynr && pk_is_clustered)
   {
-    tmp_min_flag= min_key_flag | key_tree->min_flag;
-    tmp_max_flag= max_key_flag | key_tree->max_flag;
-  }
-
-  if (unlikely(param->thd->killed != 0))
-    return HA_POS_ERROR;
-
-  keynr=param->real_keynr[idx];
-  param->range_count++;
-  if (!tmp_min_flag && ! tmp_max_flag &&
-      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
-      param->table->key_info[keynr].flags & HA_NOSAME &&
-      min_key_length == max_key_length &&
-      !memcmp(param->min_key, param->max_key, min_key_length) &&
-      !param->first_null_comp)
-  {
-    tmp=1;					// Max one record
-    param->n_ranges++;
-  }
-  else
-  {
-    if (param->is_ror_scan)
-    {
-      /*
-        If we get here, the condition on the key was converted to form
-        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
-          somecond(keyXpart{key_tree->part})"
-        Check if
-          somecond is "keyXpart{key_tree->part} = const" and
-          uncovered "tail" of KeyX parts is either empty or is identical to
-          first members of clustered primary key.
-      */
-      if (!(min_key_length == max_key_length &&
-            !memcmp(min_key, max_key, (uint) (tmp_max_key - max_key)) &&
-            !key_tree->min_flag && !key_tree->max_flag &&
-            is_key_scan_ror(param, keynr, key_tree->part + 1)))
-        param->is_ror_scan= FALSE;
-    }
-    param->n_ranges++;
-
-    if (tmp_min_flag & GEOM_FLAG)
-    {
-      key_range min_range;
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      /* In this case tmp_min_flag contains the handler-read-function */
-      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);
-
-      tmp= param->table->file->records_in_range(keynr,
-                                                &min_range, (key_range*) 0);
-    }
-    else
-    {
-      key_range min_range, max_range;
-
-      min_range.key=    param->min_key;
-      min_range.length= min_key_length;
-      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
-                         HA_READ_KEY_EXACT);
-      min_range.keypart_map= make_keypart_map(tmp_min_keypart);
-      max_range.key=    param->max_key;
-      max_range.length= max_key_length;
-      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
-                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
-      max_range.keypart_map= make_keypart_map(tmp_max_keypart);
-      tmp=param->table->file->records_in_range(keynr,
-                                               (min_key_length ? &min_range :
-                                                (key_range*) 0),
-                                               (max_key_length ? &max_range :
-                                                (key_range*) 0));
-    }
+    /* Clustered PK scan is always a ROR scan (TODO: same as above) */
+    param->is_ror_scan= TRUE;
   }
- end:
-  if (tmp == HA_POS_ERROR)			// Impossible range
-    return tmp;
-  records+=tmp;
-  if (key_tree->right != &null_element)
+  else if (param->range_count > 1)
   {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+    /* 
+      Scaning multiple key values in the index: the records are ROR
+      for each value, but not between values. E.g, "SELECT ... x IN
+      (1,3)" returns ROR order for all records with x=1, then ROR
+      order for records with x=3
     */
     param->is_ror_scan= FALSE;
-    tmp=check_quick_keys(param, idx, key_tree->right,
-                         min_key, min_key_flag, min_keypart,
-                         max_key, max_key_flag, max_keypart);
-    if (tmp == HA_POS_ERROR)
-      return tmp;
-    records+=tmp;
   }
-  param->first_null_comp= save_first_null_comp;
-  return records;
+
+  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
+  DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp.
 }
 
 
@@ -7837,13 +7651,14 @@ check_quick_keys(PARAM *param, uint idx, SEL_ARG *key_tree,
     where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])
 
     and the table has a clustered Primary Key defined as 
-
       PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) 
     
     i.e. the first key parts of it are identical to uncovered parts ot the 
     key being scanned. This function assumes that the index flags do not
     include HA_KEY_SCAN_NOT_ROR flag (that is checked elsewhere).
 
+    Check (1) is made in quick_range_seq_next()
+
   RETURN
     TRUE   The scan is ROR-scan
     FALSE  Otherwise
@@ -7856,9 +7671,19 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   KEY_PART_INFO *key_part_end= (table_key->key_part +
                                 table_key->key_parts);
   uint pk_number;
+  
+  for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
+  {
+    uint16 fieldnr= param->table->key_info[keynr].
+                    key_part[kp - table_key->key_part].fieldnr - 1;
+    if (param->table->field[fieldnr]->key_length() != kp->length)
+      return FALSE;
+  }
 
   if (key_part == key_part_end)
     return TRUE;
+
+  key_part= table_key->key_part + nparts;
   pk_number= param->table->s->primary_key;
   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
     return FALSE;
@@ -7883,12 +7708,14 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
   SYNOPSIS
     get_quick_select()
       param
-      idx          Index of used key in param->key.
-      key_tree     SEL_ARG tree for the used key
-      parent_alloc If not NULL, use it to allocate memory for
-                   quick select data. Otherwise use quick->alloc.
+      idx            Index of used key in param->key.
+      key_tree       SEL_ARG tree for the used key
+      mrr_flags      MRR parameter for quick select
+      mrr_buf_size   MRR parameter for quick select
+      parent_alloc   If not NULL, use it to allocate memory for
+                     quick select data. Otherwise use quick->alloc.
   NOTES
-    The caller must call QUICK_SELECT::init for returned quick select
+    The caller must call QUICK_SELECT::init for returned quick select.
 
     CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
     deallocated when the returned quick select is deleted.
@@ -7899,25 +7726,26 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
 */
 
 QUICK_RANGE_SELECT *
-get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
-                 MEM_ROOT *parent_alloc)
+get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, uint mrr_flags,
+                 uint mrr_buf_size, MEM_ROOT *parent_alloc)
 {
   QUICK_RANGE_SELECT *quick;
+  bool create_err= FALSE;
   DBUG_ENTER("get_quick_select");
 
   if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
     quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                       param->real_keynr[idx],
                                       test(parent_alloc),
-                                      parent_alloc);
+                                      parent_alloc, &create_err);
   else
     quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                  param->real_keynr[idx],
-                                 test(parent_alloc));
+                                 test(parent_alloc), NULL, &create_err);
 
   if (quick)
   {
-    if (quick->error ||
+    if (create_err ||
 	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
 		       param->max_key,0))
     {
@@ -7926,6 +7754,8 @@ get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
     }
     else
     {
+      quick->mrr_flags= mrr_flags;
+      quick->mrr_buf_size= mrr_buf_size;
       quick->key_parts=(KEY_PART*)
         memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                     (char*) param->key[idx],
@@ -8076,7 +7906,20 @@ bool QUICK_RANGE_SELECT::unique_key_range()
 }
 
 
-/* Returns TRUE if any part of the key is NULL */
+
+/*
+  Return TRUE if any part of the key is NULL
+
+  SYNOPSIS
+    null_part_in_key()    
+      key_part  Array of key parts (index description)
+      key       Key values tuple
+      length    Length of key values tuple in bytes.
+
+  RETURN
+    TRUE   The tuple has at least one "keypartX is NULL"
+    FALSE  Otherwise
+*/
 
 static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length)
 {
@@ -8133,6 +7976,19 @@ bool QUICK_ROR_UNION_SELECT::is_keys_used(const MY_BITMAP *fields)
 }
 
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key)
+{
+  bool create_err= FALSE;
+  FT_SELECT *fts= new FT_SELECT(thd, table, key, &create_err);
+  if (create_err)
+  {
+    delete fts;
+    return NULL;
+  }
+  else
+    return fts;
+}
+
 /*
   Create quick select from ref/ref_or_null scan.
 
@@ -8161,10 +8017,12 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   KEY_PART *key_part;
   QUICK_RANGE *range;
   uint part;
+  bool create_err= FALSE;
+  COST_VECT cost;
 
   old_root= thd->mem_root;
   /* The following call may change thd->mem_root */
-  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0);
+  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0, 0, &create_err);
   /* save mem_root set by QUICK_RANGE_SELECT constructor */
   alloc= thd->mem_root;
   /*
@@ -8173,7 +8031,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
   */
   thd->mem_root= old_root;
 
-  if (!quick)
+  if (!quick || create_err)
     return 0;			/* no ranges found */
   if (quick->init())
     goto err;
@@ -8227,8 +8085,24 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
       goto err;
   }
 
-  return quick;
+  /* Call multi_range_read_info() to get the MRR flags and buffer size */
+  quick->mrr_flags= HA_MRR_NO_ASSOCIATION | 
+                    (table->key_read ? HA_MRR_INDEX_ONLY : 0);
+  if (thd->lex->sql_command != SQLCOM_SELECT)
+    quick->mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
+  if (!ref->null_ref_key && !key_has_nulls(key_info, range->min_key,
+                                           ref->key_length))
+    quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
+#endif
+
+  quick->mrr_buf_size= thd->variables.mrr_buff_size;
+  if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
+                                         &quick->mrr_buf_size,
+                                         &quick->mrr_flags, &cost))
+    goto err;
 
+  return quick;
 err:
   delete quick;
   return 0;
@@ -8261,7 +8135,10 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
   DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
 
   /* We're going to just read rowids. */
-  head->set_keyread(TRUE);
+  if (!head->key_read)
+  {
+    head->enable_keyread();
+  }
   head->prepare_for_position();
 
   cur_quick_it.rewind();
@@ -8273,7 +8150,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     reset here.
   */
   if (cur_quick->init() || cur_quick->reset())
-    DBUG_RETURN(1);
+    goto err;
 
   if (unique == NULL)
   {
@@ -8284,6 +8161,8 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     unique= new Unique(refpos_order_cmp, (void *)file,
                        file->ref_length,
                        thd->variables.sortbuff_size);
+    if (!unique)
+      goto err;
   }
   else
     unique->reset();
@@ -8291,8 +8170,6 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
   DBUG_ASSERT(file->ref_length == unique->get_size());
   DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
 
-  if (!unique)
-    DBUG_RETURN(1);
   for (;;)
   {
     while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
@@ -8305,7 +8182,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
       if (cur_quick->file->inited != handler::NONE) 
         cur_quick->file->ha_index_end();
       if (cur_quick->init() || cur_quick->reset())
-        DBUG_RETURN(1);
+        goto err;
     }
 
     if (result)
@@ -8313,22 +8190,21 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
       if (result != HA_ERR_END_OF_FILE)
       {
         cur_quick->range_end();
-        DBUG_RETURN(result);
+        goto err;
       }
       break;
     }
 
     if (thd->killed)
-      DBUG_RETURN(1);
+      goto err;
 
     /* skip row if it will be retrieved by clustered PK scan */
     if (pk_quick_select && pk_quick_select->row_in_ranges())
       continue;
 
     cur_quick->file->position(cur_quick->record);
-    result= unique->unique_add((char*)cur_quick->file->ref);
-    if (result)
-      DBUG_RETURN(1);
+    if (unique->unique_add((char*)cur_quick->file->ref))
+      goto err;
   }
 
   /*
@@ -8338,10 +8214,17 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
   */
   result= unique->get(head);
   doing_pk_scan= FALSE;
-  /* index_merge currently doesn't support "using index" at all */
-  head->set_keyread(FALSE);
-  init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE);
+  /*
+    index_merge currently doesn't support "using index" at all
+  */
+  head->disable_keyread();
+  if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE))
+    result= 1;
   DBUG_RETURN(result);
+
+err:
+  head->disable_keyread();
+  DBUG_RETURN(1);
 }
 
 
@@ -8468,7 +8351,7 @@ int QUICK_ROR_INTERSECT_SELECT::get_next()
 
     /* We get here if we got the same row ref in all scans. */
     if (need_to_fetch_row)
-      error= head->file->rnd_pos(head->record[0], last_rowid);
+      error= head->file->ha_rnd_pos(head->record[0], last_rowid);
   } while (error == HA_ERR_RECORD_DELETED);
   DBUG_RETURN(error);
 }
@@ -8512,12 +8395,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
       {
         if (error != HA_ERR_END_OF_FILE)
           DBUG_RETURN(error);
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
       }
       else
       {
         quick->save_last_pos();
-        queue_replaced(&queue);
+        queue_replace_top(&queue);
       }
 
       if (!have_prev_rowid)
@@ -8534,7 +8417,7 @@ int QUICK_ROR_UNION_SELECT::get_next()
     cur_rowid= prev_rowid;
     prev_rowid= tmp;
 
-    error= head->file->rnd_pos(quick->record, prev_rowid);
+    error= head->file->ha_rnd_pos(quick->record, prev_rowid);
   } while (error == HA_ERR_RECORD_DELETED);
   DBUG_RETURN(error);
 }
@@ -8542,12 +8425,12 @@ int QUICK_ROR_UNION_SELECT::get_next()
 
 int QUICK_RANGE_SELECT::reset()
 {
-  uint  mrange_bufsiz;
+  uint  buf_size;
   uchar *mrange_buff;
+  int   error;
+  HANDLER_BUFFER empty_buf;
   DBUG_ENTER("QUICK_RANGE_SELECT::reset");
-  next=0;
   last_range= NULL;
-  in_range= FALSE;
   cur_range= (QUICK_RANGE**) ranges.buffer;
 
   if (file->inited == handler::NONE)
@@ -8558,69 +8441,43 @@ int QUICK_RANGE_SELECT::reset()
         DBUG_RETURN(error);
   }
 
-  /* Do not allocate the buffers twice. */
-  if (multi_range_length)
-  {
-    DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
-    DBUG_RETURN(0);
-  }
-
-  /* Allocate the ranges array. */
-  DBUG_ASSERT(ranges.elements);
-  multi_range_length= min(multi_range_count, ranges.elements);
-  DBUG_ASSERT(multi_range_length > 0);
-  while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*)
-                                  my_malloc(multi_range_length *
-                                            sizeof(KEY_MULTI_RANGE),
-                                            MYF(MY_WME))))
-  {
-    /* Try to shrink the buffers until it is 0. */
-    multi_range_length/= 2;
-  }
-  if (! multi_range)
+  /* Allocate buffer if we need one but haven't allocated it yet */
+  if (mrr_buf_size && !mrr_buf_desc)
   {
-    multi_range_length= 0;
-    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-  }
-
-  /* Allocate the handler buffer if necessary.  */
-  if (file->ha_table_flags() & HA_NEED_READ_RANGE_BUFFER)
-  {
-    mrange_bufsiz= min(multi_range_bufsiz,
-                       ((uint)QUICK_SELECT_I::records + 1)* head->s->reclength);
-
-    while (mrange_bufsiz &&
-           ! my_multi_malloc(MYF(MY_WME),
-                             &multi_range_buff,
-                             (uint) sizeof(*multi_range_buff),
-                             &mrange_buff, (uint) mrange_bufsiz,
-                             NullS))
+    buf_size= mrr_buf_size;
+    while (buf_size && !my_multi_malloc(MYF(MY_WME),
+                                        &mrr_buf_desc, sizeof(*mrr_buf_desc),
+                                        &mrange_buff, buf_size,
+                                        NullS))
     {
       /* Try to shrink the buffers until both are 0. */
-      mrange_bufsiz/= 2;
+      buf_size/= 2;
     }
-    if (! multi_range_buff)
-    {
-      my_free(multi_range);
-      multi_range= NULL;
-      multi_range_length= 0;
+    if (!mrr_buf_desc)
       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-    }
 
     /* Initialize the handler buffer. */
-    multi_range_buff->buffer= mrange_buff;
-    multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz;
-    multi_range_buff->end_of_used_area= mrange_buff;
-#ifdef HAVE_purify
+    mrr_buf_desc->buffer= mrange_buff;
+    mrr_buf_desc->buffer_end= mrange_buff + buf_size;
+    mrr_buf_desc->end_of_used_area= mrange_buff;
+#ifdef HAVE_valgrind
     /*
       We need this until ndb will use the buffer efficiently
       (Now ndb stores  complete row in here, instead of only the used fields
       which gives us valgrind warnings in compare_record[])
     */
-    bzero((char*) mrange_buff, mrange_bufsiz);
+    bzero((char*) mrange_buff, buf_size);
 #endif
   }
-  DBUG_RETURN(0);
+
+  if (!mrr_buf_desc)
+    empty_buf.buffer= empty_buf.buffer_end= empty_buf.end_of_used_area= NULL;
+ 
+  RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next, 0, 0};
+  error= file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
+                                     mrr_flags, mrr_buf_desc? mrr_buf_desc: 
+                                                              &empty_buf);
+  DBUG_RETURN(error);
 }
 
 
@@ -8641,13 +8498,8 @@ int QUICK_RANGE_SELECT::reset()
 
 int QUICK_RANGE_SELECT::get_next()
 {
-  int             result;
-  KEY_MULTI_RANGE *mrange;
+  char *dummy;
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
-  DBUG_ASSERT(multi_range_length && multi_range &&
-              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
-              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
-
   if (in_ror_merged_scan)
   {
     /*
@@ -8657,46 +8509,8 @@ int QUICK_RANGE_SELECT::get_next()
     head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
   }
 
-  for (;;)
-  {
-    if (in_range)
-    {
-      /* We did already start to read this key. */
-      result= file->read_multi_range_next(&mrange);
-      if (result != HA_ERR_END_OF_FILE)
-        goto end;
-    }
-
-    uint count= min(multi_range_length, ranges.elements -
-                    (cur_range - (QUICK_RANGE**) ranges.buffer));
-    if (count == 0)
-    {
-      /* Ranges have already been used up before. None is left for read. */
-      in_range= FALSE;
-      if (in_ror_merged_scan)
-        head->column_bitmaps_set_no_signal(save_read_set, save_write_set);
-      DBUG_RETURN(HA_ERR_END_OF_FILE);
-    }
-    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
-    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
-         mrange_slot < mrange_end;
-         mrange_slot++)
-    {
-      last_range= *(cur_range++);
-      last_range->make_min_endpoint(&mrange_slot->start_key);
-      last_range->make_max_endpoint(&mrange_slot->end_key);
-      mrange_slot->range_flag= last_range->flag;
-    }
-
-    result= file->read_multi_range_first(&mrange, multi_range, count,
-                                         sorted, multi_range_buff);
-    if (result != HA_ERR_END_OF_FILE)
-      goto end;
-    in_range= FALSE; /* No matching rows; go to next set of ranges. */
-  }
+  int result= file->multi_range_read_next(&dummy);
 
-end:
-  in_range= ! result;
   if (in_ror_merged_scan)
   {
     /* Restore bitmaps set on entry */
@@ -8705,6 +8519,7 @@ end:
   DBUG_RETURN(result);
 }
 
+
 /*
   Get the next record with a different prefix.
 
@@ -8745,8 +8560,8 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
     {
       /* Read the next record in the same range with prefix after cur_prefix. */
       DBUG_ASSERT(cur_prefix != NULL);
-      result= file->index_read_map(record, cur_prefix, keypart_map,
-                                   HA_READ_AFTER_KEY);
+      result= file->ha_index_read_map(record, cur_prefix, keypart_map,
+                                      HA_READ_AFTER_KEY);
       if (result || last_range->max_keypart_map == 0)
         DBUG_RETURN(result);
 
@@ -8795,8 +8610,8 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
     if (last_range)
     {
       // Already read through key
-      result= file->index_next_same(record, last_range->min_key,
-				    last_range->min_length);
+      result= file->ha_index_next_same(record, last_range->min_key,
+                                       last_range->min_length);
       if (result != HA_ERR_END_OF_FILE)
 	DBUG_RETURN(result);
     }
@@ -8810,10 +8625,10 @@ int QUICK_RANGE_SELECT_GEOM::get_next()
     }
     last_range= *(cur_range++);
 
-    result= file->index_read_map(record, last_range->min_key,
-                                 last_range->min_keypart_map,
-                                 (ha_rkey_function)(last_range->flag ^
-                                                    GEOM_FLAG));
+    result= file->ha_index_read_map(record, last_range->min_key,
+                                    last_range->min_keypart_map,
+                                    (ha_rkey_function)(last_range->flag ^
+                                                       GEOM_FLAG));
     if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
       DBUG_RETURN(result);
     last_range= 0;				// Not found, to next range
@@ -8881,9 +8696,9 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
     Use default MRR implementation for reverse scans. No table engine
     currently can do an MRR scan with output in reverse index order.
   */
-  multi_range_length= 0;
-  multi_range= NULL;
-  multi_range_buff= NULL;
+  mrr_buf_desc= NULL;
+  mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+  mrr_buf_size= 0;
 
   QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
   QUICK_RANGE **end_range= pr + ranges.elements;
@@ -8924,9 +8739,9 @@ int QUICK_SELECT_DESC::get_next()
     {						// Already read through key
       result = ((last_range->flag & EQ_RANGE && 
                  used_key_parts <= head->key_info[index].key_parts) ? 
-                file->index_next_same(record, last_range->min_key,
+                file->ha_index_next_same(record, last_range->min_key,
                                       last_range->min_length) :
-                file->index_prev(record));
+                file->ha_index_prev(record));
       if (!result)
       {
 	if (cmp_prev(*rev_it.ref()) == 0)
@@ -8942,7 +8757,7 @@ int QUICK_SELECT_DESC::get_next()
     if (last_range->flag & NO_MAX_RANGE)        // Read last record
     {
       int local_error;
-      if ((local_error=file->index_last(record)))
+      if ((local_error= file->ha_index_last(record)))
 	DBUG_RETURN(local_error);		// Empty table
       if (cmp_prev(last_range) == 0)
 	DBUG_RETURN(0);
@@ -8954,9 +8769,9 @@ int QUICK_SELECT_DESC::get_next()
         used_key_parts <= head->key_info[index].key_parts)
 
     {
-      result = file->index_read_map(record, last_range->max_key,
-                                    last_range->max_keypart_map,
-                                    HA_READ_KEY_EXACT);
+      result= file->ha_index_read_map(record, last_range->max_key,
+                                      last_range->max_keypart_map,
+                                      HA_READ_KEY_EXACT);
     }
     else
     {
@@ -8964,11 +8779,11 @@ int QUICK_SELECT_DESC::get_next()
                   (last_range->flag & EQ_RANGE && 
                    used_key_parts > head->key_info[index].key_parts) ||
                   range_reads_after_key(last_range));
-      result=file->index_read_map(record, last_range->max_key,
-                                  last_range->max_keypart_map,
-                                  ((last_range->flag & NEAR_MAX) ?
-                                   HA_READ_BEFORE_KEY :
-                                   HA_READ_PREFIX_LAST_OR_PREV));
+      result= file->ha_index_read_map(record, last_range->max_key,
+                                      last_range->max_keypart_map,
+                                      ((last_range->flag & NEAR_MAX) ?
+                                       HA_READ_BEFORE_KEY :
+                                       HA_READ_PREFIX_LAST_OR_PREV));
     }
     if (result)
     {
@@ -9012,6 +8827,7 @@ QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
 /*
   Compare if found key is over max-value
   Returns 0 if key <= range->max_key
+  TODO: Figure out why can't this function be as simple as cmp_prev(). 
 */
 
 int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
@@ -9158,7 +8974,7 @@ void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
   uint length;
   KEY *key_info= head->key_info + index;
   key_names->append(key_info->name);
-  length= longlong2str(max_used_key_length, buf, 10) - buf;
+  length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
   used_lengths->append(buf, length);
 }
 
@@ -9183,7 +8999,7 @@ void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
 
     KEY *key_info= head->key_info + quick->index;
     key_names->append(key_info->name);
-    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+    length= longlong10_to_str(quick->max_used_key_length, buf, 10) - buf;
     used_lengths->append(buf, length);
   }
   if (pk_quick_select)
@@ -9191,7 +9007,8 @@ void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
     KEY *key_info= head->key_info + pk_quick_select->index;
     key_names->append(',');
     key_names->append(key_info->name);
-    length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf;
+    length= (longlong10_to_str(pk_quick_select->max_used_key_length, buf, 10)
+             - buf);
     used_lengths->append(',');
     used_lengths->append(buf, length);
   }
@@ -9216,7 +9033,7 @@ void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
       used_lengths->append(',');
     }
     key_names->append(key_info->name);
-    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
+    length= longlong10_to_str(quick->max_used_key_length, buf, 10) - buf;
     used_lengths->append(buf, length);
   }
 
@@ -9225,7 +9042,7 @@ void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
     KEY *key_info= head->key_info + cpk_quick->index;
     key_names->append(',');
     key_names->append(key_info->name);
-    length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
+    length= longlong10_to_str(cpk_quick->max_used_key_length, buf, 10) - buf;
     used_lengths->append(',');
     used_lengths->append(buf, length);
   }
@@ -9762,8 +9579,14 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
       cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                            &cur_param_idx);
       /* Check if this range tree can be used for prefix retrieval. */
+      COST_VECT dummy_cost;
+      uint mrr_flags= HA_MRR_USE_DEFAULT_IMPL;
+      uint mrr_bufsize=0;
       cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
-                                                    cur_index_tree, TRUE);
+                                                   FALSE /*don't care*/,
+                                                   cur_index_tree, TRUE,
+                                                   &mrr_flags, &mrr_bufsize,
+                                                   &dummy_cost);
     }
     cost_group_min_max(table, cur_index_info, cur_used_key_parts,
                        cur_group_key_parts, tree, cur_index_tree,
@@ -9889,7 +9712,9 @@ check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
     the MIN/MAX argument field, and disallow the optimization only if this is
     so.
   */
-  if (cond_type == Item::SUBSELECT_ITEM)
+  if (cond_type == Item::SUBSELECT_ITEM ||
+      (cond->get_cached_item() &&
+       cond->get_cached_item()->type() == Item::SUBSELECT_ITEM))
     DBUG_RETURN(FALSE);
 
   /*
@@ -10358,6 +10183,7 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
       quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                    index_tree,
+                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
                                                    &quick->alloc);
 
     /*
@@ -10441,9 +10267,9 @@ QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
    group_prefix_len(group_prefix_len_arg),
    group_key_parts(group_key_parts_arg), have_min(have_min_arg),
    have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
-   seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
+   seen_first_key(FALSE), doing_key_read(FALSE), min_max_arg_part(min_max_arg_part_arg),
    key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
-   min_functions_it(NULL), max_functions_it(NULL), 
+   min_functions_it(NULL), max_functions_it(NULL),
    is_index_scan(is_index_scan_arg)
 {
   head=       table;
@@ -10571,7 +10397,12 @@ QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
 {
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");
   if (file->inited != handler::NONE) 
+  {
+    DBUG_ASSERT(file == head->file);
+    if (doing_key_read)
+      head->disable_keyread();
     file->ha_index_end();
+  }
   if (min_max_arg_part)
     delete_dynamic(&min_max_ranges);
   free_root(&alloc,MYF(0));
@@ -10754,12 +10585,16 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
   int result;
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
 
-  head->set_keyread(TRUE); /* We need only the key attributes */
+  if (!head->key_read)
+  {
+    doing_key_read= 1;
+    head->enable_keyread(); /* We need only the key attributes */
+  }
   if ((result= file->ha_index_init(index,1)))
     DBUG_RETURN(result);
   if (quick_prefix_select && quick_prefix_select->reset())
     DBUG_RETURN(1);
-  result= file->index_last(record);
+  result= file->ha_index_last(record);
   if (result == HA_ERR_END_OF_FILE)
     DBUG_RETURN(0);
   /* Save the prefix of the last group. */
@@ -10861,9 +10696,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next()
       first sub-group with the extended prefix.
     */
     if (!have_min && !have_max && key_infix_len > 0)
-      result= file->index_read_map(record, group_prefix,
-                                   make_prev_keypart_map(real_key_parts),
-                                   HA_READ_KEY_EXACT);
+      result= file->ha_index_read_map(record, group_prefix,
+                                      make_prev_keypart_map(real_key_parts),
+                                      HA_READ_KEY_EXACT);
 
     result= have_min ? min_res : have_max ? max_res : result;
   } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
@@ -10915,9 +10750,10 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
     /* Apply the constant equality conditions to the non-group select fields */
     if (key_infix_len > 0)
     {
-      if ((result= file->index_read_map(record, group_prefix,
-                                        make_prev_keypart_map(real_key_parts),
-                                        HA_READ_KEY_EXACT)))
+      if ((result=
+           file->ha_index_read_map(record, group_prefix,
+                                   make_prev_keypart_map(real_key_parts),
+                                   HA_READ_KEY_EXACT)))
         DBUG_RETURN(result);
     }
 
@@ -10932,9 +10768,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min()
     {
       /* Find the first subsequent record without NULL in the MIN/MAX field. */
       key_copy(tmp_record, record, index_info, 0);
-      result= file->index_read_map(record, tmp_record,
-                                   make_keypart_map(real_key_parts),
-                                   HA_READ_AFTER_KEY);
+      result= file->ha_index_read_map(record, tmp_record,
+                                      make_keypart_map(real_key_parts),
+                                      HA_READ_AFTER_KEY);
       /*
         Check if the new record belongs to the current group by comparing its
         prefix with the group's prefix. If it is from the next group, then the
@@ -10989,9 +10825,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max()
   if (min_max_ranges.elements > 0)
     result= next_max_in_range();
   else
-    result= file->index_read_map(record, group_prefix,
-                                 make_prev_keypart_map(real_key_parts),
-                                 HA_READ_PREFIX_LAST);
+    result= file->ha_index_read_map(record, group_prefix,
+                                    make_prev_keypart_map(real_key_parts),
+                                    HA_READ_PREFIX_LAST);
   DBUG_RETURN(result);
 }
 
@@ -11085,7 +10921,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
   {
     if (!seen_first_key)
     {
-      result= file->index_first(record);
+      result= file->ha_index_first(record);
       if (result)
         DBUG_RETURN(result);
       seen_first_key= TRUE;
@@ -11172,7 +11008,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
                  HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
     }
 
-    result= file->index_read_map(record, group_prefix, keypart_map, find_flag);
+    result= file->ha_index_read_map(record, group_prefix, keypart_map,
+                                    find_flag);
     if (result)
     {
       if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
@@ -11311,7 +11148,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
                  HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
     }
 
-    result= file->index_read_map(record, group_prefix, keypart_map, find_flag);
+    result= file->ha_index_read_map(record, group_prefix, keypart_map,
+                                    find_flag);
 
     if (result)
     {
@@ -11447,7 +11285,7 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
   char buf[64];
   uint length;
   key_names->append(index_info->name);
-  length= longlong2str(max_used_key_length, buf, 10) - buf;
+  length= longlong10_to_str(max_used_key_length, buf, 10) - buf;
   used_lengths->append(buf, length);
 }
 
@@ -11506,6 +11344,7 @@ static void print_ror_scans_arr(TABLE *table, const char *msg,
   DBUG_VOID_RETURN;
 }
 
+
 /*****************************************************************************
 ** Print a quick range for debugging
 ** TODO:
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 33b3d561ad8..34335c7baf3 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -52,13 +52,19 @@ typedef struct st_key_part {
 } KEY_PART;
 
 
+/*
+  A "MIN_TUPLE < tbl.key_tuple < MAX_TUPLE" interval. 
+  
+  One of endpoints may be absent. 'flags' member has flags which tell whether
+  the endpoints are '<' or '<='.
+*/
 class QUICK_RANGE :public Sql_alloc {
  public:
   uchar *min_key,*max_key;
   uint16 min_length,max_length,flag;
   key_part_map min_keypart_map, // bitmap of used keyparts in min_key
                max_keypart_map; // bitmap of used keyparts in max_key
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   uint16 dummy;					/* Avoid warnings on 'flag' */
 #endif
   QUICK_RANGE();				/* Full range */
@@ -75,7 +81,7 @@ class QUICK_RANGE :public Sql_alloc {
       min_keypart_map(min_keypart_map_arg),
       max_keypart_map(max_keypart_map_arg)
     {
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
       dummy=0;
 #endif
     }
@@ -201,13 +207,16 @@ class QUICK_RANGE :public Sql_alloc {
 
   4. Delete the select:
     delete quick;
-
+  
+  NOTE 
+    quick select doesn't use Sql_alloc/MEM_ROOT allocation because "range
+    checked for each record" functionality may create/destroy
+    O(#records_in_some_table) quick selects during query execution.
 */
 
 class QUICK_SELECT_I
 {
 public:
-  bool sorted;
   ha_rows records;  /* estimate of # of records to be retrieved */
   double  read_time; /* time to perform this retrieval          */
   TABLE   *head;
@@ -280,6 +289,12 @@ public:
   virtual bool reverse_sorted() = 0;
   virtual bool unique_key_range() { return false; }
 
+  /*
+    Request that this quick select produces sorted output. Not all quick
+    selects can do it, the caller is responsible for calling this function
+    only for those quick selects that can.
+  */
+  virtual void need_sorted_output() = 0;
   enum {
     QS_TYPE_RANGE = 0,
     QS_TYPE_INDEX_MERGE = 1,
@@ -364,6 +379,22 @@ struct st_qsel_param;
 class PARAM;
 class SEL_ARG;
 
+
+/*
+  MRR range sequence, array<QUICK_RANGE> implementation: sequence traversal
+  context.
+*/
+typedef struct st_quick_range_seq_ctx
+{
+  QUICK_RANGE **first;
+  QUICK_RANGE **cur;
+  QUICK_RANGE **last;
+} QUICK_RANGE_SEQ_CTX;
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags);
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+
+
 /*
   Quick select that does a range scan on a single key. The records are
   returned in key order.
@@ -371,60 +402,47 @@ class SEL_ARG;
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
 protected:
-  bool next,dont_free,in_ror_merged_scan;
-public:
-  int error;
-protected:
+  /* true if we enabled key only reads */
+  bool doing_key_read;
   handler *file;
-  /*
-    If true, this quick select has its "own" handler object which should be
-    closed no later then this quick select is deleted.
-  */
-  bool free_file;
-  bool in_range;
-  uint multi_range_count; /* copy from thd->variables.multi_range_count */
-  uint multi_range_length; /* the allocated length for the array */
-  uint multi_range_bufsiz; /* copy from thd->variables.read_rnd_buff_size */
-  KEY_MULTI_RANGE *multi_range; /* the multi-range array (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
-  HANDLER_BUFFER *multi_range_buff; /* the handler buffer (allocated and
-                                       freed by QUICK_RANGE_SELECT) */
+
+  /* Members to deal with case when this quick select is a ROR-merged scan */
+  bool in_ror_merged_scan;
   MY_BITMAP column_bitmap, *save_read_set, *save_write_set;
+  bool free_file;   /* TRUE <=> this->file is "owned" by this quick select */
 
-  friend class TRP_ROR_INTERSECT;
-  friend
-  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
-                                               struct st_table_ref *ref,
-                                               ha_rows records);
-  friend bool get_quick_keys(PARAM *param,
-                             QUICK_RANGE_SELECT *quick,KEY_PART *key,
-                             SEL_ARG *key_tree,
-                             uchar *min_key, uint min_key_flag,
-                             uchar *max_key, uint max_key_flag);
-  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
-                                              SEL_ARG *key_tree,
-                                              MEM_ROOT *alloc);
-  friend class QUICK_SELECT_DESC;
-  friend class QUICK_INDEX_MERGE_SELECT;
-  friend class QUICK_ROR_INTERSECT_SELECT;
-  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  /* Range pointers to be used when not using MRR interface */
+  /* Members needed to use the MRR interface */
+  QUICK_RANGE_SEQ_CTX qr_traversal_ctx;
+public:
+  uint mrr_flags; /* Flags to be used with MRR interface */
+protected:
+  uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */  
+  HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */
 
+  /* Info about index we're scanning */
+  
   DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */
   QUICK_RANGE **cur_range;  /* current element in ranges  */
-
+  
   QUICK_RANGE *last_range;
+  
   KEY_PART *key_parts;
   KEY_PART_INFO *key_part_info;
+  
+  bool dont_free; /* Used by QUICK_SELECT_DESC */
+
   int cmp_next(QUICK_RANGE *range);
   int cmp_prev(QUICK_RANGE *range);
   bool row_in_ranges();
 public:
   MEM_ROOT alloc;
 
-  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
-                     MEM_ROOT *parent_alloc=NULL);
+  QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc,
+                     MEM_ROOT *parent_alloc, bool *create_err);
   ~QUICK_RANGE_SELECT();
-
+  
+  void need_sorted_output();
   int init();
   int reset(void);
   int get_next();
@@ -445,6 +463,27 @@ public:
   QUICK_SELECT_I *make_reverse(uint used_key_parts_arg);
 private:
   /* Default copy ctor used by QUICK_SELECT_DESC */
+  friend class TRP_ROR_INTERSECT;
+  friend
+  QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
+                                               struct st_table_ref *ref,
+                                               ha_rows records);
+  friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick, 
+                             KEY_PART *key, SEL_ARG *key_tree, 
+                             uchar *min_key, uint min_key_flag,
+                             uchar *max_key, uint max_key_flag);
+  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
+                                              SEL_ARG *key_tree,
+                                              uint mrr_flags,
+                                              uint mrr_buf_size,
+                                              MEM_ROOT *alloc);
+  friend class QUICK_SELECT_DESC;
+  friend class QUICK_INDEX_MERGE_SELECT;
+  friend class QUICK_ROR_INTERSECT_SELECT;
+  friend class QUICK_GROUP_MIN_MAX_SELECT;
+  friend uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+  friend range_seq_t quick_range_seq_init(void *init_param,
+                                          uint n_ranges, uint flags);
 };
 
 
@@ -452,8 +491,10 @@ class QUICK_RANGE_SELECT_GEOM: public QUICK_RANGE_SELECT
 {
 public:
   QUICK_RANGE_SELECT_GEOM(THD *thd, TABLE *table, uint index_arg,
-                          bool no_alloc, MEM_ROOT *parent_alloc)
-    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc)
+                          bool no_alloc, MEM_ROOT *parent_alloc, 
+                          bool *create_err)
+    :QUICK_RANGE_SELECT(thd, table, index_arg, no_alloc, parent_alloc,
+    create_err)
     {};
   virtual int get_next();
 };
@@ -526,6 +567,7 @@ public:
   ~QUICK_INDEX_MERGE_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -585,6 +627,7 @@ public:
   ~QUICK_ROR_INTERSECT_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -639,6 +682,7 @@ public:
   ~QUICK_ROR_UNION_SELECT();
 
   int  init();
+  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -717,6 +761,8 @@ private:
   bool have_max;         /*   a MIN, a MAX, or both.         */
   bool have_agg_distinct;/*   aggregate_function(DISTINCT ...).  */
   bool seen_first_key;   /* Denotes whether the first key was retrieved.*/
+  bool doing_key_read;   /* true if we enabled key only reads */
+
   KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */
                                    /* of all MIN/MAX functions.              */
   uint min_max_arg_len;  /* The length of the MIN/MAX argument field */
@@ -764,6 +810,7 @@ public:
   void adjust_prefix_ranges();
   bool alloc_buffers();
   int init();
+  void need_sorted_output() { /* always do it */ }
   int reset();
   int get_next();
   bool reverse_sorted() { return false; }
@@ -785,7 +832,8 @@ public:
 class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT
 {
 public:
-  QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts);
+  QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint used_key_parts, 
+                    bool *create_err);
   int get_next();
   bool reverse_sorted() { return 1; }
   int get_type() { return QS_TYPE_RANGE_DESC; }
@@ -823,29 +871,41 @@ class SQL_SELECT :public Sql_alloc {
   {
     key_map tmp;
     tmp.set_all();
-    return test_quick_select(thd, tmp, 0, limit, force_quick_range) < 0;
+    return test_quick_select(thd, tmp, 0, limit, force_quick_range, FALSE) < 0;
   }
-  inline bool skip_record(THD *thd, bool *skip_record)
+  /* 
+    RETURN
+      0   if record must be skipped <-> (cond && cond->val_int() == 0)
+     -1   if error
+      1   otherwise
+  */   
+  inline int skip_record(THD *thd)
   {
-    *skip_record= cond ? cond->val_int() == FALSE : FALSE;
-    return thd->is_error();
+    int rc= test(!cond || cond->val_int());
+    if (thd->is_error())
+      rc= -1;
+    return rc;
   }
   int test_quick_select(THD *thd, key_map keys, table_map prev_tables,
-			ha_rows limit, bool force_quick_range);
+			ha_rows limit, bool force_quick_range, 
+                        bool ordered_output);
 };
 
 
-class FT_SELECT: public QUICK_RANGE_SELECT {
+class FT_SELECT: public QUICK_RANGE_SELECT 
+{
 public:
-  FT_SELECT(THD *thd, TABLE *table, uint key) :
-      QUICK_RANGE_SELECT (thd, table, key, 1) { (void) init(); }
+  FT_SELECT(THD *thd, TABLE *table, uint key, bool *create_err) :
+      QUICK_RANGE_SELECT (thd, table, key, 1, NULL, create_err) 
+  { (void) init(); }
   ~FT_SELECT() { file->ft_end(); }
-  int init() { return error=file->ft_init(); }
+  int init() { return file->ft_init(); }
   int reset() { return 0; }
-  int get_next() { return error=file->ft_read(record); }
+  int get_next() { return file->ha_ft_read(record); }
   int get_type() { return QS_TYPE_FULLTEXT; }
 };
 
+FT_SELECT *get_ft_select(THD *thd, TABLE *table, uint key);
 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                              struct st_table_ref *ref,
                                              ha_rows records);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
new file mode 100644
index 00000000000..9eeec653355
--- /dev/null
+++ b/sql/opt_range_mrr.cc
@@ -0,0 +1,332 @@
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
+ ****************************************************************************/
+
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry 
+{
+  /* 
+    Pointers in min and max keys. They point to right-after-end of key
+    images. The 0-th entry has these pointing to key tuple start.
+  */
+  uchar *min_key, *max_key;
+  
+  /* 
+    Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
+    min_key_flag may have NULL_RANGE set.
+  */
+  uint min_key_flag, max_key_flag;
+  
+  /* Number of key parts */
+  uint min_key_parts, max_key_parts;
+  SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
+
+
+/*
+  MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
+{
+  uint keyno;      /* index of used tree in SEL_TREE structure */
+  uint real_keyno; /* Number of the index in tables */
+  PARAM *param;
+  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+  
+  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+  int i; /* Index of last used element in the above array */
+  
+  bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+
+  SYNOPSIS
+    init()
+      init_params  SEL_ARG tree traversal context
+      n_ranges     [ignored] The number of ranges obtained 
+      flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  RETURN
+    Value of init_param
+*/
+
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+  seq->at_start= TRUE;
+  seq->stack[0].key_tree= NULL;
+  seq->stack[0].min_key= seq->param->min_key;
+  seq->stack[0].min_key_flag= 0;
+  seq->stack[0].min_key_parts= 0;
+
+  seq->stack[0].max_key= seq->param->max_key;
+  seq->stack[0].max_key_flag= 0;
+  seq->stack[0].max_key_parts= 0;
+  seq->i= 0;
+  return init_param;
+}
+
+
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+  RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+  RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+  
+  cur->key_tree= key_tree;
+  cur->min_key= prev->min_key;
+  cur->max_key= prev->max_key;
+  cur->min_key_parts= prev->min_key_parts;
+  cur->max_key_parts= prev->max_key_parts;
+
+  uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length;
+  cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key,
+                                            prev->min_key_flag);
+  cur->max_key_parts += key_tree->store_max(stor_length, &cur->max_key,
+                                            prev->max_key_flag);
+
+  cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+  cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
+
+  if (key_tree->is_null_interval())
+    cur->min_key_flag |= NULL_RANGE;
+  (arg->i)++;
+}
+
+
+/*
+  Range sequence interface, SEL_ARG* implementation: get the next interval
+  
+  SYNOPSIS
+    sel_arg_range_seq_next()
+      rseq        Value returned from sel_arg_range_seq_init
+      range  OUT  Store information about the range here
+
+  DESCRIPTION
+    This is "get_next" function for Range sequence interface implementation
+    for SEL_ARG* tree.
+
+  IMPLEMENTATION
+    The traversal also updates those param members:
+      - is_ror_scan
+      - range_count
+      - max_key_part
+
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
+*/
+
+uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  SEL_ARG *key_tree;
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+  if (seq->at_start)
+  {
+    key_tree= seq->start;
+    seq->at_start= FALSE;
+    goto walk_up_n_right;
+  }
+
+  key_tree= seq->stack[seq->i].key_tree;
+  /* Ok, we're at some "full tuple" position in the tree */
+ 
+  /* Step down if we can */
+  if (key_tree->next && key_tree->next != &null_element)
+  {
+    //step down; (update the tuple, we'll step right and stay there)
+    seq->i--;
+    step_down_to(seq, key_tree->next);
+    key_tree= key_tree->next;
+    seq->param->is_ror_scan= FALSE;
+    goto walk_right_n_up;
+  }
+
+  /* Ok, can't step down, walk left until we can step down */
+  while (1)
+  {
+    if (seq->i == 1) // can't step left
+      return 1;
+    /* Step left */
+    seq->i--;
+    key_tree= seq->stack[seq->i].key_tree;
+
+    /* Step down if we can */
+    if (key_tree->next && key_tree->next != &null_element)
+    {
+      // Step down; update the tuple
+      seq->i--;
+      step_down_to(seq, key_tree->next);
+      key_tree= key_tree->next;
+      break;
+    }
+  }
+
+  /*
+    Ok, we've stepped down from the path to previous tuple.
+    Walk right-up while we can
+  */
+walk_right_n_up:
+  while (key_tree->next_key_part && key_tree->next_key_part != &null_element && 
+         key_tree->next_key_part->part == key_tree->part + 1 &&
+         key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+  {
+    {
+      RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+      uint min_key_length= cur->min_key - seq->param->min_key;
+      uint max_key_length= cur->max_key - seq->param->max_key;
+      uint len= cur->min_key - cur[-1].min_key;
+      if (!(min_key_length == max_key_length &&
+            !memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
+            !key_tree->min_flag && !key_tree->max_flag))
+      {
+        seq->param->is_ror_scan= FALSE;
+        if (!key_tree->min_flag)
+          cur->min_key_parts += 
+            key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
+                                                   &cur->min_key,
+                                                   &cur->min_key_flag);
+        if (!key_tree->max_flag)
+          cur->max_key_parts += 
+            key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
+                                                   &cur->max_key,
+                                                   &cur->max_key_flag);
+        break;
+      }
+    }
+  
+    /*
+      Ok, current atomic interval is in form "t.field=const" and there is
+      next_key_part interval. Step right, and walk up from there.
+    */
+    key_tree= key_tree->next_key_part;
+
+walk_up_n_right:
+    while (key_tree->prev && key_tree->prev != &null_element)
+    {
+      /* Step up */
+      key_tree= key_tree->prev;
+    }
+    step_down_to(seq, key_tree);
+  }
+
+  /* Ok got a tuple */
+  RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+  uint min_key_length= cur->min_key - seq->param->min_key;
+  
+  range->ptr= (char*)(int)(key_tree->part);
+  if (cur->min_key_flag & GEOM_FLAG)
+  {
+    range->range_flag= cur->min_key_flag;
+
+    /* Here minimum contains also function code bits, and maximum is +inf */
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= min_key_length;
+    range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+  }
+  else
+  {
+    range->range_flag= cur->min_key_flag | cur->max_key_flag;
+    
+    range->start_key.key=    seq->param->min_key;
+    range->start_key.length= cur->min_key - seq->param->min_key;
+    range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
+    range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : 
+                                                           HA_READ_KEY_EXACT);
+
+    range->end_key.key=    seq->param->max_key;
+    range->end_key.length= cur->max_key - seq->param->max_key;
+    range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : 
+                                                         HA_READ_AFTER_KEY);
+    range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
+
+    if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+        (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+        (seq->param->table->key_info[seq->real_keyno].flags & HA_NOSAME) &&
+        range->start_key.length == range->end_key.length &&
+        !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+      range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
+      
+    if (seq->param->is_ror_scan)
+    {
+      /*
+        If we get here, the condition on the key was converted to form
+        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+          somecond(keyXpart{key_tree->part})"
+        Check if
+          somecond is "keyXpart{key_tree->part} = const" and
+          uncovered "tail" of KeyX parts is either empty or is identical to
+          first members of clustered primary key.
+      */
+      if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+            (range->start_key.length == range->end_key.length) &&
+            !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+            is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+        seq->param->is_ror_scan= FALSE;
+    }
+  }
+  seq->param->range_count++;
+  seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+  return 0;
+}
+
+/****************************************************************************
+  MRR Range Sequence Interface implementation that walks array<QUICK_RANGE>
+ ****************************************************************************/
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: initialize
+  
+  SYNOPSIS
+    quick_range_seq_init()
+      init_param  Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
+      n_ranges    Number of ranges in the sequence (ignored)
+      flags       MRR flags (currently not used) 
+
+  RETURN
+    Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+  quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur + 
+                                  quick->ranges.elements;
+  return &quick->qr_traversal_ctx;
+}
+
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: get next
+  
+  SYNOPSIS
+    quick_range_seq_next()
+      rseq        Value returned from quick_range_seq_init
+      range  OUT  Store information about the range here
+
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
+*/
+
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+  if (ctx->cur == ctx->last)
+    return 1; /* no more ranges */
+
+  QUICK_RANGE *cur= *(ctx->cur);
+  cur->make_min_endpoint(&range->start_key);
+  cur->make_max_endpoint(&range->end_key);
+  range->range_flag= cur->flag;
+  ctx->cur++;
+  return 0;
+}
+
+
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
new file mode 100644
index 00000000000..a58313ec9fd
--- /dev/null
+++ b/sql/opt_subselect.cc
@@ -0,0 +1,3507 @@
+/**
+  @file
+
+  @brief
+    Subquery optimization code here.
+
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#include <my_bit.h>
+
+// Our own:
+static
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
+static bool replace_where_subcondition(JOIN *join, Item **expr, 
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields);
+static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, 
+                                 Item_in_subselect* const *el2);
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
+static TABLE_LIST *alloc_join_nest(THD *thd);
+static 
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
+static uint get_tmp_table_rec_length(List<Item> &items);
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred);
+static void remove_sj_conds(Item **tree);
+static bool is_cond_sj_in_equality(Item *item);
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab);
+static Item *remove_additional_cond(Item* conds);
+static void remove_subq_pushed_predicates(JOIN *join, Item **where);
+
+
+/*
+  Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them
+
+  DESCRIPTION
+    Check if we need to do
+     - subquery->semi-join rewrite
+     - if the subquery can be handled with materialization
+     - 'substitution' rewrite for table-less subqueries like "(select 1)"
+
+    and mark appropriately
+
+  RETURN
+     0  - OK
+    -1  - Some sort of query error
+*/
+
+int check_and_do_in_subquery_rewrites(JOIN *join)
+{
+  THD *thd=join->thd;
+  st_select_lex *select_lex= join->select_lex;
+  DBUG_ENTER("check_and_do_in_subquery_rewrites");
+  /*
+    If 
+      1) this join is inside a subquery (of any type except FROM-clause 
+         subquery) and
+      2) we aren't just normalizing a VIEW
+
+    Then perform early unconditional subquery transformations:
+     - Convert subquery predicate into semi-join, or
+     - Mark the subquery for execution using materialization, or
+     - Perform IN->EXISTS transformation, or
+     - Perform more/less ALL/ANY -> MIN/MAX rewrite
+     - Substitute trivial scalar-context subquery with its value
+
+    TODO: for PS, make the whole block execute only on the first execution
+  */
+  Item_subselect *subselect;
+  if (!thd->lex->view_prepare_mode &&                  // (1)
+    (subselect= select_lex->master_unit()->item))      // (2)
+  {
+    Item_in_subselect *in_subs= NULL;
+    if (subselect->substype() == Item_subselect::IN_SUBS)
+      in_subs= (Item_in_subselect*)subselect;
+
+    /* Resolve expressions and perform semantic analysis for IN query */
+    if (in_subs != NULL)
+      /*
+        TODO: Add the condition below to this if statement when we have proper
+        support for is_correlated handling for materialized semijoins.
+        If we were to add this condition now, the fix_fields() call in
+        convert_subq_to_sj() would force the flag is_correlated to be set
+        erroneously for prepared queries.
+
+        thd->stmt_arena->state != Query_arena::PREPARED)
+      */
+    {
+      /*
+        Check if the left and right expressions have the same # of
+        columns, i.e. we don't have a case like 
+          (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
+
+        TODO why do we have this duplicated in IN->EXISTS transformers?
+        psergey-todo: fix these: grep for duplicated_subselect_card_check
+      */
+      if (select_lex->item_list.elements != in_subs->left_expr->cols())
+      {
+        my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols());
+        DBUG_RETURN(-1);
+      }
+
+      SELECT_LEX *current= thd->lex->current_select;
+      thd->lex->current_select= current->return_after_parsing();
+      char const *save_where= thd->where;
+      thd->where= "IN/ALL/ANY subquery";
+        
+      bool failure= !in_subs->left_expr->fixed &&
+                     in_subs->left_expr->fix_fields(thd, &in_subs->left_expr);
+      thd->lex->current_select= current;
+      thd->where= save_where;
+      if (failure)
+        DBUG_RETURN(-1); /* purecov: deadcode */
+    }
+    DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
+    /*
+      Check if we're in subquery that is a candidate for flattening into a
+      semi-join (which is done in flatten_subqueries()). The
+      requirements are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery does not have GROUP BY or ORDER BY
+        4. Subquery does not use aggregate functions or HAVING
+        5. Subquery predicate is at the AND-top-level of ON/WHERE clause
+        6. We are not in a subquery of a single table UPDATE/DELETE that 
+             doesn't have a JOIN (TODO: We should handle this at some
+             point by switching to multi-table UPDATE/DELETE)
+        7. We're not in a table-less subquery like "SELECT 1"
+        8. No execution method was already chosen (by a prepared statement)
+        9. Parent select is not a table-less select
+        10. Neither parent nor child select have STRAIGHT_JOIN option.
+    */
+    if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        in_subs &&                                                    // 1
+        !select_lex->is_part_of_union() &&                            // 2
+        !select_lex->group_list.elements && !join->order &&           // 3
+        !join->having && !select_lex->with_sum_func &&                // 4
+        thd->thd_marker.emb_on_expr_nest &&                           // 5
+        select_lex->outer_select()->join &&                           // 6
+        select_lex->master_unit()->first_select()->leaf_tables &&     // 7
+        in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
+        select_lex->outer_select()->leaf_tables &&                    // 9
+        !((join->select_options |                                     // 10
+           select_lex->outer_select()->join->select_options)          // 10
+          & SELECT_STRAIGHT_JOIN))                                    // 10
+    {
+      DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
+
+      (void)subquery_types_allow_materialization(in_subs);
+
+      in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+
+      /* Register the subquery for further processing in flatten_subqueries() */
+      select_lex->
+        outer_select()->join->sj_subselects.append(thd->mem_root, in_subs);
+      in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest;
+    }
+    else
+    {
+      DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
+      /*
+        Check if the subquery predicate can be executed via materialization.
+        The required conditions are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery is not a table-less query. In this case there is no
+           point in materializing.
+          3A The upper query is not a table-less SELECT ... FROM DUAL. We
+             can't do materialization for SELECT .. FROM DUAL because it
+             does not call setup_subquery_materialization(). We could make 
+             SELECT ... FROM DUAL call that function but that doesn't seem
+             to be the case that is worth handling.
+        4. Either the subquery predicate is a top-level predicate, or at
+           least one partial match strategy is enabled. If no partial match
+           strategy is enabled, then materialization cannot be used for
+           non-top-level queries because it cannot handle NULLs correctly.
+        5. Subquery is non-correlated
+           TODO:
+           This is an overly restrictive condition. It can be extended to:
+           (Subquery is non-correlated ||
+            Subquery is correlated to any query outer to IN predicate ||
+            (Subquery is correlated to the immediate outer query &&
+             Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+             aggregate functions}) && subquery predicate is not under "NOT IN"))
+        6. No execution method was already chosen (by a prepared statement).
+
+        (*) The subquery must be part of a SELECT statement. The current
+             condition also excludes multi-table update statements.
+
+        Determine whether we will perform subquery materialization before
+        calling the IN=>EXISTS transformation, so that we know whether to
+        perform the whole transformation or only that part of it which wraps
+        Item_in_subselect in an Item_in_optimizer.
+      */
+      if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)  && 
+          in_subs  &&                                                   // 1
+          !select_lex->is_part_of_union() &&                            // 2
+          select_lex->master_unit()->first_select()->leaf_tables &&     // 3
+          thd->lex->sql_command == SQLCOM_SELECT &&                     // *
+          select_lex->outer_select()->leaf_tables &&                    // 3A
+          subquery_types_allow_materialization(in_subs) &&
+          // psergey-todo: duplicated_subselect_card_check: where it's done?
+          (in_subs->is_top_level_item() ||
+           optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
+           optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
+          !in_subs->is_correlated &&                                  // 5
+          in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
+      {
+          in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
+      }
+
+      Item_subselect::trans_res trans_res;
+      if ((trans_res= subselect->select_transformer(join)) !=
+          Item_subselect::RES_OK)
+      {
+        DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
+      }
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  @brief Check if subquery's compared types allow materialization.
+
+  @param in_subs Subquery predicate, updated as follows:
+    types_allow_materialization TRUE if subquery materialization is allowed.
+    sjm_scan_allowed            If types_allow_materialization is TRUE,
+                                indicates whether it is possible to use subquery
+                                materialization and scan the materialized table.
+
+  @retval TRUE   If subquery types allow materialization.
+  @retval FALSE  Otherwise.
+
+  @details
+    This is a temporary fix for BUG#36752.
+    
+    There are two subquery materialization strategies:
+
+    1. Materialize and do index lookups in the materialized table. See 
+       BUG#36752 for description of restrictions we need to put on the
+       compared expressions.
+
+    2. Materialize and then do a full scan of the materialized table. At the
+       moment, this strategy's applicability criteria are even stricter than
+       in #1.
+
+       This is so because of the following: consider an uncorrelated subquery
+       
+       ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...)
+
+       and a join order that could be used to do sjm-materialization: 
+          
+          SJM-Scan(it1, it1), ot1, ot2
+       
+       IN-equalities will be parts of conditions attached to the outer tables:
+
+         ot1:  ot1.col1 = ie1 AND ... (C1)
+         ot2:  ot1.col2 = ie2 AND ... (C2)
+       
+       besides those there may be additional references to ie1 and ie2
+       generated by equality propagation. The problem with evaluating C1 and
+       C2 is that ie{1,2} refer to subquery tables' columns, while we only have 
+       current value of materialization temptable. Our solution is to 
+        * require that all ie{N} are table column references. This allows 
+          to copy the values of materialization temptable columns to the
+          original table's columns (see setup_sj_materialization for more
+          details)
+        * require that compared columns have exactly the same type. This is
+          a temporary measure to avoid BUG#36752-type problems.
+*/
+
+static 
+bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
+{
+  DBUG_ENTER("subquery_types_allow_materialization");
+
+  DBUG_ASSERT(in_subs->left_expr->fixed);
+
+  List_iterator<Item> it(in_subs->unit->first_select()->item_list);
+  uint elements= in_subs->unit->first_select()->item_list.elements;
+
+  in_subs->types_allow_materialization= FALSE;  // Assign default values
+  in_subs->sjm_scan_allowed= FALSE;
+  
+  bool all_are_fields= TRUE;
+  for (uint i= 0; i < elements; i++)
+  {
+    Item *outer= in_subs->left_expr->element_index(i);
+    Item *inner= it++;
+    all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && 
+                       inner->real_item()->type() == Item::FIELD_ITEM);
+    if (outer->result_type() != inner->result_type())
+      DBUG_RETURN(FALSE);
+    switch (outer->result_type()) {
+    case STRING_RESULT:
+      if (outer->is_datetime() != inner->is_datetime())
+        DBUG_RETURN(FALSE);
+
+      if (!(outer->collation.collation == inner->collation.collation 
+          /*&& outer->max_length <= inner->max_length */))
+        DBUG_RETURN(FALSE);
+    /*case INT_RESULT:
+      if (!(outer->unsigned_flag ^ inner->unsigned_flag))
+        DBUG_RETURN(FALSE); */
+    default:
+      ;/* suitable for materialization */
+    }
+
+    // Materialization does not work with BLOB columns
+    if (inner->field_type() == MYSQL_TYPE_BLOB || 
+	inner->field_type() == MYSQL_TYPE_GEOMETRY)
+        DBUG_RETURN(FALSE);
+  }
+    
+  in_subs->types_allow_materialization= TRUE;
+  in_subs->sjm_scan_allowed= all_are_fields;
+  DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Convert semi-join subquery predicates into semi-join join nests
+
+  SYNOPSIS
+    convert_join_subqueries_to_semijoins()
+ 
+  DESCRIPTION
+
+    Convert candidate subquery predicates into semi-join join nests. This 
+    transformation is performed once in query lifetime and is irreversible.
+    
+    Conversion of one subquery predicate
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    We start with a join that has a semi-join subquery:
+
+      SELECT ...
+      FROM ot, ...
+      WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
+
+    and convert it into a semi-join nest:
+
+      SELECT ...
+      FROM ot SEMI JOIN (it1 ... itN), ...
+      WHERE outer_where AND subq_where AND oe=ie
+
+    that is, in order to do the conversion, we need to 
+
+     * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
+       query's FROM structure.
+     * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
+       the subquery predicate was in an ON expression)
+     * Remove the subquery predicate from the parent query's WHERE
+
+    Considerations when converting many predicates
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    A join may have at most MAX_TABLES tables. This may prevent us from
+    flattening all subqueries when the total number of tables in parent and
+    child selects exceeds MAX_TABLES.
+    We deal with this problem by flattening children's subqueries first and
+    then using a heuristic rule to determine each subquery predicate's
+    "priority".
+
+  RETURN 
+    FALSE  OK
+    TRUE   Error
+*/
+
+bool convert_join_subqueries_to_semijoins(JOIN *join)
+{
+  Query_arena *arena, backup;
+  Item_in_subselect **in_subq;
+  Item_in_subselect **in_subq_end;
+  THD *thd= join->thd;
+  DBUG_ENTER("convert_join_subqueries_to_semijoins");
+
+  if (join->sj_subselects.elements() == 0)
+    DBUG_RETURN(FALSE);
+
+  /* First, convert child join's subqueries. We proceed bottom-up here */
+  for (in_subq= join->sj_subselects.front(), 
+       in_subq_end= join->sj_subselects.back(); 
+       in_subq != in_subq_end; 
+       in_subq++)
+  {
+    st_select_lex *child_select= (*in_subq)->get_select_lex();
+    JOIN *child_join= child_select->join;
+    child_join->outer_tables = child_join->tables;
+
+    /*
+      child_select->where contains only the WHERE predicate of the
+      subquery itself here. We may be selecting from a VIEW, which has its
+      own predicate. The combined predicates are available in child_join->conds,
+      which was built by setup_conds() doing prepare_where() for all views.
+    */
+    child_select->where= child_join->conds;
+
+    if (convert_join_subqueries_to_semijoins(child_join))
+      DBUG_RETURN(TRUE);
+    (*in_subq)->sj_convert_priority= 
+      (*in_subq)->is_correlated * MAX_TABLES + child_join->outer_tables;
+  }
+  
+  // Temporary measure: disable semi-joins when they are together with outer
+  // joins.
+  for (TABLE_LIST *tbl= join->select_lex->leaf_tables; tbl; tbl=tbl->next_leaf)
+  {
+    TABLE_LIST *embedding= tbl->embedding;
+    if (tbl->on_expr || (tbl->embedding && !(embedding->sj_on_expr && 
+                                            !embedding->embedding)))
+    {
+      in_subq= join->sj_subselects.front();
+      arena= thd->activate_stmt_arena_if_needed(&backup);
+      goto skip_conversion;
+    }
+  }
+
+  //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
+  /* 
+    2. Pick which subqueries to convert:
+      sort the subquery array
+      - prefer correlated subqueries over uncorrelated;
+      - prefer subqueries that have greater number of outer tables;
+  */
+  join->sj_subselects.sort(subq_sj_candidate_cmp);
+  // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
+  /* Replace all subqueries to be flattened with Item_int(1) */
+  arena= thd->activate_stmt_arena_if_needed(&backup);
+  for (in_subq= join->sj_subselects.front(); 
+       in_subq != in_subq_end && 
+       join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
+       in_subq++)
+  {
+    Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
+                   &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
+    if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1),
+                                   FALSE))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+  }
+ 
+  for (in_subq= join->sj_subselects.front(); 
+       in_subq != in_subq_end && 
+       join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
+       in_subq++)
+  {
+    if (convert_subq_to_sj(join, *in_subq))
+      DBUG_RETURN(TRUE);
+  }
+skip_conversion:
+  /* 
+    3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't
+    convert:
+  */
+  for (; in_subq!= in_subq_end; in_subq++)
+  {
+    JOIN *child_join= (*in_subq)->unit->first_select()->join;
+    Item_subselect::trans_res res;
+    (*in_subq)->changed= 0;
+    (*in_subq)->fixed= 0;
+
+    SELECT_LEX *save_select_lex= thd->lex->current_select;
+    thd->lex->current_select= (*in_subq)->unit->first_select();
+
+    res= (*in_subq)->select_transformer(child_join);
+
+    thd->lex->current_select= save_select_lex;
+
+    if (res == Item_subselect::RES_ERROR)
+      DBUG_RETURN(TRUE);
+
+    (*in_subq)->changed= 1;
+    (*in_subq)->fixed= 1;
+
+    Item *substitute= (*in_subq)->substitution;
+    bool do_fix_fields= !(*in_subq)->substitution->fixed;
+    Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
+                   &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
+    if (replace_where_subcondition(join, tree, *in_subq, substitute, 
+                                   do_fix_fields))
+      DBUG_RETURN(TRUE);
+    (*in_subq)->substitution= NULL;
+     
+    if (!thd->stmt_arena->is_conventional())
+    {
+      tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
+             &join->select_lex->prep_where : 
+             &((*in_subq)->emb_on_expr_nest->prep_on_expr);
+
+      if (replace_where_subcondition(join, tree, *in_subq, substitute, 
+                                     FALSE))
+        DBUG_RETURN(TRUE);
+    }
+  }
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  join->sj_subselects.clear();
+  DBUG_RETURN(FALSE);
+}
+
+/**
+   @brief Replaces an expression destructively inside the expression tree of
+   the WHERE clase.
+
+   @note Because of current requirements for semijoin flattening, we do not
+   need to recurse here, hence this function will only examine the top-level
+   AND conditions. (see JOIN::prepare, comment starting with "Check if the 
+   subquery predicate can be executed via materialization".
+   
+   @param join The top-level query.
+   @param old_cond The expression to be replaced.
+   @param new_cond The expression to be substituted.
+   @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
+   the new expression.
+   @return <code>true</code> if there was an error, <code>false</code> if
+   successful.
+*/
+static bool replace_where_subcondition(JOIN *join, Item **expr, 
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields)
+{
+  //Item **expr= (emb_nest == (TABLE_LIST*)1)? &join->conds : &emb_nest->on_expr;
+  if (*expr == old_cond)
+  {
+    *expr= new_cond;
+    if (do_fix_fields)
+      new_cond->fix_fields(join->thd, expr);
+    return FALSE;
+  }
+  
+  if ((*expr)->type() == Item::COND_ITEM) 
+  {
+    List_iterator<Item> li(*((Item_cond*)(*expr))->argument_list());
+    Item *item;
+    while ((item= li++))
+    {
+      if (item == old_cond) 
+      {
+        li.replace(new_cond);
+        if (do_fix_fields)
+          new_cond->fix_fields(join->thd, li.ref());
+        return FALSE;
+      }
+    }
+  }
+  // If we came here it means there were an error during prerequisites check.
+  DBUG_ASSERT(0);
+  return TRUE;
+}
+
+static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, 
+                                 Item_in_subselect* const *el2)
+{
+  return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 : 
+         ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1);
+}
+
+
+/*
+  Convert a subquery predicate into a TABLE_LIST semi-join nest
+
+  SYNOPSIS
+    convert_subq_to_sj()
+       parent_join  Parent join, the one that has subq_pred in its WHERE/ON 
+                    clause
+       subq_pred    Subquery predicate to be converted
+  
+  DESCRIPTION
+    Convert a subquery predicate into a TABLE_LIST semi-join nest. All the 
+    prerequisites are already checked, so the conversion is always successfull.
+
+    Prepared Statements: the transformation is permanent:
+     - Changes in TABLE_LIST structures are naturally permanent
+     - Item tree changes are performed on statement MEM_ROOT:
+        = we activate statement MEM_ROOT 
+        = this function is called before the first fix_prepare_information
+          call.
+
+    This is intended because the criteria for subquery-to-sj conversion remain
+    constant for the lifetime of the Prepared Statement.
+
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory error
+*/
+
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  TABLE_LIST *emb_tbl_nest= NULL;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  THD *thd= parent_join->thd;
+  DBUG_ENTER("convert_subq_to_sj");
+
+  /*
+    1. Find out where to put the predicate into.
+     Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
+  */
+  if ((void*)subq_pred->expr_join_nest != (void*)1)
+  {
+    if (subq_pred->expr_join_nest->nested_join)
+    {
+      /*
+        We're dealing with
+
+          ... [LEFT] JOIN  ( ... ) ON (subquery AND whatever) ...
+
+        The sj-nest will be inserted into the brackets nest.
+      */
+      emb_tbl_nest=  subq_pred->expr_join_nest;
+      emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->expr_join_nest->outer_join)
+    {
+      /*
+        We're dealing with
+
+          ... INNER JOIN tblX ON (subquery AND whatever) ...
+
+        The sj-nest will be tblX's "sibling", i.e. another child of its
+        parent. This is ok because tblX is joined as an inner join.
+      */
+      emb_tbl_nest= subq_pred->expr_join_nest->embedding;
+      if (emb_tbl_nest)
+        emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->expr_join_nest->nested_join)
+    {
+      TABLE_LIST *outer_tbl= subq_pred->expr_join_nest;      
+      TABLE_LIST *wrap_nest;
+      /*
+        We're dealing with
+
+          ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
+
+        we'll need to convert it into:
+
+          ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
+                        |                      |
+                        |<----- wrap_nest ---->|
+        
+        Q:  other subqueries may be pointing to this element. What to do?
+        A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest.
+            But we'll need to fix other pointers.
+        A2: Another way: have TABLE_LIST::next_ptr so the following
+            subqueries know the table has been nested.
+        A3: changes in the TABLE_LIST::outer_join will make everything work
+            automatically.
+      */
+      if (!(wrap_nest= alloc_join_nest(parent_join->thd)))
+      {
+        DBUG_RETURN(TRUE);
+      }
+      wrap_nest->embedding= outer_tbl->embedding;
+      wrap_nest->join_list= outer_tbl->join_list;
+      wrap_nest->alias= (char*) "(sj-wrap)";
+
+      wrap_nest->nested_join->join_list.empty();
+      wrap_nest->nested_join->join_list.push_back(outer_tbl);
+
+      outer_tbl->embedding= wrap_nest;
+      outer_tbl->join_list= &wrap_nest->nested_join->join_list;
+
+      /*
+        wrap_nest will take place of outer_tbl, so move the outer join flag
+        and on_expr
+      */
+      wrap_nest->outer_join= outer_tbl->outer_join;
+      outer_tbl->outer_join= 0;
+
+      wrap_nest->on_expr= outer_tbl->on_expr;
+      outer_tbl->on_expr= NULL;
+
+      List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl == outer_tbl)
+        {
+          li.replace(wrap_nest);
+          break;
+        }
+      }
+      /*
+        Ok now wrap_nest 'contains' outer_tbl and we're ready to add the 
+        semi-join nest into it
+      */
+      emb_join_list= &wrap_nest->nested_join->join_list;
+      emb_tbl_nest=  wrap_nest;
+    }
+  }
+
+  TABLE_LIST *sj_nest;
+  NESTED_JOIN *nested_join;
+  if (!(sj_nest= alloc_join_nest(parent_join->thd)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  nested_join= sj_nest->nested_join;
+
+  sj_nest->join_list= emb_join_list;
+  sj_nest->embedding= emb_tbl_nest;
+  sj_nest->alias= (char*) "(sj-nest)";
+  sj_nest->sj_subq_pred= subq_pred;
+  /* Nests do not participate in those 'chains', so: */
+  /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
+  emb_join_list->push_back(sj_nest);
+
+  /* 
+    nested_join->used_tables and nested_join->not_null_tables are
+    initialized in simplify_joins().
+  */
+  
+  /* 
+    2. Walk through subquery's top list and set 'embedding' to point to the
+       sj-nest.
+  */
+  st_select_lex *subq_lex= subq_pred->unit->first_select();
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
+  TABLE_LIST *tl, *last_leaf;
+  while ((tl= li++))
+  {
+    tl->embedding= sj_nest;
+    tl->join_list= &nested_join->join_list;
+    nested_join->join_list.push_back(tl);
+  }
+  
+  /*
+    Reconnect the next_leaf chain.
+    TODO: Do we have to put subquery's tables at the end of the chain?
+          Inserting them at the beginning would be a bit faster.
+    NOTE: We actually insert them at the front! That's because the order is
+          reversed in this list.
+  */
+  for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ;
+  tl->next_leaf= subq_lex->leaf_tables;
+  last_leaf= tl;
+
+  /*
+    Same as above for next_local chain
+    (a theory: a next_local chain always starts with ::leaf_tables
+     because view's tables are inserted after the view)
+  */
+  for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ;
+  tl->next_local= subq_lex->leaf_tables;
+
+  /* A theory: no need to re-connect the next_global chain */
+
+  /* 3. Remove the original subquery predicate from the WHERE/ON */
+
+  // The subqueries were replaced for Item_int(1) earlier
+  subq_pred->exec_method=
+    Item_in_subselect::SEMI_JOIN;         // for subsequent executions
+  /*TODO: also reset the 'with_subselect' there. */
+
+  /* n. Adjust the parent_join->tables counter */
+  uint table_no= parent_join->tables;
+  /* n. Walk through child's tables and adjust table->map */
+  for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++)
+  {
+    tl->table->tablenr= table_no;
+    tl->table->map= ((table_map)1) << table_no;
+    SELECT_LEX *old_sl= tl->select_lex;
+    tl->select_lex= parent_join->select_lex; 
+    for (TABLE_LIST *emb= tl->embedding;
+         emb && emb->select_lex == old_sl;
+         emb= emb->embedding)
+      emb->select_lex= parent_join->select_lex;
+  }
+  parent_join->tables += subq_lex->join->tables;
+
+  /* 
+    Put the subquery's WHERE into semi-join's sj_on_expr
+    Add the subquery-induced equalities too.
+  */
+  SELECT_LEX *save_lex= thd->lex->current_select;
+  thd->lex->current_select=subq_lex;
+  if (!subq_pred->left_expr->fixed &&
+       subq_pred->left_expr->fix_fields(thd, &subq_pred->left_expr))
+    DBUG_RETURN(TRUE);
+  thd->lex->current_select=save_lex;
+
+  sj_nest->nested_join->sj_corr_tables= subq_pred->used_tables();
+  sj_nest->nested_join->sj_depends_on=  subq_pred->used_tables() |
+                                        subq_pred->left_expr->used_tables();
+  sj_nest->sj_on_expr= subq_lex->join->conds;
+
+  /*
+    Create the IN-equalities and inject them into semi-join's ON expression.
+    Additionally, for LooseScan strategy
+     - Record the number of IN-equalities.
+     - Create list of pointers to (oe1, ..., ieN). We'll need the list to
+       see which of the expressions are bound and which are not (for those
+       we'll produce a distinct stream of (ie_i1,...ie_ik).
+
+       (TODO: can we just create a list of pointers and hope the expressions
+       will not substitute themselves on fix_fields()? or we need to wrap
+       them into Item_direct_view_refs and store pointers to those. The
+       pointers to Item_direct_view_refs are guaranteed to be stable as 
+       Item_direct_view_refs doesn't substitute itself with anything in 
+       Item_direct_view_ref::fix_fields.
+  */
+  sj_nest->sj_in_exprs= subq_pred->left_expr->cols();
+  sj_nest->nested_join->sj_outer_expr_list.empty();
+
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr);
+    Item_func_eq *item_eq=
+      new Item_func_eq(subq_pred->left_expr, subq_lex->ref_pointer_array[0]);
+    item_eq->in_equality_no= 0;
+    sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+  }
+  else
+  {
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr->
+                                                element_index(i));
+      Item_func_eq *item_eq= 
+        new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                         subq_lex->ref_pointer_array[i]);
+      item_eq->in_equality_no= i;
+      sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+    }
+  }
+  /* Fix the created equality and AND */
+  sj_nest->sj_on_expr->fix_fields(parent_join->thd, &sj_nest->sj_on_expr);
+
+  /*
+    Walk through sj nest's WHERE and ON expressions and call
+    item->fix_table_changes() for all items.
+  */
+  sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr);
+  fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list);
+
+
+  /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */
+  subq_lex->master_unit()->exclude_level();
+
+  DBUG_EXECUTE("where",
+               print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
+
+  /* Inject sj_on_expr into the parent's WHERE or ON */
+  if (emb_tbl_nest)
+  {
+    emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+    emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr);
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
+    parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+    parent_join->select_lex->where= parent_join->conds;
+  }
+
+  if (subq_lex->ftfunc_list->elements)
+  {
+    Item_func_match *ifm;
+    List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
+    while ((ifm= li++))
+      parent_lex->ftfunc_list->push_front(ifm);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+static TABLE_LIST *alloc_join_nest(THD *thd)
+{
+  TABLE_LIST *tbl;
+  if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+
+                                       sizeof(NESTED_JOIN))))
+    return NULL;
+  tbl->nested_join= (NESTED_JOIN*) ((uchar*)tbl + 
+                                    ALIGN_SIZE(sizeof(TABLE_LIST)));
+  return tbl;
+}
+
+
+static
+void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist)
+{
+  List_iterator<TABLE_LIST> it(*tlist);
+  TABLE_LIST *table;
+  while ((table= it++))
+  {
+    if (table->on_expr)
+      table->on_expr->fix_after_pullout(new_parent, &table->on_expr);
+    if (table->nested_join)
+      fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list);
+  }
+}
+
+
+/*
+  Pull tables out of semi-join nests, if possible
+
+  SYNOPSIS
+    pull_out_semijoin_tables()
+      join  The join where to do the semi-join flattening
+
+  DESCRIPTION
+    Try to pull tables out of semi-join nests.
+     
+    PRECONDITIONS
+    When this function is called, the join may have several semi-join nests
+    but it is guaranteed that one semi-join nest does not contain another.
+   
+    ACTION
+    A table can be pulled out of the semi-join nest if
+     - It is a constant table, or
+     - It is accessed via eq_ref(outer_tables)
+
+    POSTCONDITIONS
+     * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL
+     * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing 
+       to semi-join nest they are in.
+     * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly
+
+    This operation is (and should be) performed at each PS execution since
+    tables may become/cease to be constant across PS reexecutions.
+    
+  NOTE
+    Table pullout may make uncorrelated subquery correlated. Consider this
+    example:
+    
+     ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) 
+    
+    here table it1 can be pulled out (we have it1.primary_key=oe which gives
+    us functional dependency). Once it1 is pulled out, all references to it1
+    from p(it1, it2) become references to outside of the subquery and thus
+    make the subquery (i.e. its semi-join nest) correlated.
+    Making the subquery (i.e. its semi-join nest) correlated prevents us from
+    using Materialization or LooseScan to execute it. 
+
+  RETURN 
+    0 - OK
+    1 - Out of memory error
+*/
+
+int pull_out_semijoin_tables(JOIN *join)
+{
+  TABLE_LIST *sj_nest;
+  DBUG_ENTER("pull_out_semijoin_tables");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+   
+  /* Try pulling out of the each of the semi-joins */
+  while ((sj_nest= sj_list_it++))
+  {
+    /* Action #1: Mark the constant tables to be pulled out */
+    table_map pulled_tables= 0;
+    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
+    TABLE_LIST *tbl;
+    while ((tbl= child_li++))
+    {
+      if (tbl->table)
+      {
+        tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+#if 0 
+        /* 
+          Do not pull out tables because they are constant. This operation has
+          a problem:
+          - Some constant tables may become/cease to be constant across PS
+            re-executions
+          - Contrary to our initial assumption, it turned out that table pullout 
+            operation is not easily undoable.
+
+          The solution is to leave constant tables where they are. This will
+          affect only constant tables that are 1-row or empty, tables that are
+          constant because they are accessed via eq_ref(const) access will
+          still be pulled out as functionally-dependent.
+
+          This will cause us to miss the chance to flatten some of the 
+          subqueries, but since const tables do not generate many duplicates,
+          it really doesn't matter that much whether they were pulled out or
+          not.
+
+          All of this was done as fix for BUG#43768.
+        */
+        if (tbl->table->map & join->const_table_map)
+        {
+          pulled_tables |= tbl->table->map;
+          DBUG_PRINT("info", ("Table %s pulled out (reason: constant)",
+                              tbl->table->alias));
+        }
+#endif
+      }
+    }
+    
+    /*
+      Action #2: Find which tables we can pull out based on
+      update_ref_and_keys() data. Note that pulling one table out can allow
+      us to pull out some other tables too.
+    */
+    bool pulled_a_table;
+    do 
+    {
+      pulled_a_table= FALSE;
+      child_li.rewind();
+      while ((tbl= child_li++))
+      {
+        if (tbl->table && !(pulled_tables & tbl->table->map))
+        {
+          if (find_eq_ref_candidate(tbl->table, 
+                                    sj_nest->nested_join->used_tables & 
+                                    ~pulled_tables))
+          {
+            pulled_a_table= TRUE;
+            pulled_tables |= tbl->table->map;
+            DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)",
+                                tbl->table->alias));
+            /*
+              Pulling a table out of uncorrelated subquery in general makes
+              makes it correlated. See the NOTE to this funtion. 
+            */
+            sj_nest->sj_subq_pred->is_correlated= TRUE;
+            sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
+            sj_nest->nested_join->sj_depends_on|= tbl->table->map;
+          }
+        }
+      }
+    } while (pulled_a_table);
+ 
+    child_li.rewind();
+    /*
+      Action #3: Move the pulled out TABLE_LIST elements to the parents.
+    */
+    table_map inner_tables= sj_nest->nested_join->used_tables & 
+                            ~pulled_tables;
+    /* Record the bitmap of inner tables */
+    sj_nest->sj_inner_tables= inner_tables;
+    if (pulled_tables)
+    {
+      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)?
+                                           (&sj_nest->embedding->nested_join->join_list): 
+                                           (&join->select_lex->top_join_list);
+      Query_arena *arena, backup;
+      arena= join->thd->activate_stmt_arena_if_needed(&backup);
+      while ((tbl= child_li++))
+      {
+        if (tbl->table)
+        {
+          if (inner_tables & tbl->table->map)
+          {
+            /* This table is not pulled out */
+            tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
+          }
+          else
+          {
+            /* This table has been pulled out of the semi-join nest */
+            tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
+            /*
+              Pull the table up in the same way as simplify_joins() does:
+              update join_list and embedding pointers but keep next[_local]
+              pointers.
+            */
+            child_li.remove();
+            sj_nest->nested_join->used_tables &= ~tbl->table->map;
+            upper_join_list->push_back(tbl);
+            tbl->join_list= upper_join_list;
+            tbl->embedding= sj_nest->embedding;
+          }
+        }
+      }
+
+      /* Remove the sj-nest itself if we've removed everything from it */
+      if (!inner_tables)
+      {
+        List_iterator<TABLE_LIST> li(*upper_join_list);
+        /* Find the sj_nest in the list. */
+        while (sj_nest != li++) ;
+        li.remove();
+        /* Also remove it from the list of SJ-nests: */
+        sj_list_it.remove();
+      }
+
+      if (arena)
+        join->thd->restore_active_arena(arena, &backup);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Optimize semi-join nests that could be run with sj-materialization
+
+  SYNOPSIS
+    optimize_semijoin_nests()
+      join           The join to optimize semi-join nests for
+      all_table_map  Bitmap of all tables in the join
+
+  DESCRIPTION
+    Optimize each of the semi-join nests that can be run with
+    materialization. For each of the nests, we
+     - Generate the best join order for this "sub-join" and remember it;
+     - Remember the sub-join execution cost (it's part of materialization
+       cost);
+     - Calculate other costs that will be incurred if we decide 
+       to use materialization strategy for this semi-join nest.
+
+    All obtained information is saved and will be used by the main join
+    optimization pass.
+
+  RETURN
+    FALSE  Ok 
+    TRUE   Out of memory error
+*/
+
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
+{
+  DBUG_ENTER("optimize_semijoin_nests");
+  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+  TABLE_LIST *sj_nest;
+  while ((sj_nest= sj_list_it++))
+  {
+    /* semi-join nests with only constant tables are not valid */
+   /// DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);
+
+    sj_nest->sj_mat_info= NULL;
+    /*
+      The statement may have been executed with 'semijoin=on' earlier.
+      We need to verify that 'semijoin=on' still holds.
+     */
+    if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+    {
+      if ((sj_nest->sj_inner_tables  & ~join->const_table_map) && /* not everything was pulled out */
+          !sj_nest->sj_subq_pred->is_correlated && 
+           sj_nest->sj_subq_pred->types_allow_materialization)
+      {
+        join->emb_sjm_nest= sj_nest;
+        if (choose_plan(join, all_table_map))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        /*
+          The best plan to run the subquery is now in join->best_positions,
+          save it.
+        */
+        uint n_tables= my_count_bits(sj_nest->sj_inner_tables & ~join->const_table_map);
+        SJ_MATERIALIZATION_INFO* sjm;
+        if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
+            !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
+                                                          n_tables)))
+          DBUG_RETURN(TRUE); /* purecov: inspected */
+        sjm->tables= n_tables;
+        sjm->is_used= FALSE;
+        double subjoin_out_rows, subjoin_read_time;
+        get_partial_join_cost(join, n_tables,
+                              &subjoin_read_time, &subjoin_out_rows);
+
+        sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+        sjm->rows= subjoin_out_rows;
+
+        List<Item> &right_expr_list= 
+          sj_nest->sj_subq_pred->unit->first_select()->item_list;
+        /*
+          Adjust output cardinality estimates. If the subquery has form
+
+           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
+
+           then the number of distinct output record combinations has an
+           upper bound of product of number of records matching the tables 
+           that are used by the SELECT clause.
+           TODO:
+             We can get a more precise estimate if we
+              - use rec_per_key cardinality estimates. For simple cases like 
+                "oe IN (SELECT t.key ...)" it is trivial. 
+              - Functional dependencies between the tables in the semi-join
+                nest (the payoff is probably less here?)
+        */
+        {
+          for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
+          {
+            JOIN_TAB *tab= join->best_positions[i].table;
+            join->map2table[tab->table->tablenr]= tab;
+          }
+          List_iterator<Item> it(right_expr_list);
+          Item *item;
+          table_map map= 0;
+          while ((item= it++))
+            map |= item->used_tables();
+          map= map & ~PSEUDO_TABLE_BITS;
+          Table_map_iterator tm_it(map);
+          int tableno;
+          double rows= 1.0;
+          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+            rows *= join->map2table[tableno]->table->quick_condition_rows;
+          sjm->rows= min(sjm->rows, rows);
+        }
+        memcpy(sjm->positions, join->best_positions + join->const_tables, 
+               sizeof(POSITION) * n_tables);
+
+        /*
+          Calculate temporary table parameters and usage costs
+        */
+        uint rowlen= get_tmp_table_rec_length(right_expr_list);
+        double lookup_cost;
+        if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size)
+          lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+        else
+          lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+
+        /*
+          Let materialization cost include the cost to write the data into the
+          temporary table:
+        */ 
+        sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost);
+        
+        /*
+          Set the cost to do a full scan of the temptable (will need this to 
+          consider doing sjm-scan):
+        */ 
+        sjm->scan_cost.zero();
+        sjm->scan_cost.add_io(sjm->rows, lookup_cost);
+
+        sjm->lookup_cost.convert_from_cost(lookup_cost);
+        sj_nest->sj_mat_info= sjm;
+        DBUG_EXECUTE("opt", print_sjm(sjm););
+      }
+    }
+  }
+  join->emb_sjm_nest= NULL;
+  DBUG_RETURN(FALSE);
+}
+
+/*
+  Get estimated record length for semi-join materialization temptable
+  
+  SYNOPSIS
+    get_tmp_table_rec_length()
+      items  IN subquery's select list.
+
+  DESCRIPTION
+    Calculate estimated record length for semi-join materialization
+    temptable. It's an estimate because we don't follow every bit of
+    create_tmp_table()'s logic. This isn't necessary as the return value of
+    this function is used only for cost calculations.
+
+  RETURN
+    Length of the temptable record, in bytes
+*/
+
+static uint get_tmp_table_rec_length(List<Item> &items)
+{
+  uint len= 0;
+  Item *item;
+  List_iterator<Item> it(items);
+  while ((item= it++))
+  {
+    switch (item->result_type()) {
+    case REAL_RESULT:
+      len += sizeof(double);
+      break;
+    case INT_RESULT:
+      if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
+        len += 8;
+      else
+        len += 4;
+      break;
+    case STRING_RESULT:
+      enum enum_field_types type;
+      /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
+      if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
+          type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
+          type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+        len += 8;
+      else
+        len += item->max_length;
+      break;
+    case DECIMAL_RESULT:
+      len += 10;
+      break;
+    case ROW_RESULT:
+    default:
+      DBUG_ASSERT(0); /* purecov: deadcode */
+      break;
+    }
+  }
+  return len;
+}
+
+//psergey-todo: is the below a kind of table elimination??
+/*
+  Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
+
+  SYNOPSIS
+    find_eq_ref_candidate()
+      table             Table to be checked
+      sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
+                        count.
+
+  DESCRIPTION
+    Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
+
+  TODO
+    Check again if it is feasible to factor common parts with constant table
+    search
+
+  RETURN
+    TRUE  - There exists an eq_ref(outer-tables) candidate
+    FALSE - Otherwise
+*/
+
+bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
+{
+  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
+  uint key;
+
+  if (keyuse)
+  {
+    while (1) /* For each key */
+    {
+      key= keyuse->key;
+      KEY *keyinfo= table->key_info + key;
+      key_part_map bound_parts= 0;
+      if (keyinfo->flags & HA_NOSAME)
+      {
+        do  /* For all equalities on all key parts */
+        {
+          /* Check if this is "t.keypart = expr(outer_tables) */
+          if (!(keyuse->used_tables & sj_inner_tables) &&
+              !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
+          {
+            bound_parts |= 1 << keyuse->keypart;
+          }
+          keyuse++;
+        } while (keyuse->key == key && keyuse->table == table);
+
+        if (bound_parts == PREV_BITS(uint, keyinfo->key_parts))
+          return TRUE;
+        if (keyuse->table != table)
+          return FALSE;
+      }
+      else
+      {
+        do
+        {
+          keyuse++;
+          if (keyuse->table != table)
+            return FALSE;
+        }
+        while (keyuse->key == key);
+      }
+    }
+  }
+  return FALSE;
+}
+
+/*
+  Do semi-join optimization step after we've added a new tab to join prefix
+
+  SYNOPSIS
+    advance_sj_state()
+      join                        The join we're optimizing
+      remaining_tables            Tables not in the join prefix
+      new_join_tab                Join tab we've just added to the join prefix
+      idx                         Index of this join tab (i.e. number of tables
+                                  in the prefix minus one)
+      current_record_count INOUT  Estimate of #records in join prefix's output
+      current_read_time    INOUT  Cost to execute the join prefix
+      loose_scan_pos       IN     A POSITION with LooseScan plan to access 
+                                  table new_join_tab
+                                  (produced by the last best_access_path call)
+
+  DESCRIPTION
+    Update semi-join optimization state after we've added another tab (table 
+    and access method) to the join prefix.
+    
+    The state is maintained in join->positions[#prefix_size]. Each of the
+    available strategies has its own state variables.
+    
+    for each semi-join strategy
+    {
+      update strategy's state variables;
+
+      if (join prefix has all the tables that are needed to consider
+          using this strategy for the semi-join(s))
+      {
+        calculate cost of using the strategy
+        if ((this is the first strategy to handle the semi-join nest(s)  ||
+            the cost is less than other strategies))
+        {
+          // Pick this strategy
+          pos->sj_strategy= ..
+          ..
+        }
+      }
+
+    Most of the new state is saved join->positions[idx] (and hence no undo
+    is necessary). Several members of class JOIN are updated also, these
+    changes can be rolled back with restore_prev_sj_state().
+
+    See setup_semijoin_dups_elimination() for a description of what kinds of
+    join prefixes each strategy can handle.
+*/
+
+void advance_sj_state(JOIN *join, table_map remaining_tables, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
+                      double *current_record_count, double *current_read_time, 
+                      POSITION *loose_scan_pos)
+{
+  TABLE_LIST *emb_sj_nest;
+  POSITION *pos= join->positions + idx;
+  remaining_tables &= ~new_join_tab->table->map;
+
+  pos->prefix_cost.convert_from_cost(*current_read_time);
+  pos->prefix_record_count= *current_record_count;
+  pos->sj_strategy= SJ_OPT_NONE;
+  
+  /* Initialize the state or copy it from prev. tables */
+  if (idx == join->const_tables)
+  {
+    pos->first_firstmatch_table= MAX_TABLES;
+    pos->first_loosescan_table= MAX_TABLES; 
+    pos->dupsweedout_tables= 0;
+    pos->sjm_scan_need_tables= 0;
+    LINT_INIT(pos->sjm_scan_last_inner);
+  }
+  else
+  {
+    // FirstMatch
+    pos->first_firstmatch_table=
+      (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
+      MAX_TABLES : pos[-1].first_firstmatch_table;
+    pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
+    pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
+
+    // LooseScan
+    pos->first_loosescan_table=
+      (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
+      MAX_TABLES : pos[-1].first_loosescan_table;
+    pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
+
+    // SJ-Materialization Scan
+    pos->sjm_scan_need_tables=
+      (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
+      0 : pos[-1].sjm_scan_need_tables;
+    pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
+
+    // Duplicate Weedout
+    pos->dupsweedout_tables=      pos[-1].dupsweedout_tables;
+    pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table;
+  }
+  
+  table_map handled_by_fm_or_ls= 0;
+  /* FirstMatch Strategy */
+  if (new_join_tab->emb_sj_nest &&
+      optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH))
+  {
+    const table_map outer_corr_tables=
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+    const table_map sj_inner_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
+
+    /* 
+      Enter condition:
+       1. The next join tab belongs to semi-join nest
+          (verified for the encompassing code block above).
+       2. We're not in a duplicate producer range yet
+       3. All outer tables that
+           - the subquery is correlated with, or
+           - referred to from the outer_expr 
+          are in the join prefix
+       4. All inner tables are still part of remaining_tables.
+    */
+    if (!join->cur_sj_inner_tables &&              // (2)
+        !(remaining_tables & outer_corr_tables) && // (3)
+        (sj_inner_tables ==                        // (4)
+         ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
+    {
+      /* Start tracking potential FirstMatch range */
+      pos->first_firstmatch_table= idx;
+      pos->firstmatch_need_tables= sj_inner_tables;
+      pos->first_firstmatch_rtbl= remaining_tables;
+    }
+
+    if (pos->first_firstmatch_table != MAX_TABLES)
+    {
+      if (outer_corr_tables & pos->first_firstmatch_rtbl)
+      {
+        /*
+          Trying to add an sj-inner table whose sj-nest has an outer correlated 
+          table that was not in the prefix. This means FirstMatch can't be used.
+        */
+        pos->first_firstmatch_table= MAX_TABLES;
+      }
+      else
+      {
+        /* Record that we need all of this semi-join's inner tables, too */
+        pos->firstmatch_need_tables|= sj_inner_tables;
+      }
+    
+      if (!(pos->firstmatch_need_tables & remaining_tables))
+      {
+        /*
+          Got a complete FirstMatch range.
+            Calculate correct costs and fanout
+        */
+        double reopt_cost, reopt_rec_count, sj_inner_fanout;
+        optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx,
+                                   remaining_tables, FALSE, idx,
+                                   &reopt_rec_count, &reopt_cost, 
+                                   &sj_inner_fanout);
+        /*
+          We don't yet know what are the other strategies, so pick the
+          FirstMatch.
+
+          We ought to save the alternate POSITIONs produced by
+          optimize_wo_join_buffering but the problem is that providing save
+          space uses too much space. Instead, we will re-calculate the
+          alternate POSITIONs after we've picked the best QEP.
+        */
+        pos->sj_strategy= SJ_OPT_FIRST_MATCH;
+        *current_read_time=    reopt_cost;
+        *current_record_count= reopt_rec_count / sj_inner_fanout;
+        handled_by_fm_or_ls=  pos->firstmatch_need_tables;
+      }
+    }
+  }
+
+  /* LooseScan Strategy */
+  {
+    POSITION *first=join->positions+pos->first_loosescan_table; 
+    /* 
+      LooseScan strategy can't handle interleaving between tables from the 
+      semi-join that LooseScan is handling and any other tables.
+
+      If we were considering LooseScan for the join prefix (1)
+         and the table we're adding creates an interleaving (2)
+      then 
+         stop considering loose scan
+    */
+    if ((pos->first_loosescan_table != MAX_TABLES) &&   // (1)
+        (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+        new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
+    {
+      pos->first_loosescan_table= MAX_TABLES;
+    }
+
+    /*
+      If we got an option to use LooseScan for the current table, start
+      considering using LooseScan strategy
+    */
+    if (loose_scan_pos->read_time != DBL_MAX)
+    {
+      pos->first_loosescan_table= idx;
+      pos->loosescan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+    }
+    
+    if ((pos->first_loosescan_table != MAX_TABLES) && 
+        !(remaining_tables & pos->loosescan_need_tables))
+    {
+      /* 
+        Ok we have LooseScan plan and also have all LooseScan sj-nest's
+        inner tables and outer correlated tables into the prefix.
+      */
+
+      first=join->positions + pos->first_loosescan_table; 
+      uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
+      /* Got a complete LooseScan range. Calculate its cost */
+      double reopt_cost, reopt_rec_count, sj_inner_fanout;
+      /*
+        The same problem as with FirstMatch - we need to save POSITIONs
+        somewhere but reserving space for all cases would require too
+        much space. We will re-calculate POSITION structures later on. 
+      */
+      optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
+                                 remaining_tables, 
+                                 TRUE,  //first_alt
+                                 pos->first_loosescan_table + n_tables,
+                                 &reopt_rec_count, 
+                                 &reopt_cost, &sj_inner_fanout);
+      /*
+        We don't yet have any other strategies that could handle this
+        semi-join nest (the other options are Duplicate Elimination or
+        Materialization, which need at least the same set of tables in 
+        the join prefix to be considered) so unconditionally pick the 
+        LooseScan.
+      */
+      pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      *current_read_time=    reopt_cost;
+      *current_record_count= reopt_rec_count / sj_inner_fanout;
+      handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables;
+    }
+  }
+
+  /* 
+    Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
+    LooseScan detector in best_access_path)
+  */
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+  {
+    join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+
+    /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
+    if (!(remaining_tables &
+          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+      join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+  }
+  join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
+
+  /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
+  bool sjm_scan;
+  SJ_MATERIALIZATION_INFO *mat_info;
+  if ((mat_info= at_sjmat_pos(join, remaining_tables,
+                              new_join_tab, idx, &sjm_scan)))
+  {
+    if (sjm_scan)
+    {
+      /*
+        We can't yet evaluate this option yet. This is because we can't
+        accout for fanout of sj-inner tables yet:
+
+          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
+                                     ^(1)           ^(2)
+
+        we're now at position (1). SJM temptable in general has multiple
+        records, so at point (1) we'll get the fanout from sj-inner tables (ie
+        there will be multiple record combinations).
+
+        The final join result will not contain any semi-join produced
+        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
+        the cardinality of the join output.  Extra fanout produced by 
+        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ...  otN.
+
+        The simple way to model this is to remove SJM-SCAN(...) fanout once
+        we reach the point #2.
+      */
+      pos->sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+      pos->sjm_scan_last_inner= idx;
+    }
+    else
+    {
+      /* This is SJ-Materialization with lookups */
+      COST_VECT prefix_cost; 
+      signed int first_tab= (int)idx - mat_info->tables;
+      double prefix_rec_count;
+      if (first_tab < (int)join->const_tables)
+      {
+        prefix_cost.zero();
+        prefix_rec_count= 1.0;
+      }
+      else
+      {
+        prefix_cost= join->positions[first_tab].prefix_cost;
+        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      }
+
+      double mat_read_time= prefix_cost.total_cost();
+      mat_read_time += mat_info->materialization_cost.total_cost() +
+                       prefix_rec_count * mat_info->lookup_cost.total_cost();
+
+      if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
+      {
+        /*
+          NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+          elements to join->positions as that makes it hard to return things
+          back when making one step back in join optimization. That's done 
+          after the QEP has been chosen.
+        */
+        pos->sj_strategy= SJ_OPT_MATERIALIZE;
+        *current_read_time=    mat_read_time;
+        *current_record_count= prefix_rec_count;
+        join->cur_dups_producing_tables&=
+          ~new_join_tab->emb_sj_nest->sj_inner_tables;
+      }
+    }
+  }
+  
+  /* 4.A SJM-Scan second phase check */
+  if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */
+      !(pos->sjm_scan_need_tables & remaining_tables))
+  {
+    TABLE_LIST *mat_nest= 
+      join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
+    SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
+
+    double prefix_cost;
+    double prefix_rec_count;
+    int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
+    /* Get the prefix cost */
+    if (first_tab == (int)join->const_tables)
+    {
+      prefix_rec_count= 1.0;
+      prefix_cost= 0.0;
+    }
+    else
+    {
+      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+    }
+
+    /* Add materialization cost */
+    prefix_cost += mat_info->materialization_cost.total_cost() +
+                   prefix_rec_count * mat_info->scan_cost.total_cost();
+    prefix_rec_count *= mat_info->rows;
+    
+    uint i;
+    table_map rem_tables= remaining_tables;
+    for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
+      rem_tables |= join->positions[i].table->table->map;
+
+    POSITION curpos, dummy;
+    /* Need to re-run best-access-path as we prefix_rec_count has changed */
+    for (i= first_tab + mat_info->tables; i <= idx; i++)
+    {
+      best_access_path(join, join->positions[i].table, rem_tables, i, FALSE,
+                       prefix_rec_count, &curpos, &dummy);
+      prefix_rec_count *= curpos.records_read;
+      prefix_cost += curpos.read_time;
+    }
+
+    /*
+      Use the strategy if 
+       * it is cheaper then what we've had, or
+       * we haven't picked any other semi-join strategy yet
+      In the second case, we pick this strategy unconditionally because
+      comparing cost without semi-join duplicate removal with cost with
+      duplicate removal is not an apples-to-apples comparison.
+    */
+    if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
+    {
+      pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
+      *current_read_time=    prefix_cost;
+      *current_record_count= prefix_rec_count;
+      join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables;
+
+    }
+  }
+
+  /* 5. Duplicate Weedout strategy handler */
+  {
+    /* 
+       Duplicate weedout can be applied after all ON-correlated and 
+       correlated 
+    */
+    TABLE_LIST *nest;
+    if ((nest= new_join_tab->emb_sj_nest))
+    {
+      if (!pos->dupsweedout_tables)
+        pos->first_dupsweedout_table= idx;
+
+      pos->dupsweedout_tables |= nest->sj_inner_tables |
+                                 nest->nested_join->sj_depends_on |
+                                 nest->nested_join->sj_corr_tables;
+    }
+
+    if (pos->dupsweedout_tables && 
+        !(remaining_tables &
+          ~new_join_tab->table->map & pos->dupsweedout_tables))
+    {
+      /*
+        Ok, reached a state where we could put a dups weedout point.
+        Walk back and calculate
+          - the join cost (this is needed as the accumulated cost may assume 
+            some other duplicate elimination method)
+          - extra fanout that will be removed by duplicate elimination
+          - duplicate elimination cost
+        There are two cases:
+          1. We have other strategy/ies to remove all of the duplicates.
+          2. We don't.
+        
+        We need to calculate the cost in case #2 also because we need to make
+        choice between this join order and others.
+      */
+      uint first_tab= pos->first_dupsweedout_table;
+      double dups_cost;
+      double prefix_rec_count;
+      double sj_inner_fanout= 1.0;
+      double sj_outer_fanout= 1.0;
+      uint temptable_rec_size;
+      if (first_tab == join->const_tables)
+      {
+        prefix_rec_count= 1.0;
+        temptable_rec_size= 0;
+        dups_cost= 0.0;
+      }
+      else
+      {
+        dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+        prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+        temptable_rec_size= 8; /* This is not true but we'll make it so */
+      }
+      
+      table_map dups_removed_fanout= 0;
+      for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
+      {
+        POSITION *p= join->positions + j;
+        dups_cost += p->read_time;
+        if (p->table->emb_sj_nest)
+        {
+          sj_inner_fanout *= p->records_read;
+          dups_removed_fanout |= p->table->table->map;
+        }
+        else
+        {
+          sj_outer_fanout *= p->records_read;
+          temptable_rec_size += p->table->table->file->ref_length;
+        }
+      }
+
+      /*
+        Add the cost of temptable use. The table will have sj_outer_fanout
+        records, and we will make 
+        - sj_outer_fanout table writes
+        - sj_inner_fanout*sj_outer_fanout  lookups.
+
+      */
+      double one_lookup_cost;
+      if (sj_outer_fanout*temptable_rec_size > 
+          join->thd->variables.max_heap_table_size)
+        one_lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+      else
+        one_lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+
+      double write_cost= join->positions[first_tab].prefix_record_count* 
+                         sj_outer_fanout * one_lookup_cost;
+      double full_lookup_cost= join->positions[first_tab].prefix_record_count* 
+                               sj_outer_fanout* sj_inner_fanout * 
+                               one_lookup_cost;
+      dups_cost += write_cost + full_lookup_cost;
+      
+      /*
+        Use the strategy if 
+         * it is cheaper then what we've had, or
+         * we haven't picked any other semi-join strategy yet
+        The second part is necessary because this strategy is the last one
+        to consider (it needs "the most" tables in the prefix) and we can't
+        leave duplicate-producing tables not handled by any strategy.
+      */
+      if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
+      {
+        pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+        *current_read_time= dups_cost;
+        *current_record_count= *current_record_count / sj_inner_fanout;
+        join->cur_dups_producing_tables &= ~dups_removed_fanout;
+      }
+    }
+  }
+}
+
+
+/*
+  Remove the last join tab from from join->cur_sj_inner_tables bitmap
+  we assume remaining_tables doesnt contain @tab.
+*/
+
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx)
+{
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= tab->emb_sj_nest))
+  {
+    /* If we're removing the last SJ-inner table, remove the sj-nest */
+    if ((remaining_tables & emb_sj_nest->sj_inner_tables) == 
+        (emb_sj_nest->sj_inner_tables & ~tab->table->map))
+    {
+      tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+    }
+  }
+}
+
+
+/*
+  Given a semi-join nest, find out which of the IN-equalities are bound
+
+  SYNOPSIS
+    get_bound_sj_equalities()
+      sj_nest           Semi-join nest
+      remaining_tables  Tables that are not yet bound
+
+  DESCRIPTION
+    Given a semi-join nest, find out which of the IN-equalities have their
+    left part expression bound (i.e. the said expression doesn't refer to
+    any of remaining_tables and can be evaluated).
+
+  RETURN
+    Bitmap of bound IN-equalities.
+*/
+
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables)
+{
+  List_iterator<Item> li(sj_nest->nested_join->sj_outer_expr_list);
+  Item *item;
+  uint i= 0;
+  ulonglong res= 0;
+  while ((item= li++))
+  {
+    /*
+      Q: should this take into account equality propagation and how?
+      A: If e->outer_side is an Item_field, walk over the equality
+         class and see if there is an element that is bound?
+      (this is an optional feature)
+    */
+    if (!(item->used_tables() & remaining_tables))
+    {
+      res |= 1ULL << i;
+    }
+  }
+  return res;
+}
+
+
+/*
+  Check if the last tables of the partial join order allow to use
+  sj-materialization strategy for them
+
+  SYNOPSIS
+    at_sjmat_pos()
+      join              
+      remaining_tables
+      tab                the last table's join tab
+      idx                last table's index
+      loose_scan    OUT  TRUE <=> use LooseScan
+
+  RETURN
+    TRUE   Yes, can apply sj-materialization
+    FALSE  No, some of the requirements are not met
+*/
+
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan)
+{
+  /*
+   Check if 
+    1. We're in a semi-join nest that can be run with SJ-materialization
+    2. All the tables correlated through the IN subquery are in the prefix
+  */
+  TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
+  table_map suffix= remaining_tables & ~tab->table->map;
+  if (emb_sj_nest && emb_sj_nest->sj_mat_info &&
+      !(suffix & emb_sj_nest->sj_inner_tables))
+  {
+    /* 
+      Walk back and check if all immediately preceding tables are from
+      this semi-join.
+    */
+    uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
+    for (uint i= 1; i < n_tables ; i++)
+    {
+      if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
+        return NULL;
+    }
+    *loose_scan= test(remaining_tables & ~tab->table->map &
+                             (emb_sj_nest->sj_inner_tables |
+                              emb_sj_nest->nested_join->sj_depends_on));
+    if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed)
+      return NULL;
+    else
+      return emb_sj_nest->sj_mat_info;
+  }
+  return NULL;
+}
+
+
+
+/*
+  Fix semi-join strategies for the picked join order
+
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
+
+  DESCRIPTION
+    Fix semi-join strategies for the picked join order. This is a step that
+    needs to be done right after we have fixed the join order. What we do
+    here is switch join's semi-join strategy description from backward-based
+    to forwards based.
+    
+    When join optimization is in progress, we re-consider semi-join
+    strategies after we've added another table. Here's an illustration.
+    Suppose the join optimization is underway:
+
+    1) ot1  it1  it2 
+                 sjX  -- looking at (ot1, it1, it2) join prefix, we decide
+                         to use semi-join strategy sjX.
+
+    2) ot1  it1  it2  ot2 
+                 sjX  sjY -- Having added table ot2, we now may consider
+                             another semi-join strategy and decide to use a 
+                             different strategy sjY. Note that the record
+                             of sjX has remained under it2. That is
+                             necessary because we need to be able to get
+                             back to (ot1, it1, it2) join prefix.
+      what makes things even worse is that there are cases where the choice
+      of sjY changes the way we should access it2. 
+
+    3) [ot1  it1  it2  ot2  ot3]
+                  sjX  sjY  -- This means that after join optimization is
+                               finished, semi-join info should be read
+                               right-to-left (while nearly all plan refinement
+                               functions, EXPLAIN, etc proceed from left to 
+                               right)
+
+    This function does the needed reversal, making it possible to read the
+    join and semi-join order from left to right.
+*/    
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
+{
+  uint table_count=join->tables;
+  uint tablenr;
+  table_map remaining_tables= 0;
+  table_map handled_tabs= 0;
+  for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
+  {
+    POSITION *pos= join->best_positions + tablenr;
+    JOIN_TAB *s= pos->table;
+    uint first;
+    LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it
+
+    if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE)
+    {
+      remaining_tables |= s->table->map;
+      continue;
+    }
+    
+    if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
+    {
+      SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= FALSE;
+      memcpy(pos - sjm->tables + 1, sjm->positions, 
+             sizeof(POSITION) * sjm->tables);
+      first= tablenr - sjm->tables + 1;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE;
+    }
+    else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+    {
+      POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
+      SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
+      sjm->is_used= TRUE;
+      sjm->is_sj_scan= TRUE;
+      first= pos->sjm_scan_last_inner - sjm->tables + 1;
+      memcpy(join->best_positions + first, 
+             sjm->positions, sizeof(POSITION) * sjm->tables);
+      join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
+      join->best_positions[first].n_sj_tables= sjm->tables;
+      /* 
+        Do what advance_sj_state did: re-run best_access_path for every table
+        in the [last_inner_table + 1; pos..) range
+      */
+      double prefix_rec_count;
+      /* Get the prefix record count */
+      if (first == join->const_tables)
+        prefix_rec_count= 1.0;
+      else
+        prefix_rec_count= join->best_positions[first-1].prefix_record_count;
+      
+      /* Add materialization record count*/
+      prefix_rec_count *= sjm->rows;
+      
+      uint i;
+      table_map rem_tables= remaining_tables;
+      for (i= tablenr; i != (first + sjm->tables - 1); i--)
+        rem_tables |= join->best_positions[i].table->table->map;
+
+      POSITION dummy;
+      join->cur_sj_inner_tables= 0;
+      for (i= first + sjm->tables; i <= tablenr; i++)
+      {
+        best_access_path(join, join->best_positions[i].table, rem_tables, i, FALSE,
+                         prefix_rec_count, join->best_positions + i, &dummy);
+        prefix_rec_count *= join->best_positions[i].records_read;
+        rem_tables &= ~join->best_positions[i].table->table->map;
+      }
+    }
+ 
+    if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
+    {
+      first= pos->first_firstmatch_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+      POSITION dummy; // For loose scan paths
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[tablenr - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        rem_tables |= join->best_positions[idx].table->table->map;
+      }
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer)
+        {
+           best_access_path(join, join->best_positions[idx].table, 
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx, &dummy);
+        }
+        record_count *= join->best_positions[idx].records_read;
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+      }
+    }
+
+    if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) 
+    {
+      first= pos->first_loosescan_table;
+      POSITION *first_pos= join->best_positions + first;
+      POSITION loose_scan_pos; // For loose scan paths
+      double record_count= (first== join->const_tables)? 1.0: 
+                           join->best_positions[tablenr - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+        rem_tables |= join->best_positions[idx].table->table->map;
+      /*
+        Re-run best_access_path to produce best access methods that do not use
+        join buffering
+      */ 
+      join->cur_sj_inner_tables= 0;
+      for (idx= first; idx <= tablenr; idx++)
+      {
+        if (join->best_positions[idx].use_join_buffer || (idx == first))
+        {
+           best_access_path(join, join->best_positions[idx].table,
+                            rem_tables, idx, TRUE /* no jbuf */,
+                            record_count, join->best_positions + idx,
+                            &loose_scan_pos);
+           if (idx==first)
+             join->best_positions[idx]= loose_scan_pos;
+        }
+        rem_tables &= ~join->best_positions[idx].table->table->map;
+        record_count *= join->best_positions[idx].records_read;
+      }
+      first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
+    }
+
+    if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
+    {
+      /* 
+        Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
+        this table.
+      */
+      first= pos->first_dupsweedout_table;
+      join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+      join->best_positions[first].n_sj_tables= tablenr - first + 1;
+    }
+    
+    uint i_end= first + join->best_positions[first].n_sj_tables;
+    for (uint i= first; i < i_end; i++)
+    {
+      if (i != first)
+        join->best_positions[i].sj_strategy= SJ_OPT_NONE;
+      handled_tabs |= join->best_positions[i].table->table->map;
+    }
+
+    if (tablenr != first)
+      pos->sj_strategy= SJ_OPT_NONE;
+    remaining_tables |= s->table->map;
+    //s->sj_strategy= pos->sj_strategy;
+    join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
+  }
+}
+
+/*
+  Setup semi-join materialization strategy for one semi-join nest
+  
+  SYNOPSIS
+
+  setup_sj_materialization()
+    tab  The first tab in the semi-join
+
+  DESCRIPTION
+    Setup execution structures for one semi-join materialization nest:
+    - Create the materialization temporary table
+    - If we're going to do index lookups
+        create TABLE_REF structure to make the lookus
+    - else (if we're going to do a full scan of the temptable)
+        create Copy_field structures to do copying.
+
+  RETURN
+    FALSE  Ok
+    TRUE   Error
+*/
+
+bool setup_sj_materialization(JOIN_TAB *tab)
+{
+  uint i;
+  DBUG_ENTER("setup_sj_materialization");
+  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
+  THD *thd= tab->join->thd;
+  /* First the calls come to the materialization function */
+  List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
+
+  /* 
+    Set up the table to write to, do as select_union::create_result_table does
+  */
+  sjm->sjm_table_param.init();
+  sjm->sjm_table_param.field_count= item_list.elements;
+  sjm->sjm_table_param.bit_fields_as_long= TRUE;
+  List_iterator<Item> it(item_list);
+  Item *right_expr;
+  while((right_expr= it++))
+    sjm->sjm_table_cols.push_back(right_expr);
+
+  if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, 
+                                     sjm->sjm_table_cols, (ORDER*) 0, 
+                                     TRUE /* distinct */, 
+                                     1, /*save_sum_fields*/
+                                     thd->options | TMP_TABLE_ALL_COLUMNS, 
+                                     HA_POS_ERROR /*rows_limit */, 
+                                     (char*)"sj-materialize")))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  sjm->table->file->extra(HA_EXTRA_WRITE_CACHE);
+  sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  tab->join->sj_tmp_tables.push_back(sjm->table);
+  tab->join->sjm_info_list.push_back(sjm);
+  
+  sjm->materialized= FALSE;
+  if (!sjm->is_sj_scan)
+  {
+    KEY           *tmp_key; /* The only index on the temporary table. */
+    uint          tmp_key_parts; /* Number of keyparts in tmp_key. */
+    tmp_key= sjm->table->key_info;
+    tmp_key_parts= tmp_key->key_parts;
+    
+    /*
+      Create/initialize everything we will need to index lookups into the
+      temptable.
+    */
+    TABLE_REF *tab_ref;
+    if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF))))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+    tab_ref->key= 0; /* The only temp table index. */
+    tab_ref->key_length= tmp_key->key_length;
+    if (!(tab_ref->key_buff=
+          (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+        !(tab_ref->key_copy=
+          (store_key**) thd->alloc((sizeof(store_key*) *
+                                    (tmp_key_parts + 1)))) ||
+        !(tab_ref->items=
+          (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+
+    tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length);
+    tab_ref->key_err=1;
+    tab_ref->null_rejecting= 1;
+    tab_ref->disable_cache= FALSE;
+
+    KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+    store_key **ref_key= tab_ref->key_copy;
+    uchar *cur_ref_buff= tab_ref->key_buff;
+    
+    for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+    {
+      tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i);
+      int null_count= test(cur_key_part->field->real_maybe_null());
+      *ref_key= new store_key_item(thd, cur_key_part->field,
+                                   /* TODO:
+                                      the NULL byte is taken into account in
+                                      cur_key_part->store_length, so instead of
+                                      cur_ref_buff + test(maybe_null), we could
+                                      use that information instead.
+                                   */
+                                   cur_ref_buff + null_count,
+                                   null_count ? tab_ref->key_buff : 0,
+                                   cur_key_part->length, tab_ref->items[i],
+                                   FALSE);
+      cur_ref_buff+= cur_key_part->store_length;
+    }
+    *ref_key= NULL; /* End marker. */
+    tab_ref->key_err= 1;
+    tab_ref->key_parts= tmp_key_parts;
+    sjm->tab_ref= tab_ref;
+
+    /*
+      Remove the injected semi-join IN-equalities from join_tab conds. This
+      needs to be done because the IN-equalities refer to columns of
+      sj-inner tables which are not available after the materialization
+      has been finished.
+    */
+    for (i= 0; i < sjm->tables; i++)
+    {
+      remove_sj_conds(&tab[i].select_cond);
+      if (tab[i].select)
+        remove_sj_conds(&tab[i].select->cond);
+    }
+    if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm,
+                                                      emb_sj_nest->sj_subq_pred)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+  }
+  else
+  {
+    /*
+      We'll be doing full scan of the temptable.  
+      Setup copying of temptable columns back to the record buffers
+      for their source tables. We need this because IN-equalities
+      refer to the original tables.
+
+      EXAMPLE
+
+      Consider the query:
+        SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it)
+      
+      Suppose it's executed with SJ-Materialization-scan. We choose to do scan
+      if we can't do the lookup, i.e. the join order is (it, ot). The plan
+      would look as follows:
+
+        table    access method      condition
+         it      materialize+scan    -
+         ot      (whatever)          ot1.col1=it.col2 (C2)
+
+      The condition C2 refers to current row of table it. The problem is
+      that by the time we evaluate C2, we would have finished with scanning
+      it itself and will be scanning the temptable. 
+
+      At the moment, our solution is to copy back: when we get the next
+      temptable record, we copy its columns to their corresponding columns
+      in the record buffers for the source tables. 
+    */
+    sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
+    it.rewind();
+    for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
+    {
+      bool dummy;
+      Item_equal *item_eq;
+      Item *item= (it++)->real_item();
+      DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+      Field *copy_to= ((Item_field*)item)->field;
+      /*
+        Tricks with Item_equal are due to the following: suppose we have a
+        query:
+        
+        ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2
+                                               WHERE it1.col= it2.col)
+         then equality propagation will create an 
+         
+           Item_equal(it1.col, it2.col, ot.col) 
+         
+         then substitute_for_best_equal_field() will change the conditions
+         according to the join order:
+
+           it1
+           it2    it1.col=it2.col
+           ot     cond(it1.col)
+
+         although we've originally had "SELECT it2.col", conditions attached 
+         to subsequent outer tables will refer to it1.col, so SJM-Scan will
+         need to unpack data to there. 
+         That is, if an element from subquery's select list participates in 
+         equality propagation, then we need to unpack it to the first
+         element equality propagation member that refers to table that is
+         within the subquery.
+      */
+      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);
+
+      if (item_eq)
+      {
+        List_iterator<Item_field> it(item_eq->fields);
+        Item_field *item;
+        while ((item= it++))
+        {
+          if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables))
+          {
+            copy_to= item->field;
+            break;
+          }
+        }
+      }
+      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
+      /* The write_set for source tables must be set up to allow the copying */
+      bitmap_set_bit(copy_to->table->write_set, copy_to->field_index);
+    }
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+
+/*
+  Create subquery IN-equalities assuming use of materialization strategy
+  
+  SYNOPSIS
+    create_subq_in_equalities()
+      thd        Thread handle
+      sjm        Semi-join materialization structure
+      subq_pred  The subquery predicate
+
+  DESCRIPTION
+    Create subquery IN-equality predicates. That is, for a subquery
+    
+      (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...)
+    
+    create "oe1=ie1 AND ie1=ie2 AND ..." expression, such that ie1, ie2, ..
+    refer to the columns of the table that's used to materialize the
+    subquery.
+
+  RETURN 
+    Created condition
+*/
+
+static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
+                                Item_in_subselect *subq_pred)
+{
+  Item *res= NULL;
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    if (!(res= new Item_func_eq(subq_pred->left_expr,
+                                new Item_field(sjm->table->field[0]))))
+      return NULL; /* purecov: inspected */
+  }
+  else
+  {
+    Item *conj;
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      if (!(conj= new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                                   new Item_field(sjm->table->field[i]))) ||
+          !(res= and_items(res, conj)))
+        return NULL; /* purecov: inspected */
+    }
+  }
+  if (res->fix_fields(thd, &res))
+    return NULL; /* purecov: inspected */
+  return res;
+}
+
+
+
+
+static void remove_sj_conds(Item **tree)
+{
+  if (*tree)
+  {
+    if (is_cond_sj_in_equality(*tree))
+    {
+      *tree= NULL;
+      return;
+    }
+    else if ((*tree)->type() == Item::COND_ITEM) 
+    {
+      Item *item;
+      List_iterator<Item> li(*(((Item_cond*)*tree)->argument_list()));
+      while ((item= li++))
+      {
+        if (is_cond_sj_in_equality(item))
+          li.replace(new Item_int(1));
+      }
+    }
+  }
+}
+
+/* Check if given Item was injected by semi-join equality */
+static bool is_cond_sj_in_equality(Item *item)
+{
+  if (item->type() == Item::FUNC_ITEM &&
+      ((Item_func*)item)->functype()== Item_func::EQ_FUNC)
+  {
+    Item_func_eq *item_eq= (Item_func_eq*)item;
+    return test(item_eq->in_equality_no != UINT_MAX);
+  }
+  return FALSE;
+}
+
+
+/*
+  Create a temporary table to weed out duplicate rowid combinations
+
+  SYNOPSIS
+
+    create_duplicate_weedout_tmp_table()
+      thd                    Thread handle
+      uniq_tuple_length_arg  Length of the table's column
+      sjtbl                  Update sjtbl->[start_]recinfo values which 
+                             will be needed if we'll need to convert the 
+                             created temptable from HEAP to MyISAM/Maria.
+
+  DESCRIPTION
+    Create a temporary table to weed out duplicate rowid combinations. The
+    table has a single column that is a concatenation of all rowids in the
+    combination. 
+
+    Depending on the needed length, there are two cases:
+
+    1. When the length of the column < max_key_length:
+
+      CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
+
+    2. Otherwise (not a valid SQL syntax but internally supported):
+
+      CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
+
+    The code in this function was produced by extraction of relevant parts
+    from create_tmp_table().
+
+  RETURN
+    created table
+    NULL on error
+*/
+
+TABLE *create_duplicate_weedout_tmp_table(THD *thd, 
+                                          uint uniq_tuple_length_arg,
+                                          SJ_TMP_TABLE *sjtbl)
+{
+  MEM_ROOT *mem_root_save, own_root;
+  TABLE *table;
+  TABLE_SHARE *share;
+  uint  temp_pool_slot=MY_BIT_NONE;
+  char	*tmpname,path[FN_REFLEN];
+  Field **reg_field;
+  KEY_PART_INFO *key_part_info;
+  KEY *keyinfo;
+  uchar *group_buff;
+  uchar *bitmaps;
+  uint *blob_field;
+  ENGINE_COLUMNDEF *recinfo, *start_recinfo;
+  bool using_unique_constraint=FALSE;
+  bool use_packed_rows= FALSE;
+  Field *field, *key_field;
+  uint blob_count, null_pack_length, null_count;
+  uchar *null_flags;
+  uchar *pos;
+  DBUG_ENTER("create_duplicate_weedout_tmp_table");
+  DBUG_ASSERT(!sjtbl->is_degenerate);
+  /*
+    STEP 1: Get temporary table name
+  */
+  statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
+  if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+    temp_pool_slot = bitmap_lock_set_next(&temp_pool);
+
+  if (temp_pool_slot != MY_BIT_NONE) // we got a slot
+    sprintf(path, "%s_%lx_%i", tmp_file_prefix,
+	    current_pid, temp_pool_slot);
+  else
+  {
+    /* if we run out of slots or we are not using tempool */
+    sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+            thd->thread_id, thd->tmp_table++);
+  }
+  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
+
+  /* STEP 2: Figure if we'll be using a key or blob+constraint */
+  if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB)
+    using_unique_constraint= TRUE;
+
+  /* STEP 3: Allocate memory for temptable description */
+  init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
+  if (!multi_alloc_root(&own_root,
+                        &table, sizeof(*table),
+                        &share, sizeof(*share),
+                        &reg_field, sizeof(Field*) * (1+1),
+                        &blob_field, sizeof(uint)*2,
+                        &keyinfo, sizeof(*keyinfo),
+                        &key_part_info, sizeof(*key_part_info) * 2,
+                        &start_recinfo,
+                        sizeof(*recinfo)*(1*2+4),
+                        &tmpname, (uint) strlen(path)+1,
+                        &group_buff, (!using_unique_constraint ?
+                                      uniq_tuple_length_arg : 0),
+                        &bitmaps, bitmap_buffer_size(1)*3,
+                        NullS))
+  {
+    if (temp_pool_slot != MY_BIT_NONE)
+      bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+    DBUG_RETURN(NULL);
+  }
+  strmov(tmpname,path);
+  
+
+  /* STEP 4: Create TABLE description */
+  bzero((char*) table,sizeof(*table));
+  bzero((char*) reg_field,sizeof(Field*)*2);
+
+  table->mem_root= own_root;
+  mem_root_save= thd->mem_root;
+  thd->mem_root= &table->mem_root;
+
+  table->field=reg_field;
+  table->alias= "weedout-tmp";
+  table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
+  table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
+  table->map=1;
+  table->temp_pool_slot = temp_pool_slot;
+  table->copy_blobs= 1;
+  table->in_use= thd;
+  table->quick_keys.init();
+  table->covering_keys.init();
+  table->keys_in_use_for_query.init();
+
+  table->s= share;
+  init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
+  share->blob_field= blob_field;
+  share->blob_ptr_size= portable_sizeof_char_ptr;
+  share->db_low_byte_first=1;                // True for HEAP and MyISAM
+  share->table_charset= NULL;
+  share->primary_key= MAX_KEY;               // Indicate no primary key
+  share->keys_for_keyread.init();
+  share->keys_in_use.init();
+
+  blob_count= 0;
+
+  /* Create the field */
+  {
+    /*
+      For the sake of uniformity, always use Field_varstring (altough we could
+      use Field_string for shorter keys)
+    */
+    field= new Field_varstring(uniq_tuple_length_arg, FALSE, "rowids", share,
+                               &my_charset_bin);
+    if (!field)
+      DBUG_RETURN(0);
+    field->table= table;
+    field->key_start.init(0);
+    field->part_of_key.init(0);
+    field->part_of_sortkey.init(0);
+    field->unireg_check= Field::NONE;
+    field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG);
+    field->reset_fields();
+    field->init(table);
+    field->orig_table= NULL;
+     
+    field->field_index= 0;
+    
+    *(reg_field++)= field;
+    *blob_field= 0;
+    *reg_field= 0;
+
+    share->fields= 1;
+    share->blob_fields= 0;
+  }
+
+  uint reclength= field->pack_length();
+  if (using_unique_constraint)
+  { 
+    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+    DBUG_ASSERT(uniq_tuple_length_arg <= table->file->max_key_length());
+  }
+  else
+  {
+    share->db_plugin= ha_lock_engine(0, heap_hton);
+    table->file= get_new_handler(share, &table->mem_root,
+                                 share->db_type());
+  }
+  if (!table->file)
+    goto err;
+
+  null_count=1;
+  
+  null_pack_length= 1;
+  reclength += null_pack_length;
+
+  share->reclength= reclength;
+  {
+    uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
+    share->rec_buff_length= alloc_length;
+    if (!(table->record[0]= (uchar*)
+                            alloc_root(&table->mem_root, alloc_length*3)))
+      goto err;
+    table->record[1]= table->record[0]+alloc_length;
+    share->default_values= table->record[1]+alloc_length;
+  }
+  setup_tmp_table_column_bitmaps(table, bitmaps);
+
+  recinfo= start_recinfo;
+  null_flags=(uchar*) table->record[0];
+  pos=table->record[0]+ null_pack_length;
+  if (null_pack_length)
+  {
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    recinfo->type=FIELD_NORMAL;
+    recinfo->length=null_pack_length;
+    recinfo++;
+    bfill(null_flags,null_pack_length,255);	// Set null fields
+
+    table->null_flags= (uchar*) table->record[0];
+    share->null_fields= null_count;
+    share->null_bytes= null_pack_length;
+  }
+  null_count=1;
+
+  {
+    //Field *field= *reg_field;
+    uint length;
+    bzero((uchar*) recinfo,sizeof(*recinfo));
+    field->move_field(pos,(uchar*) 0,0);
+
+    field->reset();
+    /*
+      Test if there is a default field value. The test for ->ptr is to skip
+      'offset' fields generated by initalize_tables
+    */
+    // Initialize the table field:
+    bzero(field->ptr, field->pack_length());
+
+    length=field->pack_length();
+    pos+= length;
+
+    /* Make entry for create table */
+    recinfo->length=length;
+    if (field->flags & BLOB_FLAG)
+      recinfo->type= FIELD_BLOB;
+    else if (use_packed_rows &&
+             field->real_type() == MYSQL_TYPE_STRING &&
+	     length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
+      recinfo->type=FIELD_SKIP_ENDSPACE;
+    else
+      recinfo->type=FIELD_NORMAL;
+
+    field->table_name= &table->alias;
+  }
+
+  //param->recinfo=recinfo;
+  //store_record(table,s->default_values);        // Make empty default record
+
+  if (thd->variables.tmp_table_size == ~ (ulonglong) 0)		// No limit
+    share->max_rows= ~(ha_rows) 0;
+  else
+    share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
+                                 min(thd->variables.tmp_table_size,
+                                     thd->variables.max_heap_table_size) :
+                                 thd->variables.tmp_table_size) /
+			         share->reclength);
+  set_if_bigger(share->max_rows,1);		// For dummy start options
+
+
+  //// keyinfo= param->keyinfo;
+  if (TRUE)
+  {
+    DBUG_PRINT("info",("Creating group key in temporary table"));
+    share->keys=1;
+    share->uniques= test(using_unique_constraint);
+    table->key_info=keyinfo;
+    keyinfo->key_part=key_part_info;
+    keyinfo->flags=HA_NOSAME;
+    keyinfo->usable_key_parts= keyinfo->key_parts= 1;
+    keyinfo->key_length=0;
+    keyinfo->rec_per_key=0;
+    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->name= (char*) "weedout_key";
+    {
+      key_part_info->null_bit=0;
+      key_part_info->field=  field;
+      key_part_info->offset= field->offset(table->record[0]);
+      key_part_info->length= (uint16) field->key_length();
+      key_part_info->type=   (uint8) field->key_type();
+      key_part_info->key_type = FIELDFLAG_BINARY;
+      if (!using_unique_constraint)
+      {
+	if (!(key_field= field->new_key_field(thd->mem_root, table,
+                                              group_buff,
+                                              field->null_ptr,
+                                              field->null_bit)))
+	  goto err;
+        key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; //todo need this?
+      }
+      keyinfo->key_length+=  key_part_info->length;
+    }
+  }
+
+  if (thd->is_fatal_error)				// If end of memory
+    goto err;
+  share->db_record_offset= 1;
+  if (share->db_type() == TMP_ENGINE_HTON)
+  {
+    recinfo++;
+    if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0))
+      goto err;
+  }
+  sjtbl->start_recinfo= start_recinfo;
+  sjtbl->recinfo=       recinfo;
+  if (open_tmp_table(table))
+    goto err;
+
+  thd->mem_root= mem_root_save;
+  DBUG_RETURN(table);
+
+err:
+  thd->mem_root= mem_root_save;
+  free_tmp_table(thd,table);                    /* purecov: inspected */
+  if (temp_pool_slot != MY_BIT_NONE)
+    bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
+  DBUG_RETURN(NULL);				/* purecov: inspected */
+}
+
+
+/*
+  SemiJoinDuplicateElimination: Reset the temporary table
+*/
+
+int do_sj_reset(SJ_TMP_TABLE *sj_tbl)
+{
+  DBUG_ENTER("do_sj_reset");
+  if (sj_tbl->tmp_table)
+  {
+    int rc= sj_tbl->tmp_table->file->ha_delete_all_rows();
+    DBUG_RETURN(rc);
+  }
+  sj_tbl->have_degenerate_row= FALSE;
+  DBUG_RETURN(0);
+}
+
+/*
+  SemiJoinDuplicateElimination: Weed out duplicate row combinations
+
+  SYNPOSIS
+    do_sj_dups_weedout()
+      thd    Thread handle
+      sjtbl  Duplicate weedout table
+
+  DESCRIPTION
+    Try storing current record combination of outer tables (i.e. their
+    rowids) in the temporary table. This records the fact that we've seen 
+    this record combination and also tells us if we've seen it before.
+
+  RETURN
+    -1  Error
+    1   The row combination is a duplicate (discard it)
+    0   The row combination is not a duplicate (continue)
+*/
+
+int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) 
+{
+  int error;
+  SJ_TMP_TABLE::TAB *tab= sjtbl->tabs;
+  SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end;
+  uchar *ptr;
+  uchar *nulls_ptr;
+
+  DBUG_ENTER("do_sj_dups_weedout");
+
+  if (sjtbl->is_degenerate)
+  {
+    if (sjtbl->have_degenerate_row) 
+      DBUG_RETURN(1);
+
+    sjtbl->have_degenerate_row= TRUE;
+    DBUG_RETURN(0);
+  }
+
+  ptr= sjtbl->tmp_table->record[0] + 1;
+  nulls_ptr= ptr;
+
+  /* Put the the rowids tuple into table->record[0]: */
+
+  // 1. Store the length 
+  if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1)
+  {
+    *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes);
+    ptr++;
+  }
+  else
+  {
+    int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes);
+    ptr += 2;
+  }
+
+  // 2. Zero the null bytes 
+  if (sjtbl->null_bytes)
+  {
+    bzero(ptr, sjtbl->null_bytes);
+    ptr += sjtbl->null_bytes; 
+  }
+
+  // 3. Put the rowids
+  for (uint i=0; tab != tab_end; tab++, i++)
+  {
+    handler *h= tab->join_tab->table->file;
+    if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row)
+    {
+      /* It's a NULL-complemented row */
+      *(nulls_ptr + tab->null_byte) |= tab->null_bit;
+      bzero(ptr + tab->rowid_offset, h->ref_length);
+    }
+    else
+    {
+      /* Copy the rowid value */
+      memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length);
+    }
+  }
+
+  error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]);
+  if (error)
+  {
+    /* create_internal_tmp_table_from_heap will generate error if needed */
+    if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
+      DBUG_RETURN(1); /* Duplicate */
+    if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
+                                            sjtbl->start_recinfo,
+                                            &sjtbl->recinfo, error, 1))
+      DBUG_RETURN(-1);
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Setup the strategies to eliminate semi-join duplicates.
+  
+  SYNOPSIS
+    setup_semijoin_dups_elimination()
+      join           Join to process
+      options        Join options (needed to see if join buffering will be 
+                     used or not)
+      no_jbuf_after  Another bit of information re where join buffering will
+                     be used.
+
+  DESCRIPTION
+    Setup the strategies to eliminate semi-join duplicates. ATM there are 4
+    strategies:
+
+    1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
+                         of row combinations)
+    2. FirstMatch (pick only the 1st matching row combination of inner tables)
+    3. LooseScan (scanning the sj-inner table in a way that groups duplicates
+                  together and picking the 1st one)
+    4. SJ-Materialization.
+    
+    The join order has "duplicate-generating ranges", and every range is
+    served by one strategy or a combination of FirstMatch with with some
+    other strategy.
+    
+    "Duplicate-generating range" is defined as a range within the join order
+    that contains all of the inner tables of a semi-join. All ranges must be
+    disjoint, if tables of several semi-joins are interleaved, then the ranges
+    are joined together, which is equivalent to converting
+      SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
+    to
+      SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
+    .
+
+    Applicability conditions are as follows:
+
+    DuplicateWeedout strategy
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
+      +------+  +=========================+  +---+
+        (1)                 (2)               (3)
+
+       (1) - Prefix of OuterTables (those that participate in 
+             IN-equality and/or are correlated with subquery) and outer 
+             Non-correlated tables.
+       (2) - The handled range. The range starts with the first sj-inner
+             table, and covers all sj-inner and outer tables 
+             Within the range,  Inner, Outer, outer non-correlated tables
+             may follow in any order.
+       (3) - The suffix of outer non-correlated tables.
+    
+    FirstMatch strategy
+    ~~~~~~~~~~~~~~~~~~~
+
+      (ot|nt)*  [ it ((it|nt)* it) ]  (nt)*
+      +------+  +==================+  +---+
+        (1)             (2)          (3)
+
+      (1) - Prefix of outer and non-correlated tables
+      (2) - The handled range, which may contain only inner and
+            non-correlated tables.
+      (3) - The suffix of outer non-correlated tables.
+
+    LooseScan strategy 
+    ~~~~~~~~~~~~~~~~~~
+
+     (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
+     +--------+   +===========+ +=============+   +------+
+        (1)           (2)          (3)              (4)
+     
+      (1) - Prefix that may contain any outer tables. The prefix must contain
+            all the non-trivially correlated outer tables. (non-trivially means
+            that the correlation is not just through the IN-equality).
+      
+      (2) - Inner table for which the LooseScan scan is performed.
+
+      (3) - The remainder of the duplicate-generating range. It is served by 
+            application of FirstMatch strategy, with the exception that
+            outer IN-correlated tables are considered to be non-correlated.
+
+      (4) - THe suffix of outer and outer non-correlated tables.
+
+  
+  The choice between the strategies is made by the join optimizer (see
+  advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()).
+  This function sets up all fields/structures/etc needed for execution except
+  for setup/initialization of semi-join materialization which is done in 
+  setup_sj_materialization() (todo: can't we move that to here also?)
+
+  RETURN
+    FALSE  OK 
+    TRUE   Out of memory error
+*/
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after)
+{
+  uint i;
+  THD *thd= join->thd;
+  DBUG_ENTER("setup_semijoin_dups_elimination");
+
+  for (i= join->const_tables ; i < join->tables; )
+  {
+    JOIN_TAB *tab=join->join_tab + i;
+    POSITION *pos= join->best_positions + i;
+    uint keylen, keyno;
+    switch (pos->sj_strategy) {
+      case SJ_OPT_MATERIALIZE:
+      case SJ_OPT_MATERIALIZE_SCAN:
+        /* Do nothing */
+        i+= pos->n_sj_tables;
+        break;
+      case SJ_OPT_LOOSE_SCAN:
+      {
+        /* We jump from the last table to the first one */
+        tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
+
+        /* Calculate key length */
+        keylen= 0;
+        keyno= pos->loosescan_key;
+        for (uint kp=0; kp < pos->loosescan_parts; kp++)
+          keylen += tab->table->key_info[keyno].key_part[kp].store_length;
+
+        tab->loosescan_key_len= keylen;
+        if (pos->n_sj_tables > 1) 
+          tab[pos->n_sj_tables - 1].do_firstmatch= tab;
+        i+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_DUPS_WEEDOUT:
+      {
+        /*
+          Check for join buffering. If there is one, move the first table
+          forwards, but do not destroy other duplicate elimination methods.
+        */
+        uint first_table= i;
+        uint join_cache_level= join->thd->variables.join_cache_level;
+        for (uint j= i; j < i + pos->n_sj_tables; j++)
+        {
+          /*
+            When we'll properly take join buffering into account during
+            join optimization, the below check should be changed to 
+            "if (join->best_positions[j].use_join_buffer && 
+                 j <= no_jbuf_after)".
+            For now, use a rough criteria:
+          */
+          JOIN_TAB *js_tab=join->join_tab + j; 
+          if (j != join->const_tables && js_tab->use_quick != 2 &&
+              j <= no_jbuf_after &&
+              ((js_tab->type == JT_ALL && join_cache_level != 0) ||
+               (join_cache_level > 4 && (tab->type == JT_REF || 
+                                         tab->type == JT_EQ_REF))))
+          {
+            /* Looks like we'll be using join buffer */
+            first_table= join->const_tables;
+            break;
+          }
+        }
+
+        SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
+        SJ_TMP_TABLE::TAB *last_tab= sjtabs;
+        uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
+        uint jt_null_bits= 0;    // # null bits in tuple bytes
+        /*
+          Walk through the range and remember
+           - tables that need their rowids to be put into temptable
+           - the last outer table
+        */
+        for (JOIN_TAB *j=join->join_tab + first_table; 
+             j < join->join_tab + i + pos->n_sj_tables; j++)
+        {
+          if (sj_table_is_included(join, j))
+          {
+            last_tab->join_tab= j;
+            last_tab->rowid_offset= jt_rowid_offset;
+            jt_rowid_offset += j->table->file->ref_length;
+            if (j->table->maybe_null)
+            {
+              last_tab->null_byte= jt_null_bits / 8;
+              last_tab->null_bit= jt_null_bits++;
+            }
+            last_tab++;
+            j->table->prepare_for_position();
+            j->keep_current_rowid= TRUE;
+          }
+        }
+
+        SJ_TMP_TABLE *sjtbl;
+        if (jt_rowid_offset) /* Temptable has at least one rowid */
+        {
+          uint tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
+          if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) ||
+              !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
+            DBUG_RETURN(TRUE); /* purecov: inspected */
+          memcpy(sjtbl->tabs, sjtabs, tabs_size);
+          sjtbl->is_degenerate= FALSE;
+          sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
+          sjtbl->rowid_len= jt_rowid_offset;
+          sjtbl->null_bits= jt_null_bits;
+          sjtbl->null_bytes= (jt_null_bits + 7)/8;
+          sjtbl->tmp_table= 
+            create_duplicate_weedout_tmp_table(thd, 
+                                               sjtbl->rowid_len + 
+                                               sjtbl->null_bytes,
+                                               sjtbl);
+          join->sj_tmp_tables.push_back(sjtbl->tmp_table);
+        }
+        else
+        {
+          /* 
+            This is a special case where the entire subquery predicate does 
+            not depend on anything at all, ie this is 
+              WHERE const IN (uncorrelated select)
+          */
+          if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))))
+            DBUG_RETURN(TRUE); /* purecov: inspected */
+          sjtbl->tmp_table= NULL;
+          sjtbl->is_degenerate= TRUE;
+          sjtbl->have_degenerate_row= FALSE;
+        }
+        join->join_tab[first_table].flush_weedout_table= sjtbl;
+        join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl;
+
+        i+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_FIRST_MATCH:
+      {
+        JOIN_TAB *j, *jump_to= tab-1;
+        for (j= tab; j != tab + pos->n_sj_tables; j++)
+        {
+          /*
+            NOTE: this loop probably doesn't do the right thing for the case 
+            where FirstMatch's duplicate-generating range is interleaved with
+            "unrelated" tables (as specified in WL#3750, section 2.2).
+          */
+          if (!j->emb_sj_nest)
+            jump_to= tab;
+          else
+          {
+            j->first_sj_inner_tab= tab;
+            j->last_sj_inner_tab= tab + pos->n_sj_tables - 1;
+          }
+        }
+        j[-1].do_firstmatch= jump_to;
+        i+= pos->n_sj_tables;
+        break;
+      }
+      case SJ_OPT_NONE:
+        i++;
+        break;
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Destroy all temporary tables created by NL-semijoin runtime
+*/
+
+void destroy_sj_tmp_tables(JOIN *join)
+{
+  List_iterator<TABLE> it(join->sj_tmp_tables);
+  TABLE *table;
+  while ((table= it++))
+  {
+    /* 
+      SJ-Materialization tables are initialized for either sequential reading 
+      or index lookup, DuplicateWeedout tables are not initialized for read 
+      (we only write to them), so need to call ha_index_or_rnd_end.
+    */
+    table->file->ha_index_or_rnd_end();
+    free_tmp_table(join->thd, table);
+  }
+  join->sj_tmp_tables.empty();
+  join->sjm_info_list.empty();
+}
+
+
+/*
+  Remove all records from all temp tables used by NL-semijoin runtime
+
+  SYNOPSIS
+    clear_sj_tmp_tables()
+      join  The join to remove tables for
+
+  DESCRIPTION
+    Remove all records from all temp tables used by NL-semijoin runtime. This 
+    must be done before every join re-execution.
+*/
+
+int clear_sj_tmp_tables(JOIN *join)
+{
+  int res;
+  List_iterator<TABLE> it(join->sj_tmp_tables);
+  TABLE *table;
+  while ((table= it++))
+  {
+    if ((res= table->file->ha_delete_all_rows()))
+      return res; /* purecov: inspected */
+  }
+
+  SJ_MATERIALIZATION_INFO *sjm;
+  List_iterator<SJ_MATERIALIZATION_INFO> it2(join->sjm_info_list);
+  while ((sjm= it2++))
+  {
+    sjm->materialized= FALSE;
+  }
+  return 0;
+}
+
+
+/*
+  Check if the table's rowid is included in the temptable
+
+  SYNOPSIS
+    sj_table_is_included()
+      join      The join
+      join_tab  The table to be checked
+
+  DESCRIPTION
+    SemiJoinDuplicateElimination: check the table's rowid should be included
+    in the temptable. This is so if
+
+    1. The table is not embedded within some semi-join nest
+    2. The has been pulled out of a semi-join nest, or
+
+    3. The table is functionally dependent on some previous table
+
+    [4. This is also true for constant tables that can't be
+        NULL-complemented but this function is not called for such tables]
+
+  RETURN
+    TRUE  - Include table's rowid
+    FALSE - Don't
+*/
+
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
+{
+  if (join_tab->emb_sj_nest)
+    return FALSE;
+  
+  /* Check if this table is functionally dependent on the tables that
+     are within the same outer join nest
+  */
+  TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding;
+  if (join_tab->type == JT_EQ_REF)
+  {
+    table_map depends_on= 0;
+    uint idx;
+
+    for (uint kp= 0; kp < join_tab->ref.key_parts; kp++)
+      depends_on |= join_tab->ref.items[kp]->used_tables();
+
+    Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
+    while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
+    {
+      JOIN_TAB *ref_tab= join->map2table[idx];
+      if (embedding != ref_tab->table->pos_in_table_list->embedding)
+        return TRUE;
+    }
+    /* Ok, functionally dependent */
+    return FALSE;
+  }
+  /* Not functionally dependent => need to include*/
+  return TRUE;
+}
+
+
+/*
+  Index lookup-based subquery: save some flags for EXPLAIN output
+
+  SYNOPSIS
+    save_index_subquery_explain_info()
+      join_tab  Subquery's join tab (there is only one as index lookup is
+                only used for subqueries that are single-table SELECTs)
+      where     Subquery's WHERE clause
+
+  DESCRIPTION
+    For index lookup-based subquery (i.e. one executed with
+    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
+    check its EXPLAIN output row should contain 
+      "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) 
+      "Using Where" (TAB_INFO_USING_WHERE)
+      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
+    and set appropriate flags in join_tab->packed_info.
+*/
+
+static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
+{
+  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
+  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
+    join_tab->packed_info |= TAB_INFO_USING_INDEX;
+  if (where)
+    join_tab->packed_info |= TAB_INFO_USING_WHERE;
+  for (uint i = 0; i < join_tab->ref.key_parts; i++)
+  {
+    if (join_tab->ref.cond_guards[i])
+    {
+      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
+      break;
+    }
+  }
+}
+
+
+/*
+  Check if the join can be rewritten to [unique_]indexsubquery_engine
+
+  DESCRIPTION
+    Check if the join can be changed into [unique_]indexsubquery_engine.
+
+    The check is done after join optimization, the idea is that if the join
+    has only one table and uses a [eq_]ref access generated from subselect's
+    IN-equality then we replace it with a subselect_indexsubquery_engine or a
+    subselect_uniquesubquery_engine.
+
+  RETURN 
+    0 - Ok, rewrite done (stop join optimization and return)
+    1 - Fatal error (stop join optimization and return)
+   -1 - No rewrite performed, continue with join optimization
+*/
+
+int rewrite_to_index_subquery_engine(JOIN *join)
+{
+  THD *thd= join->thd;
+  JOIN_TAB* join_tab=join->join_tab;
+  SELECT_LEX_UNIT *unit= join->unit;
+  DBUG_ENTER("rewrite_to_index_subquery_engine");
+  /*
+    is this simple IN subquery?
+  */
+  if (!join->group_list && !join->order &&
+      join->unit->item && 
+      join->unit->item->substype() == Item_subselect::IN_SUBS &&
+      join->tables == 1 && join->conds &&
+      !join->unit->is_union())
+  {
+    if (!join->having)
+    {
+      Item *where= join->conds;
+      if (join_tab[0].type == JT_EQ_REF &&
+	  join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+        remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_UNIQUE_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_uniquesubquery_engine(thd,
+                                                                  join_tab,
+                                                                  unit->item,
+                                                                  where)));
+      }
+      else if (join_tab[0].type == JT_REF &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name)
+      {
+	remove_subq_pushed_predicates(join, &where);
+        save_index_subquery_explain_info(join_tab, where);
+        join_tab[0].type= JT_INDEX_SUBQUERY;
+        join->error= 0;
+        DBUG_RETURN(unit->item->
+                    change_engine(new
+                                  subselect_indexsubquery_engine(thd,
+                                                                 join_tab,
+                                                                 unit->item,
+                                                                 where,
+                                                                 NULL,
+                                                                 0)));
+      }
+    } else if (join_tab[0].type == JT_REF_OR_NULL &&
+	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
+               join->having->name == in_having_cond)
+    {
+      join_tab[0].type= JT_INDEX_SUBQUERY;
+      join->error= 0;
+      join->conds= remove_additional_cond(join->conds);
+      save_index_subquery_explain_info(join_tab, join->conds);
+      DBUG_RETURN(unit->item->
+		  change_engine(new subselect_indexsubquery_engine(thd,
+								   join_tab,
+								   unit->item,
+								   join->conds,
+                                                                   join->having,
+								   1)));
+    }
+  }
+
+  DBUG_RETURN(-1); /* Haven't done the rewrite */
+}
+
+
+/**
+  Remove additional condition inserted by IN/ALL/ANY transformation.
+
+  @param conds   condition for processing
+
+  @return
+    new conditions
+*/
+
+static Item *remove_additional_cond(Item* conds)
+{
+  if (conds->name == in_additional_cond)
+    return 0;
+  if (conds->type() == Item::COND_ITEM)
+  {
+    Item_cond *cnd= (Item_cond*) conds;
+    List_iterator<Item> li(*(cnd->argument_list()));
+    Item *item;
+    while ((item= li++))
+    {
+      if (item->name == in_additional_cond)
+      {
+	li.remove();
+	if (cnd->argument_list()->elements == 1)
+	  return cnd->argument_list()->head();
+	return conds;
+      }
+    }
+  }
+  return conds;
+}
+
+
+/*
+  Remove the predicates pushed down into the subquery
+
+  SYNOPSIS
+    remove_subq_pushed_predicates()
+      where   IN  Must be NULL
+              OUT The remaining WHERE condition, or NULL
+
+  DESCRIPTION
+    Given that this join will be executed using (unique|index)_subquery,
+    without "checking NULL", remove the predicates that were pushed down
+    into the subquery.
+
+    If the subquery compares scalar values, we can remove the condition that
+    was wrapped into trig_cond (it will be checked when needed by the subquery
+    engine)
+
+    If the subquery compares row values, we need to keep the wrapped
+    equalities in the WHERE clause: when the left (outer) tuple has both NULL
+    and non-NULL values, we'll do a full table scan and will rely on the
+    equalities corresponding to non-NULL parts of left tuple to filter out
+    non-matching records.
+
+    TODO: We can remove the equalities that will be guaranteed to be true by the
+    fact that subquery engine will be using index lookup. This must be done only
+    for cases where there are no conversion errors of significance, e.g. 257
+    that is searched in a byte. But this requires homogenization of the return 
+    codes of all Field*::store() methods.
+*/
+
+static void remove_subq_pushed_predicates(JOIN *join, Item **where)
+{
+  if (join->conds->type() == Item::FUNC_ITEM &&
+      ((Item_func *)join->conds)->functype() == Item_func::EQ_FUNC &&
+      ((Item_func *)join->conds)->arguments()[0]->type() == Item::REF_ITEM &&
+      ((Item_func *)join->conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
+      test_if_ref (join->conds,
+                   (Item_field *)((Item_func *)join->conds)->arguments()[1],
+                   ((Item_func *)join->conds)->arguments()[0]))
+  {
+    *where= 0;
+    return;
+  }
+}
+
+
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
new file mode 100644
index 00000000000..d0206b0fd05
--- /dev/null
+++ b/sql/opt_subselect.h
@@ -0,0 +1,368 @@
+/* */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+int check_and_do_in_subquery_rewrites(JOIN *join);
+bool convert_join_subqueries_to_semijoins(JOIN *join);
+int pull_out_semijoin_tables(JOIN *join);
+bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
+
+// used by Loose_scan_opt
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, 
+                                  table_map remaining_tables);
+
+/*
+  This is a class for considering possible loose index scan optimizations.
+  It's usage pattern is as follows:
+    best_access_path()
+    {
+       Loose_scan_opt opt;
+
+       opt.init()
+       for each index we can do ref access with
+       {
+         opt.next_ref_key();
+         for each keyuse 
+           opt.add_keyuse();
+         opt.check_ref_access();
+       }
+
+       if (some criteria for range scans)
+         opt.check_range_access();
+       
+       opt.get_best_option();
+    }
+*/
+
+class Loose_scan_opt
+{
+public:
+  /* All methods must check this before doing anything else */
+  bool try_loosescan;
+
+  /*
+    If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
+    called sj-equality. If oeK depends only on preceding tables then such
+    equality is called 'bound'.
+  */
+  ulonglong bound_sj_equalities;
+ 
+  /* Accumulated properties of ref access we're now considering: */
+  ulonglong handled_sj_equalities;
+  key_part_map loose_scan_keyparts;
+  uint max_loose_keypart;
+  bool part1_conds_met;
+
+  /*
+    Use of quick select is a special case. Some of its properties:
+  */
+  uint quick_uses_applicable_index;
+  uint quick_max_loose_keypart;
+  
+  /* Best loose scan method so far */
+  uint   best_loose_scan_key;
+  double best_loose_scan_cost;
+  double best_loose_scan_records;
+  KEYUSE *best_loose_scan_start_key;
+
+  uint best_max_loose_keypart;
+
+  Loose_scan_opt():
+    try_loosescan(FALSE),
+    bound_sj_equalities(0),
+    quick_uses_applicable_index(FALSE)
+  {
+    UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */
+    /* The following are protected by best_loose_scan_cost!= DBL_MAX */
+    UNINIT_VAR(best_loose_scan_key);
+    UNINIT_VAR(best_loose_scan_records);
+    UNINIT_VAR(best_max_loose_keypart);
+    UNINIT_VAR(best_loose_scan_start_key);
+  }
+  
+  void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables)
+  {
+    /*
+      Discover the bound equalities. We need to do this if
+        1. The next table is an SJ-inner table, and
+        2. It is the first table from that semijoin, and
+        3. We're not within a semi-join range (i.e. all semi-joins either have
+           all or none of their tables in join_table_map), except
+           s->emb_sj_nest (which we've just entered, see #2).
+        4. All non-IN-equality correlation references from this sj-nest are 
+           bound
+        5. But some of the IN-equalities aren't (so this can't be handled by 
+           FirstMatch strategy)
+    */
+    best_loose_scan_cost= DBL_MAX;
+    if (!join->emb_sjm_nest && s->emb_sj_nest &&                        // (1)
+        s->emb_sj_nest->sj_in_exprs < 64 && 
+        ((remaining_tables & s->emb_sj_nest->sj_inner_tables) ==        // (2)
+         s->emb_sj_nest->sj_inner_tables) &&                            // (2)
+        join->cur_sj_inner_tables == 0 &&                                  // (3)
+        !(remaining_tables & 
+          s->emb_sj_nest->nested_join->sj_corr_tables) &&               // (4)
+        remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
+        optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
+    {
+      /* This table is an LooseScan scan candidate */
+      bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, 
+                                                   remaining_tables);
+      try_loosescan= TRUE;
+      DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
+                          (longlong)bound_sj_equalities));
+    }
+  }
+
+  void next_ref_key()
+  {
+    handled_sj_equalities=0;
+    loose_scan_keyparts= 0;
+    max_loose_keypart= 0;
+    part1_conds_met= FALSE;
+  }
+  
+  void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
+  {
+    if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
+    {
+      if (!(remaining_tables & keyuse->used_tables))
+      {
+        /* 
+          This allows to use equality propagation to infer that some 
+          sj-equalities are bound.
+        */
+        bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+      }
+      else
+      {
+        handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
+        loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
+        set_if_bigger(max_loose_keypart, keyuse->keypart);
+      }
+    }
+  }
+
+  bool have_a_case() { return test(handled_sj_equalities); }
+
+  void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, 
+                              table_map found_part)
+  {
+    /*
+      Check if we can use LooseScan semi-join strategy. We can if
+      1. This is the right table at right location
+      2. All IN-equalities are either
+         - "bound", ie. the outer_expr part refers to the preceding tables
+         - "handled", ie. covered by the index we're considering
+      3. Index order allows to enumerate subquery's duplicate groups in
+         order. This happens when the index definition matches this
+         pattern:
+
+           (handled_col|bound_col)* (other_col|bound_col)
+
+    */
+    if (try_loosescan &&                                       // (1)
+        (handled_sj_equalities | bound_sj_equalities) ==         // (2)
+        PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) &&     // (2)
+        (PREV_BITS(key_part_map, max_loose_keypart+1) &        // (3)
+         (found_part | loose_scan_keyparts)) ==                // (3)
+         (found_part | loose_scan_keyparts) &&                 // (3)
+        !key_uses_partial_cols(s->table, key))
+    {
+      /* Ok, can use the strategy */
+      part1_conds_met= TRUE;
+      if (s->quick && s->quick->index == key && 
+          s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
+      {
+        quick_uses_applicable_index= TRUE;
+        quick_max_loose_keypart= max_loose_keypart;
+      }
+      DBUG_PRINT("info", ("Can use LooseScan scan"));
+
+      /* 
+        Check if this is a special case where there are no usable bound
+        IN-equalities, i.e. we have
+
+          outer_expr IN (SELECT innertbl.key FROM ...) 
+        
+        and outer_expr cannot be evaluated yet, so it's actually full
+        index scan and not a ref access
+      */
+      if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
+          s->table->covering_keys.is_set(key))
+      {
+        DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
+        
+        /* Calculate the cost of complete loose index scan.  */
+        double records= rows2double(s->table->file->stats.records);
+
+        /* The cost is entire index scan cost (divided by 2) */
+        double read_time= s->table->file->keyread_time(key, 1, records);
+
+        /*
+          Now find out how many different keys we will get (for now we
+          ignore the fact that we have "keypart_i=const" restriction for
+          some key components, that may make us think think that loose
+          scan will produce more distinct records than it actually will)
+        */
+        ulong rpc;
+        if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
+          records= records / rpc;
+
+        // TODO: previous version also did /2
+        if (read_time < best_loose_scan_cost)
+        {
+          best_loose_scan_key= key;
+          best_loose_scan_cost= read_time;
+          best_loose_scan_records= records;
+          best_max_loose_keypart= max_loose_keypart;
+          best_loose_scan_start_key= start_key;
+        }
+      }
+    }
+  }
+  
+  void check_ref_access_part2(uint key, KEYUSE *start_key, double records, 
+                              double read_time)
+  {
+    if (part1_conds_met && read_time < best_loose_scan_cost)
+    {
+      /* TODO use rec-per-key-based fanout calculations */
+      best_loose_scan_key= key;
+      best_loose_scan_cost= read_time;
+      best_loose_scan_records= records;
+      best_max_loose_keypart= max_loose_keypart;
+      best_loose_scan_start_key= start_key;
+    }
+  }
+
+  void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
+  {
+    /* TODO: this the right part restriction: */
+    if (quick_uses_applicable_index && idx == join->const_tables && 
+        quick->read_time < best_loose_scan_cost)
+    {
+      best_loose_scan_key= quick->index;
+      best_loose_scan_cost= quick->read_time;
+      /* this is ok because idx == join->const_tables */
+      best_loose_scan_records= rows2double(quick->records);
+      best_max_loose_keypart= quick_max_loose_keypart;
+      best_loose_scan_start_key= NULL;
+    }
+  }
+
+  void save_to_position(JOIN_TAB *tab, POSITION *pos)
+  {
+    pos->read_time=       best_loose_scan_cost;
+    if (best_loose_scan_cost != DBL_MAX)
+    {
+      pos->records_read=    best_loose_scan_records;
+      pos->key=             best_loose_scan_start_key;
+      pos->loosescan_key=   best_loose_scan_key;
+      pos->loosescan_parts= best_max_loose_keypart + 1;
+      pos->use_join_buffer= FALSE;
+      pos->table=           tab;
+      // todo need ref_depend_map ?
+      DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
+                          tab->table->key_info[best_loose_scan_key].name,
+                          best_loose_scan_start_key? "(ref access)":
+                                                     "(range/index access)"));
+    }
+  }
+};
+
+
+void advance_sj_state(JOIN *join, const table_map remaining_tables, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
+                      double *current_record_count, double *current_read_time,
+                      POSITION *loose_scan_pos);
+void restore_prev_sj_state(const table_map remaining_tables, 
+                                  const JOIN_TAB *tab, uint idx);
+
+void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
+bool setup_sj_materialization(JOIN_TAB *tab);
+
+TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
+                                          SJ_TMP_TABLE *sjtbl);
+int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
+int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
+
+/*
+  Temporary table used by semi-join DuplicateElimination strategy
+
+  This consists of the temptable itself and data needed to put records
+  into it. The table's DDL is as follows:
+
+    CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
+
+  where the primary key can be replaced with unique constraint if n exceeds
+  the limit (as it is always done for query execution-time temptables).
+
+  The record value is a concatenation of rowids of tables from the join we're
+  executing. If a join table is on the inner side of the outer join, we
+  assume that its rowid can be NULL and provide means to store this rowid in
+  the tuple.
+*/
+
+class SJ_TMP_TABLE : public Sql_alloc
+{
+public:
+  /*
+    Array of pointers to tables whose rowids compose the temporary table
+    record.
+  */
+  class TAB
+  {
+  public:
+    JOIN_TAB *join_tab;
+    uint rowid_offset;
+    ushort null_byte;
+    uchar null_bit;
+  };
+  TAB *tabs;
+  TAB *tabs_end;
+  
+  /* 
+    is_degenerate==TRUE means this is a special case where the temptable record
+    has zero length (and presence of a unique key means that the temptable can
+    have either 0 or 1 records). 
+    In this case we don't create the physical temptable but instead record
+    its state in SJ_TMP_TABLE::have_degenerate_row.
+  */
+  bool is_degenerate;
+
+  /* 
+    When is_degenerate==TRUE: the contents of the table (whether it has the
+    record or not).
+  */
+  bool have_degenerate_row;
+  
+  /* table record parameters */
+  uint null_bits;
+  uint null_bytes;
+  uint rowid_len;
+
+  /* The temporary table itself (NULL means not created yet) */
+  TABLE *tmp_table;
+  
+  /*
+    These are the members we got from temptable creation code. We'll need
+    them if we'll need to convert table from HEAP to MyISAM/Maria.
+  */
+  ENGINE_COLUMNDEF *start_recinfo;
+  ENGINE_COLUMNDEF *recinfo;
+
+  /* Pointer to next table (next->start_idx > this->end_idx) */
+  SJ_TMP_TABLE *next; 
+};
+
+int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, 
+                                    uint no_jbuf_after);
+void destroy_sj_tmp_tables(JOIN *join);
+int clear_sj_tmp_tables(JOIN *join);
+int rewrite_to_index_subquery_engine(JOIN *join);
+
+
+
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index e020c94a3bd..d94777787fc 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -110,7 +110,7 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
   int error;
   
   if (!ref->key_length)
-    error= table->file->index_first(table->record[0]);
+    error= table->file->ha_index_first(table->record[0]);
   else 
   {
     /*
@@ -132,10 +132,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
          Closed interval: Either The MIN argument is non-nullable, or
          we have a >= predicate for the MIN argument.
       */
-      error= table->file->index_read_map(table->record[0],
-                                         ref->key_buff,
-                                         make_prev_keypart_map(ref->key_parts),
-                                         HA_READ_KEY_OR_NEXT);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            ref->key_buff,
+                                            make_prev_keypart_map(ref->key_parts),
+                                            HA_READ_KEY_OR_NEXT);
     else
     {
       /*
@@ -147,10 +147,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
         and it would not work.
       */
       DBUG_ASSERT(prefix_len < ref->key_length);
-      error= table->file->index_read_map(table->record[0],
-                                         ref->key_buff,
-                                         make_prev_keypart_map(ref->key_parts),
-                                         HA_READ_AFTER_KEY);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            ref->key_buff,
+                                            make_prev_keypart_map(ref->key_parts),
+                                            HA_READ_AFTER_KEY);
       /* 
          If the found record is outside the group formed by the search
          prefix, or there is no such record at all, check if all
@@ -173,10 +173,10 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
            key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len)))
       {
         DBUG_ASSERT(item_field->field->real_maybe_null());
-        error= table->file->index_read_map(table->record[0],
-                                           ref->key_buff,
-                                           make_prev_keypart_map(ref->key_parts),
-                                           HA_READ_KEY_EXACT);
+        error= table->file->ha_index_read_map(table->record[0],
+                                              ref->key_buff,
+                                              make_prev_keypart_map(ref->key_parts),
+                                              HA_READ_KEY_EXACT);
       }
     }
   }
@@ -199,12 +199,12 @@ static int get_index_min_value(TABLE *table, TABLE_REF *ref,
 static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl)
 {
   return (ref->key_length ?
-          table->file->index_read_map(table->record[0], ref->key_buff,
-                                      make_prev_keypart_map(ref->key_parts),
-                                      range_fl & NEAR_MAX ?
-                                      HA_READ_BEFORE_KEY : 
-                                      HA_READ_PREFIX_LAST_OR_PREV) :
-          table->file->index_last(table->record[0]));
+          table->file->ha_index_read_map(table->record[0], ref->key_buff,
+                                         make_prev_keypart_map(ref->key_parts),
+                                         range_fl & NEAR_MAX ?
+                                         HA_READ_BEFORE_KEY : 
+                                         HA_READ_PREFIX_LAST_OR_PREV) :
+          table->file->ha_index_last(table->record[0]));
 }
 
 
@@ -374,18 +374,17 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
             const_result= 0;
             break;
           }
-          table->file->ha_index_init((uint) ref.key, 1);
-
-          error= is_max ? 
-                 get_index_max_value(table, &ref, range_fl) :
-                 get_index_min_value(table, &ref, item_field, range_fl,
-                                     prefix_len);
+          if (!(error= table->file->ha_index_init((uint) ref.key, 1)))
+            error= (is_max ? 
+                    get_index_max_value(table, &ref, range_fl) :
+                    get_index_min_value(table, &ref, item_field, range_fl,
+                                        prefix_len));
 
           /* Verify that the read tuple indeed matches the search key */
 	  if (!error && reckey_in_range(is_max, &ref, item_field->field, 
 			                conds, range_fl, prefix_len))
 	    error= HA_ERR_KEY_NOT_FOUND;
-          table->set_keyread(FALSE);
+          table->disable_keyread();
           table->file->ha_index_end();
           if (error)
 	  {
@@ -896,7 +895,7 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
             converted (for example to upper case)
           */
           if (field->part_of_key.is_set(idx))
-            table->set_keyread(TRUE);
+            table->enable_keyread();
           return 1;
         }
       }
@@ -984,15 +983,11 @@ static int maxmin_in_range(bool max_fl, Field* field, COND *cond)
       SELECT MAX(b) FROM t1 WHERE a=const AND b<const
     */
     if (max_fl != less_fl)
-      return cond->val_int() == 0;                // Return 1 if WHERE is false
+      return cond->val_int() == 0;               // Return 1 if WHERE is false
     return 0;
   }
-  case Item_func::EQ_FUNC:
-  case Item_func::EQUAL_FUNC:
-    break;
-  default:                                        // Keep compiler happy
-    DBUG_ASSERT(1);                               // Impossible
-    break;
+  default:
+    break;                                      // Ignore
   }
   return 0;
 }
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
new file mode 100644
index 00000000000..1e4e87ebac7
--- /dev/null
+++ b/sql/opt_table_elimination.cc
@@ -0,0 +1,1866 @@
+/**
+  @file
+
+  @brief
+    Table Elimination Module
+
+  @defgroup Table_Elimination Table Elimination Module
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "my_bit.h"
+#include "sql_select.h"
+
+/*
+  OVERVIEW
+  ========
+
+  This file contains table elimination module. The idea behind table
+  elimination is as follows: suppose we have a left join
+ 
+    SELECT * FROM t1 LEFT JOIN 
+      (t2 JOIN t3) ON t2.primary_key=t1.col AND 
+                      t2.primary_key=t2.col
+    WHERE ...
+
+  such that
+  * columns of the inner tables are not used anywhere ouside the outer join
+    (not in WHERE, not in GROUP/ORDER BY clause, not in select list etc etc),
+  * inner side of the outer join is guaranteed to produce at most one matching
+    record combination for each record combination of outer tables.
+  
+  then the inner side of the outer join can be removed from the query, as it 
+  will always produce only one record combination (either real or 
+  null-complemented one) and we don't care about what that record combination 
+  is.
+
+
+  MODULE INTERFACE
+  ================
+
+  The module has one entry point - the eliminate_tables() function, which one
+  needs to call (once) at some point before join optimization.
+  eliminate_tables() operates over the JOIN structures. Logically, it
+  removes the inner tables of an outer join operation together with the
+  operation itself. Physically, it changes the following members:
+
+  * Eliminated tables are marked as constant and moved to the front of the
+    join order.
+
+  * In addition to this, they are recorded in JOIN::eliminated_tables bitmap.
+
+  * Items that became disused because they were in the ON expression of an 
+    eliminated outer join are notified by means of the Item tree walk which 
+    calls Item::mark_as_eliminated_processor for every item
+    - At the moment the only Item that cares whether it was eliminated is 
+      Item_subselect with its Item_subselect::eliminated flag which is used
+      by EXPLAIN code to check if the subquery should be shown in EXPLAIN.
+
+  Table elimination is redone on every PS re-execution.
+
+
+  TABLE ELIMINATION ALGORITHM FOR ONE OUTER JOIN
+  ==============================================
+
+  As described above, we can remove inner side of an outer join if it is 
+
+    1. not referred to from any other parts of the query
+    2. always produces one matching record combination.
+
+  We check #1 by doing a recursive descent down the join->join_list while
+  maintaining a union of used_tables() attribute of all Item expressions in
+  other parts of the query. When we encounter an outer join, we check if the
+  bitmap of tables on its inner side has intersection with tables that are used
+  elsewhere. No intersection means that inner side of the outer join could 
+  potentially be eliminated.
+
+  In order to check #2, one needs to prove that inner side of an outer join 
+  is functionally dependent on the outside. The proof is constructed from
+  functional dependencies of intermediate objects:
+
+  - Inner side of outer join is functionally dependent when each of its tables
+    are functionally dependent. (We assume a table is functionally dependent 
+    when its dependencies allow to uniquely identify one table record, or no
+    records).
+
+  - Table is functionally dependent when it has got a unique key whose columns
+    are functionally dependent.
+
+  - A column is functionally dependent when we could locate an AND-part of a
+    certain ON clause in form 
+      
+      tblX.columnY= expr 
+    
+    where expr is functionally depdendent. expr is functionally dependent when 
+    all columns that it refers to are functionally dependent.
+
+  These relationships are modeled as a bipartite directed graph that has
+  dependencies as edges and two kinds of nodes:
+
+  Value nodes:
+   - Table column values (each is a value of tblX.columnY)
+   - Table values (each node represents a table inside the join nest we're
+     trying to eliminate).
+  A value has one attribute, it is either bound (i.e. functionally dependent) 
+  or not.
+
+  Module nodes:
+   - Modules representing tblX.colY=expr equalities. Equality module has 
+      = incoming edges from columns used in expr 
+      = outgoing edge to tblX.colY column.
+   - Nodes representing unique keys. Unique key has
+      = incoming edges from key component value modules
+      = outgoing edge to key's table module
+   - Inner side of outer join module. Outer join module has
+      = incoming edges from table value modules
+      = No outgoing edges. Once we reach it, we know we can eliminate the 
+        outer join.
+  A module may depend on multiple values, and hence its primary attribute is
+  the number of its arguments that are not bound. 
+
+  The algorithm starts with equality nodes that don't have any incoming edges
+  (their expressions are either constant or depend only on tables that are
+  outside of the outer join in question) and performns a breadth-first
+  traversal. If we reach the outer join nest node, it means outer join is
+  functionally dependent and can be eliminated. Otherwise it cannot be
+  eliminated.
+ 
+  HANDLING MULTIPLE NESTED OUTER JOINS
+  ====================================
+
+  Outer joins that are not nested one within another are eliminated
+  independently. For nested outer joins we have the following considerations:
+  
+  1. ON expressions from children outer joins must be taken into account 
+   
+  Consider this example:
+
+    SELECT t0.* 
+    FROM 
+      t0  
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON t2.primary_key=t1.col1)
+    ON 
+      t1.primary_key=t0.col AND t2.col1=t1.col2
+
+  Here we cannot eliminate the "... LEFT JOIN t2 ON ..." part alone because the
+  ON clause of top level outer join has references to table t2. 
+  We can eliminate the entire  "... LEFT JOIN (t1 LEFT JOIN t2) ON .." part,
+  but in order to do that, we must look at both ON expressions.
+  
+  2. ON expressions of parent outer joins are useless.
+  Consider an example:
+
+    SELECT t0.* 
+    FROM
+      t0 
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON some_expr)
+    ON
+      t2.primary_key=t1.col  -- (*)
+  
+  Here the uppermost ON expression has a clause that gives us functional
+  dependency of table t2 on t1 and hence could be used to eliminate the
+  "... LEFT JOIN t2 ON..." part.
+  However, we would not actually encounter this situation, because before the
+  table elimination we run simplify_joins(), which, among other things, upon
+  seeing a functional dependency condition like (*) will convert the outer join
+  of
+    
+    "... LEFT JOIN t2 ON ..."
+  
+  into inner join and thus make table elimination not to consider eliminating
+  table t2.
+*/
+
+class Dep_value;
+  class Dep_value_field;
+  class Dep_value_table;
+ 
+
+class Dep_module;
+  class Dep_module_expr;
+  class Dep_module_goal;
+  class Dep_module_key;
+
+class Dep_analysis_context;
+
+
+/*
+  A value, something that can be bound or not bound. One can also iterate over 
+  unbound modules that depend on this value
+*/
+
+class Dep_value : public Sql_alloc
+{
+public:
+  Dep_value(): bound(FALSE) {}
+  virtual ~Dep_value(){} /* purecov: inspected */ /* stop compiler warnings */
+  
+  bool is_bound() { return bound; }
+  void make_bound() { bound= TRUE; }
+
+  /* Iteration over unbound modules that depend on this value */
+  typedef char *Iterator;
+  virtual Iterator init_unbound_modules_iter(char *buf)=0;
+  virtual Dep_module* get_next_unbound_module(Dep_analysis_context *dac,
+                                              Iterator iter) = 0;
+  static const size_t iterator_size;
+protected:
+  bool bound;
+};
+
+
+/*
+  A table field value. There is exactly only one such object for any tblX.fieldY
+  - the field depends on its table and equalities
+  - expressions that use the field are its dependencies
+*/
+
+class Dep_value_field : public Dep_value
+{
+public:
+  Dep_value_field(Dep_value_table *table_arg, Field *field_arg) :
+    table(table_arg), field(field_arg)
+  {}
+
+  Dep_value_table *table; /* Table this field is from */
+  Field *field; /* Field this object is representing */
+  
+  /* Iteration over unbound modules that are our dependencies */
+  Iterator init_unbound_modules_iter(char *buf);
+  Dep_module* get_next_unbound_module(Dep_analysis_context *dac, 
+                                      Iterator iter);
+  
+  void make_unbound_modules_iter_skip_keys(Iterator iter);
+  
+  static const size_t iterator_size;
+private:
+  /* 
+    Field_deps that belong to one table form a linked list, ordered by
+    field_index 
+  */
+  Dep_value_field *next_table_field;
+
+  /*
+    Offset to bits in Dep_analysis_context::expr_deps (see comment to that 
+    member for semantics of the bits).
+  */
+  uint bitmap_offset;
+
+  class Module_iter
+  {
+  public:
+    /* if not null, return this and advance */
+    Dep_module_key *key_dep;
+    /* Otherwise, this and advance */
+    uint equality_no;
+  };
+  friend class Dep_analysis_context;
+  friend class Field_dependency_recorder; 
+  friend class Dep_value_table;
+};
+
+const size_t Dep_value_field::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_value_field::Module_iter));
+
+
+/*
+  A table value. There is one Dep_value_table object for every table that can
+  potentially be eliminated.
+
+  Table becomes bound as soon as some of its unique keys becomes bound
+  Once the table is bound:
+   - all of its fields are bound
+   - its embedding outer join has one less unknown argument
+*/
+
+class Dep_value_table : public Dep_value
+{
+public:
+  Dep_value_table(TABLE *table_arg) : 
+    table(table_arg), fields(NULL), keys(NULL)
+  {}
+  TABLE *table;  /* Table this object is representing */
+  /* Ordered list of fields that belong to this table */
+  Dep_value_field *fields;
+  Dep_module_key *keys; /* Ordered list of Unique keys in this table */
+
+  /* Iteration over unbound modules that are our dependencies */
+  Iterator init_unbound_modules_iter(char *buf);
+  Dep_module* get_next_unbound_module(Dep_analysis_context *dac, 
+                                      Iterator iter);
+  static const size_t iterator_size;
+private:
+  class Module_iter
+  {
+  public:
+    /* Space for field iterator */
+    char buf[Dep_value_field::iterator_size];
+    /* !NULL <=> iterating over depdenent modules of this field */
+    Dep_value_field *field_dep; 
+    bool returned_goal;
+  };
+};
+
+
+const size_t Dep_value_table::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_value_table::Module_iter));
+
+const size_t Dep_value::iterator_size=
+  max(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
+
+
+/*
+  A 'module'. Module has unsatisfied dependencies, number of whose is stored in
+  unbound_args. Modules also can be linked together in a list.
+*/
+
+class Dep_module : public Sql_alloc
+{
+public:
+  virtual ~Dep_module(){}  /* purecov: inspected */ /* stop compiler warnings */
+  
+  /* Mark as bound. Currently is non-virtual and does nothing */
+  void make_bound() {};
+
+  /* 
+    The final module will return TRUE here. When we see that TRUE was returned,
+    that will mean that functional dependency check succeeded.
+  */
+  virtual bool is_final () { return FALSE; }
+
+  /* 
+    Increment number of bound arguments. this is expected to change
+    is_applicable() from false to true after sufficient set of arguments is
+    bound.
+  */
+  void touch() { unbound_args--; }
+  bool is_applicable() { return !test(unbound_args); }
+  
+  /* Iteration over values that */
+  typedef char *Iterator;
+  virtual Iterator init_unbound_values_iter(char *buf)=0;
+  virtual Dep_value* get_next_unbound_value(Dep_analysis_context *dac,
+                                            Iterator iter)=0;
+  static const size_t iterator_size;
+protected:
+  uint unbound_args;
+  
+  Dep_module() : unbound_args(0) {}
+  /* to bump unbound_args when constructing depedendencies */
+  friend class Field_dependency_recorder; 
+  friend class Dep_analysis_context;
+};
+
+
+/*
+  This represents either
+   - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields
+     used in the expression, or
+   - tbl1.col1=tbl2.col2=... multi-equality.
+*/
+
+class Dep_module_expr : public Dep_module
+{
+public:
+  Dep_value_field *field;
+  Item  *expr;
+  
+  List<Dep_value_field> *mult_equal_fields;
+  /* Used during condition analysis only, similar to KEYUSE::level */
+  uint level;
+
+  Iterator init_unbound_values_iter(char *buf);
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter);
+  static const size_t iterator_size;
+private:
+  class Value_iter
+  {
+  public:
+    Dep_value_field *field;
+    List_iterator<Dep_value_field> it;
+  };
+};
+
+const size_t Dep_module_expr::iterator_size=
+  ALIGN_SIZE(sizeof(Dep_module_expr::Value_iter));
+
+
+/*
+  A Unique key module
+   - Unique key has all of its components as arguments
+   - Once unique key is bound, its table value is known
+*/
+
+class Dep_module_key: public Dep_module
+{
+public:
+  Dep_module_key(Dep_value_table *table_arg, uint keyno_arg, uint n_parts_arg) :
+    table(table_arg), keyno(keyno_arg), next_table_key(NULL)
+  {
+    unbound_args= n_parts_arg;
+  }
+  Dep_value_table *table; /* Table this key is from */
+  uint keyno;  /* The index we're representing */
+  /* Unique keys form a linked list, ordered by keyno */
+  Dep_module_key *next_table_key;
+  
+  Iterator init_unbound_values_iter(char *buf);
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter);
+  static const size_t iterator_size;
+private:
+  class Value_iter
+  {
+  public:
+    Dep_value_table *table;
+  };
+};
+
+const size_t Dep_module_key::iterator_size= 
+  ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));
+
+const size_t Dep_module::iterator_size=
+  max(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
+
+
+/*
+  A module that represents outer join that we're trying to eliminate. If we 
+  manage to declare this module to be bound, then outer join can be eliminated.
+*/
+
+class Dep_module_goal: public Dep_module
+{
+public:
+  Dep_module_goal(uint n_children)  
+  {
+    unbound_args= n_children;
+  }
+  bool is_final() { return TRUE; }
+  /* 
+    This is the goal module, so the running wave algorithm should terminate
+    once it sees that this module is applicable and should never try to apply
+    it, hence no use for unbound value iterator implementation.
+  */
+  Iterator init_unbound_values_iter(char *buf)
+  { 
+    DBUG_ASSERT(0); 
+    return NULL;
+  }
+  Dep_value* get_next_unbound_value(Dep_analysis_context *dac, Iterator iter)
+  {
+    DBUG_ASSERT(0); 
+    return NULL;
+  }
+};
+
+
+/*
+  Functional dependency analyzer context
+*/
+class Dep_analysis_context
+{
+public:
+  bool setup_equality_modules_deps(List<Dep_module> *bound_modules);
+  bool run_wave(List<Dep_module> *new_bound_modules);
+
+  /* Tables that we're looking at eliminating */
+  table_map usable_tables;
+  
+  /* Array of equality dependencies */
+  Dep_module_expr *equality_mods;
+  uint n_equality_mods; /* Number of elements in the array */
+  uint n_equality_mods_alloced;
+
+  /* tablenr -> Dep_value_table* mapping. */
+  Dep_value_table *table_deps[MAX_KEY];
+  
+  /* Element for the outer join we're attempting to eliminate */
+  Dep_module_goal *outer_join_dep;
+
+  /* 
+    Bitmap of how expressions depend on bits. Given a Dep_value_field object,
+    one can check bitmap_is_set(expr_deps, field_val->bitmap_offset + expr_no)
+    to see if expression equality_mods[expr_no] depends on the given field.
+  */
+  MY_BITMAP expr_deps;
+  
+  Dep_value_table *create_table_value(TABLE *table);
+  Dep_value_field *get_field_value(Field *field);
+
+#ifndef DBUG_OFF
+  void dbug_print_deps();
+#endif 
+};
+
+
+void eliminate_tables(JOIN *join);
+
+static bool
+eliminate_tables_for_list(JOIN *join, 
+                          List<TABLE_LIST> *join_list,
+                          table_map tables_in_list,
+                          Item *on_expr,
+                          table_map tables_used_elsewhere);
+static
+bool check_func_dependency(JOIN *join, 
+                           table_map dep_tables,
+                           List_iterator<TABLE_LIST> *it, 
+                           TABLE_LIST *oj_tbl,
+                           Item* cond);
+static 
+void build_eq_mods_for_cond(Dep_analysis_context *dac, 
+                            Dep_module_expr **eq_mod, uint *and_level, 
+                            Item *cond);
+static 
+void check_equality(Dep_analysis_context *dac, Dep_module_expr **eq_mod, 
+                    uint and_level, Item_func *cond, Item *left, Item *right);
+static 
+Dep_module_expr *merge_eq_mods(Dep_module_expr *start, 
+                                 Dep_module_expr *new_fields, 
+                                 Dep_module_expr *end, uint and_level);
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl);
+static 
+void add_module_expr(Dep_analysis_context *dac, Dep_module_expr **eq_mod,
+                     uint and_level, Dep_value_field *field_val, Item *right,
+                     List<Dep_value_field>* mult_equal_fields);
+
+
+/*****************************************************************************/
+
+/*
+  Perform table elimination
+
+  SYNOPSIS
+    eliminate_tables()
+      join                   Join to work on
+
+  DESCRIPTION
+    This is the entry point for table elimination. Grep for MODULE INTERFACE
+    section in this file for calling convention.
+
+    The idea behind table elimination is that if we have an outer join:
+   
+      SELECT * FROM t1 LEFT JOIN 
+        (t2 JOIN t3) ON t2.primary_key=t1.col AND 
+                        t3.primary_key=t2.col
+    such that
+
+    1. columns of the inner tables are not used anywhere ouside the outer
+       join (not in WHERE, not in GROUP/ORDER BY clause, not in select list 
+       etc etc), and
+    2. inner side of the outer join is guaranteed to produce at most one
+       record combination for each record combination of outer tables.
+    
+    then the inner side of the outer join can be removed from the query.
+    This is because it will always produce one matching record (either a
+    real match or a NULL-complemented record combination), and since there
+    are no references to columns of the inner tables anywhere, it doesn't
+    matter which record combination it was.
+
+    This function primary handles checking #1. It collects a bitmap of
+    tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and
+    thus can possibly be eliminated.
+
+    After this, if #1 is met, the function calls eliminate_tables_for_list()
+    that checks #2.
+  
+  SIDE EFFECTS
+    See the OVERVIEW section at the top of this file.
+
+*/
+
+void eliminate_tables(JOIN *join)
+{
+  THD* thd= join->thd;
+  Item *item;
+  table_map used_tables;
+  DBUG_ENTER("eliminate_tables");
+  
+  DBUG_ASSERT(join->eliminated_tables == 0);
+
+  /* If there are no outer joins, we have nothing to eliminate: */
+  if (!join->outer_join)
+    DBUG_VOID_RETURN;
+
+#ifndef DBUG_OFF
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION))
+    DBUG_VOID_RETURN; /* purecov: inspected */
+#endif
+
+  /* Find the tables that are referred to from WHERE/HAVING */
+  used_tables= (join->conds?  join->conds->used_tables() : 0) | 
+               (join->having? join->having->used_tables() : 0);
+  
+  /* Add tables referred to from the select list */
+  List_iterator<Item> it(join->fields_list);
+  while ((item= it++))
+    used_tables |= item->used_tables();
+ 
+  /* Add tables referred to from ORDER BY and GROUP BY lists */
+  ORDER *all_lists[]= { join->order, join->group_list};
+  for (int i=0; i < 2; i++)
+  {
+    for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next)
+      used_tables |= (*(cur_list->item))->used_tables();
+  }
+  
+  if (join->select_lex == &thd->lex->select_lex)
+  {
+
+    /* Multi-table UPDATE: don't eliminate tables referred from SET statement */
+    if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
+    {
+      /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */
+      used_tables |= thd->table_map_for_update;
+      List_iterator<Item> it2(thd->lex->value_list);
+      while ((item= it2++))
+        used_tables |= item->used_tables();
+    }
+
+    if (thd->lex->sql_command == SQLCOM_DELETE_MULTI)
+    {
+      TABLE_LIST *tbl;
+      for (tbl= (TABLE_LIST*)thd->lex->auxiliary_table_list.first;
+           tbl; tbl= tbl->next_local)
+      {
+        used_tables |= tbl->table->map;
+      }
+    }
+  }
+  
+  table_map all_tables= join->all_tables_map();
+  if (all_tables & ~used_tables)
+  {
+    /* There are some tables that we probably could eliminate. Try it. */
+    eliminate_tables_for_list(join, join->join_list, all_tables, NULL,
+                              used_tables);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Perform table elimination in a given join list
+
+  SYNOPSIS
+    eliminate_tables_for_list()
+      join                    The join we're working on
+      join_list               Join list to eliminate tables from (and if
+                              on_expr !=NULL, then try eliminating join_list
+                              itself)
+      list_tables             Bitmap of tables embedded in the join_list.
+      on_expr                 ON expression, if the join list is the inner side
+                              of an outer join.
+                              NULL means it's not an outer join but rather a
+                              top-level join list.
+      tables_used_elsewhere   Bitmap of tables that are referred to from
+                              somewhere outside of the join list (e.g.
+                              select list, HAVING, other ON expressions, etc).
+
+  DESCRIPTION
+    Perform table elimination in a given join list:
+    - First, walk through join list members and try doing table elimination for
+      them.
+    - Then, if the join list itself is an inner side of outer join
+      (on_expr!=NULL), then try to eliminate the entire join list.
+
+    See "HANDLING MULTIPLE NESTED OUTER JOINS" section at the top of this file
+    for more detailed description and justification.
+    
+  RETURN
+    TRUE   The entire join list eliminated
+    FALSE  Join list wasn't eliminated (but some of its child outer joins 
+           possibly were)
+*/
+
+static bool
+eliminate_tables_for_list(JOIN *join, List<TABLE_LIST> *join_list,
+                          table_map list_tables, Item *on_expr,
+                          table_map tables_used_elsewhere)
+{
+  TABLE_LIST *tbl;
+  List_iterator<TABLE_LIST> it(*join_list);
+  table_map tables_used_on_left= 0;
+  bool all_eliminated= TRUE;
+
+  while ((tbl= it++))
+  {
+    if (tbl->on_expr)
+    {
+      table_map outside_used_tables= tables_used_elsewhere | 
+                                     tables_used_on_left;
+      if (tbl->nested_join)
+      {
+        /* This is  "... LEFT JOIN (join_nest) ON cond" */
+        if (eliminate_tables_for_list(join,
+                                      &tbl->nested_join->join_list, 
+                                      tbl->nested_join->used_tables, 
+                                      tbl->on_expr,
+                                      outside_used_tables))
+        {
+          mark_as_eliminated(join, tbl);
+        }
+        else
+          all_eliminated= FALSE;
+      }
+      else
+      {
+        /* This is  "... LEFT JOIN tbl ON cond" */
+        if (!(tbl->table->map & outside_used_tables) &&
+            check_func_dependency(join, tbl->table->map, NULL, tbl, 
+                                  tbl->on_expr))
+        {
+          mark_as_eliminated(join, tbl);
+        }
+        else
+          all_eliminated= FALSE;
+      }
+      tables_used_on_left |= tbl->on_expr->used_tables();
+    }
+    else
+    {
+      DBUG_ASSERT(!tbl->nested_join || tbl->sj_on_expr);
+      //psergey-todo: is the following really correct or we'll need to descend
+      //down all ON clauses: ? 
+      if (tbl->sj_on_expr)
+        tables_used_on_left |= tbl->sj_on_expr->used_tables();
+    }
+  }
+
+  /* Try eliminating the nest we're called for */
+  if (all_eliminated && on_expr && !(list_tables & tables_used_elsewhere))
+  {
+    it.rewind();
+    return check_func_dependency(join, list_tables & ~join->eliminated_tables,
+                                 &it, NULL, on_expr);
+  }
+  return FALSE; /* not eliminated */
+}
+
+
+/*
+  Check if given condition makes given set of tables functionally dependent
+
+  SYNOPSIS
+    check_func_dependency()
+      join         Join we're procesing
+      dep_tables   Tables that we check to be functionally dependent (on
+                   everything else)
+      it           Iterator that enumerates these tables, or NULL if we're 
+                   checking one single table and it is specified in oj_tbl
+                   parameter.
+      oj_tbl       NULL, or one single table that we're checking
+      cond         Condition to use to prove functional dependency
+
+  DESCRIPTION
+    Check if we can use given condition to infer that the set of given tables
+    is functionally dependent on everything else.
+
+  RETURN 
+    TRUE  - Yes, functionally dependent
+    FALSE - No, or error
+*/
+
+static
+bool check_func_dependency(JOIN *join,
+                           table_map dep_tables,
+                           List_iterator<TABLE_LIST> *it, 
+                           TABLE_LIST *oj_tbl,
+                           Item* cond)
+{
+  Dep_analysis_context dac;
+  
+  /* 
+    Pre-alloc some Dep_module_expr structures. We don't need this to be
+    guaranteed upper bound.
+  */
+  dac.n_equality_mods_alloced= 
+    join->thd->lex->current_select->max_equal_elems +
+    (join->thd->lex->current_select->cond_count+1)*2 +
+    join->thd->lex->current_select->between_count;
+
+  bzero(dac.table_deps, sizeof(dac.table_deps));
+  if (!(dac.equality_mods= new Dep_module_expr[dac.n_equality_mods_alloced]))
+    return FALSE; /* purecov: inspected */
+
+  Dep_module_expr* last_eq_mod= dac.equality_mods;
+  
+  /* Create Dep_value_table objects for all tables we're trying to eliminate */
+  if (oj_tbl)
+  {
+    if (!dac.create_table_value(oj_tbl->table))
+      return FALSE; /* purecov: inspected */
+  }
+  else
+  {
+    TABLE_LIST *tbl; 
+    while ((tbl= (*it)++))
+    {
+      if (tbl->table && (tbl->table->map & dep_tables))
+      {
+        if (!dac.create_table_value(tbl->table))
+          return FALSE; /* purecov: inspected */
+      }
+    }
+  }
+  dac.usable_tables= dep_tables;
+
+  /*
+    Analyze the the ON expression and create Dep_module_expr objects and
+      Dep_value_field objects for the used fields.
+  */
+  uint and_level=0;
+  build_eq_mods_for_cond(&dac, &last_eq_mod, &and_level, cond);
+  if (!(dac.n_equality_mods= last_eq_mod - dac.equality_mods))
+    return FALSE;  /* No useful conditions */
+
+  List<Dep_module> bound_modules;
+
+  if (!(dac.outer_join_dep= new Dep_module_goal(my_count_bits(dep_tables))) ||
+      dac.setup_equality_modules_deps(&bound_modules))
+  {
+    return FALSE; /* OOM, default to non-dependent */ /* purecov: inspected */
+  }
+  
+  DBUG_EXECUTE("test", dac.dbug_print_deps(); );
+  
+  return dac.run_wave(&bound_modules);
+}
+
+
+/*
+  Running wave functional dependency check algorithm
+
+  SYNOPSIS
+   Dep_analysis_context::run_wave()
+     new_bound_modules  List of bound modules to start the running wave from. 
+                        The list is destroyed during execution
+  
+  DESCRIPTION
+    This function uses running wave algorithm to check if the join nest is
+    functionally-dependent. 
+    We start from provided list of bound modules, and then run the wave across 
+    dependency edges, trying the reach the Dep_module_goal module. If we manage
+    to reach it, then the join nest is functionally-dependent, otherwise it is
+    not.
+
+  RETURN 
+    TRUE   Yes, functionally dependent
+    FALSE  No.
+*/
+
+bool Dep_analysis_context::run_wave(List<Dep_module> *new_bound_modules)
+{
+  List<Dep_value> new_bound_values;
+  
+  Dep_value *value;
+  Dep_module *module;
+
+  while (!new_bound_modules->is_empty())
+  {
+    /*
+      The "wave" is in new_bound_modules list. Iterate over values that can be
+      reached from these modules but are not yet bound, and collect the next
+      wave generation in new_bound_values list.
+    */
+    List_iterator<Dep_module> modules_it(*new_bound_modules);
+    while ((module= modules_it++))
+    {
+      char iter_buf[Dep_module::iterator_size + ALIGN_MAX_UNIT];
+      Dep_module::Iterator iter;
+      iter= module->init_unbound_values_iter(iter_buf);
+      while ((value= module->get_next_unbound_value(this, iter)))
+      {
+        value->make_bound();
+        new_bound_values.push_back(value);
+      }
+    }
+    new_bound_modules->empty();
+    
+    /*
+      Now walk over list of values we've just found to be bound and check which
+      unbound modules can be reached from them. If there are some modules that
+      became bound, collect them in new_bound_modules list.
+    */
+    List_iterator<Dep_value> value_it(new_bound_values);
+    while ((value= value_it++))
+    {
+      char iter_buf[Dep_value::iterator_size + ALIGN_MAX_UNIT];
+      Dep_value::Iterator iter;
+      iter= value->init_unbound_modules_iter(iter_buf);
+      while ((module= value->get_next_unbound_module(this, iter)))
+      {
+        module->touch();
+        if (!module->is_applicable())
+          continue;
+        if (module->is_final())
+          return TRUE; /* Functionally dependent */
+        module->make_bound();
+        new_bound_modules->push_back(module);
+      }
+    }
+    new_bound_values.empty();
+  }
+  return FALSE;
+}
+
+
+/*
+  This is used to analyze expressions in "tbl.col=expr" dependencies so
+  that we can figure out which fields the expression depends on.
+*/
+
+class Field_dependency_recorder : public Field_enumerator
+{
+public:
+  Field_dependency_recorder(Dep_analysis_context *ctx_arg): ctx(ctx_arg)
+  {}
+  
+  void visit_field(Item_field *item)
+  {
+    Field *field= item->field;
+    Dep_value_table *tbl_dep;
+    if ((tbl_dep= ctx->table_deps[field->table->tablenr]))
+    {
+      for (Dep_value_field *field_dep= tbl_dep->fields; field_dep; 
+           field_dep= field_dep->next_table_field)
+      {
+        if (field->field_index == field_dep->field->field_index)
+        {
+          uint offs= field_dep->bitmap_offset + expr_offset;
+          if (!bitmap_is_set(&ctx->expr_deps, offs))
+            ctx->equality_mods[expr_offset].unbound_args++;
+          bitmap_set_bit(&ctx->expr_deps, offs);
+          return;
+        }
+      }
+      /* 
+        We got here if didn't find this field. It's not a part of 
+        a unique key, and/or there is no field=expr element for it.
+        Bump the dependency anyway, this will signal that this dependency
+        cannot be satisfied.
+      */
+      ctx->equality_mods[expr_offset].unbound_args++;
+    }
+    else
+      visited_other_tables= TRUE;
+  }
+
+  Dep_analysis_context *ctx;
+  /* Offset of the expression we're processing in the dependency bitmap */
+  uint expr_offset;
+
+  bool visited_other_tables;
+};
+
+
+
+
+/*
+  Setup inbound dependency relationships for tbl.col=expr equalities
+ 
+  SYNOPSIS
+    setup_equality_modules_deps()
+      bound_deps_list  Put here modules that were found not to depend on 
+                       any non-bound columns.
+
+  DESCRIPTION
+    Setup inbound dependency relationships for tbl.col=expr equalities:
+      - allocate a bitmap where we store such dependencies
+      - for each "tbl.col=expr" equality, analyze the expr part and find out
+        which fields it refers to and set appropriate dependencies.
+    
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory
+*/
+
+bool Dep_analysis_context::setup_equality_modules_deps(List<Dep_module> 
+                                                       *bound_modules)
+{
+  DBUG_ENTER("setup_equality_modules_deps");
+ 
+  /*
+    Count Dep_value_field objects and assign each of them a unique 
+    bitmap_offset value.
+  */
+  uint offset= 0;
+  for (Dep_value_table **tbl_dep= table_deps; 
+       tbl_dep < table_deps + MAX_TABLES;
+       tbl_dep++)
+  {
+    if (*tbl_dep)
+    {
+      for (Dep_value_field *field_dep= (*tbl_dep)->fields;
+           field_dep;
+           field_dep= field_dep->next_table_field)
+      {
+        field_dep->bitmap_offset= offset;
+        offset += n_equality_mods;
+      }
+    }
+  }
+ 
+  void *buf;
+  if (!(buf= current_thd->alloc(bitmap_buffer_size(offset))) ||
+      bitmap_init(&expr_deps, (my_bitmap_map*)buf, offset, FALSE))
+  {
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+  }
+  bitmap_clear_all(&expr_deps);
+
+  /* 
+    Analyze all "field=expr" dependencies, and have expr_deps encode
+    dependencies of expressions from fields.
+
+    Also collect a linked list of equalities that are bound.
+  */
+  Field_dependency_recorder deps_recorder(this);
+  for (Dep_module_expr *eq_mod= equality_mods; 
+       eq_mod < equality_mods + n_equality_mods;
+       eq_mod++)
+  {
+    deps_recorder.expr_offset= eq_mod - equality_mods;
+    deps_recorder.visited_other_tables= FALSE;
+    eq_mod->unbound_args= 0;
+    
+    if (eq_mod->field)
+    {
+      /* Regular tbl.col=expr(tblX1.col1, tblY1.col2, ...) */
+      eq_mod->expr->walk(&Item::enumerate_field_refs_processor, FALSE, 
+                               (uchar*)&deps_recorder);
+    }
+    else 
+    {
+      /* It's a multi-equality */
+      eq_mod->unbound_args= !test(eq_mod->expr);
+      List_iterator<Dep_value_field> it(*eq_mod->mult_equal_fields);
+      Dep_value_field* field_val;
+      while ((field_val= it++))
+      {
+        uint offs= field_val->bitmap_offset + eq_mod - equality_mods;
+        bitmap_set_bit(&expr_deps, offs);
+      }
+    }
+
+    if (!eq_mod->unbound_args)
+      bound_modules->push_back(eq_mod);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Ordering that we're using whenever we need to maintain a no-duplicates list
+  of field value objects.
+*/
+
+static 
+int compare_field_values(Dep_value_field *a, Dep_value_field *b, void *unused)
+{
+  uint a_ratio= a->field->table->tablenr*MAX_FIELDS +
+                a->field->field_index;
+
+  uint b_ratio= b->field->table->tablenr*MAX_FIELDS +
+                b->field->field_index;
+  return (a_ratio < b_ratio)? -1 : ((a_ratio == b_ratio)? 0 : 1);
+}
+
+
+/*
+  Produce Dep_module_expr elements for given condition.
+
+  SYNOPSIS
+    build_eq_mods_for_cond()
+      ctx              Table elimination context
+      eq_mod    INOUT  Put produced equality conditions here
+      and_level INOUT  AND-level (like in add_key_fields)
+      cond             Condition to process
+
+  DESCRIPTION
+    Analyze the given condition and produce an array of Dep_module_expr 
+    dependencies from it. The idea of analysis is as follows:
+    There are useful equalities that have form 
+        
+        eliminable_tbl.field = expr      (denote as useful_equality)
+
+    The condition is composed of useful equalities and other conditions that
+    are combined together with AND and OR operators. We process the condition
+    in recursive fashion according to these basic rules:
+
+      useful_equality1 AND useful_equality2 -> make array of two 
+                                               Dep_module_expr objects
+
+      useful_equality AND other_cond -> discard other_cond
+      
+      useful_equality OR other_cond -> discard everything
+      
+      useful_equality1 OR useful_equality2 -> check if both sides of OR are the
+                                              same equality. If yes, that's the
+                                              result, otherwise discard 
+                                              everything.
+
+    The rules are used to map the condition into an array Dep_module_expr
+    elements. The array will specify functional dependencies that logically 
+    follow from the condition.
+
+  SEE ALSO
+    This function is modeled after add_key_fields()
+*/
+
+static 
+void build_eq_mods_for_cond(Dep_analysis_context *ctx, 
+                            Dep_module_expr **eq_mod,
+                            uint *and_level, Item *cond)
+{
+  if (cond->type() == Item_func::COND_ITEM)
+  {
+    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
+    uint orig_offset= *eq_mod - ctx->equality_mods;
+    
+    /* AND/OR */
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      Item *item;
+      while ((item=li++))
+        build_eq_mods_for_cond(ctx, eq_mod, and_level, item);
+
+      for (Dep_module_expr *mod_exp= ctx->equality_mods + orig_offset;
+           mod_exp != *eq_mod ; mod_exp++)
+      {
+        mod_exp->level= *and_level;
+      }
+    }
+    else
+    {
+      Item *item;
+      (*and_level)++;
+      build_eq_mods_for_cond(ctx, eq_mod, and_level, li++);
+      while ((item=li++))
+      {
+        Dep_module_expr *start_key_fields= *eq_mod;
+        (*and_level)++;
+        build_eq_mods_for_cond(ctx, eq_mod, and_level, item);
+        *eq_mod= merge_eq_mods(ctx->equality_mods + orig_offset, 
+                               start_key_fields, *eq_mod,
+                               ++(*and_level));
+      }
+    }
+    return;
+  }
+
+  if (cond->type() != Item::FUNC_ITEM)
+    return;
+
+  Item_func *cond_func= (Item_func*) cond;
+  Item **args= cond_func->arguments();
+
+  switch (cond_func->functype()) {
+  case Item_func::BETWEEN:
+  {
+    Item *fld;
+    if (!((Item_func_between*)cond)->negated &&
+        (fld= args[0]->real_item())->type() == Item::FIELD_ITEM &&
+        args[1]->eq(args[2], ((Item_field*)fld)->field->binary()))
+    {
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[0], args[1]);
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[1], args[0]);
+    }
+    break;
+  }
+  case Item_func::EQ_FUNC:
+  case Item_func::EQUAL_FUNC:
+  {
+    check_equality(ctx, eq_mod, *and_level, cond_func, args[0], args[1]);
+    check_equality(ctx, eq_mod, *and_level, cond_func, args[1], args[0]);
+    break;
+  }
+  case Item_func::ISNULL_FUNC:
+  {
+    Item *tmp=new Item_null;
+    if (tmp)
+      check_equality(ctx, eq_mod, *and_level, cond_func, args[0], tmp);
+    break;
+  }
+  case Item_func::MULT_EQUAL_FUNC:
+  {
+    /*
+      The condition is a 
+
+        tbl1.field1 = tbl2.field2 = tbl3.field3 [= const_expr]
+
+      multiple-equality. Do two things:
+       - Collect List<Dep_value_field> of tblX.colY where tblX is one of the
+         tables we're trying to eliminate.
+       - rembember if there was a bound value, either const_expr or tblY.colZ
+         swher tblY is not a table that we're trying to eliminate.
+      Store all collected information in a Dep_module_expr object.
+    */
+    Item_equal *item_equal= (Item_equal*)cond;
+    List<Dep_value_field> *fvl;
+    if (!(fvl= new List<Dep_value_field>))
+      break; /* purecov: inspected */
+
+    Item_equal_iterator it(*item_equal);
+    Item_field *item;
+    Item *bound_item= item_equal->get_const();
+    while ((item= it++))
+    {
+      if ((item->used_tables() & ctx->usable_tables))
+      {
+        Dep_value_field *field_val;
+        if ((field_val= ctx->get_field_value(item->field)))
+          fvl->push_back(field_val);
+      }
+      else
+      {
+        if (!bound_item)
+          bound_item= item;
+      }
+    }
+    /* 
+      Multiple equality is only useful if it includes at least one field from
+      the table that we could potentially eliminate:
+    */
+    if (fvl->elements)
+    {
+      
+      exchange_sort<Dep_value_field>(fvl, compare_field_values, NULL);
+      add_module_expr(ctx, eq_mod, *and_level, NULL, bound_item, fvl);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+
+/*
+  Perform an OR operation on two (adjacent) Dep_module_expr arrays.
+
+  SYNOPSIS
+     merge_eq_mods()
+       start        Start of left OR-part
+       new_fields   Start of right OR-part
+       end          End of right OR-part
+       and_level    AND-level (like in add_key_fields)
+
+  DESCRIPTION
+  This function is invoked for two adjacent arrays of Dep_module_expr elements:
+
+                      $LEFT_PART             $RIGHT_PART
+             +-----------------------+-----------------------+
+            start                new_fields                 end
+         
+  The goal is to produce an array which would correspond to the combined 
+  
+    $LEFT_PART OR $RIGHT_PART
+  
+  condition. This is achieved as follows: First, we apply distrubutive law:
+  
+    (fdep_A_1 AND fdep_A_2 AND ...)  OR  (fdep_B_1 AND fdep_B_2 AND ...) =
+
+     = AND_ij (fdep_A_[i] OR fdep_B_[j])
+  
+  Then we walk over the obtained "fdep_A_[i] OR fdep_B_[j]" pairs, and 
+   - Discard those that that have left and right part referring to different
+     columns. We can't infer anything useful from "col1=expr1 OR col2=expr2".
+   - When left and right parts refer to the same column,  we check if they are 
+     essentially the same. 
+     = If they are the same, we keep one copy 
+       "t.col=expr OR t.col=expr"  -> "t.col=expr 
+     = if they are different , then we discard both
+      "t.col=expr1 OR t.col=expr2" -> (nothing useful)
+
+  (no per-table or for-index FUNC_DEPS exist yet at this phase).
+
+  See also merge_key_fields().
+
+  RETURN 
+    End of the result array
+*/
+
+static 
+Dep_module_expr *merge_eq_mods(Dep_module_expr *start, 
+                               Dep_module_expr *new_fields,
+                               Dep_module_expr *end, uint and_level)
+{
+  if (start == new_fields)
+    return start;  /*  (nothing) OR (...) -> (nothing) */
+  if (new_fields == end)
+    return start;  /*  (...) OR (nothing) -> (nothing) */
+
+  Dep_module_expr *first_free= new_fields;
+
+  for (; new_fields != end ; new_fields++)
+  {
+    for (Dep_module_expr *old=start ; old != first_free ; old++)
+    {
+      if (old->field == new_fields->field)
+      {
+        if (!old->field)
+        {
+          /*
+            OR-ing two multiple equalities. We must compute an intersection of
+            used fields, and check the constants according to these rules:
+
+              a=b=c=d  OR a=c=e=f   ->  a=c  (compute intersection)
+              a=const1 OR a=b       ->  (nothing)
+              a=const1 OR a=const1  ->  a=const1 
+              a=const1 OR a=const2  ->  (nothing)
+            
+            If we're performing an OR operation over multiple equalities, e.g.
+
+              (a=b=c AND p=q) OR (a=b AND v=z)
+            
+            then we'll need to try combining each equality with each. ANDed
+            equalities are guaranteed to be disjoint, so we'll only get one
+            hit.
+          */
+          Field *eq_field= old->mult_equal_fields->head()->field;
+          if (old->expr && new_fields->expr &&
+              old->expr->eq_by_collation(new_fields->expr, eq_field->binary(),
+                                         eq_field->charset()))
+          {
+            /* Ok, keep */
+          }
+          else
+          {
+            /* no single constant/bound item. */
+            old->expr= NULL;
+          }
+           
+          List <Dep_value_field> *fv;
+          if (!(fv= new List<Dep_value_field>))
+            break; /* purecov: inspected */
+
+          List_iterator<Dep_value_field> it1(*old->mult_equal_fields);
+          List_iterator<Dep_value_field> it2(*new_fields->mult_equal_fields);
+          Dep_value_field *lfield= it1++;
+          Dep_value_field *rfield= it2++;
+          /* Intersect two ordered lists */
+          while (lfield && rfield)
+          {
+            if (lfield == rfield)
+            {
+              fv->push_back(lfield);
+              lfield=it1++;
+              rfield=it2++;
+            }
+            else
+            {
+              if (compare_field_values(lfield, rfield, NULL) < 0)
+                lfield= it1++;
+              else
+                rfield= it2++;
+            }
+          }
+
+          if (fv->elements + test(old->expr) > 1)
+          {
+            old->mult_equal_fields= fv;
+            old->level= and_level;
+          }
+        }
+        else if (!new_fields->expr->const_item())
+        {
+          /*
+            If the value matches, we can use the key reference.
+            If not, we keep it until we have examined all new values
+          */
+          if (old->expr->eq(new_fields->expr, 
+                            old->field->field->binary()))
+          {
+            old->level= and_level;
+          }
+        }
+        else if (old->expr->eq_by_collation(new_fields->expr,
+                                            old->field->field->binary(),
+                                            old->field->field->charset()))
+        {
+          old->level= and_level;
+        }
+        else
+        {
+          /* The expressions are different. */
+          if (old == --first_free)                // If last item
+            break;
+          *old= *first_free;                        // Remove old value
+          old--;                                // Retry this value
+        }
+      }
+    }
+  }
+
+  /* 
+    Ok, the results are within the [start, first_free) range, and the useful
+    elements have level==and_level. Now, remove all unusable elements:
+  */
+  for (Dep_module_expr *old=start ; old != first_free ;)
+  {
+    if (old->level != and_level)
+    {                                                // Not used in all levels
+      if (old == --first_free)
+        break;
+      *old= *first_free;                        // Remove old value
+      continue;
+    }
+    old++;
+  }
+  return first_free;
+}
+
+
+/*
+  Add an Dep_module_expr element for left=right condition
+
+  SYNOPSIS
+    check_equality()
+      fda               Table elimination context
+      eq_mod     INOUT  Store created Dep_module_expr here and increment ptr if
+                        you do so
+      and_level         AND-level (like in add_key_fields)
+      cond              Condition we've inferred the left=right equality from.
+      left              Left expression
+      right             Right expression
+      usable_tables     Create Dep_module_expr only if Left_expression's table 
+                        belongs to this set.
+
+  DESCRIPTION 
+    Check if the passed left=right equality is such that 
+     - 'left' is an Item_field referring to a field in a table we're checking
+       to be functionally depdendent,
+     - the equality allows to conclude that 'left' expression is functionally 
+       dependent on the 'right',
+    and if so, create an Dep_module_expr object.
+*/
+
+static 
+void check_equality(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
+                    uint and_level, Item_func *cond, Item *left, Item *right)
+{
+  if ((left->used_tables() & ctx->usable_tables) &&
+      !(right->used_tables() & RAND_TABLE_BIT) &&
+      left->real_item()->type() == Item::FIELD_ITEM)
+  {
+    Field *field= ((Item_field*)left->real_item())->field;
+    if (field->result_type() == STRING_RESULT)
+    {
+      if (right->result_type() != STRING_RESULT)
+      {
+        if (field->cmp_type() != right->result_type())
+          return;
+      }
+      else
+      {
+        /*
+          We can't assume there's a functional dependency if the effective
+          collation of the operation differ from the field collation.
+        */
+        if (field->cmp_type() == STRING_RESULT &&
+            ((Field_str*)field)->charset() != cond->compare_collation())
+          return;
+      }
+    }
+    Dep_value_field *field_val;
+    if ((field_val= ctx->get_field_value(field)))
+      add_module_expr(ctx, eq_mod, and_level, field_val, right, NULL);
+  }
+}
+
+
+/* 
+  Add a Dep_module_expr object with the specified parameters. 
+  
+  DESCRIPTION
+    Add a Dep_module_expr object with the specified parameters. Re-allocate
+    the ctx->equality_mods array if it has no space left.
+*/
+
+static 
+void add_module_expr(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
+                     uint and_level, Dep_value_field *field_val, 
+                     Item *right, List<Dep_value_field>* mult_equal_fields)
+{
+  if (*eq_mod == ctx->equality_mods + ctx->n_equality_mods_alloced)
+  {
+    /* 
+      We've filled the entire equality_mods array. Replace it with a bigger
+      one. We do it somewhat inefficiently but it doesn't matter.
+    */
+    /* purecov: begin inspected */
+    Dep_module_expr *new_arr;
+    if (!(new_arr= new Dep_module_expr[ctx->n_equality_mods_alloced *2]))
+      return;
+    ctx->n_equality_mods_alloced *= 2;
+    for (int i= 0; i < *eq_mod - ctx->equality_mods; i++)
+      new_arr[i]= ctx->equality_mods[i];
+
+    ctx->equality_mods= new_arr;
+    *eq_mod= new_arr + (*eq_mod - ctx->equality_mods);
+    /* purecov: end */
+  }
+
+  (*eq_mod)->field= field_val;
+  (*eq_mod)->expr= right;
+  (*eq_mod)->level= and_level;
+  (*eq_mod)->mult_equal_fields= mult_equal_fields;
+  (*eq_mod)++;
+}
+
+
+/*
+  Create a Dep_value_table object for the given table
+
+  SYNOPSIS
+    Dep_analysis_context::create_table_value()
+      table  Table to create object for
+
+  DESCRIPTION
+    Create a Dep_value_table object for the given table. Also create
+    Dep_module_key objects for all unique keys in the table.
+
+  RETURN
+    Created table value object
+    NULL if out of memory
+*/
+
+Dep_value_table *Dep_analysis_context::create_table_value(TABLE *table)
+{
+  Dep_value_table *tbl_dep;
+  if (!(tbl_dep= new Dep_value_table(table)))
+    return NULL; /* purecov: inspected */
+
+  Dep_module_key **key_list= &(tbl_dep->keys);
+  /* Add dependencies for unique keys */
+  for (uint i=0; i < table->s->keys; i++)
+  {
+    KEY *key= table->key_info + i; 
+    if (key->flags & HA_NOSAME)
+    {
+      Dep_module_key *key_dep;
+      if (!(key_dep= new Dep_module_key(tbl_dep, i, key->key_parts)))
+        return NULL;
+      *key_list= key_dep;
+      key_list= &(key_dep->next_table_key);
+    }
+  }
+  return table_deps[table->tablenr]= tbl_dep;
+}
+
+
+/* 
+  Get a Dep_value_field object for the given field, creating it if necessary
+
+  SYNOPSIS
+   Dep_analysis_context::get_field_value()
+      field  Field to create object for
+        
+  DESCRIPTION
+    Get a Dep_value_field object for the given field. First, we search for it 
+    in the list of Dep_value_field objects we have already created. If we don't 
+    find it, we create a new Dep_value_field and put it into the list of field
+    objects we have for the table.
+
+  RETURN
+    Created field value object
+    NULL if out of memory
+*/
+
+Dep_value_field *Dep_analysis_context::get_field_value(Field *field)
+{
+  TABLE *table= field->table;
+  Dep_value_table *tbl_dep= table_deps[table->tablenr];
+
+  /* Try finding the field in field list */
+  Dep_value_field **pfield= &(tbl_dep->fields);
+  while (*pfield && (*pfield)->field->field_index < field->field_index)
+  {
+    pfield= &((*pfield)->next_table_field);
+  }
+  if (*pfield && (*pfield)->field->field_index == field->field_index)
+    return *pfield;
+  
+  /* Create the field and insert it in the list */
+  Dep_value_field *new_field= new Dep_value_field(tbl_dep, field);
+  new_field->next_table_field= *pfield;
+  *pfield= new_field;
+
+  return new_field;
+}
+
+
+/* 
+  Iteration over unbound modules that are our dependencies.
+  for those we have:
+    - dependendencies of our fields
+    - outer join we're in 
+*/
+char *Dep_value_table::init_unbound_modules_iter(char *buf)
+{
+  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
+  iter->field_dep= fields;
+  if (fields)
+  {
+    fields->init_unbound_modules_iter(iter->buf);
+    fields->make_unbound_modules_iter_skip_keys(iter->buf);
+  }
+  iter->returned_goal= FALSE;
+  return (char*)iter;
+}
+
+
+Dep_module* 
+Dep_value_table::get_next_unbound_module(Dep_analysis_context *dac,
+                                         char *iter)
+{
+  Module_iter *di= (Module_iter*)iter;
+  while (di->field_dep)
+  {
+    Dep_module *res;
+    if ((res= di->field_dep->get_next_unbound_module(dac, di->buf)))
+      return res;
+    if ((di->field_dep= di->field_dep->next_table_field))
+    {
+      char *field_iter= ((Module_iter*)iter)->buf;
+      di->field_dep->init_unbound_modules_iter(field_iter);
+      di->field_dep->make_unbound_modules_iter_skip_keys(field_iter);
+    }
+  }
+  
+  if (!di->returned_goal)
+  {
+    di->returned_goal= TRUE;
+    return dac->outer_join_dep;
+  }
+  return NULL;
+}
+
+
+char *Dep_module_expr::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->field= field;
+  if (!field)
+  {
+    new (&iter->it) List_iterator<Dep_value_field>(*mult_equal_fields);
+  }
+  return (char*)iter;
+}
+
+
+Dep_value* Dep_module_expr::get_next_unbound_value(Dep_analysis_context *dac,
+                                                   char *buf)
+{
+  Dep_value *res;
+  if (field)
+  {
+    res= ((Value_iter*)buf)->field;
+    ((Value_iter*)buf)->field= NULL;
+    return (!res || res->is_bound())? NULL : res;
+  }
+  else
+  {
+    while ((res= ((Value_iter*)buf)->it++))
+    {
+      if (!res->is_bound())
+        return res;
+    }
+    return NULL;
+  }
+}
+
+
+char *Dep_module_key::init_unbound_values_iter(char *buf)
+{
+  Value_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Value_iter);
+  iter->table= table;
+  return (char*)iter;
+}
+
+
+Dep_value* Dep_module_key::get_next_unbound_value(Dep_analysis_context *dac,
+                                                  Dep_module::Iterator iter)
+{
+  Dep_value* res= ((Value_iter*)iter)->table;
+  ((Value_iter*)iter)->table= NULL;
+  return res;
+}
+
+
+Dep_value::Iterator Dep_value_field::init_unbound_modules_iter(char *buf)
+{
+  Module_iter *iter= ALIGN_PTR(my_ptrdiff_t(buf), Module_iter);
+  iter->key_dep= table->keys;
+  iter->equality_no= 0;
+  return (char*)iter;
+}
+
+
+void 
+Dep_value_field::make_unbound_modules_iter_skip_keys(Dep_value::Iterator iter)
+{
+  ((Module_iter*)iter)->key_dep= NULL;
+}
+
+
+Dep_module* Dep_value_field::get_next_unbound_module(Dep_analysis_context *dac,
+                                                     Dep_value::Iterator iter)
+{
+  Module_iter *di= (Module_iter*)iter;
+  Dep_module_key *key_dep= di->key_dep;
+  
+  /* 
+    First, enumerate all unique keys that are 
+    - not yet applicable
+    - have this field as a part of them
+  */
+  while (key_dep && (key_dep->is_applicable() ||
+         !field->part_of_key.is_set(key_dep->keyno)))
+  {
+    key_dep= key_dep->next_table_key;
+  }
+
+  if (key_dep)
+  {
+    di->key_dep= key_dep->next_table_key;
+    return key_dep;
+  }
+  else 
+    di->key_dep= NULL;
+  
+  /*
+    Then walk through [multi]equalities and find those that
+     - depend on this field
+     - and are not bound yet.
+  */
+  uint eq_no= di->equality_no;
+  while (eq_no < dac->n_equality_mods && 
+         (!bitmap_is_set(&dac->expr_deps, bitmap_offset + eq_no) ||
+         dac->equality_mods[eq_no].is_applicable()))
+  {
+    eq_no++;
+  }
+  
+  if (eq_no < dac->n_equality_mods)
+  {
+    di->equality_no= eq_no+1;
+    return &dac->equality_mods[eq_no];
+  }
+  return NULL;
+}
+
+
+/* 
+  Mark one table or the whole join nest as eliminated.
+*/
+
+static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl)
+{
+  TABLE *table;
+  /*
+    NOTE: there are TABLE_LIST object that have
+    tbl->table!= NULL && tbl->nested_join!=NULL and 
+    tbl->table == tbl->nested_join->join_list->element(..)->table
+  */
+  if (tbl->nested_join)
+  {
+    TABLE_LIST *child;
+    List_iterator<TABLE_LIST> it(tbl->nested_join->join_list);
+    while ((child= it++))
+      mark_as_eliminated(join, child);
+  }
+  else if ((table= tbl->table))
+  {
+    JOIN_TAB *tab= tbl->table->reginfo.join_tab;
+    if (!(join->const_table_map & tab->table->map))
+    {
+      DBUG_PRINT("info", ("Eliminated table %s", table->alias));
+      tab->type= JT_CONST;
+      join->eliminated_tables |= table->map;
+      join->const_table_map|= table->map;
+      set_position(join, join->const_tables++, tab, (KEYUSE*)0);
+    }
+  }
+
+  if (tbl->on_expr)
+    tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL);
+}
+
+
+#ifndef DBUG_OFF
+/* purecov: begin inspected */
+void Dep_analysis_context::dbug_print_deps()
+{
+  DBUG_ENTER("dbug_print_deps");
+  DBUG_LOCK_FILE;
+  
+  fprintf(DBUG_FILE,"deps {\n");
+  
+  /* Start with printing equalities */
+  for (Dep_module_expr *eq_mod= equality_mods; 
+       eq_mod != equality_mods + n_equality_mods; eq_mod++)
+  {
+    char buf[128];
+    String str(buf, sizeof(buf), &my_charset_bin);
+    str.length(0);
+    eq_mod->expr->print(&str, QT_ORDINARY);
+    if (eq_mod->field)
+    {
+      fprintf(DBUG_FILE, "  equality%ld: %s -> %s.%s\n", 
+              (long)(eq_mod - equality_mods),
+              str.c_ptr(),
+              eq_mod->field->table->table->alias,
+              eq_mod->field->field->field_name);
+    }
+    else
+    {
+      fprintf(DBUG_FILE, "  equality%ld: multi-equality", 
+              (long)(eq_mod - equality_mods));
+    }
+  }
+  fprintf(DBUG_FILE,"\n");
+
+  /* Then tables and their fields */
+  for (uint i=0; i < MAX_TABLES; i++)
+  {
+    Dep_value_table *table_dep;
+    if ((table_dep= table_deps[i]))
+    {
+      /* Print table */
+      fprintf(DBUG_FILE, "  table %s\n", table_dep->table->alias);
+      /* Print fields */
+      for (Dep_value_field *field_dep= table_dep->fields; field_dep; 
+           field_dep= field_dep->next_table_field)
+      {
+        fprintf(DBUG_FILE, "    field %s.%s ->", table_dep->table->alias,
+                field_dep->field->field_name);
+        uint ofs= field_dep->bitmap_offset;
+        for (uint bit= ofs; bit < ofs + n_equality_mods; bit++)
+        {
+          if (bitmap_is_set(&expr_deps, bit))
+            fprintf(DBUG_FILE, " equality%d ", bit - ofs);
+        }
+        fprintf(DBUG_FILE, "\n");
+      }
+    }
+  }
+  fprintf(DBUG_FILE,"\n}\n");
+  DBUG_UNLOCK_FILE;
+  DBUG_VOID_RETURN;
+}
+/* purecov: end */
+
+#endif 
+/**
+  @} (end of group Table_Elimination)
+*/
+
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index cce0bb262e7..9989bc57b24 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -219,7 +219,7 @@ sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name,
   File_option *param;
   DBUG_ENTER("sql_create_definition_file");
   DBUG_PRINT("enter", ("Dir: %s, file: %s, base 0x%lx",
-		       dir ? dir->str : "(null)",
+		       dir ? dir->str : "",
                        file_name->str, (ulong) base));
 
   if (dir)
diff --git a/sql/parse_file.h b/sql/parse_file.h
index 1833e90cadd..a075928c6d0 100644
--- a/sql/parse_file.h
+++ b/sql/parse_file.h
@@ -97,12 +97,12 @@ class File_parser: public Sql_alloc
 {
   char *buff, *start, *end;
   LEX_STRING file_type;
-  my_bool content_ok;
+  bool content_ok;
 public:
   File_parser() :buff(0), start(0), end(0), content_ok(0)
     { file_type.str= 0; file_type.length= 0; }
 
-  my_bool ok() { return content_ok; }
+  bool ok() { return content_ok; }
   LEX_STRING *type() { return &file_type; }
   my_bool parse(uchar* base, MEM_ROOT *mem_root,
 		struct File_option *parameters, uint required,
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index caf28fdd83e..00c03806f77 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -1358,7 +1358,7 @@ void partition_info::print_no_partition_found(TABLE *table_arg)
       if (part_expr->null_value)
         buf_ptr= (char*)"NULL";
       else
-        longlong2str(err_value, buf,
+        longlong10_to_str(err_value, buf,
                      part_expr->unsigned_flag ? 10 : -10);
       dbug_tmp_restore_column_map(table_arg->read_set, old_map);
     }
diff --git a/sql/password.c b/sql/password.c
index b77cb618a46..0e2b454ff4d 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -70,41 +70,12 @@
 /*
   New (MySQL 3.21+) random generation structure initialization
   SYNOPSIS
-    randominit()
+    my_rnd_init()
     rand_st    OUT  Structure to initialize
     seed1      IN   First initialization parameter
     seed2      IN   Second initialization parameter
 */
 
-void randominit(struct rand_struct *rand_st, ulong seed1, ulong seed2)
-{                                               /* For mysql 3.21.# */
-#ifdef HAVE_purify
-  bzero((char*) rand_st,sizeof(*rand_st));      /* Avoid UMC varnings */
-#endif
-  rand_st->max_value= 0x3FFFFFFFL;
-  rand_st->max_value_dbl=(double) rand_st->max_value;
-  rand_st->seed1=seed1%rand_st->max_value ;
-  rand_st->seed2=seed2%rand_st->max_value;
-}
-
-
-/*
-    Generate random number.
-  SYNOPSIS
-    my_rnd()
-    rand_st    INOUT  Structure used for number generation
-  RETURN VALUE
-    generated pseudo random number
-*/
-
-double my_rnd(struct rand_struct *rand_st)
-{
-  rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
-  rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
-  return (((double) rand_st->seed1)/rand_st->max_value_dbl);
-}
-
-
 /*
     Generate binary hash from raw text string 
     Used for Pre-4.1 password handling
@@ -184,7 +155,7 @@ void make_scrambled_password_323(char *to, const char *password)
 
 void scramble_323(char *to, const char *message, const char *password)
 {
-  struct rand_struct rand_st;
+  struct my_rnd_struct rand_st;
   ulong hash_pass[2], hash_message[2];
 
   if (password && password[0])
@@ -193,7 +164,7 @@ void scramble_323(char *to, const char *message, const char *password)
     const char *message_end= message + SCRAMBLE_LENGTH_323;
     hash_password(hash_pass,password, (uint) strlen(password));
     hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
-    randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+    my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
                hash_pass[1] ^ hash_message[1]);
     for (; message < message_end; message++)
       *to++= (char) (floor(my_rnd(&rand_st)*31)+64);
@@ -223,16 +194,16 @@ void scramble_323(char *to, const char *message, const char *password)
 */
 
 my_bool
-check_scramble_323(const char *scrambled, const char *message,
+check_scramble_323(const unsigned char *scrambled, const char *message,
                    ulong *hash_pass)
 {
-  struct rand_struct rand_st;
+  struct my_rnd_struct rand_st;
   ulong hash_message[2];
-  char buff[16],*to,extra;                      /* Big enough for check */
-  const char *pos;
+  uchar buff[16],*to,extra;                      /* Big enough for check */
+  const uchar *pos;
 
   hash_password(hash_message, message, SCRAMBLE_LENGTH_323);
-  randominit(&rand_st,hash_pass[0] ^ hash_message[0],
+  my_rnd_init(&rand_st,hash_pass[0] ^ hash_message[0],
              hash_pass[1] ^ hash_message[1]);
   to=buff;
   DBUG_ASSERT(sizeof(buff) > SCRAMBLE_LENGTH_323);
@@ -244,7 +215,7 @@ check_scramble_323(const char *scrambled, const char *message,
   to=buff;
   while (*scrambled)
   {
-    if (*scrambled++ != (char) (*to++ ^ extra))
+    if (*scrambled++ != (uchar) (*to++ ^ extra))
       return 1;                                 /* Wrong password */
   }
   return 0;
@@ -313,7 +284,8 @@ void make_password_from_salt_323(char *to, const ulong *salt)
     rand_st  INOUT structure used for number generation
 */
 
-void create_random_string(char *to, uint length, struct rand_struct *rand_st)
+void create_random_string(char *to, uint length,
+                          struct my_rnd_struct *rand_st)
 {
   char *end= to + length;
   /* Use pointer arithmetics as it is faster way to do so. */
@@ -510,7 +482,7 @@ scramble(char *to, const char *message, const char *password)
 */
 
 my_bool
-check_scramble(const char *scramble_arg, const char *message,
+check_scramble(const uchar *scramble_arg, const char *message,
                const uint8 *hash_stage2)
 {
   SHA1_CONTEXT sha1_context;
@@ -523,7 +495,7 @@ check_scramble(const char *scramble_arg, const char *message,
   mysql_sha1_input(&sha1_context, hash_stage2, SHA1_HASH_SIZE);
   mysql_sha1_result(&sha1_context, buf);
   /* encrypt scramble */
-    my_crypt((char *) buf, buf, (const uchar *) scramble_arg, SCRAMBLE_LENGTH);
+    my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH);
   /* now buf supposedly contains hash_stage1: so we can get hash_stage2 */
   mysql_sha1_reset(&sha1_context);
   mysql_sha1_input(&sha1_context, buf, SHA1_HASH_SIZE);
diff --git a/sql/procedure.h b/sql/procedure.h
index f34eb228f28..5e62f4dd5cc 100644
--- a/sql/procedure.h
+++ b/sql/procedure.h
@@ -16,6 +16,8 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+#ifndef PROCEDURE_INCLUDED
+#define PROCEDURE_INCLUDED
 
 /* When using sql procedures */
 
@@ -53,7 +55,11 @@ public:
   {
     init_make_field(tmp_field,field_type());
   }
-  unsigned int size_of() { return sizeof(*this);}  
+  unsigned int size_of() { return sizeof(*this);}
+  bool check_vcol_func_processor(uchar *int_arg) 
+  {
+    return trace_unsupported_by_check_vcol_func_processor("proc"); 
+  }
 };
 
 class Item_proc_real :public Item_proc
diff --git a/sql/protocol.cc b/sql/protocol.cc
index 953656d3a4f..1b5eaafbe7f 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -62,8 +62,6 @@ bool Protocol_binary::net_store_data(const uchar *from, size_t length)
 }
 
 
-
-
 /*
   net_store_data() - extended version with character set conversion.
   
@@ -83,6 +81,7 @@ bool Protocol::net_store_data(const uchar *from, size_t length,
   uint dummy_errors;
   /* Calculate maxumum possible result length */
   uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+
   if (conv_length > 250)
   {
     /*
@@ -349,24 +348,6 @@ static bool write_eof_packet(THD *thd, NET *net,
 }
 
 /**
-  Please client to send scrambled_password in old format.
-     
-  @param thd thread handle
-
-  @retval
-    0  ok
-  @retval
-   !0  error
-*/
-
-bool send_old_password_request(THD *thd)
-{
-  NET *net= &thd->net;
-  return my_net_write(net, eof_buff, 1) || net_flush(net);
-}
-
-
-/**
   @param thd Thread handler
   @param sql_errno The error code to send
   @param err A pointer to the error message
@@ -546,7 +527,6 @@ void Protocol::end_statement()
   DBUG_VOID_RETURN;
 }
 
-
 /**
   A default implementation of "OK" packet response to the client.
 
@@ -651,7 +631,8 @@ void Protocol::init(THD *thd_arg)
 
 void Protocol::end_partial_result_set(THD *thd_arg)
 {
-  net_send_eof(thd_arg, thd_arg->server_status, 0 /* no warnings, we're inside SP */);
+  net_send_eof(thd_arg, thd_arg->server_status,
+               0 /* no warnings, we're inside SP */);
 }
 
 
@@ -1001,8 +982,8 @@ bool Protocol_text::store(const char *from, size_t length,
 {
   CHARSET_INFO *tocs= this->thd->variables.character_set_results;
 #ifndef DBUG_OFF
-  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %s", field_pos,
-                      field_count, (length == 0? "" : from)));
+  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %.*s", field_pos,
+                      field_count, (int) length, (length == 0? "" : from)));
   DBUG_ASSERT(field_pos < field_count);
   DBUG_ASSERT(field_types == 0 ||
 	      field_types[field_pos] == MYSQL_TYPE_DECIMAL ||
@@ -1155,12 +1136,16 @@ bool Protocol_text::store(MYSQL_TIME *tm)
 #endif
   char buff[40];
   uint length;
-  length= sprintf(buff, "%04d-%02d-%02d %02d:%02d:%02d",
-                  (int) tm->year, (int) tm->month,
-                  (int) tm->day, (int) tm->hour,
-                  (int) tm->minute, (int) tm->second);
+  length= my_sprintf(buff,(buff, "%04d-%02d-%02d %02d:%02d:%02d",
+			   (int) tm->year,
+			   (int) tm->month,
+			   (int) tm->day,
+			   (int) tm->hour,
+			   (int) tm->minute,
+			   (int) tm->second));
   if (tm->second_part)
-    length+= sprintf(buff+length, ".%06d", (int) tm->second_part);
+    length+= my_sprintf(buff+length,(buff+length, ".%06d",
+                                     (int)tm->second_part));
   return net_store_data((uchar*) buff, length);
 }
 
@@ -1194,11 +1179,13 @@ bool Protocol_text::store_time(MYSQL_TIME *tm)
   char buff[40];
   uint length;
   uint day= (tm->year || tm->month) ? 0 : tm->day;
-  length= sprintf(buff, "%s%02ld:%02d:%02d", tm->neg ? "-" : "",
-                  (long) day*24L+(long) tm->hour, (int) tm->minute,
-                  (int) tm->second);
+  length= my_sprintf(buff,(buff, "%s%02ld:%02d:%02d",
+			   tm->neg ? "-" : "",
+			   (long) day*24L+(long) tm->hour,
+			   (int) tm->minute,
+			   (int) tm->second));
   if (tm->second_part)
-    length+= sprintf(buff+length, ".%06d", (int) tm->second_part);
+    length+= my_sprintf(buff+length,(buff+length, ".%06d", (int)tm->second_part));
   return net_store_data((uchar*) buff, length);
 }
 
diff --git a/sql/protocol.h b/sql/protocol.h
index f661c7663e5..1c86c6d6c49 100644
--- a/sql/protocol.h
+++ b/sql/protocol.h
@@ -204,7 +204,6 @@ public:
 void send_warning(THD *thd, uint sql_errno, const char *err=0);
 bool net_send_error(THD *thd, uint sql_errno, const char *err,
                     const char* sqlstate);
-bool send_old_password_request(THD *thd);
 uchar *net_store_data(uchar *to,const uchar *from, size_t length);
 uchar *net_store_data(uchar *to,int32 from);
 uchar *net_store_data(uchar *to,longlong from);
diff --git a/sql/records.cc b/sql/records.cc
index ccacdc33b36..a7af42fc86a 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -169,7 +169,8 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
     This is the most basic access method of a table using rnd_init,
     rnd_next and rnd_end. No indexes are used.
 */
-void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
+
+bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
 		      SQL_SELECT *select,
 		      int use_record_cache, bool print_error, 
                       bool disable_rr_cache)
@@ -208,7 +209,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     tempfile= &select->file;
   else
     tempfile= table->sort.io_cache;
-  if (tempfile && my_b_inited(tempfile)) // Test if ref-records was used
+  if (tempfile && my_b_inited(tempfile) &&
+      !(select && select->quick)) 
   {
     DBUG_PRINT("info",("using rr_from_tempfile"));
     info->read_record= (table->sort.addon_field ?
@@ -217,7 +219,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
     info->ref_pos=table->file->ref;
     if (!table->file->inited)
-      table->file->ha_rnd_init(0);
+      if (table->file->ha_rnd_init_with_error(0))
+        DBUG_RETURN(1);
 
     /*
       table->sort.addon_field is checked because if we use addon fields,
@@ -226,7 +229,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
     */
     if (!disable_rr_cache &&
         !table->sort.addon_field &&
-        ! (specialflag & SPECIAL_SAFE_MODE) &&
 	thd->variables.read_rnd_buff_size &&
 	!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
 	(table->db_stat & HA_READ_ONLY ||
@@ -254,7 +256,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   else if (table->sort.record_pointers)
   {
     DBUG_PRINT("info",("using record_pointers"));
-    table->file->ha_rnd_init(0);
+    if (table->file->ha_rnd_init_with_error(0))
+      DBUG_RETURN(1);
     info->cache_pos=table->sort.record_pointers;
     info->cache_end=info->cache_pos+ 
                     table->sort.found_records*info->ref_length;
@@ -265,7 +268,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   {
     DBUG_PRINT("info",("using rr_sequential"));
     info->read_record=rr_sequential;
-    table->file->ha_rnd_init(1);
+    if (table->file->ha_rnd_init_with_error(1))
+      DBUG_RETURN(1);
     /* We can use record cache if we don't update dynamic length tables */
     if (!table->no_cache &&
 	(use_record_cache > 0 ||
@@ -284,7 +288,7 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
       !table->file->pushed_cond)
     table->file->cond_push(select->cond);
 
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(0);
 } /* init_read_record */
 
 
@@ -340,6 +344,7 @@ static int rr_quick(READ_RECORD *info)
       break;
     }
   }
+  update_virtual_fields(info->thd, info->table);
   return tmp;
 }
 
@@ -359,7 +364,7 @@ static int rr_quick(READ_RECORD *info)
 
 static int rr_index_first(READ_RECORD *info)
 {
-  int tmp= info->file->index_first(info->record);
+  int tmp= info->file->ha_index_first(info->record);
   info->read_record= rr_index;
   if (tmp)
     tmp= rr_handle_error(info, tmp);
@@ -408,7 +413,7 @@ static int rr_index_last(READ_RECORD *info)
 
 static int rr_index(READ_RECORD *info)
 {
-  int tmp= info->file->index_next(info->record);
+  int tmp= info->file->ha_index_next(info->record);
   if (tmp)
     tmp= rr_handle_error(info, tmp);
   return tmp;
@@ -443,7 +448,7 @@ static int rr_index_desc(READ_RECORD *info)
 int rr_sequential(READ_RECORD *info)
 {
   int tmp;
-  while ((tmp=info->file->rnd_next(info->record)))
+  while ((tmp= info->file->ha_rnd_next(info->record)))
   {
     /*
       rnd_next can return RECORD_DELETED for MyISAM when one thread is
@@ -455,6 +460,8 @@ int rr_sequential(READ_RECORD *info)
       break;
     }
   }
+  if (!tmp)
+    update_virtual_fields(info->thd, info->table);
   return tmp;
 }
 
@@ -466,7 +473,7 @@ static int rr_from_tempfile(READ_RECORD *info)
   {
     if (my_b_read(info->io_cache,info->ref_pos,info->ref_length))
       return -1;					/* End of file */
-    if (!(tmp=info->file->rnd_pos(info->record,info->ref_pos)))
+    if (!(tmp= info->file->ha_rnd_pos(info->record,info->ref_pos)))
       break;
     /* The following is extremely unlikely to happen */
     if (tmp == HA_ERR_RECORD_DELETED ||
@@ -517,7 +524,7 @@ static int rr_from_pointers(READ_RECORD *info)
     cache_pos= info->cache_pos;
     info->cache_pos+= info->ref_length;
 
-    if (!(tmp=info->file->rnd_pos(info->record,cache_pos)))
+    if (!(tmp= info->file->ha_rnd_pos(info->record,cache_pos)))
       break;
 
     /* The following is extremely unlikely to happen */
@@ -580,7 +587,7 @@ static int init_rr_cache(THD *thd, READ_RECORD *info)
 					   info->struct_length+1,
 					   MYF(0))))
     DBUG_RETURN(1);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   // Avoid warnings in qsort
   bzero(info->cache,rec_cache_size+info->cache_records* info->struct_length+1);
 #endif
@@ -650,7 +657,7 @@ static int rr_from_cache(READ_RECORD *info)
       record=uint3korr(position);
       position+=3;
       record_pos=info->cache+record*info->reclength;
-      if ((error=(int16) info->file->rnd_pos(record_pos,info->ref_pos)))
+      if ((error=(int16) info->file->ha_rnd_pos(record_pos,info->ref_pos)))
       {
 	record_pos[info->error_offset]=1;
 	shortstore(record_pos,error);
diff --git a/sql/records.h b/sql/records.h
index 95464a11b4b..fa9bfe3df59 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -25,6 +25,7 @@ class handler;
 struct TABLE;
 class THD;
 class SQL_SELECT;
+class Copy_field;
 
 /**
   A context for reading through a single table using a chosen access method:
@@ -63,11 +64,17 @@ struct READ_RECORD
   struct st_io_cache *io_cache;
   bool print_error, ignore_not_found_rows;
 
+  /* 
+    SJ-Materialization runtime may need to read fields from the materialized
+    table and unpack them into original table fields:
+  */
+  Copy_field *copy_field;
+  Copy_field *copy_field_end;
 public:
   READ_RECORD() {}
 };
 
-void init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form,
+bool init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form,
 		      SQL_SELECT *select, int use_record_cache,
                       bool print_errors, bool disable_rr_cache);
 void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc
index 42a9a034efd..6dfaa1bf253 100644
--- a/sql/rpl_filter.cc
+++ b/sql/rpl_filter.cc
@@ -48,6 +48,7 @@ Rpl_filter::~Rpl_filter()
 }
 
 
+#ifndef MYSQL_CLIENT
 /*
   Returns true if table should be logged/replicated 
 
@@ -94,7 +95,7 @@ Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables)
   
   for (; tables; tables= tables->next_global)
   {
-    char hash_key[2*NAME_LEN+2];
+    char hash_key[SAFE_NAME_LEN*2+2];
     char *end;
     uint len;
 
@@ -132,6 +133,7 @@ Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables)
               !do_table_inited && !wild_do_table_inited);
 }
 
+#endif
 
 /*
   Checks whether a db matches some do_db and ignore_db rules
@@ -158,8 +160,10 @@ Rpl_filter::db_ok(const char* db)
     and db was not selected, do not replicate.
   */
   if (!db)
+  {
+    DBUG_PRINT("exit", ("Don't replicate"));
     DBUG_RETURN(0);
-
+  }
   if (!do_db.is_empty()) // if the do's are not empty
   {
     I_List_iterator<i_string> it(do_db);
@@ -170,6 +174,7 @@ Rpl_filter::db_ok(const char* db)
       if (!strcmp(tmp->ptr, db))
 	DBUG_RETURN(1); // match
     }
+    DBUG_PRINT("exit", ("Don't replicate"));
     DBUG_RETURN(0);
   }
   else // there are some elements in the don't, otherwise we cannot get here
@@ -180,7 +185,10 @@ Rpl_filter::db_ok(const char* db)
     while ((tmp=it++))
     {
       if (!strcmp(tmp->ptr, db))
+      {
+        DBUG_PRINT("exit", ("Don't replicate"));
 	DBUG_RETURN(0); // match
+      }
     }
     DBUG_RETURN(1);
   }
@@ -222,7 +230,7 @@ Rpl_filter::db_ok_with_wild_table(const char *db)
 {
   DBUG_ENTER("Rpl_filter::db_ok_with_wild_table");
 
-  char hash_key[NAME_LEN+2];
+  char hash_key[SAFE_NAME_LEN+2];
   char *end;
   int len;
   end= strmov(hash_key, db);
@@ -517,6 +525,13 @@ Rpl_filter::get_wild_ignore_table(String* str)
 }
 
 
+bool
+Rpl_filter::rewrite_db_is_empty()
+{
+  return rewrite_db.is_empty();
+}
+
+
 const char*
 Rpl_filter::get_rewrite_db(const char* db, size_t *new_len)
 {
diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h
index 20f5ce8efc4..3a261db6754 100644
--- a/sql/rpl_filter.h
+++ b/sql/rpl_filter.h
@@ -48,7 +48,9 @@ public:
  
   /* Checks - returns true if ok to replicate/log */
 
-  bool tables_ok(const char* db, TABLE_LIST* tables);
+#ifndef MYSQL_CLIENT
+  bool tables_ok(const char* db, TABLE_LIST *tables);
+#endif 
   bool db_ok(const char* db);
   bool db_ok_with_wild_table(const char *db);
 
@@ -75,6 +77,7 @@ public:
   void get_wild_do_table(String* str);
   void get_wild_ignore_table(String* str);
 
+  bool rewrite_db_is_empty();
   const char* get_rewrite_db(const char* db, size_t *new_len);
 
   I_List<i_string>* get_do_db();
diff --git a/sql/rpl_injector.cc b/sql/rpl_injector.cc
index 75ccb617e9e..43f8ddbaf43 100644
--- a/sql/rpl_injector.cc
+++ b/sql/rpl_injector.cc
@@ -40,6 +40,7 @@ injector::transaction::transaction(MYSQL_BIN_LOG *log, THD *thd)
   m_start_pos.m_file_name= my_strdup(log_info.log_file_name, MYF(0));
   m_start_pos.m_file_pos= log_info.pos;
 
+  m_thd->lex->start_transaction_opt= 0; /* for begin_trans() */
   trans_begin(m_thd);
 }
 
diff --git a/sql/rpl_injector.h b/sql/rpl_injector.h
index 36cb6aaa184..3cb4fa8301e 100644
--- a/sql/rpl_injector.h
+++ b/sql/rpl_injector.h
@@ -290,7 +290,7 @@ public:
           "START_STATE", "TABLE_STATE", "ROW_STATE", "STATE_COUNT"
         };
 
-        DBUG_ASSERT(0 <= target_state && target_state <= STATE_COUNT);
+        DBUG_ASSERT(target_state <= STATE_COUNT);
         DBUG_PRINT("info", ("In state %s", state_name[m_state]));
 #endif
 
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index cda876ee764..646593ed98f 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -50,6 +50,14 @@ Master_info::Master_info(bool is_slave_recovery)
   mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
   mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
   mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
+
+#ifdef SAFE_MUTEX
+  /* Define mutex order for locks to find wrong lock usage */
+  mysql_mutex_lock(&data_lock);
+  mysql_mutex_lock(&run_lock);
+  mysql_mutex_unlock(&run_lock);
+  mysql_mutex_unlock(&data_lock);
+#endif
 }
 
 Master_info::~Master_info()
diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h
index c3734fdf59e..22ac6527c18 100644
--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -68,10 +68,10 @@ class Master_info : public Slave_reporting_capability
   char host[HOSTNAME_LENGTH+1];
   char user[USERNAME_LENGTH+1];
   char password[MAX_PASSWORD_LENGTH+1];
-  my_bool ssl; // enables use of SSL connection if true
+  bool ssl; // enables use of SSL connection if true
   char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN];
   char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN];
-  my_bool ssl_verify_server_cert;
+  bool ssl_verify_server_cert;
 
   my_off_t master_log_pos;
   File fd; // we keep the file open, so we need to remember the file pointer
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index af9b452acd8..41550e96377 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -41,7 +41,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
    sync_counter(0), is_relay_log_recovery(is_slave_recovery),
    save_temporary_tables(0), cur_log_old_open_count(0), group_relay_log_pos(0), 
    event_relay_log_pos(0),
-#if HAVE_purify
+#if HAVE_valgrind
    is_fake(FALSE),
 #endif
    group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index bf8381933c0..c3cfe1cf4a9 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -170,7 +170,7 @@ public:
   ulonglong event_relay_log_pos;
   ulonglong future_event_relay_log_pos;
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   bool is_fake; /* Mark that this is a fake relay log info structure */
 #endif
 
diff --git a/sql/scheduler.cc b/sql/scheduler.cc
index d61a452b99e..4a5a99bf40e 100644
--- a/sql/scheduler.cc
+++ b/sql/scheduler.cc
@@ -27,6 +27,7 @@
 #include "sql_connect.h"         // init_new_connection_handler_thread
 #include "scheduler.h"
 #include "sql_callback.h"
+#include "sql_audit.h"
 
 /*
   End connection, in case when we are using 'no-threads'
@@ -42,6 +43,7 @@ static bool no_threads_end(THD *thd, bool put_in_cache)
 static scheduler_functions one_thread_scheduler_functions=
 {
   1,                                     // max_threads
+  NULL, NULL,
   NULL,                                  // init
   init_new_connection_handler_thread,    // init_new_connection_thread
 #ifndef EMBEDDED_LIBRARY
@@ -60,6 +62,7 @@ static scheduler_functions one_thread_scheduler_functions=
 static scheduler_functions one_thread_per_connection_scheduler_functions=
 {
   0,                                     // max_threads
+  NULL, NULL,
   NULL,                                  // init
   init_new_connection_handler_thread,    // init_new_connection_thread
   create_thread_to_handle_connection,    // add_connection
@@ -71,9 +74,6 @@ static scheduler_functions one_thread_per_connection_scheduler_functions=
 };
 #endif  // EMBEDDED_LIBRARY
 
-
-scheduler_functions *thread_scheduler= NULL;
-
 /** @internal
   Helper functions to allow mysys to call the thread scheduler when
   waiting for locks.
@@ -81,12 +81,16 @@ scheduler_functions *thread_scheduler= NULL;
 
 /**@{*/
 static void scheduler_wait_begin(void) {
-  MYSQL_CALLBACK(thread_scheduler,
-                 thd_wait_begin, (current_thd, THD_WAIT_ROW_TABLE_LOCK));
+  THD *thd=current_thd;
+  scheduler_functions *func= thd->scheduler;
+  MYSQL_CALLBACK(func,
+                 thd_wait_begin, (thd, THD_WAIT_ROW_TABLE_LOCK));
 }
 
 static void scheduler_wait_end(void) {
-  MYSQL_CALLBACK(thread_scheduler, thd_wait_end, (current_thd));
+  THD *thd=current_thd;
+  scheduler_functions *func= thd->scheduler;
+  MYSQL_CALLBACK(func, thd_wait_end, (thd));
 }
 /**@}*/
 
@@ -98,7 +102,8 @@ static void scheduler_wait_end(void) {
   mysqld.cc, so this init function will always be called.
  */
 static void scheduler_init() {
-  thr_set_lock_wait_callback(scheduler_wait_begin, scheduler_wait_end);
+  mysys_var->scheduler_before_lock_wait= &scheduler_wait_begin;
+  mysys_var->scheduler_after_lock_wait= &scheduler_wait_end;
 }
 
 /*
@@ -106,11 +111,15 @@ static void scheduler_init() {
 */
 
 #ifndef EMBEDDED_LIBRARY
-void one_thread_per_connection_scheduler()
+scheduler_functions *one_thread_per_connection_scheduler(
+    ulong *arg_max_connections,
+    uint *arg_connection_count)
 {
   scheduler_init();
-  one_thread_per_connection_scheduler_functions.max_threads= max_connections;
-  thread_scheduler= &one_thread_per_connection_scheduler_functions;
+  one_thread_per_connection_scheduler_functions.max_threads= *arg_max_connections + 1;
+  one_thread_per_connection_scheduler_functions.max_connections= arg_max_connections;
+  one_thread_per_connection_scheduler_functions.connection_count= arg_connection_count;
+  return &one_thread_per_connection_scheduler_functions;
 }
 #endif
 
@@ -118,10 +127,10 @@ void one_thread_per_connection_scheduler()
   Initailize scheduler for --thread-handling=no-threads
 */
 
-void one_thread_scheduler()
+scheduler_functions *one_thread_scheduler()
 {
   scheduler_init();
-  thread_scheduler= &one_thread_scheduler_functions;
+  return &one_thread_scheduler_functions;
 }
 
 
@@ -148,6 +157,12 @@ thd_scheduler::~thd_scheduler()
 {
 }
 
+/*
+  no pluggable schedulers in mariadb.
+  when we'll want it, we'll do it properly
+*/
+#if 0
+
 static scheduler_functions *saved_thread_scheduler;
 static uint saved_thread_handling;
 
@@ -181,6 +196,13 @@ int my_thread_scheduler_reset()
   saved_thread_scheduler= 0;
   return 0;
 }
+#else
+extern "C" int my_thread_scheduler_set(scheduler_functions *scheduler)
+{ return 1; }
 
+extern "C" int my_thread_scheduler_reset()
+{ return 1; }
+#endif
 
+#warning restore libevent
 
diff --git a/sql/scheduler.h b/sql/scheduler.h
index 40f0e28bc2c..32ba3e63953 100644
--- a/sql/scheduler.h
+++ b/sql/scheduler.h
@@ -30,7 +30,8 @@ class THD;
 
 struct scheduler_functions
 {
-  uint max_threads;
+  uint max_threads, *connection_count;
+  ulong *max_connections;
   bool (*init)(void);
   bool (*init_new_connection_thread)(void);
   void (*add_connection)(THD *thd);
@@ -62,8 +63,8 @@ enum scheduler_types
   SCHEDULER_TYPES_COUNT
 };
 
-void one_thread_per_connection_scheduler();
-void one_thread_scheduler();
+scheduler_functions *one_thread_per_connection_scheduler(ulong *, uint *);
+scheduler_functions *one_thread_scheduler();
 
 enum pool_command_op
 {
@@ -99,6 +100,4 @@ public:
   ~thd_scheduler();
 };
 
-extern scheduler_functions *thread_scheduler;
-
 #endif
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 9e7fdbfeae5..06f8ca5e507 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -2044,7 +2044,7 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
         ukr "Не можна використовувати сталу довжину строки з BLOB. Зкористайтеся 'fields terminated by'"
 ER_TEXTFILE_NOT_READABLE  
         cze "Soubor '%-.128s' mus-Bí být v adresáři databáze nebo čitelný pro všechny"
-        dan "Filen '%-.128s' skal være i database-folderen og kunne læses af alle"
+        dan "Filen '%-.128s' skal være i database-folderen, eller kunne læses af alle"
         nla "Het bestand '%-.128s' dient in de database directory voor the komen of leesbaar voor iedereen te zijn."
         eng "The file '%-.128s' must be in the database directory or be readable by all"
         jps "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません.",
@@ -3173,22 +3173,22 @@ ER_CANT_CREATE_THREAD
         swe "Kan inte skapa en ny tråd (errno %d)"
         ukr "Не можу створити нову гілку (помилка %d). Якщо ви не використали усю пам'ять, то прочитайте документацію до вашої ОС - можливо це помилка ОС"
 ER_WRONG_VALUE_COUNT_ON_ROW 21S01 
-        cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %ld"
-        dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %ld"
-        nla "Kolom aantal komt niet overeen met waarde aantal in rij %ld"
-        eng "Column count doesn't match value count at row %ld"
-        est "Tulpade hulk erineb väärtuste hulgast real %ld"
-        ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %ld überein"
-        hun "Az oszlopban talalhato ertek nem egyezik meg a %ld sorban szamitott ertekkel"
-        ita "Il numero delle colonne non corrisponde al conteggio alla riga %ld"
-        kor "Row %ld에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
-        por "Contagem de colunas não confere com a contagem de valores na linha %ld"
-        rum "Numarul de coloane nu corespunde cu numarul de valori la linia %ld"
-        rus "Количество столбцов не совпадает с количеством значений в записи %ld"
-        serbian "Broj kolona ne odgovara broju vrednosti u slogu %ld"
-        spa "El número de columnas no corresponde al número en la línea %ld"
-        swe "Antalet kolumner motsvarar inte antalet värden på rad: %ld"
-        ukr "Кількість стовбців не співпадає з кількістю значень у строці %ld"
+        cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %lu"
+        dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %lu"
+        nla "Kolom aantal komt niet overeen met waarde aantal in rij %lu"
+        eng "Column count doesn't match value count at row %lu"
+        est "Tulpade hulk erineb väärtuste hulgast real %lu"
+        ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %lu überein"
+        hun "Az oszlopban talalhato ertek nem egyezik meg a %lu sorban szamitott ertekkel"
+        ita "Il numero delle colonne non corrisponde al conteggio alla riga %lu"
+        kor "Row %lu에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
+        por "Contagem de colunas não confere com a contagem de valores na linha %lu"
+        rum "Numarul de coloane nu corespunde cu numarul de valori la linia %lu"
+        rus "Количество столбцов не совпадает с количеством значений в записи %lu"
+        serbian "Broj kolona ne odgovara broju vrednosti u slogu %lu"
+        spa "El número de columnas no corresponde al número en la línea %lu"
+        swe "Antalet kolumner motsvarar inte antalet värden på rad: %lu"
+        ukr "Кількість стовбців не співпадає з кількістю значень у строці %lu"
 ER_CANT_REOPEN_TABLE  
         cze "Nemohu znovuotev-Břít tabulku: '%-.192s"
         dan "Kan ikke genåbne tabel '%-.192s"
@@ -4881,29 +4881,29 @@ ER_ZLIB_Z_DATA_ERROR
 ER_CUT_VALUE_GROUP_CONCAT  
         eng "Row %u was cut by GROUP_CONCAT()"
 ER_WARN_TOO_FEW_RECORDS 01000 
-        eng "Row %ld doesn't contain data for all columns"
-        ger "Zeile %ld enthält nicht für alle Felder Daten"
-        nla "Rij %ld bevat niet de data voor alle kolommen"
-        por "Conta de registro é menor que a conta de coluna na linha %ld"
-        spa "Línea %ld no contiene datos para todas las columnas"
+        eng "Row %lu doesn't contain data for all columns"
+        ger "Zeile %lu enthält nicht für alle Felder Daten"
+        nla "Rij %lu bevat niet de data voor alle kolommen"
+        por "Conta de registro é menor que a conta de coluna na linha %lu"
+        spa "Línea %lu no contiene datos para todas las columnas"
 ER_WARN_TOO_MANY_RECORDS 01000 
-        eng "Row %ld was truncated; it contained more data than there were input columns"
-        ger "Zeile %ld gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
-        nla "Regel %ld ingekort, bevatte meer data dan invoer kolommen"
-        por "Conta de registro é maior que a conta de coluna na linha %ld"
-        spa "Línea %ld fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
+        eng "Row %lu was truncated; it contained more data than there were input columns"
+        ger "Zeile %lu gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
+        nla "Regel %lu ingekort, bevatte meer data dan invoer kolommen"
+        por "Conta de registro é maior que a conta de coluna na linha %lu"
+        spa "Línea %lu fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
 ER_WARN_NULL_TO_NOTNULL 22004 
-        eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %ld"
-        ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %ld angegeben"
-        por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %ld"
-        spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %ld"
+        eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %lu"
+        ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %lu angegeben"
+        por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %lu"
+        spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %lu"
 ER_WARN_DATA_OUT_OF_RANGE 22003 
-        eng "Out of range value for column '%s' at row %ld"
+        eng "Out of range value for column '%s' at row %lu"
 WARN_DATA_TRUNCATED 01000 
-        eng "Data truncated for column '%s' at row %ld"
-        ger "Daten abgeschnitten für Feld '%s' in Zeile %ld"
-        por "Dado truncado para coluna '%s' na linha %ld"
-        spa "Datos truncados para columna '%s' en la línea %ld"
+        eng "Data truncated for column '%s' at row %lu"
+        ger "Daten abgeschnitten für Feld '%s' in Zeile %lu"
+        por "Dado truncado para coluna '%s' na linha %lu"
+        spa "Datos truncados para columna '%s' en la línea %lu"
 ER_WARN_USING_OTHER_HANDLER  
         eng "Using storage engine %s for table '%s'"
         ger "Für Tabelle '%s' wird Speicher-Engine %s benutzt"
@@ -5082,8 +5082,8 @@ ER_UNKNOWN_TIME_ZONE
         eng "Unknown or incorrect time zone: '%-.64s'"
         ger "Unbekannte oder falsche Zeitzone: '%-.64s'"
 ER_WARN_INVALID_TIMESTAMP  
-        eng "Invalid TIMESTAMP value in column '%s' at row %ld"
-        ger "Ungültiger TIMESTAMP-Wert in Feld '%s', Zeile %ld"
+        eng "Invalid TIMESTAMP value in column '%s' at row %lu"
+        ger "Ungültiger TIMESTAMP-Wert in Feld '%s', Zeile %lu"
 ER_INVALID_CHARACTER_STRING  
         eng "Invalid %s character string: '%.64s'"
         ger "Ungültiger %s-Zeichen-String: '%.64s'"
@@ -5314,8 +5314,8 @@ ER_DIVISION_BY_ZERO 22012
         eng "Division by 0"
         ger "Division durch 0"
 ER_TRUNCATED_WRONG_VALUE_FOR_FIELD  
-        eng "Incorrect %-.32s value: '%-.128s' for column '%.192s' at row %ld"
-        ger "Falscher %-.32s-Wert: '%-.128s' für Feld '%.192s' in Zeile %ld"
+        eng "Incorrect %-.32s value: '%-.128s' for column '%.192s' at row %lu"
+        ger "Falscher %-.32s-Wert: '%-.128s' für Feld '%.192s' in Zeile %lu"
 ER_ILLEGAL_VALUE_FOR_TYPE 22007 
         eng "Illegal %s '%-.192s' value found during parsing"
         ger "Nicht zulässiger %s-Wert '%-.192s' beim Parsen gefunden"
@@ -5448,8 +5448,8 @@ ER_PROC_AUTO_REVOKE_FAIL
         eng "Failed to revoke all privileges to dropped routine"
         ger "Rücknahme aller Rechte für die gelöschte Routine fehlgeschlagen"
 ER_DATA_TOO_LONG 22001
-        eng "Data too long for column '%s' at row %ld"
-        ger "Daten zu lang für Feld '%s' in Zeile %ld"
+        eng "Data too long for column '%s' at row %lu"
+        ger "Daten zu lang für Feld '%s' in Zeile %lu"
 ER_SP_BAD_SQLSTATE 42000
         eng "Bad SQLSTATE: '%s'"
         ger "Ungültiger SQLSTATE: '%s'"
@@ -6346,3 +6346,30 @@ ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN
 ER_FAILED_READ_FROM_PAR_FILE
   eng "Failed to read from the .par file"
   swe "Misslyckades läsa från .par filen"
+
+ER_VCOL_BASED_ON_VCOL
+  eng "A computed column cannot be based on a computed column"
+ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED
+  eng "Function or expression is not allowed for column '%s'"
+ER_DATA_CONVERSION_ERROR_FOR_VIRTUAL_COLUMN
+  eng "Generated value for computed column '%s' cannot be converted to type '%s'"
+ER_PRIMARY_KEY_BASED_ON_VIRTUAL_COLUMN
+  eng "Primary key cannot be defined upon a computed column"
+ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN
+  eng "Key/Index cannot be defined on a non-stored computed column"
+ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN
+  eng "Cannot define foreign key with %s clause on a computed column"
+ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN
+  eng "The value specified for computed column '%s' in table '%s' ignored"
+ER_UNSUPPORTED_ACTION_ON_VIRTUAL_COLUMN
+  eng "'%s' is not yet supported for computed columns"
+ER_CONST_EXPR_IN_VCOL
+  eng "Constant expression in computed column function is not allowed"
+ER_ROW_EXPR_FOR_VCOL
+  eng "Expression for computed column cannot return a row"
+
+ER_UNKNOWN_OPTION
+  eng "Unknown option '%-.64s'"
+ER_BAD_OPTION_VALUE
+  eng "Incorrect value '%-.64s' for option '%-.64s'"
+
diff --git a/sql/slave.cc b/sql/slave.cc
index dff6b6946d4..ec693964726 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1146,7 +1146,6 @@ err:
   DBUG_RETURN(ret);
 }
 
-
 /*
   Check if the error is caused by network.
   @param[in]   errorno   Number of the error.
@@ -1485,18 +1484,27 @@ be equal for the Statement-format replication to work";
     }
     else if (check_io_slave_killed(mi->io_thd, mi, NULL))
       goto slave_killed_err;
-    else if (is_network_error(mysql_errno(mysql)))
+    else if (is_network_error(err_code= mysql_errno(mysql)))
     {
-      mi->report(WARNING_LEVEL, mysql_errno(mysql),
-                 "Get master TIME_ZONE failed with error: %s", mysql_error(mysql));
+      mi->report(ERROR_LEVEL, err_code,
+                 "Get master TIME_ZONE failed with error: %s",
+                 mysql_error(mysql));
       goto network_err;
-    } 
+    }
+    else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
+    {
+      /* We use ERROR_LEVEL to get the error logged to file */
+      mi->report(ERROR_LEVEL, err_code,
+
+                 "MySQL master doesn't have a TIME_ZONE variable. Note that"
+                 "if your timezone is not same between master and slave, your "
+                 "slave may get wrong data into timestamp columns");
+    }
     else
     {
       /* Fatal error */
       errmsg= "The slave I/O thread stops because a fatal error is encountered \
 when it try to get the value of TIME_ZONE global variable from master.";
-      err_code= mysql_errno(mysql);
       sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
       goto err;
     }
@@ -2019,6 +2027,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
     + MAX_LOG_EVENT_HEADER;  /* note, incr over the global not session var */
   thd->slave_thread = 1;
   thd->enable_slow_log= opt_log_slow_slave_statements;
+  thd->variables.log_slow_filter= global_system_variables.log_slow_filter;
   set_slave_thread_options(thd);
   thd->client_capabilities = CLIENT_LOCAL_FILES;
   mysql_mutex_lock(&LOCK_thread_count);
@@ -2348,7 +2357,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli)
   if (exec_res == 0)
   {
     int error= ev->update_pos(rli);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     if (!rli->is_fake)
 #endif
     {
@@ -2504,6 +2513,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
     if (slave_trans_retries)
     {
       int temp_err;
+      LINT_INIT(temp_err);
       if (exec_res && (temp_err= has_temporary_error(thd)))
       {
         const char *errmsg;
@@ -3115,7 +3125,6 @@ pthread_handler_t handle_slave_sql(void *arg)
   my_off_t UNINIT_VAR(saved_log_pos);
   my_off_t UNINIT_VAR(saved_master_log_pos);
   my_off_t saved_skip= 0;
-
   Relay_log_info* rli = &((Master_info*)arg)->rli;
   const char *errmsg;
 
@@ -3123,6 +3132,8 @@ pthread_handler_t handle_slave_sql(void *arg)
   my_thread_init();
   DBUG_ENTER("handle_slave_sql");
 
+  LINT_INIT(saved_master_log_pos);
+  LINT_INIT(saved_log_pos);
   DBUG_ASSERT(rli->inited);
   mysql_mutex_lock(&rli->run_lock);
   DBUG_ASSERT(!rli->slave_running);
@@ -4199,10 +4210,11 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
       suppress_warnings= 0;
       mi->report(ERROR_LEVEL, last_errno,
                  "error %s to master '%s@%s:%d'"
-                 " - retry-time: %d  retries: %lu",
+                 " - retry-time: %d  retries: %lu  message: %s",
                  (reconnect ? "reconnecting" : "connecting"),
                  mi->user, mi->host, mi->port,
-                 mi->connect_retry, master_retry_count);
+                 mi->connect_retry, master_retry_count,
+                 mysql_error(mysql));
     }
     /*
       By default we try forever. The reason is that failure will trigger
@@ -4374,11 +4386,11 @@ bool flush_relay_log_info(Relay_log_info* rli)
   my_b_seek(file, 0L);
   pos=strmov(buff, rli->group_relay_log_name);
   *pos++='\n';
-  pos=longlong2str(rli->group_relay_log_pos, pos, 10);
+  pos= longlong10_to_str(rli->group_relay_log_pos, pos, 10);
   *pos++='\n';
   pos=strmov(pos, rli->group_master_log_name);
   *pos++='\n';
-  pos=longlong2str(rli->group_master_log_pos, pos, 10);
+  pos=longlong10_to_str(rli->group_master_log_pos, pos, 10);
   *pos='\n';
   if (my_b_write(file, (uchar*) buff, (size_t) (pos-buff)+1))
     error=1;
diff --git a/sql/slave.h b/sql/slave.h
index 4073c90b612..02095fad6b1 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -111,7 +111,8 @@ extern MY_BITMAP slave_error_mask;
 extern char slave_skip_error_names[];
 extern bool use_slave_mask;
 extern char *slave_load_tmpdir;
-extern char *master_info_file, *relay_log_info_file;
+extern char *master_info_file;
+extern MYSQL_PLUGIN_IMPORT char *relay_log_info_file;
 extern char *opt_relay_logname, *opt_relaylog_index_name;
 extern my_bool opt_skip_slave_start, opt_reckless_slave;
 extern my_bool opt_log_slave_updates;
@@ -222,7 +223,7 @@ extern int disconnect_slave_event_count, abort_slave_event_count ;
 /* the master variables are defaults read from my.cnf or command line */
 extern uint master_port, master_connect_retry, report_port;
 extern char * master_user, *master_password, *master_host;
-extern char *master_info_file, *relay_log_info_file, *report_user;
+extern char *master_info_file, *report_user;
 extern char *report_host, *report_password;
 
 extern my_bool master_ssl;
diff --git a/sql/sp.cc b/sql/sp.cc
index 8821dc9365d..e6cf43c73a8 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -128,7 +128,8 @@ TABLE_FIELD_TYPE proc_table_fields[MYSQL_PROC_FIELD_COUNT] =
   {
     { C_STRING_WITH_LEN("sql_mode") },
     { C_STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES',"
-    "'IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION',"
+    "'IGNORE_SPACE','IGNORE_BAD_TABLE_OPTIONS','ONLY_FULL_GROUP_BY',"
+    "'NO_UNSIGNED_SUBTRACTION',"
     "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB',"
     "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40',"
     "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES',"
@@ -496,8 +497,9 @@ db_find_routine_aux(THD *thd, int type, sp_name *name, TABLE *table)
   key_copy(key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
     DBUG_RETURN(SP_KEY_NOT_FOUND);
 
   DBUG_RETURN(SP_OK);
@@ -765,7 +767,7 @@ db_load_routine(THD *thd, int type, sp_name *name, sp_head **sphp,
 {
   LEX *old_lex= thd->lex, newlex;
   String defstr;
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed;
@@ -1368,6 +1370,7 @@ sp_drop_db_routines(THD *thd, char *db)
   int ret;
   uint key_len;
   MDL_ticket *mdl_savepoint= thd->mdl_context.mdl_savepoint();
+  uchar keybuf[MAX_KEY_LENGTH];
   DBUG_ENTER("sp_drop_db_routines");
   DBUG_PRINT("enter", ("db: %s", db));
 
@@ -1377,12 +1380,14 @@ sp_drop_db_routines(THD *thd, char *db)
 
   table->field[MYSQL_PROC_FIELD_DB]->store(db, strlen(db), system_charset_info);
   key_len= table->key_info->key_part[0].store_length;
+  table->field[MYSQL_PROC_FIELD_DB]->get_key_image(keybuf, key_len, Field::itRAW);
+
+
 
   ret= SP_OK;
   table->file->ha_index_init(0, 1);
-  if (! table->file->index_read_map(table->record[0],
-                                    (uchar *)table->field[MYSQL_PROC_FIELD_DB]->ptr,
-                                    (key_part_map)1, HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_map(table->record[0], keybuf, (key_part_map)1,
+                                      HA_READ_KEY_EXACT))
   {
     int nxtres;
     bool deleted= FALSE;
@@ -1397,9 +1402,8 @@ sp_drop_db_routines(THD *thd, char *db)
 	nxtres= 0;
 	break;
       }
-    } while (! (nxtres= table->file->index_next_same(table->record[0],
-                                (uchar *)table->field[MYSQL_PROC_FIELD_DB]->ptr,
-						     key_len)));
+    } while (!(nxtres= table->file->ha_index_next_same(table->record[0],
+                                                       keybuf, key_len)));
     if (nxtres != HA_ERR_END_OF_FILE)
       ret= SP_KEY_NOT_FOUND;
     if (deleted)
diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc
index f062276a9de..662570d8d8b 100644
--- a/sql/sp_cache.cc
+++ b/sql/sp_cache.cc
@@ -121,6 +121,12 @@ void sp_cache_clear(sp_cache **cp)
 }
 
 
+void sp_cache_end()
+{
+  pthread_mutex_destroy(&Cversion_lock);
+}
+
+
 /*
   Insert a routine into the cache.
 
diff --git a/sql/sp_cache.h b/sql/sp_cache.h
index c75e29474bc..2d379300354 100644
--- a/sql/sp_cache.h
+++ b/sql/sp_cache.h
@@ -56,6 +56,7 @@ class sp_name;
 */
 
 void sp_cache_init();
+void sp_cache_end();
 void sp_cache_clear(sp_cache **cp);
 void sp_cache_insert(sp_cache **cp, sp_head *sp);
 sp_head *sp_cache_lookup(sp_cache **cp, sp_name *name);
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index a95bbe094c0..750ddee49c3 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -841,6 +841,8 @@ sp_head::create_result_field(uint field_max_length, const char *field_name,
                       m_return_field_def.interval,
                       field_name ? field_name : (const char *) m_name.str);
 
+  field->vcol_info= m_return_field_def.vcol_info;
+  field->stored_in_db= m_return_field_def.stored_in_db;
   if (field)
     field->init(table);
 
@@ -1190,7 +1192,7 @@ bool
 sp_head::execute(THD *thd)
 {
   DBUG_ENTER("sp_head::execute");
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed= FALSE;
@@ -1990,7 +1992,7 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
   ulonglong utime_before_sp_exec= thd->utime_after_lock;
   sp_rcontext *save_spcont, *octx;
   sp_rcontext *nctx = NULL;
-  bool save_enable_slow_log= false;
+  bool save_enable_slow_log;
   bool save_log_general= false;
   DBUG_ENTER("sp_head::execute_procedure");
   DBUG_PRINT("info", ("procedure %s", m_name.str));
@@ -2068,9 +2070,10 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
       if (spvar->mode == sp_param_out)
       {
         Item_null *null_item= new Item_null();
+        Item *tmp_item= null_item;
 
         if (!null_item ||
-            nctx->set_variable(thd, i, (Item **)&null_item))
+            nctx->set_variable(thd, i, &tmp_item))
         {
           err_status= TRUE;
           break;
@@ -2112,10 +2115,10 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
     DBUG_PRINT("info",(" %.*s: eval args done", (int) m_name.length, 
                        m_name.str));
   }
-  if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log)
+  save_enable_slow_log= thd->enable_slow_log;
+  if (!(m_flags & LOG_SLOW_STATEMENTS) && save_enable_slow_log)
   {
     DBUG_PRINT("info", ("Disabling slow log for the execution"));
-    save_enable_slow_log= true;
     thd->enable_slow_log= FALSE;
   }
   if (!(m_flags & LOG_GENERAL_LOG) && !(thd->variables.option_bits & OPTION_LOG_OFF))
@@ -2138,8 +2141,7 @@ sp_head::execute_procedure(THD *thd, List<Item> *args)
 
   if (save_log_general)
     thd->variables.option_bits &= ~OPTION_LOG_OFF;
-  if (save_enable_slow_log)
-    thd->enable_slow_log= true;
+  thd->enable_slow_log= save_enable_slow_log;
   /*
     In the case when we weren't able to employ reuse mechanism for
     OUT/INOUT paranmeters, we should reallocate memory. This
@@ -2260,6 +2262,8 @@ sp_head::reset_lex(THD *thd)
   sublex->dec= NULL;
   sublex->interval_list.empty();
   sublex->type= 0;
+  sublex->uint_geom_type= 0;
+  sublex->vcol_info= 0;
 
   /* Reset part of parser state which needs this. */
   thd->m_parser_state->m_yacc.reset_before_substatement();
@@ -2387,7 +2391,8 @@ sp_head::fill_field_definition(THD *thd, LEX *lex,
                       &lex->interval_list,
                       lex->charset ? lex->charset :
                                      thd->variables.collation_database,
-                      lex->uint_geom_type))
+                      lex->uint_geom_type,
+		      lex->vcol_info, NULL))
     return TRUE;
 
   if (field_def->interval_list.elements)
@@ -3991,7 +3996,7 @@ sp_head::merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check)
   for (; table ; table= table->next_global)
     if (!table->derived && !table->schema_table)
     {
-      char tname[(NAME_LEN + 1) * 3];           // db\0table\0alias\0
+      char tname[(SAFE_NAME_LEN + 1) * 3];           // db\0table\0alias\0
       uint tlen, alen;
 
       tlen= table->db_length;
diff --git a/sql/sp_rcontext.h b/sql/sp_rcontext.h
index 1af758ed0af..e11361c3d6a 100644
--- a/sql/sp_rcontext.h
+++ b/sql/sp_rcontext.h
@@ -301,7 +301,7 @@ public:
   int
   close(THD *thd);
 
-  inline my_bool
+  inline bool
   is_open()
   {
     return test(server_side_cursor);
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 19373507955..4dda2d7263a 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -44,6 +44,8 @@
 #include "transaction.h"
 #include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT
 #include "records.h"             // init_read_record, end_read_record
+#include <sql_common.h>
+#include <mysql/plugin_auth.h>
 
 bool mysql_user_table_is_in_short_password_format= false;
 
@@ -164,6 +166,87 @@ TABLE_FIELD_TYPE mysql_db_table_fields[MYSQL_DB_FIELD_COUNT] = {
 const TABLE_FIELD_DEF
   mysql_db_table_def= {MYSQL_DB_FIELD_COUNT, mysql_db_table_fields};
 
+static LEX_STRING native_password_plugin_name= {
+  C_STRING_WITH_LEN("mysql_native_password")
+};
+  
+static LEX_STRING old_password_plugin_name= {
+  C_STRING_WITH_LEN("mysql_old_password")
+};
+  
+/// @todo make it configurable
+LEX_STRING *default_auth_plugin_name= &native_password_plugin_name;
+
+static plugin_ref native_password_plugin;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+static plugin_ref old_password_plugin;
+#endif
+
+/* Classes */
+
+struct acl_host_and_ip
+{
+  char *hostname;
+  long ip,ip_mask;                      // Used with masked ip:s
+};
+
+class ACL_ACCESS {
+public:
+  ulong sort;
+  ulong access;
+};
+
+/* ACL_HOST is used if no host is specified */
+
+class ACL_HOST :public ACL_ACCESS
+{
+public:
+  acl_host_and_ip host;
+  char *db;
+};
+
+class ACL_USER :public ACL_ACCESS
+{
+public:
+  acl_host_and_ip host;
+  uint hostname_length;
+  USER_RESOURCES user_resource;
+  char *user;
+  uint8 salt[SCRAMBLE_LENGTH+1];       // scrambled password in binary form
+  uint8 salt_len;        // 0 - no password, 4 - 3.20, 8 - 4.0,  20 - 4.1.1 
+  enum SSL_type ssl_type;
+  const char *ssl_cipher, *x509_issuer, *x509_subject;
+  LEX_STRING plugin;
+  LEX_STRING auth_string;
+
+  ACL_USER *copy(MEM_ROOT *root)
+  {
+    ACL_USER *dst= (ACL_USER *)alloc_root(root, sizeof(ACL_USER));
+    if (!dst)
+      return 0;
+    *dst= *this;
+    dst->user= safe_strdup_root(root, user);
+    dst->ssl_cipher= safe_strdup_root(root, ssl_cipher);
+    dst->x509_issuer= safe_strdup_root(root, x509_issuer);
+    dst->x509_subject= safe_strdup_root(root, x509_subject);
+    if (plugin.str == native_password_plugin_name.str ||
+        plugin.str == old_password_plugin_name.str)
+      dst->plugin= plugin;
+    else
+      dst->plugin.str= strmake_root(root, plugin.str, plugin.length);
+    dst->auth_string.str = safe_strdup_root(root, auth_string.str);
+    dst->host.hostname= safe_strdup_root(root, host.hostname);
+    return dst;
+  }
+};
+
+class ACL_DB :public ACL_ACCESS
+{
+public:
+  acl_host_and_ip host;
+  char *user,*db;
+};
+
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
 
 #define FIRST_NON_YN_FIELD 26
@@ -187,6 +270,24 @@ static uchar* acl_entry_get_key(acl_entry *entry, size_t *length,
 #define IP_ADDR_STRLEN (3+1+3+1+3+1+3)
 #define ACL_KEY_LENGTH (IP_ADDR_STRLEN+1+NAME_LEN+1+USERNAME_LENGTH+1)
 
+#if defined(HAVE_OPENSSL)
+/*
+  Without SSL the handshake consists of one packet. This packet
+  has both client capabilites and scrambled password.
+  With SSL the handshake might consist of two packets. If the first
+  packet (client capabilities) has CLIENT_SSL flag set, we have to
+  switch to SSL and read the second packet. The scrambled password
+  is in the second packet and client_capabilites field will be ignored.
+  Maybe it is better to accept flags other than CLIENT_SSL from the
+  second packet?
+*/
+#define SSL_HANDSHAKE_SIZE      2
+#define NORMAL_HANDSHAKE_SIZE   6
+#define MIN_HANDSHAKE_SIZE      2
+#else
+#define MIN_HANDSHAKE_SIZE      6
+#endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
+
 static DYNAMIC_ARRAY acl_hosts,acl_users,acl_dbs;
 static MEM_ROOT mem, memex;
 static bool initialized=0;
@@ -202,9 +303,8 @@ static void init_check_host(void);
 static void rebuild_check_host(void);
 static ACL_USER *find_acl_user(const char *host, const char *user,
                                my_bool exact);
-static bool update_user_table(THD *thd, TABLE *table,
-                              const char *host, const char *user,
-			      const char *new_password, uint new_password_len);
+static bool update_user_table(THD *, TABLE *, const char *, const char *,
+                              const char *, uint);
 static void update_hostname(acl_host_and_ip *host, const char *hostname);
 static bool compare_hostname(const acl_host_and_ip *host,const char *hostname,
 			     const char *ip);
@@ -235,6 +335,48 @@ set_user_salt(ACL_USER *acl_user, const char *password, uint password_len)
     acl_user->salt_len= 0;
 }
 
+static char *fix_plugin_ptr(char *name)
+{
+  if (my_strcasecmp(system_charset_info, name,
+                    native_password_plugin_name.str) == 0)
+    return native_password_plugin_name.str;
+  else
+  if (my_strcasecmp(system_charset_info, name,
+                    old_password_plugin_name.str) == 0)
+    return old_password_plugin_name.str;
+  else
+    return name;
+}
+
+/**
+  Fix ACL::plugin pointer to point to a hard-coded string, if appropriate
+
+  Make sure that if ACL_USER's plugin is a built-in, then it points
+  to a hard coded string, not to an allocated copy. Run-time, for
+  authentication, we want to be able to detect built-ins by comparing
+  pointers, not strings.
+
+  Additionally - update the salt if the plugin is built-in.
+
+  @retval 0 the pointers were fixed
+  @retval 1 this ACL_USER uses a not built-in plugin
+*/
+static bool fix_user_plugin_ptr(ACL_USER *user)
+{
+  if (my_strcasecmp(system_charset_info, user->plugin.str,
+                    native_password_plugin_name.str) == 0)
+    user->plugin= native_password_plugin_name;
+  else
+  if (my_strcasecmp(system_charset_info, user->plugin.str,
+                    old_password_plugin_name.str) == 0)
+    user->plugin= old_password_plugin_name;
+  else
+    return true;
+  
+  set_user_salt(user, user->auth_string.str, user->auth_string.length);
+  return false;
+}
+
 /*
   Initialize structures responsible for user/db-level privilege checking and
   load privilege information for them from tables in the 'mysql' database.
@@ -263,6 +405,19 @@ my_bool acl_init(bool dont_read_acl_tables)
                            (my_hash_get_key) acl_entry_get_key,
                            (my_hash_free_key) free,
                            &my_charset_utf8_bin);
+
+  /*
+    cache built-in native authentication plugins,
+    to avoid hash searches and a global mutex lock on every connect
+  */
+  native_password_plugin= my_plugin_lock_by_name(0,
+           &native_password_plugin_name, MYSQL_AUTHENTICATION_PLUGIN);
+  old_password_plugin= my_plugin_lock_by_name(0,
+           &old_password_plugin_name, MYSQL_AUTHENTICATION_PLUGIN);
+
+  if (!native_password_plugin || !old_password_plugin)
+    DBUG_RETURN(1);
+
   if (dont_read_acl_tables)
   {
     DBUG_RETURN(0); /* purecov: tested */
@@ -309,7 +464,7 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   READ_RECORD read_record_info;
   my_bool return_val= TRUE;
   bool check_no_resolve= specialflag & SPECIAL_NO_RESOLVE;
-  char tmp_name[NAME_LEN+1];
+  char tmp_name[SAFE_NAME_LEN+1];
   int password_length;
   ulong old_sql_mode= thd->variables.sql_mode;
   DBUG_ENTER("acl_load");
@@ -321,8 +476,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   acl_cache->clear(1);				// Clear locked hostname cache
 
   init_sql_alloc(&mem, ACL_ALLOC_BLOCK_SIZE, 0);
-  init_read_record(&read_record_info,thd,table= tables[0].table,NULL,1,0, 
-                   FALSE);
+  if (init_read_record(&read_record_info,thd,table= tables[0].table,NULL,1,0, 
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_hosts,sizeof(ACL_HOST),20,50);
   while (!(read_record_info.read_record(&read_record_info)))
@@ -371,7 +528,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   end_read_record(&read_record_info);
   freeze_size(&acl_hosts);
 
-  init_read_record(&read_record_info,thd,table=tables[1].table,NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[1].table,NULL,1,0,
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_users,sizeof(ACL_USER),50,100);
   password_length= table->field[2]->field_length /
@@ -419,6 +579,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   while (!(read_record_info.read_record(&read_record_info)))
   {
     ACL_USER user;
+    char *password;
+    uint password_len;
+
+    bzero(&user, sizeof(user));
     update_hostname(&user.host, get_field(&mem, table->field[0]));
     user.user= get_field(&mem, table->field[1]);
     if (check_no_resolve && hostname_requires_resolving(user.host.hostname))
@@ -430,27 +594,34 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
       continue;
     }
 
-    const char *password= get_field(thd->mem_root, table->field[2]);
-    uint password_len= password ? strlen(password) : 0;
+    password= get_field(&mem, table->field[2]);
+    password_len= password ? strlen(password) : 0;
+    user.auth_string.str= password ? password : const_cast<char*>("");
+    user.auth_string.length= password_len;
     set_user_salt(&user, password, password_len);
-    if (user.salt_len == 0 && password_len != 0)
-    {
-      switch (password_len) {
-      case 45: /* 4.1: to be removed */
-        sql_print_warning("Found 4.1 style password for user '%s@%s'. "
-                          "Ignoring user. "
-                          "You should change password for this user.",
-                          user.user ? user.user : "",
-                          user.host.hostname ? user.host.hostname : "");
-        break;
-      default:
-        sql_print_warning("Found invalid password for user: '%s@%s'; "
-                          "Ignoring user", user.user ? user.user : "",
-                           user.host.hostname ? user.host.hostname : "");
-        break;
-      }
+
+    switch (password_len) {
+    case 0: /* no password */
+    case SCRAMBLED_PASSWORD_CHAR_LENGTH:
+      user.plugin= native_password_plugin_name;
+      break;
+    case SCRAMBLED_PASSWORD_CHAR_LENGTH_323:
+      user.plugin= old_password_plugin_name;
+      break;
+    case 45: /* 4.1: to be removed */
+      sql_print_warning("Found 4.1.0 style password for user '%s@%s'. "
+                        "Ignoring user. "
+                        "You should change password for this user.",
+                        user.user ? user.user : "",
+                        user.host.hostname ? user.host.hostname : "");
+      continue;
+    default:
+      sql_print_warning("Found invalid password for user: '%s@%s'; "
+                        "Ignoring user", user.user ? user.user : "",
+                         user.host.hostname ? user.host.hostname : "");
+      continue;
     }
-    else                                        // password is correct
+    
     {
       uint next_field;
       user.access= get_access(table,3,&next_field) & GLOBAL_ACLS;
@@ -527,13 +698,35 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
           ptr= get_field(thd->mem_root, table->field[next_field++]);
           user.user_resource.user_conn= ptr ? atoi(ptr) : 0;
         }
-        else
-          user.user_resource.user_conn= 0;
+
+        if (table->s->fields >= 41)
+        {
+          /* We may have plugin & auth_String fields */
+          char *tmpstr= get_field(&mem, table->field[next_field++]);
+          if (tmpstr)
+          {
+            user.plugin.str= tmpstr;
+            user.plugin.length= strlen(user.plugin.str);
+            if (user.auth_string.length)
+            {
+              sql_print_warning("'user' entry '%s@%s' has both a password "
+                                "and an authentication plugin specified. The "
+                                "password will be ignored.",
+                                user.user ? user.user : "",
+                                user.host.hostname ? user.host.hostname : "");
+            }
+            user.auth_string.str= get_field(&mem, table->field[next_field++]);
+            if (!user.auth_string.str)
+              user.auth_string.str= const_cast<char*>("");
+            user.auth_string.length= strlen(user.auth_string.str);
+
+            fix_user_plugin_ptr(&user);
+          }
+        }
       }
       else
       {
         user.ssl_type=SSL_TYPE_NONE;
-        bzero((char *)&(user.user_resource),sizeof(user.user_resource));
 #ifndef TO_BE_REMOVED
         if (table->s->fields <= 13)
         {						// Without grant
@@ -559,7 +752,10 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables)
   end_read_record(&read_record_info);
   freeze_size(&acl_users);
 
-  init_read_record(&read_record_info,thd,table=tables[2].table,NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[2].table,NULL,1,0,
+                       FALSE))
+    goto end;
+
   table->use_all_columns();
   (void) my_init_dynamic_array(&acl_dbs,sizeof(ACL_DB),50,100);
   while (!(read_record_info.read_record(&read_record_info)))
@@ -636,6 +832,8 @@ void acl_free(bool end)
   delete_dynamic(&acl_dbs);
   delete_dynamic(&acl_wild_hosts);
   my_hash_free(&acl_check_hosts);
+  plugin_unlock(0, native_password_plugin);
+  plugin_unlock(0, old_password_plugin);
   if (!end)
     acl_cache->clear(1); /* purecov: inspected */
   else
@@ -830,246 +1028,10 @@ static int acl_compare(ACL_ACCESS *a,ACL_ACCESS *b)
 
 
 /*
-  Seek ACL entry for a user, check password, SSL cypher, and if
-  everything is OK, update THD user data and USER_RESOURCES struct.
-
-  IMPLEMENTATION
-   This function does not check if the user has any sensible privileges:
-   only user's existence and  validity is checked.
-   Note, that entire operation is protected by acl_cache_lock.
+  Gets user credentials without authentication and resource limit checks.
 
   SYNOPSIS
     acl_getroot()
-    thd         thread handle. If all checks are OK,
-                thd->security_ctx->priv_user/master_access are updated.
-                thd->security_ctx->host/ip/user are used for checks.
-    mqh         user resources; on success mqh is reset, else
-                unchanged
-    passwd      scrambled & crypted password, received from client
-                (to check): thd->scramble or thd->scramble_323 is
-                used to decrypt passwd, so they must contain
-                original random string,
-    passwd_len  length of passwd, must be one of 0, 8,
-                SCRAMBLE_LENGTH_323, SCRAMBLE_LENGTH
-    'thd' and 'mqh' are updated on success; other params are IN.
-  
-  RETURN VALUE
-    0  success: thd->priv_user, thd->priv_host, thd->master_access, mqh are
-       updated
-    1  user not found or authentication failure
-    2  user found, has long (4.1.1) salt, but passwd is in old (3.23) format.
-   -1  user found, has short (3.23) salt, but passwd is in new (4.1.1) format.
-*/
-
-int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
-                const char *passwd, uint passwd_len)
-{
-  ulong user_access= NO_ACCESS;
-  int res= 1;
-  ACL_USER *acl_user= 0;
-  Security_context *sctx= thd->security_ctx;
-  DBUG_ENTER("acl_getroot");
-
-  if (!initialized)
-  {
-    /* 
-      here if mysqld's been started with --skip-grant-tables option.
-    */
-    sctx->skip_grants();
-    bzero((char*) mqh, sizeof(*mqh));
-    DBUG_RETURN(0);
-  }
-
-  mysql_mutex_lock(&acl_cache->lock);
-
-  /*
-    Find acl entry in user database. Note, that find_acl_user is not the same,
-    because it doesn't take into account the case when user is not empty,
-    but acl_user->user is empty
-  */
-
-  for (uint i=0 ; i < acl_users.elements ; i++)
-  {
-    ACL_USER *acl_user_tmp= dynamic_element(&acl_users,i,ACL_USER*);
-    if (!acl_user_tmp->user || !strcmp(sctx->user, acl_user_tmp->user))
-    {
-      if (compare_hostname(&acl_user_tmp->host, sctx->host, sctx->ip))
-      {
-        /* check password: it should be empty or valid */
-        if (passwd_len == acl_user_tmp->salt_len)
-        {
-          if (acl_user_tmp->salt_len == 0 ||
-              (acl_user_tmp->salt_len == SCRAMBLE_LENGTH ?
-              check_scramble(passwd, thd->scramble, acl_user_tmp->salt) :
-              check_scramble_323(passwd, thd->scramble,
-                                 (ulong *) acl_user_tmp->salt)) == 0)
-          {
-            acl_user= acl_user_tmp;
-            res= 0;
-          }
-        }
-        else if (passwd_len == SCRAMBLE_LENGTH &&
-                 acl_user_tmp->salt_len == SCRAMBLE_LENGTH_323)
-          res= -1;
-        else if (passwd_len == SCRAMBLE_LENGTH_323 &&
-                 acl_user_tmp->salt_len == SCRAMBLE_LENGTH)
-          res= 2;
-        /* linear search complete: */
-        break;
-      }
-    }
-  }
-  /*
-    This was moved to separate tree because of heavy HAVE_OPENSSL case.
-    If acl_user is not null, res is 0.
-  */
-
-  if (acl_user)
-  {
-    /* OK. User found and password checked continue validation */
-#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
-    Vio *vio=thd->net.vio;
-    SSL *ssl= (SSL*) vio->ssl_arg;
-    X509 *cert;
-#endif
-
-    /*
-      At this point we know that user is allowed to connect
-      from given host by given username/password pair. Now
-      we check if SSL is required, if user is using SSL and
-      if X509 certificate attributes are OK
-    */
-    switch (acl_user->ssl_type) {
-    case SSL_TYPE_NOT_SPECIFIED:		// Impossible
-    case SSL_TYPE_NONE:				// SSL is not required
-      user_access= acl_user->access;
-      break;
-#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
-    case SSL_TYPE_ANY:				// Any kind of SSL is ok
-      if (vio_type(vio) == VIO_TYPE_SSL)
-	user_access= acl_user->access;
-      break;
-    case SSL_TYPE_X509: /* Client should have any valid certificate. */
-      /*
-	Connections with non-valid certificates are dropped already
-	in sslaccept() anyway, so we do not check validity here.
-
-	We need to check for absence of SSL because without SSL
-	we should reject connection.
-      */
-      if (vio_type(vio) == VIO_TYPE_SSL &&
-	  SSL_get_verify_result(ssl) == X509_V_OK &&
-	  (cert= SSL_get_peer_certificate(ssl)))
-      {
-	user_access= acl_user->access;
-        X509_free(cert);
-      }
-      break;
-    case SSL_TYPE_SPECIFIED: /* Client should have specified attrib */
-      /*
-	We do not check for absence of SSL because without SSL it does
-	not pass all checks here anyway.
-	If cipher name is specified, we compare it to actual cipher in
-	use.
-      */
-      if (vio_type(vio) != VIO_TYPE_SSL ||
-	  SSL_get_verify_result(ssl) != X509_V_OK)
-	break;
-      if (acl_user->ssl_cipher)
-      {
-	DBUG_PRINT("info",("comparing ciphers: '%s' and '%s'",
-			   acl_user->ssl_cipher,SSL_get_cipher(ssl)));
-	if (!strcmp(acl_user->ssl_cipher,SSL_get_cipher(ssl)))
-	  user_access= acl_user->access;
-	else
-	{
-	  if (global_system_variables.log_warnings)
-	    sql_print_information("X509 ciphers mismatch: should be '%s' but is '%s'",
-			      acl_user->ssl_cipher,
-			      SSL_get_cipher(ssl));
-	  break;
-	}
-      }
-      /* Prepare certificate (if exists) */
-      DBUG_PRINT("info",("checkpoint 1"));
-      if (!(cert= SSL_get_peer_certificate(ssl)))
-      {
-	user_access=NO_ACCESS;
-	break;
-      }
-      DBUG_PRINT("info",("checkpoint 2"));
-      /* If X509 issuer is specified, we check it... */
-      if (acl_user->x509_issuer)
-      {
-        DBUG_PRINT("info",("checkpoint 3"));
-        char *ptr = X509_NAME_oneline(X509_get_issuer_name(cert), 0, 0);
-        DBUG_PRINT("info",("comparing issuers: '%s' and '%s'",
-			   acl_user->x509_issuer, ptr));
-        if (strcmp(acl_user->x509_issuer, ptr))
-        {
-          if (global_system_variables.log_warnings)
-            sql_print_information("X509 issuer mismatch: should be '%s' "
-			      "but is '%s'", acl_user->x509_issuer, ptr);
-          free(ptr);
-          X509_free(cert);
-          user_access=NO_ACCESS;
-          break;
-        }
-        user_access= acl_user->access;
-        free(ptr);
-      }
-      DBUG_PRINT("info",("checkpoint 4"));
-      /* X509 subject is specified, we check it .. */
-      if (acl_user->x509_subject)
-      {
-        char *ptr= X509_NAME_oneline(X509_get_subject_name(cert), 0, 0);
-        DBUG_PRINT("info",("comparing subjects: '%s' and '%s'",
-                           acl_user->x509_subject, ptr));
-        if (strcmp(acl_user->x509_subject,ptr))
-        {
-          if (global_system_variables.log_warnings)
-            sql_print_information("X509 subject mismatch: should be '%s' but is '%s'",
-                            acl_user->x509_subject, ptr);
-          free(ptr);
-          X509_free(cert);
-          user_access=NO_ACCESS;
-          break;
-        }
-        user_access= acl_user->access;
-        free(ptr);
-      }
-      /* Deallocate the X509 certificate. */
-      X509_free(cert);
-      break;
-#else  /* HAVE_OPENSSL */
-    default:
-      /*
-        If we don't have SSL but SSL is required for this user the 
-        authentication should fail.
-      */
-      break;
-#endif /* HAVE_OPENSSL */
-    }
-    sctx->master_access= user_access;
-    sctx->priv_user= acl_user->user ? sctx->user : (char *) "";
-    *mqh= acl_user->user_resource;
-
-    if (acl_user->host.hostname)
-      strmake(sctx->priv_host, acl_user->host.hostname, MAX_HOSTNAME - 1);
-    else
-      *sctx->priv_host= 0;
-  }
-  mysql_mutex_unlock(&acl_cache->lock);
-  DBUG_RETURN(res);
-}
-
-
-/*
-  This is like acl_getroot() above, but it doesn't check password,
-  and we don't care about the user resources.
-
-  SYNOPSIS
-    acl_getroot_no_password()
       sctx               Context which should be initialized
       user               user name
       host               host name
@@ -1081,13 +1043,13 @@ int acl_getroot(THD *thd, USER_RESOURCES  *mqh,
     TRUE   Error
 */
 
-bool acl_getroot_no_password(Security_context *sctx, char *user, char *host,
-                             char *ip, char *db)
+bool acl_getroot(Security_context *sctx, char *user, char *host,
+                 char *ip, char *db)
 {
   int res= 1;
   uint i;
   ACL_USER *acl_user= 0;
-  DBUG_ENTER("acl_getroot_no_password");
+  DBUG_ENTER("acl_getroot");
 
   DBUG_PRINT("enter", ("Host: '%s', Ip: '%s', User: '%s', db: '%s'",
                        (host ? host : "(NULL)"), (ip ? ip : "(NULL)"),
@@ -1110,8 +1072,7 @@ bool acl_getroot_no_password(Security_context *sctx, char *user, char *host,
 
   sctx->master_access= 0;
   sctx->db_access= 0;
-  sctx->priv_user= (char *) "";
-  *sctx->priv_host= 0;
+  *sctx->priv_user= *sctx->priv_host= 0;
 
   /*
      Find acl entry in user database.
@@ -1153,7 +1114,11 @@ bool acl_getroot_no_password(Security_context *sctx, char *user, char *host,
       }
     }
     sctx->master_access= acl_user->access;
-    sctx->priv_user= acl_user->user ? user : (char *) "";
+
+    if (acl_user->user)
+      strmake(sctx->priv_user, user, USERNAME_LENGTH);
+    else
+      *sctx->priv_user= 0;
 
     if (acl_user->host.hostname)
       strmake(sctx->priv_host, acl_user->host.hostname, MAX_HOSTNAME - 1);
@@ -1179,7 +1144,9 @@ static void acl_update_user(const char *user, const char *host,
 			    const char *x509_issuer,
 			    const char *x509_subject,
 			    USER_RESOURCES  *mqh,
-			    ulong privileges)
+			    ulong privileges,
+			    const LEX_STRING *plugin,
+			    const LEX_STRING *auth)
 {
   mysql_mutex_assert_owner(&acl_cache->lock);
 
@@ -1191,8 +1158,24 @@ static void acl_update_user(const char *user, const char *host,
     {
       if ((!acl_user->host.hostname && !host[0]) ||
 	  (acl_user->host.hostname &&
-	  !my_strcasecmp(system_charset_info, host, acl_user->host.hostname)))
+           !my_strcasecmp(system_charset_info, host, acl_user->host.hostname)))
       {
+        if (plugin->str[0])
+        {
+          acl_user->plugin= *plugin;
+          acl_user->auth_string.str= auth->str ?
+            strmake_root(&mem, auth->str, auth->length) : const_cast<char*>("");
+          acl_user->auth_string.length= auth->length;
+          if (fix_user_plugin_ptr(acl_user))
+            acl_user->plugin.str= strmake_root(&mem, plugin->str, plugin->length);
+        }
+        else
+          if (password)
+          {
+            acl_user->auth_string.str= strmake_root(&mem, password, password_len);
+            acl_user->auth_string.length= password_len;
+            set_user_salt(acl_user, password, password_len);
+          }
 	acl_user->access=privileges;
 	if (mqh->specified_limits & USER_RESOURCES::QUERIES_PER_HOUR)
 	  acl_user->user_resource.questions=mqh->questions;
@@ -1212,8 +1195,6 @@ static void acl_update_user(const char *user, const char *host,
 	  acl_user->x509_subject= (x509_subject ?
 				   strdup_root(&mem,x509_subject) : 0);
 	}
-	if (password)
-	  set_user_salt(acl_user, password, password_len);
         /* search complete: */
 	break;
       }
@@ -1229,7 +1210,9 @@ static void acl_insert_user(const char *user, const char *host,
 			    const char *x509_issuer,
 			    const char *x509_subject,
 			    USER_RESOURCES *mqh,
-			    ulong privileges)
+			    ulong privileges,
+			    const LEX_STRING *plugin,
+			    const LEX_STRING *auth)
 {
   ACL_USER acl_user;
 
@@ -1237,6 +1220,24 @@ static void acl_insert_user(const char *user, const char *host,
 
   acl_user.user=*user ? strdup_root(&mem,user) : 0;
   update_hostname(&acl_user.host, *host ? strdup_root(&mem, host): 0);
+  if (plugin->str[0])
+  {
+    acl_user.plugin= *plugin;
+    acl_user.auth_string.str= auth->str ?
+      strmake_root(&mem, auth->str, auth->length) : const_cast<char*>("");
+    acl_user.auth_string.length= auth->length;
+    if (fix_user_plugin_ptr(&acl_user))
+      acl_user.plugin.str= strmake_root(&mem, plugin->str, plugin->length);
+  }
+  else
+  {
+    acl_user.plugin= password_len == SCRAMBLED_PASSWORD_CHAR_LENGTH_323 ?
+      old_password_plugin_name : native_password_plugin_name;
+    acl_user.auth_string.str= strmake_root(&mem, password, password_len);
+    acl_user.auth_string.length= password_len;
+    set_user_salt(&acl_user, password, password_len);
+  }
+
   acl_user.access=privileges;
   acl_user.user_resource = *mqh;
   acl_user.sort=get_sort(2,acl_user.host.hostname,acl_user.user);
@@ -1247,8 +1248,6 @@ static void acl_insert_user(const char *user, const char *host,
   acl_user.x509_issuer= x509_issuer  ? strdup_root(&mem,x509_issuer) : 0;
   acl_user.x509_subject=x509_subject ? strdup_root(&mem,x509_subject) : 0;
 
-  set_user_salt(&acl_user, password, password_len);
-
   (void) push_dynamic(&acl_users,(uchar*) &acl_user);
   if (!acl_user.host.hostname ||
       (acl_user.host.hostname[0] == wild_many && !acl_user.host.hostname[1]))
@@ -1275,10 +1274,11 @@ static void acl_update_db(const char *user, const char *host, const char *db,
     {
       if ((!acl_db->host.hostname && !host[0]) ||
 	  (acl_db->host.hostname &&
-          !strcmp(host, acl_db->host.hostname)))
+	   !strcmp(host, acl_db->host.hostname)))
       {
 	if ((!acl_db->db && !db[0]) ||
 	    (acl_db->db && !strcmp(db,acl_db->db)))
+
 	{
 	  if (privileges)
 	    acl_db->access=privileges;
@@ -1633,7 +1633,13 @@ bool change_password(THD *thd, const char *host, const char *user,
     goto end;
   }
   /* update loaded acl entry: */
-  set_user_salt(acl_user, new_password, new_password_len);
+  if (acl_user->plugin.str == native_password_plugin_name.str || 
+      acl_user->plugin.str == old_password_plugin_name.str)
+  {
+    acl_user->auth_string.str= strmake_root(&mem, new_password, new_password_len);
+    acl_user->auth_string.length= new_password_len;
+    set_user_salt(acl_user, new_password, new_password_len);
+  }
 
   if (update_user_table(thd, table,
 			acl_user->host.hostname ? acl_user->host.hostname : "",
@@ -1649,10 +1655,12 @@ bool change_password(THD *thd, const char *host, const char *user,
   result= 0;
   if (mysql_bin_log.is_open())
   {
-    query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
-                          acl_user->user ? acl_user->user : "",
-                          acl_user->host.hostname ? acl_user->host.hostname : "",
-                          new_password);
+    query_length=
+      my_sprintf(buff,
+                 (buff,"SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
+                  acl_user->user ? acl_user->user : "",
+                  acl_user->host.hostname ? acl_user->host.hostname : "",
+                  new_password));
     thd->clear_error();
     result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length,
                               FALSE, FALSE, FALSE, 0);
@@ -1898,9 +1906,9 @@ static bool update_user_table(THD *thd, TABLE *table,
   key_copy((uchar *) user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0,
-                                      (uchar *) user_key, HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar *) user_key, HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     my_message(ER_PASSWORD_NO_MATCH, ER(ER_PASSWORD_NO_MATCH),
                MYF(0));	/* purecov: deadcode */
@@ -1990,9 +1998,9 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /* what == 'N' means revoke */
     if (what == 'N')
@@ -2024,6 +2032,15 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
                thd->security_ctx->user, thd->security_ctx->host_or_ip);
       goto end;
     }
+    else if (combo.plugin.str[0])
+    {
+      if (!plugin_is_ready(&combo.plugin, MYSQL_AUTHENTICATION_PLUGIN))
+      {
+        my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), combo.plugin.str);
+        goto end;
+      }
+    }
+
     old_row_exists = 0;
     restore_record(table,s->default_values);
     table->field[0]->store(combo.host.str,combo.host.length,
@@ -2037,7 +2054,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
   {
     old_row_exists = 1;
     store_record(table,record[1]);			// Save copy for update
-    if (combo.password.str)			// If password given
+    if (combo.password.str)                             // If password given
       table->field[2]->store(password, password_len, system_charset_info);
     else if (!rights && !revoke_grant &&
              lex->ssl_type == SSL_TYPE_NOT_SPECIFIED &&
@@ -2118,7 +2135,17 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
         (mqh.specified_limits & USER_RESOURCES::USER_CONNECTIONS))
       table->field[next_field+3]->store((longlong) mqh.user_conn, TRUE);
     mqh_used= mqh_used || mqh.questions || mqh.updates || mqh.conn_per_hour;
+
+    next_field+=4;
+    if (table->s->fields >= 41 && combo.plugin.str[0])
+    {
+      table->field[next_field]->store(combo.plugin.str, combo.plugin.length,
+                  system_charset_info);
+      table->field[next_field+1]->store(combo.auth.str, combo.auth.length,
+                  system_charset_info);
+    }
   }
+
   if (old_row_exists)
   {
     /*
@@ -2162,7 +2189,9 @@ end:
 		      lex->x509_issuer,
 		      lex->x509_subject,
 		      &lex->mqh,
-		      rights);
+		      rights,
+		      &combo.plugin,
+		      &combo.auth);
     else
       acl_insert_user(combo.user.str, combo.host.str, password, password_len,
 		      lex->ssl_type,
@@ -2170,7 +2199,9 @@ end:
 		      lex->x509_issuer,
 		      lex->x509_subject,
 		      &lex->mqh,
-		      rights);
+		      rights,
+		      &combo.plugin,
+		      &combo.auth);
   }
   DBUG_RETURN(error);
 }
@@ -2214,9 +2245,9 @@ static int replace_db_table(TABLE *table, const char *db,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0],0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0],0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     if (what == 'N')
     { // no row, no revoke
@@ -2448,8 +2479,9 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
     col_privs->field[4]->store("",0, &my_charset_latin1);
 
     col_privs->file->ha_index_init(0, 1);
-    if (col_privs->file->index_read_map(col_privs->record[0], (uchar*) key,
-                                        (key_part_map)15, HA_READ_KEY_EXACT))
+    if (col_privs->file->ha_index_read_map(col_privs->record[0], (uchar*) key,
+                                           (key_part_map)15,
+                                           HA_READ_KEY_EXACT))
     {
       cols = 0; /* purecov: deadcode */
       col_privs->file->ha_index_end();
@@ -2475,7 +2507,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs)
         privs= cols= 0;
         return;
       }
-    } while (!col_privs->file->index_next(col_privs->record[0]) &&
+    } while (!col_privs->file->ha_index_next(col_privs->record[0]) &&
              !key_cmp_if_same(col_privs,key,0,key_prefix_len));
     col_privs->file->ha_index_end();
   }
@@ -2510,7 +2542,7 @@ static GRANT_NAME *name_hash_search(HASH *name_hash,
                                     const char *user, const char *tname,
                                     bool exact, bool name_tolower)
 {
-  char helping [NAME_LEN*2+USERNAME_LENGTH+3], *name_ptr;
+  char helping [SAFE_NAME_LEN*2+USERNAME_LENGTH+3], *name_ptr;
   uint len;
   GRANT_NAME *grant_name,*found=0;
   HASH_SEARCH_STATE state;
@@ -2620,8 +2652,8 @@ static int replace_column_table(GRANT_TABLE *g_t,
     key_copy(user_key, table->record[0], table->key_info,
              table->key_info->key_length);
 
-    if (table->file->index_read_map(table->record[0], user_key, HA_WHOLE_KEY,
-                                    HA_READ_KEY_EXACT))
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       HA_WHOLE_KEY, HA_READ_KEY_EXACT))
     {
       if (revoke_grant)
       {
@@ -2702,9 +2734,9 @@ static int replace_column_table(GRANT_TABLE *g_t,
     key_copy(user_key, table->record[0], table->key_info,
              key_prefix_length);
 
-    if (table->file->index_read_map(table->record[0], user_key,
-                                    (key_part_map)15,
-                                    HA_READ_KEY_EXACT))
+    if (table->file->ha_index_read_map(table->record[0], user_key,
+                                       (key_part_map)15,
+                                       HA_READ_KEY_EXACT))
       goto end;
 
     /* Scan through all rows with the same host,db,user and table */
@@ -2755,7 +2787,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
 	    my_hash_delete(&g_t->hash_columns,(uchar*) grant_column);
 	}
       }
-    } while (!table->file->index_next(table->record[0]) &&
+    } while (!table->file->ha_index_next(table->record[0]) &&
 	     !key_cmp_if_same(table, key, 0, key_prefix_length));
   }
 
@@ -2817,9 +2849,9 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
 
-  if (table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /*
       The following should never happen as we first check the in memory
@@ -2942,10 +2974,10 @@ static int replace_routine_table(THD *thd, GRANT_NAME *grant_name,
                          TRUE);
   store_record(table,record[1]);			// store at pos 1
 
-  if (table->file->index_read_idx_map(table->record[0], 0,
-                                      (uchar*) table->field[0]->ptr,
-                                      HA_WHOLE_KEY,
-                                      HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_idx_map(table->record[0], 0,
+                                         (uchar*) table->field[0]->ptr,
+                                         HA_WHOLE_KEY,
+                                         HA_READ_KEY_EXACT))
   {
     /*
       The following should never happen as we first check the in memory
@@ -3504,7 +3536,7 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list,
 {
   List_iterator <LEX_USER> str_list (list);
   LEX_USER *Str, *tmp_Str;
-  char tmp_db[NAME_LEN+1];
+  char tmp_db[SAFE_NAME_LEN+1];
   bool create_new_users=0;
   TABLE_LIST tables[2];
   bool save_binlog_row_based;
@@ -3706,7 +3738,7 @@ static my_bool grant_load_procs_priv(TABLE *p_table)
   p_table->file->ha_index_init(0, 1);
   p_table->use_all_columns();
 
-  if (!p_table->file->index_first(p_table->record[0]))
+  if (!p_table->file->ha_index_first(p_table->record[0]))
   {
     memex_ptr= &memex;
     my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr);
@@ -3758,7 +3790,7 @@ static my_bool grant_load_procs_priv(TABLE *p_table)
         goto end_unlock;
       }
     }
-    while (!p_table->file->index_next(p_table->record[0]));
+    while (!p_table->file->ha_index_next(p_table->record[0]));
   }
   /* Return ok */
   return_val= 0;
@@ -3808,7 +3840,7 @@ static my_bool grant_load(THD *thd, TABLE_LIST *tables)
   t_table->use_all_columns();
   c_table->use_all_columns();
 
-  if (!t_table->file->index_first(t_table->record[0]))
+  if (!t_table->file->ha_index_first(t_table->record[0]))
   {
     memex_ptr= &memex;
     my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr);
@@ -3843,7 +3875,7 @@ static my_bool grant_load(THD *thd, TABLE_LIST *tables)
 	goto end_unlock;
       }
     }
-    while (!t_table->file->index_next(t_table->record[0]));
+    while (!t_table->file->ha_index_next(t_table->record[0]));
   }
 
   return_val=0;					// Return ok
@@ -4160,6 +4192,8 @@ err:
   {
     char command[128];
     get_privilege_desc(command, sizeof(command), want_access);
+    status_var_increment(thd->status_var.access_denied_errors);
+
     my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
              command,
              sctx->priv_user,
@@ -4436,7 +4470,7 @@ static bool check_grant_db_routine(THD *thd, const char *db, HASH *hash)
 bool check_grant_db(THD *thd,const char *db)
 {
   Security_context *sctx= thd->security_ctx;
-  char helping [NAME_LEN+USERNAME_LENGTH+2];
+  char helping [SAFE_NAME_LEN + USERNAME_LENGTH+2];
   uint len;
   bool error= TRUE;
 
@@ -4574,14 +4608,14 @@ bool check_routine_level_acl(THD *thd, const char *db, const char *name,
 ulong get_table_grant(THD *thd, TABLE_LIST *table)
 {
   ulong privilege;
-  Security_context *sctx= thd->security_ctx;
-  const char *db = table->db ? table->db : thd->db;
   GRANT_TABLE *grant_table;
 
-  mysql_rwlock_rdlock(&LOCK_grant);
+  rw_rdlock(&LOCK_grant);
 #ifdef EMBEDDED_LIBRARY
   grant_table= NULL;
 #else
+  Security_context *sctx= thd->security_ctx;
+  const char *db = table->db ? table->db : thd->db;
   grant_table= table_hash_search(sctx->host, sctx->ip, db, sctx->priv_user,
 				 table->table_name, 0);
 #endif
@@ -4774,16 +4808,27 @@ bool mysql_show_grants(THD *thd,LEX_USER *lex_user)
     global.append(lex_user->host.str,lex_user->host.length,
 		  system_charset_info);
     global.append ('\'');
-    if (acl_user->salt_len)
+    if (acl_user->plugin.str == native_password_plugin_name.str ||
+        acl_user->plugin.str == old_password_plugin_name.str)
     {
-      char passwd_buff[SCRAMBLED_PASSWORD_CHAR_LENGTH+1];
-      if (acl_user->salt_len == SCRAMBLE_LENGTH)
-        make_password_from_salt(passwd_buff, acl_user->salt);
-      else
-        make_password_from_salt_323(passwd_buff, (ulong *) acl_user->salt);
-      global.append(STRING_WITH_LEN(" IDENTIFIED BY PASSWORD '"));
-      global.append(passwd_buff);
-      global.append('\'');
+      if (acl_user->auth_string.length)
+      {
+        DBUG_ASSERT(acl_user->salt_len);
+        global.append(STRING_WITH_LEN(" IDENTIFIED BY PASSWORD '"));
+        global.append(acl_user->auth_string.str, acl_user->auth_string.length);
+        global.append('\'');
+      }
+    }
+    else
+    {
+        global.append(STRING_WITH_LEN(" IDENTIFIED VIA "));
+        global.append(acl_user->plugin.str, acl_user->plugin.length);
+        if (acl_user->auth_string.length)
+        {
+          global.append(STRING_WITH_LEN(" USING '"));
+          global.append(acl_user->auth_string.str, acl_user->auth_string.length);
+          global.append('\'');
+        }
     }
     /* "show grants" SSL related stuff */
     if (acl_user->ssl_type == SSL_TYPE_ANY)
@@ -5407,9 +5452,9 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
                         table->key_info->key_part[1].store_length);
     key_copy(user_key, table->record[0], table->key_info, key_prefix_length);
 
-    if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                                user_key, (key_part_map)3,
-                                                HA_READ_KEY_EXACT)))
+    if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                   user_key, (key_part_map)3,
+                                                   HA_READ_KEY_EXACT)))
     {
       if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       {
@@ -5444,7 +5489,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop,
       DBUG_PRINT("info",("scan table: '%s'  search: '%s'@'%s'",
                          table->s->table_name.str, user_str, host_str));
 #endif
-      while ((error= table->file->rnd_next(table->record[0])) != 
+      while ((error= table->file->ha_rnd_next(table->record[0])) != 
              HA_ERR_END_OF_FILE)
       {
         if (error)
@@ -5589,7 +5634,7 @@ static int handle_grant_struct(uint struct_no, bool drop,
       host= grant_name->host.hostname;
       break;
     default:
-      assert(0);
+      DBUG_ASSERT(0);
     }
     if (! user)
       user= "";
@@ -6478,38 +6523,44 @@ bool sp_grant_privileges(THD *thd, const char *sp_db, const char *sp_name,
   tables->db= (char*)sp_db;
   tables->table_name= tables->alias= (char*)sp_name;
 
-  combo->host.length= strlen(combo->host.str);
-  combo->user.length= strlen(combo->user.str);
-  combo->host.str= thd->strmake(combo->host.str,combo->host.length);
-  combo->user.str= thd->strmake(combo->user.str,combo->user.length);
+  thd->make_lex_string(&combo->user,
+                       combo->user.str, strlen(combo->user.str), 0);
+  thd->make_lex_string(&combo->host,
+                       combo->host.str, strlen(combo->host.str), 0);
 
+  combo->password= empty_lex_str;
+  combo->plugin= empty_lex_str;
+  combo->auth= empty_lex_str;
 
-  if(au && au->salt_len)
+  if(au)
   {
-    if (au->salt_len == SCRAMBLE_LENGTH)
-    {
-      make_password_from_salt(passwd_buff, au->salt);
-      combo->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH;
-    }
-    else if (au->salt_len == SCRAMBLE_LENGTH_323)
+    if (au->salt_len)
     {
-      make_password_from_salt_323(passwd_buff, (ulong *) au->salt);
-      combo->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323;
+      if (au->salt_len == SCRAMBLE_LENGTH)
+      {
+        make_password_from_salt(passwd_buff, au->salt);
+        combo->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH;
+      }
+      else if (au->salt_len == SCRAMBLE_LENGTH_323)
+      {
+        make_password_from_salt_323(passwd_buff, (ulong *) au->salt);
+        combo->password.length= SCRAMBLED_PASSWORD_CHAR_LENGTH_323;
+      }
+      else
+      {
+        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_PASSWD_LENGTH,
+                            ER(ER_PASSWD_LENGTH), SCRAMBLED_PASSWORD_CHAR_LENGTH);
+        return TRUE;
+      }
+      combo->password.str= passwd_buff;
     }
-    else
+
+    if (au->plugin.str != native_password_plugin_name.str &&
+        au->plugin.str != old_password_plugin_name.str)
     {
-      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                          ER_PASSWD_LENGTH,
-                          ER(ER_PASSWD_LENGTH),
-                          SCRAMBLED_PASSWORD_CHAR_LENGTH);
-      return TRUE;
+      combo->plugin= au->plugin;
+      combo->auth= au->auth_string;
     }
-    combo->password.str= passwd_buff;
-  }
-  else
-  {
-    combo->password.str= (char*)"";
-    combo->password.length= 0;
   }
 
   if (user_list.push_back(combo))
@@ -7109,3 +7160,1469 @@ get_cached_table_access(GRANT_INTERNAL_INFO *grant_internal_info,
 }
 
 
+/****************************************************************************
+   AUTHENTICATION CODE
+   including initial connect handshake, invoking appropriate plugins,
+   client-server plugin negotiation, COM_CHANGE_USER, and native
+   MySQL authentication plugins.
+****************************************************************************/
+
+/* few defines to have less ifdef's in the code below */
+#ifdef EMBEDDED_LIBRARY
+#undef HAVE_OPENSSL
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+#define initialized 0
+#define decrease_user_connections(X)        /* nothing */
+#define check_for_max_user_connections(X,Y)   0
+#define get_or_create_user_conn(A,B,C,D) 0
+#endif
+#endif
+#ifndef HAVE_OPENSSL
+#define ssl_acceptor_fd 0
+#define sslaccept(A,B,C,D) 1
+#define NORMAL_HANDSHAKE_SIZE   6
+#endif
+
+/**
+  The internal version of what plugins know as MYSQL_PLUGIN_VIO,
+  basically the context of the authentication session
+*/
+struct MPVIO_EXT : public MYSQL_PLUGIN_VIO
+{
+  MYSQL_SERVER_AUTH_INFO auth_info;
+  THD *thd;
+  ACL_USER *acl_user;       ///< a copy, independent from acl_users array
+  plugin_ref plugin;        ///< what plugin we're under
+  LEX_STRING db;            ///< db name from the handshake packet
+  /** when restarting a plugin this caches the last client reply */
+  struct {
+    char *plugin, *pkt;     ///< pointers into NET::buff
+    uint pkt_len;
+  } cached_client_reply;
+  /** this caches the first plugin packet for restart request on the client */
+  struct {
+    char *pkt;
+    uint pkt_len;
+  } cached_server_packet;
+  int packets_read, packets_written; ///< counters for send/received packets
+  uint connect_errors;      ///< if there were connect errors for this host
+  /** when plugin returns a failure this tells us what really happened */
+  enum { SUCCESS, FAILURE, RESTART } status;
+};
+
+
+/**
+  a helper function to report an access denied error in all the proper places
+*/
+
+static void login_failed_error(THD *thd, bool passwd_used)
+{
+  my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
+           thd->main_security_ctx.user,
+           thd->main_security_ctx.host_or_ip,
+           passwd_used ? ER(ER_YES) : ER(ER_NO));
+  general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR),
+                    thd->main_security_ctx.user,
+                    thd->main_security_ctx.host_or_ip,
+                    passwd_used ? ER(ER_YES) : ER(ER_NO));
+  status_var_increment(thd->status_var.access_denied_errors);
+  /* 
+    Log access denied messages to the error log when log-warnings = 2
+    so that the overhead of the general query log is not required to track 
+    failed connections.
+  */
+  if (global_system_variables.log_warnings > 1)
+  {
+    sql_print_warning(ER(ER_ACCESS_DENIED_ERROR),
+                      thd->main_security_ctx.user,
+                      thd->main_security_ctx.host_or_ip,
+                      passwd_used ? ER(ER_YES) : ER(ER_NO));      
+  }
+}
+
+
+/**
+  sends a server handshake initialization packet, the very first packet
+  after the connection was established
+
+  Packet format:
+   
+    Bytes       Content
+    -----       ----
+    1           protocol version (always 10)
+    n           server version string, \0-terminated
+    4           thread id
+    8           first 8 bytes of the plugin provided data (scramble)
+    1           \0 byte, terminating the first part of a scramble
+    2           server capabilities (two lower bytes)
+    1           server character set
+    2           server status
+    2           server capabilities (two upper bytes)
+    1           length of the scramble
+    10          reserved, always 0
+    n           rest of the plugin provided data (at least 12 bytes)
+    1           \0 byte, terminating the second part of a scramble
+
+  @retval 0 ok
+  @retval 1 error
+*/
+
+static bool send_server_handshake_packet(MPVIO_EXT *mpvio,
+                                         const char *data, uint data_len)
+{
+  DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
+  DBUG_ASSERT(data_len <= 255);
+
+  THD *thd= mpvio->thd;
+  char *buff= (char *)my_alloca(1 + SERVER_VERSION_LENGTH + 1 + data_len + 64);
+  char scramble_buf[SCRAMBLE_LENGTH];
+  char *end= buff;
+
+  *end++= protocol_version;
+
+  thd->client_capabilities= CLIENT_BASIC_FLAGS;
+
+  if (data_len)
+  {
+    mpvio->cached_server_packet.pkt= (char*)thd->memdup(data, data_len);
+    mpvio->cached_server_packet.pkt_len= data_len;
+  }
+
+  if (data_len < SCRAMBLE_LENGTH)
+  {
+    if (data_len)
+    { /*
+        the first packet *must* have at least 20 bytes of a scramble.
+        if a plugin provided less, we pad it to 20 with zeros
+      */
+      memcpy(scramble_buf, data, data_len);
+      bzero(scramble_buf+data_len, SCRAMBLE_LENGTH-data_len);
+      data= scramble_buf;
+    }
+    else
+    {
+      /*
+        if the default plugin does not provide the data for the scramble at
+        all, we generate a scramble internally anyway, just in case the
+        user account (that will be known only later) uses a
+        native_password_plugin (which needs a scramble). If we don't send a
+        scramble now - wasting 20 bytes in the packet -
+        native_password_plugin will have to send it in a separate packet,
+        adding one more round trip.
+      */
+      create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+      data= thd->scramble;
+    }
+    data_len= SCRAMBLE_LENGTH;
+  }
+
+  if (opt_using_transactions)
+    thd->client_capabilities|= CLIENT_TRANSACTIONS;
+
+  thd->client_capabilities|= CAN_CLIENT_COMPRESS;
+
+  if (ssl_acceptor_fd)
+  {
+    thd->client_capabilities |= CLIENT_SSL;
+    thd->client_capabilities |= CLIENT_SSL_VERIFY_SERVER_CERT;
+  }
+
+  end= strnmov(end, server_version, SERVER_VERSION_LENGTH) + 1;
+  int4store((uchar*) end, mpvio->thd->thread_id);
+  end+= 4;
+
+  /*
+    Old clients does not understand long scrambles, but can ignore packet
+    tail: that's why first part of the scramble is placed here, and second
+    part at the end of packet.
+  */
+  end= (char*)memcpy(end, data, SCRAMBLE_LENGTH_323);
+  end+= SCRAMBLE_LENGTH_323;
+  *end++= 0;
+ 
+  int2store(end, thd->client_capabilities);
+  /* write server characteristics: up to 16 bytes allowed */
+  end[2]=(char) default_charset_info->number;
+  int2store(end+3, mpvio->thd->server_status);
+  int2store(end+5, thd->client_capabilities >> 16);
+  end[7]= data_len;
+  bzero(end+8, 10);
+  end+= 18;
+  /* write scramble tail */
+  end= (char*)memcpy(end, data + SCRAMBLE_LENGTH_323,
+                     data_len - SCRAMBLE_LENGTH_323);
+  end+= data_len - SCRAMBLE_LENGTH_323;
+  end= strmake(end, plugin_name(mpvio->plugin)->str,
+                    plugin_name(mpvio->plugin)->length);
+
+  int res= my_net_write(&mpvio->thd->net, (uchar*) buff, (size_t) (end-buff)) ||
+           net_flush(&mpvio->thd->net);
+  my_afree(buff);
+  return res;
+}
+
+static bool secure_auth(THD *thd)
+{
+  if (!opt_secure_auth)
+    return 0;
+
+  /*
+    If the server is running in secure auth mode, short scrambles are 
+    forbidden. Extra juggling to report the same error as the old code.
+  */
+  if (thd->client_capabilities & CLIENT_PROTOCOL_41)
+  {
+    my_error(ER_SERVER_IS_IN_SECURE_AUTH_MODE, MYF(0),
+             thd->security_ctx->user,
+             thd->security_ctx->host_or_ip);
+    general_log_print(thd, COM_CONNECT, ER(ER_SERVER_IS_IN_SECURE_AUTH_MODE),
+                      thd->security_ctx->user,
+                      thd->security_ctx->host_or_ip);
+  }
+  else
+  {
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+  }
+  return 1;
+}
+
+/**
+  sends a "change plugin" packet, requesting a client to restart authentication
+  using a different authentication plugin
+
+  Packet format:
+   
+    Bytes       Content
+    -----       ----
+    1           byte with the value 254
+    n           client plugin to use, \0-terminated
+    n           plugin provided data
+
+  In a special case of switching from native_password_plugin to
+  old_password_plugin, the packet contains only one - the first - byte,
+  plugin name is omitted, plugin data aren't needed as the scramble was
+  already sent. This one-byte packet is identical to the "use the short
+  scramble" packet in the protocol before plugins were introduced.
+
+  @retval 0 ok
+  @retval 1 error
+*/
+
+static bool send_plugin_request_packet(MPVIO_EXT *mpvio,
+                                       const uchar *data, uint data_len)
+{
+  DBUG_ASSERT(mpvio->packets_written == 1);
+  DBUG_ASSERT(mpvio->packets_read == 1);
+  NET *net= &mpvio->thd->net;
+  static uchar switch_plugin_request_buf[]= { 254 };
+
+
+  mpvio->status= MPVIO_EXT::FAILURE; // the status is no longer RESTART
+
+  const char *client_auth_plugin=
+    ((st_mysql_auth *)(plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
+
+  DBUG_ASSERT(client_auth_plugin);
+  DBUG_ASSERT(my_strcasecmp(system_charset_info, client_auth_plugin,
+                            mpvio->cached_client_reply.plugin));
+
+  /*
+    we send an old "short 4.0 scramble request", if we need to request a
+    client to use 4.0 auth plugin (short scramble) and the scramble was
+    already sent to the client
+
+    below, cached_client_reply.plugin is the plugin name that client has used,
+    client_auth_plugin is derived from mysql.user table, for the given
+    user account, it's the plugin that the client need to use to login.
+  */
+  bool switch_from_long_to_short_scramble=
+    native_password_plugin_name.str == mpvio->cached_client_reply.plugin &&
+    client_auth_plugin == old_password_plugin_name.str;
+
+  if (switch_from_long_to_short_scramble)
+    return secure_auth(mpvio->thd) ||
+           my_net_write(net, switch_plugin_request_buf, 1) ||
+           net_flush(net);
+
+  /*
+    We never request a client to switch from a short to long scramble.
+    Plugin-aware clients can do that, but traditionally it meant to
+    ask an old 4.0 client to use the new 4.1 authentication protocol.
+  */
+  bool switch_from_short_to_long_scramble=
+    old_password_plugin_name.str == mpvio->cached_client_reply.plugin &&
+    client_auth_plugin == native_password_plugin_name.str;
+
+  if (switch_from_short_to_long_scramble)
+  {
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(mpvio->thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+    return 1;
+  }
+
+  return net_write_command(net, switch_plugin_request_buf[0],
+                           (uchar*)client_auth_plugin,
+                           strlen(client_auth_plugin)+1,
+                           (uchar*)data, data_len);
+}
+
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+/**
+   Finds acl entry in user database for authentication purposes.
+   
+   Finds a user and copies it into mpvio. Reports an authentication
+   failure if a user is not found.
+
+   @note find_acl_user is not the same, because it doesn't take into
+   account the case when user is not empty, but acl_user->user is empty
+
+   @retval 0    found
+   @retval 1    not found
+*/
+
+static bool find_mpvio_user(MPVIO_EXT *mpvio, Security_context *sctx)
+{
+  DBUG_ASSERT(mpvio->acl_user == 0);
+
+  pthread_mutex_lock(&acl_cache->lock);
+  for (uint i=0 ; i < acl_users.elements ; i++)
+  {
+    ACL_USER *acl_user_tmp= dynamic_element(&acl_users,i,ACL_USER*);
+    if ((!acl_user_tmp->user || !strcmp(sctx->user, acl_user_tmp->user)) &&
+        compare_hostname(&acl_user_tmp->host, sctx->host, sctx->ip))
+    {
+      mpvio->acl_user= acl_user_tmp->copy(mpvio->thd->mem_root);
+      break;
+    }
+  }
+  pthread_mutex_unlock(&acl_cache->lock);
+
+  if (!mpvio->acl_user)
+  {
+    login_failed_error(mpvio->thd, 0);
+    return 1;
+  }
+
+  /* user account requires non-default plugin and the client is too old */
+  if (mpvio->acl_user->plugin.str != native_password_plugin_name.str &&
+      mpvio->acl_user->plugin.str != old_password_plugin_name.str &&
+      !(mpvio->thd->client_capabilities & CLIENT_PLUGIN_AUTH))
+  {
+    DBUG_ASSERT(my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
+                              native_password_plugin_name.str));
+    DBUG_ASSERT(my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
+                              old_password_plugin_name.str));
+    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
+    general_log_print(mpvio->thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+    return 1;
+  }
+
+  mpvio->auth_info.user_name= sctx->user;
+  mpvio->auth_info.auth_string= mpvio->acl_user->auth_string.str;
+  strmake(mpvio->auth_info.authenticated_as, mpvio->acl_user->user ?
+          mpvio->acl_user->user : "", USERNAME_LENGTH);
+
+  return 0;
+}
+#endif
+
+
+/* the packet format is described in send_change_user_packet() */
+static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length)
+{
+  THD *thd= mpvio->thd;
+  NET *net= &thd->net;
+  Security_context *sctx= thd->security_ctx;
+
+  char *user= (char*) net->read_pos;
+  char *end= user + packet_length;
+  /* Safe because there is always a trailing \0 at the end of the packet */
+  char *passwd= strend(user)+1;
+  uint user_len= passwd - user - 1;
+  char *db= passwd;
+  char db_buff[SAFE_NAME_LEN + 1];            // buffer to store db in utf8
+  char user_buff[USERNAME_LENGTH + 1];	      // buffer to store user in utf8
+  uint dummy_errors;
+
+  if (passwd >= end)
+  {
+    my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
+    return 1;
+  }
+
+  /*
+    Old clients send null-terminated string as password; new clients send
+    the size (1 byte) + string (not null-terminated). Hence in case of empty
+    password both send '\0'.
+
+    This strlen() can't be easily deleted without changing protocol.
+
+    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
+    *passwd > 127 and become 2**32-127+ after casting to uint.
+  */
+  uint passwd_len= (thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
+                    (uchar)(*passwd++) : strlen(passwd));
+
+  db+= passwd_len + 1;
+  /*
+    Database name is always NUL-terminated, so in case of empty database
+    the packet must contain at least the trailing '\0'.
+  */
+  if (db >= end)
+  {
+    my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
+    return 1;
+  }
+
+  uint db_len= strlen(db);
+
+  char *ptr= db + db_len + 1;
+
+  if (ptr+1 < end)
+  {
+    uint cs_number= uint2korr(ptr);
+    thd_init_client_charset(thd, cs_number);
+    thd->update_charset();
+  }
+
+  /* Convert database and user names to utf8 */
+  db_len= copy_and_convert(db_buff, sizeof(db_buff)-1, system_charset_info,
+                           db, db_len, thd->charset(), &dummy_errors);
+
+  user_len= copy_and_convert(user_buff, sizeof(user_buff)-1,
+                             system_charset_info, user, user_len,
+                             thd->charset(), &dummy_errors);
+
+  if (!(sctx->user= my_strndup(user_buff, user_len, MYF(MY_WME))))
+    return 1;
+
+  /* Clear variables that are allocated */
+  thd->user_connect= 0;
+  strmake(sctx->priv_user, sctx->user, USERNAME_LENGTH);
+
+  if (thd->make_lex_string(&mpvio->db, db_buff, db_len, 0) == 0)
+    return 1; /* The error is set by make_lex_string(). */
+
+  /*
+    Clear thd->db as it points to something, that will be freed when
+    connection is closed. We don't want to accidentally free a wrong
+    pointer if connect failed.
+  */
+  thd->reset_db(NULL, 0);
+
+  if (!initialized)
+  {
+    // if mysqld's been started with --skip-grant-tables option
+    mpvio->status= MPVIO_EXT::SUCCESS;
+    return 0;
+  }
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+  if (find_mpvio_user(mpvio, sctx))
+    return 1;
+
+  char *client_plugin;
+  if (thd->client_capabilities & CLIENT_PLUGIN_AUTH)
+  {
+    client_plugin= ptr + 2;
+    if (client_plugin >= end)
+    {
+      my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
+      return 1;
+    }
+    client_plugin= fix_plugin_ptr(client_plugin);
+  }
+  else
+  {
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
+      client_plugin= native_password_plugin_name.str;
+    else
+    {
+      client_plugin=  old_password_plugin_name.str;
+      /*
+        For a passwordless accounts we use native_password_plugin.
+        But when an old 4.0 client connects to it, we change it to
+        old_password_plugin, otherwise MySQL will think that server 
+        and client plugins don't match.
+      */
+      if (mpvio->acl_user->auth_string.length == 0)
+        mpvio->acl_user->plugin= old_password_plugin_name;
+    }
+  }
+  
+  /* remember the data part of the packet, to present it to plugin in read_packet() */
+  mpvio->cached_client_reply.pkt= passwd;
+  mpvio->cached_client_reply.pkt_len= passwd_len;
+  mpvio->cached_client_reply.plugin= client_plugin;
+  mpvio->status= MPVIO_EXT::RESTART;
+#endif
+
+  return 0;
+}
+
+
+/* the packet format is described in send_client_reply_packet() */
+static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio,
+                                           uchar **buff, ulong pkt_len)
+{
+#ifndef EMBEDDED_LIBRARY
+  THD *thd= mpvio->thd;
+  NET *net= &thd->net;
+  char *end;
+
+  DBUG_ASSERT(mpvio->status == MPVIO_EXT::FAILURE);
+
+  if (pkt_len < MIN_HANDSHAKE_SIZE)
+    return packet_error;
+
+  if (mpvio->connect_errors)
+    reset_host_errors(thd->main_security_ctx.ip);
+
+  ulong client_capabilities= uint2korr(net->read_pos);
+  if (client_capabilities & CLIENT_PROTOCOL_41)
+  {
+    client_capabilities|= ((ulong) uint2korr(net->read_pos+2)) << 16;
+    thd->max_client_packet_length= uint4korr(net->read_pos+4);
+    DBUG_PRINT("info", ("client_character_set: %d", (uint) net->read_pos[8]));
+    thd_init_client_charset(thd, (uint) net->read_pos[8]);
+    thd->update_charset();
+    end= (char*) net->read_pos+32;
+  }
+  else
+  {
+    thd->max_client_packet_length= uint3korr(net->read_pos+2);
+    end= (char*) net->read_pos+5;
+  }
+
+  /* Disable those bits which are not supported by the client. */
+  thd->client_capabilities&= client_capabilities;
+
+  if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
+    thd->variables.sql_mode|= MODE_IGNORE_SPACE;
+
+  DBUG_PRINT("info", ("client capabilities: %lu", thd->client_capabilities));
+  if (thd->client_capabilities & CLIENT_SSL)
+  {
+    char error_string[1024] __attribute__((unused));
+
+    /* Do the SSL layering. */
+    if (!ssl_acceptor_fd)
+      return packet_error;
+
+    DBUG_PRINT("info", ("IO layer change in progress..."));
+    if (sslaccept(ssl_acceptor_fd, net->vio, net->read_timeout, error_string))
+    {
+      DBUG_PRINT("error", ("Failed to accept new SSL connection"));
+      return packet_error;
+    }
+
+    DBUG_PRINT("info", ("Reading user information over SSL layer"));
+    pkt_len= my_net_read(net);
+    if (pkt_len == packet_error || pkt_len < NORMAL_HANDSHAKE_SIZE)
+    {
+      DBUG_PRINT("error", ("Failed to read user information (pkt_len= %lu)",
+			   pkt_len));
+      return packet_error;
+    }
+  }
+
+  if (end >= (char*) net->read_pos+ pkt_len +2)
+    return packet_error;
+
+  if (thd->client_capabilities & CLIENT_INTERACTIVE)
+    thd->variables.net_wait_timeout= thd->variables.net_interactive_timeout;
+  if ((thd->client_capabilities & CLIENT_TRANSACTIONS) &&
+      opt_using_transactions)
+    net->return_status= &thd->server_status;
+
+  char *user= end;
+  char *passwd= strend(user)+1;
+  uint user_len= passwd - user - 1, db_len;
+  char *db= passwd;
+  char db_buff[SAFE_NAME_LEN + 1];      // buffer to store db in utf8
+  char user_buff[USERNAME_LENGTH + 1];	// buffer to store user in utf8
+  uint dummy_errors;
+
+  /*
+    Old clients send null-terminated string as password; new clients send
+    the size (1 byte) + string (not null-terminated). Hence in case of empty
+    password both send '\0'.
+
+    This strlen() can't be easily deleted without changing protocol.
+
+    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
+    *passwd > 127 and become 2**32-127+ after casting to uint.
+  */
+  uint passwd_len= thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
+                   (uchar)(*passwd++) : strlen(passwd);
+
+  if (thd->client_capabilities & CLIENT_CONNECT_WITH_DB)
+  {
+    db= db + passwd_len + 1;
+    /* strlen() can't be easily deleted without changing protocol */
+    db_len= strlen(db);
+  }
+  else
+  {
+    db= 0;
+    db_len= 0;
+  }
+
+  if (passwd + passwd_len + db_len > (char *)net->read_pos + pkt_len)
+    return packet_error;
+
+  char *client_plugin= passwd + passwd_len + (db ? db_len + 1 : 0);
+
+  /* Since 4.1 all database names are stored in utf8 */
+  if (db)
+  {
+    db_len= copy_and_convert(db_buff, sizeof(db_buff)-1, system_charset_info,
+                             db, db_len, thd->charset(), &dummy_errors);
+    db= db_buff;
+  }
+
+  user_len= copy_and_convert(user_buff, sizeof(user_buff)-1,
+                             system_charset_info, user, user_len,
+                             thd->charset(), &dummy_errors);
+  user= user_buff;
+
+  /* If username starts and ends in "'", chop them off */
+  if (user_len > 1 && user[0] == '\'' && user[user_len - 1] == '\'')
+  {
+    user++;
+    user_len-= 2;
+  }
+
+  Security_context *sctx= thd->security_ctx;
+
+  if (thd->make_lex_string(&mpvio->db, db, db_len, 0) == 0)
+    return packet_error; /* The error is set by make_lex_string(). */
+  my_free(sctx->user);
+  if (!(sctx->user= my_strndup(user, user_len, MYF(MY_WME))))
+    return packet_error; /* The error is set by my_strdup(). */
+
+  /*
+    Clear thd->db as it points to something, that will be freed when
+    connection is closed. We don't want to accidentally free a wrong
+    pointer if connect failed.
+  */
+  thd->reset_db(NULL, 0);
+
+  if (!initialized)
+  {
+    // if mysqld's been started with --skip-grant-tables option
+    mpvio->status= MPVIO_EXT::SUCCESS;
+    return packet_error;
+  }
+
+  if (find_mpvio_user(mpvio, sctx))
+    return packet_error;
+
+  if (thd->client_capabilities & CLIENT_PLUGIN_AUTH)
+  {
+    if ((client_plugin + strlen(client_plugin)) > 
+          (char *)net->read_pos + pkt_len)
+      return packet_error;
+    client_plugin= fix_plugin_ptr(client_plugin);
+  }
+  else
+  {
+    if (thd->client_capabilities & CLIENT_SECURE_CONNECTION)
+      client_plugin= native_password_plugin_name.str;
+    else
+    {
+      client_plugin=  old_password_plugin_name.str;
+      /*
+        For a passwordless accounts we use native_password_plugin.
+        But when an old 4.0 client connects to it, we change it to
+        old_password_plugin, otherwise MySQL will think that server 
+        and client plugins don't match.
+      */
+      if (mpvio->acl_user->auth_string.length == 0)
+        mpvio->acl_user->plugin= old_password_plugin_name;
+    }
+  }
+  
+  /*
+    if the acl_user needs a different plugin to authenticate
+    (specified in GRANT ... AUTHENTICATED VIA plugin_name ..)
+    we need to restart the authentication in the server.
+    But perhaps the client has already used the correct plugin -
+    in that case the authentication on the client may not need to be
+    restarted and a server auth plugin will read the data that the client
+    has just send. Cache them to return in the next server_mpvio_read_packet().
+  */
+  if (my_strcasecmp(system_charset_info, mpvio->acl_user->plugin.str,
+                    plugin_name(mpvio->plugin)->str) != 0)
+  {
+    mpvio->cached_client_reply.pkt= passwd;
+    mpvio->cached_client_reply.pkt_len= passwd_len;
+    mpvio->cached_client_reply.plugin= client_plugin;
+    mpvio->status= MPVIO_EXT::RESTART;
+    return packet_error;
+  }
+
+  /*
+    ok, we don't need to restart the authentication on the server.
+    but if the client used the wrong plugin, we need to restart
+    the authentication on the client. Do it here, the server plugin
+    doesn't need to know.
+  */
+  const char *client_auth_plugin=
+    ((st_mysql_auth *)(plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
+
+  if (client_auth_plugin &&
+      my_strcasecmp(system_charset_info, client_plugin, client_auth_plugin))
+  {
+    mpvio->cached_client_reply.plugin= client_plugin;
+    if (send_plugin_request_packet(mpvio,
+                                   (uchar*)mpvio->cached_server_packet.pkt,
+                                   mpvio->cached_server_packet.pkt_len))
+      return packet_error;
+
+    passwd_len= my_net_read(&mpvio->thd->net);
+    passwd = (char*)mpvio->thd->net.read_pos;
+  }
+
+  *buff= (uchar*)passwd;
+  return passwd_len;
+#else
+  return 0;
+#endif
+}
+
+
+/**
+  vio->write_packet() callback method for server authentication plugins
+
+  This function is called by a server authentication plugin, when it wants
+  to send data to the client.
+
+  It transparently wraps the data into a handshake packet,
+  and handles plugin negotiation with the client. If necessary,
+  it escapes the plugin data, if it starts with a mysql protocol packet byte.
+*/
+
+static int server_mpvio_write_packet(MYSQL_PLUGIN_VIO *param,
+                                   const uchar *packet, int packet_len)
+{
+  MPVIO_EXT *mpvio= (MPVIO_EXT*)param;
+  int res;
+
+  /* reset cached_client_reply */
+  mpvio->cached_client_reply.pkt= 0;
+  /* for the 1st packet we wrap plugin data into the handshake packet */
+  if (mpvio->packets_written == 0)
+    res= send_server_handshake_packet(mpvio, (char*)packet, packet_len);
+  else if (mpvio->status == MPVIO_EXT::RESTART)
+    res= send_plugin_request_packet(mpvio, packet, packet_len);
+  else if (packet_len > 0 && (*packet == 1 || *packet == 255 || *packet == 254))
+  {
+    /*
+      we cannot allow plugin data packet to start from 255 or 254 -
+      as the client will treat it as an error or "change plugin" packet.
+      We'll escape these bytes with \1. Consequently, we
+      have to escape \1 byte too.
+    */
+    res= net_write_command(&mpvio->thd->net, 1, (uchar*)"", 0,
+                           packet, packet_len);
+  }
+  else
+  {
+    res= my_net_write(&mpvio->thd->net, packet, packet_len) ||
+         net_flush(&mpvio->thd->net);
+  }
+  mpvio->packets_written++;
+  return res;
+}
+
+
+/**
+  vio->read_packet() callback method for server authentication plugins
+
+  This function is called by a server authentication plugin, when it wants
+  to read data from the client.
+
+  It transparently extracts the client plugin data, if embedded into
+  a client authentication handshake packet, and handles plugin negotiation
+  with the client, if necessary.
+*/
+
+static int server_mpvio_read_packet(MYSQL_PLUGIN_VIO *param, uchar **buf)
+{
+  MPVIO_EXT *mpvio= (MPVIO_EXT*)param;
+  ulong pkt_len;
+
+  if (mpvio->packets_written == 0)
+  {
+    /*
+      plugin wants to read the data without sending anything first.
+      send an empty packet to force a server handshake packet to be sent
+    */
+    if (server_mpvio_write_packet(mpvio, 0, 0))
+      pkt_len= packet_error;
+    else
+      pkt_len= my_net_read(&mpvio->thd->net);
+  }
+  else 
+  if (mpvio->cached_client_reply.pkt)
+  {
+    DBUG_ASSERT(mpvio->status == MPVIO_EXT::RESTART);
+    DBUG_ASSERT(mpvio->packets_read > 0);
+    /*
+      if the have the data cached from the last server_mpvio_read_packet
+      (which can be the case if it's a restarted authentication)
+      and a client has used the correct plugin, then we can return the
+      cached data straight away and avoid one round trip.
+    */
+    const char *client_auth_plugin=
+      ((st_mysql_auth *)(plugin_decl(mpvio->plugin)->info))->client_auth_plugin;
+    if (client_auth_plugin == 0 ||
+        my_strcasecmp(system_charset_info, mpvio->cached_client_reply.plugin,
+                      client_auth_plugin) == 0)
+    {
+      mpvio->status= MPVIO_EXT::FAILURE;
+      *buf= (uchar*)mpvio->cached_client_reply.pkt;
+      mpvio->cached_client_reply.pkt= 0;
+      mpvio->packets_read++;
+      return (int)mpvio->cached_client_reply.pkt_len;
+    }
+    /*
+      But if the client has used the wrong plugin, the cached data are
+      useless. Furthermore, we have to send a "change plugin" request
+      to the client.
+    */
+    if (server_mpvio_write_packet(mpvio, 0, 0))
+      pkt_len= packet_error;
+    else
+      pkt_len= my_net_read(&mpvio->thd->net);
+  }
+  else
+    pkt_len= my_net_read(&mpvio->thd->net);
+
+  if (pkt_len == packet_error)
+    goto err;
+
+  mpvio->packets_read++;
+
+  /*
+    the 1st packet has the plugin data wrapped into the client authentication
+    handshake packet
+  */
+  if (mpvio->packets_read == 1)
+  {
+    pkt_len= parse_client_handshake_packet(mpvio, buf, pkt_len);
+    if (pkt_len == packet_error)
+      goto err;
+  }
+  else
+    *buf = mpvio->thd->net.read_pos;
+
+  return (int)pkt_len;
+
+err:
+  if (mpvio->status == MPVIO_EXT::FAILURE && !mpvio->thd->is_error())
+  {
+    inc_host_errors(mpvio->thd->main_security_ctx.ip);
+    my_error(ER_HANDSHAKE_ERROR, MYF(0),
+             mpvio->thd->security_ctx->host_or_ip);
+  }
+  return -1;
+}
+
+
+/**
+  fills MYSQL_PLUGIN_VIO_INFO structure with the information about the
+  connection
+*/
+
+static void server_mpvio_info(MYSQL_PLUGIN_VIO *vio,
+                              MYSQL_PLUGIN_VIO_INFO *info)
+{
+  MPVIO_EXT *mpvio= (MPVIO_EXT*)vio;
+  mpvio_info(mpvio->thd->net.vio, info);
+}
+
+
+static bool acl_check_ssl(THD *thd, ACL_USER *acl_user)
+{
+#if defined(HAVE_OPENSSL)
+  Vio *vio=thd->net.vio;
+  SSL *ssl= (SSL*) vio->ssl_arg;
+  X509 *cert;
+#endif
+
+  /*
+    At this point we know that user is allowed to connect
+    from given host by given username/password pair. Now
+    we check if SSL is required, if user is using SSL and
+    if X509 certificate attributes are OK
+  */
+  switch (acl_user->ssl_type) {
+  case SSL_TYPE_NOT_SPECIFIED:                  // Impossible
+  case SSL_TYPE_NONE:                           // SSL is not required
+    return 0;
+#if defined(HAVE_OPENSSL)
+  case SSL_TYPE_ANY:                            // Any kind of SSL is ok
+    return vio_type(vio) != VIO_TYPE_SSL;
+  case SSL_TYPE_X509: /* Client should have any valid certificate. */
+    /*
+      Connections with non-valid certificates are dropped already
+      in sslaccept() anyway, so we do not check validity here.
+
+      We need to check for absence of SSL because without SSL
+      we should reject connection.
+    */
+    if (vio_type(vio) == VIO_TYPE_SSL &&
+        SSL_get_verify_result(ssl) == X509_V_OK &&
+        (cert= SSL_get_peer_certificate(ssl)))
+    {
+      X509_free(cert);
+      return 0;
+    }
+    return 1;
+  case SSL_TYPE_SPECIFIED: /* Client should have specified attrib */
+    /* If a cipher name is specified, we compare it to actual cipher in use. */
+    if (vio_type(vio) != VIO_TYPE_SSL ||
+        SSL_get_verify_result(ssl) != X509_V_OK)
+      return 1;
+    if (acl_user->ssl_cipher)
+    {
+      DBUG_PRINT("info",("comparing ciphers: '%s' and '%s'",
+                         acl_user->ssl_cipher,SSL_get_cipher(ssl)));
+      if (strcmp(acl_user->ssl_cipher,SSL_get_cipher(ssl)))
+      {
+        if (global_system_variables.log_warnings)
+          sql_print_information("X509 ciphers mismatch: should be '%s' but is '%s'",
+                            acl_user->ssl_cipher, SSL_get_cipher(ssl));
+        return 1;
+      }
+    }
+    /* Prepare certificate (if exists) */
+    if (!(cert= SSL_get_peer_certificate(ssl)))
+      return 1;
+    /* If X509 issuer is specified, we check it... */
+    if (acl_user->x509_issuer)
+    {
+      char *ptr = X509_NAME_oneline(X509_get_issuer_name(cert), 0, 0);
+      DBUG_PRINT("info",("comparing issuers: '%s' and '%s'",
+                         acl_user->x509_issuer, ptr));
+      if (strcmp(acl_user->x509_issuer, ptr))
+      {
+        if (global_system_variables.log_warnings)
+          sql_print_information("X509 issuer mismatch: should be '%s' "
+                            "but is '%s'", acl_user->x509_issuer, ptr);
+        free(ptr);
+        X509_free(cert);
+        return 1;
+      }
+      free(ptr);
+    }
+    /* X509 subject is specified, we check it .. */
+    if (acl_user->x509_subject)
+    {
+      char *ptr= X509_NAME_oneline(X509_get_subject_name(cert), 0, 0);
+      DBUG_PRINT("info",("comparing subjects: '%s' and '%s'",
+                         acl_user->x509_subject, ptr));
+      if (strcmp(acl_user->x509_subject,ptr))
+      {
+        if (global_system_variables.log_warnings)
+          sql_print_information("X509 subject mismatch: should be '%s' but is '%s'",
+                          acl_user->x509_subject, ptr);
+        free(ptr);
+        X509_free(cert);
+        return 1;
+      }
+      free(ptr);
+    }
+    X509_free(cert);
+    return 0;
+#else  /* HAVE_OPENSSL */
+  default:
+    /*
+      If we don't have SSL but SSL is required for this user the 
+      authentication should fail.
+    */
+    return 1;
+#endif /* HAVE_OPENSSL */
+  }
+  return 1;
+}
+
+static int do_auth_once(THD *thd, LEX_STRING *auth_plugin_name,
+                        MPVIO_EXT *mpvio)
+{
+  int res= CR_OK, old_status= MPVIO_EXT::FAILURE;
+  bool unlock_plugin= false;
+  plugin_ref plugin;
+
+#ifdef EMBEDDED_LIBRARY
+  plugin= native_password_plugin;
+#else
+  if (auth_plugin_name->str == native_password_plugin_name.str)
+    plugin= native_password_plugin;
+  else if (auth_plugin_name->str == old_password_plugin_name.str)
+    plugin= old_password_plugin;
+  else if ((plugin= my_plugin_lock_by_name(thd, auth_plugin_name,
+                                           MYSQL_AUTHENTICATION_PLUGIN)))
+    unlock_plugin= true;
+#endif
+
+  mpvio->plugin= plugin;
+  old_status= mpvio->status;
+
+  if (plugin)
+  {
+    st_mysql_auth *auth= (st_mysql_auth*)plugin_decl(plugin)->info;
+    res= auth->authenticate_user(mpvio, &mpvio->auth_info);
+
+    if (unlock_plugin)
+      plugin_unlock(thd, plugin);
+  }
+  else
+  {
+    /* Server cannot load the required plugin. */
+    my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), auth_plugin_name->str);
+    res= CR_ERROR;
+  }
+
+  /*
+    If the status was MPVIO_EXT::RESTART before the authenticate_user() call
+    it can never be MPVIO_EXT::RESTART after the call, because any call
+    to write_packet() or read_packet() will reset the status.
+
+    But (!) if a plugin never called a read_packet() or write_packet(), the
+    status will stay unchanged. We'll fix it, by resetting the status here.
+  */
+  if (old_status == MPVIO_EXT::RESTART && mpvio->status == MPVIO_EXT::RESTART)
+    mpvio->status= MPVIO_EXT::FAILURE; // reset to the default
+
+  return res;
+}
+
+/**
+  Perform the handshake, authorize the client and update thd sctx variables.
+
+  @param thd                     thread handle
+  @param connect_errors          number of previous failed connect attemps
+                                 from this host
+  @param com_change_user_pkt_len size of the COM_CHANGE_USER packet
+                                 (without the first, command, byte) or 0
+                                 if it's not a COM_CHANGE_USER (that is, if
+                                 it's a new connection)
+
+  @retval 0  success, thd is updated.
+  @retval 1  error
+*/
+
+bool acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len)
+{
+  int res= CR_OK;
+  MPVIO_EXT mpvio;
+  LEX_STRING *auth_plugin_name= default_auth_plugin_name;
+  enum  enum_server_command command= com_change_user_pkt_len ? COM_CHANGE_USER
+                                                             : COM_CONNECT;
+  DBUG_ENTER("acl_authenticate");
+
+  compile_time_assert(MYSQL_USERNAME_LENGTH == USERNAME_LENGTH);
+
+  bzero(&mpvio, sizeof(mpvio));
+  mpvio.read_packet= server_mpvio_read_packet;
+  mpvio.write_packet= server_mpvio_write_packet;
+  mpvio.info= server_mpvio_info;
+  mpvio.thd= thd;
+  mpvio.connect_errors= connect_errors;
+  mpvio.status= MPVIO_EXT::FAILURE;
+
+  if (command == COM_CHANGE_USER)
+  {
+    mpvio.packets_written++; // pretend that a server handshake packet was sent
+    mpvio.packets_read++;    // take COM_CHANGE_USER packet into account
+
+    if (parse_com_change_user_packet(&mpvio, com_change_user_pkt_len))
+      DBUG_RETURN(1);
+
+    DBUG_ASSERT(mpvio.status == MPVIO_EXT::RESTART ||
+                mpvio.status == MPVIO_EXT::SUCCESS);
+  }
+  else
+  {
+    /* mark the thd as having no scramble yet */
+    thd->scramble[SCRAMBLE_LENGTH]= 1;
+
+    /*
+      perform the first authentication attempt, with the default plugin.
+      This sends the server handshake packet, reads the client reply
+      with a user name, and performs the authentication if everyone has used
+      the correct plugin.
+    */
+    res= do_auth_once(thd, auth_plugin_name, &mpvio);
+  }
+
+  /*
+    retry the authentication, if - after receiving the user name -
+    we found that we need to switch to a non-default plugin
+  */
+  if (mpvio.status == MPVIO_EXT::RESTART)
+  {
+    DBUG_ASSERT(mpvio.acl_user);
+    DBUG_ASSERT(command == COM_CHANGE_USER ||
+                my_strcasecmp(system_charset_info, auth_plugin_name->str,
+                              mpvio.acl_user->plugin.str));
+    auth_plugin_name= &mpvio.acl_user->plugin;
+    res= do_auth_once(thd, auth_plugin_name, &mpvio);
+  }
+
+  Security_context *sctx= thd->security_ctx;
+  ACL_USER *acl_user= mpvio.acl_user;
+
+  thd->password= mpvio.auth_info.password_used;  // remember for error messages 
+
+  /*
+    Log the command here so that the user can check the log
+    for the tried logins and also to detect break-in attempts.
+
+    if sctx->user is unset it's protocol failure, bad packet.
+  */
+  if (sctx->user)
+  {
+    if (strcmp(sctx->priv_user, sctx->user))
+    {
+      general_log_print(thd, command, "%s@%s as %s on %s",
+                        sctx->user, sctx->host_or_ip,
+                        sctx->priv_user[0] ? sctx->priv_user : "anonymous",
+                        mpvio.db.str ? mpvio.db.str : (char*) "");
+    }
+    else
+      general_log_print(thd, command, (char*) "%s@%s on %s",
+                        sctx->user, sctx->host_or_ip,
+                        mpvio.db.str ? mpvio.db.str : (char*) "");
+  }
+
+  if (res > CR_OK && mpvio.status != MPVIO_EXT::SUCCESS)
+  {
+    DBUG_ASSERT(mpvio.status == MPVIO_EXT::FAILURE);
+
+    if (!thd->is_error())
+      login_failed_error(thd, thd->password);
+    DBUG_RETURN(1);
+  }
+
+  if (initialized) // if not --skip-grant-tables
+  {
+    sctx->master_access= acl_user->access;
+    strmake(sctx->priv_user, mpvio.auth_info.authenticated_as, USERNAME_LENGTH);
+    if (acl_user->host.hostname)
+      strmake(sctx->priv_host, acl_user->host.hostname, MAX_HOSTNAME);
+    else
+      *sctx->priv_host= 0;
+
+    /*
+      OK. Let's check the SSL. Historically it was checked after the password,
+      as an additional layer, not instead of the password
+      (in which case it would've been a plugin too).
+    */
+    if (acl_check_ssl(thd, acl_user))
+    {
+      login_failed_error(thd, thd->password);
+      DBUG_RETURN(1);
+    }
+
+    /* Don't allow the user to connect if he has done too many queries */
+    if ((acl_user->user_resource.questions || acl_user->user_resource.updates ||
+         acl_user->user_resource.conn_per_hour ||
+         acl_user->user_resource.user_conn ||
+         global_system_variables.max_user_connections) &&
+        get_or_create_user_conn(thd,
+          (opt_old_style_user_limits ? sctx->user : sctx->priv_user),
+          (opt_old_style_user_limits ? sctx->host_or_ip : sctx->priv_host),
+          &acl_user->user_resource))
+      DBUG_RETURN(1); // The error is set by get_or_create_user_conn()
+  }
+  else
+    sctx->skip_grants();
+
+  if (thd->user_connect &&
+      (thd->user_connect->user_resources.conn_per_hour ||
+       thd->user_connect->user_resources.user_conn ||
+       global_system_variables.max_user_connections) &&
+      check_for_max_user_connections(thd, thd->user_connect))
+  {
+    status_var_increment(denied_connections);
+    DBUG_RETURN(1); // The error is set in check_for_max_user_connections()
+  }
+
+  DBUG_PRINT("info",
+             ("Capabilities: %lu  packet_length: %ld  Host: '%s'  "
+              "Login user: '%s' Priv_user: '%s'  Using password: %s "
+              "Access: %lu  db: '%s'",
+              thd->client_capabilities, thd->max_client_packet_length,
+              sctx->host_or_ip, sctx->user, sctx->priv_user,
+              thd->password ? "yes": "no",
+              sctx->master_access, mpvio.db.str));
+
+  if (command == COM_CONNECT &&
+      !(thd->main_security_ctx.master_access & SUPER_ACL))
+  {
+    pthread_mutex_lock(&LOCK_connection_count);
+    bool count_ok= (*thd->scheduler->connection_count <=
+                    *thd->scheduler->max_connections);
+    VOID(pthread_mutex_unlock(&LOCK_connection_count));
+    if (!count_ok)
+    {                                         // too many connections
+      my_error(ER_CON_COUNT_ERROR, MYF(0));
+      DBUG_RETURN(1);
+    }
+  }
+
+  /*
+    This is the default access rights for the current database.  It's
+    set to 0 here because we don't have an active database yet (and we
+    may not have an active database to set.
+  */
+  sctx->db_access=0;
+
+  /* Change a database if necessary */
+  if (mpvio.db.length)
+  {
+    if (mysql_change_db(thd, &mpvio.db, FALSE))
+    {
+      /* mysql_change_db() has pushed the error message. */
+      if (thd->user_connect)
+      {
+        status_var_increment(thd->status_var.access_denied_errors);
+        decrease_user_connections(thd->user_connect);
+        thd->user_connect= 0;
+      }
+      DBUG_RETURN(1);
+    }
+  }
+
+  thd->net.net_skip_rest_factor= 2;  // skip at most 2*max_packet_size
+
+  if (res == CR_OK_HANDSHAKE_COMPLETE)
+    thd->main_da.disable_status();
+  else
+    my_ok(thd);
+
+  /* Ready to handle queries */
+  DBUG_RETURN(0);
+}
+
+
+/**
+  MySQL Server Password Authentication Plugin
+
+  In the MySQL authentication protocol:
+  1. the server sends the random scramble to the client
+  2. client sends the encrypted password back to the server
+  3. the server checks the password.
+*/
+
+static int native_password_authenticate(MYSQL_PLUGIN_VIO *vio, 
+                                        MYSQL_SERVER_AUTH_INFO *info)
+{
+  uchar *pkt;
+  int pkt_len;
+  MPVIO_EXT *mpvio=(MPVIO_EXT*)vio;
+  THD *thd=mpvio->thd;
+
+  /* generate the scramble, or reuse the old one */
+  if (thd->scramble[SCRAMBLE_LENGTH])
+  {
+    create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+    /* and send it to the client */
+    if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
+      return CR_ERROR;
+  }
+
+  /* reply and authenticate */
+
+  /*
+    <digression>
+      This is more complex than it looks.
+
+      The plugin (we) may be called right after the client was connected -
+      and will need to send a scramble, read reply, authenticate.
+
+      Or the plugin may be called after another plugin has sent a scramble,
+      and read the reply. If the client has used the correct client-plugin,
+      we won't need to read anything here from the client, the client
+      has already sent a reply with everything we need for authentication.
+
+      Or the plugin may be called after another plugin has sent a scramble,
+      and read the reply, but the client has used the wrong client-plugin.
+      We'll need to sent a "switch to another plugin" packet to the
+      client and read the reply. "Use the short scramble" packet is a special
+      case of "switch to another plugin" packet.
+
+      Or, perhaps, the plugin may be called after another plugin has
+      done the handshake but did not send a useful scramble. We'll need
+      to send a scramble (and perhaps a "switch to another plugin" packet)
+      and read the reply.
+
+      Besides, a client may be an old one, that doesn't understand plugins.
+      Or doesn't even understand 4.0 scramble.
+
+      And we want to keep the same protocol on the wire  unless non-native
+      plugins are involved.
+
+      Anyway, it still looks simple from a plugin point of view:
+      "send the scramble, read the reply and authenticate".
+      All the magic is transparently handled by the server.
+    </digression>
+  */
+
+  /* read the reply with the encrypted password */
+  if ((pkt_len= mpvio->read_packet(mpvio, &pkt)) < 0)
+    return CR_ERROR;
+
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+  return CR_OK;
+#endif
+
+  if (pkt_len == 0) /* no password */
+    return info->auth_string[0] ? CR_ERROR : CR_OK;
+
+  info->password_used = 1;
+  if (pkt_len == SCRAMBLE_LENGTH)
+    return info->auth_string[0] == 0 ||
+           check_scramble(pkt, thd->scramble, mpvio->acl_user->salt) ?
+             CR_ERROR : CR_OK;
+
+  inc_host_errors(mpvio->thd->main_security_ctx.ip);
+  my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
+  return CR_ERROR;
+}
+
+
+static int old_password_authenticate(MYSQL_PLUGIN_VIO *vio, 
+                                     MYSQL_SERVER_AUTH_INFO *info)
+{
+  uchar *pkt;
+  int pkt_len;
+  MPVIO_EXT *mpvio=(MPVIO_EXT*)vio;
+  THD *thd=mpvio->thd;
+
+  /* generate the scramble, or reuse the old one */
+  if (thd->scramble[SCRAMBLE_LENGTH])
+  {
+    create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
+    /* and send it to the client */
+    if (mpvio->write_packet(mpvio, (uchar*)thd->scramble, SCRAMBLE_LENGTH + 1))
+      return CR_ERROR;
+  }
+
+  /* read the reply and authenticate */
+  if ((pkt_len= mpvio->read_packet(mpvio, &pkt)) < 0)
+    return CR_ERROR;
+
+#ifdef NO_EMBEDDED_ACCESS_CHECKS
+  return CR_OK;
+#endif
+
+  /*
+    legacy: if switch_from_long_to_short_scramble,
+    the password is sent \0-terminated, the pkt_len is always 9 bytes.
+    We need to figure out the correct scramble length here.
+  */
+  if (pkt_len == SCRAMBLE_LENGTH_323+1)
+    pkt_len= strnlen((char*)pkt, pkt_len);
+
+  if (pkt_len == 0) /* no password */
+    return info->auth_string[0] ? CR_ERROR : CR_OK;
+
+  if (secure_auth(thd))
+    return CR_ERROR;
+
+  info->password_used = 1;
+
+  if (pkt_len == SCRAMBLE_LENGTH_323)
+    return info->auth_string[0] == 0 ||
+           check_scramble_323(pkt, thd->scramble,
+                             (ulong *)mpvio->acl_user->salt) ? CR_ERROR : CR_OK;
+
+  inc_host_errors(mpvio->thd->main_security_ctx.ip);
+  my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
+  return CR_ERROR;
+}
+
+static struct st_mysql_auth native_password_handler=
+{
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION,
+  native_password_plugin_name.str,
+  native_password_authenticate
+};
+
+static struct st_mysql_auth old_password_handler=
+{
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION,
+  old_password_plugin_name.str,
+  old_password_authenticate
+};
+
+mysql_declare_plugin(mysql_password)
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &native_password_handler,                     /* type descriptor  */
+  native_password_plugin_name.str,              /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Native MySQL authentication",                /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  NULL                                          /* config options   */
+},
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &old_password_handler,                        /* type descriptor  */
+  old_password_plugin_name.str,                 /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Old MySQL-4.0 authentication",               /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  NULL                                          /* config options   */
+}
+mysql_declare_plugin_end;
+
+maria_declare_plugin(mysql_password)
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &native_password_handler,                     /* type descriptor  */
+  native_password_plugin_name.str,              /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Native MySQL authentication",                /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_BETA                  /* Maturity         */
+},
+{
+  MYSQL_AUTHENTICATION_PLUGIN,                  /* type constant    */
+  &old_password_handler,                        /* type descriptor  */
+  old_password_plugin_name.str,                 /* Name             */
+  "R.J.Silk, Sergei Golubchik",                 /* Author           */
+  "Old MySQL-4.0 authentication",               /* Description      */
+  PLUGIN_LICENSE_GPL,                           /* License          */
+  NULL,                                         /* Init function    */
+  NULL,                                         /* Deinit function  */
+  0x0100,                                       /* Version (1.0)    */
+  NULL,                                         /* status variables */
+  NULL,                                         /* system variables */
+  "1.0",                                        /* String version   */
+  MariaDB_PLUGIN_MATURITY_BETA                  /* Maturity         */
+}
+maria_declare_plugin_end;
diff --git a/sql/sql_acl.h b/sql/sql_acl.h
index c0b536b7740..2a62b605678 100644
--- a/sql/sql_acl.h
+++ b/sql/sql_acl.h
@@ -173,53 +173,6 @@ enum mysql_db_table_field
 extern const TABLE_FIELD_DEF mysql_db_table_def;
 extern bool mysql_user_table_is_in_short_password_format;
 
-/* Classes */
-
-struct acl_host_and_ip
-{
-  char *hostname;
-  long ip,ip_mask;                      // Used with masked ip:s
-};
-
-
-class ACL_ACCESS {
-public:
-  ulong sort;
-  ulong access;
-};
-
-
-/* ACL_HOST is used if no host is specified */
-
-class ACL_HOST :public ACL_ACCESS
-{
-public:
-  acl_host_and_ip host;
-  char *db;
-};
-
-
-class ACL_USER :public ACL_ACCESS
-{
-public:
-  acl_host_and_ip host;
-  uint hostname_length;
-  USER_RESOURCES user_resource;
-  char *user;
-  uint8 salt[SCRAMBLE_LENGTH+1];       // scrambled password in binary form
-  uint8 salt_len;        // 0 - no password, 4 - 3.20, 8 - 3.23, 20 - 4.1.1 
-  enum SSL_type ssl_type;
-  const char *ssl_cipher, *x509_issuer, *x509_subject;
-};
-
-
-class ACL_DB :public ACL_ACCESS
-{
-public:
-  acl_host_and_ip host;
-  char *user,*db;
-};
-
 /* prototypes */
 
 bool hostname_requires_resolving(const char *hostname);
@@ -228,10 +181,9 @@ my_bool acl_reload(THD *thd);
 void acl_free(bool end=0);
 ulong acl_get(const char *host, const char *ip,
 	      const char *user, const char *db, my_bool db_is_pattern);
-int acl_getroot(THD *thd, USER_RESOURCES *mqh, const char *passwd,
-                uint passwd_len);
-bool acl_getroot_no_password(Security_context *sctx, char *user, char *host,
-                             char *ip, char *db);
+bool acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_len);
+bool acl_getroot(Security_context *sctx, char *user, char *host,
+                 char *ip, char *db);
 bool acl_check_host(const char *host, const char *ip);
 int check_change_password(THD *thd, const char *host, const char *user,
                            char *password, uint password_len);
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index 1f96f1cf0e4..d21341fac08 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -29,6 +29,34 @@
 #include "sql_parse.h"                       // check_table_access
 #include "sql_admin.h"
 
+/* Prepare, run and cleanup for mysql_recreate_table() */
+
+static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list)
+{
+  bool result_code;
+  DBUG_ENTER("admin_recreate_table");
+
+  trans_rollback_stmt(thd);
+  trans_rollback(thd);
+  close_thread_tables(thd);
+  thd->mdl_context.release_transactional_locks();
+  tmp_disable_binlog(thd); // binlogging is done by caller if wanted
+  result_code= mysql_recreate_table(thd, table);
+  reenable_binlog(thd);
+  /*
+    mysql_recreate_table() can push OK or ERROR.
+    Clear 'OK' status. If there is an error, keep it:
+    we will store the error message in a result set row 
+    and then clear.
+  */
+  if (thd->stmt_da->is_ok())
+    thd->stmt_da->reset_diagnostics_area();
+  table->table= NULL;
+  result_code= result_code ? HA_ADMIN_FAILED : HA_ADMIN_OK;
+  DBUG_RETURN(result_code);
+}
+
+
 static int send_check_errmsg(THD *thd, TABLE_LIST* table,
 			     const char* operator_name, const char* errmsg)
 
@@ -258,6 +286,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
   Protocol *protocol= thd->protocol;
   LEX *lex= thd->lex;
   int result_code;
+  bool need_repair_or_alter= 0;
   DBUG_ENTER("mysql_admin_table");
 
   field_list.push_back(item = new Item_empty_string("Table", NAME_CHAR_LEN*2));
@@ -276,7 +305,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
 
   for (table= tables; table; table= table->next_local)
   {
-    char table_name[NAME_LEN*2+2];
+    char table_name[SAFE_NAME_LEN*2+2];
     char* db = table->db;
     bool fatal_error=0;
     bool open_error;
@@ -519,28 +548,14 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
     if (operator_func == &handler::ha_repair &&
         !(check_opt->sql_flags & TT_USEFRM))
     {
-      if ((table->table->file->check_old_types() == HA_ADMIN_NEEDS_ALTER) ||
-          (table->table->file->ha_check_for_upgrade(check_opt) ==
-           HA_ADMIN_NEEDS_ALTER))
+      handler *file= table->table->file;
+      int check_old_types=   file->check_old_types();
+      int check_for_upgrade= file->ha_check_for_upgrade(check_opt);
+
+      if (check_old_types == HA_ADMIN_NEEDS_ALTER ||
+          check_for_upgrade == HA_ADMIN_NEEDS_ALTER)
       {
-        DBUG_PRINT("admin", ("recreating table"));
-        trans_rollback_stmt(thd);
-        trans_rollback(thd);
-        close_thread_tables(thd);
-        thd->mdl_context.release_transactional_locks();
-        tmp_disable_binlog(thd); // binlogging is done by caller if wanted
-        result_code= mysql_recreate_table(thd, table);
-        reenable_binlog(thd);
-        /*
-          mysql_recreate_table() can push OK or ERROR.
-          Clear 'OK' status. If there is an error, keep it:
-          we will store the error message in a result set row 
-          and then clear.
-        */
-        if (thd->stmt_da->is_ok())
-          thd->stmt_da->reset_diagnostics_area();
-        table->table= NULL;
-        result_code= result_code ? HA_ADMIN_FAILED : HA_ADMIN_OK;
+        result_code= admin_recreate_table(thd, table);
         goto send_result;
       }
     }
@@ -549,6 +564,14 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
     result_code = (table->table->file->*operator_func)(thd, check_opt);
     DBUG_PRINT("admin", ("operator_func returned: %d", result_code));
 
+    if (result_code == HA_ADMIN_NOT_IMPLEMENTED && need_repair_or_alter)
+    {
+      /*
+        repair was not implemented and we need to upgrade the table
+        to a new version so we recreate the table with ALTER TABLE
+      */
+      result_code= admin_recreate_table(thd, table);
+    }
 send_result:
 
     lex->cleanup_after_one_table_open();
@@ -642,37 +665,24 @@ send_result_message:
         reopen the table and do ha_innobase::analyze() on it.
         We have to end the row, so analyze could return more rows.
       */
-      trans_commit_stmt(thd);
-      trans_commit(thd);
-      close_thread_tables(thd);
-      thd->mdl_context.release_transactional_locks();
+      if (protocol->write())
+        goto err;
+      DBUG_PRINT("info", ("HA_ADMIN_TRY_ALTER, trying analyze..."));
       DEBUG_SYNC(thd, "ha_admin_try_alter");
       protocol->store(STRING_WITH_LEN("note"), system_charset_info);
       protocol->store(STRING_WITH_LEN(
           "Table does not support optimize, doing recreate + analyze instead"),
                       system_charset_info);
-      if (protocol->write())
-        goto err;
-      DBUG_PRINT("info", ("HA_ADMIN_TRY_ALTER, trying analyze..."));
       TABLE_LIST *save_next_local= table->next_local,
                  *save_next_global= table->next_global;
       table->next_local= table->next_global= 0;
-      tmp_disable_binlog(thd); // binlogging is done by caller if wanted
-      result_code= mysql_recreate_table(thd, table);
-      reenable_binlog(thd);
-      /*
-        mysql_recreate_table() can push OK or ERROR.
-        Clear 'OK' status. If there is an error, keep it:
-        we will store the error message in a result set row 
-        and then clear.
-      */
-      if (thd->stmt_da->is_ok())
-        thd->stmt_da->reset_diagnostics_area();
+      result_code= admin_recreate_table(thd, table);
+
       trans_commit_stmt(thd);
       trans_commit(thd);
       close_thread_tables(thd);
       thd->mdl_context.release_transactional_locks();
-      table->table= NULL;
+
       if (!result_code) // recreation went ok
       {
         /* Clear the ticket released above. */
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
index 4b6756188dd..6dccd594588 100644
--- a/sql/sql_analyse.cc
+++ b/sql/sql_analyse.cc
@@ -246,7 +246,8 @@ bool test_if_number(NUM_INFO *info, const char *str, uint str_len)
       }
       DBUG_RETURN(0);
     }
-    for (str++; *(end - 1) == '0'; end--) ; // jump over zeros at the end
+    for (str++; *(end - 1) == '0'; end--)  // jump over zeros at the end
+      ;
     if (str == end)		     // number was something like '123.000'
     {
       char *endpos= (char*) str;
@@ -408,7 +409,7 @@ void field_real::add()
 
   if ((decs = decimals()) == NOT_FIXED_DEC)
   {
-    length= sprintf(buff, "%g", num);
+    length= my_sprintf(buff, (buff, "%g", num));
     if (rint(num) != num)
       max_notzero_dec_len = 1;
   }
@@ -419,7 +420,7 @@ void field_real::add()
     snprintf(buff, sizeof(buff)-1, "%-.*f", (int) decs, num);
     length = (uint) strlen(buff);
 #else
-    length= sprintf(buff, "%-.*f", (int) decs, num);
+    length= my_sprintf(buff, (buff, "%-.*f", (int) decs, num));
 #endif
 
     // We never need to check further than this
@@ -1006,9 +1007,9 @@ void field_decimal::get_opt_type(String *answer,
   my_decimal_set_zero(&zero);
   my_bool is_unsigned= (my_decimal_cmp(&zero, &min_arg) >= 0);
 
-  length= my_snprintf(buff, sizeof(buff), "DECIMAL(%d, %d)",
-                      (int) (max_length - (item->decimals ? 1 : 0)),
-                      item->decimals);
+  length= my_sprintf(buff, (buff, "DECIMAL(%d, %d)",
+                            (int) (max_length - (item->decimals ? 1 : 0)),
+                            item->decimals));
   if (is_unsigned)
     length= (uint) (strmov(buff+length, " UNSIGNED")- buff);
   answer->append(buff, length);
diff --git a/sql/sql_analyse.h b/sql/sql_analyse.h
index a1461247894..1ef299c5dbc 100644
--- a/sql/sql_analyse.h
+++ b/sql/sql_analyse.h
@@ -76,7 +76,7 @@ class field_info :public Sql_alloc
 protected:
   ulong   treemem, tree_elements, empty, nulls, min_length, max_length;
   uint	  room_in_tree;
-  my_bool found;
+  bool found;
   TREE	  tree;
   Item	  *item;
   analyse *pc;
diff --git a/sql/sql_array.h b/sql/sql_array.h
index dfaa9b02947..19ce4e2e62b 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -69,4 +69,74 @@ public:
   }
 };
 
+/* 
+  Array of pointers to Elem that uses memory from MEM_ROOT
+
+  MEM_ROOT has no realloc() so this is supposed to be used for cases when
+  reallocations are rare.
+*/
+
+template <class Elem> class Array
+{
+  enum {alloc_increment = 16};
+  Elem **buffer;
+  uint n_elements, max_element;
+public:
+  Array(MEM_ROOT *mem_root, uint prealloc=16)
+  {
+    buffer= (Elem**)alloc_root(mem_root, prealloc * sizeof(Elem**));
+    max_element = buffer? prealloc : 0;
+    n_elements= 0;
+  }
+
+  Elem& at(int idx)
+  {
+    return *(((Elem*)buffer) + idx);
+  }
+
+  Elem **front()
+  {
+    return buffer;
+  }
+
+  Elem **back()
+  {
+    return buffer + n_elements;
+  }
+
+  bool append(MEM_ROOT *mem_root, Elem *el)
+  {
+    if (n_elements == max_element)
+    {
+      Elem **newbuf;
+      if (!(newbuf= (Elem**)alloc_root(mem_root, (n_elements + alloc_increment)*
+                                                  sizeof(Elem**))))
+      {
+        return FALSE;
+      }
+      memcpy(newbuf, buffer, n_elements*sizeof(Elem*));
+      buffer= newbuf;
+    }
+    buffer[n_elements++]= el;
+    return FALSE;
+  }
+
+  int elements()
+  {
+    return n_elements;
+  }
+
+  void clear()
+  {
+    n_elements= 0;
+  }
+
+  typedef int (*CMP_FUNC)(Elem * const *el1, Elem *const *el2);
+
+  void sort(CMP_FUNC cmp_func)
+  {
+    my_qsort(buffer, n_elements, sizeof(Elem*), (qsort_cmp)cmp_func);
+  }
+};
+
 #endif /* SQL_ARRAY_INCLUDED */
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index c0ea60cac3b..af81246253b 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -253,8 +253,12 @@ static void check_unused(void)
 uint create_table_def_key(THD *thd, char *key, TABLE_LIST *table_list,
                           bool tmp_table)
 {
-  uint key_length= (uint) (strmov(strmov(key, table_list->db)+1,
-                                  table_list->table_name)-key)+1;
+  char *db_end= strnmov(key, table_list->db, MAX_DBKEY_LENGTH - 2);
+  *db_end++= '\0';
+  char *table_end= strnmov(db_end, table_list->table_name,
+                           key + MAX_DBKEY_LENGTH - 1 - db_end);
+  *table_end++= '\0';
+  uint key_length= (uint) (table_end-key);
   if (tmp_table)
   {
     int4store(key + key_length, thd->server_id);
@@ -616,7 +620,7 @@ get_table_share_with_discover(THD *thd, TABLE_LIST *table_list,
 {
   TABLE_SHARE *share;
   bool exists;
-  DBUG_ENTER("get_table_share_with_create");
+  DBUG_ENTER("get_table_share_with_discover");
 
   share= get_table_share(thd, table_list, key, key_length, db_flags, error,
                          hash_value);
@@ -754,7 +758,7 @@ void release_table_share(TABLE_SHARE *share)
 
 TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name)
 {
-  char key[NAME_LEN*2+2];
+  char key[SAFE_NAME_LEN*2+2];
   TABLE_LIST table_list;
   uint key_length;
   mysql_mutex_assert_owner(&LOCK_open);
@@ -1528,9 +1532,12 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
   bool found_old_table= 0;
   TABLE *table= *table_ptr;
   DBUG_ENTER("close_thread_table");
+  DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
+                        table->s->table_name.str, (long) table));
   DBUG_ASSERT(table->key_read == 0);
   DBUG_ASSERT(!table->file || table->file->inited == handler::NONE);
   mysql_mutex_assert_not_owner(&LOCK_open);
+
   /*
     The metadata lock must be released after giving back
     the table to the table cache.
@@ -1541,6 +1548,12 @@ bool close_thread_table(THD *thd, TABLE **table_ptr)
                                              MDL_SHARED));
   table->mdl_ticket= NULL;
 
+  if (table->file)
+  {
+    table->file->update_global_table_stats();
+    table->file->update_global_index_stats();
+  }
+
   mysql_mutex_lock(&thd->LOCK_thd_data);
   *table_ptr=table->next;
   mysql_mutex_unlock(&thd->LOCK_thd_data);
@@ -2146,6 +2159,13 @@ void close_temporary(TABLE *table, bool free_share, bool delete_table)
   DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
                           table->s->db.str, table->s->table_name.str));
 
+  /* in_use is not set for replication temporary tables during shutdown */
+  if (table->in_use)
+  {
+    table->file->update_global_table_stats();
+    table->file->update_global_index_stats();
+  }
+
   free_io_cache(table);
   closefrm(table, 0);
   if (delete_table)
@@ -2255,7 +2275,6 @@ void drop_open_table(THD *thd, TABLE *table, const char *db_name,
     DBUG_ASSERT(table == thd->open_tables);
 
     handlerton *table_type= table->s->db_type();
-
     table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
     close_thread_table(thd, &thd->open_tables);
     /* Remove the table share from the table cache. */
@@ -3045,12 +3064,13 @@ retry_share:
   table->tablenr=thd->current_tablenr++;
   table->used_fields=0;
   table->const_table=0;
-  table->null_row= table->maybe_null= 0;
+  table->null_row= 0;
+  table->maybe_null= 0;
   table->force_index= table->force_index_order= table->force_index_group= 0;
   table->status=STATUS_NO_RECORD;
   table->insert_values= 0;
   table->fulltext_searched= 0;
-  table->file->ft_handler= 0;
+  table->file->ha_start_of_new_statement();
   table->reginfo.impossible_range= 0;
   /* Catch wrong handling of the auto_increment_field_not_null. */
   DBUG_ASSERT(!table->auto_increment_field_not_null);
@@ -3061,6 +3081,11 @@ retry_share:
   table_list->updatable= 1; // It is not derived table nor non-updatable VIEW
   table->clear_column_bitmaps();
   table_list->table= table;
+  /*
+    Fill record with random values to find bugs where we access fields
+    without first reading them.
+  */
+  TRASH(table->record[0], table->s->reclength);
   DBUG_ASSERT(table->key_read == 0);
   /* Tables may be reused in a sub statement. */
   DBUG_ASSERT(! table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
@@ -5852,6 +5877,9 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table)
     table->covering_keys.intersect(field->part_of_key);
     table->merge_keys.merge(field->part_of_key);
 
+    if (field->vcol_info)
+      table->mark_virtual_col(field);
+
     if (thd->mark_used_columns == MARK_COLUMNS_READ)
       bitmap= table->read_set;
     else
@@ -6079,6 +6107,7 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, const char *name,
   {
     /* This is a base table. */
     DBUG_ASSERT(nj_col->view_field == NULL);
+    Item *ref= 0;
     /*
       This fix_fields is not necessary (initially this item is fixed by
       the Item_field constructor; after reopen_tables the Item_func_eq
@@ -6086,12 +6115,13 @@ find_field_in_natural_join(THD *thd, TABLE_LIST *table_ref, const char *name,
       reopening for columns that was dropped by the concurrent connection.
     */
     if (!nj_col->table_field->fixed &&
-        nj_col->table_field->fix_fields(thd, (Item **)&nj_col->table_field))
+        nj_col->table_field->fix_fields(thd, &ref))
     {
       DBUG_PRINT("info", ("column '%s' was dropped by the concurrent connection",
                           nj_col->table_field->name));
       DBUG_RETURN(NULL);
     }
+    DBUG_ASSERT(ref == 0);                      // Should not have changed
     DBUG_ASSERT(nj_col->table_ref->table == nj_col->table_field->field->table);
     found_field= nj_col->table_field->field;
     update_field_dependencies(thd, found_field, nj_col->table_ref->table);
@@ -6457,7 +6487,7 @@ find_field_in_tables(THD *thd, Item_ident *item,
   const char *table_name= item->table_name;
   const char *name= item->field_name;
   uint length=(uint) strlen(name);
-  char name_buff[NAME_LEN+1];
+  char name_buff[SAFE_NAME_LEN+1];
   TABLE_LIST *cur_table= first_table;
   TABLE_LIST *actual_table;
   bool allow_rowid;
@@ -6514,12 +6544,21 @@ find_field_in_tables(THD *thd, Item_ident *item,
           sub query as dependent on the outer query
         */
         if (current_sel != last_select)
+        {
           mark_select_range_as_dependent(thd, last_select, current_sel,
                                          found, *ref, item);
+          if (item->can_be_depended)
+          {
+            DBUG_ASSERT((*ref) == (Item*)item);
+            current_sel->register_dependency_item(last_select, ref);
+          }
+        }
       }
       return found;
     }
   }
+  else
+    item->can_be_depended= TRUE;
 
   if (db && lower_case_table_names)
   {
@@ -6614,7 +6653,7 @@ find_field_in_tables(THD *thd, Item_ident *item,
       (report_error == REPORT_ALL_ERRORS ||
        report_error == REPORT_EXCEPT_NON_UNIQUE))
   {
-    char buff[NAME_LEN*2 + 2];
+    char buff[SAFE_NAME_LEN*2 + 2];
     if (db && db[0])
     {
       strxnmov(buff,sizeof(buff)-1,db,".",table_name,NullS);
@@ -7648,6 +7687,9 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
     /* make * substituting permanent */
     SELECT_LEX *select_lex= thd->lex->current_select;
     select_lex->with_wild= 0;
+#ifdef HAVE_valgrind
+    if (&select_lex->item_list != &fields)      // Avoid warning
+#endif
     /*   
       The assignment below is translated to memcpy() call (at least on some
       platforms). memcpy() expects that source and destination areas do not
@@ -7997,7 +8039,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
 {
   Field_iterator_table_ref field_iterator;
   bool found;
-  char name_buff[NAME_LEN+1];
+  char name_buff[SAFE_NAME_LEN+1];
   DBUG_ENTER("insert_fields");
   DBUG_PRINT("arena", ("stmt arena: 0x%lx", (ulong)thd->stmt_arena));
 
@@ -8033,7 +8075,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
     DBUG_ASSERT(tables->is_leaf_for_name_resolution());
 
     if ((table_name && my_strcasecmp(table_alias_charset, table_name,
-                                    tables->alias)) ||
+                                     tables->alias)) ||
         (db_name && strcmp(tables->db,db_name)))
       continue;
 
@@ -8145,6 +8187,12 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
       {
         /* Mark fields as used to allow storage engine to optimze access */
         bitmap_set_bit(field->table->read_set, field->field_index);
+        /*
+          Mark virtual fields for write and others that the virtual fields
+          depend on for read.
+        */
+        if (field->vcol_info)
+          field->table->mark_virtual_col(field);
         if (table)
         {
           table->covering_keys.intersect(field->part_of_key);
@@ -8224,6 +8272,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
 {
   SELECT_LEX *select_lex= thd->lex->current_select;
   TABLE_LIST *table= NULL;	// For HP compilers
+  TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
   /*
     it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
     which belong to LEX, i.e. most up SELECT) will be updated by
@@ -8250,13 +8299,19 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       goto err_no_arena;
   }
 
+  thd->thd_marker.emb_on_expr_nest= (TABLE_LIST*)1;
   if (*conds)
   {
     thd->where="where clause";
+    DBUG_EXECUTE("where",
+                 print_where(*conds,
+                             "WHERE in setup_conds",
+                             QT_ORDINARY););
     if ((!(*conds)->fixed && (*conds)->fix_fields(thd, conds)) ||
 	(*conds)->check_cols(1))
       goto err_no_arena;
   }
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
 
   /*
     Apply fix_fields() to all ON clauses at all levels of nesting,
@@ -8272,9 +8327,10 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       if (embedded->on_expr)
       {
         /* Make a join an a expression */
+        thd->thd_marker.emb_on_expr_nest= embedded;
         thd->where="on clause";
         if ((!embedded->on_expr->fixed &&
-            embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
+             embedded->on_expr->fix_fields(thd, &embedded->on_expr)) ||
 	    embedded->on_expr->check_cols(1))
 	  goto err_no_arena;
         select_lex->cond_count++;
@@ -8296,6 +8352,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
       }
     }
   }
+  thd->thd_marker.emb_on_expr_nest= save_emb_on_expr_nest;
 
   if (!thd->stmt_arena->is_conventional())
   {
@@ -8350,7 +8407,10 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
   Item *value, *fld;
   Item_field *field;
   TABLE *table= 0;
+  List<TABLE> tbl_list;
+  bool abort_on_warning_saved= thd->abort_on_warning;
   DBUG_ENTER("fill_record");
+  tbl_list.empty();
 
   /*
     Reset the table->auto_increment_field_not_null as it is valid for
@@ -8372,6 +8432,8 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
     table->auto_increment_field_not_null= FALSE;
     f.rewind();
   }
+  else if (thd->lex->unit.insert_table_with_stored_vcol)
+    tbl_list.push_back(thd->lex->unit.insert_table_with_stored_vcol);
   while ((fld= f++))
   {
     if (!(field= fld->filed_for_view_update()))
@@ -8384,14 +8446,54 @@ fill_record(THD * thd, List<Item> &fields, List<Item> &values,
     table= rfield->table;
     if (rfield == table->next_number_field)
       table->auto_increment_field_not_null= TRUE;
+    if (rfield->vcol_info && 
+        value->type() != Item::DEFAULT_VALUE_ITEM && 
+        value->type() != Item::NULL_ITEM &&
+        table->s->table_category != TABLE_CATEGORY_TEMPORARY)
+    {
+      thd->abort_on_warning= FALSE;
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
+                          ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
+                          rfield->field_name, table->s->table_name.str);
+      thd->abort_on_warning= abort_on_warning_saved;
+    }
     if ((value->save_in_field(rfield, 0) < 0) && !ignore_errors)
     {
       my_message(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR), MYF(0));
       goto err;
     }
+    tbl_list.push_back(table);
   }
+  /* Update virtual fields*/
+  thd->abort_on_warning= FALSE;
+  if (tbl_list.head())
+  {
+    List_iterator_fast<TABLE> it(tbl_list);
+    TABLE *prev_table= 0;
+    while ((table= it++))
+    {
+      /*
+        Do simple optimization to prevent unnecessary re-generating 
+        values for virtual fields
+      */
+      if (table != prev_table)
+      {
+        prev_table= table;
+        if (table->vfield)
+        {
+          if (update_virtual_fields(thd, table, TRUE))
+          {
+            goto err;
+          }
+        }
+      }
+    }
+  }
+  thd->abort_on_warning= abort_on_warning_saved;
   DBUG_RETURN(thd->is_error());
 err:
+  thd->abort_on_warning= abort_on_warning_saved;
   if (table)
     table->auto_increment_field_not_null= FALSE;
   DBUG_RETURN(TRUE);
@@ -8427,9 +8529,31 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
                                      Table_triggers_list *triggers,
                                      enum trg_event_type event)
 {
-  return (fill_record(thd, fields, values, ignore_errors) ||
-          (triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE)));
+  bool result;
+  result= (fill_record(thd, fields, values, ignore_errors) ||
+           (triggers && triggers->process_triggers(thd, event,
+                                                   TRG_ACTION_BEFORE, TRUE)));
+  /*
+    Re-calculate virtual fields to cater for cases when base columns are
+    updated by the triggers.
+  */
+  if (!result && triggers)
+  {
+    TABLE *table= 0;
+    List_iterator_fast<Item> f(fields);
+    Item *fld;
+    Item_field *item_field;
+    if (fields.elements)
+    {
+      fld= (Item_field*)f++;
+      item_field= fld->filed_for_view_update();
+      if (item_field && item_field->field &&
+          (table= item_field->field->table) &&
+        table->vfield)
+        result= update_virtual_fields(thd, table, TRUE);
+    }
+  }
+  return result;
 }
 
 
@@ -8442,6 +8566,7 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
     ptr           pointer on pointer to record
     values        list of fields
     ignore_errors TRUE if we should ignore errors
+    use_value     forces usage of value of the items instead of result
 
   NOTE
     fill_record() may set table->auto_increment_field_not_null and a
@@ -8454,14 +8579,18 @@ fill_record_n_invoke_before_triggers(THD *thd, List<Item> &fields,
 */
 
 bool
-fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors)
+fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors,
+            bool use_value)
 {
   List_iterator_fast<Item> v(values);
+  List<TABLE> tbl_list;
   Item *value;
   TABLE *table= 0;
+  bool abort_on_warning_saved= thd->abort_on_warning;
   DBUG_ENTER("fill_record");
 
   Field *field;
+  tbl_list.empty();
   /*
     Reset the table->auto_increment_field_not_null as it is valid for
     only one row.
@@ -8481,12 +8610,55 @@ fill_record(THD *thd, Field **ptr, List<Item> &values, bool ignore_errors)
     table= field->table;
     if (field == table->next_number_field)
       table->auto_increment_field_not_null= TRUE;
-    if (value->save_in_field(field, 0) < 0)
-      goto err;
+    if (field->vcol_info && 
+        value->type() != Item::DEFAULT_VALUE_ITEM && 
+        value->type() != Item::NULL_ITEM &&
+        table->s->table_category != TABLE_CATEGORY_TEMPORARY)
+    {
+      thd->abort_on_warning= FALSE;
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN,
+                          ER(ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN),
+                          field->field_name, table->s->table_name.str);
+      thd->abort_on_warning= abort_on_warning_saved;
+    }
+    if (use_value)
+      value->save_val(field);
+    else
+      if (value->save_in_field(field, 0) < 0)
+        goto err;
+    tbl_list.push_back(table);
   }
+  /* Update virtual fields*/
+  thd->abort_on_warning= FALSE;
+  if (tbl_list.head())
+  {
+    List_iterator_fast<TABLE> t(tbl_list);
+    TABLE *prev_table= 0;
+    while ((table= t++))
+    {
+      /*
+        Do simple optimization to prevent unnecessary re-generating 
+        values for virtual fields
+      */
+      if (table != prev_table)
+      {
+        prev_table= table;
+        if (table->vfield)
+        {
+          if (update_virtual_fields(thd, table, TRUE))
+          {
+            goto err;
+          }
+        }
+      }
+    }
+  }
+  thd->abort_on_warning= abort_on_warning_saved;
   DBUG_RETURN(thd->is_error());
 
 err:
+  thd->abort_on_warning= abort_on_warning_saved;
   if (table)
     table->auto_increment_field_not_null= FALSE;
   DBUG_RETURN(TRUE);
@@ -8522,9 +8694,22 @@ fill_record_n_invoke_before_triggers(THD *thd, Field **ptr,
                                      Table_triggers_list *triggers,
                                      enum trg_event_type event)
 {
-  return (fill_record(thd, ptr, values, ignore_errors) ||
-          (triggers && triggers->process_triggers(thd, event,
-                                                 TRG_ACTION_BEFORE, TRUE)));
+  bool result;
+  result= (fill_record(thd, ptr, values, ignore_errors, FALSE) ||
+           (triggers && triggers->process_triggers(thd, event,
+                                                   TRG_ACTION_BEFORE, TRUE)));
+  /*
+    Re-calculate virtual fields to cater for cases when base columns are
+    updated by the triggers.
+  */
+  if (!result && triggers && *ptr)
+  {
+    TABLE *table= (*ptr)->table;
+    if (table->vfield)
+      result= update_virtual_fields(thd, table, TRUE);
+  }
+  return result;
+
 }
 
 
@@ -8569,7 +8754,7 @@ my_bool mysql_rm_tmp_tables(void)
         uint filePath_len= my_snprintf(filePath, sizeof(filePath),
                                        "%s%c%s", tmpdir, FN_LIBCHAR,
                                        file->name);
-        if (!memcmp(reg_ext, ext, ext_len))
+        if (!strcmp(reg_ext, ext))
         {
           handler *handler_file= 0;
           /* We should cut file extention before deleting of table */
@@ -8621,6 +8806,8 @@ void tdc_flush_unused_tables()
     free_cache_entry(unused_tables);
   mysql_mutex_unlock(&LOCK_open);
 }
+#error restore table->s->deleting
+#error restore changes from monty@askmonty.org-20101102152257-mwa7etvs9nxewjf2
 
 
 /**
diff --git a/sql/sql_base.h b/sql/sql_base.h
index 2e4554313e5..2f365771416 100644
--- a/sql/sql_base.h
+++ b/sql/sql_base.h
@@ -182,7 +182,7 @@ bool setup_fields(THD *thd, Item** ref_pointer_array,
                   List<Item> &item, enum_mark_columns mark_used_columns,
                   List<Item> *sum_func_list, bool allow_sum_func);
 bool fill_record(THD *thd, Field **field, List<Item> &values,
-                 bool ignore_errors);
+                 bool ignore_errors, bool use_value);
 
 Field *
 find_field_in_tables(THD *thd, Item_ident *item,
@@ -289,6 +289,7 @@ TABLE *find_table_for_mdl_upgrade(TABLE *list, const char *db,
                                   bool no_error);
 void mark_tmp_table_for_reuse(TABLE *table);
 bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists);
+int update_virtual_fields(THD *thd, TABLE *table, bool ignore_stored= FALSE);
 
 extern TABLE *unused_tables;
 extern Item **not_found_item;
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index 6f59c3f51e1..76c6e0e058a 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -76,7 +76,7 @@ void mysql_client_binlog_statement(THD* thd)
   if (!rli)
   {
     rli= thd->rli_fake= new Relay_log_info(FALSE);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     rli->is_fake= TRUE;
 #endif
     have_fd_event= FALSE;
@@ -112,7 +112,7 @@ void mysql_client_binlog_statement(THD* thd)
     char const *endptr= 0;
     int bytes_decoded= base64_decode(strptr, coded_len, buf, &endptr);
 
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
       /*
         This debug printout should not be used for valgrind builds
         since it will read from unassigned memory.
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index 54a207c8adf..cd3442d306c 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -64,13 +64,13 @@ public:
   }
   void subtract(Bitmap& map2) { bitmap_subtract(&map, &map2.map); }
   void merge(Bitmap& map2) { bitmap_union(&map, &map2.map); }
-  my_bool is_set(uint n) const { return bitmap_is_set(&map, n); }
-  my_bool is_prefix(uint n) const { return bitmap_is_prefix(&map, n); }
-  my_bool is_clear_all() const { return bitmap_is_clear_all(&map); }
-  my_bool is_set_all() const { return bitmap_is_set_all(&map); }
-  my_bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
-  my_bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
-  my_bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
+  bool is_set(uint n) const { return bitmap_is_set(&map, n); }
+  bool is_prefix(uint n) const { return bitmap_is_prefix(&map, n); }
+  bool is_clear_all() const { return bitmap_is_clear_all(&map); }
+  bool is_set_all() const { return bitmap_is_set_all(&map); }
+  bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
+  bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
+  bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
   char *print(char *buf) const
   {
     char *s=buf;
@@ -97,6 +97,34 @@ public:
   }
 };
 
+/* An iterator to quickly walk over bits in unlonglong bitmap. */
+class Table_map_iterator
+{
+  ulonglong bmp;
+  uint no;
+public:
+  Table_map_iterator(ulonglong t) : bmp(t), no(0) {}
+  int next_bit()
+  {
+    static const char last_bit[16]= {32, 0, 1, 0, 
+                                      2, 0, 1, 0, 
+                                      3, 0, 1, 0,
+                                      2, 0, 1, 0};
+    uint bit;
+    while ((bit= last_bit[bmp & 0xF]) == 32)
+    {
+      no += 4;
+      bmp= bmp >> 4;
+      if (!bmp)
+        return BITMAP_END;
+    }
+    bmp &= ~(1LL << bit);
+    return no + bit;
+  }
+  int operator++(int) { return next_bit(); }
+  enum { BITMAP_END= 64 };
+};
+
 template <> class Bitmap<64>
 {
   ulonglong map;
@@ -122,15 +150,20 @@ public:
   void intersect_extended(ulonglong map2) { map&= map2; }
   void subtract(Bitmap<64>& map2) { map&= ~map2.map; }
   void merge(Bitmap<64>& map2) { map|= map2.map; }
-  my_bool is_set(uint n) const { return test(map & (((ulonglong)1) << n)); }
-  my_bool is_prefix(uint n) const { return map == (((ulonglong)1) << n)-1; }
-  my_bool is_clear_all() const { return map == (ulonglong)0; }
-  my_bool is_set_all() const { return map == ~(ulonglong)0; }
-  my_bool is_subset(const Bitmap<64>& map2) const { return !(map & ~map2.map); }
-  my_bool is_overlapping(const Bitmap<64>& map2) const { return (map & map2.map)!= 0; }
-  my_bool operator==(const Bitmap<64>& map2) const { return map == map2.map; }
-  char *print(char *buf) const { longlong2str(map,buf,16); return buf; }
+  bool is_set(uint n) const { return test(map & (((ulonglong)1) << n)); }
+  bool is_prefix(uint n) const { return map == (((ulonglong)1) << n)-1; }
+  bool is_clear_all() const { return map == (ulonglong)0; }
+  bool is_set_all() const { return map == ~(ulonglong)0; }
+  bool is_subset(const Bitmap<64>& map2) const { return !(map & ~map2.map); }
+  bool is_overlapping(const Bitmap<64>& map2) const { return (map & map2.map)!= 0; }
+  bool operator==(const Bitmap<64>& map2) const { return map == map2.map; }
+  char *print(char *buf) const { longlong2str(map,buf,16,1); return buf; }
   ulonglong to_ulonglong() const { return map; }
+  class Iterator : public Table_map_iterator
+  {
+  public:
+    Iterator(Bitmap<64> &bmp) : Table_map_iterator(bmp.map) {}
+  };
 };
 
 
diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in
index d9d9b610baf..c4b8531fc5a 100644
--- a/sql/sql_builtin.cc.in
+++ b/sql/sql_builtin.cc.in
@@ -13,24 +13,27 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
 
+#include <my_global.h>
 #include <mysql/plugin.h>
 
-typedef struct st_mysql_plugin builtin_plugin[];
+typedef struct st_maria_plugin builtin_maria_plugin[];
 
 #ifdef _MSC_VER
 extern "C"
 #else
 extern
 #endif
-builtin_plugin 
-  @mysql_mandatory_plugins@ @mysql_optional_plugins@ builtin_binlog_plugin;
+builtin_maria_plugin 
+  @mysql_mandatory_plugins@ @mysql_optional_plugins@
+  builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin;
 
-struct st_mysql_plugin *mysql_optional_plugins[]=
+struct st_maria_plugin *mysql_optional_plugins[]=
 {
   @mysql_optional_plugins@ 0
 };
 
-struct st_mysql_plugin *mysql_mandatory_plugins[]=
+struct st_maria_plugin *mysql_mandatory_plugins[]=
 {
-  builtin_binlog_plugin, @mysql_mandatory_plugins@ 0
+  builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin,
+  @mysql_mandatory_plugins@ 0
 };
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index b57c851edf4..a536c5b35b4 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -381,9 +381,11 @@ TODO list:
 
 static void debug_wait_for_kill(const char *info)
 {
-  DBUG_ENTER("debug_wait_for_kill");
   const char *prev_info;
   THD *thd;
+  char buff[1024];
+  DBUG_ENTER("debug_wait_for_kill");
+
   thd= current_thd;
   prev_info= thd->proc_info;
   thd->proc_info= info;
@@ -391,8 +393,16 @@ static void debug_wait_for_kill(const char *info)
   while(!thd->killed)
     my_sleep(1000);
   thd->killed= THD::NOT_KILLED;
+  /*
+    Remove the set debug variable, to ensure we don't get stuck on it again
+    This is needed as for MyISAM, invalidate_table() may be called twice
+    (Once from mysql_delete() and once from mi_update_status())
+  */
+  sprintf(buff, "-d,%s", info);
+  DBUG_SET(buff);
   sql_print_information("Exit debug_wait_for_kill");
   thd->proc_info= prev_info;
+
   DBUG_VOID_RETURN;
 }
 
@@ -725,7 +735,7 @@ inline void Query_cache_query::lock_writing()
   remove it.
 */
 
-my_bool Query_cache_query::try_lock_writing()
+bool Query_cache_query::try_lock_writing()
 {
   DBUG_ENTER("Query_cache_block::try_lock_writing");
   if (mysql_rwlock_trywrlock(&lock) != 0)
@@ -926,8 +936,8 @@ Query_cache::insert(Query_cache_tls *query_cache_tls,
 void
 Query_cache::abort(Query_cache_tls *query_cache_tls)
 {
+  THD *thd;
   DBUG_ENTER("query_cache_abort");
-  THD *thd= current_thd;
 
   /* See the comment on double-check locking usage above. */
   if (is_disabled() || query_cache_tls->first_query_block == NULL)
@@ -935,6 +945,7 @@ Query_cache::abort(Query_cache_tls *query_cache_tls)
 
   if (try_lock())
     DBUG_VOID_RETURN;
+  }
 
   /*
     While we were waiting another thread might have changed the status
@@ -943,6 +954,7 @@ Query_cache::abort(Query_cache_tls *query_cache_tls)
   Query_cache_block *query_block= query_cache_tls->first_query_block;
   if (query_block)
   {
+    thd= current_thd;
     thd_proc_info(thd, "storing result in query cache");
     DUMP(this);
     BLOCK_LOCK_WR(query_block);
@@ -985,6 +997,7 @@ void Query_cache::end_of_result(THD *thd)
 
   if (try_lock())
     DBUG_VOID_RETURN;
+  }
 
   query_block= query_cache_tls->first_query_block;
   if (query_block)
@@ -1724,6 +1737,7 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
 
   thd->limit_found_rows = query->found_rows();
   thd->status_var.last_query_cost= 0.0;
+  thd->query_plan_flags= (thd->query_plan_flags & ~QPLAN_QC_NO) | QPLAN_QC;
   if (!thd->stmt_da->is_set())
     thd->stmt_da->disable_status();
 
@@ -1735,6 +1749,10 @@ err_unlock:
   unlock();
 err:
   MYSQL_QUERY_CACHE_MISS(thd->query());
+  /*
+    query_plan_flags doesn't have to be changed here as it contains
+    QPLAN_QC_NO by default
+  */
   DBUG_RETURN(0);				// Query was not cached
 }
 
diff --git a/sql/sql_cache.h b/sql/sql_cache.h
index 792ae6ba960..5a6c8e25c77 100644
--- a/sql/sql_cache.h
+++ b/sql/sql_cache.h
@@ -138,7 +138,7 @@ struct Query_cache_block
   block_type type;
   TABLE_COUNTER_TYPE n_tables;			// number of tables in query
 
-  inline my_bool is_free(void) { return type == FREE; }
+  inline bool is_free(void) { return type == FREE; }
   void init(ulong length);
   void destroy();
   inline uint headers_len();
@@ -179,7 +179,7 @@ struct Query_cache_query
   }
   void lock_writing();
   void lock_reading();
-  my_bool try_lock_writing();
+  bool try_lock_writing();
   void unlock_writing();
   void unlock_reading();
 };
@@ -332,7 +332,7 @@ protected:
   
   uint mem_bin_num, mem_bin_steps;		// See at init_cache & find_bin
 
-  my_bool initialized;
+  bool initialized;
 
   /* Exclude/include from cyclic double linked list */
   static void double_linked_list_exclude(Query_cache_block *point,
diff --git a/sql/sql_callback.h b/sql/sql_callback.h
index 430514d3d7e..834d8b74c7a 100644
--- a/sql/sql_callback.h
+++ b/sql/sql_callback.h
@@ -31,12 +31,9 @@
  */
 
 #define MYSQL_CALLBACK(OBJ, FUNC, PARAMS)         \
-  do {                                            \
-    if ((OBJ) && ((OBJ)->FUNC))                   \
-      (OBJ)->FUNC PARAMS;                         \
-  } while (0)
+  (((OBJ) && ((OBJ)->FUNC)) ? (OBJ)->FUNC PARAMS : 0)
 
-#define MYSQL_CALLBACK_ELSE(OBJ, FUNC, PARAMS, ELSE)    \
+#define MYSQL_CALLBACK_ELSE0(OBJ, FUNC, PARAMS, ELSE)    \
   (((OBJ) && ((OBJ)->FUNC)) ? (OBJ)->FUNC PARAMS : (ELSE))
 
 
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index f65e5c05826..2fcb6120076 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -55,6 +55,7 @@
 #include "sp_rcontext.h"
 #include "sp_cache.h"
 #include "transaction.h"
+#include "sql_select.h" /* declares create_tmp_table() */
 #include "debug_sync.h"
 #include "sql_parse.h"                          // is_update_query
 #include "sql_callback.h"
@@ -123,6 +124,7 @@ Key::Key(const Key &rhs, MEM_ROOT *mem_root)
   key_create_info(rhs.key_create_info),
   columns(rhs.columns, mem_root),
   name(rhs.name),
+  option_list(rhs.option_list),
   generated(rhs.generated)
 {
   list_copy_and_replace_each_value(columns, mem_root);
@@ -212,6 +214,59 @@ bool foreign_key_prefix(Key *a, Key *b)
 #endif
 }
 
+/*
+  @brief
+  Check if the foreign key options are compatible with the specification
+  of the columns on which the key is created
+
+  @retval
+    FALSE   The foreign key options are compatible with key columns
+  @retval
+    TRUE    Otherwise
+*/
+bool Foreign_key::validate(List<Create_field> &table_fields)
+{
+  Create_field  *sql_field;
+  Key_part_spec *column;
+  List_iterator<Key_part_spec> cols(columns);
+  List_iterator<Create_field> it(table_fields);
+  DBUG_ENTER("Foreign_key::validate");
+  while ((column= cols++))
+  {
+    it.rewind();
+    while ((sql_field= it++) &&
+           my_strcasecmp(system_charset_info,
+                         column->field_name,
+                         sql_field->field_name)) {}
+    if (!sql_field)
+    {
+      my_error(ER_KEY_COLUMN_DOES_NOT_EXITS, MYF(0), column->field_name);
+      DBUG_RETURN(TRUE);
+    }
+    if (type == Key::FOREIGN_KEY && sql_field->vcol_info)
+    {
+      if (delete_opt == FK_OPTION_SET_NULL)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), 
+                 "ON DELETE SET NULL");
+        DBUG_RETURN(TRUE);
+      }
+      if (update_opt == FK_OPTION_SET_NULL)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), 
+                 "ON UPDATE SET NULL");
+        DBUG_RETURN(TRUE);
+      }
+      if (update_opt == FK_OPTION_CASCADE)
+      {
+        my_error(ER_WRONG_FK_OPTION_FOR_VIRTUAL_COLUMN, MYF(0), 
+                 "ON UPDATE CASCADE");
+        DBUG_RETURN(TRUE);
+      }
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
 
 /****************************************************************************
 ** Thread specific functions
@@ -261,19 +316,16 @@ int thd_tablespace_op(const THD *thd)
 
 
 extern "C"
-const char *set_thd_proc_info(void *thd_arg, const char *info,
+const char *set_thd_proc_info(THD *thd, const char *info,
                               const char *calling_function,
                               const char *calling_file,
                               const unsigned int calling_line)
 {
-  THD *thd= (THD *) thd_arg;
-
   if (!thd)
     thd= current_thd;
 
   const char *old_info= thd->proc_info;
-  DBUG_PRINT("proc_info", ("%s:%d  %s", calling_file, calling_line,
-                           (info != NULL) ? info : "(null)"));
+  DBUG_PRINT("proc_info", ("%s:%d  %s", calling_file, calling_line, info));
 #if defined(ENABLED_PROFILING)
   thd->profiling.status_change(info, calling_function, calling_file, calling_line);
 #endif
@@ -534,6 +586,8 @@ THD::THD()
   init_sql_alloc(&main_mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0);
   stmt_arena= this;
   thread_stack= 0;
+  scheduler= &thread_scheduler;                 // Will be fixed later
+  extra_port= 0;
   catalog= (char*)"std"; // the only catalog we have for now
   main_security_ctx.init();
   security_ctx= &main_security_ctx;
@@ -569,6 +623,7 @@ THD::THD()
   mysys_var=0;
   binlog_evt_union.do_union= FALSE;
   enable_slow_log= 0;
+
 #ifndef DBUG_OFF
   dbug_sentry=THD_SENTRY_MAGIC;
 #endif
@@ -583,6 +638,10 @@ THD::THD()
   peer_port= 0;					// For SHOW PROCESSLIST
   transaction.m_pending_rows_event= 0;
   transaction.on= 1;
+  wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short,
+                                    &variables.wt_timeout_short,
+                                    &variables.wt_deadlock_search_depth_long,
+                                    &variables.wt_timeout_long);
 #ifdef SIGNAL_WITH_VIO_CLOSE
   active_vio = 0;
 #endif
@@ -625,11 +684,12 @@ THD::THD()
 
   tablespace_op=FALSE;
   tmp= sql_rnd_with_mutex();
-  randominit(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
+  my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
   substitute_null_with_insert_id = FALSE;
   thr_lock_info_init(&lock_info); /* safety: will be reset after start */
 
   m_internal_handler= NULL;
+  arena_for_cached_items= 0;
   current_user_used= FALSE;
   memset(&invoker_user, 0, sizeof(invoker_user));
   memset(&invoker_host, 0, sizeof(invoker_host));
@@ -638,6 +698,7 @@ THD::THD()
 
 void THD::push_internal_handler(Internal_error_handler *handler)
 {
+  DBUG_ENTER("THD::push_internal_handler");
   if (m_internal_handler)
   {
     handler->m_prev_internal_handler= m_internal_handler;
@@ -647,6 +708,7 @@ void THD::push_internal_handler(Internal_error_handler *handler)
   {
     m_internal_handler= handler;
   }
+  DBUG_VOID_RETURN;
 }
 
 bool THD::handle_condition(uint sql_errno,
@@ -671,17 +733,17 @@ bool THD::handle_condition(uint sql_errno,
       return TRUE;
     }
   }
-
   return FALSE;
 }
 
 
 Internal_error_handler *THD::pop_internal_handler()
 {
+  DBUG_ENTER("THD::pop_internal_handler");
   DBUG_ASSERT(m_internal_handler != NULL);
   Internal_error_handler *popped_handler= m_internal_handler;
   m_internal_handler= m_internal_handler->m_prev_internal_handler;
-  return popped_handler;
+  DBUG_RETURN(popped_handler);
 }
 
 
@@ -943,18 +1005,70 @@ void THD::init(void)
   update_charset();
   reset_current_stmt_binlog_format_row();
   bzero((char *) &status_var, sizeof(status_var));
+  bzero((char *) &org_status_var, sizeof(org_status_var));
 
   if (variables.sql_log_bin)
     variables.option_bits|= OPTION_BIN_LOG;
   else
     variables.option_bits&= ~OPTION_BIN_LOG;
 
+  select_commands= update_commands= other_commands= 0;
+  /* Set to handle counting of aborted connections */
+  userstat_running= opt_userstat_running;
+  last_global_update_time= current_connect_time= time(NULL);
 #if defined(ENABLED_DEBUG_SYNC)
   /* Initialize the Debug Sync Facility. See debug_sync.cc. */
   debug_sync_init_thread(this);
 #endif /* defined(ENABLED_DEBUG_SYNC) */
 }
 
+ 
+/* Updates some status variables to be used by update_global_user_stats */
+
+void THD::update_stats(void)
+{
+  /* sql_command == SQLCOM_END in case of parse errors or quit */
+  if (lex->sql_command != SQLCOM_END)
+  {
+    /* A SQL query. */
+    if (lex->sql_command == SQLCOM_SELECT)
+      select_commands++;
+    else if (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND)
+    {
+      /* Ignore 'SHOW ' commands */
+    }
+    else if (is_update_query(lex->sql_command))
+      update_commands++;
+    else
+      other_commands++;
+  }
+}
+
+
+void THD::update_all_stats()
+{
+  time_t save_time;
+  ulonglong end_cpu_time, end_utime;
+  double busy_time, cpu_time;
+
+  /* This is set at start of query if opt_userstat_running was set */
+  if (!userstat_running)
+    return;
+
+  end_cpu_time= my_getcputime();
+  end_utime=    my_micro_time_and_time(&save_time);
+  busy_time= (end_utime - start_utime)  / 1000000.0;
+  cpu_time=  (end_cpu_time - start_cpu_time) / 10000000.0;
+  /* In case there are bad values, 2629743 is the #seconds in a month. */
+  if (cpu_time > 2629743.0)
+    cpu_time= 0;
+  status_var_add(status_var.cpu_time, cpu_time);
+  status_var_add(status_var.busy_time, busy_time);
+
+  update_global_user_stats(this, TRUE, save_time);
+  userstat_running= 0;
+}
+
 
 /*
   Init THD for query processing.
@@ -1045,6 +1159,7 @@ void THD::cleanup(void)
 
   /* All metadata locks must have been released by now. */
   DBUG_ASSERT(!mdl_context.has_locks());
+  wt_thd_destroy(&transaction.wt);
 
 #if defined(ENABLED_DEBUG_SYNC)
   /* End the Debug Sync Facility. See debug_sync.cc. */
@@ -1130,9 +1245,8 @@ THD::~THD()
    from_var     from this array
 
   NOTES
-    This function assumes that all variables are long/ulong.
-    If this assumption will change, then we have to explictely add
-    the other variables after the while loop
+    This function assumes that all variables at start are long/ulong and
+    other types are handled explicitely
 */
 
 void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
@@ -1145,8 +1259,12 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
   while (to != end)
     *(to++)+= *(from++);
 
-  to_var->bytes_received+= from_var->bytes_received;
-  to_var->bytes_sent+= from_var->bytes_sent;
+  /* Handle the not ulong variables. See end of system_status_var */
+  to_var->bytes_received=       from_var->bytes_received;
+  to_var->bytes_sent+=          from_var->bytes_sent;
+  to_var->binlog_bytes_written= from_var->binlog_bytes_written;
+  to_var->cpu_time+=            from_var->cpu_time;
+  to_var->busy_time+=           from_var->busy_time;
 }
 
 /*
@@ -1159,7 +1277,8 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var)
     dec_var      minus this array
   
   NOTE
-    This function assumes that all variables are long/ulong.
+    This function assumes that all variables at start are long/ulong and
+    other types are handled explicitely
 */
 
 void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
@@ -1173,10 +1292,23 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
   while (to != end)
     *(to++)+= *(from++) - *(dec++);
 
-  to_var->bytes_received+= from_var->bytes_received - dec_var->bytes_received;;
-  to_var->bytes_sent+= from_var->bytes_sent - dec_var->bytes_sent;
+  to_var->bytes_received+=       from_var->bytes_received -
+                                 dec_var->bytes_received;
+  to_var->bytes_sent+=           from_var->bytes_sent - dec_var->bytes_sent;
+  to_var->binlog_bytes_written+= from_var->binlog_bytes_written -
+                                 dec_var->binlog_bytes_written;
+  to_var->cpu_time+=             from_var->cpu_time - dec_var->cpu_time;
+  to_var->busy_time+=            from_var->busy_time - dec_var->busy_time;
 }
 
+#define SECONDS_TO_WAIT_FOR_KILL 2
+#if !defined(__WIN__) && defined(HAVE_SELECT)
+/* my_sleep() can wait for sub second times */
+#define WAIT_FOR_KILL_TRY_TIMES 20
+#else
+#define WAIT_FOR_KILL_TRY_TIMES 2
+#endif
+
 
 void THD::awake(THD::killed_state state_to_set)
 {
@@ -1275,16 +1407,33 @@ bool THD::store_globals()
   */
   mysys_var->id= thread_id;
   real_id= pthread_self();                      // For debugging
+  mysys_var->stack_ends_here= thread_stack +    // for consistency, see libevent_thread_proc
+                              STACK_DIRECTION * (long)my_thread_stack_size;
 
   /*
     We have to call thr_lock_info_init() again here as THD may have been
     created in another thread
   */
   thr_lock_info_init(&lock_info);
+
+#warning add registration of mutex order if needed
   return 0;
 }
 
 
+/**
+   Untie THD from current thread
+
+   Used when using --thread-handling=pool-of-threads
+*/
+
+void THD::reset_globals()
+{
+  pthread_mutex_lock(&LOCK_thd_data);
+  mysys_var= 0;
+  pthread_mutex_unlock(&LOCK_thd_data);
+}
+
 /*
   Cleanup after query.
 
@@ -2111,7 +2260,7 @@ bool select_export::send_data(List<Item> &items)
                             ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
                             ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
                             "string", printable_buff,
-                            item->name, row_count);
+                            item->name, (ulong) row_count);
       }
       else if (from_end_pos < res->ptr() + res->length())
       { 
@@ -2395,8 +2544,7 @@ bool select_max_min_finder_subselect::send_data(List<Item> &items)
     if (!cache)
     {
       cache= Item_cache::get_cache(val_item);
-      switch (val_item->result_type())
-      {
+      switch (val_item->result_type()) {
       case REAL_RESULT:
 	op= &select_max_min_finder_subselect::cmp_real;
 	break;
@@ -2410,6 +2558,7 @@ bool select_max_min_finder_subselect::send_data(List<Item> &items)
         op= &select_max_min_finder_subselect::cmp_decimal;
         break;
       case ROW_RESULT:
+      case IMPOSSIBLE_RESULT:
         // This case should never be choosen
 	DBUG_ASSERT(0);
 	op= 0;
@@ -2878,6 +3027,85 @@ bool select_dumpvar::send_eof()
   return 0;
 }
 
+
+bool
+select_materialize_with_stats::
+create_result_table(THD *thd_arg, List<Item> *column_types,
+                    bool is_union_distinct, ulonglong options,
+                    const char *table_alias, bool bit_fields_as_long)
+{
+  DBUG_ASSERT(table == 0);
+  tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
+
+  if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
+                                 (ORDER*) 0, is_union_distinct, 1,
+                                 options, HA_POS_ERROR, (char*) table_alias)))
+    return TRUE;
+
+  col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields *
+                                                      sizeof(Column_statistics));
+  if (!col_stat)
+    return TRUE;
+
+  reset();
+  table->file->extra(HA_EXTRA_WRITE_CACHE);
+  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+  return FALSE;
+}
+
+
+void select_materialize_with_stats::reset()
+{
+  memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
+  max_nulls_in_row= 0;
+  count_rows= 0;
+}
+
+
+void select_materialize_with_stats::cleanup()
+{
+  reset();
+  select_union::cleanup();
+}
+
+
+/**
+  Override select_union::send_data to analyze each row for NULLs and to
+  update null_statistics before sending data to the client.
+
+  @return TRUE if fatal error when sending data to the client
+  @return FALSE on success
+*/
+
+bool select_materialize_with_stats::send_data(List<Item> &items)
+{
+  List_iterator_fast<Item> item_it(items);
+  Item *cur_item;
+  Column_statistics *cur_col_stat= col_stat;
+  uint nulls_in_row= 0;
+
+  ++count_rows;
+
+  while ((cur_item= item_it++))
+  {
+    if (cur_item->is_null())
+    {
+      ++cur_col_stat->null_count;
+      cur_col_stat->max_null_row= count_rows;
+      if (!cur_col_stat->min_null_row)
+        cur_col_stat->min_null_row= count_rows;
+      ++nulls_in_row;
+    }
+    ++cur_col_stat;
+  }
+  if (nulls_in_row > max_nulls_in_row)
+    max_nulls_in_row= nulls_in_row;
+
+  return select_union::send_data(items);
+}
+
+
 /****************************************************************************
   TMP_TABLE_PARAM
 ****************************************************************************/
@@ -2891,6 +3119,8 @@ void TMP_TABLE_PARAM::init()
   quick_group= 1;
   table_charset= 0;
   precomputed_group_by= 0;
+  bit_fields_as_long= 0;
+  skip_create_table= 0;
   DBUG_VOID_RETURN;
 }
 
@@ -2899,7 +3129,8 @@ void thd_increment_bytes_sent(ulong length)
 {
   THD *thd=current_thd;
   if (likely(thd != 0))
-  { /* current_thd==0 when close_connection() calls net_send_error() */
+  {
+    /* current_thd == 0 when close_connection() calls net_send_error() */
     thd->status_var.bytes_sent+= length;
   }
 }
@@ -2925,9 +3156,9 @@ void THD::set_status_var_init()
 
 void Security_context::init()
 {
-  host= user= priv_user= ip= 0;
+  host= user= ip= 0;
   host_or_ip= "connecting host";
-  priv_host[0]= '\0';
+  priv_user[0]= priv_host[0]= '\0';
   master_access= 0;
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   db_access= NO_ACCESS;
@@ -2959,8 +3190,7 @@ void Security_context::skip_grants()
   /* privileges for the user are unknown everything is allowed */
   host_or_ip= (char *)"";
   master_access= ~NO_ACCESS;
-  priv_user= (char *)"";
-  *priv_host= '\0';
+  *priv_user= *priv_host= '\0';
 }
 
 
@@ -3002,7 +3232,7 @@ bool Security_context::set_user(char *user_arg)
   of a statement under credentials of a different user, e.g.
   definer of a procedure, we authenticate this user in a local
   instance of Security_context by means of this method (and
-  ultimately by means of acl_getroot_no_password), and make the
+  ultimately by means of acl_getroot), and make the
   local instance active in the thread by re-setting
   thd->security_ctx pointer.
 
@@ -3036,19 +3266,12 @@ change_security_context(THD *thd,
   DBUG_ASSERT(definer_user->str && definer_host->str);
 
   *backup= NULL;
-  /*
-    The current security context may have NULL members
-    if we have just started the thread and not authenticated
-    any user. This use case is currently in events worker thread.
-  */
-  needs_change= (thd->security_ctx->priv_user == NULL ||
-                 strcmp(definer_user->str, thd->security_ctx->priv_user) ||
-                 thd->security_ctx->priv_host == NULL ||
+  needs_change= (strcmp(definer_user->str, thd->security_ctx->priv_user) ||
                  my_strcasecmp(system_charset_info, definer_host->str,
                                thd->security_ctx->priv_host));
   if (needs_change)
   {
-    if (acl_getroot_no_password(this, definer_user->str, definer_host->str,
+    if (acl_getroot(this, definer_user->str, definer_host->str,
                                 definer_host->str, db->str))
     {
       my_error(ER_NO_SUCH_USER, MYF(0), definer_user->str,
@@ -3140,7 +3363,7 @@ extern "C" unsigned long thd_get_thread_id(const MYSQL_THD thd)
 
 
 #ifdef INNODB_COMPATIBILITY_HOOKS
-extern "C" struct charset_info_st *thd_charset(MYSQL_THD thd)
+extern "C" const struct charset_info_st *thd_charset(MYSQL_THD thd)
 {
   return(thd->charset());
 }
@@ -3301,6 +3524,7 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup,
   backup->count_cuted_fields= count_cuted_fields;
   backup->in_sub_stmt=     in_sub_stmt;
   backup->enable_slow_log= enable_slow_log;
+  backup->query_plan_flags= query_plan_flags;
   backup->limit_found_rows= limit_found_rows;
   backup->examined_row_count= examined_row_count;
   backup->sent_row_count=   sent_row_count;
@@ -3368,6 +3592,7 @@ void THD::restore_sub_statement_state(Sub_statement_state *backup)
   variables.option_bits= backup->option_bits;
   in_sub_stmt=      backup->in_sub_stmt;
   enable_slow_log=  backup->enable_slow_log;
+  query_plan_flags= backup->query_plan_flags;
   first_successful_insert_id_in_prev_stmt= 
     backup->first_successful_insert_id_in_prev_stmt;
   first_successful_insert_id_in_cur_stmt= 
@@ -3465,8 +3690,9 @@ void THD::get_definer(LEX_USER *definer)
   {
     definer->user = invoker_user;
     definer->host= invoker_host;
-    definer->password.str= NULL;
-    definer->password.length= 0;
+    definer->password= null_lex_str;
+    definer->plugin= empty_lex_str;
+    definer->auth= empty_lex_str;
   }
   else
 #endif
@@ -4113,7 +4339,6 @@ THD::binlog_prepare_pending_rows_event(TABLE*, uint32, MY_BITMAP const*,
 
 /* Declare in unnamed namespace. */
 CPP_UNNAMED_NS_START
-
   /**
      Class to handle temporary allocation of memory for row data.
 
@@ -4290,7 +4515,7 @@ int THD::binlog_update_row(TABLE* table, bool is_trans,
     Don't print debug messages when running valgrind since they can
     trigger false warnings.
    */
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
   DBUG_DUMP("before_record", before_record, table->s->reclength);
   DBUG_DUMP("after_record",  after_record, table->s->reclength);
   DBUG_DUMP("before_row",    before_row, before_size);
@@ -4571,11 +4796,10 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
       binlog_table_maps= 0;
       DBUG_RETURN(error);
     }
-    break;
 
   case THD::QUERY_TYPE_COUNT:
   default:
-    DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
+    DBUG_ASSERT(qtype < QUERY_TYPE_COUNT);
   }
   DBUG_RETURN(0);
 }
diff --git a/sql/sql_class.h b/sql/sql_class.h
index e86e42dc094..4dc8abc64d5 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -27,6 +27,7 @@
 #ifdef MYSQL_SERVER
 #include "unireg.h"                    // REQUIRED: for other includes
 #endif
+#include <waiting_threads.h>
 #include "sql_const.h"
 #include <mysql/plugin_audit.h>
 #include "log.h"
@@ -42,6 +43,123 @@
 
 
 class Reprepare_observer;
+
+
+/**
+  Interface for Item iterator
+*/
+
+class Item_iterator
+{
+public:
+  /**
+    Shall set this iterator to the position before the first item
+
+    @note
+    This method also may perform some other initialization actions like
+    allocation of certain resources.
+  */
+  virtual void open()= 0;
+  /**
+    Shall return the next Item (or NULL if there is no next item) and
+    move pointer to position after it.
+  */
+  virtual Item *next()= 0;
+  /**
+    Shall force iterator to free resources (if it holds them)
+
+    @note
+    One should not use the iterator without open() call after close()
+  */
+  virtual void close()= 0;
+
+  virtual ~Item_iterator() {}
+};
+
+
+/**
+  Item iterator over List_iterator_fast for Item references
+*/
+
+class Item_iterator_ref_list: public Item_iterator
+{
+  List_iterator<Item*> list;
+public:
+  Item_iterator_ref_list(List_iterator<Item*> &arg_list):
+    list(arg_list) {}
+  void open() { list.rewind(); }
+  Item *next() { return *(list++); }
+  void close() {}
+};
+
+
+/**
+  Item iterator over Item interface for rows
+*/
+
+class Item_iterator_row: public Item_iterator
+{
+  Item *base_item;
+  uint current;
+public:
+  Item_iterator_row(Item *base) : base_item(base), current(0) {}
+  void open() { current= 0; }
+  Item *next()
+  {
+    if (current >= base_item->cols())
+      return NULL;
+    return base_item->element_index(current++);
+  }
+  void close() {}
+};
+
+
+/**
+  An interface that is used to take an action when
+  the locking module notices that a table version has changed
+  since the last execution. "Table" here may refer to any kind of
+  table -- a base table, a temporary table, a view or an
+  information schema table.
+
+  When we open and lock tables for execution of a prepared
+  statement, we must verify that they did not change
+  since statement prepare. If some table did change, the statement
+  parse tree *may* be no longer valid, e.g. in case it contains
+  optimizations that depend on table metadata.
+
+  This class provides an interface (a method) that is
+  invoked when such a situation takes place.
+  The implementation of the method simply reports an error, but
+  the exact details depend on the nature of the SQL statement.
+
+  At most 1 instance of this class is active at a time, in which
+  case THD::m_reprepare_observer is not NULL.
+
+  @sa check_and_update_table_version() for details of the
+  version tracking algorithm 
+
+  @sa Open_tables_state::m_reprepare_observer for the life cycle
+  of metadata observers.
+*/
+
+class Reprepare_observer
+{
+public:
+  /**
+    Check if a change of metadata is OK. In future
+    the signature of this method may be extended to accept the old
+    and the new versions, but since currently the check is very
+    simple, we only need the THD to report an error.
+  */
+  bool report_error(THD *thd);
+  bool is_invalidated() const { return m_invalidated; }
+  void reset_reprepare_observer() { m_invalidated= FALSE; }
+  Reprepare_observer() {}                     /* Remove gcc warning */
+private:
+  bool m_invalidated;
+};
+
+
 class Relay_log_info;
 
 class Query_log_event;
@@ -49,6 +167,7 @@ class Load_log_event;
 class Slave_log_event;
 class sp_rcontext;
 class sp_cache;
+class Lex_input_stream;
 class Parser_state;
 class Rows_log_event;
 class Sroutine_hash_entry;
@@ -74,7 +193,7 @@ enum enum_filetype { FILETYPE_CSV, FILETYPE_XML };
 #define MODE_PIPES_AS_CONCAT            2
 #define MODE_ANSI_QUOTES                4
 #define MODE_IGNORE_SPACE               8
-#define MODE_NOT_USED                   16
+#define MODE_IGNORE_BAD_TABLE_OPTIONS   16
 #define MODE_ONLY_FULL_GROUP_BY         32
 #define MODE_NO_UNSIGNED_SUBTRACTION    64
 #define MODE_NO_DIR_IN_CREATE           128
@@ -222,13 +341,15 @@ public:
   KEY_CREATE_INFO key_create_info;
   List<Key_part_spec> columns;
   LEX_STRING name;
+  engine_option_value *option_list;
   bool generated;
 
   Key(enum Keytype type_par, const LEX_STRING &name_arg,
       KEY_CREATE_INFO *key_info_arg,
-      bool generated_arg, List<Key_part_spec> &cols)
+      bool generated_arg, List<Key_part_spec> &cols,
+      engine_option_value *create_opt)
     :type(type_par), key_create_info(*key_info_arg), columns(cols),
-    name(name_arg), generated(generated_arg)
+    name(name_arg), option_list(create_opt), generated(generated_arg)
   {}
   Key(enum Keytype type_par, const char *name_arg, size_t name_len_arg,
       KEY_CREATE_INFO *key_info_arg, bool generated_arg,
@@ -266,7 +387,7 @@ public:
   Foreign_key(const LEX_STRING &name_arg, List<Key_part_spec> &cols,
 	      Table_ident *table,   List<Key_part_spec> &ref_cols,
 	      uint delete_opt_arg, uint update_opt_arg, uint match_opt_arg)
-    :Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols),
+    :Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols, NULL),
     ref_table(table), ref_columns(ref_cols),
     delete_opt(delete_opt_arg), update_opt(update_opt_arg),
     match_opt(match_opt_arg)
@@ -278,6 +399,8 @@ public:
   */
   virtual Key *clone(MEM_ROOT *mem_root) const
   { return new (mem_root) Foreign_key(*this, mem_root); }
+  /* Used to validate foreign key options */
+  bool validate(List<Create_field> &table_fields);
 };
 
 typedef struct st_mysql_lock
@@ -369,13 +492,16 @@ typedef struct system_variables
 {
   /*
     How dynamically allocated system variables are handled:
-    
+
     The global_system_variables and max_system_variables are "authoritative"
     They both should have the same 'version' and 'size'.
     When attempting to access a dynamic variable, if the session version
     is out of date, then the session version is updated and realloced if
     neccessary and bytes copied from global to make up for missing data.
-  */ 
+
+    Note that one should use my_bool instead of bool here, as the variables
+    are used with my_getopt.c
+  */
   ulong dynamic_variables_version;
   char* dynamic_variables_ptr;
   uint dynamic_variables_head;  /* largest valid variable offset */
@@ -393,6 +519,7 @@ typedef struct system_variables
   ulong bulk_insert_buff_size;
   ulong join_buff_size;
   ulong lock_wait_timeout;
+  ulong join_cache_level;
   ulong max_allowed_packet;
   ulong max_error_count;
   ulong max_length_for_sort_data;
@@ -400,7 +527,6 @@ typedef struct system_variables
   ulong max_tmp_tables;
   ulong max_insert_delayed_threads;
   ulong min_examined_row_limit;
-  ulong multi_range_count;
   ulong net_buffer_length;
   ulong net_interactive_timeout;
   ulong net_read_timeout;
@@ -409,12 +535,22 @@ typedef struct system_variables
   ulong net_write_timeout;
   ulong optimizer_prune_level;
   ulong optimizer_search_depth;
+  /*
+    Controls use of Engine-MRR:
+      0 - auto, based on cost
+      1 - force MRR when the storage engine is capable of doing it
+      2 - disable MRR.
+  */
+  ulong optimizer_use_mrr;
   ulong preload_buff_size;
   ulong profiling_history_size;
   ulong read_buff_size;
   ulong read_rnd_buff_size;
+  ulong mrr_buff_size;
   ulong div_precincrement;
   ulong sortbuff_size;
+  /* Total size of all buffers used by the subselect_rowid_merge_engine. */
+  ulong rowid_merge_buff_size;
   ulong max_sp_recursion_depth;
   ulong default_week_format;
   ulong max_seeks_for_key;
@@ -425,7 +561,10 @@ typedef struct system_variables
   ulong trans_prealloc_size;
   ulong log_warnings;
   ulong group_concat_max_len;
-
+  /* Flags for slow log filtering */
+  ulong log_slow_rate_limit; 
+  ulong log_slow_filter; 
+  ulong log_slow_verbosity; 
   ulong binlog_format; ///< binlog format for this thd (see enum_binlog_format)
   my_bool binlog_direct_non_trans_update;
   my_bool sql_log_bin;
@@ -471,11 +610,13 @@ typedef struct system_variables
 
   my_bool sysdate_is_now;
 
-  double long_query_time_double;
+  /* deadlock detection */
+  ulong wt_timeout_short, wt_deadlock_search_depth_short;
+  ulong wt_timeout_long, wt_deadlock_search_depth_long;
 
+  double long_query_time_double;
 } SV;
 
-
 /**
   Per thread status variables.
   Must be long/ulong up to last_system_status_var so that
@@ -497,6 +638,13 @@ typedef struct system_status_var
   ulong ha_read_prev_count;
   ulong ha_read_rnd_count;
   ulong ha_read_rnd_next_count;
+  /*
+    This number doesn't include calls to the default implementation and
+    calls made by range access. The intent is to count only calls made by
+    BatchedKeyAccess.
+  */
+  ulong ha_multi_range_read_init_count;
+
   ulong ha_rollback_count;
   ulong ha_update_count;
   ulong ha_write_count;
@@ -505,6 +653,7 @@ typedef struct system_status_var
   ulong ha_savepoint_count;
   ulong ha_savepoint_rollback_count;
 
+#if 0
   /* KEY_CACHE parts. These are copies of the original */
   ulong key_blocks_changed;
   ulong key_blocks_used;
@@ -513,6 +662,7 @@ typedef struct system_status_var
   ulong key_cache_w_requests;
   ulong key_cache_write;
   /* END OF KEY_CACHE parts */
+#endif
 
   ulong net_big_packet_count;
   ulong opened_tables;
@@ -522,6 +672,8 @@ typedef struct system_status_var
   ulong select_range_count;
   ulong select_range_check_count;
   ulong select_scan_count;
+  ulong rows_read;
+  ulong rows_sent;
   ulong long_query_count;
   ulong filesort_merge_passes;
   ulong filesort_range_count;
@@ -535,25 +687,30 @@ typedef struct system_status_var
   ulong com_stmt_fetch;
   ulong com_stmt_reset;
   ulong com_stmt_close;
+
+  ulong empty_queries;
+  ulong access_denied_errors;
+  ulong lost_connections;
   /*
     Number of statements sent from the client
   */
   ulong questions;
-
-  ulonglong bytes_received;
-  ulonglong bytes_sent;
   /*
     IMPORTANT!
     SEE last_system_status_var DEFINITION BELOW.
     Below 'last_system_status_var' are all variables that cannot be handled
     automatically by add_to_status()/add_diff_to_status().
   */
+  ulonglong bytes_received;
+  ulonglong bytes_sent;
+  ulonglong binlog_bytes_written;
   double last_query_cost;
+  double cpu_time, busy_time;
 } STATUS_VAR;
 
 /*
   This is used for 'SHOW STATUS'. It must be updated to the last ulong
-  variable in system_status_var which is makes sens to add to the global
+  variable in system_status_var which is makes sense to add to the global
   counter
 */
 
@@ -673,7 +830,7 @@ class Server_side_cursor;
    - prepared, that is, contain placeholders,
    - opened as cursors. We maintain 1 to 1 relationship between
      statement and cursor - if user wants to create another cursor for his
-     query, we create another statement for it. 
+     query, we create another statement for it.
   To perform some action with statement we reset THD part to the state  of
   that statement, do the action, and then save back modified state from THD
   to the statement. It will be changed in near future, and Statement will
@@ -862,7 +1019,8 @@ public:
     priv_user - The user privilege we are using. May be "" for anonymous user.
     ip - client IP
   */
-  char   *host, *user, *priv_user, *ip;
+  char   *host, *user, *ip;
+  char   priv_user[USERNAME_LENGTH];
   /* The host privilege we are using */
   char   priv_host[MAX_HOSTNAME];
   /* points to host if host is available, otherwise points to ip */
@@ -877,7 +1035,7 @@ public:
   {
     return (*priv_host ? priv_host : (char *)"%");
   }
-  
+
   bool set_user(char *user_arg);
 
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -1084,6 +1242,7 @@ public:
   ulonglong limit_found_rows;
   ha_rows    cuted_fields, sent_row_count, examined_row_count;
   ulong client_capabilities;
+  ulong query_plan_flags; 
   uint in_sub_stmt;
   bool enable_slow_log;
   bool last_insert_id_used;
@@ -1194,6 +1353,7 @@ public:
     /* Ignore error */
     return TRUE;
   }
+  Dummy_error_handler() {}                    /* Remove gcc warning */
 };
 
 
@@ -1372,7 +1532,7 @@ public:
   /* Used to execute base64 coded binlog events in MySQL server */
   Relay_log_info* rli_fake;
 
-  void reset_for_next_command();
+  void reset_for_next_command(bool calculate_userstat);
   /*
     Constant for THD::where initialization in the beginning of every query.
 
@@ -1404,15 +1564,17 @@ public:
   Query_cache_tls query_cache_tls;
 #endif
   NET	  net;				// client connection descriptor
+  scheduler_functions *scheduler;       // Scheduler for this connection
   Protocol *protocol;			// Current protocol
   Protocol_text   protocol_text;	// Normal protocol
   Protocol_binary protocol_binary;	// Binary protocol
   HASH    user_vars;			// hash for user variables
   String  packet;			// dynamic buffer for network I/O
   String  convert_buffer;               // buffer for charset conversions
-  struct  rand_struct rand;		// used for authentication
+  struct  my_rnd_struct rand;		// used for authentication
   struct  system_variables variables;	// Changeable local variables
   struct  system_status_var status_var; // Per thread statistic vars
+  struct  system_status_var org_status_var; // For user statistics
   struct  system_status_var *initial_status_var; /* used by show status */
   THR_LOCK_INFO lock_info;              // Locking info of this thread
   /**
@@ -1481,7 +1643,7 @@ public:
   /*
     One thread can hold up to one named user-level lock. This variable
     points to a lock object if the lock is present. See item_func.cc and
-    chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK. 
+    chapter 'Miscellaneous functions', for functions GET_LOCK, RELEASE_LOCK.
   */
   User_level_lock *ull;
 #ifndef DBUG_OFF
@@ -1496,7 +1658,7 @@ public:
   uint32     server_id;
   uint32     file_id;			// for LOAD DATA INFILE
   /* remote (peer) port */
-  uint16 peer_port;
+  uint16     peer_port;
   time_t     start_time, user_time;
   // track down slow pthread_create
   ulonglong  prior_thr_create_utime, thr_create_utime;
@@ -1507,10 +1669,20 @@ public:
 
   /* <> 0 if we are inside of trigger or stored function. */
   uint in_sub_stmt;
+  /* True when opt_userstat_running is set at start of query */
+  bool userstat_running;
 
   /* container for handler's private per-connection data */
   Ha_data ha_data[MAX_HA];
 
+  /* Place to store various things */
+  union 
+  { 
+    /*
+      Used by subquery optimizations, see Item_in_subselect::emb_on_expr_nest.
+    */
+    TABLE_LIST *emb_on_expr_nest;
+  } thd_marker;
 #ifndef MYSQL_CLIENT
   int binlog_setup_trx_data();
 
@@ -1611,6 +1783,7 @@ public:
     THD_TRANS stmt;			// Trans for current statement
     bool on;                            // see ha_enable_transaction()
     XID_STATE xid_state;
+    WT_THD wt;                          ///< for deadlock detection
     Rows_log_event *m_pending_rows_event;
 
     /*
@@ -1913,6 +2086,8 @@ public:
   /* Statement id is thread-wide. This counter is used to generate ids */
   ulong      statement_id_counter;
   ulong	     rand_saved_seed1, rand_saved_seed2;
+  ulong      query_plan_flags; 
+  ulong      query_plan_fsort_passes; 
   pthread_t  real_id;                           /* For debugging */
   my_thread_id  thread_id;
   uint	     tmp_table;
@@ -1964,6 +2139,8 @@ public:
 
   bool       slave_thread, one_shot_set;
   bool	     no_errors, password;
+  bool       extra_port;                        /* If extra connection */
+
   /**
     Set to TRUE if execution of the current compound statement
     can not continue. In particular, disables activation of
@@ -2002,7 +2179,7 @@ public:
   */
   bool       is_slave_error;
   bool       bootstrap, cleanup_done;
-  
+
   /**  is set if some thread specific value(s) used in a statement. */
   bool       thread_specific_used;
   /**  
@@ -2017,7 +2194,7 @@ public:
   bool	     no_warnings_for_error; /* no warnings on call to my_error() */
   /* set during loop of derived table processing */
   bool       derived_tables_processing;
-  my_bool    tablespace_op;	/* This is TRUE in DISCARD/IMPORT TABLESPACE */
+  bool       tablespace_op;	/* This is TRUE in DISCARD/IMPORT TABLESPACE */
 
   sp_rcontext *spcont;		// SP runtime context
   sp_cache   *sp_proc_cache;
@@ -2033,6 +2210,21 @@ public:
   */
   LOG_INFO*  current_linfo;
   NET*       slave_net;			// network connection from slave -> m.
+
+  /*
+    Used to update global user stats.  The global user stats are updated
+    occasionally with the 'diff' variables.  After the update, the 'diff'
+    variables are reset to 0.
+  */
+  /* Time when the current thread connected to MySQL. */
+  time_t current_connect_time;
+  /* Last time when THD stats were updated in global_user_stats. */
+  time_t last_global_update_time;
+  /* Number of commands not reflected in global_user_stats yet. */
+  uint select_commands, update_commands, other_commands;
+  ulonglong start_cpu_time;
+  ulonglong start_bytes_received;
+
   /* Used by the sys_var class to store temporary values */
   union
   {
@@ -2041,10 +2233,10 @@ public:
     ulong     ulong_value;
     ulonglong ulonglong_value;
   } sys_var_tmp;
-  
+
   struct {
-    /* 
-      If true, mysql_bin_log::write(Log_event) call will not write events to 
+    /*
+      If true, mysql_bin_log::write(Log_event) call will not write events to
       binlog, and maintain 2 below variables instead (use
       mysql_bin_log.start_union_events to turn this on)
     */
@@ -2055,13 +2247,13 @@ public:
     */
     bool unioned_events;
     /*
-      If TRUE, at least one mysql_bin_log::write(Log_event e), where 
-      e.cache_stmt == TRUE call has been made after last 
+      If TRUE, at least one mysql_bin_log::write(Log_event e), where
+      e.cache_stmt == TRUE call has been made after last
       mysql_bin_log.start_union_events() call.
     */
     bool unioned_events_trans;
-    
-    /* 
+
+    /*
       'queries' (actually SP statements) that run under inside this binlog
       union have thd->query_id >= first_query_id.
     */
@@ -2110,13 +2302,16 @@ public:
     killing mysqld) where it's vital to not allocate excessive and not used
     memory. Note, that we still don't return error from init_for_queries():
     if preallocation fails, we should notice that at the first call to
-    alloc_root. 
+    alloc_root.
   */
   void init_for_queries();
+  void update_all_stats();
+  void update_stats(void);
   void change_user(void);
   void cleanup(void);
   void cleanup_after_query();
   bool store_globals();
+  void reset_globals();
 #ifdef SIGNAL_WITH_VIO_CLOSE
   inline void set_active_vio(Vio* vio)
   {
@@ -2144,7 +2339,7 @@ public:
     
     QUERY_TYPE_COUNT
   };
-  
+
   int binlog_query(enum_binlog_query_type qtype,
                    char const *query, ulong query_len, bool is_trans,
                    bool direct, bool suppress_use,
@@ -2567,7 +2762,7 @@ public:
     *p_db_length= db_length;
     return FALSE;
   }
-  thd_scheduler scheduler;
+  thd_scheduler event_scheduler;
 
 public:
   inline Internal_error_handler *get_internal_handler()
@@ -2703,6 +2898,27 @@ public:
   LEX_STRING get_invoker_user() { return invoker_user; }
   LEX_STRING get_invoker_host() { return invoker_host; }
   bool has_invoker() { return invoker_user.length > 0; }
+
+private:
+  /* 
+    This reference points to the table arena when the expression
+    for a virtual column is being evaluated
+  */ 
+  Query_arena *arena_for_cached_items;
+
+public:
+  void reset_arena_for_cached_items(Query_arena *new_arena)
+  {
+    arena_for_cached_items= new_arena;
+  }
+  Query_arena *switch_to_arena_for_cached_items(Query_arena *backup)
+  {
+    if (!arena_for_cached_items)
+      return 0;
+    set_n_backup_active_arena(arena_for_cached_items, backup);
+    return backup;
+  }
+
 private:
 
   /** The current internal error handler for this thread, or NULL. */
@@ -3014,10 +3230,17 @@ public:
   int prepare2(void) { return 0; }
 };
 
+
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+#include <maria.h>
+#define ENGINE_COLUMNDEF MARIA_COLUMNDEF
+#else
 #include <myisam.h>
+#define ENGINE_COLUMNDEF MI_COLUMNDEF
+#endif
 
-/* 
-  Param to create temporary tables when doing SELECT:s 
+/*
+  Param to create temporary tables when doing SELECT:s
   NOTE
     This structure is copied using memcpy as a part of JOIN.
 */
@@ -3036,7 +3259,7 @@ public:
   Copy_field *save_copy_field, *save_copy_field_end;
   uchar	    *group_buff;
   Item	    **items_to_copy;			/* Fields in tmp table */
-  MI_COLUMNDEF *recinfo,*start_recinfo;
+  ENGINE_COLUMNDEF *recinfo, *start_recinfo;
   KEY *keyinfo;
   ha_rows end_write_records;
   /**
@@ -3070,8 +3293,8 @@ public:
   uint	quick_group;
   bool  using_indirect_summary_function;
   /* If >0 convert all blob fields to varchar(convert_blob_length) */
-  uint  convert_blob_length; 
-  CHARSET_INFO *table_charset; 
+  uint  convert_blob_length;
+  CHARSET_INFO *table_charset;
   bool schema_table;
   /*
     True if GROUP BY and its aggregate functions are already computed
@@ -3081,11 +3304,23 @@ public:
   */
   bool precomputed_group_by;
   bool force_copy_fields;
+  /*
+    If TRUE, create_tmp_field called from create_tmp_table will convert
+    all BIT fields to 64-bit longs. This is a workaround the limitation
+    that MEMORY tables cannot index BIT columns.
+  */
+  bool bit_fields_as_long;
+  /*
+    Whether to create or postpone actual creation of this temporary table.
+    TRUE <=> create_tmp_table will create only the TABLE structure.
+  */
+  bool skip_create_table;
 
   TMP_TABLE_PARAM()
     :copy_field(0), group_parts(0),
      group_length(0), group_null_parts(0), convert_blob_length(0),
-     schema_table(0), precomputed_group_by(0), force_copy_fields(0)
+     schema_table(0), precomputed_group_by(0), force_copy_fields(0),
+     bit_fields_as_long(0), skip_create_table(0)
   {}
   ~TMP_TABLE_PARAM()
   {
@@ -3104,19 +3339,20 @@ public:
 
 class select_union :public select_result_interceptor
 {
+protected:
   TMP_TABLE_PARAM tmp_table_param;
 public:
   TABLE *table;
 
-  select_union() :table(0) {}
+  select_union() :table(0) { tmp_table_param.init(); }
   int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
   bool send_data(List<Item> &items);
   bool send_eof();
   bool flush();
-
-  bool create_result_table(THD *thd, List<Item> *column_types,
-                           bool is_distinct, ulonglong options,
-                           const char *alias);
+  void cleanup();
+  virtual bool create_result_table(THD *thd, List<Item> *column_types,
+                                   bool is_distinct, ulonglong options,
+                                   const char *alias, bool bit_fields_as_long);
 };
 
 /* Base subselect interface class */
@@ -3140,6 +3376,72 @@ public:
   bool send_data(List<Item> &items);
 };
 
+
+/*
+  This class specializes select_union to collect statistics about the
+  data stored in the temp table. Currently the class collects statistcs
+  about NULLs.
+*/
+
+class select_materialize_with_stats : public select_union
+{
+protected:
+  class Column_statistics
+  {
+  public:
+    /* Count of NULLs per column. */
+    ha_rows null_count;
+    /* The row number that contains the first NULL in a column. */
+    ha_rows min_null_row;
+    /* The row number that contains the last NULL in a column. */
+    ha_rows max_null_row;
+  };
+
+  /* Array of statistics data per column. */
+  Column_statistics* col_stat;
+
+  /*
+    The number of columns in the biggest sub-row that consists of only
+    NULL values.
+  */
+  ha_rows max_nulls_in_row;
+  /*
+    Count of rows writtent to the temp table. This is redundant as it is
+    already stored in handler::stats.records, however that one is relatively
+    expensive to compute (given we need that for evry row).
+  */
+  ha_rows count_rows;
+
+protected:
+  void reset();
+
+public:
+  select_materialize_with_stats() {}
+  virtual bool create_result_table(THD *thd, List<Item> *column_types,
+                                   bool is_distinct, ulonglong options,
+                                   const char *alias, bool bit_fields_as_long);
+  bool init_result_table(ulonglong select_options);
+  bool send_data(List<Item> &items);
+  void cleanup();
+  ha_rows get_null_count_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].null_count;
+  }
+  ha_rows get_max_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].max_null_row;
+  }
+  ha_rows get_min_null_of_col(uint idx)
+  {
+    DBUG_ASSERT(idx < table->s->fields);
+    return col_stat[idx].min_null_row;
+  }
+  ha_rows get_max_nulls_in_row() { return max_nulls_in_row; }
+};
+
+
 /* used in independent ALL/ANY optimisation */
 class select_max_min_finder_subselect :public select_subselect
 {
@@ -3167,6 +3469,67 @@ public:
   bool send_data(List<Item> &items);
 };
 
+
+
+
+/*
+  Optimizer and executor structure for the materialized semi-join info. This
+  structure contains
+   - The sj-materialization temporary table
+   - Members needed to make index lookup or a full scan of the temptable.
+*/
+class SJ_MATERIALIZATION_INFO : public Sql_alloc
+{
+public:
+  /* Optimal join sub-order */
+  struct st_position *positions;
+
+  uint tables; /* Number of tables in the sj-nest */
+
+  /* Expected #rows in the materialized table */
+  double rows;
+
+  /* 
+    Cost to materialize - execute the sub-join and write rows into temp.table
+  */
+  COST_VECT materialization_cost;
+
+  /* Cost to make one lookup in the temptable */
+  COST_VECT lookup_cost;
+  
+  /* Cost of scanning the materialized table */
+  COST_VECT scan_cost;
+
+  /* --- Execution structures ---------- */
+  
+  /*
+    TRUE <=> This structure is used for execution. We don't necessarily pick
+    sj-materialization, so some of SJ_MATERIALIZATION_INFO structures are not
+    used by materialization
+  */
+  bool is_used;
+  
+  bool materialized; /* TRUE <=> materialization already performed */
+  /*
+    TRUE  - the temptable is read with full scan
+    FALSE - we use the temptable for index lookups
+  */
+  bool is_sj_scan; 
+  
+  /* The temptable and its related info */
+  TMP_TABLE_PARAM sjm_table_param;
+  List<Item> sjm_table_cols;
+  TABLE *table;
+
+  /* Structure used to make index lookups */
+  struct st_table_ref *tab_ref;
+  Item *in_equality; /* See create_subq_in_equalities() */
+
+  Item *join_cond; /* See comments in make_join_select() */
+  Copy_field *copy_field; /* Needed for SJ_Materialization scan */
+};
+
+
 /* Structs used when sorting */
 
 typedef struct st_sort_field {
@@ -3205,7 +3568,7 @@ public:
     else
       db= db_arg;
   }
-  inline Table_ident(LEX_STRING table_arg) 
+  inline Table_ident(LEX_STRING table_arg)
     :table(table_arg), sel((SELECT_LEX_UNIT *)0)
   {
     db.str=0;
@@ -3243,15 +3606,15 @@ class user_var_entry
   Item_result type;
   bool unsigned_flag;
 
-  double val_real(my_bool *null_value);
-  longlong val_int(my_bool *null_value) const;
-  String *val_str(my_bool *null_value, String *str, uint decimals);
-  my_decimal *val_decimal(my_bool *null_value, my_decimal *result);
+  double val_real(bool *null_value);
+  longlong val_int(bool *null_value) const;
+  String *val_str(bool *null_value, String *str, uint decimals);
+  my_decimal *val_decimal(bool *null_value, my_decimal *result);
   DTCollation collation;
 };
 
 /*
-   Unique -- class for unique (removing of duplicates). 
+   Unique -- class for unique (removing of duplicates).
    Puts all values to the TREE. If the tree becomes too big,
    it's dumped to the file. User can request sorted values, or
    just iterate through them. In the last case tree merging is performed in
@@ -3285,13 +3648,13 @@ public:
   }
 
   bool get(TABLE *table);
-  static double get_use_cost(uint *buffer, uint nkeys, uint key_size, 
+  static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
                              ulonglong max_in_memory_size);
-  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size, 
+  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size,
                                             ulonglong max_in_memory_size)
   {
     register ulonglong max_elems_in_tree=
-      (1 + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
+      max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
     return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
   }
 
@@ -3356,7 +3719,7 @@ class multi_update :public select_result_interceptor
   uint table_count;
   /*
    List of tables referenced in the CHECK OPTION condition of
-   the updated view excluding the updated table. 
+   the updated view excluding the updated table.
   */
   List <TABLE> unupdated_check_opt_tables;
   Copy_field *copy_field;
@@ -3556,6 +3919,149 @@ inline bool add_group_to_list(THD *thd, Item *item, bool asc)
   return thd->lex->current_select->add_group_to_list(thd, item, asc);
 }
 
+/* inline handler methods that need to know TABLE and THD structures */
+inline void handler::increment_statistics(ulong SSV::*offset) const
+{
+  status_var_increment(table->in_use->status_var.*offset);
+}
+
+inline void handler::decrement_statistics(ulong SSV::*offset) const
+{
+  status_var_decrement(table->in_use->status_var.*offset);
+}
+
+inline int handler::ha_index_read_map(uchar * buf, const uchar * key,
+                                      key_part_map keypart_map,
+                                      enum ha_rkey_function find_flag)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_key_count);
+  int error= index_read_map(buf, key, keypart_map, find_flag);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_read_idx_map(uchar * buf, uint index,
+                                          const uchar * key,
+                                          key_part_map keypart_map,
+                                          enum ha_rkey_function find_flag)
+{
+  increment_statistics(&SSV::ha_read_key_count);
+  int error= index_read_idx_map(buf, index, key, keypart_map, find_flag);
+  if (!error)
+  {
+    rows_read++;
+    index_rows_read[index]++;
+  }
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_next(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_next_count);
+  int error= index_next(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_prev(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_prev_count);
+  int error= index_prev(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_first(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_first_count);
+  int error= index_first(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_last(uchar * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_last_count);
+  int error= index_last(buf);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_index_next_same(uchar *buf, const uchar *key,
+                                       uint keylen)
+{
+  DBUG_ASSERT(inited==INDEX);
+  increment_statistics(&SSV::ha_read_next_count);
+  int error= index_next_same(buf, key, keylen);
+  if (!error)
+    update_index_statistics();
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_ft_read(uchar *buf)
+{
+  int error= ft_read(buf);
+  if (!error)
+    rows_read++;
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_rnd_next(uchar *buf)
+{
+  increment_statistics(&SSV::ha_read_rnd_next_count);
+  int error= rnd_next(buf);
+  if (!error)
+    rows_read++;
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_rnd_pos(uchar *buf, uchar *pos)
+{
+  increment_statistics(&SSV::ha_read_rnd_count);
+  int error= rnd_pos(buf, pos);
+  if (!error)
+    rows_read++;
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_rnd_pos_by_record(uchar *buf)
+{
+  int error= rnd_pos_by_record(buf);
+  if (!error)
+    rows_read++;
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+inline int handler::ha_read_first_row(uchar *buf, uint primary_key)
+{
+  int error= read_first_row(buf, primary_key);
+  if (!error)
+    rows_read++;
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
 #endif /* MYSQL_SERVER */
 
 /**
@@ -3563,7 +4069,7 @@ inline bool add_group_to_list(THD *thd, Item *item, bool asc)
   three calling-info parameters.
 */
 extern "C"
-const char *set_thd_proc_info(void *thd_arg, const char *info,
+const char *set_thd_proc_info(THD *thd, const char *info,
                               const char *calling_func,
                               const char *calling_file,
                               const unsigned int calling_line);
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 3ba6deed0a4..800ff7d7585 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -37,23 +37,12 @@
 #include "sql_acl.h"  // acl_getroot, NO_ACCESS, SUPER_ACL
 #include "sql_callback.h"
 
-#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
-/*
-  Without SSL the handshake consists of one packet. This packet
-  has both client capabilites and scrambled password.
-  With SSL the handshake might consist of two packets. If the first
-  packet (client capabilities) has CLIENT_SSL flag set, we have to
-  switch to SSL and read the second packet. The scrambled password
-  is in the second packet and client_capabilites field will be ignored.
-  Maybe it is better to accept flags other than CLIENT_SSL from the
-  second packet?
-*/
-#define SSL_HANDSHAKE_SIZE      2
-#define NORMAL_HANDSHAKE_SIZE   6
-#define MIN_HANDSHAKE_SIZE      2
-#else
-#define MIN_HANDSHAKE_SIZE      6
-#endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
+HASH global_user_stats, global_client_stats, global_table_stats;
+HASH global_index_stats;
+/* Protects the above global stats */
+extern mysql_mutex_t LOCK_global_user_client_stats;
+extern mysql_mutex_t LOCK_global_table_stats;
+extern mysql_mutex_t LOCK_global_index_stats;
 
 /*
   Get structure for logging connection data for the current user
@@ -62,9 +51,9 @@
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
 static HASH hash_user_connections;
 
-static int get_or_create_user_conn(THD *thd, const char *user,
-				   const char *host,
-				   USER_RESOURCES *mqh)
+int get_or_create_user_conn(THD *thd, const char *user,
+                            const char *host,
+                            USER_RESOURCES *mqh)
 {
   int return_val= 0;
   size_t temp_len, user_len;
@@ -130,7 +119,6 @@ end:
     1	error
 */
 
-static
 int check_for_max_user_connections(THD *thd, USER_CONN *uc)
 {
   int error=0;
@@ -292,255 +280,6 @@ end:
 #endif /* NO_EMBEDDED_ACCESS_CHECKS */
 
 
-/**
-  Check if user exist and password supplied is correct.
-
-  @param  thd         thread handle, thd->security_ctx->{host,user,ip} are used
-  @param  command     originator of the check: now check_user is called
-                      during connect and change user procedures; used for
-                      logging.
-  @param  passwd      scrambled password received from client
-  @param  passwd_len  length of scrambled password
-  @param  db          database name to connect to, may be NULL
-  @param  check_count TRUE if establishing a new connection. In this case
-                      check that we have not exceeded the global
-                      max_connections limist
-
-  @note Host, user and passwd may point to communication buffer.
-  Current implementation does not depend on that, but future changes
-  should be done with this in mind; 'thd' is INOUT, all other params
-  are 'IN'.
-
-  @retval  0  OK; thd->security_ctx->user/master_access/priv_user/db_access and
-              thd->db are updated; OK is sent to the client.
-  @retval  1  error, e.g. access denied or handshake error, not sent to
-              the client. A message is pushed into the error stack.
-*/
-
-int
-check_user(THD *thd, enum enum_server_command command,
-	       const char *passwd, uint passwd_len, const char *db,
-	       bool check_count)
-{
-  DBUG_ENTER("check_user");
-  LEX_STRING db_str= { (char *) db, db ? strlen(db) : 0 };
-
-  /*
-    Clear thd->db as it points to something, that will be freed when
-    connection is closed. We don't want to accidentally free a wrong
-    pointer if connect failed. Also in case of 'CHANGE USER' failure,
-    current database will be switched to 'no database selected'.
-  */
-  thd->reset_db(NULL, 0);
-
-#ifdef NO_EMBEDDED_ACCESS_CHECKS
-  thd->main_security_ctx.master_access= GLOBAL_ACLS;       // Full rights
-  /* Change database if necessary */
-  if (db && db[0])
-  {
-    if (mysql_change_db(thd, &db_str, FALSE))
-      DBUG_RETURN(1);
-  }
-  my_ok(thd);
-  DBUG_RETURN(0);
-#else
-
-  my_bool opt_secure_auth_local;
-  mysql_mutex_lock(&LOCK_global_system_variables);
-  opt_secure_auth_local= opt_secure_auth;
-  mysql_mutex_unlock(&LOCK_global_system_variables);
-  
-  /*
-    If the server is running in secure auth mode, short scrambles are 
-    forbidden.
-  */
-  if (opt_secure_auth_local && passwd_len == SCRAMBLE_LENGTH_323)
-  {
-    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
-    general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
-    DBUG_RETURN(1);
-  }
-  if (passwd_len != 0 &&
-      passwd_len != SCRAMBLE_LENGTH &&
-      passwd_len != SCRAMBLE_LENGTH_323)
-  {
-    my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-    DBUG_RETURN(1);
-  }
-
-  USER_RESOURCES ur;
-  int res= acl_getroot(thd, &ur, passwd, passwd_len);
-#ifndef EMBEDDED_LIBRARY
-  if (res == -1)
-  {
-    /*
-      This happens when client (new) sends password scrambled with
-      scramble(), but database holds old value (scrambled with
-      scramble_323()). Here we please client to send scrambled_password
-      in old format.
-    */
-    NET *net= &thd->net;
-    if (opt_secure_auth_local)
-    {
-      my_error(ER_SERVER_IS_IN_SECURE_AUTH_MODE, MYF(0),
-               thd->main_security_ctx.user,
-               thd->main_security_ctx.host_or_ip);
-      general_log_print(thd, COM_CONNECT, ER(ER_SERVER_IS_IN_SECURE_AUTH_MODE),
-                        thd->main_security_ctx.user,
-                        thd->main_security_ctx.host_or_ip);
-      DBUG_RETURN(1);
-    }
-    /* We have to read very specific packet size */
-    if (send_old_password_request(thd) ||
-        my_net_read(net) != SCRAMBLE_LENGTH_323 + 1)
-    {
-      inc_host_errors(thd->main_security_ctx.ip);
-
-      my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-      DBUG_RETURN(1);
-    }
-    /* Final attempt to check the user based on reply */
-    /* So as passwd is short, errcode is always >= 0 */
-    res= acl_getroot(thd, &ur, (char *) net->read_pos, SCRAMBLE_LENGTH_323);
-  }
-#endif /*EMBEDDED_LIBRARY*/
-  /* here res is always >= 0 */
-  if (res == 0)
-  {
-    if (!(thd->main_security_ctx.master_access &
-          NO_ACCESS)) // authentication is OK
-    {
-      DBUG_PRINT("info",
-                 ("Capabilities: %lu  packet_length: %ld  Host: '%s'  "
-                  "Login user: '%s' Priv_user: '%s'  Using password: %s "
-                  "Access: %lu  db: '%s'",
-                  thd->client_capabilities,
-                  thd->max_client_packet_length,
-                  thd->main_security_ctx.host_or_ip,
-                  thd->main_security_ctx.user,
-                  thd->main_security_ctx.priv_user,
-                  passwd_len ? "yes": "no",
-                  thd->main_security_ctx.master_access,
-                  (thd->db ? thd->db : "*none*")));
-
-      if (check_count)
-      {
-        mysql_mutex_lock(&LOCK_connection_count);
-        bool count_ok= connection_count <= max_connections ||
-                       (thd->main_security_ctx.master_access & SUPER_ACL);
-        mysql_mutex_unlock(&LOCK_connection_count);
-
-        if (!count_ok)
-        {                                         // too many connections
-          my_error(ER_CON_COUNT_ERROR, MYF(0));
-          DBUG_RETURN(1);
-        }
-      }
-
-      /*
-        Log the command before authentication checks, so that the user can
-        check the log for the tried login tried and also to detect
-        break-in attempts.
-      */
-      general_log_print(thd, command,
-                        (thd->main_security_ctx.priv_user ==
-                         thd->main_security_ctx.user ?
-                         (char*) "%s@%s on %s" :
-                         (char*) "%s@%s as anonymous on %s"),
-                        thd->main_security_ctx.user,
-                        thd->main_security_ctx.host_or_ip,
-                        db ? db : (char*) "");
-
-      /*
-        This is the default access rights for the current database.  It's
-        set to 0 here because we don't have an active database yet (and we
-        may not have an active database to set.
-      */
-      thd->main_security_ctx.db_access=0;
-
-      /* Don't allow user to connect if he has done too many queries */
-      if ((ur.questions || ur.updates || ur.conn_per_hour || ur.user_conn ||
-	   global_system_variables.max_user_connections) &&
-	  get_or_create_user_conn(thd,
-            (opt_old_style_user_limits ? thd->main_security_ctx.user :
-             thd->main_security_ctx.priv_user),
-            (opt_old_style_user_limits ? thd->main_security_ctx.host_or_ip :
-             thd->main_security_ctx.priv_host),
-            &ur))
-      {
-        /* The error is set by get_or_create_user_conn(). */
-	DBUG_RETURN(1);
-      }
-      if (thd->user_connect &&
-	  (thd->user_connect->user_resources.conn_per_hour ||
-	   thd->user_connect->user_resources.user_conn ||
-	   global_system_variables.max_user_connections) &&
-	  check_for_max_user_connections(thd, thd->user_connect))
-      {
-        /* The error is set in check_for_max_user_connections(). */
-        DBUG_RETURN(1);
-      }
-
-      /* Change database if necessary */
-      if (db && db[0])
-      {
-        if (mysql_change_db(thd, &db_str, FALSE))
-        {
-          /* mysql_change_db() has pushed the error message. */
-          if (thd->user_connect)
-          {
-            decrease_user_connections(thd->user_connect);
-            thd->user_connect= 0;
-          }
-          DBUG_RETURN(1);
-        }
-      }
-      my_ok(thd);
-      thd->password= test(passwd_len);          // remember for error messages 
-#ifndef EMBEDDED_LIBRARY
-      /*
-        Allow the network layer to skip big packets. Although a malicious
-        authenticated session might use this to trick the server to read
-        big packets indefinitely, this is a previously established behavior
-        that needs to be preserved as to not break backwards compatibility.
-      */
-      thd->net.skip_big_packet= TRUE;
-#endif
-      /* Ready to handle queries */
-      DBUG_RETURN(0);
-    }
-  }
-  else if (res == 2) // client gave short hash, server has long hash
-  {
-    my_error(ER_NOT_SUPPORTED_AUTH_MODE, MYF(0));
-    general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
-    DBUG_RETURN(1);
-  }
-  my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
-           thd->main_security_ctx.user,
-           thd->main_security_ctx.host_or_ip,
-           passwd_len ? ER(ER_YES) : ER(ER_NO));
-  general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR),
-                    thd->main_security_ctx.user,
-                    thd->main_security_ctx.host_or_ip,
-                    passwd_len ? ER(ER_YES) : ER(ER_NO));
-  /*
-    Log access denied messages to the error log when log-warnings = 2
-    so that the overhead of the general query log is not required to track
-    failed connections.
-  */
-  if (global_system_variables.log_warnings > 1)
-  {
-    sql_print_warning(ER(ER_ACCESS_DENIED_ERROR),
-                      thd->main_security_ctx.user,
-                      thd->main_security_ctx.host_or_ip,
-                      passwd_len ? ER(ER_YES) : ER(ER_NO));
-  }
-  DBUG_RETURN(1);
-#endif /* NO_EMBEDDED_ACCESS_CHECKS */
-}
-
-
 /*
   Check for maximum allowable user connections, if the mysqld server is
   started with corresponding variable that is greater then 0.
@@ -563,10 +302,13 @@ extern "C" void free_user(struct user_conn *uc)
 void init_max_user_conn(void)
 {
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
-  (void)
-    my_hash_init(&hash_user_connections,system_charset_info,max_connections,
+  if (my_hash_init(&hash_user_connections,system_charset_info,max_connections,
                  0,0, (my_hash_get_key) get_key_conn,
-                 (my_hash_free_key) free_user, 0);
+                 (my_hash_free_key) free_user, 0))
+  {
+    sql_print_error("Initializing hash_user_connections failed.");
+    exit(1);
+  }
 #endif
 }
 
@@ -620,6 +362,445 @@ void reset_mqh(LEX_USER *lu, bool get_them= 0)
 #endif /* NO_EMBEDDED_ACCESS_CHECKS */
 }
 
+/*****************************************************************************
+ Handle users statistics
+*****************************************************************************/
+
+/* 'mysql_system_user' is used for when the user is not defined for a THD. */
+static const char mysql_system_user[]= "#mysql_system#";
+
+// Returns 'user' if it's not NULL.  Returns 'mysql_system_user' otherwise.
+static const char * get_valid_user_string(char* user)
+{
+  return user ? user : mysql_system_user;
+}
+
+/*
+  Returns string as 'IP' for the client-side of the connection represented by
+  'client'. Does not allocate memory. May return "".
+*/
+
+static const char *get_client_host(THD *client)
+{
+  return client->security_ctx->host_or_ip[0] ?
+    client->security_ctx->host_or_ip :
+    client->security_ctx->host ? client->security_ctx->host : "";
+}
+
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)))
+{
+  *length= user_stats->user_name_length;
+  return (uchar*) user_stats->user;
+}
+
+void free_user_stats(USER_STATS* user_stats)
+{
+  my_free(user_stats, MYF(0));
+}
+
+void init_user_stats(USER_STATS *user_stats,
+                     const char *user,
+                     size_t user_length,
+                     const char *priv_user,
+                     uint total_connections,
+                     uint concurrent_connections,
+                     time_t connected_time,
+                     double busy_time,
+                     double cpu_time,
+                     ulonglong bytes_received,
+                     ulonglong bytes_sent,
+                     ulonglong binlog_bytes_written,
+                     ha_rows rows_sent,
+                     ha_rows rows_read,
+                     ha_rows rows_inserted,
+                     ha_rows rows_deleted,
+                     ha_rows rows_updated,
+                     ulonglong select_commands,
+                     ulonglong update_commands,
+                     ulonglong other_commands,
+                     ulonglong commit_trans,
+                     ulonglong rollback_trans,
+                     ulonglong denied_connections,
+                     ulonglong lost_connections,
+                     ulonglong access_denied_errors,
+                     ulonglong empty_queries)
+{
+  DBUG_ENTER("init_user_stats");
+  DBUG_PRINT("enter", ("user: %s  priv_user: %s", user, priv_user));
+
+  user_length= min(user_length, sizeof(user_stats->user)-1);
+  memcpy(user_stats->user, user, user_length);
+  user_stats->user[user_length]= 0;
+  user_stats->user_name_length= user_length;
+  strmake(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user)-1);
+
+  user_stats->total_connections= total_connections;
+  user_stats->concurrent_connections= concurrent_connections;
+  user_stats->connected_time= connected_time;
+  user_stats->busy_time= busy_time;
+  user_stats->cpu_time= cpu_time;
+  user_stats->bytes_received= bytes_received;
+  user_stats->bytes_sent= bytes_sent;
+  user_stats->binlog_bytes_written= binlog_bytes_written;
+  user_stats->rows_sent= rows_sent;
+  user_stats->rows_updated= rows_updated;
+  user_stats->rows_read= rows_read;
+  user_stats->select_commands= select_commands;
+  user_stats->update_commands= update_commands;
+  user_stats->other_commands= other_commands;
+  user_stats->commit_trans= commit_trans;
+  user_stats->rollback_trans= rollback_trans;
+  user_stats->denied_connections= denied_connections;
+  user_stats->lost_connections= lost_connections;
+  user_stats->access_denied_errors= access_denied_errors;
+  user_stats->empty_queries= empty_queries;
+  DBUG_VOID_RETURN;
+}
+
+
+#ifdef COMPLEAT_PATCH_NOT_ADDED_YET
+
+void add_user_stats(USER_STATS *user_stats,
+                    uint total_connections,
+                    uint concurrent_connections,
+                    time_t connected_time,
+                    double busy_time,
+                    double cpu_time,
+                    ulonglong bytes_received,
+                    ulonglong bytes_sent,
+                    ulonglong binlog_bytes_written,
+                    ha_rows rows_sent,
+                    ha_rows rows_read,
+                    ha_rows rows_inserted,
+                    ha_rows rows_deleted,
+                    ha_rows rows_updated,
+                    ulonglong select_commands,
+                    ulonglong update_commands,
+                    ulonglong other_commands,
+                    ulonglong commit_trans,
+                    ulonglong rollback_trans,
+                    ulonglong denied_connections,
+                    ulonglong lost_connections,
+                    ulonglong access_denied_errors,
+                    ulonglong empty_queries)
+{
+  user_stats->total_connections+= total_connections;
+  user_stats->concurrent_connections+= concurrent_connections;
+  user_stats->connected_time+= connected_time;
+  user_stats->busy_time+= busy_time;
+  user_stats->cpu_time+= cpu_time;
+  user_stats->bytes_received+= bytes_received;
+  user_stats->bytes_sent+= bytes_sent;
+  user_stats->binlog_bytes_written+= binlog_bytes_written;
+  user_stats->rows_sent+=  rows_sent;
+  user_stats->rows_inserted+= rows_inserted;
+  user_stats->rows_deleted+=  rows_deleted;
+  user_stats->rows_updated+=  rows_updated;
+  user_stats->rows_read+= rows_read;
+  user_stats->select_commands+= select_commands;
+  user_stats->update_commands+= update_commands;
+  user_stats->other_commands+= other_commands;
+  user_stats->commit_trans+= commit_trans;
+  user_stats->rollback_trans+= rollback_trans;
+  user_stats->denied_connections+= denied_connections;
+  user_stats->lost_connections+= lost_connections;
+  user_stats->access_denied_errors+= access_denied_errors;
+  user_stats->empty_queries+= empty_queries;
+}
+#endif
+
+
+void init_global_user_stats(void)
+{
+  if (hash_init(&global_user_stats, system_charset_info, max_connections,
+                0, 0, (hash_get_key) get_key_user_stats,
+                (hash_free_key)free_user_stats, 0))
+  {
+    sql_print_error("Initializing global_user_stats failed.");
+    exit(1);
+  }
+}
+
+void init_global_client_stats(void)
+{
+  if (hash_init(&global_client_stats, system_charset_info, max_connections,
+                0, 0, (hash_get_key) get_key_user_stats,
+                (hash_free_key)free_user_stats, 0))
+  {
+    sql_print_error("Initializing global_client_stats failed.");
+    exit(1);
+  }
+}
+
+extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
+                                      my_bool not_used __attribute__((unused)))
+{
+  *length= table_stats->table_name_length;
+  return (uchar*) table_stats->table;
+}
+
+extern "C" void free_table_stats(TABLE_STATS* table_stats)
+{
+  my_free(table_stats, MYF(0));
+}
+
+void init_global_table_stats(void)
+{
+  if (hash_init(&global_table_stats, system_charset_info, max_connections,
+                0, 0, (hash_get_key) get_key_table_stats,
+                (hash_free_key)free_table_stats, 0)) {
+    sql_print_error("Initializing global_table_stats failed.");
+    exit(1);
+  }
+}
+
+extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)))
+{
+  *length= index_stats->index_name_length;
+  return (uchar*) index_stats->index;
+}
+
+extern "C" void free_index_stats(INDEX_STATS* index_stats)
+{
+  my_free(index_stats, MYF(0));
+}
+
+void init_global_index_stats(void)
+{
+  if (hash_init(&global_index_stats, system_charset_info, max_connections,
+                0, 0, (hash_get_key) get_key_index_stats,
+                (hash_free_key)free_index_stats, 0))
+  {
+    sql_print_error("Initializing global_index_stats failed.");
+    exit(1);
+  }
+}
+
+
+void free_global_user_stats(void)
+{
+  hash_free(&global_user_stats);
+}
+
+void free_global_table_stats(void)
+{
+  hash_free(&global_table_stats);
+}
+
+void free_global_index_stats(void)
+{
+  hash_free(&global_index_stats);
+}
+
+void free_global_client_stats(void)
+{
+  hash_free(&global_client_stats);
+}
+
+/*
+  Increments the global stats connection count for an entry from
+  global_client_stats or global_user_stats. Returns 0 on success
+  and 1 on error.
+*/
+
+static bool increment_count_by_name(const char *name, size_t name_length,
+                                   const char *role_name,
+                                   HASH *users_or_clients, THD *thd)
+{
+  USER_STATS *user_stats;
+
+  if (!(user_stats= (USER_STATS*) hash_search(users_or_clients, (uchar*) name,
+                                              name_length)))
+  {
+    /* First connection for this user or client */
+    if (!(user_stats= ((USER_STATS*)
+                       my_malloc(sizeof(USER_STATS),
+                                 MYF(MY_WME | MY_ZEROFILL)))))
+      return TRUE;                              // Out of memory
+
+    init_user_stats(user_stats, name, name_length, role_name,
+                    0, 0,      // connections
+                    0, 0, 0,   // time
+                    0, 0, 0,   // bytes sent, received and written
+                    0, 0,      // Rows sent and read
+                    0, 0, 0,   // rows inserted, deleted and updated
+                    0, 0, 0,   // select, update and other commands
+                    0, 0,      // commit and rollback trans
+                    thd->status_var.access_denied_errors,
+                    0,         // lost connections
+                    0,         // access denied errors
+                    0);        // empty queries
+
+    if (my_hash_insert(users_or_clients, (uchar*)user_stats))
+    {
+      my_free(user_stats, 0);
+      return TRUE;                              // Out of memory
+    }
+  }
+  user_stats->total_connections++;
+  return FALSE;
+}
+
+
+/*
+  Increments the global user and client stats connection count.
+
+  @param use_lock  if true, LOCK_global_user_client_stats will be locked
+
+  @retval 0 ok
+  @retval 1 error.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+static bool increment_connection_count(THD* thd, bool use_lock)
+{
+  const char *user_string= get_valid_user_string(thd->main_security_ctx.user);
+  const char *client_string= get_client_host(thd);
+  bool return_value= FALSE;
+
+  if (!thd->userstat_running)
+    return FALSE;
+
+  if (use_lock)
+    pthread_mutex_lock(&LOCK_global_user_client_stats);
+
+  if (increment_count_by_name(user_string, strlen(user_string), user_string,
+                              &global_user_stats, thd))
+  {
+    return_value= TRUE;
+    goto end;
+  }
+  if (increment_count_by_name(client_string, strlen(client_string),
+                              user_string, &global_client_stats, thd))
+  {
+    return_value= TRUE;
+    goto end;
+  }
+
+end:
+  if (use_lock)
+    pthread_mutex_unlock(&LOCK_global_user_client_stats);
+  return return_value;
+}
+#endif
+
+/*
+  Used to update the global user and client stats
+*/
+
+static void update_global_user_stats_with_user(THD *thd,
+                                               USER_STATS *user_stats,
+                                               time_t now)
+{
+  DBUG_ASSERT(thd->userstat_running);
+
+  user_stats->connected_time+= now - thd->last_global_update_time;
+  user_stats->busy_time+=  (thd->status_var.busy_time -
+                            thd->org_status_var.busy_time);
+  user_stats->cpu_time+=   (thd->status_var.cpu_time -
+                            thd->org_status_var.cpu_time); 
+  /*
+    This is handle specially as bytes_recieved is incremented BEFORE
+    org_status_var is copied.
+  */
+  user_stats->bytes_received+= (thd->org_status_var.bytes_received-
+                                thd->start_bytes_received);
+  user_stats->bytes_sent+= (thd->status_var.bytes_sent -
+                            thd->org_status_var.bytes_sent);
+  user_stats->binlog_bytes_written+=
+    (thd->status_var.binlog_bytes_written -
+     thd->org_status_var.binlog_bytes_written);
+  user_stats->rows_read+=      (thd->status_var.rows_read -
+                                thd->org_status_var.rows_read);
+  user_stats->rows_sent+=      (thd->status_var.rows_sent -
+                                thd->org_status_var.rows_sent);
+  user_stats->rows_inserted+=  (thd->status_var.ha_write_count -
+                                thd->org_status_var.ha_write_count);
+  user_stats->rows_deleted+=   (thd->status_var.ha_delete_count -
+                                thd->org_status_var.ha_delete_count);
+  user_stats->rows_updated+=   (thd->status_var.ha_update_count -
+                                thd->org_status_var.ha_update_count);
+  user_stats->select_commands+= thd->select_commands;
+  user_stats->update_commands+= thd->update_commands;
+  user_stats->other_commands+=  thd->other_commands;
+  user_stats->commit_trans+=   (thd->status_var.ha_commit_count -
+                                thd->org_status_var.ha_commit_count);
+  user_stats->rollback_trans+= (thd->status_var.ha_rollback_count +
+                                thd->status_var.ha_savepoint_rollback_count -
+                                thd->org_status_var.ha_rollback_count -
+                                thd->org_status_var.
+                                ha_savepoint_rollback_count);
+  user_stats->access_denied_errors+=
+    (thd->status_var.access_denied_errors -
+     thd->org_status_var.access_denied_errors);
+  user_stats->empty_queries+=   (thd->status_var.empty_queries -
+                                 thd->org_status_var.empty_queries);
+
+  /* The following can only contain 0 or 1 and then connection ends */
+  user_stats->denied_connections+= thd->status_var.access_denied_errors;
+  user_stats->lost_connections+=   thd->status_var.lost_connections;
+}
+
+
+/*  Updates the global stats of a user or client */
+void update_global_user_stats(THD *thd, bool create_user, time_t now)
+{
+  const char *user_string, *client_string;
+  USER_STATS *user_stats;
+  size_t user_string_length, client_string_length;
+  DBUG_ASSERT(thd->userstat_running);
+
+  user_string= get_valid_user_string(thd->main_security_ctx.user);
+  user_string_length= strlen(user_string);
+  client_string= get_client_host(thd);
+  client_string_length= strlen(client_string);
+
+  pthread_mutex_lock(&LOCK_global_user_client_stats);
+
+  // Update by user name
+  if ((user_stats= (USER_STATS*) hash_search(&global_user_stats,
+                                             (uchar*) user_string,
+                                             user_string_length)))
+  {
+    /* Found user. */
+    update_global_user_stats_with_user(thd, user_stats, now);
+  }
+  else
+  {
+    /* Create the entry */
+    if (create_user)
+    {
+      increment_count_by_name(user_string, user_string_length, user_string,
+                              &global_user_stats, thd);
+    }
+  }
+
+  /* Update by client IP */
+  if ((user_stats= (USER_STATS*)hash_search(&global_client_stats,
+                                            (uchar*) client_string,
+                                            client_string_length)))
+  {
+    // Found by client IP
+    update_global_user_stats_with_user(thd, user_stats, now);
+  }
+  else
+  {
+    // Create the entry
+    if (create_user)
+    {
+      increment_count_by_name(client_string, client_string_length,
+                              user_string, &global_client_stats, thd);
+    }
+  }
+  /* Reset variables only used for counting */
+  thd->select_commands= thd->update_commands= thd->other_commands= 0;
+  thd->last_global_update_time= now;
+
+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
+}
+
 
 void thd_init_client_charset(THD *thd, uint cs_number)
 {
@@ -673,17 +854,14 @@ bool init_new_connection_handler_thread()
     thd  thread handle
 
   RETURN
-     0  success, OK is sent to user, thd is updated.
-    -1  error, which is sent to user
-   > 0  error code (not sent to user)
+     0  success, thd is updated.
+     1  error
 */
 
 static int check_connection(THD *thd)
 {
   uint connect_errors= 0;
   NET *net= &thd->net;
-  ulong pkt_len= 0;
-  char *end;
 
   DBUG_PRINT("info",
              ("New connection received on %s", vio_description(net->vio)));
@@ -748,203 +926,10 @@ static int check_connection(THD *thd)
   }
   vio_keepalive(net->vio, TRUE);
   
-  ulong server_capabilites;
-  {
-    /* buff[] needs to big enough to hold the server_version variable */
-    char buff[SERVER_VERSION_LENGTH + 1 + SCRAMBLE_LENGTH + 1 + 64];
-    server_capabilites= CLIENT_BASIC_FLAGS;
-
-    if (opt_using_transactions)
-      server_capabilites|= CLIENT_TRANSACTIONS;
-#ifdef HAVE_COMPRESS
-    server_capabilites|= CLIENT_COMPRESS;
-#endif /* HAVE_COMPRESS */
-#if defined(HAVE_OPENSSL)
-    if (ssl_acceptor_fd)
-    {
-      server_capabilites |= CLIENT_SSL;       /* Wow, SSL is available! */
-      server_capabilites |= CLIENT_SSL_VERIFY_SERVER_CERT;
-    }
-#endif /* HAVE_OPENSSL */
-
-    end= strnmov(buff, server_version, SERVER_VERSION_LENGTH) + 1;
-    int4store((uchar*) end, thd->thread_id);
-    end+= 4;
-    /*
-      So as check_connection is the only entry point to authorization
-      procedure, scramble is set here. This gives us new scramble for
-      each handshake.
-    */
-    create_random_string(thd->scramble, SCRAMBLE_LENGTH, &thd->rand);
-    /*
-      Old clients does not understand long scrambles, but can ignore packet
-      tail: that's why first part of the scramble is placed here, and second
-      part at the end of packet.
-    */
-    end= strmake(end, thd->scramble, SCRAMBLE_LENGTH_323) + 1;
-   
-    int2store(end, server_capabilites);
-    /* write server characteristics: up to 16 bytes allowed */
-    end[2]=(char) default_charset_info->number;
-    int2store(end+3, thd->server_status);
-    bzero(end+5, 13);
-    end+= 18;
-    /* write scramble tail */
-    end= strmake(end, thd->scramble + SCRAMBLE_LENGTH_323, 
-                 SCRAMBLE_LENGTH - SCRAMBLE_LENGTH_323) + 1;
-
-    /* At this point we write connection message and read reply */
-    if (net_write_command(net, (uchar) protocol_version, (uchar*) "", 0,
-                          (uchar*) buff, (size_t) (end-buff)) ||
-	(pkt_len= my_net_read(net)) == packet_error ||
-	pkt_len < MIN_HANDSHAKE_SIZE)
-    {
-      inc_host_errors(thd->main_security_ctx.ip);
-
-      my_error(ER_HANDSHAKE_ERROR, MYF(0),
-               thd->main_security_ctx.host_or_ip);
-      return 1;
-    }
-  }
-#ifdef _CUSTOMCONFIG_
-#include "_cust_sql_parse.h"
-#endif
-  if (connect_errors)
-    reset_host_errors(thd->main_security_ctx.ip);
   if (thd->packet.alloc(thd->variables.net_buffer_length))
     return 1; /* The error is set by alloc(). */
 
-  thd->client_capabilities= uint2korr(net->read_pos);
-  if (thd->client_capabilities & CLIENT_PROTOCOL_41)
-  {
-    thd->client_capabilities|= ((ulong) uint2korr(net->read_pos+2)) << 16;
-    thd->max_client_packet_length= uint4korr(net->read_pos+4);
-    DBUG_PRINT("info", ("client_character_set: %d", (uint) net->read_pos[8]));
-    thd_init_client_charset(thd, (uint) net->read_pos[8]);
-    thd->update_charset();
-    end= (char*) net->read_pos+32;
-  }
-  else
-  {
-    thd->max_client_packet_length= uint3korr(net->read_pos+2);
-    end= (char*) net->read_pos+5;
-  }
-  /*
-    Disable those bits which are not supported by the server.
-    This is a precautionary measure, if the client lies. See Bug#27944.
-  */
-  thd->client_capabilities&= server_capabilites;
-
-  if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
-    thd->variables.sql_mode|= MODE_IGNORE_SPACE;
-#if defined(HAVE_OPENSSL)
-  DBUG_PRINT("info", ("client capabilities: %lu", thd->client_capabilities));
-  if (thd->client_capabilities & CLIENT_SSL)
-  {
-    /* Do the SSL layering. */
-    if (!ssl_acceptor_fd)
-    {
-      inc_host_errors(thd->main_security_ctx.ip);
-      my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-      return 1;
-    }
-    DBUG_PRINT("info", ("IO layer change in progress..."));
-    if (sslaccept(ssl_acceptor_fd, net->vio, net->read_timeout))
-    {
-      DBUG_PRINT("error", ("Failed to accept new SSL connection"));
-      inc_host_errors(thd->main_security_ctx.ip);
-
-      my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-      return 1;
-    }
-    DBUG_PRINT("info", ("Reading user information over SSL layer"));
-    if ((pkt_len= my_net_read(net)) == packet_error ||
-	pkt_len < NORMAL_HANDSHAKE_SIZE)
-    {
-      DBUG_PRINT("error", ("Failed to read user information (pkt_len= %lu)",
-			   pkt_len));
-      inc_host_errors(thd->main_security_ctx.ip);
-
-      my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-      return 1;
-    }
-  }
-#endif /* HAVE_OPENSSL */
-
-  if (end >= (char*) net->read_pos+ pkt_len +2)
-  {
-    inc_host_errors(thd->main_security_ctx.ip);
-
-    my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-    return 1;
-  }
-
-  if (thd->client_capabilities & CLIENT_INTERACTIVE)
-    thd->variables.net_wait_timeout= thd->variables.net_interactive_timeout;
-  if ((thd->client_capabilities & CLIENT_TRANSACTIONS) &&
-      opt_using_transactions)
-    net->return_status= &thd->server_status;
-
-  char *user= end;
-  char *passwd= strend(user)+1;
-  uint user_len= passwd - user - 1;
-  char *db= passwd;
-  char db_buff[NAME_LEN + 1];           // buffer to store db in utf8
-  char user_buff[USERNAME_LENGTH + 1];	// buffer to store user in utf8
-  uint dummy_errors;
-
-  /*
-    Old clients send null-terminated string as password; new clients send
-    the size (1 byte) + string (not null-terminated). Hence in case of empty
-    password both send '\0'.
-
-    This strlen() can't be easily deleted without changing protocol.
-
-    Cast *passwd to an unsigned char, so that it doesn't extend the sign for
-    *passwd > 127 and become 2**32-127+ after casting to uint.
-  */
-  uint passwd_len= thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
-    (uchar)(*passwd++) : strlen(passwd);
-  db= thd->client_capabilities & CLIENT_CONNECT_WITH_DB ?
-    db + passwd_len + 1 : 0;
-  /* strlen() can't be easily deleted without changing protocol */
-  uint db_len= db ? strlen(db) : 0;
-
-  if (passwd + passwd_len + db_len > (char *)net->read_pos + pkt_len)
-  {
-    inc_host_errors(thd->main_security_ctx.ip);
-
-    my_error(ER_HANDSHAKE_ERROR, MYF(0), thd->main_security_ctx.host_or_ip);
-    return 1;
-  }
-
-  /* Since 4.1 all database names are stored in utf8 */
-  if (db)
-  {
-    db_buff[copy_and_convert(db_buff, sizeof(db_buff)-1,
-                             system_charset_info,
-                             db, db_len,
-                             thd->charset(), &dummy_errors)]= 0;
-    db= db_buff;
-  }
-
-  user_buff[user_len= copy_and_convert(user_buff, sizeof(user_buff)-1,
-                                       system_charset_info, user, user_len,
-                                       thd->charset(), &dummy_errors)]= '\0';
-  user= user_buff;
-
-  /* If username starts and ends in "'", chop them off */
-  if (user_len > 1 && user[0] == '\'' && user[user_len - 1] == '\'')
-  {
-    user[user_len-1]= 0;
-    user++;
-    user_len-= 2;
-  }
-
-  my_free(thd->main_security_ctx.user);
-  if (!(thd->main_security_ctx.user= my_strdup(user, MYF(MY_WME))))
-    return 1; /* The error is set by my_strdup(). */
-  return check_user(thd, COM_CONNECT, passwd, passwd_len, db, TRUE);
+  return acl_authenticate(thd, connect_errors, 0);
 }
 
 
@@ -967,7 +952,7 @@ bool setup_connection_thread_globals(THD *thd)
   {
     close_connection(thd, ER_OUT_OF_RESOURCES, 1);
     statistic_increment(aborted_connects,&LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+    MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0));
     return 1;                                   // Error
   }
   return 0;
@@ -989,7 +974,6 @@ bool setup_connection_thread_globals(THD *thd)
     1    error
 */
 
-
 bool login_connection(THD *thd)
 {
   NET *net= &thd->net;
@@ -1017,6 +1001,14 @@ bool login_connection(THD *thd)
   /* Connect completed, set read/write timeouts back to default */
   my_net_set_read_timeout(net, thd->variables.net_read_timeout);
   my_net_set_write_timeout(net, thd->variables.net_write_timeout);
+
+  /*  Updates global user connection stats. */
+  if (increment_connection_count(thd, TRUE))
+  {
+    net_send_error(thd, ER_OUTOFMEMORY);  // Out of memory
+    DBUG_RETURN(1);
+  }
+
   DBUG_RETURN(0);
 }
 
@@ -1046,6 +1038,7 @@ void end_connection(THD *thd)
   if (thd->killed || (net->error && net->vio != 0))
   {
     statistic_increment(aborted_threads,&LOCK_status);
+    status_var_increment(thd->status_var.lost_connections);
   }
 
   if (net->error && net->vio != 0)
@@ -1138,11 +1131,11 @@ void do_handle_one_connection(THD *thd_arg)
 
   thd->thr_create_utime= my_micro_time();
 
-  if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0))
+  if (MYSQL_CALLBACK(thread_scheduler, init_new_connection_thread, ()))
   {
     close_connection(thd, ER_OUT_OF_RESOURCES, 1);
     statistic_increment(aborted_connects,&LOCK_status);
-    MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+    MYSQL_CALLBACK(thd->thread_scheduler, end_thread, (thd, 0));
     return;
   }
 
@@ -1175,10 +1168,14 @@ void do_handle_one_connection(THD *thd_arg)
   for (;;)
   {
     NET *net= &thd->net;
+    bool create_user= TRUE;
 
     lex_start(thd);
     if (login_connection(thd))
+    {
+      create_user= FALSE;
       goto end_thread;
+    }      
 
     MYSQL_CONNECTION_START(thd->thread_id, thd->security_ctx->priv_user,
                            (char *) thd->security_ctx->host_or_ip);
@@ -1196,12 +1193,15 @@ void do_handle_one_connection(THD *thd_arg)
    
 end_thread:
     close_connection(thd, 0, 1);
-    if (MYSQL_CALLBACK_ELSE(thread_scheduler, end_thread, (thd, 1), 0))
+
+    if (thd->userstat_running)
+      update_global_user_stats(thd, create_user, time(NULL));
+
+    if (MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 1)))
       return;                                 // Probably no-threads
 
     /*
-      If end_thread() returns, we are either running with
-      thread-handler=no-threads or this thread has been schedule to
+      If end_thread() returns, this thread has been schedule to
       handle the next connection.
     */
     thd= current_thd;
diff --git a/sql/sql_connect.h b/sql/sql_connect.h
index bfcc04ba093..9653d3988c0 100644
--- a/sql/sql_connect.h
+++ b/sql/sql_connect.h
@@ -24,7 +24,15 @@ typedef struct st_lex_user LEX_USER;
 typedef struct user_conn USER_CONN;
 
 void init_max_user_conn(void);
+void init_global_user_stats(void);
+void init_global_table_stats(void);
+void init_global_index_stats(void);
+void init_global_client_stats(void);
 void free_max_user_conn(void);
+void free_global_user_stats(void);
+void free_global_table_stats(void);
+void free_global_index_stats(void);
+void free_global_client_stats(void);
 
 pthread_handler_t handle_one_connection(void *arg);
 void do_handle_one_connection(THD *thd_arg);
@@ -36,12 +44,14 @@ void decrease_user_connections(USER_CONN *uc);
 void thd_init_client_charset(THD *thd, uint cs_number);
 bool setup_connection_thread_globals(THD *thd);
 
-int check_user(THD *thd, enum enum_server_command command,
-	       const char *passwd, uint passwd_len, const char *db,
-	       bool check_count);
-
 bool login_connection(THD *thd);
 void prepare_new_connection_state(THD* thd);
 void end_connection(THD *thd);
+void update_global_user_stats(THD* thd, bool create_user, time_t now);
+int get_or_create_user_conn(THD *thd, const char *user,
+                            const char *host, USER_RESOURCES *mqh);
+int check_for_max_user_connections(THD *thd, USER_CONN *uc);
+
+
 
 #endif /* SQL_CONNECT_INCLUDED */
diff --git a/sql/sql_const.h b/sql/sql_const.h
index dca66628ddb..d08a8f18308 100644
--- a/sql/sql_const.h
+++ b/sql/sql_const.h
@@ -30,7 +30,7 @@
 #define MAX_FIELD_NAME 34			/* Max colum name length +2 */
 #define MAX_SYS_VAR_LENGTH 32
 #define MAX_KEY MAX_INDEXES                     /* Max used keys */
-#define MAX_REF_PARTS 16			/* Max parts used as ref */
+#define MAX_REF_PARTS 32			/* Max parts used as ref */
 #define MAX_KEY_LENGTH 3072			/* max possible key */
 #if SIZEOF_OFF_T > 4
 #define MAX_REFLENGTH 8				/* Max length for record ref */
@@ -176,11 +176,11 @@
   The cost of average seek 
     DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
 */
-#define DISK_SEEK_BASE_COST ((double)0.5)
+#define DISK_SEEK_BASE_COST ((double)0.9)
 
 #define BLOCKS_IN_AVG_SEEK  128
 
-#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
+#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)
 
 
 /**
@@ -190,6 +190,12 @@
 */
 #define MATCHING_ROWS_IN_OTHER_TABLE 10
 
+/*
+  Subquery materialization-related constants
+*/
+#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
+#define DISK_TEMPTABLE_LOOKUP_COST 1.0
+
 #define MY_CHARSET_BIN_MB_MAXLEN 1
 
 /** Don't pack string keys shorter than this (if PACK_KEYS=1 isn't used). */
diff --git a/sql/sql_crypt.cc b/sql/sql_crypt.cc
index 455e7dc1e7b..0271884b5f3 100644
--- a/sql/sql_crypt.cc
+++ b/sql/sql_crypt.cc
@@ -33,7 +33,7 @@
 void SQL_CRYPT::init(ulong *rand_nr)
 {
   uint i;
-  randominit(&rand,rand_nr[0],rand_nr[1]);
+  my_rnd_init(&rand,rand_nr[0],rand_nr[1]);
 
   for (i=0 ; i<=255; i++)
    decode_buff[i]= (char) i;
diff --git a/sql/sql_crypt.h b/sql/sql_crypt.h
index 15cc495858b..4dfd5e82e5a 100644
--- a/sql/sql_crypt.h
+++ b/sql/sql_crypt.h
@@ -26,7 +26,7 @@
 
 class SQL_CRYPT :public Sql_alloc
 {
-  struct rand_struct rand,org_rand;
+  struct my_rnd_struct rand,org_rand;
   char decode_buff[256],encode_buff[256];
   uint shift;
  public:
diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc
index 7a9834b4cde..5bd5f9df680 100644
--- a/sql/sql_cursor.cc
+++ b/sql/sql_cursor.cc
@@ -256,7 +256,7 @@ int Materialized_cursor::open(JOIN *join __attribute__((unused)))
   thd->set_n_backup_active_arena(this, &backup_arena);
   /* Create a list of fields and start sequential scan */
   rc= result->prepare(item_list, &fake_unit);
-  if (!rc && !(rc= table->file->ha_rnd_init(TRUE)))
+  if (!rc && !(rc= table->file->ha_rnd_init_with_error(TRUE)))
     is_rnd_inited= 1;
 
   thd->restore_active_arena(this, &backup_arena);
@@ -303,7 +303,7 @@ void Materialized_cursor::fetch(ulong num_rows)
   result->begin_dataset();
   for (fetch_limit+= num_rows; fetch_count < fetch_limit; fetch_count++)
   {
-    if ((res= table->file->rnd_next(table->record[0])))
+    if ((res= table->file->ha_rnd_next(table->record[0])))
       break;
     /* Send data only if the read was successful. */
     /*
@@ -367,7 +367,9 @@ bool Select_materialize::send_result_set_metadata(List<Item> &list, uint flags)
 {
   DBUG_ASSERT(table == 0);
   if (create_result_table(unit->thd, unit->get_unit_column_types(),
-                          FALSE, thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, ""))
+                          FALSE,
+                          thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS,
+                          "", FALSE))
     return TRUE;
 
   materialized_cursor= new (&table->mem_root)
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 517cb9139e9..8b92259573e 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -1357,13 +1357,9 @@ static inline bool
 cmp_db_names(const char *db1_name,
              const char *db2_name)
 {
-  return
-         /* db1 is NULL and db2 is NULL */
-         (!db1_name && !db2_name) ||
-
-         /* db1 is not-NULL, db2 is not-NULL, db1 == db2. */
-         (db1_name && db2_name &&
-         my_strcasecmp(system_charset_info, db1_name, db2_name) == 0);
+  return ((!db1_name && !db2_name) ||
+          (db1_name && db2_name &&
+           my_strcasecmp(system_charset_info, db1_name, db2_name) == 0));
 }
 
 
@@ -1436,12 +1432,9 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
   Security_context *sctx= thd->security_ctx;
   ulong db_access= sctx->db_access;
   CHARSET_INFO *db_default_cl;
-
   DBUG_ENTER("mysql_change_db");
-  DBUG_PRINT("enter",("name: '%s'", new_db_name->str));
 
-  if (new_db_name == NULL ||
-      new_db_name->length == 0)
+  if (new_db_name->length == 0)
   {
     if (force_switch)
     {
@@ -1450,8 +1443,7 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
         after loading stored program. The thing is that loading of stored
         program can happen when there is no current database.
 
-        TODO: actually, new_db_name and new_db_name->str seem to be always
-        non-NULL. In case of stored program, new_db_name->str == "" and
+        In case of stored program, new_db_name->str == "" and
         new_db_name->length == 0.
       */
 
@@ -1466,6 +1458,7 @@ bool mysql_change_db(THD *thd, const LEX_STRING *new_db_name, bool force_switch)
       DBUG_RETURN(TRUE);
     }
   }
+  DBUG_PRINT("enter",("name: '%s'", new_db_name->str));
 
   if (is_infoschema_db(new_db_name->str, new_db_name->length))
   {
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 2f69bac917e..9d13fa1b01b 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -137,7 +137,6 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
       - there should be no delete triggers associated with the table.
   */
   if (!using_limit && const_cond_result &&
-      !(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) &&
        (!thd->is_current_stmt_binlog_format_row() &&
         !(table->triggers && table->triggers->has_delete_triggers())))
   {
@@ -268,8 +267,15 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     free_underlaid_joins(thd, select_lex);
     DBUG_RETURN(TRUE);
   }
-  if (usable_index==MAX_KEY || (select && select->quick))
-    init_read_record(&info, thd, table, select, 1, 1, FALSE);
+  if (usable_index == MAX_KEY || (select && select->quick))
+  {
+    if (init_read_record(&info, thd, table, select, 1, 1, FALSE))
+    {
+      delete select;
+      free_underlaid_joins(thd, select_lex);
+      DBUG_RETURN(TRUE);
+    }
+  }
   else
     init_read_record_idx(&info, thd, table, 1, usable_index, reverse);
 
@@ -297,11 +303,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
   while (!(error=info.read_record(&info)) && !thd->killed &&
 	 ! thd->is_error())
   {
+    update_virtual_fields(thd, table);
     thd->examined_row_count++;
     // thd->is_error() is tested to disallow delete row on error
-    if (!select || (!select->skip_record(thd, &skip_record) && !skip_record))
+    if (!select || select->skip_record(thd) > 0)
     {
-
       if (table->triggers &&
           table->triggers->process_triggers(thd, TRG_EVENT_DELETE,
                                             TRG_ACTION_BEFORE, FALSE))
@@ -337,8 +343,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
 	  InnoDB it can fail in a FOREIGN KEY error or an
 	  out-of-tablespace error.
 	*/
- 	error= 1;
-	break;
+        if (!select_lex->no_error)
+        {
+          error= 1;
+          break;
+        }
       }
     }
     else
@@ -892,7 +901,10 @@ int multi_delete::do_table_deletes(TABLE *table, bool ignore)
   READ_RECORD info;
   ha_rows last_deleted= deleted;
   DBUG_ENTER("do_deletes_for_table");
-  init_read_record(&info, thd, table, NULL, 0, 1, FALSE);
+
+  if (init_read_record(&info, thd, table, NULL, 0, 1, FALSE))
+    DBUG_RETURN(1);
+
   /*
     Ignore any rows not found in reference tables as they may already have
     been deleted by foreign key handling
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 47bc13beb53..a4063e0dce9 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -172,7 +172,8 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
     */
     if ((res= derived_result->create_result_table(thd, &unit->types, FALSE,
                                                  create_options,
-                                                 orig_table_list->alias)))
+                                                 orig_table_list->alias,
+                                                 FALSE)))
       goto exit;
 
     table= derived_result->table;
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index 8c038e10a1f..44bfe0a4f44 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -591,6 +591,9 @@ void push_warning(THD *thd, MYSQL_ERROR::enum_warning_level level,
 
   (void) thd->raise_condition(code, NULL, level, msg);
 
+  /* Make sure we also count warnings pushed after calling set_ok_status(). */
+  thd->stmt_da->increment_warning();
+
   DBUG_VOID_RETURN;
 }
 
diff --git a/sql/sql_error.h b/sql/sql_error.h
index 87e98e27673..c0f28c20286 100644
--- a/sql/sql_error.h
+++ b/sql/sql_error.h
@@ -97,6 +97,13 @@ public:
     return m_statement_warn_count;
   }
 
+  /* Used to count any warnings pushed after calling set_ok_status(). */
+  void increment_warning()
+  {
+    if (m_status != DA_EMPTY)
+      m_total_warn_count++;
+  }
+
   Diagnostics_area() { reset_diagnostics_area(); }
 
 private:
diff --git a/sql/sql_expression_cache.cc b/sql/sql_expression_cache.cc
new file mode 100644
index 00000000000..50fd7c7ee4d
--- /dev/null
+++ b/sql/sql_expression_cache.cc
@@ -0,0 +1,277 @@
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/*
+  Expression cache is used only for caching subqueries now, so its statistic
+  variables we call subquery_cache*.
+*/
+ulong subquery_cache_miss, subquery_cache_hit;
+
+Expression_cache_tmptable::Expression_cache_tmptable(THD *thd,
+                                                 List<Item*> &dependants,
+                                                 Item *value)
+  :cache_table(NULL), table_thd(thd), list(&dependants), val(value),
+   inited (0)
+{
+  DBUG_ENTER("Expression_cache_tmptable::Expression_cache_tmptable");
+  DBUG_VOID_RETURN;
+};
+
+
+/**
+  Field enumerator for TABLE::add_tmp_key
+
+  @param arg             reference variable with current field number
+
+  @return field number
+*/
+
+static uint field_enumerator(uchar *arg)
+{
+  return ((uint*)arg)[0]++;
+}
+
+
+/**
+  Initialize temporary table and auxiliary structures for the expression
+  cache
+
+  @details
+  The function creates a temporary table for the expression cache, defines
+  the search index and initializes auxiliary search structures used to check
+  whether a given set of of values of the expression parameters is in some
+  cache entry.
+*/
+
+void Expression_cache_tmptable::init()
+{
+  List_iterator<Item*> li(*list);
+  Item_iterator_ref_list it(li);
+  Item **item;
+  uint field_counter;
+  DBUG_ENTER("Expression_cache_tmptable::init");
+  DBUG_ASSERT(!inited);
+  inited= TRUE;
+  cache_table= NULL;
+
+  while ((item= li++))
+  {
+    DBUG_ASSERT(item);
+    if (*item)
+    {
+      DBUG_ASSERT((*item)->fixed);
+      items.push_back((*item));
+    }
+    else
+    {
+      /*
+        This is possible when optimizer already executed this subquery and
+        optimized out the condition predicate.
+      */
+      li.remove();
+    }
+  }
+
+  if (list->elements == 0)
+  {
+    DBUG_PRINT("info", ("All parameters were removed by optimizer."));
+    DBUG_VOID_RETURN;
+  }
+
+  cache_table_param.init();
+  /* dependent items and result */
+  cache_table_param.field_count= list->elements + 1;
+  /* postpone table creation to index description */
+  cache_table_param.skip_create_table= 1;
+  cache_table= NULL;
+
+  items.push_front(val);
+
+  if (!(cache_table= create_tmp_table(table_thd, &cache_table_param,
+                                      items, (ORDER*) NULL,
+                                      FALSE, FALSE,
+                                      ((table_thd->options |
+                                        TMP_TABLE_ALL_COLUMNS) &
+                                       ~(OPTION_BIG_TABLES |
+                                         TMP_TABLE_FORCE_MYISAM)),
+                                      HA_POS_ERROR,
+                                      (char *)"subquery-cache-table")))
+  {
+    DBUG_PRINT("error", ("create_tmp_table failed, caching switched off"));
+    DBUG_VOID_RETURN;
+  }
+
+  if (cache_table->s->db_type() != heap_hton)
+  {
+    DBUG_PRINT("error", ("we need only heap table"));
+    goto error;
+  }
+
+  /* This list do not contain result field */
+  it.open();
+
+  field_counter=1;
+
+  if (cache_table->alloc_keys(1) ||
+      cache_table->add_tmp_key(0, items.elements - 1, &field_enumerator,
+                                (uchar*)&field_counter, TRUE) ||
+      ref.tmp_table_index_lookup_init(table_thd, cache_table->key_info, it,
+                                      TRUE))
+  {
+    DBUG_PRINT("error", ("creating index failed"));
+    goto error;
+  }
+  cache_table->s->keys= 1;
+  cache_table->s->uniques= 1;
+  ref.null_rejecting= 1;
+  ref.disable_cache= FALSE;
+  ref.has_record= 0;
+  ref.use_count= 0;
+
+
+  if (open_tmp_table(cache_table))
+  {
+    DBUG_PRINT("error", ("Opening (creating) temporary table failed"));
+    goto error;
+  }
+
+  if (!(cached_result= new Item_field(cache_table->field[0])))
+  {
+    DBUG_PRINT("error", ("Creating Item_field failed"));
+    goto error;
+  }
+
+  DBUG_VOID_RETURN;
+
+error:
+  /* switch off cache */
+  free_tmp_table(table_thd, cache_table);
+  cache_table= NULL;
+  DBUG_VOID_RETURN;
+}
+
+
+Expression_cache_tmptable::~Expression_cache_tmptable()
+{
+  if (cache_table)
+    free_tmp_table(table_thd, cache_table);
+}
+
+
+/**
+  Check if a given set of parameters of the expression is in the cache
+
+  @param [out] value     the expression value found in the cache if any
+
+  @details
+  For a given set of the parameters of the expression the function
+  checks whether it can be found in some entry of the cache. If so
+  the function returns the result of the expression extracted from
+  the cache.
+
+  @retval Expression_cache::HIT if the set of parameters is in the cache
+  @retval Expression_cache::MISS - otherwise
+*/
+
+Expression_cache::result Expression_cache_tmptable::check_value(Item **value)
+{
+  int res;
+  DBUG_ENTER("Expression_cache_tmptable::check_value");
+
+  /*
+    We defer cache initialization to get item references that are
+    used at the execution phase.
+  */
+  if (!inited)
+    init();
+
+  if (cache_table)
+  {
+    DBUG_PRINT("info", ("status: %u  has_record %u",
+                        (uint)cache_table->status, (uint)ref.has_record));
+    if ((res= join_read_key2(table_thd, NULL, cache_table, &ref)) == 1)
+      DBUG_RETURN(ERROR);
+    if (res)
+    {
+      subquery_cache_miss++;
+      DBUG_RETURN(MISS);
+    }
+
+    subquery_cache_hit++;
+    *value= cached_result;
+    DBUG_RETURN(Expression_cache::HIT);
+  }
+  DBUG_RETURN(Expression_cache::MISS);
+}
+
+
+/**
+  Put a new entry into the expression cache
+
+  @param value     the result of the expression to be put into the cache
+
+  @details
+  The function evaluates 'value' and puts the result into the cache as the
+  result of the expression for the current set of parameters.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+my_bool Expression_cache_tmptable::put_value(Item *value)
+{
+  int error;
+  DBUG_ENTER("Expression_cache_tmptable::put_value");
+  DBUG_ASSERT(inited);
+
+  if (!cache_table)
+  {
+    DBUG_PRINT("info", ("No table so behave as we successfully put value"));
+    DBUG_RETURN(FALSE);
+  }
+
+  *(items.head_ref())= value;
+  fill_record(table_thd, cache_table->field, items, TRUE, TRUE);
+  if (table_thd->is_error())
+    goto err;;
+
+  if ((error= cache_table->file->ha_write_row(cache_table->record[0])))
+  {
+    /* create_myisam_from_heap will generate error if needed */
+    if (cache_table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+        create_internal_tmp_table_from_heap(table_thd, cache_table,
+                                            cache_table_param.start_recinfo,
+                                            &cache_table_param.recinfo,
+                                            error, 1))
+      goto err;
+  }
+  cache_table->status= 0; /* cache_table->record contains an existed record */
+  ref.has_record= TRUE; /* the same as above */
+  DBUG_PRINT("info", ("has_record: TRUE  status: 0"));
+
+  DBUG_RETURN(FALSE);
+
+err:
+  free_tmp_table(table_thd, cache_table);
+  cache_table= NULL;
+  DBUG_RETURN(TRUE);
+}
+
+
+void Expression_cache_tmptable::print(String *str, enum_query_type query_type)
+{
+  List_iterator<Item*> li(*list);
+  Item **item;
+  bool is_first= TRUE;
+
+  str->append('<');
+  while ((item= li++))
+  {
+    if (!is_first)
+      str->append(',');
+    (*item)->print(str, query_type);
+    is_first= FALSE;
+  }
+  str->append('>');
+}
diff --git a/sql/sql_expression_cache.h b/sql/sql_expression_cache.h
new file mode 100644
index 00000000000..88f71e0cf32
--- /dev/null
+++ b/sql/sql_expression_cache.h
@@ -0,0 +1,84 @@
+#ifndef SQL_EXPRESSION_CACHE_INCLUDED
+#define SQL_EXPRESSION_CACHE_INCLUDED
+
+#include "sql_select.h"
+
+/**
+  Interface for expression cache
+
+  @note
+  Parameters of an expression cache interface are set on the creation of the
+  cache. They are passed when a cache object of the implementation class is
+  constructed. That's why they are not visible in this interface.
+*/
+
+extern ulong subquery_cache_miss, subquery_cache_hit;
+
+class Expression_cache :public Sql_alloc
+{
+public:
+  enum result {ERROR, HIT, MISS};
+
+  Expression_cache(){};
+  virtual ~Expression_cache() {};
+  /**
+    Shall check the presence of expression value in the cache for a given
+    set of values of the expression parameters.  Return the result of the
+    expression if it's found in the cache.
+  */
+  virtual result check_value(Item **value)= 0;
+  /**
+    Shall put the value of an expression for given set of its parameters
+    into the expression cache
+  */
+  virtual my_bool put_value(Item *value)= 0;
+
+  /**
+    Print cache parameters
+  */
+  virtual void print(String *str, enum_query_type query_type)= 0;
+};
+
+struct st_table_ref;
+struct st_join_table;
+class Item_field;
+
+
+/**
+  Implementation of expression cache over a temporary table
+*/
+
+class Expression_cache_tmptable :public Expression_cache
+{
+public:
+  Expression_cache_tmptable(THD *thd, List<Item*> &dependants, Item *value);
+  virtual ~Expression_cache_tmptable();
+  virtual result check_value(Item **value);
+  virtual my_bool put_value(Item *value);
+
+  void print(String *str, enum_query_type query_type);
+
+private:
+  void init();
+
+  /* tmp table parameters */
+  TMP_TABLE_PARAM cache_table_param;
+  /* temporary table to store this cache */
+  TABLE *cache_table;
+  /* Thread handle for the temporary table */
+  THD *table_thd;
+  /* TABLE_REF for index lookup */
+  struct st_table_ref ref;
+  /* Cached result */
+  Item_field *cached_result;
+  /* List of references to the parameters of the expression */
+  List<Item*> *list;
+  /* List of items */
+  List<Item> items;
+  /* Value Item example */
+  Item *val;
+  /* Set on if the object has been succesfully initialized with init() */
+  bool inited;
+};
+
+#endif /* SQL_EXPRESSION_CACHE_INCLUDED */
diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc
index a5c126a8521..9620b759adb 100644
--- a/sql/sql_handler.cc
+++ b/sql/sql_handler.cc
@@ -650,11 +650,11 @@ retry:
         {
           /* Check if we read from the same index. */
           DBUG_ASSERT((uint) keyno == table->file->get_index());
-          error= table->file->index_next(table->record[0]);
+          error= table->file->ha_index_next(table->record[0]);
         }
         else
         {
-          error= table->file->rnd_next(table->record[0]);
+          error= table->file->ha_rnd_next(table->record[0]);
         }
         break;
       }
@@ -664,13 +664,13 @@ retry:
       {
         table->file->ha_index_or_rnd_end();
         table->file->ha_index_init(keyno, 1);
-        error= table->file->index_first(table->record[0]);
+        error= table->file->ha_index_first(table->record[0]);
       }
       else
       {
         table->file->ha_index_or_rnd_end();
 	if (!(error= table->file->ha_rnd_init(1)))
-          error= table->file->rnd_next(table->record[0]);
+          error= table->file->ha_rnd_next(table->record[0]);
       }
       mode=RNEXT;
       break;
@@ -680,7 +680,7 @@ retry:
       DBUG_ASSERT((uint) keyno == table->file->get_index());
       if (table->file->inited != handler::NONE)
       {
-        error=table->file->index_prev(table->record[0]);
+        error=table->file->ha_index_prev(table->record[0]);
         break;
       }
       /* else fall through */
@@ -688,13 +688,13 @@ retry:
       DBUG_ASSERT(keyname != 0);
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
-      error= table->file->index_last(table->record[0]);
+      error= table->file->ha_index_last(table->record[0]);
       mode=RPREV;
       break;
     case RNEXT_SAME:
       /* Continue scan on "(keypart1,keypart2,...)=(c1, c2, ...)  */
       DBUG_ASSERT(keyname != 0);
-      error= table->file->index_next_same(table->record[0], key, key_len);
+      error= table->file->ha_index_next_same(table->record[0], key, key_len);
       break;
     case RKEY:
     {
@@ -734,8 +734,8 @@ retry:
       table->file->ha_index_or_rnd_end();
       table->file->ha_index_init(keyno, 1);
       key_copy(key, table->record[0], table->key_info + keyno, key_len);
-      error= table->file->index_read_map(table->record[0],
-                                         key, keypart_map, ha_rkey_mode);
+      error= table->file->ha_index_read_map(table->record[0],
+                                            key, keypart_map, ha_rkey_mode);
       mode=rkey_to_rnext[(int)ha_rkey_mode];
       break;
     }
@@ -757,6 +757,8 @@ retry:
       }
       goto ok;
     }
+    /* Generate values for virtual fields */
+    update_virtual_fields(thd, table);
     if (cond && !cond->val_int())
     {
       if (thd->is_error())
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index 4e3df950134..7218d806c5f 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -188,11 +188,14 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields,
 		  SQL_SELECT *select, List<String> *names,
 		  String *name, String *description, String *example)
 {
-  DBUG_ENTER("search_topics");
   int count= 0;
-
   READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, topics, select, 1, 0, FALSE);
+  DBUG_ENTER("search_topics");
+
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, topics, select, 1, 0, FALSE))
+    DBUG_RETURN(0);
+
   while (!read_record_info.read_record(&read_record_info))
   {
     if (!select->cond->val_int())		// Doesn't match like
@@ -228,11 +231,13 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields,
 int search_keyword(THD *thd, TABLE *keywords, struct st_find_field *find_fields,
                    SQL_SELECT *select, int *key_id)
 {
-  DBUG_ENTER("search_keyword");
   int count= 0;
-
   READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, keywords, select, 1, 0, FALSE);
+  DBUG_ENTER("search_keyword");
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, keywords, select, 1, 0, FALSE))
+    DBUG_RETURN(0);
+
   while (!read_record_info.read_record(&read_record_info) && count<2)
   {
     if (!select->cond->val_int())		// Dosn't match like
@@ -300,13 +305,13 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
 
   rkey_id->store((longlong) key_id, TRUE);
   rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
-  int key_res= relations->file->index_read_map(relations->record[0],
-                                               buff, (key_part_map) 1,
-                                               HA_READ_KEY_EXACT);
+  int key_res= relations->file->ha_index_read_map(relations->record[0],
+                                                  buff, (key_part_map) 1,
+                                                  HA_READ_KEY_EXACT);
 
   for ( ;
         !key_res && key_id == (int16) rkey_id->val_int() ;
-	key_res= relations->file->index_next(relations->record[0]))
+	key_res= relations->file->ha_index_next(relations->record[0]))
   {
     uchar topic_id_buff[8];
     longlong topic_id= rtopic_id->val_int();
@@ -314,8 +319,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
     field->store((longlong) topic_id, TRUE);
     field->get_key_image(topic_id_buff, field->pack_length(), Field::itRAW);
 
-    if (!topics->file->index_read_map(topics->record[0], topic_id_buff,
-                                      (key_part_map)1, HA_READ_KEY_EXACT))
+    if (!topics->file->ha_index_read_map(topics->record[0], topic_id_buff,
+                                         (key_part_map)1, HA_READ_KEY_EXACT))
     {
       memorize_variant_topic(thd,topics,count,find_fields,
 			     names,name,description,example);
@@ -353,10 +358,11 @@ int search_categories(THD *thd, TABLE *categories,
   Field *pcat_id= find_fields[help_category_help_category_id].field;
   int count= 0;
   READ_RECORD read_record_info;
-
   DBUG_ENTER("search_categories");
 
-  init_read_record(&read_record_info, thd, categories, select,1,0,FALSE);
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, categories, select,1,0,FALSE))
+    DBUG_RETURN(0);
   while (!read_record_info.read_record(&read_record_info))
   {
     if (select && !select->cond->val_int())
@@ -387,10 +393,13 @@ int search_categories(THD *thd, TABLE *categories,
 void get_all_items_for_category(THD *thd, TABLE *items, Field *pfname,
 				SQL_SELECT *select, List<String> *res)
 {
+  READ_RECORD read_record_info;
   DBUG_ENTER("get_all_items_for_category");
 
-  READ_RECORD read_record_info;
-  init_read_record(&read_record_info, thd, items, select,1,0,FALSE);
+  /* Should never happen. As this is part of help, we can ignore this */
+  if (init_read_record(&read_record_info, thd, items, select,1,0,FALSE))
+    DBUG_VOID_RETURN;
+
   while (!read_record_info.read_record(&read_record_info))
   {
     if (!select->cond->val_int())
@@ -532,7 +541,8 @@ int send_variant_2_list(MEM_ROOT *mem_root, Protocol *protocol,
   String **end= pointers + names->elements;
 
   List_iterator<String> it(*names);
-  for (pos= pointers; pos!=end; (*pos++= it++)) ;
+  for (pos= pointers; pos!=end; (*pos++= it++))
+    ;
 
   my_qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp);
 
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index adbdc1ffbc8..c562a5c6cb4 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -86,22 +86,13 @@ static void unlink_blobs(register TABLE *table);
 #endif
 static bool check_view_insertability(THD *thd, TABLE_LIST *view);
 
-/* Define to force use of my_malloc() if the allocated memory block is big */
-
-#ifndef HAVE_ALLOCA
-#define my_safe_alloca(size, min_length) my_alloca(size)
-#define my_safe_afree(ptr, size, min_length) my_afree(ptr)
-#else
-#define my_safe_alloca(size, min_length) ((size <= min_length) ? my_alloca(size) : my_malloc(size,MYF(0)))
-#define my_safe_afree(ptr, size, min_length) if (size > min_length) my_free(ptr)
-#endif
-
 /*
   Check that insert/update fields are from the same single table of a view.
 
   SYNOPSIS
     check_view_single_update()
     fields            The insert/update fields to be checked.
+    values            Values to use for update
     view              The view for insert.
     map     [in/out]  The insert table map.
 
@@ -143,7 +134,6 @@ bool check_view_single_update(List<Item> &fields, List<Item> *values,
   /* Convert to real table bits */
   tables&= ~PSEUDO_TABLE_BITS;
 
-
   /* Check found map against provided map */
   if (*map)
   {
@@ -177,6 +167,10 @@ error:
     fields                      The insert fields.
     values                      The insert values.
     check_unique                If duplicate values should be rejected.
+    fields_and_values_from_different_maps
+				Set to 1 if fields and values are using
+                                different table maps, like on select ... insert
+    map                         Store here table map for used fields
 
   NOTE
     Clears TIMESTAMP_AUTO_SET_ON_INSERT from table->timestamp_field_type
@@ -291,6 +285,9 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
       }
     }
   }
+  /* Mark virtual columns used in the insert statement */
+  if (table->vfield)
+    table->mark_virtual_columns_for_write(TRUE);
   // For the values we need select_priv
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege);
@@ -332,7 +329,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
                                List<Item> &update_values, table_map *map)
 {
   TABLE *table= insert_table_list->table;
-  my_bool timestamp_mark= 0;
+  my_bool timestamp_mark;
+  LINT_INIT(timestamp_mark);
 
   if (table->timestamp_field)
   {
@@ -417,10 +415,9 @@ void prepare_triggers_for_insert_stmt(TABLE *table)
   downgrade the lock in handler::store_lock() method.
 */
 
-static
-void upgrade_lock_type(THD *thd, thr_lock_type *lock_type,
-                       enum_duplicates duplic,
-                       bool is_multi_insert)
+void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type,
+                                  enum_duplicates duplic,
+                                  bool is_multi_insert)
 {
   if (duplic == DUP_UPDATE ||
       (duplic == DUP_REPLACE && *lock_type == TL_WRITE_CONCURRENT_INSERT))
@@ -666,6 +663,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
   bool transactional_table, joins_freed= FALSE;
   bool changed;
   bool was_insert_delayed= (table_list->lock_type ==  TL_WRITE_DELAYED);
+  bool using_bulk_insert= 0;
   uint value_count;
   ulong counter = 1;
   ulonglong id;
@@ -693,8 +691,8 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
     Upgrade lock type if the requested lock is incompatible with
     the current connection mode or table operation.
   */
-  upgrade_lock_type(thd, &table_list->lock_type, duplic,
-                    values_list.elements > 1);
+  upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic,
+                               values_list.elements > 1);
 
   /*
     We can't write-delayed into a table locked with LOCK TABLES:
@@ -840,8 +838,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       Engines can't handle a bulk insert in parallel with a read form the
       same table in the same connection.
     */
-    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
+    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
+       values_list.elements > 1)
+    {
+      using_bulk_insert= 1;
       table->file->ha_start_bulk_insert(values_list.elements);
+    }
   }
 
   thd->abort_on_warning= (!ignore && (thd->variables.sql_mode &
@@ -964,8 +966,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       auto_inc values from the delayed_insert thread as they share TABLE.
     */
     table->file->ha_release_auto_increment();
-    if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
-        table->file->ha_end_bulk_insert() && !error)
+    if (using_bulk_insert && table->file->ha_end_bulk_insert() && !error)
     {
       table->file->print_error(my_errno,MYF(0));
       error=1;
@@ -990,7 +991,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 
     if ((changed && error <= 0) ||
         thd->transaction.stmt.modified_non_trans_table ||
-        was_insert_delayed)
+	was_insert_delayed)
     {
       if (mysql_bin_log.is_open())
       {
@@ -1333,7 +1334,7 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   bool res= 0;
   table_map map= 0;
   DBUG_ENTER("mysql_prepare_insert");
-  DBUG_PRINT("enter", ("table_list 0x%lx, table 0x%lx, view %d",
+  DBUG_PRINT("enter", ("table_list: 0x%lx  table: 0x%lx  view: %d",
 		       (ulong)table_list, (ulong)table,
 		       (int)insert_into_view));
   /* INSERT should have a SELECT or VALUES clause */
@@ -1369,7 +1370,6 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   if (mysql_prepare_insert_check_table(thd, table_list, fields, select_insert))
     DBUG_RETURN(TRUE);
 
-
   /* Prepare the fields in the statement. */
   if (values)
   {
@@ -1422,6 +1422,18 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
   if (!table)
     table= table_list->table;
 
+  if (!fields.elements && table->vfield)
+  {
+    for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
+    {
+      if ((*vfield_ptr)->stored_in_db)
+      {
+        thd->lex->unit.insert_table_with_stored_vcol= table;
+        break;
+      }
+    }
+  }
+
   if (!select_insert)
   {
     Item *fake_conds= 0;
@@ -1563,7 +1575,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 	goto err;
       if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
       {
-	if (table->file->rnd_pos(table->record[1],table->file->dup_ref))
+	if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))
 	  goto err;
       }
       else
@@ -1584,9 +1596,10 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 	  }
 	}
 	key_copy((uchar*) key,table->record[0],table->key_info+key_nr,0);
-	if ((error=(table->file->index_read_idx_map(table->record[1],key_nr,
-                                                    (uchar*) key, HA_WHOLE_KEY,
-                                                    HA_READ_KEY_EXACT))))
+	if ((error= (table->file->ha_index_read_idx_map(table->record[1],
+                                                        key_nr, (uchar*) key,
+                                                        HA_WHOLE_KEY,
+                                                        HA_READ_KEY_EXACT))))
 	  goto err;
       }
       if (info->handle_duplicates == DUP_UPDATE)
@@ -1622,7 +1635,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
           table->file->adjust_next_insert_id_after_explicit_value(
             table->next_number_field->val_int());
         info->touched++;
-        if ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ &&
+        if (((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) &&
              !bitmap_is_subset(table->write_set, table->read_set)) ||
             compare_record(table))
         {
@@ -1898,8 +1911,10 @@ public:
      status(0), handler_thread_initialized(FALSE), group_count(0)
   {
     DBUG_ENTER("Delayed_insert constructor");
-    thd.security_ctx->user=thd.security_ctx->priv_user=(char*) delayed_user;
+    thd.security_ctx->user=(char*) delayed_user;
     thd.security_ctx->host=(char*) my_localhost;
+    strmake(thd.security_ctx->priv_user, thd.security_ctx->user,
+            USERNAME_LENGTH);
     thd.current_tablenr=0;
     thd.command=COM_DELAYED_INSERT;
     thd.lex->current_select= 0; 		// for my_message_sql
@@ -2270,7 +2285,7 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
   copy= (TABLE*) client_thd->alloc(sizeof(*copy)+
 				   (share->fields+1)*sizeof(Field**)+
 				   share->reclength +
-                                   share->column_bitmap_size*2);
+                                   share->column_bitmap_size*3);
   if (!copy)
     goto error;
 
@@ -2280,7 +2295,7 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
   /* Assign the pointers for the field pointers array and the record. */
   field= copy->field= (Field**) (copy + 1);
   bitmap= (uchar*) (field + share->fields + 1);
-  copy->record[0]= (bitmap + share->column_bitmap_size * 2);
+  copy->record[0]= (bitmap + share->column_bitmap_size*3);
   memcpy((char*) copy->record[0], (char*) table->record[0], share->reclength);
   /*
     Make a copy of all fields.
@@ -2322,10 +2337,13 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd)
   copy->def_read_set.bitmap= (my_bitmap_map*) bitmap;
   copy->def_write_set.bitmap= ((my_bitmap_map*)
                                (bitmap + share->column_bitmap_size));
+  copy->def_vcol_set.bitmap= ((my_bitmap_map*)
+                               (bitmap + 2*share->column_bitmap_size));
   copy->tmp_set.bitmap= 0;                      // To catch errors
-  bzero((char*) bitmap, share->column_bitmap_size*2);
+  bzero((char*) bitmap, share->column_bitmap_size*3);
   copy->read_set=  &copy->def_read_set;
   copy->write_set= &copy->def_write_set;
+  copy->vcol_set= &copy->def_vcol_set;
 
   DBUG_RETURN(copy);
 
@@ -2473,6 +2491,7 @@ void kill_delayed_threads(void)
   while ((di= it++))
   {
     di->thd.killed= THD::KILL_CONNECTION;
+    pthread_mutex_lock(&di->thd.LOCK_thd_data);
     if (di->thd.mysys_var)
     {
       mysql_mutex_lock(&di->thd.mysys_var->mutex);
@@ -2490,6 +2509,7 @@ void kill_delayed_threads(void)
       }
       mysql_mutex_unlock(&di->thd.mysys_var->mutex);
     }
+    pthread_mutex_unlock(&di->thd.LOCK_thd_data);
   }
   mysql_mutex_unlock(&LOCK_delayed_insert); // For unlink from list
 }
@@ -2814,7 +2834,7 @@ bool Delayed_insert::handle_inserts(void)
       or if another thread is removing the current table definition
       from the table cache.
     */
-    my_error(ER_DELAYED_CANT_CHANGE_LOCK,MYF(ME_FATALERROR),
+    my_error(ER_DELAYED_CANT_CHANGE_LOCK, MYF(ME_FATALERROR | ME_NOREFRESH),
              table->s->table_name.str);
     goto err;
   }
@@ -2995,10 +3015,11 @@ bool Delayed_insert::handle_inserts(void)
 	if (thr_reschedule_write_lock(*thd.lock->locks,
                                 thd.variables.lock_wait_timeout))
 	{
-    /* This is not known to happen. */
-    my_error(ER_DELAYED_CANT_CHANGE_LOCK,MYF(ME_FATALERROR),
-             table->s->table_name.str);
-    goto err;
+          /* This is not known to happen. */
+          my_error(ER_DELAYED_CANT_CHANGE_LOCK,
+                   MYF(ME_FATALERROR | ME_NOREFRESH),
+                   table->s->table_name.str);
+          goto err;
 	}
 	if (!using_bin_log)
 	  table->file->extra(HA_EXTRA_WRITE_CACHE);
@@ -3452,8 +3473,7 @@ bool select_insert::send_eof()
   table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
   table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
 
-  changed= (info.copied || info.deleted || info.updated);
-  if (changed)
+  if ((changed= (info.copied || info.deleted || info.updated)))
   {
     /*
       We must invalidate the table in the query cache before binlog writing
@@ -3648,17 +3668,20 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
   tmp_table.s->db_low_byte_first= 
         test(create_info->db_type == myisam_hton ||
              create_info->db_type == heap_hton);
-  tmp_table.null_row=tmp_table.maybe_null=0;
+  tmp_table.null_row= 0;
+  tmp_table.maybe_null= 0;
 
   while ((item=it++))
   {
     Create_field *cr_field;
     Field *field, *def_field;
     if (item->type() == Item::FUNC_ITEM)
+    {
       if (item->result_type() != STRING_RESULT)
         field= item->tmp_table_field(&tmp_table);
       else
         field= item->tmp_table_field_from_field_type(&tmp_table, 0);
+    }
     else
       field= create_tmp_field(thd, &tmp_table, item, item->type(),
                               (Item ***) 0, &tmp_field, &def_field, 0, 0, 0, 0,
@@ -3747,6 +3770,12 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
   if (! ((*lock)= mysql_lock_tables(thd, &table, 1, 0)) ||
         hooks->postlock(&table, 1))
   {
+    /* purecov: begin tested */
+    /*
+      This can happen in innodb when you get a deadlock when using same table
+      in insert and select
+    */
+    my_error(ER_CANT_LOCK, MYF(0), my_errno);
     if (*lock)
     {
       mysql_unlock_tables(thd, *lock);
@@ -3754,6 +3783,7 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
     }
     drop_open_table(thd, table, create_table->db, create_table->table_name);
     DBUG_RETURN(0);
+    /* purecov: end */
   }
   DBUG_RETURN(table);
 }
@@ -3869,7 +3899,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
     DBUG_RETURN(-1);
   }
 
- /* First field to copy */
+  /* First field to copy */
   field= table->field+table->s->fields - values.elements;
 
   /* Mark all fields that are given values */
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
new file mode 100644
index 00000000000..10064590a75
--- /dev/null
+++ b/sql/sql_join_cache.cc
@@ -0,0 +1,3294 @@
+/* Copyright (C) 2000-2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  @brief
+  join cache optimizations
+
+  @defgroup Query_Optimizer  Query Optimizer
+  @{
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation				// gcc: Class implementation
+#endif
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+#include "opt_subselect.h"
+
+#define NO_MORE_RECORDS_IN_BUFFER  (uint)(-1)
+
+
+/*****************************************************************************
+ *  Join cache module
+******************************************************************************/
+
+/* 
+  Fill in the descriptor of a flag field associated with a join cache    
+
+  SYNOPSIS
+    add_field_flag_to_join_cache()
+      str           position in a record buffer to copy the field from/to
+      length        length of the field 
+      field  IN/OUT pointer to the field descriptor to fill in 
+
+  DESCRIPTION
+    The function fill in the descriptor of a cache flag field to which
+    the parameter 'field' points to. The function uses the first two
+    parameters to set the position in the record buffer from/to which 
+    the field value is to be copied and the length of the copied fragment. 
+    Before returning the result the function increments the value of
+    *field by 1.
+    The function ignores the fields 'blob_length' and 'ofset' of the
+    descriptor.
+
+  RETURN
+    the length of the field  
+*/
+
+static
+uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
+{
+  CACHE_FIELD *copy= *field;
+  copy->str= str;
+  copy->length= length;
+  copy->type= 0;
+  copy->field= 0;
+  copy->referenced_field_no= 0;
+  (*field)++;
+  return length;    
+}
+
+
+/* 
+  Fill in the descriptors of table data fields associated with a join cache    
+
+  SYNOPSIS
+    add_table_data_fields_to_join_cache()
+      tab              descriptors of fields from this table are to be filled
+      field_set        descriptors for only these fields are to be created
+      field_cnt IN/OUT     counter of data fields  
+      descr  IN/OUT        pointer to the first descriptor to be filled
+      field_ptr_cnt IN/OUT counter of pointers to the data fields
+      descr_ptr IN/OUT     pointer to the first pointer to blob descriptors 
+
+  DESCRIPTION
+    The function fills in the descriptors of cache data fields from the table
+    'tab'. The descriptors are filled only for the fields marked in the 
+    bitmap 'field_set'. 
+    The function fills the descriptors starting from the position pointed
+    by 'descr'. If an added field is of a BLOB type then a pointer to the 
+    its descriptor is added to the array descr_ptr.   
+    At the return 'descr' points to the position after the last added
+    descriptor  while 'descr_ptr' points to the position right after the
+    last added pointer.  
+
+  RETURN
+    the total length of the added fields  
+*/
+
+static
+uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, 
+                                         MY_BITMAP *field_set,
+                                         uint *field_cnt, 
+                                         CACHE_FIELD **descr,
+                                         uint *field_ptr_cnt,
+                                         CACHE_FIELD ***descr_ptr)
+{
+  Field **fld_ptr;
+  uint len= 0;
+  CACHE_FIELD *copy= *descr;
+  CACHE_FIELD **copy_ptr= *descr_ptr;
+  uint used_fields= bitmap_bits_set(field_set);
+  for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
+  {
+    if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
+    {
+      len+= (*fld_ptr)->fill_cache_field(copy);
+      if (copy->type == CACHE_BLOB)
+      {
+        *copy_ptr= copy;
+        copy_ptr++;
+        (*field_ptr_cnt)++;
+      }
+      copy->field= *fld_ptr;
+      copy->referenced_field_no= 0;
+      copy++;
+      (*field_cnt)++;
+      used_fields--;
+    }
+  }
+  *descr= copy;
+  *descr_ptr= copy_ptr;
+  return len;
+}
+    
+
+/* 
+  Determine different counters of fields associated with a record in the cache  
+
+  SYNOPSIS
+    calc_record_fields()
+
+  DESCRIPTION
+    The function counts the number of total fields stored in a record
+    of the cache and saves this number in the 'fields' member. It also
+    determines the number of flag fields and the number of blobs.
+    The function sets 'with_match_flag' on if 'join_tab' needs a match flag
+    i.e. if it is the first inner table of an outer join or a semi-join.  
+
+  RETURN
+    none 
+*/
+
+void JOIN_CACHE::calc_record_fields()
+{
+  JOIN_TAB *tab = prev_cache ? prev_cache->join_tab :
+                               join->join_tab+join->const_tables;
+  tables= join_tab-tab;
+
+  fields= 0;
+  blobs= 0;
+  flag_fields= 0;
+  data_field_count= 0;
+  data_field_ptr_count= 0;
+  referenced_fields= 0;
+
+  for ( ; tab < join_tab ; tab++)
+  {	    
+    calc_used_field_length(join->thd, tab);
+    flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields);
+    flag_fields+= test(tab->table->maybe_null);
+    fields+= tab->used_fields;
+    blobs+= tab->used_blobs;
+
+    fields+= tab->check_rowid_field();
+  }
+  if ((with_match_flag= join_tab->use_match_flag()))
+    flag_fields++;
+  fields+= flag_fields;
+}
+
+/* 
+  Allocate memory for descriptors and pointers to them associated with the cache  
+
+  SYNOPSIS
+    alloc_fields()
+
+  DESCRIPTION
+    The function allocates memory for the array of fields descriptors
+    and the array of pointers to the field descriptors used to copy
+    join record data from record buffers into the join buffer and
+    backward. Some pointers refer to the field descriptor associated
+    with previous caches. They are placed at the beginning of the
+    array of pointers and its total number is specified by the parameter
+    'external fields'.
+    The pointer of the first array is assigned to field_descr and the 
+    number of elements is precalculated by the function calc_record_fields. 
+    The allocated arrays are adjacent.
+  
+  NOTES
+    The memory is allocated in join->thd->memroot
+
+  RETURN
+    pointer to the first array  
+*/
+
+int JOIN_CACHE::alloc_fields(uint external_fields)
+{
+  uint ptr_cnt= external_fields+blobs+1;
+  uint fields_size= sizeof(CACHE_FIELD)*fields;
+  field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
+                                        sizeof(CACHE_FIELD*)*ptr_cnt);
+  blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
+  return (field_descr == NULL);
+}  
+
+/* 
+  Create descriptors of the record flag fields stored in the join buffer 
+
+  SYNOPSIS
+    create_flag_fields()
+
+  DESCRIPTION
+    The function creates descriptors of the record flag fields stored
+    in the join buffer. These are descriptors for:
+    - an optional match flag field,
+    - table null bitmap fields, 
+    - table null row fields.
+    The match flag field is created when 'join_tab' is the first inner
+    table of an outer join our a semi-join. A null bitmap field is
+    created for any table whose fields are to be stored in the join
+    buffer if at least one of these fields is nullable or is a BIT field
+    whose bits are partially stored with null bits. A null row flag
+    is created for any table assigned to the cache if it is an inner
+    table of an outer join.
+    The descriptor for flag fields are placed one after another at the
+    beginning of the array of field descriptors 'field_descr' that
+    contains 'fields' elements. If there is a match flag field the 
+    descriptor for it is always first in the sequence of flag fields.
+    The descriptors for other flag fields can follow in an arbitrary
+    order. 
+    The flag field values follow in a record stored in the join buffer
+    in the same order as field descriptors, with the match flag always
+    following first.
+    The function sets the value of 'flag_fields' to the total number
+    of the descriptors created for the flag fields.
+    The function sets the value of 'length' to the total length of the
+    flag fields.
+  
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::create_flag_fields()
+{
+  CACHE_FIELD *copy;
+  JOIN_TAB *tab;
+
+  copy= field_descr;
+
+  length=0;
+
+  /* If there is a match flag the first field is always used for this flag */ 
+  if (with_match_flag)
+    length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
+                                          sizeof(join_tab->found),
+	                                  &copy);
+
+  /* Create fields for all null bitmaps and null row flags that are needed */
+  for (tab= join_tab-tables; tab < join_tab; tab++)
+  {
+    TABLE *table= tab->table;
+
+    /* Create a field for the null bitmap from table if needed */
+    if (tab->used_null_fields || tab->used_uneven_bit_fields)			    
+      length+= add_flag_field_to_join_cache(table->null_flags,
+                                            table->s->null_bytes,
+                                            &copy);
+ 
+    /* Create table for the null row flag if needed */
+    if (table->maybe_null)
+      length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
+                                            sizeof(table->null_row),
+                                            &copy);
+  }
+
+  /* Theoretically the new value of flag_fields can be less than the old one */   
+  flag_fields= copy-field_descr;
+}
+
+
+/* 
+  Create descriptors of all remaining data fields stored in the join buffer    
+
+  SYNOPSIS
+    create_remaining_fields()
+      all_read_fields   indicates that descriptors for all read data fields
+                        are to be created
+
+  DESCRIPTION
+    The function creates descriptors for all remaining data fields of a
+    record from the join buffer. If the parameter 'all_read_fields' is
+    true the function creates fields for all read record fields that
+    comprise the partial join record joined with join_tab. Otherwise, 
+    for each table tab, the set of the read fields for which the descriptors
+    have to be added is determined as the difference between all read fields
+    and and those for which the descriptors have been already created.
+    The latter are supposed to be marked in the bitmap tab->table->tmp_set.
+    The function increases the value of 'length' to the the total length of
+    the added fields.
+   
+  NOTES
+    If 'all_read_fields' is false the function modifies the value of
+    tab->table->tmp_set for a each table whose fields are stored in the cache.
+    The function calls the method Field::fill_cache_field to figure out
+    the type of the cache field and the maximal length of its representation
+    in the join buffer. If this is a blob field then additionally a pointer
+    to this field is added as an element of the array blob_ptr. For a blob
+    field only the size of the length of the blob data is taken into account.
+    It is assumed that 'data_field_count' contains the number of descriptors
+    for data fields that have been already created and 'data_field_ptr_count'
+    contains the number of the pointers to such descriptors having been
+    stored up to the moment.
+
+  RETURN
+    none 
+*/
+
+void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
+{
+  JOIN_TAB *tab;
+  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
+  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
+
+  for (tab= join_tab-tables; tab < join_tab; tab++)
+  {
+    MY_BITMAP *rem_field_set;
+    TABLE *table= tab->table;
+
+    if (all_read_fields)
+      rem_field_set= table->read_set;
+    else
+    {
+      bitmap_invert(&table->tmp_set);
+      bitmap_intersect(&table->tmp_set, table->read_set);
+      rem_field_set= &table->tmp_set;
+    }  
+
+    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
+                                                 &data_field_count, &copy,
+                                                 &data_field_ptr_count,
+                                                 &copy_ptr);
+  
+    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
+    if (tab->keep_current_rowid)
+    {
+      copy->str= table->file->ref;
+      copy->length= table->file->ref_length;
+      copy->type= 0;
+      copy->field= 0;
+      copy->referenced_field_no= 0;
+      length+= copy->length;
+      data_field_count++;
+      copy++;
+    }
+  }
+}
+
+
+/* 
+  Calculate and set all cache constants      
+
+  SYNOPSIS
+    set_constants()
+
+  DESCRIPTION
+    The function calculates and set all precomputed constants that are used
+    when writing records into the join buffer and reading them from it.
+    It calculates the size of offsets of a record within the join buffer
+    and of a field within a record. It also calculates the number of bytes
+    used to store record lengths.
+    The function also calculates the maximal length of the representation
+    of record in the cache excluding blob_data. This value is used when
+    making a dicision whether more records should be added into the join
+    buffer or not.
+  
+  RETURN
+    none 
+*/
+
+void JOIN_CACHE::set_constants()
+{ 
+  /* 
+    Any record from a BKA cache is prepended with the record length.
+    We use the record length when reading the buffer and building key values
+    for each record. The length allows us not to read the fields that are
+    not needed for keys.
+    If a record has match flag it also may be skipped when the match flag
+    is on. It happens if the cache is used for a semi-join operation or
+    for outer join when the 'not exist' optimization can be applied.
+    If some of the fields are referenced from other caches then
+    the record length allows us to easily reach the saved offsets for
+    these fields since the offsets are stored at the very end of the record.
+    However at this moment we don't know whether we have referenced fields for
+    the cache or not. Later when a referenced field is registered for the cache
+    we adjust the value of the flag 'with_length'.
+  */ 
+  with_length= is_key_access() || 
+               join_tab->is_inner_table_of_semi_join_with_first_match() ||
+               join_tab->is_inner_table_of_outer_join();
+  /* 
+     At this moment we don't know yet the value of 'referenced_fields',
+     but in any case it can't be greater than the value of 'fields'.
+  */
+  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
+            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
+            sizeof(ulong);
+  buff_size= max(join->thd->variables.join_buff_size, 2*len);
+  size_of_rec_ofs= offset_size(buff_size);
+  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len); 
+  size_of_fld_ofs= size_of_rec_len;
+  /* 
+    The size of the offsets for referenced fields will be added later.
+    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
+    every time when the first reference to the referenced field is registered.
+  */
+  pack_length= (with_length ? size_of_rec_len : 0) +
+               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) + 
+               length;
+  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
+}
+
+
+/* 
+  Allocate memory for a join buffer      
+
+  SYNOPSIS
+    alloc_buffer()
+
+  DESCRIPTION
+    The function allocates a lump of memory for the cache join buffer. The
+    size of the allocated memory is 'buff_size' bytes. 
+  
+  RETURN
+    0 - if the memory has been successfully allocated
+    1 - otherwise
+*/
+
+int JOIN_CACHE::alloc_buffer()
+{
+  buff= (uchar*) my_malloc(buff_size, MYF(0));
+  return buff == NULL;
+}    	
+  
+
+/* 
+  Initialize a BNL cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the cache structure. It supposed to be called
+    right after a constructor for the JOIN_CACHE_BNL.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+
+  NOTES
+    The code of this function should have been included into the constructor
+    code itself. However the new operator for the class JOIN_CACHE_BNL would
+    never fail while memory allocation for the join buffer is not absolutely
+    unlikely to fail. That's why this memory allocation has to be placed in a
+    separate function that is called in a couple with a cache constructor.
+    It is quite natural to put almost all other constructor actions into
+    this function.     
+  
+  RETURN
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BNL::init()
+{
+  DBUG_ENTER("JOIN_CACHE::init");
+
+  calc_record_fields();
+
+  if (alloc_fields(0))
+    DBUG_RETURN(1);
+
+  create_flag_fields();
+  
+  create_remaining_fields(TRUE);
+
+  set_constants();
+
+  if (alloc_buffer())
+    DBUG_RETURN(1); 
+  
+  reset(TRUE); 
+
+  DBUG_RETURN(0);
+}
+
+
+/* 
+  Initialize a BKA cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the cache structure. It supposed to be called
+    right after a constructor for the JOIN_CACHE_BKA.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+
+  NOTES
+    The code of this function should have been included into the constructor
+    code itself. However the new operator for the class JOIN_CACHE_BKA would
+    never fail while memory allocation for the join buffer is not absolutely
+    unlikely to fail. That's why this memory allocation has to be placed in a
+    separate function that is called in a couple with a cache constructor.
+    It is quite natural to put almost all other constructor actions into
+    this function.     
+  
+  RETURN
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKA::init()
+{
+  JOIN_TAB *tab;
+  JOIN_CACHE *cache;
+  local_key_arg_fields= 0;
+  external_key_arg_fields= 0;
+  DBUG_ENTER("JOIN_CACHE_BKA::init");
+
+  calc_record_fields();
+
+  /* Mark all fields that can be used as arguments for this key access */
+  TABLE_REF *ref= &join_tab->ref;
+  cache= this;
+  do
+  {
+    /* 
+      Traverse the ref expressions and find the occurrences of fields in them for
+      each table 'tab' whose fields are to be stored in the 'cache' join buffer.
+      Mark these fields in the bitmap tab->table->tmp_set.
+      For these fields count the number of them stored in this cache and the
+      total number of them stored in the previous caches. Save the result
+      of the counting 'in local_key_arg_fields' and 'external_key_arg_fields'
+      respectively.
+    */ 
+    for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
+    { 
+      uint key_args;
+      bitmap_clear_all(&tab->table->tmp_set);
+      for (uint i= 0; i < ref->key_parts; i++)
+      {
+        Item *ref_item= ref->items[i]; 
+        if (!(tab->table->map & ref_item->used_tables()))
+	  continue;
+	 ref_item->walk(&Item::add_field_to_set_processor, 1,
+                        (uchar *) tab->table);
+      }
+      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
+      {
+        if (cache == this)
+          local_key_arg_fields+= key_args;
+        else
+          external_key_arg_fields+= key_args;
+      }
+    }
+    cache= cache->prev_cache;
+  } 
+  while (cache);
+
+  if (alloc_fields(external_key_arg_fields))
+    DBUG_RETURN(1);
+
+  create_flag_fields();
+  
+  /* 
+    Save pointers to the cache fields in previous caches
+    that  are used to build keys for this key access.
+  */
+  cache= this;
+  uint ext_key_arg_cnt= external_key_arg_fields;
+  CACHE_FIELD *copy;
+  CACHE_FIELD **copy_ptr= blob_ptr;
+  while (ext_key_arg_cnt)
+  {
+    cache= cache->prev_cache;
+    for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
+    { 
+      CACHE_FIELD *copy_end;
+      MY_BITMAP *key_read_set= &tab->table->tmp_set;
+      /* key_read_set contains the bitmap of tab's fields referenced by ref */ 
+      if (bitmap_is_clear_all(key_read_set))
+        continue;
+      copy_end= cache->field_descr+cache->fields;
+      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
+      {
+        /*
+          (1) - when we store rowids for DuplicateWeedout, they have
+                copy->field==NULL
+        */
+        if (copy->field &&  // (1)
+            copy->field->table == tab->table &&
+            bitmap_is_set(key_read_set, copy->field->field_index))
+        {
+          *copy_ptr++= copy; 
+          ext_key_arg_cnt--;
+          if (!copy->referenced_field_no)
+          {
+            /* 
+              Register the referenced field 'copy': 
+              - set the offset number in copy->referenced_field_no,
+              - adjust the value of the flag 'with_length',
+              - adjust the values of 'pack_length' and 
+                of 'pack_length_with_blob_ptrs'.
+	    */
+            copy->referenced_field_no= ++cache->referenced_fields;
+            cache->with_length= TRUE;
+	    cache->pack_length+= cache->get_size_of_fld_offset();
+            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
+          }        
+        }
+      }
+    } 
+  }
+  /* After this 'blob_ptr' shall not be be changed */ 
+  blob_ptr= copy_ptr;
+  
+  /* Now create local fields that are used to build ref for this key access */
+  copy= field_descr+flag_fields;
+  for (tab= join_tab-tables; tab < join_tab ; tab++)
+  {
+    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
+                                                 &data_field_count, &copy,
+                                                 &data_field_ptr_count, 
+                                                 &copy_ptr);
+  }
+
+  use_emb_key= check_emb_key_usage();
+
+  create_remaining_fields(FALSE);
+
+  set_constants();
+
+  if (alloc_buffer())
+    DBUG_RETURN(1); 
+
+  reset(TRUE);
+
+  DBUG_RETURN(0);
+}  
+
+
+/* 
+  Check the possibility to read the access keys directly from the join buffer       
+
+  SYNOPSIS
+    check_emb_key_usage()
+
+  DESCRIPTION
+    The function checks some conditions at which the key values can be read
+    directly from the join buffer. This is possible when the key values can be
+    composed by concatenation of the record fields stored in the join buffer.
+    Sometimes when the access key is multi-component the function has to re-order
+    the fields written into the join buffer to make keys embedded. If key 
+    values for the key access are detected as embedded then 'use_emb_key'
+    is set to TRUE.
+
+  EXAMPLE
+    Let table t2 has an index defined on the columns a,b . Let's assume also
+    that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
+    of the integer type. Then if the query
+      SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b  
+    is executed with a join cache in such a way that t1 is the driving
+    table then the key values to access table t2 can be read directly
+    from the join buffer.
+  
+  NOTES
+    In some cases key values could be read directly from the join buffer but
+    we still do not consider them embedded. In the future we'll expand the
+    the class of keys which we identify as embedded.
+
+  RETURN
+    TRUE  - key values will be considered as embedded,
+    FALSE - otherwise.
+*/
+
+bool JOIN_CACHE_BKA::check_emb_key_usage()
+{
+  uint i;
+  Item *item; 
+  KEY_PART_INFO *key_part;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uint len= 0;
+  TABLE *table= join_tab->table;
+  TABLE_REF *ref= &join_tab->ref;
+  KEY *keyinfo= table->key_info+ref->key;
+
+  /* 
+    If some of the key arguments are not from the local cache the key
+    is not considered as embedded.
+    TODO:
+    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
+  */  
+  if (external_key_arg_fields != 0)
+    return FALSE;
+  /* 
+    If the number of the local key arguments is not equal to the number
+    of key parts the key value cannot be read directly from the join buffer.   
+  */
+  if (local_key_arg_fields != ref->key_parts)
+    return FALSE;
+
+  /* 
+    A key is not considered embedded if one of the following is true:
+    - one of its key parts is not equal to a field
+    - it is a partial key
+    - definition of the argument field does not coincide with the
+      definition of the corresponding key component
+    - some of the key components are nullable
+  */  
+  for (i=0; i < ref->key_parts; i++)
+  {
+    item= ref->items[i]->real_item();
+    if (item->type() != Item::FIELD_ITEM)
+      return FALSE;
+    key_part= keyinfo->key_part+i;
+    if (key_part->key_part_flag & HA_PART_KEY_SEG)
+      return FALSE;
+    if (!key_part->field->eq_def(((Item_field *) item)->field))
+      return FALSE;
+    if (key_part->field->maybe_null())
+      return FALSE;
+  }
+  
+  copy= field_descr+flag_fields;
+  copy_end= copy+local_key_arg_fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    /* 
+      If some of the key arguments are of variable length the key
+      is not considered as embedded.
+    */
+    if (copy->type != 0)
+      return FALSE;
+    /* 
+      If some of the key arguments are bit fields whose bits are partially
+      stored with null bits the key is not considered as embedded.
+    */
+    if (copy->field->type() == MYSQL_TYPE_BIT &&
+	 ((Field_bit*) (copy->field))->bit_len)
+      return FALSE;
+    len+= copy->length;
+  }
+
+  emb_key_length= len;
+
+  /* 
+    Make sure that key fields follow the order of the corresponding
+    key components these fields are equal to. For this the descriptors
+    of the fields that comprise the key might be re-ordered.
+  */
+  for (i= 0; i < ref->key_parts; i++)
+  {
+    uint j;
+    Item *item= ref->items[i]->real_item();
+    Field *fld= ((Item_field *) item)->field;
+    CACHE_FIELD *init_copy= field_descr+flag_fields+i; 
+    for (j= i, copy= init_copy; i < local_key_arg_fields;  i++, copy++)
+    {
+      if (fld->eq(copy->field))
+      {
+        if (j != i)
+        {
+          CACHE_FIELD key_part_copy= *copy;
+          *copy= *init_copy;
+          *init_copy= key_part_copy;
+        }
+        break;
+      }
+    }
+  }
+
+  return TRUE;
+}    
+
+
+/* 
+  Calculate the increment of the MRR buffer for a record write       
+
+  SYNOPSIS
+    aux_buffer_incr()
+
+  DESCRIPTION
+    This implementation of the virtual function aux_buffer_incr determines
+    for how much the size of the MRR buffer should be increased when another
+    record is added to the cache.   
+
+  RETURN
+    the increment of the size of the MRR buffer for the next record
+*/
+
+uint JOIN_CACHE_BKA::aux_buffer_incr()
+{
+  uint incr= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  TABLE *tab= join_tab->table;
+  uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
+  set_if_bigger(rec_per_key, 1);
+  if (records == 1)
+    incr=  ref->key_length + tab->file->ref_length;
+  incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
+  return incr; 
+}
+
+
+/*
+  Check if the record combination matches the index condition
+
+  SYNOPSIS
+    JOIN_CACHE_BKA::skip_index_tuple()
+      rseq             Value returned by bka_range_seq_init()
+      range_info       MRR range association data
+    
+  DESCRIPTION
+    This function is invoked from MRR implementation to check if an index
+    tuple matches the index condition. It is used in the case where the index
+    condition actually depends on both columns of the used index and columns
+    from previous tables.
+    
+    Accessing columns of the previous tables requires special handling with
+    BKA. The idea of BKA is to collect record combinations in a buffer and 
+    then do a batch of ref access lookups, i.e. by the time we're doing a
+    lookup its previous-records-combination is not in prev_table->record[0]
+    but somewhere in the join buffer.
+    
+    We need to get it from there back into prev_table(s)->record[0] before we
+    can evaluate the index condition, and that's why we need this function
+    instead of regular IndexConditionPushdown.
+
+  NOTE
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN
+    0    The record combination satisfies the index condition
+    1    Otherwise
+*/
+
+bool JOIN_CACHE_BKA::skip_index_tuple(range_seq_t rseq, char *range_info)
+{
+  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  cache->get_record_by_pos((uchar*)range_info);
+  DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
+}
+
+
+/*
+  Check if the record combination matches the index condition
+
+  SYNOPSIS
+    bka_skip_index_tuple()
+      rseq             Value returned by bka_range_seq_init()
+      range_info       MRR range association data
+    
+  DESCRIPTION
+    This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN
+    0    The record combination satisfies the index condition
+    1    Otherwise
+*/
+
+static 
+bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
+{
+  DBUG_ENTER("bka_skip_index_tuple");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
+}
+
+
+/* 
+  Write record fields and their required offsets into the join cache buffer
+
+  SYNOPSIS
+    write_record_data()
+      link        a reference to the associated info in the previous cache
+      is_full OUT true if it has been decided that no more records will be
+                  added to the join buffer
+
+  DESCRIPTION
+    This function put into the cache buffer the following info that it reads
+    from the join record buffers or computes somehow:
+    (1) the length of all fields written for the record (optional)
+    (2) an offset to the associated info in the previous cache (if there is any)
+        determined by the link parameter
+    (3) all flag fields of the tables whose data field are put into the cache:
+        - match flag (optional),
+        - null bitmaps for all tables,
+        - null row flags for all tables
+    (4) values of all data fields including
+        - full images of those fixed legth data fields that cannot have 
+          trailing spaces
+        - significant part of fixed length fields that can have trailing spaces
+          with the prepanded length 
+        - data of non-blob variable length fields with the prepanded data length  
+        - blob data from blob fields with the prepanded data length
+    (5) record offset values for the data fields that are referred to from 
+        other caches
+ 
+    The record is written at the current position stored in the field 'pos'.
+    At the end of the function 'pos' points at the position right after the 
+    written record data.
+    The function increments the number of records in the cache that is stored
+    in the 'records' field by 1. The function also modifies the values of
+    'curr_rec_pos' and 'last_rec_pos' to point to the written record.
+    The 'end_pos' cursor is modified accordingly.
+    The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data 
+    remains in the record buffers and not copied to the join buffer. It may
+    happen only to the blob data from the last record added into the cache.
+   
+    
+  RETURN
+    length of the written record data
+*/
+
+uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
+{
+  uint len;
+  bool last_record;
+  CACHE_FIELD *copy;
+  CACHE_FIELD *copy_end;
+  uchar *cp= pos;
+  uchar *init_pos= cp;
+  uchar *rec_len_ptr= 0;
+ 
+  records++;  /* Increment the counter of records in the cache */
+
+  len= pack_length;
+
+  /* Make an adjustment for the size of the auxiliary buffer if there is any */
+  uint incr= aux_buffer_incr();
+  ulong rem= rem_space();
+  aux_buff_size+= len+incr < rem ? incr : rem;
+
+  /*
+    For each blob to be put into cache save its length and a pointer
+    to the value in the corresponding element of the blob_ptr array.
+    Blobs with null values are skipped.
+    Increment 'len' by the total length of all these blobs. 
+  */    
+  if (blobs)
+  {
+    CACHE_FIELD **copy_ptr= blob_ptr;
+    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
+    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
+    {
+      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
+      if (!blob_field->is_null())
+      {
+        uint blob_len= blob_field->get_length();
+        (*copy_ptr)->blob_length= blob_len;
+        len+= blob_len;
+        blob_field->get_ptr(&(*copy_ptr)->str);
+      }
+    }
+  }
+
+  /*
+    Check whether we won't be able to add any new record into the cache after
+    this one because the cache will be full. Set last_record to TRUE if it's so.
+    The assume that the cache will be full after the record has been written
+    into it if either the remaining space of the cache is not big enough for the 
+    record's blob values or if there is a chance that not all non-blob fields
+    of the next record can be placed there.
+    This function is called only in the case when there is enough space left in
+    the cache to store at least non-blob parts of the current record.
+  */
+  last_record= (len+pack_length_with_blob_ptrs) > rem_space();
+  
+  /* 
+    Save the position for the length of the record in the cache if it's needed.
+    The length of the record will be inserted here when all fields of the record
+    are put into the cache.  
+  */
+  if (with_length)
+  {
+    rec_len_ptr= cp;   
+    cp+= size_of_rec_len;
+  }
+
+  /*
+    Put a reference to the fields of the record that are stored in the previous
+    cache if there is any. This reference is passed by the 'link' parameter.     
+  */
+  if (prev_cache)
+  {
+    cp+= prev_cache->get_size_of_rec_offset();
+    prev_cache->store_rec_ref(cp, link);
+  } 
+
+  curr_rec_pos= cp;
+  
+  /* If the there is a match flag set its value to 0 */
+  copy= field_descr;
+  if (with_match_flag)
+    *copy[0].str= 0;
+
+  /* First put into the cache the values of all flag fields */
+  copy_end= field_descr+flag_fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    memcpy(cp, copy->str, copy->length);
+    cp+= copy->length;
+  } 
+  
+  /* Now put the values of the remaining fields as soon as they are not nulls */ 
+  copy_end= field_descr+fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    Field *field= copy->field;
+    if (field && field->maybe_null() && field->is_null())
+    {
+      /* Do not copy a field if its value is null */
+      if (copy->referenced_field_no)
+        copy->offset= 0;
+      continue;              
+    }
+    /* Save the offset of the field to put it later at the end of the record */ 
+    if (copy->referenced_field_no)
+      copy->offset= cp-curr_rec_pos;
+
+    if (copy->type == CACHE_BLOB)
+    {
+      Field_blob *blob_field= (Field_blob *) copy->field;
+      if (last_record)
+      {
+        last_rec_blob_data_is_in_rec_buff= 1;
+        /* Put down the length of the blob and the pointer to the data */  
+	blob_field->get_image(cp, copy->length+sizeof(char*),
+                              blob_field->charset());
+	cp+= copy->length+sizeof(char*);
+      }
+      else
+      {
+        /* First put down the length of the blob and then copy the data */ 
+	blob_field->get_image(cp, copy->length, 
+			      blob_field->charset());
+	memcpy(cp+copy->length, copy->str, copy->blob_length);               
+	cp+= copy->length+copy->blob_length;
+      }
+    }
+    else
+    {
+      switch (copy->type) {
+      case CACHE_VARSTR1:
+        /* Copy the significant part of the short varstring field */ 
+        len= (uint) copy->str[0] + 1;
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_VARSTR2:
+        /* Copy the significant part of the long varstring field */
+        len= uint2korr(copy->str) + 2;
+        memcpy(cp, copy->str, len);
+        cp+= len;
+        break;
+      case CACHE_STRIPPED:
+      {
+        /* 
+          Put down the field value stripping all trailing spaces off.
+          After this insert the length of the written sequence of bytes.
+        */ 
+	uchar *str, *end;
+	for (str= copy->str, end= str+copy->length;
+	     end > str && end[-1] == ' ';
+	     end--) ;
+	len=(uint) (end-str);
+        int2store(cp, len);
+	memcpy(cp+2, str, len);
+	cp+= len+2;
+        break;
+      }
+      default:      
+        /* Copy the entire image of the field from the record buffer */
+	memcpy(cp, copy->str, copy->length);
+	cp+= copy->length;
+      }
+    }
+  }
+  
+  /* Add the offsets of the fields that are referenced from other caches */ 
+  if (referenced_fields)
+  {
+    uint cnt= 0;
+    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
+    {
+      if (copy->referenced_field_no)
+      {
+        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
+                         copy->offset);
+        cnt++;
+      }
+    }
+    cp+= size_of_fld_ofs*cnt;
+  }
+
+  if (rec_len_ptr)
+    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
+  last_rec_pos= curr_rec_pos; 
+  end_pos= pos= cp;
+  *is_full= last_record;
+  return (uint) (cp-init_pos);
+}
+
+
+/* 
+  Reset the join buffer for reading/writing: default implementation
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function reset the buffer for writing
+
+  DESCRIPTION
+    This default implementation of the virtual function reset() resets 
+    the join buffer for reading or writing.
+    If the buffer is reset for reading only the 'pos' value is reset
+    to point to the very beginning of the join buffer. If the buffer is
+    reset for writing additionally: 
+    - the counter of the records in the buffer is set to 0,
+    - the the value of 'last_rec_pos' gets pointing at the position just
+      before the buffer, 
+    - 'end_pos' is set to point to the beginning of the join buffer,
+    - the size of the auxiliary buffer is reset to 0,
+    - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
+    
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::reset(bool for_writing)
+{
+  pos= buff;
+  curr_rec_link= 0;
+  if (for_writing)
+  {
+    records= 0;
+    last_rec_pos= buff;
+    aux_buff_size= 0;
+    end_pos= pos;
+    last_rec_blob_data_is_in_rec_buff= 0;
+  }
+}
+
+/* 
+  Add a record into the join buffer: the default implementation
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function put_record writes
+    the next matching record into the join buffer.
+    It also links the record having been written into the join buffer with
+    the matched record in the previous cache if there is any.
+    The implementation assumes that the function get_curr_link() 
+    will return exactly the pointer to this matched record.
+
+  RETURN
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::put_record()
+{
+  bool is_full;
+  uchar *link= 0;
+  if (prev_cache)
+    link= prev_cache->get_curr_rec_link();
+  write_record_data(link, &is_full);
+  return is_full;
+}
+  
+
+/* 
+  Read the next record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record
+    reads fields of the next record from the join buffer of this cache.
+    The function also reads all other fields associated with this record
+    from the the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+    It is supposed that 'pos' points to the position in the buffer 
+    right after the previous record when the function is called.
+    When the function returns the 'pos' values is updated to point
+    to the position after the read record.
+    The value of 'curr_rec_pos' is also updated by the function to
+    point to the beginning of the first field of the record in the
+    join buffer.    
+
+  RETURN
+    TRUE  - there are no more records to read from the join buffer
+    FALSE - otherwise
+*/
+
+bool JOIN_CACHE::get_record()
+{ 
+  bool res;
+  uchar *prev_rec_ptr= 0;
+  if (with_length)
+    pos+= size_of_rec_len;
+  if (prev_cache)
+  {
+    pos+= prev_cache->get_size_of_rec_offset();
+    prev_rec_ptr= prev_cache->get_rec_ref(pos);
+  }
+  curr_rec_pos= pos;
+  if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER))
+  {
+    pos+= referenced_fields*size_of_fld_ofs;
+    if (prev_cache)
+      prev_cache->get_record_by_pos(prev_rec_ptr);
+  } 
+  return res; 
+}
+
+
+/* 
+  Read a positioned record from the join buffer: the default implementation
+
+  SYNOPSIS
+    get_record_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_record_pos
+    reads the fields of the record positioned at 'rec_ptr' from the join buffer.
+    The function also reads all other fields associated with this record 
+    from the the join buffers of the previous caches. The fields are read
+    into the corresponding record buffers.
+
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
+{
+  uchar *save_pos= pos;
+  pos= rec_ptr;
+  read_all_record_fields();
+  pos= save_pos;
+  if (prev_cache)
+  {
+    uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
+    prev_cache->get_record_by_pos(prev_rec_ptr);
+  }
+}
+
+
+/* 
+  Test the match flag from the referenced record: the default implementation
+
+  SYNOPSIS
+    get_match_flag_by_pos()
+      rec_ptr  position of the first field of the record in the join buffer
+
+  DESCRIPTION
+    This default implementation of the virtual function get_match_flag_by_pos
+    test the match flag for the record pointed by the reference at the position
+    rec_ptr. If the match flag in placed one of the previous buffers the function
+    first reaches the linked record fields in this buffer.
+
+  RETURN
+    TRUE    if the match flag is set on
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
+{
+  if (with_match_flag)
+    return test(*rec_ptr);
+  if (prev_cache)
+  {
+    uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
+    return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
+  } 
+  DBUG_ASSERT(0);
+  return FALSE;
+}
+
+
+/* 
+  Read all flag and data fields of a record from the join buffer
+
+  SYNOPSIS
+    read_all_record_fields()
+
+  DESCRIPTION
+    The function reads all flag and data fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos' which is
+    supposed to point to the beginning og the first record field.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN
+    (-1) - if there is no more records in the join buffer
+    length of the data read from the join buffer - otherwise
+*/
+
+uint JOIN_CACHE::read_all_record_fields()
+{
+  uchar *init_pos= pos;
+  
+  if (pos > last_rec_pos || !records)
+    return NO_MORE_RECORDS_IN_BUFFER;
+
+  /* First match flag, read null bitmaps and null_row flag for each table */
+  read_flag_fields();
+ 
+  /* Now read the remaining table fields if needed */
+  CACHE_FIELD *copy= field_descr+flag_fields;
+  CACHE_FIELD *copy_end= field_descr+fields;
+  bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
+  for ( ; copy < copy_end; copy++)
+    read_record_field(copy, blob_in_rec_buff);
+
+  return (uint) (pos-init_pos);
+}
+
+
+/* 
+  Read all flag fields of a record from the join buffer
+
+  SYNOPSIS
+    read_flag_fields()
+
+  DESCRIPTION
+    The function reads all flag fields of a record from the join
+    buffer into the corresponding record buffers.
+    The fields are read starting from the position 'pos'.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_flag_fields()
+{
+  uchar *init_pos= pos;
+  CACHE_FIELD *copy= field_descr;
+  CACHE_FIELD *copy_end= copy+flag_fields;
+  for ( ; copy < copy_end; copy++)
+  {
+    memcpy(copy->str, pos, copy->length);
+    pos+= copy->length;
+  }
+  return (pos-init_pos);
+}
+
+
+/* 
+  Read a data record field from the join buffer
+
+  SYNOPSIS
+    read_record_field()
+      copy             the descriptor of the data field to be read
+      blob_in_rec_buff indicates whether this is the field from the record
+                       whose blob data are in record buffers
+
+  DESCRIPTION
+    The function reads the data field specified by the parameter copy
+    from the join buffer into the corresponding record buffer. 
+    The field is read starting from the position 'pos'.
+    The data of blob values is not copied from the join buffer.
+    The function increments the value of 'pos' by the length of the
+    read data. 
+
+  RETURN
+    length of the data read from the join buffer
+*/
+
+uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
+{
+  uint len;
+  /* Do not copy the field if its value is null */ 
+  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
+    return 0;           
+  if (copy->type == CACHE_BLOB)
+  {
+    Field_blob *blob_field= (Field_blob *) copy->field;
+    /* 
+      Copy the length and the pointer to data but not the blob data 
+      itself to the record buffer
+    */ 
+    if (blob_in_rec_buff)
+    {
+      blob_field->set_image(pos, copy->length+sizeof(char*),
+			    blob_field->charset());
+      len= copy->length+sizeof(char*);
+    }
+    else
+    {
+      blob_field->set_ptr(pos, pos+copy->length);
+      len= copy->length+blob_field->get_length();
+    }
+  }
+  else
+  {
+    switch (copy->type) {
+    case CACHE_VARSTR1:
+      /* Copy the significant part of the short varstring field */
+      len= (uint) pos[0] + 1;
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_VARSTR2:
+      /* Copy the significant part of the long varstring field */
+      len= uint2korr(pos) + 2;
+      memcpy(copy->str, pos, len);
+      break;
+    case CACHE_STRIPPED:
+      /* Pad the value by spaces that has been stripped off */
+      len= uint2korr(pos);
+      memcpy(copy->str, pos+2, len);
+      memset(copy->str+len, ' ', copy->length-len);
+      len+= 2;
+      break;
+    default:
+      /* Copy the entire image of the field from the record buffer */
+      len= copy->length;
+      memcpy(copy->str, pos, len);
+    }
+  }
+  pos+= len;
+  return len;
+}
+
+
+/* 
+  Read a referenced field from the join buffer
+
+  SYNOPSIS
+    read_referenced_field()
+      copy         pointer to the descriptor of the referenced field
+      rec_ptr      pointer to the record that may contain this field
+      len  IN/OUT  total length of the record fields 
+
+  DESCRIPTION
+    The function checks whether copy points to a data field descriptor
+    for this cache object. If it does not then the function returns
+    FALSE. Otherwise the function reads the field of the record in
+    the join buffer pointed by 'rec_ptr' into the corresponding record
+    buffer and returns TRUE.
+    If the value of *len is 0 then the function sets it to the total
+    length of the record fields including possible trailing offset
+    values. Otherwise *len is supposed to provide this value that
+    has been obtained earlier.  
+
+  RETURN
+    TRUE   'copy' points to a data descriptor of this join cache
+    FALSE  otherwise
+*/
+
+bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
+                                       uchar *rec_ptr, 
+                                       uint *len)
+{
+  uchar *ptr;
+  uint offset;
+  if (copy < field_descr || copy >= field_descr+fields)
+    return FALSE;
+  if (!*len)
+  {
+    /* Get the total length of the record fields */ 
+    uchar *len_ptr= rec_ptr;
+    if (prev_cache)
+      len_ptr-= prev_cache->get_size_of_rec_offset();
+    *len= get_rec_length(len_ptr-size_of_rec_len);
+  }
+  
+  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);  
+  offset= get_fld_offset(ptr+ *len - 
+                         size_of_fld_ofs*
+                         (referenced_fields+1-copy->referenced_field_no));  
+  bool is_null= FALSE;
+  if (offset == 0 && flag_fields)
+    is_null= TRUE;
+  if (is_null)
+    copy->field->set_null();
+  else
+  {
+    uchar *save_pos= pos;
+    copy->field->set_notnull(); 
+    pos= rec_ptr+offset;
+    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
+    pos= save_pos;
+  }
+  return TRUE;
+}
+   
+
+/* 
+  Skip record from join buffer if its match flag is on: default implementation
+
+  SYNOPSIS
+    skip_record_if_match()
+
+  DESCRIPTION
+    This default implementation of the virtual function skip_record_if_match
+    skips the next record from the join buffer if its  match flag is set on.
+    If the record is skipped the value of 'pos' is set to points to the position
+    right after the record.
+
+  RETURN
+    TRUE  - the match flag is on and the record has been skipped
+    FALSE - the match flag is off 
+*/
+
+bool JOIN_CACHE::skip_record_if_match()
+{
+  DBUG_ASSERT(with_length);
+  uint offset= size_of_rec_len;
+  if (prev_cache)
+    offset+= prev_cache->get_size_of_rec_offset();
+  /* Check whether the match flag is on */
+  if (get_match_flag_by_pos(pos+offset))
+  {
+    pos+= size_of_rec_len + get_rec_length(pos);
+    return TRUE;
+  }
+  return FALSE;
+}      
+
+
+/* 
+  Restore the fields of the last record from the join buffer
+ 
+  SYNOPSIS
+    restore_last_record()
+
+  DESCRIPTION
+    This function restore the values of the fields of the last record put
+    into join buffer in record buffers. The values most probably have been
+    overwritten by the field values from other records when they were read
+    from the join buffer into the record buffer in order to check pushdown
+    predicates.
+
+  RETURN
+    none
+*/
+
+void JOIN_CACHE::restore_last_record()
+{
+  if (records)
+    get_record_by_pos(last_rec_pos);
+}
+
+
+/*
+  Join records from the join buffer with records from the next join table    
+
+  SYNOPSIS
+    join_records()
+      skip_last    do not find matches for the last record from the buffer
+
+  DESCRIPTION
+    The functions extends all records from the join buffer by the matched
+    records from join_tab. In the case of outer join operation it also
+    adds null complementing extensions for the records from the join buffer
+    that have no match. 
+    No extensions are generated for the last record from the buffer if
+    skip_last is true.  
+
+  NOTES
+    The function must make sure that if linked join buffers are used then
+    a join buffer cannot be refilled again until all extensions in the
+    buffers chained to this one are generated.
+    Currently an outer join operation with several inner tables always uses
+    at least two linked buffers with the match join flags placed in the
+    first buffer. Any record composed of rows of the inner tables that
+    matches a record in this buffer must refer to the position of the
+    corresponding match flag.
+
+  IMPLEMENTATION
+    When generating extensions for outer tables of an outer join operation
+    first we generate all extensions for those records from the join buffer
+    that have matches, after which null complementing extension for all
+    unmatched records from the join buffer are generated.  
+      
+  RETURN
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
+{
+  JOIN_TAB *tab;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
+
+  if (outer_join_first_inner && !join_tab->first_unmatched)
+    join_tab->not_null_compl= TRUE;   
+
+  if (!join_tab->first_unmatched)
+  {
+    /* Find all records from join_tab that match records from join buffer */
+    rc= join_matching_records(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+    if (outer_join_first_inner)
+    {
+      if (next_cache)
+      {
+        /* 
+          Ensure that all matches for outer records from join buffer are to be
+          found. Now we ensure that all full records are found for records from
+          join buffer. Generally this is an overkill.
+          TODO: Ensure that only matches of the inner table records have to be
+          found for the records from join buffer.
+	*/ 
+        rc= next_cache->join_records(skip_last);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+          goto finish;
+      }
+      join_tab->not_null_compl= FALSE;
+      /* Prepare for generation of null complementing extensions */
+      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+        tab->first_unmatched= join_tab->first_inner;
+    }
+  }
+  if (join_tab->first_unmatched)
+  {
+    if (is_key_access())
+      restore_last_record();
+
+    /* 
+      Generate all null complementing extensions for the records from
+      join buffer that don't have any matching rows from the inner tables.
+    */
+    reset(FALSE);
+    rc= join_null_complements(skip_last);   
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+  if(next_cache)
+  {
+    /* 
+      When using linked caches we must ensure the records in the next caches
+      that refer to the records in the join buffer are fully extended.
+      Otherwise we could have references to the records that have been
+      already erased from the join buffer and replaced for new records. 
+    */ 
+    rc= next_cache->join_records(skip_last);
+    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      goto finish;
+  }
+  if (outer_join_first_inner)
+  {
+    /* 
+      All null complemented rows have been already generated for all
+      outer records from join buffer. Restore the state of the
+      first_unmatched values to 0 to avoid another null complementing.
+    */
+    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
+      tab->first_unmatched= 0;
+  } 
+ 
+  if (skip_last)
+  {
+    DBUG_ASSERT(!is_key_access());
+    /*
+       Restore the last record from the join buffer to generate
+       all extentions for it.
+    */
+    get_record();		               
+  }
+
+finish:
+  restore_last_record();
+  reset(TRUE);
+  return rc;
+}
+
+
+/*
+  Using BNL find matches from the next table for records from the join buffer   
+
+  SYNOPSIS
+    join_matching_records()
+      skip_last    do not look for matches for the last partial join record 
+
+  DESCRIPTION
+    The function retrieves all rows of the join_tab table and check whether
+    they match partial join records from the join buffer. If a match is found
+    the function will call the sub_select function trying to look for matches
+    for the remaining join operations.
+    This function currently is called only from the function join_records.    
+    If the value of skip_last is true the function writes the partial join
+    record from the record buffer into the join buffer to save its value for
+    the future processing in the caller function.
+
+  NOTES
+    The function produces all matching extensions for the records in the 
+    join buffer following the path of the Blocked Nested Loops algorithm. 
+    When an outer join operation is performed all unmatched records from
+    the join buffer must be extended by null values. The function 
+    'join_null_complements' serves this purpose.  
+      
+  RETURN
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
+{
+  uint cnt;
+  int error;
+  JOIN_TAB *tab;
+  READ_RECORD *info;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool check_only_first_match= join_tab->check_only_first_match();
+  SQL_SELECT *select= join_tab->cache_select;
+
+  join_tab->table->null_row= 0;
+
+  /* Return at once if there are no records in the join buffer */
+  if (!records)     
+    return NESTED_LOOP_OK;   
+ 
+  /* 
+    When joining we read records from the join buffer back into record buffers.
+    If matches for the last partial join record are found through a call to
+    the sub_select function then this partial join record must be saved in the
+    join buffer in order to be restored just before the sub_select call.
+  */             
+  if (skip_last)     
+    put_record();     
+ 
+  if (join_tab->use_quick == 2 && join_tab->select->quick)
+  { 
+    /* A dynamic range access was used last. Clean up after it */
+    delete join_tab->select->quick;
+    join_tab->select->quick= 0;
+  }
+
+  for (tab= join->join_tab; tab != join_tab ; tab++)
+  {
+    tab->status= tab->table->status;
+    tab->table->status= 0;
+  }
+
+  /* Start retrieving all records of the joined table */
+  if ((error= join_init_read_record(join_tab))) 
+  {
+    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+    goto finish;
+  }
+
+  info= &join_tab->read_record;
+  do
+  {
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish; 
+    }
+    int err= 0;
+
+     if (rc == NESTED_LOOP_OK)
+       update_virtual_fields(join->thd, join_tab->table);
+ 
+    /* 
+      Do not look for matches if the last read record of the joined table
+      does not meet the conditions that have been pushed to this table
+    */
+    if (rc == NESTED_LOOP_OK && 
+        (!select || (err= select->skip_record(join->thd)) != 0))
+    {
+      if (err < 0)
+        return NESTED_LOOP_ERROR;
+      rc= NESTED_LOOP_OK;
+
+      /* Prepare to read records from the join buffer */
+      reset(FALSE);
+
+      /* Read each record from the join buffer and look for matches */
+      for (cnt= records - test(skip_last) ; cnt; cnt--)
+      { 
+        /* 
+          If only the first match is needed and it has been already found for
+          the next record read from the join buffer then the record is skipped.
+	*/
+        if (!check_only_first_match || !skip_record_if_match())
+        {
+	  get_record();
+          rc= generate_full_extensions(get_curr_rec());
+          if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+	    goto finish;   
+        }
+      }
+    }
+  } while (!(error= info->read_record(info)));
+
+  if (error > 0)				// Fatal error
+    rc= NESTED_LOOP_ERROR; 
+finish:                  
+  for (tab= join->join_tab; tab != join_tab ; tab++)
+    tab->table->status= tab->status;
+  return rc;
+}
+
+     
+/*
+  Set match flag for a record in join buffer if it has not been set yet    
+
+  SYNOPSIS
+    set_match_flag_if_none()
+      first_inner     the join table to which this flag is attached to
+      rec_ptr         pointer to the record in the join buffer 
+
+  DESCRIPTION
+    If the records of the table are accumulated in a join buffer the function
+    sets the match flag for the record in the buffer that is referred to by
+    the record from this cache positioned at 'rec_ptr'. 
+    The function also sets the match flag 'found' of the table first inner
+    if it has not been set before. 
+
+  NOTES
+    The function assumes that the match flag for any record in any cache
+    is placed in the first byte occupied by the record fields. 
+
+  RETURN
+    TRUE   the match flag is set by this call for the first time
+    FALSE  the match flag has been set before this call
+*/ 
+
+bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
+                                        uchar *rec_ptr)
+{
+  if (!first_inner->cache)
+  {
+    /* 
+      Records of the first inner table to which the flag is attached to
+      are not accumulated in a join buffer.
+    */
+    if (first_inner->found)
+      return FALSE;
+    else
+    {
+      first_inner->found= 1;
+      return TRUE;
+    }
+  }
+  JOIN_CACHE *cache= this;
+  while (cache->join_tab != first_inner)
+  {
+    cache= cache->prev_cache;
+    DBUG_ASSERT(cache);
+    rec_ptr= cache->get_rec_ref(rec_ptr);
+  } 
+  if (rec_ptr[0] == 0)
+  {
+    rec_ptr[0]= 1;
+    first_inner->found= 1;
+    return TRUE;  
+  }
+  return FALSE;
+}
+
+
+/*
+  Generate all full extensions for a partial join record in the buffer    
+
+  SYNOPSIS
+    generate_full_extensions()
+      rec_ptr     pointer to the record from join buffer to generate extensions 
+
+  DESCRIPTION
+    The function first checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If it is the
+    case the function calls the join_tab->next_select method to generate
+    all full extension for this partial join match.
+      
+  RETURN
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
+{
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  
+  /*
+    Check whether the extended partial join record meets
+    the pushdown conditions. 
+  */
+  if (check_match(rec_ptr))
+  {    
+    int res= 0;
+
+    if (!join_tab->check_weed_out_table || 
+        !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table)))
+    {
+      set_curr_rec_link(rec_ptr);
+      rc= (join_tab->next_select)(join, join_tab+1, 0);
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      {
+        reset(TRUE);
+        return rc;
+      }
+    }
+    if (res == -1)
+    {
+      rc= NESTED_LOOP_ERROR;
+      return rc;
+    }
+  }
+  return rc;
+}
+
+
+/*
+  Check matching to a partial join record from the join buffer    
+
+  SYNOPSIS
+    check_match()
+      rec_ptr     pointer to the record from join buffer to check matching to 
+
+  DESCRIPTION
+    The function checks whether the current record of 'join_tab' matches
+    the partial join record from join buffer located at 'rec_ptr'. If this is
+    the case and 'join_tab' is the last inner table of a semi-join or an outer
+    join the function turns on the match flag for the 'rec_ptr' record unless
+    it has been already set.
+
+  NOTES
+    Setting the match flag on can trigger re-evaluation of pushdown conditions
+    for the record when join_tab is the last inner table of an outer join.
+      
+  RETURN
+    TRUE   there is a match
+    FALSE  there is no match
+*/ 
+
+inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
+{
+  /* Check whether pushdown conditions are satisfied */
+  if (join_tab->select && join_tab->select->skip_record(join->thd) < 1)
+    return FALSE;
+
+  if (!join_tab->is_last_inner_table())
+    return TRUE;
+
+  /* 
+     This is the last inner table of an outer join,
+     and maybe of other embedding outer joins, or
+     this is the last inner table of a semi-join.
+  */
+  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
+  do
+  {
+    set_match_flag_if_none(first_inner, rec_ptr);
+    if (first_inner->check_only_first_match() &&
+        !join_tab->first_inner)
+      return TRUE;
+    /* 
+      This is the first match for the outer table row.
+      The function set_match_flag_if_none has turned the flag
+      first_inner->found on. The pushdown predicates for
+      inner tables must be re-evaluated with this flag on.
+      Note that, if first_inner is the first inner table 
+      of a semi-join, but is not an inner table of an outer join
+      such that 'not exists' optimization can  be applied to it, 
+      the re-evaluation of the pushdown predicates is not needed.
+    */      
+    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
+    {
+      if (tab->select && tab->select->skip_record(join->thd) < 1)
+        return FALSE;
+    }
+  }
+  while ((first_inner= first_inner->first_upper) &&
+         first_inner->last_inner == join_tab);
+  
+  return TRUE;
+} 
+
+
+/*
+  Add null complements for unmatched outer records from join buffer    
+
+  SYNOPSIS
+    join_null_complements()
+      skip_last    do not add null complements for the last record 
+
+  DESCRIPTION
+    This function is called only for inner tables of outer joins.
+    The function retrieves all rows from the join buffer and adds null
+    complements for those of them that do not have matches for outer
+    table records.
+    If the 'join_tab' is the last inner table of the embedding outer 
+    join and the null complemented record satisfies the outer join
+    condition then the the corresponding match flag is turned on
+    unless it has been set earlier. This setting may trigger
+    re-evaluation of pushdown conditions for the record. 
+
+  NOTES
+    The same implementation of the virtual method join_null_complements
+    is used for JOIN_CACHE_BNL and JOIN_CACHE_BKA.
+      
+  RETURN
+    return one of enum_nested_loop_state.
+*/ 
+
+enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
+{
+  uint cnt; 
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool is_first_inner= join_tab == join_tab->first_unmatched;
+  bool is_last_inner= join_tab == join_tab->first_unmatched->last_inner;
+ 
+  /* Return at once if there are no records in the join buffer */
+  if (!records)
+    return NESTED_LOOP_OK;
+  
+  cnt= records - (is_key_access() ? 0 : test(skip_last));
+
+  /* This function may be called only for inner tables of outer joins */ 
+  DBUG_ASSERT(join_tab->first_inner);
+
+  for ( ; cnt; cnt--)
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+      goto finish;
+    }
+    /* Just skip the whole record if a match for it has been already found */
+    if (!is_first_inner || !skip_record_if_match())
+    {
+      get_record();
+      /* The outer row is complemented by nulls for each inner table */
+      restore_record(join_tab->table, s->default_values);
+      mark_as_null_row(join_tab->table);  
+      /* Check all pushdown conditions attached to the inner table */
+      join_tab->first_unmatched->found= 1;
+      if (join_tab->select && join_tab->select->skip_record(join->thd) < 1)
+        continue;
+      if (is_last_inner)
+      { 
+        JOIN_TAB *first_upper= join_tab->first_unmatched->first_upper;
+        while (first_upper && first_upper->last_inner == join_tab)
+        {
+          set_match_flag_if_none(first_upper, get_curr_rec());
+          for (JOIN_TAB* tab= first_upper; tab <= join_tab; tab++)
+          {
+            if (tab->select && tab->select->skip_record(join->thd) < 1)
+              goto next;
+          }
+          first_upper= first_upper->first_upper;
+        }
+      }
+      /* Find all matches for the remaining join tables */
+      rc= (*join_tab->next_select)(join, join_tab+1, 0);
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+      {
+        reset(TRUE);
+        goto finish;
+      }
+    }
+  next:
+    ;
+  }
+
+finish:
+  return rc;
+}
+
+
+/*
+  Initialize retrieval of range sequence for BKA algorithm
+    
+  SYNOPSIS
+    bka_range_seq_init()
+     init_params   pointer to the BKA join cache object
+     n_ranges      the number of ranges obtained 
+     flags         combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKA
+    object. The function prepares for an iteration over the join keys
+    built for all records from the cache join buffer.
+
+  NOTE
+    This function are used only as a callback function.    
+
+  RETURN
+    init_param value that is to be used as a parameter of bka_range_seq_next()
+*/    
+
+static 
+range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  DBUG_ENTER("bka_range_seq_init");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
+  cache->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the key over the next record from the join buffer used by BKA  
+    
+  SYNOPSIS
+    bka_range_seq_next()
+      seq    the value returned by  bka_range_seq_init
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA
+    object. The function returns a pointer to the range descriptor
+    for the key built over the next record from the join buffer.
+
+  NOTE
+    This function are used only as a callback function.
+   
+  RETURN
+    0   ok, the range structure filled with info about the next key
+    1   no more ranges
+*/    
+
+static 
+uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bka_range_seq_next");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+  {
+    start_key->keypart_map= (1 << ref->key_parts) - 1;
+    start_key->flag= HA_READ_KEY_EXACT;
+    range->end_key= *start_key;
+    range->end_key.flag= HA_READ_AFTER_KEY;
+    range->ptr= (char *) cache->get_curr_rec();
+    range->range_flag= EQ_RANGE;
+    DBUG_RETURN(0);
+  } 
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKA buffer
+
+  SYNOPSIS
+    bka_range_seq_skip_record()
+      seq              value returned by bka_range_seq_init()
+      range_info       information about the next range
+      rowid [NOT USED] rowid of the record to be checked 
+
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
+    The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
+    object. The function returns TRUE if the record with this range_info
+    is to be filtered out from the stream of records returned by
+    multi_range_read_next(). 
+
+  NOTE
+    This function are used only as a callback function.
+
+  RETURN
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static 
+bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
+{
+  DBUG_ENTER("bka_range_seq_skip_record");
+  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
+  bool res= cache->get_match_flag_by_pos((uchar *) range_info);
+  DBUG_RETURN(res);
+}
+
+/*
+  Using BKA find matches from the next table for records from the join buffer   
+
+  SYNOPSIS
+    join_matching_records()
+      skip_last    do not look for matches for the last partial join record 
+
+  DESCRIPTION
+    This function can be used only when the table join_tab can be accessed
+    by keys built over the fields of previous join tables.
+    The function retrieves all partial join records from the join buffer and
+    for each of them builds the key value to access join_tab, performs index
+    look-up with this key and selects matching records yielded by this look-up
+    If a match is found the function will call the sub_select function trying
+    to look for matches for the remaining join operations.
+    This function currently is called only from the function join_records.    
+    It's assumed that this function is always called with the skip_last 
+    parameter equal to false.
+
+  NOTES
+    The function produces all matching extensions for the records in the 
+    join buffer following the path of the Batched Key Access algorithm. 
+    When an outer join operation is performed all unmatched records from
+    the join buffer must be extended by null values. The function 
+    join_null_complements serves this purpose.
+    The Batched Key Access algorithm assumes that key accesses are batched.
+    In other words it assumes that, first, either keys themselves or the
+    corresponding rowids (primary keys) are accumulated in a buffer, then
+    data rows from  join_tab are fetched for all of them. When a row is
+    fetched it is always returned with a reference to the key by which it
+    has been accessed.
+    When key values are batched we can save on the number of the server 
+    requests for index lookups. For the remote engines, like NDB cluster, it
+    essentially reduces the number of round trips between the server and
+    the engine when performing a join operation. 
+    When the rowids for the keys are batched we can optimize the order
+    in what we fetch the data for this rowids. The performance benefits of
+    this optimization can be significant for such engines as MyISAM, InnoDB.
+    What is exactly batched are hidden behind implementations of
+    MRR handler interface that is supposed to be appropriately chosen
+    for each engine. If for a engine no specific implementation of the MRR
+    interface is supllied then the default implementation is used. This
+    implementation actually follows the path of Nested Loops Join algorithm.
+    In this case BKA join surely will demonstrate a worse performance than
+    NL join. 
+            
+  RETURN
+    return one of enum_nested_loop_state
+*/
+
+enum_nested_loop_state JOIN_CACHE_BKA::join_matching_records(bool skip_last)
+{
+  int error;
+  handler *file= join_tab->table->file;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  uchar *rec_ptr= 0;
+  bool check_only_first_match= join_tab->check_only_first_match();
+
+  /* Set functions to iterate over keys in the join buffer */
+
+  RANGE_SEQ_IF seq_funcs= { bka_range_seq_init, 
+                            bka_range_seq_next,
+                            check_only_first_match ?
+                              bka_range_seq_skip_record : 0,
+                            join_tab->cache_idx_cond ?
+                              bka_skip_index_tuple : 0 };
+
+  /* The value of skip_last must be always FALSE when this function is called */
+  DBUG_ASSERT(!skip_last);
+
+  /* Return at once if there are no records in the join buffer */
+  if (!records)
+    return NESTED_LOOP_OK;  
+                   
+  rc= init_join_matching_records(&seq_funcs, records);
+  if (rc != NESTED_LOOP_OK)
+    goto finish;
+
+  while (!(error= file->multi_range_read_next((char **) &rec_ptr)))
+  {
+    if (join->thd->killed)
+    {
+      /* The user has aborted the execution of the query */
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED; 
+      goto finish;
+    }
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    /* 
+      If only the first match is needed and it has been already found 
+      for the associated partial join record then the returned candidate
+      is discarded.
+    */
+    if (rc == NESTED_LOOP_OK &&
+        (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
+    {
+      get_record_by_pos(rec_ptr);
+      update_virtual_fields(join->thd, join_tab->table);
+      rc= generate_full_extensions(rec_ptr);
+      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+	goto finish;   
+    }
+  }
+
+  if (error > 0 && error != HA_ERR_END_OF_FILE)	   
+    return NESTED_LOOP_ERROR; 
+finish:                  
+  return end_join_matching_records(rc);
+}
+
+
+
+/* 
+  Prepare to search for records that match records from the join buffer
+
+  SYNOPSIS
+    init_join_matching_records()
+      seq_funcs    structure of range sequence interface
+      ranges       number of keys/ranges in the sequence
+
+  DESCRIPTION
+    This function calls the multi_range_read_init function to set up
+    the BKA process of generating the keys from the records in the join
+    buffer and looking for matching records from the table to be joined.
+    The function passes as a parameter a structure of functions that
+    implement the range sequence interface. This interface is used to
+    enumerate all generated keys and optionally to filter the matching
+    records returned by the multi_range_read_next calls from the
+    intended invocation of the join_matching_records method. The
+    multi_range_read_init function also receives the parameters for
+    MRR buffer to be used and flags specifying the mode in which
+    this buffer will be functioning.
+    The number of keys in the sequence expected by multi_range_read_init
+    is passed through the parameter ranges.  
+    
+  RETURN
+    return one of enum_nested_loop_state
+*/
+
+enum_nested_loop_state 
+JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
+{
+  int error;
+  handler *file= join_tab->table->file;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+
+  join_tab->table->null_row= 0;
+
+
+  /* Dynamic range access is never used with BKA */
+  DBUG_ASSERT(join_tab->use_quick != 2);
+
+  for (JOIN_TAB *tab =join->join_tab; tab != join_tab ; tab++)
+  {
+    tab->status= tab->table->status;
+    tab->table->status= 0;
+  }
+
+  init_mrr_buff();
+
+  /* 
+    Prepare to iterate over keys from the join buffer and to get
+    matching candidates obtained with MMR handler functions.
+  */ 
+  if (!file->inited)
+    file->ha_index_init(join_tab->ref.key, 1);
+  if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
+					  mrr_mode, &mrr_buff)))
+    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
+  
+  return rc;
+}
+
+
+/* 
+  Finish searching for records that match records from the join buffer
+
+  SYNOPSIS
+    end_join_matching_records()
+      rc      return code passed by the join_matching_records function
+
+  DESCRIPTION
+    This function perform final actions on searching for all matches for
+    the records from the join buffer and building all full join extensions
+    of the records with these matches. 
+    
+  RETURN
+    return code rc passed to the function as a parameter
+*/
+
+enum_nested_loop_state 
+JOIN_CACHE_BKA::end_join_matching_records(enum_nested_loop_state rc)
+{
+  for (JOIN_TAB *tab=join->join_tab; tab != join_tab ; tab++)
+    tab->table->status= tab->status;
+  return rc;  
+}
+
+
+/* 
+  Get the key built over the next record from BKA join buffer
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads key fields from the current record in the join buffer.
+    and builds the key value out of these fields that will be used to access
+    the 'join_tab' table. Some of key fields may belong to previous caches.
+    They are accessed via record references to the record parts stored in the
+    previous join buffers. The other key fields always are placed right after
+    the flag fields of the record.
+    If the key is embedded, which means that its value can be read directly
+    from the join buffer, then *key is set to the beginning of the key in
+    this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
+    The function returns the length of the key if it succeeds ro read it.
+    If is assumed that the functions starts reading at the position of
+    the record length which is provided for each records in a BKA cache.
+    After the key is built the 'pos' value points to the first position after
+    the current record. 
+    The function returns 0 if the initial position is after the beginning
+    of the record fields for last record from the join buffer. 
+
+  RETURN
+    length of the key value - if the starting value of 'pos' points to
+    the position before the fields for the last record,
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
+{
+  uint len;
+  uint32 rec_len;
+  uchar *init_pos;
+  JOIN_CACHE *cache;
+  
+  if (pos > last_rec_pos || !records)
+    return 0;
+
+  /* Any record in a BKA cache is prepended with its length */
+  DBUG_ASSERT(with_length);
+   
+  /* Read the length of the record */
+  rec_len= get_rec_length(pos);
+  pos+= size_of_rec_len; 
+  init_pos= pos;
+
+  /* Read a reference to the previous cache if any */
+  if (prev_cache)
+    pos+= prev_cache->get_size_of_rec_offset();
+
+  curr_rec_pos= pos;
+
+  /* Read all flag fields of the record */
+  read_flag_fields();
+ 
+  if (use_emb_key)
+  {
+    /* An embedded key is taken directly from the join buffer */
+    *key= pos;
+    len= emb_key_length;
+  }
+  else
+  {
+    /* Read key arguments from previous caches if there are any such fields */
+    if (external_key_arg_fields)
+    {
+      uchar *rec_ptr= curr_rec_pos;
+      uint key_arg_count= external_key_arg_fields;
+      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
+      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
+      { 
+        uint len= 0;
+        DBUG_ASSERT(cache);
+        rec_ptr= cache->get_rec_ref(rec_ptr);
+        while (!cache->referenced_fields)
+        {
+          cache= cache->prev_cache;
+          DBUG_ASSERT(cache);
+          rec_ptr= cache->get_rec_ref(rec_ptr);
+        }
+        while (key_arg_count && 
+               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
+        {
+          copy_ptr++;
+          --key_arg_count;
+        }
+      }
+    }
+    
+    /* 
+      Read the other key arguments from the current record. The fields for
+      these arguments are always first in the sequence of the record's fields.
+    */     
+    CACHE_FIELD *copy= field_descr+flag_fields;
+    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
+    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
+    for ( ; copy < copy_end; copy++)
+      read_record_field(copy, blob_in_rec_buff);
+    
+    /* Build the key over the fields read into the record buffers */ 
+    TABLE_REF *ref= &join_tab->ref;
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    *key= ref->key_buff;
+    len= ref->key_length;
+  }
+
+  pos= init_pos+rec_len;
+
+  return len;
+} 
+
+
+/* 
+  Initialize a BKA_UNIQUE cache       
+
+  SYNOPSIS
+    init()
+
+  DESCRIPTION
+    The function initializes the cache structure. It supposed to be called
+    right after a constructor for the JOIN_CACHE_BKA_UNIQUE.
+    The function allocates memory for the join buffer and for descriptors of
+    the record fields stored in the buffer.
+    The function also estimates the number of hash table entries in the hash
+    table to be used and initializes this hash table.
+
+  NOTES
+    The code of this function should have been included into the constructor
+    code itself. However the new operator for the class JOIN_CACHE_BKA_UNIQUE
+    would never fail while memory allocation for the join buffer is not 
+    absolutely unlikely to fail. That's why this memory allocation has to be
+    placed in a separate function that is called in a couple with a cache 
+    constructor.
+    It is quite natural to put almost all other constructor actions into
+    this function.     
+  
+  RETURN
+    0   initialization with buffer allocations has been succeeded
+    1   otherwise
+*/
+
+int JOIN_CACHE_BKA_UNIQUE::init()
+{
+  int rc= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  
+  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::init");
+
+  hash_table= 0;
+  key_entries= 0;
+
+  if ((rc= JOIN_CACHE_BKA::init()))
+    DBUG_RETURN (rc);
+
+  key_length= ref->key_length;
+
+  /* Take into account a reference to the next record in the key chain */
+  pack_length+= get_size_of_rec_offset(); 
+ 
+  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
+  uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());  
+  for (size_of_key_ofs= 2;
+       size_of_key_ofs <= max_size_of_key_ofs;
+       size_of_key_ofs+= 2)
+  {    
+    key_entry_length= get_size_of_rec_offset() + // key chain header
+                      size_of_key_ofs +          // reference to the next key 
+                      (use_emb_key ?  get_size_of_rec_offset() : key_length);
+
+    uint n= buff_size / (pack_length+key_entry_length+size_of_key_ofs);
+
+    /*
+      TODO: Make a better estimate for this upper bound of
+            the number of records in in the join buffer.
+    */
+    uint max_n= buff_size / (pack_length-length+
+                             key_entry_length+size_of_key_ofs);
+
+    hash_entries= (uint) (n / 0.7);
+    
+    if (offset_size(max_n*key_entry_length) <=
+        size_of_key_ofs)
+      break;
+  }
+   
+  /* Initialize the hash table */ 
+  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
+  cleanup_hash_table();
+  curr_key_entry= hash_table;
+
+  pack_length+= key_entry_length;
+  pack_length_with_blob_ptrs+= get_size_of_rec_offset() + key_entry_length;
+
+  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
+                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
+
+  data_fields_offset= 0;
+  if (use_emb_key)
+  {
+    CACHE_FIELD *copy= field_descr;
+    CACHE_FIELD *copy_end= copy+flag_fields;
+    for ( ; copy < copy_end; copy++)
+      data_fields_offset+= copy->length;
+  } 
+
+  DBUG_RETURN(rc);
+}
+
+
+/* 
+  Reset the JOIN_CACHE_BKA_UNIQUE  buffer for reading/writing
+
+  SYNOPSIS
+    reset()
+      for_writing  if it's TRUE the function reset the buffer for writing
+
+  DESCRIPTION
+    This implementation of the virtual function reset() resets the join buffer
+    of the JOIN_CACHE_BKA_UNIQUE class for reading or writing.
+    Additionally to what the default implementation does this function
+    cleans up the hash table allocated within the buffer.  
+    
+  RETURN
+    none
+*/
+ 
+void JOIN_CACHE_BKA_UNIQUE::reset(bool for_writing)
+{
+  this->JOIN_CACHE::reset(for_writing);
+  if (for_writing && hash_table)
+    cleanup_hash_table();
+  curr_key_entry= hash_table;
+}
+
+/* 
+  Add a record into the JOIN_CACHE_BKA_UNIQUE buffer
+
+  SYNOPSIS
+    put_record()
+
+  DESCRIPTION
+    This implementation of the virtual function put_record writes the next
+    matching record into the join buffer of the JOIN_CACHE_BKA_UNIQUE class.
+    Additionally to what the default implementation does this function
+    performs the following. 
+    It extracts from the record the key value used in lookups for matching
+    records and searches for this key in the hash tables from the join cache.
+    If it finds the key in the hash table it joins the record to the chain
+    of records with this key. If the key is not found in the hash table the
+    key is placed into it and a chain containing only the newly added record 
+    is attached to the key entry. The key value is either placed in the hash 
+    element added for the key or, if the use_emb_key flag is set, remains in
+    the record from the partial join.
+    
+  RETURN
+    TRUE    if it has been decided that it should be the last record
+            in the join buffer,
+    FALSE   otherwise
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::put_record()
+{
+  bool is_full;
+  uchar *key;
+  uint key_len= key_length;
+  uchar *key_ref_ptr;
+  uchar *link= 0;
+  TABLE_REF *ref= &join_tab->ref;
+  uchar *next_ref_ptr= pos;
+
+  pos+= get_size_of_rec_offset();
+  /* Write the record into the join buffer */  
+  if (prev_cache)
+    link= prev_cache->get_curr_rec_link();
+  write_record_data(link, &is_full);
+
+  if (use_emb_key)
+    key= get_curr_emb_key();
+  else
+  {
+    /* Build the key over the fields read into the record buffers */ 
+    cp_buffer_from_ref(join->thd, join_tab->table, ref);
+    key= ref->key_buff;
+  }
+
+  /* Look for the key in the hash table */
+  if (key_search(key, key_len, &key_ref_ptr))
+  {
+    uchar *last_next_ref_ptr;
+    /* 
+      The key is found in the hash table. 
+      Add the record to the circular list of the records attached to this key.
+      Below 'rec' is the record to be added into the record chain for the found
+      key, 'key_ref' points to a flatten representation of the st_key_entry 
+      structure that contains the key and the head of the record chain.
+    */
+    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
+    /* rec->next_rec= key_entry->last_rec->next_rec */
+    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
+    /* key_entry->last_rec->next_rec= rec */ 
+    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
+    /* key_entry->last_rec= rec */
+    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
+  }
+  else
+  {
+    /* 
+      The key is not found in the hash table.
+      Put the key into the join buffer linking it with the keys for the
+      corresponding hash entry. Create a circular list with one element
+      referencing the record and attach the list to the key in the buffer.
+    */
+    uchar *cp= last_key_entry;
+    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
+    store_next_key_ref(key_ref_ptr, cp);
+    store_null_key_ref(cp);
+    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
+    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
+    if (use_emb_key)
+    {
+      cp-= get_size_of_rec_offset();
+      store_emb_key_ref(cp, key);
+    }
+    else
+    {
+      cp-= key_len;
+      memcpy(cp, key, key_len);
+    }
+    last_key_entry= cp;
+    /* Increment the counter of key_entries in the hash table */ 
+    key_entries++;
+  }  
+  return is_full;
+}
+
+
+/*
+  Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer
+
+  SYNOPSIS
+    get_record()
+
+  DESCRIPTION
+    Additionally to what the default implementation of the virtual 
+    function get_record does this implementation skips the link element
+    used to connect the records with the same key into a chain. 
+
+  RETURN
+    TRUE  - there are no more records to read from the join buffer
+    FALSE - otherwise
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::get_record()
+{ 
+  pos+= get_size_of_rec_offset();
+  return this->JOIN_CACHE::get_record();
+}
+
+
+/* 
+  Skip record from the JOIN_CACHE_BKA_UNIQUE join buffer if its match flag is on
+
+  SYNOPSIS
+    skip_record_if_match()
+
+  DESCRIPTION
+    This implementation of the virtual function skip_record_if_match does
+    the same as the default implementation does, but it takes into account
+    the link element used to connect the records with the same key into a chain. 
+
+  RETURN
+    TRUE  - the match flag is on and the record has been skipped
+    FALSE - the match flag is off 
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
+{
+  uchar *save_pos= pos;
+  pos+= get_size_of_rec_offset();
+  if (!this->JOIN_CACHE::skip_record_if_match())
+  {
+    pos= save_pos;
+    return FALSE;
+  }
+  return TRUE;
+}
+
+
+/* 
+  Search for a key in the hash table of the join buffer
+
+  SYNOPSIS
+    key_search()
+      key             pointer to the key value
+      key_len         key value length
+      key_ref_ptr OUT position of the reference to the next key from 
+                      the hash element for the found key , or
+                      a position where the reference to the the hash 
+                      element for the key is to be added in the
+                      case when the key has not been found
+      
+  DESCRIPTION
+    The function looks for a key in the hash table of the join buffer.
+    If the key is found the functionreturns the position of the reference
+    to the next key from  to the hash element for the given key. 
+    Otherwise the function returns the position where the reference to the
+    newly created hash element for the given key is to be added.  
+
+  RETURN
+    TRUE  - the key is found in the hash table
+    FALSE - otherwise
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
+                                       uchar **key_ref_ptr) 
+{
+  bool is_found= FALSE;
+  uint idx= get_hash_idx(key, key_length);
+  uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
+  while (!is_null_key_ref(ref_ptr))
+  {
+    uchar *next_key;
+    ref_ptr= get_next_key_ref(ref_ptr);
+    next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
+                            ref_ptr-key_length;
+
+    if (memcmp(next_key, key, key_len) == 0)
+    {
+      is_found= TRUE;
+      break;
+    }
+  }
+  *key_ref_ptr= ref_ptr;
+  return is_found;
+} 
+
+
+/* 
+  Calclulate hash value for a key in the hash table of the join buffer
+
+  SYNOPSIS
+    get_hash_idx()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function calculates an index of the hash entry in the hash table
+    of the join buffer for the given key  
+
+  RETURN
+    the calculated index of the hash entry for the given key.  
+*/
+
+uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
+{
+  ulong nr= 1;
+  ulong nr2= 4;
+  uchar *pos= key;
+  uchar *end= key+key_len;
+  for (; pos < end ; pos++)
+  {
+    nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
+    nr2+= 3;
+  }
+  return nr % hash_entries;
+}
+
+
+/* 
+  Clean up the hash table of the join buffer
+
+  SYNOPSIS
+    cleanup_hash_table()
+      key             pointer to the key value
+      key_len         key value length
+      
+  DESCRIPTION
+    The function cleans up the hash table in the join buffer removing all
+    hash elements from the table. 
+
+  RETURN
+    none  
+*/
+
+void JOIN_CACHE_BKA_UNIQUE:: cleanup_hash_table()
+{
+  last_key_entry= hash_table;
+  bzero(hash_table, (buff+buff_size)-hash_table);
+  key_entries= 0;
+}
+
+
+/*
+  Initialize retrieval of range sequence for BKA_UNIQUE algorithm
+    
+  SYNOPSIS
+    bka_range_seq_init()
+      init_params   pointer to the BKA_INIQUE join cache object
+      n_ranges      the number of ranges obtained 
+      flags         combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+  DESCRIPTION
+    The function interprets init_param as a pointer to a JOIN_CACHE_BKA_UNIQUE
+    object. The function prepares for an iteration over the unique join keys
+    built over the records from the cache join buffer.
+
+  NOTE
+    This function are used only as a callback function.    
+
+  RETURN
+    init_param    value that is to be used as a parameter of 
+                  bka_unique_range_seq_next()
+*/    
+
+static 
+range_seq_t bka_unique_range_seq_init(void *init_param, uint n_ranges,
+                                      uint flags)
+{
+  DBUG_ENTER("bka_unique_range_seq_init");
+  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) init_param;
+  cache->reset(0);
+  DBUG_RETURN((range_seq_t) init_param);
+}
+
+
+/*
+  Get the key over the next record from the join buffer used by BKA_UNIQUE  
+    
+  SYNOPSIS
+    bka_unique_range_seq_next()
+      seq        value returned by  bka_unique_range_seq_init()
+      range  OUT reference to the next range
+  
+  DESCRIPTION
+    The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE 
+    object. The function returns a pointer to the range descriptor
+    for the next unique key built over records from the join buffer.
+
+  NOTE
+    This function are used only as a callback function.
+   
+  RETURN
+    0    ok, the range structure filled with info about the next key
+    1    no more ranges
+*/    
+
+static 
+uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  DBUG_ENTER("bka_unique_range_seq_next");
+  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
+  TABLE_REF *ref= &cache->join_tab->ref;
+  key_range *start_key= &range->start_key;
+  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
+  {
+    start_key->keypart_map= (1 << ref->key_parts) - 1;
+    start_key->flag= HA_READ_KEY_EXACT;
+    range->end_key= *start_key;
+    range->end_key.flag= HA_READ_AFTER_KEY;
+    range->ptr= (char *) cache->get_curr_key_chain();
+    range->range_flag= EQ_RANGE;
+    DBUG_RETURN(0);
+  } 
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Check whether range_info orders to skip the next record from BKA_UNIQUE buffer
+
+  SYNOPSIS
+    bka_unique_range_seq_skip_record()
+      seq              value returned by bka_unique_range_seq_init()
+      range_info       information about the next range
+      rowid [NOT USED] rowid of the record to be checked (not used)
+    
+  DESCRIPTION
+    The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
+    object. The function returns TRUE if the record with this range_info
+    is to be filtered out from the stream of records returned by
+    multi_range_read_next(). 
+
+  NOTE
+    This function are used only as a callback function.
+
+  RETURN
+    1    record with this range_info is to be filtered out from the stream
+         of records returned by multi_range_read_next()
+    0    the record is to be left in the stream
+*/ 
+
+static 
+bool bka_unique_range_seq_skip_record(range_seq_t rseq, char *range_info,
+                                      uchar *rowid)
+{
+  DBUG_ENTER("bka_unique_range_seq_skip_record");
+  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
+  bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
+  DBUG_RETURN(res);
+}
+
+ 
+/*
+  Check if the record combination matches the index condition
+
+  SYNOPSIS
+    JOIN_CACHE_BKA_UNIQUE::skip_index_tuple()
+      rseq             Value returned by bka_range_seq_init()
+      range_info       MRR range association data
+    
+  DESCRIPTION
+    See JOIN_CACHE_BKA::skip_index_tuple().
+    This function is the variant for use with
+    JOIN_CACHE_BKA_UNIQUE. The difference from JOIN_CACHE_BKA case is that
+    there may be multiple previous table record combinations that share the
+    same key, i.e. they map to the same MRR range.
+    As a consequence, we need to loop through all previous table record
+    combinations that match the given MRR range key range_info until we find
+    one that satisfies the index condition.
+
+  NOTE
+    Possible optimization:
+    Before we unpack the record from a previous table
+    check if this table is used in the condition.
+    If so then unpack the record otherwise skip the unpacking.
+    This should be done by a special virtual method
+    get_partial_record_by_pos().
+
+  RETURN
+    0    The record combination satisfies the index condition
+    1    Otherwise
+
+
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::skip_index_tuple(range_seq_t rseq, char *range_info)
+{
+  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::skip_index_tuple");
+  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
+  uchar *last_rec_ref_ptr=  cache->get_next_rec_ref((uchar*) range_info);
+  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+  do
+  {
+    next_rec_ref_ptr= cache->get_next_rec_ref(next_rec_ref_ptr);
+    uchar *rec_ptr= next_rec_ref_ptr + cache->rec_fields_offset;
+    cache->get_record_by_pos(rec_ptr);
+    if (join_tab->cache_idx_cond->val_int())
+      DBUG_RETURN(FALSE);
+  } while(next_rec_ref_ptr != last_rec_ref_ptr);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Check if the record combination matches the index condition
+
+  SYNOPSIS
+    bka_unique_skip_index_tuple()
+      rseq             Value returned by bka_range_seq_init()
+      range_info       MRR range association data
+    
+  DESCRIPTION
+    This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
+    see comments there.
+
+  NOTE
+    This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
+ 
+  RETURN
+    0    The record combination satisfies the index condition
+    1    Otherwise
+*/
+
+static 
+bool bka_unique_skip_index_tuple(range_seq_t rseq, char *range_info)
+{
+  DBUG_ENTER("bka_unique_skip_index_tuple");
+  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
+  DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
+}
+
+
+/*
+  Using BKA_UNIQUE find matches from the next table for records from join buffer   
+
+  SYNOPSIS
+    join_matching_records()
+      skip_last    do not look for matches for the last partial join record 
+
+  DESCRIPTION
+    This function can be used only when the table join_tab can be accessed
+    by keys built over the fields of previous join tables.
+    The function retrieves all keys from the hash table of the join buffer
+    built for partial join records from the buffer. For each of these keys
+    the function performs an index lookup and tries to match records yielded
+    by this lookup with records from the join buffer attached to the key.
+    If a match is found the function will call the sub_select function trying
+    to look for matches for the remaining join operations.
+    This function does not assume that matching records are necessarily
+    returned with references to the keys by which they were found. If the call
+    of the function multi_range_read_init returns flags with
+    HA_MRR_NO_ASSOCIATION then a search for the key built from the returned
+    record is carried on. The search is performed by probing in in the hash
+    table of the join buffer.
+    This function currently is called only from the function join_records.    
+    It's assumed that this function is always called with the skip_last 
+    parameter equal to false.
+            
+  RETURN
+    return one of enum_nested_loop_state 
+*/
+
+enum_nested_loop_state 
+JOIN_CACHE_BKA_UNIQUE::join_matching_records(bool skip_last)
+{
+  int error;
+  uchar *key_chain_ptr;
+  handler *file= join_tab->table->file;
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  bool check_only_first_match= join_tab->check_only_first_match();
+  bool no_association= test(mrr_mode &  HA_MRR_NO_ASSOCIATION);
+
+  /* Set functions to iterate over keys in the join buffer */
+  RANGE_SEQ_IF seq_funcs= { bka_unique_range_seq_init,
+                            bka_unique_range_seq_next,
+                            check_only_first_match && !no_association ?
+                              bka_unique_range_seq_skip_record : 0,
+                            join_tab->cache_idx_cond ?
+                              bka_unique_skip_index_tuple : 0  };
+
+  /* The value of skip_last must be always FALSE when this function is called */
+  DBUG_ASSERT(!skip_last);
+
+  /* Return at once if there are no records in the join buffer */
+  if (!records)
+    return NESTED_LOOP_OK;  
+                   
+  rc= init_join_matching_records(&seq_funcs, key_entries);
+  if (rc != NESTED_LOOP_OK)
+    goto finish;
+
+  while (!(error= file->multi_range_read_next((char **) &key_chain_ptr)))
+  {
+    if (no_association)
+    {
+      uchar *key_ref_ptr;
+      TABLE *table= join_tab->table;
+      TABLE_REF *ref= &join_tab->ref;
+      KEY *keyinfo= table->key_info+ref->key;
+      /* 
+        Build the key value out of  the record returned by the call of
+        multi_range_read_next in the record buffer
+      */ 
+      key_copy(ref->key_buff, table->record[0], keyinfo, ref->key_length);
+      /* Look for this key in the join buffer */
+      if (!key_search(ref->key_buff, ref->key_length, &key_ref_ptr))
+	continue;
+      key_chain_ptr= key_ref_ptr+get_size_of_key_offset();
+    } 
+
+    uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
+    uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+    do
+    {
+      next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+      uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
+
+      if (join->thd->killed)
+      {
+        /* The user has aborted the execution of the query */
+        join->thd->send_kill_message();
+        rc= NESTED_LOOP_KILLED; 
+        goto finish;
+      }
+      /* 
+        If only the first match is needed and it has been already found
+        for the associated partial join record then the returned candidate
+        is discarded.
+      */
+      if (rc == NESTED_LOOP_OK &&
+          (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
+      {
+        get_record_by_pos(rec_ptr);
+        update_virtual_fields(join->thd, join_tab->table);
+        rc= generate_full_extensions(rec_ptr);
+        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
+	  goto finish;   
+      }
+    }
+    while (next_rec_ref_ptr != last_rec_ref_ptr); 
+  }
+
+  if (error > 0 && error != HA_ERR_END_OF_FILE)	   
+    return NESTED_LOOP_ERROR; 
+finish:                  
+  return end_join_matching_records(rc);
+}
+
+
+/*
+  Check whether all records in a key chain have their match flags set on   
+
+  SYNOPSIS
+    check_all_match_flags_for_key()
+      key_chain_ptr     
+
+  DESCRIPTION
+    This function retrieves records in the given circular chain and checks
+    whether their match flags are set on. The parameter key_chain_ptr shall
+    point to the position in the join buffer storing the reference to the
+    last element of this chain. 
+            
+  RETURN
+    TRUE   if each retrieved record has its match flag set on
+    FALSE  otherwise 
+*/
+
+bool JOIN_CACHE_BKA_UNIQUE::check_all_match_flags_for_key(uchar *key_chain_ptr)
+{
+  uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
+  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
+  do
+  {
+    next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
+    uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
+    if (!get_match_flag_by_pos(rec_ptr))
+      return FALSE;
+  }
+  while (next_rec_ref_ptr != last_rec_ref_ptr);
+  return TRUE;
+}
+  
+
+/* 
+  Get the next key built for the records from BKA_UNIQUE join buffer
+
+  SYNOPSIS
+    get_next_key()
+      key    pointer to the buffer where the key value is to be placed
+
+  DESCRIPTION
+    The function reads the next key value stored in the hash table of the
+    join buffer. Depending on the value of the use_emb_key flag of the
+    join cache the value is read either from the table itself or from
+    the record field where it occurs. 
+
+  RETURN
+    length of the key value - if the starting value of 'cur_key_entry' refers
+    to the position after that referred by the the value of 'last_key_entry'    
+    0 - otherwise.     
+*/
+
+uint JOIN_CACHE_BKA_UNIQUE::get_next_key(uchar ** key)
+{  
+  if (curr_key_entry == last_key_entry)
+    return 0;
+
+  curr_key_entry-= key_entry_length;
+
+  *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
+
+  DBUG_ASSERT(*key >= buff && *key < hash_table);
+
+  return key_length;
+}
+
+
+/****************************************************************************
+ * Join cache module end
+ ****************************************************************************/
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index ba508783133..67717ebab29 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -408,6 +408,7 @@ void lex_start(THD *thd)
   lex->reset_query_tables_list(FALSE);
   lex->expr_allows_subselect= TRUE;
   lex->use_only_table_context= FALSE;
+  lex->parse_vcol_expr= FALSE;
 
   lex->name.str= 0;
   lex->name.length= 0;
@@ -908,7 +909,7 @@ int MYSQLlex(void *arg, void *yythd)
 
 int lex_one_token(void *arg, void *yythd)
 {
-  reg1	uchar c= 0;
+  reg1	uchar c;
   bool comment_closed;
   int	tokval, result_state;
   uint length;
@@ -917,10 +918,11 @@ int lex_one_token(void *arg, void *yythd)
   Lex_input_stream *lip= & thd->m_parser_state->m_lip;
   LEX *lex= thd->lex;
   YYSTYPE *yylval=(YYSTYPE*) arg;
-  CHARSET_INFO *cs= thd->charset();
-  uchar *state_map= cs->state_map;
-  uchar *ident_map= cs->ident_map;
+  CHARSET_INFO *const cs= thd->charset();
+  const uchar *const state_map= cs->state_map;
+  const uchar *const ident_map= cs->ident_map;
 
+  LINT_INIT(c);
   lip->yylval=yylval;			// The global state
 
   lip->start_token();
@@ -986,7 +988,8 @@ int lex_one_token(void *arg, void *yythd)
           its value in a query for the binlog, the query must stay
           grammatically correct.
         */
-        if (c == '?' && lip->stmt_prepare_mode && !ident_map[lip->yyPeek()])
+        if (c == '?' && lip->stmt_prepare_mode &&
+            !ident_map[(uchar) lip->yyPeek()])
         return(PARAM_MARKER);
       }
 
@@ -1054,7 +1057,10 @@ int lex_one_token(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state= c; ident_map[c= lip->yyGet()]; result_state|= c) ;
+        for (result_state= c;
+             ident_map[(uchar) (c= lip->yyGet())];
+             result_state|= c)
+          ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
@@ -1066,9 +1072,11 @@ int lex_one_token(void *arg, void *yythd)
           If we find a space then this can't be an identifier. We notice this
           below by checking start != lex->ptr.
         */
-        for (; state_map[c] == MY_LEX_SKIP ; c= lip->yyGet()) ;
+        for (; state_map[(uchar) c] == MY_LEX_SKIP ; c= lip->yyGet())
+          ;
       }
-      if (start == lip->get_ptr() && c == '.' && ident_map[lip->yyPeek()])
+      if (start == lip->get_ptr() && c == '.' &&
+          ident_map[(uchar) lip->yyPeek()])
 	lip->next_state=MY_LEX_IDENT_SEP;
       else
       {					// '(' must follow directly if function
@@ -1111,12 +1119,12 @@ int lex_one_token(void *arg, void *yythd)
 
       return(result_state);			// IDENT or IDENT_QUOTED
 
-    case MY_LEX_IDENT_SEP:		// Found ident and now '.'
+    case MY_LEX_IDENT_SEP:                  // Found ident and now '.'
       yylval->lex_str.str= (char*) lip->get_ptr();
       yylval->lex_str.length= 1;
-      c= lip->yyGet();                  // should be '.'
-      lip->next_state= MY_LEX_IDENT_START;// Next is an ident (not a keyword)
-      if (!ident_map[lip->yyPeek()])            // Probably ` or "
+      c= lip->yyGet();                          // should be '.'
+      lip->next_state= MY_LEX_IDENT_START;      // Next is ident (not keyword)
+      if (!ident_map[(uchar) lip->yyPeek()])    // Probably ` or "
 	lip->next_state= MY_LEX_START;
       return((int) c);
 
@@ -1139,7 +1147,8 @@ int lex_one_token(void *arg, void *yythd)
         }
         else if (c == 'b')
         {
-          while ((c= lip->yyGet()) == '0' || c == '1') ;
+          while ((c= lip->yyGet()) == '0' || c == '1')
+            ;
           if ((lip->yyLength() >= 3) && !ident_map[c])
           {
             /* Skip '0b' */
@@ -1198,11 +1207,12 @@ int lex_one_token(void *arg, void *yythd)
       else
 #endif
       {
-        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c) ;
+        for (result_state=0; ident_map[c= lip->yyGet()]; result_state|= c)
+          ;
         /* If there were non-ASCII characters, mark that we must convert */
         result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
       }
-      if (c == '.' && ident_map[lip->yyPeek()])
+      if (c == '.' && ident_map[(uchar) lip->yyPeek()])
 	lip->next_state=MY_LEX_IDENT_SEP;// Next is '.'
 
       yylval->lex_str= get_token(lip, 0, lip->yyLength());
@@ -1301,7 +1311,8 @@ int lex_one_token(void *arg, void *yythd)
 
     case MY_LEX_BIN_NUMBER:           // Found b'bin-string'
       lip->yySkip();                  // Accept opening '
-      while ((c= lip->yyGet()) == '0' || c == '1') ;
+      while ((c= lip->yyGet()) == '0' || c == '1')
+        ;
       if (c != '\'')
         return(ABORT_SYM);            // Illegal hex constant
       lip->yySkip();                  // Accept closing '
@@ -1312,8 +1323,8 @@ int lex_one_token(void *arg, void *yythd)
       return (BIN_NUM);
 
     case MY_LEX_CMP_OP:			// Incomplete comparison operator
-      if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
-          state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
+      if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP ||
+          state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
         lip->yySkip();
       if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
       {
@@ -1324,11 +1335,11 @@ int lex_one_token(void *arg, void *yythd)
       break;
 
     case MY_LEX_LONG_CMP_OP:		// Incomplete comparison operator
-      if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP ||
-          state_map[lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
+      if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP ||
+          state_map[(uchar) lip->yyPeek()] == MY_LEX_LONG_CMP_OP)
       {
         lip->yySkip();
-        if (state_map[lip->yyPeek()] == MY_LEX_CMP_OP)
+        if (state_map[(uchar) lip->yyPeek()] == MY_LEX_CMP_OP)
           lip->yySkip();
       }
       if ((tokval = find_keyword(lip, lip->yyLength() + 1, 0)))
@@ -1547,7 +1558,7 @@ int lex_one_token(void *arg, void *yythd)
       }
       break;
     case MY_LEX_USER_END:		// end '@' of user@hostname
-      switch (state_map[lip->yyPeek()]) {
+      switch (state_map[(uchar) lip->yyPeek()]) {
       case MY_LEX_STRING:
       case MY_LEX_USER_VARIABLE_DELIMITER:
       case MY_LEX_STRING_OR_DELIMITER:
@@ -1572,7 +1583,7 @@ int lex_one_token(void *arg, void *yythd)
       yylval->lex_str.str=(char*) lip->get_ptr();
       yylval->lex_str.length=1;
       lip->yySkip();                                    // Skip '@'
-      lip->next_state= (state_map[lip->yyPeek()] ==
+      lip->next_state= (state_map[(uchar) lip->yyPeek()] ==
 			MY_LEX_USER_VARIABLE_DELIMITER ?
 			MY_LEX_OPERATOR_OR_IDENT :
 			MY_LEX_IDENT_OR_KEYWORD);
@@ -1584,7 +1595,8 @@ int lex_one_token(void *arg, void *yythd)
 	[(global | local | session) .]variable_name
       */
 
-      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c) ;
+      for (result_state= 0; ident_map[c= lip->yyGet()]; result_state|= c)
+        ;
       /* If there were non-ASCII characters, mark that we must convert */
       result_state= result_state & 0x80 ? IDENT_QUOTED : IDENT;
 
@@ -1714,6 +1726,7 @@ void st_select_lex_unit::init_query()
   item_list.empty();
   describe= 0;
   found_rows_for_union= 0;
+  insert_table_with_stored_vcol= 0;
 }
 
 void st_select_lex::init_query()
@@ -1728,7 +1741,6 @@ void st_select_lex::init_query()
   having= prep_having= where= prep_where= 0;
   olap= UNSPECIFIED_OLAP_TYPE;
   having_fix_field= 0;
-  group_fix_field= 0;
   context.select_lex= this;
   context.init();
   /*
@@ -1755,11 +1767,14 @@ void st_select_lex::init_query()
   exclude_from_table_unique_test= no_wrap_view_item= FALSE;
   nest_level= 0;
   link_next= 0;
+
+  bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used));
 }
 
 void st_select_lex::init_select()
 {
   st_select_lex_node::init_select();
+  sj_nests.empty();
   group_list.empty();
   type= db= 0;
   having= 0;
@@ -1951,6 +1966,55 @@ void st_select_lex_unit::exclude_tree()
 }
 
 
+/**
+  Register reference to an item which the subqueries depends on
+
+  @param def_sel         select against which the item is resolved
+  @param dependency      reference to the item
+
+  @details
+  This function puts the reference dependency to an item that is either an
+  outer field or an aggregate function resolved against an outer select into
+  the list 'depends_on'. It adds it to the 'depends_on' lists for each
+  subquery between this one and 'def_sel' - the subquery against which the
+  item is resolved.
+*/
+
+void st_select_lex::register_dependency_item(st_select_lex *def_sel,
+                                             Item **dependency)
+{
+  SELECT_LEX *s= this;
+  DBUG_ENTER("st_select_lex::register_dependency_item");
+  DBUG_ASSERT(this != def_sel);
+  DBUG_ASSERT(*dependency);
+  do
+  {
+    /* check duplicates */
+    List_iterator_fast<Item*> li(s->master_unit()->item->depends_on);
+    Item **dep;
+    while ((dep= li++))
+    {
+      if ((*dep)->eq(*dependency, FALSE))
+      {
+         DBUG_PRINT("info", ("dependency %s already present",
+                             ((*dependency)->name ?
+                              (*dependency)->name :
+                              "<no name>")));
+         DBUG_VOID_RETURN;
+      }
+    }
+
+    s->master_unit()->item->depends_on.push_back(dependency);
+    DBUG_PRINT("info", ("depends_on: Select: %d  added: %s",
+                        s->select_number,
+                        ((*dependency)->name ?
+                         (*dependency)->name :
+                         "<no name>")));
+  } while ((s= s->outer_select()) != def_sel);
+  DBUG_VOID_RETURN;
+}
+
+
 /*
   st_select_lex_node::mark_as_dependent mark all st_select_lex struct from 
   this to 'last' as dependent
@@ -1963,15 +2027,18 @@ void st_select_lex_unit::exclude_tree()
     'last' should be reachable from this st_select_lex_node
 */
 
-void st_select_lex::mark_as_dependent(st_select_lex *last)
+bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency)
 {
+
+  DBUG_ASSERT(this != last);
+
   /*
     Mark all selects from resolved to 1 before select where was
     found table as depended (of select where was found table)
   */
-  for (SELECT_LEX *s= this;
-       s && s != last;
-       s= s->outer_select())
+  SELECT_LEX *s= this;
+  do
+  {
     if (!(s->uncacheable & UNCACHEABLE_DEPENDENT))
     {
       // Select is dependent of outer select
@@ -1987,8 +2054,15 @@ void st_select_lex::mark_as_dependent(st_select_lex *last)
           sl->uncacheable|= UNCACHEABLE_UNITED;
       }
     }
+
+    Item_subselect *subquery_expr= s->master_unit()->item;
+    if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, 
+                                                          dependency))
+      return TRUE;
+  } while ((s= s->outer_select()) != last && s != 0);
   is_correlated= TRUE;
   this->master_unit()->item->is_correlated= TRUE;
+  return FALSE;
 }
 
 bool st_select_lex_node::set_braces(bool value)      { return 1; }
@@ -2192,16 +2266,28 @@ void st_select_lex::print_limit(THD *thd,
 {
   SELECT_LEX_UNIT *unit= master_unit();
   Item_subselect *item= unit->item;
-  if (item && unit->global_parameters == this &&
-      (item->substype() == Item_subselect::EXISTS_SUBS ||
-       item->substype() == Item_subselect::IN_SUBS ||
-       item->substype() == Item_subselect::ALL_SUBS))
+
+  if (item && unit->global_parameters == this)
   {
-    DBUG_ASSERT(!item->fixed ||
-                (select_limit->val_int() == LL(1) && offset_limit == 0));
-    return;
+    Item_subselect::subs_type subs_type= item->substype();
+    if (subs_type == Item_subselect::EXISTS_SUBS ||
+        subs_type == Item_subselect::IN_SUBS ||
+        subs_type == Item_subselect::ALL_SUBS)
+    {
+      DBUG_ASSERT(!item->fixed ||
+                  /*
+                    If not using materialization both:
+                    select_limit == 1, and there should be no offset_limit.
+                  */
+                  (((subs_type == Item_subselect::IN_SUBS) &&
+                    ((Item_in_subselect*)item)->exec_method ==
+                    Item_in_subselect::MATERIALIZATION) ?
+                   TRUE :
+                   (select_limit->val_int() == 1LL) &&
+                   offset_limit == 0));
+      return;
+    }
   }
-
   if (explicit_limit)
   {
     str->append(STRING_WITH_LEN(" limit "));
@@ -2214,6 +2300,7 @@ void st_select_lex::print_limit(THD *thd,
   }
 }
 
+
 /**
   @brief Restore the LEX and THD in case of a parse error.
 
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 1f4c9420102..f40797d6829 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -190,6 +190,9 @@ enum enum_sql_command {
   SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
   SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
   SQLCOM_SHOW_RELAYLOG_EVENTS, 
+  SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS,
+  SQLCOM_SHOW_CLIENT_STATS,
+
   /*
     When a command is added here, be sure it's also added in mysqld.cc
     in "struct show_var_st status_vars[]= {" ...
@@ -631,6 +634,13 @@ public:
   bool describe; /* union exec() called for EXPLAIN */
   Procedure *last_procedure;	 /* Pointer to procedure, if such exists */
 
+  /* 
+    Insert table with stored virtual columns.
+    This is used only in those rare cases 
+    when the list of inserted values is empty.
+  */
+  TABLE *insert_table_with_stored_vcol;
+
   void init_query();
   st_select_lex_unit* master_unit();
   st_select_lex* outer_select();
@@ -658,7 +668,8 @@ public:
   bool add_fake_select_lex(THD *thd);
   void init_prepare_fake_select_lex(THD *thd);
   inline bool is_prepared() { return prepared; }
-  bool change_result(select_subselect *result, select_subselect *old_result);
+  bool change_result(select_result_interceptor *result,
+                     select_result_interceptor *old_result);
   void set_limit(st_select_lex *values);
   void set_thd(THD *thd_arg) { thd= thd_arg; }
   inline bool is_union (); 
@@ -703,6 +714,7 @@ public:
   List<TABLE_LIST> top_join_list; /* join list of the top level          */
   List<TABLE_LIST> *join_list;    /* list for the currently parsed join  */
   TABLE_LIST *embedding;          /* table embedding to the above list   */
+  List<TABLE_LIST> sj_nests;      /* Semi-join nests within this join */
   /*
     Beginning of the list of leaves in a FROM clause, where the leaves
     inlcude all base tables including view tables. The tables are connected
@@ -743,8 +755,6 @@ public:
   bool  braces;   	/* SELECT ... UNION (SELECT ... ) <- this braces */
   /* TRUE when having fix field called in processing of this SELECT */
   bool having_fix_field;
-  /* TRUE when GROUP BY fix field called in processing of this SELECT */
-  bool group_fix_field;
   /* List of references to fields referenced from inner selects */
   List<Item_outer_ref> inner_refs_list;
   /* Number of Item_sum-derived objects in this SELECT */
@@ -755,6 +765,11 @@ public:
   /* explicit LIMIT clause was used */
   bool explicit_limit;
   /*
+    This array is used to note  whether we have any candidates for
+    expression caching in the corresponding clauses
+  */
+  bool expr_cache_may_be_used[PARSING_PLACE_SIZE];
+  /*
     there are subquery in HAVING clause => we can't close tables before
     query processing end even if we use temporary table
   */
@@ -832,8 +847,10 @@ public:
   {
     return master_unit()->return_after_parsing();
   }
+  inline bool is_subquery_function() { return master_unit()->item != 0; }
 
-  void mark_as_dependent(st_select_lex *last);
+  bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency);
+  void register_dependency_item(st_select_lex *last, Item **dependency);
 
   bool set_braces(bool value);
   bool inc_in_sum_expr();
@@ -918,7 +935,7 @@ public:
   }
 
   void clear_index_hints(void) { index_hints= NULL; }
-
+  bool is_part_of_union() { return master_unit()->is_union(); }
 private:  
   /* current index hint kind. used in filling up index_hints */
   enum index_hint_type current_index_hint_type;
@@ -1060,6 +1077,7 @@ enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE,
                       XA_SUSPEND, XA_FOR_MIGRATE};
 
 extern const LEX_STRING null_lex_str;
+extern const LEX_STRING empty_lex_str;
 
 class Sroutine_hash_entry;
 
@@ -2157,6 +2175,7 @@ struct LEX: public Query_tables_list
   LEX_USER *grant_user;
   XID *xid;
   THD *thd;
+  Virtual_column_info *vcol_info;
 
   /* maintain a list of used plugins for this LEX */
   DYNAMIC_ARRAY plugins;
@@ -2242,6 +2261,14 @@ struct LEX: public Query_tables_list
     syntax error back.
   */
   bool expr_allows_subselect;
+  /*
+    A special command "PARSE_VCOL_EXPR" is defined for the parser 
+    to translate a defining expression of a virtual column into an 
+    Item object.
+    The following flag is used to prevent other applications to use 
+    this command.
+  */
+  bool parse_vcol_expr;
 
   enum SSL_type ssl_type;			/* defined in violite.h */
   enum enum_duplicates duplicates;
@@ -2366,6 +2393,11 @@ struct LEX: public Query_tables_list
   const char *stmt_definition_end;
 
   /**
+    Collects create options for Field and KEY
+  */
+  engine_option_value *option_list, *option_list_last;
+
+  /**
     During name resolution search only in the table list given by 
     Name_resolution_context::first_name_resolution_table and
     Name_resolution_context::last_name_resolution_table
diff --git a/sql/sql_list.h b/sql/sql_list.h
index 5cd24236644..78549b73cef 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -54,7 +54,7 @@ public:
   static void operator delete[](void *ptr, MEM_ROOT *mem_root)
   { /* never called */ }
   static void operator delete[](void *ptr, size_t size) { TRASH(ptr, size); }
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   bool dummy;
   inline Sql_alloc() :dummy(0) {}
   inline ~Sql_alloc() {}
@@ -521,6 +521,43 @@ public:
 
 
 /*
+  Exchange sort algorithm for List<T>.
+*/
+template <class T> 
+inline void exchange_sort(List<T> *list_to_sort,
+                          int (*sort_func)(T *a, T *b, void *arg), void *arg)
+{
+  bool swap;
+  List_iterator<T> it(*list_to_sort);
+  do
+  {
+    T *item1= it++;
+    T **ref1= it.ref();
+    T *item2;
+
+    swap= FALSE;
+    while ((item2= it++))
+    {
+      T **ref2= it.ref();
+      if (sort_func(item1, item2, arg) < 0)
+      {
+        T *item= *ref1;
+        *ref1= *ref2;
+        *ref2= item;
+        swap= TRUE;
+      }
+      else
+      {
+        item1= item2;
+        ref1= ref2;
+      }
+    }
+    it.rewind();
+  } while (swap);
+}
+
+
+/*
   A simple intrusive list which automaticly removes element from list
   on delete (for THD element)
 */
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 69059961633..c7c20938dc3 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -802,7 +802,7 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
     }
     it.rewind();
     uchar *pos=read_info.row_start;
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     read_info.row_end[0]=0;
 #endif
 
@@ -944,11 +944,10 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
       real_item= item->real_item();
 
       if ((!read_info.enclosed &&
-	  (enclosed_length && length == 4 &&
-           !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
+           (enclosed_length && length == 4 &&
+            !memcmp(pos, STRING_WITH_LEN("NULL")))) ||
 	  (length == 1 && read_info.found_null))
       {
-
         if (real_item->type() == Item::FIELD_ITEM)
         {
           Field *field= ((Item_field *)real_item)->field;
@@ -1295,7 +1294,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
 		     String &field_term, String &line_start, String &line_term,
 		     String &enclosed_par, int escape, bool get_it_from_net,
 		     bool is_fifo)
-  :file(file_par),escape_char(escape)
+  :file(file_par),buffer(0),escape_char(escape)
 {
   read_charset= cs;
   field_term_ptr=(char*) field_term.ptr();
@@ -1495,8 +1494,9 @@ int READ_INFO::read_field()
 	// End of enclosed field if followed by field_term or line_term
 	if (chr == my_b_EOF ||
 	    (chr == line_term_char && terminator(line_term_ptr,
-						line_term_length)))
-	{					// Maybe unexpected linefeed
+                                                 line_term_length)))
+        {
+          /* Maybe unexpected linefeed */
 	  enclosed=1;
 	  found_end_of_line=1;
 	  row_start=buffer+1;
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index b78815f0e52..786636addfe 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -101,6 +101,10 @@
 
 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
 
+#ifdef WITH_ARIA_STORAGE_ENGINE
+#include "../storage/maria/ha_maria.h"
+#endif
+
 /**
   @defgroup Runtime_Environment Runtime Environment
   @{
@@ -118,6 +122,8 @@
 
 static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
 static void sql_kill(THD *thd, ulong id, bool only_kill_query);
+static bool execute_show_status(THD *, TABLE_LIST *);
+static bool execute_rename_table(THD *, TABLE_LIST *, TABLE_LIST *);
 
 const char *any_db="*any*";	// Special symbol for check_access
 
@@ -159,7 +165,6 @@ const char *xa_state_names[]={
   "NON-EXISTING", "ACTIVE", "IDLE", "PREPARED", "ROLLBACK ONLY"
 };
 
-
 #ifdef HAVE_REPLICATION
 /**
   Returns true if all tables should be ignored.
@@ -359,13 +364,12 @@ void init_update_queries(void)
   sql_command_flags[SQLCOM_SHOW_PROFILES]=    CF_STATUS_COMMAND;
   sql_command_flags[SQLCOM_SHOW_PROFILE]=     CF_STATUS_COMMAND;
   sql_command_flags[SQLCOM_BINLOG_BASE64_EVENT]= CF_STATUS_COMMAND;
-
-   sql_command_flags[SQLCOM_SHOW_TABLES]=       (CF_STATUS_COMMAND |
-                                                 CF_SHOW_TABLE_COMMAND |
-                                                 CF_REEXECUTION_FRAGILE);
-  sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND |
-                                                CF_SHOW_TABLE_COMMAND |
-                                                CF_REEXECUTION_FRAGILE);
+  sql_command_flags[SQLCOM_SHOW_CLIENT_STATS]= CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_USER_STATS]=   CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_TABLE_STATS]=  CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_INDEX_STATS]=  CF_STATUS_COMMAND;
+  sql_command_flags[SQLCOM_SHOW_TABLES]=       (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE);
+  sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE);
 
 
   sql_command_flags[SQLCOM_CREATE_USER]=       CF_CHANGES_DATA | CF_PROTECT_AGAINST_GRL;
@@ -429,7 +433,7 @@ bool sqlcom_can_generate_row_events(const THD *thd)
  
 bool is_update_query(enum enum_sql_command command)
 {
-  DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+  DBUG_ASSERT(command <= SQLCOM_END);
   return (sql_command_flags[command] & CF_CHANGES_DATA) != 0;
 }
 
@@ -440,7 +444,7 @@ bool is_update_query(enum enum_sql_command command)
 */
 bool is_log_table_write_query(enum enum_sql_command command)
 {
-  DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+  DBUG_ASSERT(command <= SQLCOM_END);
   return (sql_command_flags[command] & CF_WRITE_LOGS_COMMAND) != 0;
 }
 
@@ -493,7 +497,7 @@ void execute_init_command(THD *thd, LEX_STRING *init_command,
 static void handle_bootstrap_impl(THD *thd)
 {
   MYSQL_FILE *file= bootstrap_file;
-  char *buff;
+  char *buff, *res;
 
   DBUG_ENTER("handle_bootstrap");
 
@@ -503,9 +507,8 @@ static void handle_bootstrap_impl(THD *thd)
 #endif /* EMBEDDED_LIBRARY */
 
   thd_proc_info(thd, 0);
-  thd->security_ctx->priv_user=
-    thd->security_ctx->user= (char*) my_strdup("boot", MYF(MY_WME));
-  thd->security_ctx->priv_host[0]=0;
+  thd->security_ctx->user= (char*) my_strdup("boot", MYF(MY_WME));
+  thd->security_ctx->priv_user[0]= thd->security_ctx->priv_host[0]=0;
   /*
     Make the "client" handle multiple results. This is necessary
     to enable stored procedures with SELECTs and Dynamic SQL
@@ -534,7 +537,13 @@ static void handle_bootstrap_impl(THD *thd)
         break;
       }
       buff= (char*) thd->net.buff;
-      mysql_file_fgets(buff + length, thd->net.max_packet - length, file);
+      res= mysql_file_fgets(buff + length, thd->net.max_packet - length, file);
+      if (!res && !feof(file))
+      {
+        net_end_statement(thd);
+        bootstrap_error= 1;
+        break;
+      }
       length+= (ulong) strlen(buff + length);
       /* purecov: end */
     }
@@ -642,7 +651,6 @@ end:
   return;
 }
 
-
 /**
   @brief Check access privs for a MERGE table and fix children lock types.
 
@@ -754,6 +762,9 @@ bool do_command(THD *thd)
 
   net_new_transaction(net);
 
+  /* Save for user statistics */
+  thd->start_bytes_received= thd->status_var.bytes_received;
+
   if ((packet_length= my_net_read(net)) == packet_error)
   {
     DBUG_PRINT("info",("Got error %d reading command from socket %s",
@@ -910,7 +921,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   NET *net= &thd->net;
   bool error= 0;
   DBUG_ENTER("dispatch_command");
-  DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+  DBUG_PRINT("info", ("command: %d", command));
 
 #if defined(ENABLED_PROFILING)
   thd->profiling.start_new_query();
@@ -925,6 +936,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     the slow log only if opt_log_slow_admin_statements is set.
   */
   thd->enable_slow_log= TRUE;
+  thd->query_plan_flags= QPLAN_INIT;
   thd->lex->sql_command= SQLCOM_END; /* to avoid confusing VIEW detectors */
   thd->set_time();
   if (!thd->is_valid_time())
@@ -977,96 +989,34 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
   case COM_CHANGE_USER:
   {
     status_var_increment(thd->status_var.com_other);
-    char *user= (char*) packet, *packet_end= packet + packet_length;
-    /* Safe because there is always a trailing \0 at the end of the packet */
-    char *passwd= strend(user)+1;
 
     thd->change_user();
     thd->clear_error();                         // if errors from rollback
 
-    /*
-      Old clients send null-terminated string ('\0' for empty string) for
-      password.  New clients send the size (1 byte) + string (not null
-      terminated, so also '\0' for empty string).
+    /* acl_authenticate() takes the data from net->read_pos */
+    net->read_pos= (uchar*)packet;
 
-      Cast *passwd to an unsigned char, so that it doesn't extend the sign
-      for *passwd > 127 and become 2**32-127 after casting to uint.
-    */
-    char db_buff[NAME_LEN+1];                 // buffer to store db in utf8
-    char *db= passwd;
-    char *save_db;
-    /*
-      If there is no password supplied, the packet must contain '\0',
-      in any type of handshake (4.1 or pre-4.1).
-     */
-    if (passwd >= packet_end)
-    {
-      my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
-      break;
-    }
-    uint passwd_len= (thd->client_capabilities & CLIENT_SECURE_CONNECTION ?
-                      (uchar)(*passwd++) : strlen(passwd));
-    uint dummy_errors, save_db_length, db_length;
-    int res;
-    Security_context save_security_ctx= *thd->security_ctx;
-    USER_CONN *save_user_connect;
-
-    db+= passwd_len + 1;
-    /*
-      Database name is always NUL-terminated, so in case of empty database
-      the packet must contain at least the trailing '\0'.
-    */
-    if (db >= packet_end)
-    {
-      my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
-      break;
-    }
-    db_length= strlen(db);
+    uint save_db_length= thd->db_length;
+    char *save_db= thd->db;
+    USER_CONN *save_user_connect= thd->user_connect;
+  Security_context save_security_ctx= *thd->security_ctx;
+    CHARSET_INFO *save_character_set_client=
+      thd->variables.character_set_client;
+    CHARSET_INFO *save_collation_connection=
+      thd->variables.collation_connection;
+    CHARSET_INFO *save_character_set_results=
+      thd->variables.character_set_results;
 
-    char *ptr= db + db_length + 1;
-    uint cs_number= 0;
-
-    if (ptr < packet_end)
-    {
-      if (ptr + 2 > packet_end)
-      {
-        my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
-        break;
-      }
-
-      cs_number= uint2korr(ptr);
-    }
-
-    /* Convert database name to utf8 */
-    db_buff[copy_and_convert(db_buff, sizeof(db_buff)-1,
-                             system_charset_info, db, db_length,
-                             thd->charset(), &dummy_errors)]= 0;
-    db= db_buff;
-
-    /* Save user and privileges */
-    save_db_length= thd->db_length;
-    save_db= thd->db;
-    save_user_connect= thd->user_connect;
-
-    if (!(thd->security_ctx->user= my_strdup(user, MYF(0))))
-    {
-      thd->security_ctx->user= save_security_ctx.user;
-      my_message(ER_OUT_OF_RESOURCES, ER(ER_OUT_OF_RESOURCES), MYF(0));
-      break;
-    }
-
-    /* Clear variables that are allocated */
-    thd->user_connect= 0;
-    thd->security_ctx->priv_user= thd->security_ctx->user;
-    res= check_user(thd, COM_CHANGE_USER, passwd, passwd_len, db, FALSE);
-
-    if (res)
+    if (acl_authenticate(thd, 0, packet_length))
     {
       my_free(thd->security_ctx->user);
       *thd->security_ctx= save_security_ctx;
       thd->user_connect= save_user_connect;
-      thd->db= save_db;
-      thd->db_length= save_db_length;
+      thd->reset_db(save_db, save_db_length);
+      thd->variables.character_set_client= save_character_set_client;
+      thd->variables.collation_connection= save_collation_connection;
+      thd->variables.character_set_results= save_character_set_results;
+      thd->update_charset();
     }
     else
     {
@@ -1077,12 +1027,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 #endif /* NO_EMBEDDED_ACCESS_CHECKS */
       my_free(save_db);
       my_free(save_security_ctx.user);
-
-      if (cs_number)
-      {
-        thd_init_client_charset(thd, cs_number);
-        thd->update_charset();
-      }
     }
     break;
   }
@@ -1125,8 +1069,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                       thd->security_ctx->priv_user,
                       (char *) thd->security_ctx->host_or_ip);
     char *packet_end= thd->query() + thd->query_length();
-    /* 'b' stands for 'buffer' parameter', special for 'my_snprintf' */
-
     general_log_write(thd, command, thd->query(), thd->query_length());
     DBUG_PRINT("query",("%-.4096s",thd->query()));
 #if defined(ENABLED_PROFILING)
@@ -1143,11 +1085,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
     {
       char *beginning_of_next_stmt= (char*)
         parser_state.m_lip.found_semicolon;
+
+#ifdef WITH_ARIA_STORAGE_ENGINE
+      ha_maria::implicit_commit(thd, FALSE);
+#endif
+
       /*
         Multiple queries exits, execute them individually
       */
+      close_thread_tables(thd);
+
       thd->protocol->end_statement();
       query_cache_end_of_result(thd);
+
       ulong length= (ulong)(packet_end - beginning_of_next_stmt);
 
       log_slow_statement(thd);
@@ -1212,12 +1162,13 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
       break;
     /*
       We have name + wildcard in packet, separated by endzero
+      (The packet is guaranteed to end with an end zero)
     */
     arg_end= strend(packet);
     uint arg_length= arg_end - packet;
 
     /* Check given table name length. */
-    if (arg_length >= packet_length || arg_length > NAME_LEN)
+    if (packet_length - arg_length > NAME_LEN + 1 || arg_length > SAFE_NAME_LEN)
     {
       my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
       break;
@@ -1297,6 +1248,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 
       status_var_increment(thd->status_var.com_other);
       thd->enable_slow_log= opt_log_slow_admin_statements;
+      thd->query_plan_flags|= QPLAN_ADMIN;
       if (check_global_access(thd, REPL_SLAVE_ACL))
 	break;
 
@@ -1392,16 +1344,21 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
 #endif
   case COM_STATISTICS:
   {
-    STATUS_VAR current_global_status_var;
+    STATUS_VAR *current_global_status_var;      // Big; Don't allocate on stack
     ulong uptime;
-    uint length __attribute__((unused));
+#if defined(SAFEMALLOC) || !defined(EMBEDDED_LIBRARY)
+    uint length;
+#endif
     ulonglong queries_per_second1000;
     char buff[250];
     uint buff_len= sizeof(buff);
 
+    if (!(current_global_status_var= (STATUS_VAR*)
+          thd->alloc(sizeof(STATUS_VAR))))
+      break;
     general_log_print(thd, command, NullS);
     status_var_increment(thd->status_var.com_stat[SQLCOM_SHOW_STATUS]);
-    calc_sum_of_all_status(&current_global_status_var);
+    calc_sum_of_all_status(current_global_status_var);
     if (!(uptime= (ulong) (thd->start_time - server_start_time)))
       queries_per_second1000= 0;
     else
@@ -1413,8 +1370,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                         "Open tables: %u  Queries per second avg: %u.%u",
                         uptime,
                         (int) thread_count, (ulong) thd->query_id,
-                        current_global_status_var.long_query_count,
-                        current_global_status_var.opened_tables,
+                        current_global_status_var->long_query_count,
+                        current_global_status_var->opened_tables,
                         refresh_version,
                         cached_open_tables(),
                         (uint) (queries_per_second1000 / 1000),
@@ -1536,6 +1493,19 @@ void log_slow_statement(THD *thd)
   if (unlikely(thd->in_sub_stmt))
     DBUG_VOID_RETURN;                           // Don't set time for sub stmt
 
+  /* Follow the slow log filter configuration. */ 
+  DBUG_ASSERT(thd->variables.log_slow_filter != 0);
+  if (!(thd->variables.log_slow_filter & thd->query_plan_flags))
+    DBUG_VOID_RETURN; 
+ 
+  /* 
+     If rate limiting of slow log writes is enabled, decide whether to log
+     this query to the log or not.
+  */ 
+  if (thd->variables.log_slow_rate_limit > 1 &&
+      (global_query_id % thd->variables.log_slow_rate_limit) != 0)
+    DBUG_VOID_RETURN;
+
   /*
     Do not log administrative statements unless the appropriate option is
     set.
@@ -1666,6 +1636,12 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident,
     thd->profiling.discard_current_query();
 #endif
     break;
+  case SCH_USER_STATS:
+  case SCH_CLIENT_STATS:
+    if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+      DBUG_RETURN(1);
+  case SCH_TABLE_STATS:
+  case SCH_INDEX_STATS:
   case SCH_OPEN_TABLES:
   case SCH_VARIABLES:
   case SCH_STATUS:
@@ -2115,23 +2091,7 @@ mysql_execute_command(THD *thd)
     break;
   case SQLCOM_SHOW_STATUS:
   {
-    system_status_var old_status_var= thd->status_var;
-    thd->initial_status_var= &old_status_var;
-    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE,
-                                  UINT_MAX, FALSE)))
-      res= execute_sqlcom_select(thd, all_tables);
-    /* Don't log SHOW STATUS commands to slow query log */
-    thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED |
-                           SERVER_QUERY_NO_GOOD_INDEX_USED);
-    /*
-      restore status variables, as we don't want 'show status' to cause
-      changes
-    */
-    mysql_mutex_lock(&LOCK_status);
-    add_diff_to_status(&global_status_var, &thd->status_var,
-                       &old_status_var);
-    thd->status_var= old_status_var;
-    mysql_mutex_unlock(&LOCK_status);
+    execute_show_status(thd, all_tables);
     break;
   }
   case SQLCOM_SHOW_DATABASES:
@@ -2147,6 +2107,10 @@ mysql_execute_command(THD *thd)
   case SQLCOM_SHOW_COLLATIONS:
   case SQLCOM_SHOW_STORAGE_ENGINES:
   case SQLCOM_SHOW_PROFILE:
+  case SQLCOM_SHOW_CLIENT_STATS:
+  case SQLCOM_SHOW_USER_STATS:
+  case SQLCOM_SHOW_TABLE_STATS:
+  case SQLCOM_SHOW_INDEX_STATS:
   case SQLCOM_SELECT:
   {
     thd->status_var.last_query_cost= 0.0;
@@ -2265,8 +2229,8 @@ case SQLCOM_PREPARE:
     my_error(ER_FEATURE_DISABLED, MYF(0), "SHOW PROFILES", "enable-profiling");
     goto error;
 #endif
-    break;
   }
+  break;
   case SQLCOM_SHOW_NEW_MASTER:
   {
     if (check_global_access(thd, REPL_SLAVE_ACL))
@@ -2625,6 +2589,7 @@ end_with_restore_list:
       ALTER TABLE.
     */
     thd->enable_slow_log= opt_log_slow_admin_statements;
+    thd->query_plan_flags|= QPLAN_ADMIN;
 
     bzero((char*) &create_info, sizeof(create_info));
     create_info.db_type= 0;
@@ -2673,37 +2638,10 @@ end_with_restore_list:
   }
 #endif /* HAVE_REPLICATION */
 
+#error set QPLAN_ADMIN somehow universally, not per statement
   case SQLCOM_RENAME_TABLE:
   {
-    DBUG_ASSERT(first_table == all_tables && first_table != 0);
-    TABLE_LIST *table;
-    for (table= first_table; table; table= table->next_local->next_local)
-    {
-      if (check_access(thd, ALTER_ACL | DROP_ACL, table->db,
-                       &table->grant.privilege,
-                       &table->grant.m_internal,
-                       0, 0) ||
-          check_access(thd, INSERT_ACL | CREATE_ACL, table->next_local->db,
-                       &table->next_local->grant.privilege,
-                       &table->next_local->grant.m_internal,
-                       0, 0))
-	goto error;
-      TABLE_LIST old_list, new_list;
-      /*
-        we do not need initialize old_list and new_list because we will
-        come table[0] and table->next[0] there
-      */
-      old_list= table[0];
-      new_list= table->next_local[0];
-      if (check_grant(thd, ALTER_ACL | DROP_ACL, &old_list, FALSE, 1, FALSE) ||
-         (!test_all_bits(table->next_local->grant.privilege,
-                         INSERT_ACL | CREATE_ACL) &&
-          check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1,
-                      FALSE)))
-        goto error;
-    }
-
-    if (mysql_rename_tables(thd, first_table, 0))
+    if (execute_rename_table(thd, first_table, all_tables))
       goto error;
     break;
   }
@@ -3904,8 +3842,8 @@ end_with_restore_list:
         if (sp_grant_privileges(thd, lex->sphead->m_db.str, name,
                                 lex->sql_command == SQLCOM_CREATE_PROCEDURE))
           push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                       ER_PROC_AUTO_GRANT_FAIL,
-                       ER(ER_PROC_AUTO_GRANT_FAIL));
+                       ER_PROC_AUTO_GRANT_FAIL, ER(ER_PROC_AUTO_GRANT_FAIL));
+        thd->clear_error();
       }
 
       /*
@@ -4422,6 +4360,7 @@ create_sp_error:
     break;
   }
   thd_proc_info(thd, "query end");
+  thd->update_stats();
 
   /*
     Binlog-related cleanup:
@@ -4527,6 +4466,7 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
       param->select_limit=
         new Item_int((ulonglong) thd->variables.select_limit);
   }
+  thd->thd_marker.emb_on_expr_nest= NULL;
   if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0)))
   {
     if (lex->describe)
@@ -4567,10 +4507,73 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
         delete result;
     }
   }
+  /* Count number of empty select queries */
+  if (!thd->sent_row_count)
+    status_var_increment(thd->status_var.empty_queries);
+  status_var_add(thd->status_var.rows_sent, thd->sent_row_count);
+  return res;
+}
+
+
+static bool execute_show_status(THD *thd, TABLE_LIST *all_tables)
+{
+  bool res;
+  system_status_var old_status_var= thd->status_var;
+  thd->initial_status_var= &old_status_var;
+  if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE,
+                                UINT_MAX, FALSE)))
+    res= execute_sqlcom_select(thd, all_tables);
+  /* Don't log SHOW STATUS commands to slow query log */
+  thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED |
+                         SERVER_QUERY_NO_GOOD_INDEX_USED);
+  /*
+    restore status variables, as we don't want 'show status' to cause
+    changes
+  */
+  mysql_mutex_lock(&LOCK_status);
+  add_diff_to_status(&global_status_var, &thd->status_var,
+                     &old_status_var);
+  thd->status_var= old_status_var;
+  mysql_mutex_unlock(&LOCK_status);
   return res;
 }
 
 
+static bool execute_rename_table(THD *thd, TABLE_LIST *first_table,
+                                 TABLE_LIST *all_tables)
+{
+  DBUG_ASSERT(first_table == all_tables && first_table != 0);
+  TABLE_LIST *table;
+  for (table= first_table; table; table= table->next_local->next_local)
+  {
+    if (check_access(thd, ALTER_ACL | DROP_ACL, table->db,
+                     &table->grant.privilege,
+                     &table->grant.m_internal,
+                     0, 0) ||
+        check_access(thd, INSERT_ACL | CREATE_ACL, table->next_local->db,
+                     &table->next_local->grant.privilege,
+                     &table->next_local->grant.m_internal,
+                     0, 0))
+      return 1;
+    TABLE_LIST old_list, new_list;
+    /*
+      we do not need initialize old_list and new_list because we will
+      come table[0] and table->next[0] there
+    */
+    old_list= table[0];
+    new_list= table->next_local[0];
+    if (check_grant(thd, ALTER_ACL | DROP_ACL, &old_list, FALSE, 1, FALSE) ||
+       (!test_all_bits(table->next_local->grant.privilege,
+                       INSERT_ACL | CREATE_ACL) &&
+        check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1,
+                    FALSE)))
+      return 1;
+  }
+
+  return mysql_rename_tables(thd, first_table, 0);
+}
+
+
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
 /**
   Check grants for commands which work only with one table.
@@ -4750,6 +4753,7 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
       case ACL_INTERNAL_ACCESS_DENIED:
         if (! no_errors)
         {
+          status_var_increment(thd->status_var.access_denied_errors);
           my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
                    sctx->priv_user, sctx->priv_host, db);
         }
@@ -4799,12 +4803,15 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
   {						// We can never grant this
     DBUG_PRINT("error",("No possible access"));
     if (!no_errors)
+    {
+      status_var_increment(thd->status_var.access_denied_errors);
       my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
                sctx->priv_user,
                sctx->priv_host,
                (thd->password ?
                 ER(ER_YES) :
                 ER(ER_NO)));                    /* purecov: tested */
+    }
     DBUG_RETURN(TRUE);				/* purecov: tested */
   }
 
@@ -4862,13 +4869,14 @@ check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv,
   */
   DBUG_PRINT("error",("Access denied"));
   if (!no_errors)
+  {
+    status_var_increment(thd->status_var.access_denied_errors);
     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
              sctx->priv_user, sctx->priv_host,
              (db ? db : (thd->db ?
                          thd->db :
                          "unknown")));
   DBUG_RETURN(TRUE);
-
 }
 
 
@@ -4907,6 +4915,7 @@ static bool check_show_access(THD *thd, TABLE_LIST *table)
 
     if (!thd->col_access && check_grant_db(thd, dst_db_name))
     {
+      status_var_increment(thd->status_var.access_denied_errors);
       my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
                thd->security_ctx->priv_user,
                thd->security_ctx->priv_host,
@@ -4992,8 +5001,8 @@ check_table_access(THD *thd, ulong requirements,TABLE_LIST *tables,
 {
   TABLE_LIST *org_tables= tables;
   TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table();
-  uint i= 0;
   Security_context *sctx= thd->security_ctx, *backup_ctx= thd->security_ctx;
+  uint i= 0;
   /*
     The check that first_not_own_table is not reached is for the case when
     the given table list refers to the list for prelocking (contains tables
@@ -5178,6 +5187,7 @@ bool check_global_access(THD *thd, ulong want_access)
     return 0;
   get_privilege_desc(command, sizeof(command), want_access);
   my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
+  status_var_increment(thd->status_var.access_denied_errors);
   return 1;
 #else
   return 0;
@@ -5278,17 +5288,18 @@ bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, ulong *yystacksize)
 
   @todo Call it after we use THD for queries, not before.
 */
-void mysql_reset_thd_for_next_command(THD *thd)
+void mysql_reset_thd_for_next_command(THD *thd, my_bool calculate_userstat)
 {
-  thd->reset_for_next_command();
+  thd->reset_for_next_command(calculate_userstat);
 }
 
-void THD::reset_for_next_command()
+void THD::reset_for_next_command(bool calculate_userstat)
 {
   THD *thd= this;
   DBUG_ENTER("mysql_reset_thd_for_next_command");
   DBUG_ASSERT(!thd->spcont); /* not for substatements of routines */
   DBUG_ASSERT(! thd->in_sub_stmt);
+  DBUG_ASSERT(thd->transaction.on);
   thd->free_list= 0;
   thd->select_number= 1;
   /*
@@ -5329,6 +5340,18 @@ void THD::reset_for_next_command()
   thd->rand_used= 0;
   thd->sent_row_count= thd->examined_row_count= 0;
 
+  /* Copy data for user stats */
+  if ((thd->userstat_running= calculate_userstat))
+  {
+    thd->start_cpu_time= my_getcputime();
+    memcpy(&thd->org_status_var, &thd->status_var, sizeof(thd->status_var));
+    thd->select_commands= thd->update_commands= thd->other_commands= 0;
+  }
+
+  thd->query_plan_flags= QPLAN_INIT;
+  thd->query_plan_fsort_passes= 0;
+  thd->thd_marker.emb_on_expr_nest= NULL;
+
   thd->reset_current_stmt_binlog_format_row();
   thd->binlog_unsafe_warning_flags= 0;
 
@@ -5521,7 +5544,6 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
 {
   int error __attribute__((unused));
   DBUG_ENTER("mysql_parse");
-
   DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on(););
 
   /*
@@ -5541,7 +5563,7 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
     FIXME: cleanup the dependencies in the code to simplify this.
   */
   lex_start(thd);
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
   {
@@ -5609,7 +5631,11 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
     thd->cleanup_after_query();
     DBUG_ASSERT(thd->change_list.is_empty());
   }
-
+  else
+  {
+    /* Update statistics for getting the query from the cache */
+    thd->lex->sql_command= SQLCOM_SELECT;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -5635,7 +5661,7 @@ bool mysql_test_parse_for_slave(THD *thd, char *rawbuf, uint length)
   if (!(error= parser_state.init(thd, rawbuf, length)))
   {
     lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
+    mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
     if (!parse_sql(thd, & parser_state, NULL) &&
         all_tables_not_ok(thd, lex->select_lex.table_list.first))
@@ -5663,7 +5689,9 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
                        LEX_STRING *comment,
 		       char *change,
                        List<String> *interval_list, CHARSET_INFO *cs,
-		       uint uint_geom_type)
+		       uint uint_geom_type,
+		       Virtual_column_info *vcol_info,
+                       engine_option_value *create_options)
 {
   register Create_field *new_field;
   LEX  *lex= thd->lex;
@@ -5681,7 +5709,7 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
     lex->col_list.push_back(new Key_part_spec(*field_name, 0));
     key= new Key(Key::PRIMARY, null_lex_str,
                       &default_key_create_info,
-                      0, lex->col_list);
+                      0, lex->col_list, NULL);
     lex->alter_info.key_list.push_back(key);
     lex->col_list.empty();
   }
@@ -5691,7 +5719,7 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
     lex->col_list.push_back(new Key_part_spec(*field_name, 0));
     key= new Key(Key::UNIQUE, null_lex_str,
                  &default_key_create_info, 0,
-                 lex->col_list);
+                 lex->col_list, NULL);
     lex->alter_info.key_list.push_back(key);
     lex->col_list.empty();
   }
@@ -5738,7 +5766,8 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum_field_types type,
   if (!(new_field= new Create_field()) ||
       new_field->init(thd, field_name->str, type, length, decimals, type_modifier,
                       default_value, on_update_value, comment, change,
-                      interval_list, cs, uint_geom_type))
+                      interval_list, cs, uint_geom_type, vcol_info,
+                      create_options))
     DBUG_RETURN(1);
 
   lex->alter_info.create_list.push_back(new_field);
@@ -6414,7 +6443,6 @@ uint kill_one_thread(THD *thd, ulong id, bool only_kill_query)
   mysql_mutex_unlock(&LOCK_thread_count);
   if (tmp)
   {
-
     /*
       If we're SUPER, we can KILL anything, including system-threads.
       No further checks.
@@ -7042,8 +7070,9 @@ void get_default_definer(THD *thd, LEX_USER *definer)
   definer->host.str= (char *) sctx->priv_host;
   definer->host.length= strlen(definer->host.str);
 
-  definer->password.str= NULL;
-  definer->password.length= 0;
+  definer->password= null_lex_str;
+  definer->plugin= empty_lex_str;
+  definer->auth= empty_lex_str;
 }
 
 
@@ -7189,15 +7218,14 @@ bool check_string_char_length(LEX_STRING *str, const char *err_msg,
 
 /*
   Check if path does not contain mysql data home directory
+
   SYNOPSIS
     test_if_data_home_dir()
     dir                     directory
-    conv_home_dir           converted data home directory
-    home_dir_len            converted data home directory length
 
   RETURN VALUES
     0	ok
-    1	error  
+    1	error ;  Given path contains data directory
 */
 C_MODE_START
 
@@ -7225,11 +7253,17 @@ int test_if_data_home_dir(const char *dir)
                         mysql_unpacked_real_data_home_len,
                         (const uchar*) mysql_unpacked_real_data_home,
                         mysql_unpacked_real_data_home_len))
+      {
+        DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
         DBUG_RETURN(1);
+      }
     }
     else if (!memcmp(path, mysql_unpacked_real_data_home,
                      mysql_unpacked_real_data_home_len))
+    {
+      DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
       DBUG_RETURN(1);
+    }
   }
   DBUG_RETURN(0);
 }
diff --git a/sql/sql_parse.h b/sql/sql_parse.h
index ad620544d29..ef19df682dc 100644
--- a/sql/sql_parse.h
+++ b/sql/sql_parse.h
@@ -112,7 +112,9 @@ bool add_field_to_list(THD *thd, LEX_STRING *field_name, enum enum_field_types t
 		       LEX_STRING *comment,
 		       char *change, List<String> *interval_list,
 		       CHARSET_INFO *cs,
-		       uint uint_geom_type);
+		       uint uint_geom_type,
+                       Virtual_column_info *vcol_info,
+                       engine_option_value *create_options);
 bool add_to_list(THD *thd, SQL_I_List<ORDER> &list, Item *group, bool asc);
 void add_join_on(TABLE_LIST *b,Item *expr);
 void add_join_natural(TABLE_LIST *a,TABLE_LIST *b,List<String> *using_fields,
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index b72816f8ce3..9ab4e0c5226 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -1105,8 +1105,9 @@ static bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
   saved_full_group_by_flag= thd->lex->current_select->full_group_by_flag;
   saved_allow_sum_func= thd->lex->allow_sum_func;
   thd->lex->allow_sum_func= 0;
-
-  error= func_expr->fix_fields(thd, (Item**)&func_expr);
+  
+  if (!(error= func_expr->fix_fields(thd, (Item**)&func_expr)))
+    func_expr->walk(&Item::vcol_in_partition_func_processor, 0, NULL);
 
   /*
     Restore full_group_by_flag and allow_sum_func,
@@ -2032,7 +2033,7 @@ static int add_int(File fptr, longlong number)
 static int add_uint(File fptr, ulonglong number)
 {
   char buff[32];
-  longlong2str(number, buff, 10);
+  longlong2str(number, buff, 10, 1);
   return add_string(fptr, buff);
 }
 
@@ -7853,8 +7854,8 @@ static uint32 get_next_partition_via_walking(PARTITION_ITERATOR *part_iter)
     field->store(part_iter->field_vals.cur++,
                  ((Field_num*)field)->unsigned_flag);
     if ((part_iter->part_info->is_sub_partitioned() &&
-        !part_iter->part_info->get_part_partition_id(part_iter->part_info,
-                                                     &part_id, &dummy)) ||
+         !part_iter->part_info->get_part_partition_id(part_iter->part_info,
+                                                      &part_id, &dummy)) ||
         !part_iter->part_info->get_partition_id(part_iter->part_info,
                                                 &part_id, &dummy))
       return part_id;
@@ -7975,4 +7976,3 @@ uint get_partition_field_store_length(Field *field)
   return store_length;
 }
 #endif
-
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index f48a3183289..e046a9fc273 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -31,6 +31,7 @@
 #include <my_getopt.h>
 #include "sql_audit.h"
 #include "lock.h"                               // MYSQL_LOCK_IGNORE_TIMEOUT
+#include <mysql/plugin_auth.h>
 #define REPORT_TO_LOG  1
 #define REPORT_TO_USER 2
 
@@ -52,6 +53,18 @@ static TYPELIB global_plugin_typelib=
 char *opt_plugin_load= NULL;
 char *opt_plugin_dir_ptr;
 char opt_plugin_dir[FN_REFLEN];
+uint plugin_maturity;
+
+/*
+  not really needed now, this map will become essential when we add more
+  maturity levels. We cannot change existing maturity constants,
+  so the next value - even if it will be MariaDB_PLUGIN_MATURITY_VERY_BUGGY -
+  will inevitably be larger than MariaDB_PLUGIN_MATURITY_STABLE.
+  To be able to compare them we use this mapping array
+*/
+uint plugin_maturity_map[]=
+{ 0, 1, 2, 3, 4, 5, 6 };
+
 /*
   When you ad a new plugin type, add both a string and make sure that the
   init and deinit array are correctly updated.
@@ -65,6 +78,7 @@ const LEX_STRING plugin_type_names[MYSQL_MAX_PLUGIN_TYPE_NUM]=
   { C_STRING_WITH_LEN("INFORMATION SCHEMA") },
   { C_STRING_WITH_LEN("AUDIT") },
   { C_STRING_WITH_LEN("REPLICATION") },
+  { C_STRING_WITH_LEN("AUTHENTICATION") }
 };
 
 extern int initialize_schema_table(st_plugin_int *plugin);
@@ -81,13 +95,13 @@ extern int finalize_audit_plugin(st_plugin_int *plugin);
 plugin_type_init plugin_type_initialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
   0,ha_initialize_handlerton,0,0,initialize_schema_table,
-  initialize_audit_plugin
+  initialize_audit_plugin, 0, 0
 };
 
 plugin_type_init plugin_type_deinitialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
   0,ha_finalize_handlerton,0,0,finalize_schema_table,
-  finalize_audit_plugin
+  finalize_audit_plugin, 0, 0
 };
 
 #ifdef HAVE_DLOPEN
@@ -97,6 +111,14 @@ static const char *sizeof_st_plugin_sym=
                    "_mysql_sizeof_struct_st_plugin_";
 static const char *plugin_declarations_sym= "_mysql_plugin_declarations_";
 static int min_plugin_interface_version= MYSQL_PLUGIN_INTERFACE_VERSION & ~0xFF;
+static const char *maria_plugin_interface_version_sym=
+                   "_maria_plugin_interface_version_";
+static const char *maria_sizeof_st_plugin_sym=
+                   "_maria_sizeof_struct_st_plugin_";
+static const char *maria_plugin_declarations_sym=
+                   "_maria_plugin_declarations_";
+static int min_maria_plugin_interface_version=
+                   MARIA_PLUGIN_INTERFACE_VERSION & ~0xFF;
 #endif
 
 /* Note that 'int version' must be the first field of every plugin
@@ -110,7 +132,8 @@ static int min_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]=
   MYSQL_DAEMON_INTERFACE_VERSION,
   MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION,
   MYSQL_AUDIT_INTERFACE_VERSION,
-  MYSQL_REPLICATION_INTERFACE_VERSION
+  MYSQL_REPLICATION_INTERFACE_VERSION,
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION
 };
 static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
@@ -120,7 +143,8 @@ static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]=
   MYSQL_DAEMON_INTERFACE_VERSION,
   MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION,
   MYSQL_AUDIT_INTERFACE_VERSION,
-  MYSQL_REPLICATION_INTERFACE_VERSION
+  MYSQL_REPLICATION_INTERFACE_VERSION,
+  MYSQL_AUTHENTICATION_INTERFACE_VERSION
 };
 
 /* support for Services */
@@ -246,7 +270,7 @@ static bool plugin_load_list(MEM_ROOT *tmp_root, int *argc, char **argv,
                              const char *list);
 static int test_plugin_options(MEM_ROOT *, struct st_plugin_int *,
                                int *, char **);
-static bool register_builtin(struct st_mysql_plugin *, struct st_plugin_int *,
+static bool register_builtin(struct st_maria_plugin *, struct st_plugin_int *,
                              struct st_plugin_int **);
 static void unlock_variables(THD *thd, struct system_variables *vars);
 static void cleanup_variables(THD *thd, struct system_variables *vars);
@@ -392,11 +416,230 @@ static inline void free_plugin_mem(struct st_plugin_dl *p)
     dlclose(p->handle);
 #endif
   my_free(p->dl.str);
-  if (p->version != MYSQL_PLUGIN_INTERFACE_VERSION)
+  if (p->allocated)
     my_free(p->plugins);
 }
 
 
+/**
+  Reads data from mysql plugin interface
+
+  @param plugin_dl       Structure where the data should be put
+  @param sym             Reverence on version info
+  @param dlpath          Path to the module
+  @param report          What errors should be reported
+
+  @retval FALSE OK
+  @retval TRUE  ERROR
+*/
+
+#ifdef HAVE_DLOPEN
+static my_bool read_mysql_plugin_info(struct st_plugin_dl *plugin_dl,
+                                      void *sym, char *dlpath,
+                                      int report)
+{
+  DBUG_ENTER("read_maria_plugin_info");
+  /* Determine interface version */
+  if (!sym)
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_interface_version_sym);
+    DBUG_RETURN(TRUE);
+  }
+  plugin_dl->mariaversion= 0;
+  plugin_dl->mysqlversion= *(int *)sym;
+  /* Versioning */
+  if (plugin_dl->mysqlversion < min_plugin_interface_version ||
+      (plugin_dl->mysqlversion >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
+                 "plugin interface version mismatch");
+    DBUG_RETURN(TRUE);
+  }
+  /* Find plugin declarations */
+  if (!(sym= dlsym(plugin_dl->handle, plugin_declarations_sym)))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_declarations_sym);
+    DBUG_RETURN(TRUE);
+  }
+
+  /* convert mysql declaration to maria one */
+  {
+    int i;
+    uint sizeof_st_plugin;
+    struct st_mysql_plugin *old;
+    struct st_maria_plugin *cur;
+    char *ptr= (char *)sym;
+
+    if ((sym= dlsym(plugin_dl->handle, sizeof_st_plugin_sym)))
+      sizeof_st_plugin= *(int *)sym;
+    else
+    {
+      DBUG_ASSERT(min_plugin_interface_version == 0);
+      sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version);
+    }
+
+    for (i= 0;
+         ((struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info;
+         i++)
+      /* no op */;
+
+    cur= (struct st_maria_plugin*)
+          my_malloc((i + 1) * sizeof(struct st_maria_plugin),
+                    MYF(MY_ZEROFILL|MY_WME));
+    if (!cur)
+    {
+      free_plugin_mem(plugin_dl);
+      report_error(report, ER_OUTOFMEMORY, plugin_dl->dl.length);
+      DBUG_RETURN(TRUE);
+    }
+    /*
+      All st_plugin fields not initialized in the plugin explicitly, are
+      set to 0. It matches C standard behaviour for struct initializers that
+      have less values than the struct definition.
+    */
+    for (i=0;
+         (old= (struct st_mysql_plugin *)(ptr + i * sizeof_st_plugin))->info;
+         i++)
+    {
+
+      cur[i].type= old->type;
+      cur[i].info= old->info;
+      cur[i].name= old->name;
+      cur[i].author= old->author;
+      cur[i].descr= old->descr;
+      cur[i].license= old->license;
+      cur[i].init= old->init;
+      cur[i].deinit= old->deinit;
+      cur[i].version= old->version;
+      cur[i].status_vars= old->status_vars;
+      cur[i].system_vars= old->system_vars;
+      /*
+        Something like this should be added to process
+        new mysql plugin versions:
+        if (plugin_dl->mysqlversion > 0x0101)
+        {
+           cur[i].newfield= CONSTANT_MEANS_UNKNOWN;
+        }
+        else
+        {
+           cur[i].newfield= old->newfield;
+        }
+      */
+      /* Maria only fields */
+      cur[i].version_info= "Unknown";
+      cur[i].maturity= MariaDB_PLUGIN_MATURITY_UNKNOWN;
+    }
+    plugin_dl->allocated= true;
+    plugin_dl->plugins= (struct st_maria_plugin *)cur;
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+  Reads data from maria plugin interface
+
+  @param plugin_dl       Structure where the data should be put
+  @param sym             Reverence on version info
+  @param dlpath          Path to the module
+  @param report          what errors should be reported
+
+  @retval FALSE OK
+  @retval TRUE  ERROR
+*/
+
+static my_bool read_maria_plugin_info(struct st_plugin_dl *plugin_dl,
+                                      void *sym, char *dlpath,
+                                      int report)
+{
+  DBUG_ENTER("read_maria_plugin_info");
+
+  /* Determine interface version */
+  if (!(sym))
+  {
+    /*
+      Actually this branch impossible because in case of absence of maria
+      version we try mysql version.
+    */
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY,
+                 maria_plugin_interface_version_sym);
+    DBUG_RETURN(TRUE);
+  }
+  plugin_dl->mariaversion= *(int *)sym;
+  plugin_dl->mysqlversion= 0;
+  /* Versioning */
+  if (plugin_dl->mariaversion < min_maria_plugin_interface_version ||
+      (plugin_dl->mariaversion >> 8) > (MARIA_PLUGIN_INTERFACE_VERSION >> 8))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
+                 "plugin interface version mismatch");
+    DBUG_RETURN(TRUE);
+  }
+  /* Find plugin declarations */
+  if (!(sym= dlsym(plugin_dl->handle, maria_plugin_declarations_sym)))
+  {
+    free_plugin_mem(plugin_dl);
+    report_error(report, ER_CANT_FIND_DL_ENTRY, maria_plugin_declarations_sym);
+    DBUG_RETURN(TRUE);
+  }
+  if (plugin_dl->mariaversion != MARIA_PLUGIN_INTERFACE_VERSION)
+  {
+    uint sizeof_st_plugin;
+    struct st_maria_plugin *old, *cur;
+    char *ptr= (char *)sym;
+
+    if ((sym= dlsym(plugin_dl->handle, maria_sizeof_st_plugin_sym)))
+      sizeof_st_plugin= *(int *)sym;
+    else
+    {
+      free_plugin_mem(plugin_dl);
+      report_error(report, ER_CANT_FIND_DL_ENTRY, maria_sizeof_st_plugin_sym);
+      DBUG_RETURN(TRUE);
+    }
+
+    if (sizeof_st_plugin != sizeof(st_mysql_plugin))
+    {
+      int i;
+      for (i= 0;
+           ((struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
+           i++)
+        /* no op */;
+
+      cur= (struct st_maria_plugin*)
+        my_malloc((i + 1) * sizeof(struct st_maria_plugin),
+                  MYF(MY_ZEROFILL|MY_WME));
+      if (!cur)
+      {
+        free_plugin_mem(plugin_dl);
+        report_error(report, ER_OUTOFMEMORY, plugin_dl->dl.length);
+        DBUG_RETURN(TRUE);
+      }
+      /*
+        All st_plugin fields not initialized in the plugin explicitly, are
+        set to 0. It matches C standard behaviour for struct initializers that
+        have less values than the struct definition.
+      */
+      for (i=0;
+           (old= (struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
+           i++)
+        memcpy(cur + i, old, min(sizeof(cur[i]), sizeof_st_plugin));
+
+      sym= cur;
+      plugin_dl->allocated= true;
+    }
+  }
+  plugin_dl->plugins= (struct st_maria_plugin *)sym;
+
+  DBUG_RETURN(FALSE);
+}
+#endif /* HAVE_DLOPEN */
+
 static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
 {
 #ifdef HAVE_DLOPEN
@@ -444,22 +687,21 @@ static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
     report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, errno, errmsg);
     DBUG_RETURN(0);
   }
-  /* Determine interface version */
-  if (!(sym= dlsym(plugin_dl.handle, plugin_interface_version_sym)))
+
+  /* Checks which plugin interface present and reads info */
+  if (!(sym= dlsym(plugin_dl.handle, maria_plugin_interface_version_sym)))
   {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_interface_version_sym);
-    DBUG_RETURN(0);
+    if (read_mysql_plugin_info(&plugin_dl,
+                               dlsym(plugin_dl.handle,
+                                     plugin_interface_version_sym),
+                               dlpath,
+                               report))
+      DBUG_RETURN(0);
   }
-  plugin_dl.version= *(int *)sym;
-  /* Versioning */
-  if (plugin_dl.version < min_plugin_interface_version ||
-      (plugin_dl.version >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8))
+  else
   {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_OPEN_LIBRARY, dlpath, 0,
-                 "plugin interface version mismatch");
-    DBUG_RETURN(0);
+    if (read_maria_plugin_info(&plugin_dl, sym, dlpath, report))
+      DBUG_RETURN(0);
   }
 
   /* link the services in */
@@ -482,64 +724,6 @@ static st_plugin_dl *plugin_dl_add(const LEX_STRING *dl, int report)
     }
   }
 
-  /* Find plugin declarations */
-  if (!(sym= dlsym(plugin_dl.handle, plugin_declarations_sym)))
-  {
-    free_plugin_mem(&plugin_dl);
-    report_error(report, ER_CANT_FIND_DL_ENTRY, plugin_declarations_sym);
-    DBUG_RETURN(0);
-  }
-
-  if (plugin_dl.version != MYSQL_PLUGIN_INTERFACE_VERSION)
-  {
-    uint sizeof_st_plugin;
-    struct st_mysql_plugin *old, *cur;
-    char *ptr= (char *)sym;
-
-    if ((sym= dlsym(plugin_dl.handle, sizeof_st_plugin_sym)))
-      sizeof_st_plugin= *(int *)sym;
-    else
-    {
-#ifdef ERROR_ON_NO_SIZEOF_PLUGIN_SYMBOL
-      report_error(report, ER_CANT_FIND_DL_ENTRY, sizeof_st_plugin_sym);
-      DBUG_RETURN(0);
-#else
-      /*
-        When the following assert starts failing, we'll have to switch
-        to the upper branch of the #ifdef
-      */
-      DBUG_ASSERT(min_plugin_interface_version == 0);
-      sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version);
-#endif
-    }
-
-    for (i= 0;
-         ((struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info;
-         i++)
-      /* no op */;
-
-    cur= (struct st_mysql_plugin*)
-      my_malloc((i+1)*sizeof(struct st_mysql_plugin), MYF(MY_ZEROFILL|MY_WME));
-    if (!cur)
-    {
-      free_plugin_mem(&plugin_dl);
-      report_error(report, ER_OUTOFMEMORY, plugin_dl.dl.length);
-      DBUG_RETURN(0);
-    }
-    /*
-      All st_plugin fields not initialized in the plugin explicitly, are
-      set to 0. It matches C standard behaviour for struct initializers that
-      have less values than the struct definition.
-    */
-    for (i=0;
-         (old=(struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info;
-         i++)
-      memcpy(cur+i, old, min(sizeof(cur[i]), sizeof_st_plugin));
-
-    sym= cur;
-  }
-  plugin_dl.plugins= (struct st_mysql_plugin *)sym;
-
   /* Duplicate and convert dll name */
   plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1;
   if (! (plugin_dl.dl.str= (char*) my_malloc(plugin_dl.dl.length, MYF(0))))
@@ -670,7 +854,10 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc)
   {
     plugin_ref plugin;
 #ifdef DBUG_OFF
-    /* built-in plugins don't need ref counting */
+    /*
+      In optimized builds we don't do reference counting for built-in
+      (plugin->plugin_dl == 0) plugins.
+    */
     if (!pi->plugin_dl)
       DBUG_RETURN(pi);
 
@@ -687,7 +874,7 @@ static plugin_ref intern_plugin_lock(LEX *lex, plugin_ref rc)
     *plugin= pi;
 #endif
     pi->ref_count++;
-    DBUG_PRINT("info",("thd: 0x%lx, plugin: \"%s\", ref_count: %d",
+    DBUG_PRINT("lock",("thd: 0x%lx  plugin: \"%s\" LOCK ref_count: %d",
                        (long) current_thd, pi->name.str, pi->ref_count));
 
     if (lex)
@@ -703,6 +890,26 @@ plugin_ref plugin_lock(THD *thd, plugin_ref *ptr)
   LEX *lex= thd ? thd->lex : 0;
   plugin_ref rc;
   DBUG_ENTER("plugin_lock");
+
+#ifdef DBUG_OFF
+  /*
+    In optimized builds we don't do reference counting for built-in
+    (plugin->plugin_dl == 0) plugins.
+
+    Note that we access plugin->plugin_dl outside of LOCK_plugin, and for
+    dynamic plugins a 'plugin' could correspond to plugin that was unloaded
+    meanwhile!  But because st_plugin_int is always allocated on
+    plugin_mem_root, the pointer can never be invalid - the memory is never
+    freed.
+    Of course, the memory that 'plugin' points to can be overwritten by
+    another plugin being loaded, but plugin->plugin_dl can never change
+    from zero to non-zero or vice versa.
+    That is, it's always safe to check for plugin->plugin_dl==0 even
+    without a mutex.
+  */
+  if (! plugin_dlib(ptr))
+    DBUG_RETURN(ptr);
+#endif
   mysql_mutex_lock(&LOCK_plugin);
   rc= my_intern_plugin_lock_ci(lex, *ptr);
   mysql_mutex_unlock(&LOCK_plugin);
@@ -757,7 +964,7 @@ static bool plugin_add(MEM_ROOT *tmp_root,
                        int *argc, char **argv, int report)
 {
   struct st_plugin_int tmp;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   DBUG_ENTER("plugin_add");
   if (plugin_find_internal(name, MYSQL_ANY_PLUGIN))
   {
@@ -791,6 +998,17 @@ static bool plugin_add(MEM_ROOT *tmp_root,
         report_error(report, ER_CANT_OPEN_LIBRARY, dl->str, 0, buf);
         goto err;
       }
+      if (plugin_maturity_map[plugin->maturity] < plugin_maturity)
+      {
+        char buf[256];
+        strxnmov(buf, sizeof(buf) - 1, "Loading of ",
+                 plugin_maturity_names[plugin->maturity],
+                 " plugins is prohibited by --plugin-maturity=",
+                 plugin_maturity_names[plugin_maturity],
+                 NullS);
+        report_error(report, ER_CANT_OPEN_LIBRARY, dl->str, 0, buf);
+        goto err;
+      }
       tmp.plugin= plugin;
       tmp.name.str= (char *)plugin->name;
       tmp.name.length= name_len;
@@ -961,8 +1179,6 @@ static void intern_plugin_unlock(LEX *lex, plugin_ref plugin)
   my_free(plugin);
 #endif
 
-  DBUG_PRINT("info",("unlocking plugin, name= %s, ref_count= %d",
-                     pi->name.str, pi->ref_count));
   if (lex)
   {
     /*
@@ -982,6 +1198,9 @@ static void intern_plugin_unlock(LEX *lex, plugin_ref plugin)
   DBUG_ASSERT(pi->ref_count);
   pi->ref_count--;
 
+  DBUG_PRINT("lock",("thd: 0x%lx  plugin: \"%s\" UNLOCK ref_count: %d",
+                     (long) current_thd, pi->name.str, pi->ref_count));
+
   if (pi->state == PLUGIN_IS_DELETED && !pi->ref_count)
     reap_needed= true;
 
@@ -1012,6 +1231,9 @@ void plugin_unlock_list(THD *thd, plugin_ref *list, uint count)
 {
   LEX *lex= thd ? thd->lex : 0;
   DBUG_ENTER("plugin_unlock_list");
+  if (count == 0)
+    DBUG_VOID_RETURN;
+
   DBUG_ASSERT(list);
   mysql_mutex_lock(&LOCK_plugin);
   while (count--)
@@ -1172,8 +1394,8 @@ int plugin_init(int *argc, char **argv, int flags)
 {
   uint i;
   bool is_myisam;
-  struct st_mysql_plugin **builtins;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin **builtins;
+  struct st_maria_plugin *plugin;
   struct st_plugin_int tmp, *plugin_ptr, **reap;
   MEM_ROOT tmp_root;
   bool reaped_mandatory_plugin= false;
@@ -1232,6 +1454,7 @@ int plugin_init(int *argc, char **argv, int flags)
           !my_strnncoll(&my_charset_latin1, (const uchar*) plugin->name,
                         6, (const uchar*) "InnoDB", 6))
         continue;
+
       bzero(&tmp, sizeof(tmp));
       tmp.plugin= plugin;
       tmp.name.str= (char *)plugin->name;
@@ -1359,7 +1582,7 @@ err:
 }
 
 
-static bool register_builtin(struct st_mysql_plugin *plugin,
+static bool register_builtin(struct st_maria_plugin *plugin,
                              struct st_plugin_int *tmp,
                              struct st_plugin_int **ptr)
 {
@@ -1430,7 +1653,12 @@ static void plugin_load(MEM_ROOT *tmp_root, int *argc, char **argv)
     goto end;
   }
   table= tables.table;
-  init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE);
+  if (init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+  {
+    sql_print_error("Could not initialize init_read_record; Plugins not "
+                    "loaded");
+    goto end;
+  }
   table->use_all_columns();
   /*
     there're no other threads running yet, so we don't need a mutex.
@@ -1476,7 +1704,7 @@ static bool plugin_load_list(MEM_ROOT *tmp_root, int *argc, char **argv,
   char buffer[FN_REFLEN];
   LEX_STRING name= {buffer, 0}, dl= {NULL, 0}, *str= &name;
   struct st_plugin_dl *plugin_dl;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   char *p= buffer;
   DBUG_ENTER("plugin_load_list");
   while (list)
@@ -1744,9 +1972,10 @@ bool mysql_install_plugin(THD *thd, const LEX_STRING *name, const LEX_STRING *dl
 
   if (tmp->state == PLUGIN_IS_DISABLED)
   {
-    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                        ER_CANT_INITIALIZE_UDF, ER(ER_CANT_INITIALIZE_UDF),
-                        name->str, "Plugin is disabled");
+    if (global_system_variables.log_warnings)
+      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                          ER_CANT_INITIALIZE_UDF, ER(ER_CANT_INITIALIZE_UDF),
+                          name->str, "Plugin is disabled");
   }
   else
   {
@@ -1859,8 +2088,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_STRING *name)
   table->field[0]->store(name->str, name->length, system_charset_info);
   key_copy(user_key, table->record[0], table->key_info,
            table->key_info->key_length);
-  if (! table->file->index_read_idx_map(table->record[0], 0, user_key,
-                                        HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (! table->file->ha_index_read_idx_map(table->record[0], 0, user_key,
+                                           HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
     int error;
     /*
@@ -3315,7 +3544,6 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
   struct sys_var_chain chain= { NULL, NULL };
   bool disable_plugin;
   enum_plugin_load_policy plugin_load_policy= tmp->is_mandatory ? PLUGIN_FORCE : PLUGIN_ON;
-
   MEM_ROOT *mem_root= alloc_root_inited(&tmp->mem_root) ?
                       &tmp->mem_root : &plugin_mem_root;
   st_mysql_sys_var **opt;
@@ -3329,13 +3557,13 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
   DBUG_ENTER("test_plugin_options");
   DBUG_ASSERT(tmp->plugin && tmp->name.str);
 
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
   /*
-    The 'federated' and 'ndbcluster' storage engines are always disabled by
-    default.
+    The 'ndbcluster' storage engines is always disabled by default.
   */
-  if (!(my_strcasecmp(&my_charset_latin1, tmp->name.str, "federated") &&
-      my_strcasecmp(&my_charset_latin1, tmp->name.str, "ndbcluster")))
+  if (!my_strcasecmp(&my_charset_latin1, tmp->name.str, "ndbcluster"))
     plugin_load_policy= PLUGIN_OFF;
+#endif
 
   for (opt= tmp->plugin->system_vars; opt && *opt; opt++)
     count+= 2; /* --{plugin}-{optname} and --plugin-{plugin}-{optname} */
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
index 079dc4e6dca..756328579f1 100644
--- a/sql/sql_plugin.h
+++ b/sql/sql_plugin.h
@@ -21,7 +21,7 @@
   that is defined in plugin.h
 */
 #define SHOW_always_last SHOW_KEY_CACHE_LONG, \
-            SHOW_KEY_CACHE_LONGLONG, SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, \
+            SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, \
             SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \
             SHOW_LONG_NOFLUSH, SHOW_LONGLONG_STATUS, SHOW_LEX_STRING
 #include <mysql/plugin.h>
@@ -78,8 +78,10 @@ struct st_plugin_dl
 {
   LEX_STRING dl;
   void *handle;
-  struct st_mysql_plugin *plugins;
-  int version;
+  struct st_maria_plugin *plugins;
+  int mysqlversion;
+  int mariaversion;
+  bool   allocated;
   uint ref_count;            /* number of plugins loaded from the library */
 };
 
@@ -88,7 +90,7 @@ struct st_plugin_dl
 struct st_plugin_int
 {
   LEX_STRING name;
-  struct st_mysql_plugin *plugin;
+  struct st_maria_plugin *plugin;
   struct st_plugin_dl *plugin_dl;
   uint state;
   uint ref_count;               /* number of threads using the plugin */
@@ -105,6 +107,8 @@ struct st_plugin_int
 */
 #ifdef DBUG_OFF
 typedef struct st_plugin_int *plugin_ref;
+#define plugin_ref_to_int(A) A
+#define plugin_int_to_ref(A) A
 #define plugin_decl(pi) ((pi)->plugin)
 #define plugin_dlib(pi) ((pi)->plugin_dl)
 #define plugin_data(pi,cast) ((cast)((pi)->data))
@@ -113,6 +117,8 @@ typedef struct st_plugin_int *plugin_ref;
 #define plugin_equals(p1,p2) ((p1) == (p2))
 #else
 typedef struct st_plugin_int **plugin_ref;
+#define plugin_ref_to_int(A) (A ? A[0] : NULL)
+#define plugin_int_to_ref(A) &(A)
 #define plugin_decl(pi) ((pi)[0]->plugin)
 #define plugin_dlib(pi) ((pi)[0]->plugin_dl)
 #define plugin_data(pi,cast) ((cast)((pi)[0]->data))
@@ -127,6 +133,9 @@ extern char *opt_plugin_load;
 extern char *opt_plugin_dir_ptr;
 extern char opt_plugin_dir[FN_REFLEN];
 extern const LEX_STRING plugin_type_names[];
+extern uint plugin_maturity;
+extern TYPELIB plugin_maturity_values;
+extern const char *plugin_maturity_names[];
 
 extern int plugin_init(int *argc, char **argv, int init_flags);
 extern void plugin_shutdown(void);
@@ -136,7 +145,7 @@ extern bool plugin_is_ready(const LEX_STRING *name, int type);
 #define my_plugin_lock_by_name_ci(A,B,C) plugin_lock_by_name(A,B,C)
 #define my_plugin_lock(A,B) plugin_lock(A,B)
 #define my_plugin_lock_ci(A,B) plugin_lock(A,B)
-extern plugin_ref plugin_lock(THD *thd, plugin_ref *ptr);
+extern plugin_ref plugin_lock(THD *thd, plugin_ref ptr CALLER_INFO_PROTO);
 extern plugin_ref plugin_lock_by_name(THD *thd, const LEX_STRING *name,
                                       int type);
 extern void plugin_unlock(THD *thd, plugin_ref plugin);
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index fcbf2c48896..9cd5d498c14 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -1245,6 +1245,8 @@ static bool mysql_test_insert(Prepared_statement *stmt,
   if (insert_precheck(thd, table_list))
     goto error;
 
+  upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic,
+                               values_list.elements > 1);
   /*
     open temporary memory pool for temporary data allocated by derived
     tables & preparation procedure
@@ -1475,6 +1477,7 @@ static int mysql_test_select(Prepared_statement *stmt,
     goto error;
 
   thd->used_tables= 0;                        // Updated by setup_fields
+  thd->thd_marker.emb_on_expr_nest= 0;
 
   /*
     JOIN::prepare calls
@@ -2167,14 +2170,13 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
   Protocol *save_protocol= thd->protocol;
   Prepared_statement *stmt;
   DBUG_ENTER("mysqld_stmt_prepare");
-
   DBUG_PRINT("prep_query", ("%s", packet));
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (! (stmt= new Prepared_statement(thd)))
-    DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */
+    goto end;           /* out of memory: error is set in Sql_alloc */
 
   if (thd->stmt_map.insert(thd, stmt))
   {
@@ -2182,7 +2184,7 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
       The error is set in the insert. The statement itself
       will be also deleted there (this is how the hash works).
     */
-    DBUG_VOID_RETURN;
+    goto end;
   }
 
   thd->protocol= &thd->protocol_binary;
@@ -2196,6 +2198,7 @@ void mysqld_stmt_prepare(THD *thd, const char *packet, uint packet_length)
   thd->protocol= save_protocol;
 
   /* check_prepared_statemnt sends the metadata packet in case of success */
+end:
   DBUG_VOID_RETURN;
 }
 
@@ -2242,7 +2245,7 @@ static const char *get_dynamic_sql_string(LEX *lex, uint *query_len)
                                          lex->prepared_stmt_code.length))
         && entry->value)
     {
-      my_bool is_var_null;
+      bool is_var_null;
       var_value= entry->val_str(&is_var_null, &str, NOT_FIXED_DEC);
       /*
         NULL value of variable checked early as entry->value so here
@@ -2551,7 +2554,7 @@ void mysqld_stmt_execute(THD *thd, char *packet_arg, uint packet_length)
   packet+= 9;                               /* stmt_id + 5 bytes of flags */
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
   {
@@ -2647,7 +2650,8 @@ void mysqld_stmt_fetch(THD *thd, char *packet, uint packet_length)
   DBUG_ENTER("mysqld_stmt_fetch");
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
+
   status_var_increment(thd->status_var.com_stmt_fetch);
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
   {
@@ -2706,7 +2710,7 @@ void mysqld_stmt_reset(THD *thd, char *packet)
   DBUG_ENTER("mysqld_stmt_reset");
 
   /* First of all clear possible warnings from the previous command */
-  mysql_reset_thd_for_next_command(thd);
+  mysql_reset_thd_for_next_command(thd, opt_userstat_running);
 
   status_var_increment(thd->status_var.com_stmt_reset);
   if (!(stmt= find_prepared_statement(thd, stmt_id)))
@@ -3493,7 +3497,7 @@ Prepared_statement::execute_server_runnable(Server_runnable *server_runnable)
 bool
 Prepared_statement::reprepare()
 {
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   LEX_STRING stmt_db_name= { db, db_length };
@@ -3653,7 +3657,7 @@ bool Prepared_statement::execute(String *expanded_query, bool open_cursor)
   Query_arena *old_stmt_arena;
   bool error= TRUE;
 
-  char saved_cur_db_name_buf[NAME_LEN+1];
+  char saved_cur_db_name_buf[SAFE_NAME_LEN+1];
   LEX_STRING saved_cur_db_name=
     { saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
   bool cur_db_changed;
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index f0f6a1969f5..d36f62ebc5c 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -154,15 +154,37 @@
 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION    (1ULL << 2)
 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT     (1ULL << 3)
 #define OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN (1ULL << 4)
-#define OPTIMIZER_SWITCH_LAST                      (1ULL << 5)
-
-/* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
-#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
-                                  OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)
+#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN       (1ULL << 5)
+#define OPTIMIZER_SWITCH_FIRSTMATCH                (1ULL << 6)
+#define OPTIMIZER_SWITCH_LOOSE_SCAN                (1ULL << 7)
+#define OPTIMIZER_SWITCH_MATERIALIZATION           (1ULL << 8)
+#define OPTIMIZER_SWITCH_SEMIJOIN                  (1ULL << 9)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE (1ULL <<10)
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN  (1ULL <<11)
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE            (1ULL <<12)
+#define OPTIMIZER_SWITCH_TABLE_ELIMINATION         (1ULL <<13)
+#define OPTIMIZER_SWITCH_LAST                      (1ULL <<14)
+
+#ifdef DBUG_OFF
+#define DBUG_ONLY_TABLE_ELIMINATION 0
+#else
+#define DBUG_ONLY_TABLE_ELIMINATION OPTIMIZER_SWITCH_TABLE_ELIMINATION
+#endif
 
+#  define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
+                                    OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN |\
+                                    OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \
+                                    OPTIMIZER_SWITCH_FIRSTMATCH | \
+                                    OPTIMIZER_SWITCH_LOOSE_SCAN | \
+                                    OPTIMIZER_SWITCH_MATERIALIZATION | \
+                                    OPTIMIZER_SWITCH_SEMIJOIN | \
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
+                                    OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
+                                    OPTIMIZER_SWITCH_SUBQUERY_CACHE |\
+                                    DBUG_ONLY_TABLE_ELIMINATION)
 
 /*
   Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
@@ -208,7 +230,9 @@ enum enum_parsing_place
   IN_HAVING,
   SELECT_LIST,
   IN_WHERE,
-  IN_ON
+  IN_ON,
+  IN_GROUP_BY,
+  PARSING_PLACE_SIZE /* always should be the last */
 };
 
 
@@ -225,10 +249,6 @@ enum enum_yes_no_unknown
 };
 
 #ifdef MYSQL_SERVER
-
-#endif /* MYSQL_SERVER */
-
-#ifdef MYSQL_SERVER
 /*
   A set of constants used for checking non aggregated fields and sum
   functions mixture in the ONLY_FULL_GROUP_BY_MODE.
diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc
index ce3d786cf92..dad290bdd3a 100644
--- a/sql/sql_profile.cc
+++ b/sql/sql_profile.cc
@@ -37,7 +37,7 @@
 #include "sql_class.h"                    // THD
 
 #define TIME_FLOAT_DIGITS 9
-/** two vals encoded: (dec*100)+len */
+/** two vals encoded: (len*100)+dec */
 #define TIME_I_S_DECIMAL_SIZE (TIME_FLOAT_DIGITS*100)+(TIME_FLOAT_DIGITS-3)
 
 #define MAX_QUERY_LENGTH 300
@@ -250,6 +250,8 @@ void PROF_MEASUREMENT::collect()
   // which is typically ~15ms. So intervals shorter than that will not be
   // measurable by this function.
   GetProcessTimes(GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser);
+  GetProcessIoCounters(GetCurrentProcess(), &io_count);
+  GetProcessMemoryInfo(GetCurrentProcess(), &mem_count, sizeof(mem_count));
 #endif
 }
 
@@ -657,6 +659,17 @@ int PROFILING::fill_statistics_info(THD *thd_arg, TABLE_LIST *tables, Item *cond
       table->field[9]->store((uint32)(entry->rusage.ru_oublock -
                              previous->rusage.ru_oublock));
       table->field[9]->set_notnull();
+#elif defined(__WIN__)
+      ULONGLONG reads_delta = entry->io_count.ReadOperationCount - 
+                              previous->io_count.ReadOperationCount;
+      ULONGLONG writes_delta = entry->io_count.WriteOperationCount - 
+                              previous->io_count.WriteOperationCount;
+
+      table->field[8]->store((uint32)reads_delta);
+      table->field[8]->set_notnull();
+
+      table->field[9]->store((uint32)writes_delta);
+      table->field[9]->set_notnull();
 #else
       /* TODO: Add block IO info for non-BSD systems */
 #endif
@@ -679,6 +692,13 @@ int PROFILING::fill_statistics_info(THD *thd_arg, TABLE_LIST *tables, Item *cond
       table->field[13]->store((uint32)(entry->rusage.ru_minflt -
                              previous->rusage.ru_minflt), true);
       table->field[13]->set_notnull();
+#elif defined(__WIN__)
+      /* Windows APIs don't easily distinguish between hard and soft page
+         faults, so we just fill the 'major' column and leave the second NULL.
+      */
+      table->field[12]->store((uint32)(entry->mem_count.PageFaultCount -
+                             previous->mem_count.PageFaultCount), true);
+      table->field[12]->set_notnull();
 #else
       /* TODO: Add page fault info for non-BSD systems */
 #endif
diff --git a/sql/sql_profile.h b/sql/sql_profile.h
index 7d17dc69b88..0931fb0df0c 100644
--- a/sql/sql_profile.h
+++ b/sql/sql_profile.h
@@ -43,6 +43,10 @@ int make_profile_table_for_show(THD *thd, ST_SCHEMA_TABLE *schema_table);
 #include "sql_priv.h"
 #include "unireg.h"
 
+#ifdef __WIN__
+#include <psapi.h>
+#endif
+
 #ifdef HAVE_SYS_RESOURCE_H
 #include <sys/resource.h>
 #endif
@@ -174,6 +178,8 @@ private:
   struct rusage rusage;
 #elif defined(_WIN32)
   FILETIME ftKernel, ftUser;
+  IO_COUNTERS io_count;
+  PROCESS_MEMORY_COUNTERS mem_count;
 #endif
 
   char *function;
diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc
index bf38af78536..ea4f53187f9 100644
--- a/sql/sql_reload.cc
+++ b/sql/sql_reload.cc
@@ -274,6 +274,35 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
 #endif
  if (options & REFRESH_USER_RESOURCES)
    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
+  if (options & REFRESH_TABLE_STATS)
+  {
+    mysql_mutex_lock(&LOCK_global_table_stats);
+    free_global_table_stats();
+    init_global_table_stats();
+    mysql_mutex_unlock(&LOCK_global_table_stats);
+  }
+  if (options & REFRESH_INDEX_STATS)
+  {
+    mysql_mutex_lock(&LOCK_global_index_stats);
+    free_global_index_stats();
+    init_global_index_stats();
+    mysql_mutex_unlock(&LOCK_global_index_stats);
+  }
+  if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS))
+  {
+    mysql_mutex_lock(&LOCK_global_user_client_stats);
+    if (options & REFRESH_USER_STATS)
+    {
+      free_global_user_stats();
+      init_global_user_stats();
+    }
+    if (options & REFRESH_CLIENT_STATS)
+    {
+      free_global_client_stats();
+      init_global_client_stats();
+    }
+    mysql_mutex_unlock(&LOCK_global_user_client_stats);
+  }
  *write_to_binlog= tmp_write_to_binlog;
  /*
    If the query was killed then this function must fail.
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 2a2fe3eb36f..32387d676c9 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1,4 +1,5 @@
 /* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009-2010 Monty Program Ab
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -47,10 +48,18 @@
 #include "records.h"             // init_read_record, end_read_record
 #include "filesort.h"            // filesort_free_buffers
 #include "sql_union.h"           // mysql_union
+#include "opt_subselect.h"
+
 #include <m_ctype.h>
 #include <my_bit.h>
 #include <hash.h>
 #include <ft_global.h>
+//#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+//#include "../storage/maria/ha_maria.h"
+//#define TMP_ENGINE_HTON maria_hton
+//#else
+//#define TMP_ENGINE_HTON myisam_hton
+//#endif
 
 #define PREV_BITS(type,A)	((type) (((type) 1 << (A)) -1))
 
@@ -72,14 +81,14 @@ static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 table_map table_map, SELECT_LEX *select_lex,
                                 st_sargable_param **sargables);
 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
-static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 			       table_map used_tables);
-static bool choose_plan(JOIN *join,table_map join_tables);
+bool choose_plan(JOIN *join,table_map join_tables);
 
-static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd,
-                             table_map remaining_tables, uint idx,
-                             double record_count, double read_time);
+void best_access_path(JOIN *join, JOIN_TAB *s, 
+                             table_map remaining_tables, uint idx, 
+                             bool disable_jbuf, double record_count,
+                             POSITION *pos, POSITION *loose_scan_pos);
 static void optimize_straight_join(JOIN *join, table_map join_tables);
 static bool greedy_search(JOIN *join, table_map remaining_tables,
                              uint depth, uint prune_level);
@@ -90,8 +99,9 @@ static bool best_extension_by_limited_search(JOIN *join,
                                              uint prune_level);
 static uint determine_search_depth(JOIN* join);
 C_MODE_START
-static int join_tab_cmp(const void* ptr1, const void* ptr2);
-static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
+static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
 C_MODE_END
 /*
   TODO: 'find_best' is here only temporarily until 'greedy_search' is
@@ -107,8 +117,10 @@ static store_key *get_store_key(THD *thd,
 				KEY_PART_INFO *key_part, uchar *key_buff,
 				uint maybe_null);
 static void make_outerjoin_info(JOIN *join);
+static Item*
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables);
 static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
-static void make_join_readinfo(JOIN *join, ulonglong options);
+static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
 static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
 static void update_depend_map(JOIN *join);
 static void update_depend_map(JOIN *join, ORDER *order);
@@ -126,18 +138,21 @@ static COND* substitute_for_best_equal_field(COND *cond,
                                              COND_EQUAL *cond_equal,
                                              void *table_join_idx);
 static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
-                            COND *conds, bool top);
+                            COND *conds, bool top, bool in_sj);
 static bool check_interleaving_with_nj(JOIN_TAB *next);
 static void restore_prev_nj_state(JOIN_TAB *last);
-static void reset_nj_counters(List<TABLE_LIST> *join_list);
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                                           uint first_unused);
 
 static COND *optimize_cond(JOIN *join, COND *conds,
                            List<TABLE_LIST> *join_list,
-			   Item::cond_result *cond_value);
-static bool open_tmp_table(TABLE *table);
-static bool create_myisam_tmp_table(TABLE *,TMP_TABLE_PARAM *, ulonglong, my_bool);
+                           Item::cond_result *cond_value, 
+                           COND_EQUAL **cond_equal);
+static bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
+static bool create_internal_tmp_table_from_heap2(THD *, TABLE *,
+                                     ENGINE_COLUMNDEF *, ENGINE_COLUMNDEF **, 
+                                     int, bool, handlerton *, const char *);
 static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
 		     Procedure *proc);
 
@@ -147,10 +162,8 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
 static enum_nested_loop_state
 evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
 static enum_nested_loop_state
-flush_cached_records(JOIN *join, JOIN_TAB *join_tab, bool skip_last);
-static enum_nested_loop_state
 end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
+enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
@@ -158,7 +171,7 @@ static enum_nested_loop_state
 end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
-static enum_nested_loop_state
+enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 
 static int test_if_group_changed(List<Cached_item> &list);
@@ -173,7 +186,7 @@ static int join_no_more_records(READ_RECORD *info);
 static int join_read_next(READ_RECORD *info);
 static int join_init_quick_read_record(JOIN_TAB *tab);
 static int test_if_quick_select(JOIN_TAB *tab);
-static int join_init_read_record(JOIN_TAB *tab);
+static bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
 static int join_read_first(JOIN_TAB *tab);
 static int join_read_next(READ_RECORD *info);
 static int join_read_next_same(READ_RECORD *info);
@@ -184,8 +197,14 @@ static int join_ft_read_first(JOIN_TAB *tab);
 static int join_ft_read_next(READ_RECORD *info);
 int join_read_always_key_or_null(JOIN_TAB *tab);
 int join_read_next_same_or_null(READ_RECORD *info);
-static COND *make_cond_for_table(COND *cond,table_map table,
-				 table_map used_table);
+static COND *make_cond_for_table(Item *cond,table_map table,
+				 table_map used_table,
+                                 bool exclude_expensive_cond);
+static COND *make_cond_for_table_from_pred(Item *root_cond, Item *cond,
+                                           table_map tables,
+                                           table_map used_table,
+                                           bool exclude_expensive_cond);
+
 static Item* part_of_refkey(TABLE *form,Field *field);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
@@ -198,7 +217,7 @@ static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
                                      uint *saved_best_key_parts= NULL);
 static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
 				    ha_rows select_limit, bool no_changes,
-                                    key_map *map);
+                                    const key_map *map);
 static bool list_contains_unique_index(TABLE *table,
                           bool (*find_func) (Field *, void *), void *data);
 static bool find_field_in_item_list (Field *field, void *data);
@@ -212,15 +231,8 @@ static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
 				   ulong offset,Item *having);
 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
 				      uint field_count, Field **first_field,
-
 				      ulong key_length,Item *having);
-static int join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count);
-static ulong used_blob_length(CACHE_FIELD **ptr);
-static bool store_record_in_cache(JOIN_CACHE *cache);
-static void reset_cache_read(JOIN_CACHE *cache);
-static void reset_cache_write(JOIN_CACHE *cache);
-static void read_cached_record(JOIN_TAB *tab);
-static bool cmp_buffer_with_ref(JOIN_TAB *tab);
+static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
 static bool setup_new_fields(THD *thd, List<Item> &fields,
 			     List<Item> &all_fields, ORDER *new_order);
 static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
@@ -252,10 +264,15 @@ static bool init_sum_functions(Item_sum **func, Item_sum **end);
 static bool update_sum_func(Item_sum **func);
 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
 			    bool distinct, const char *message=NullS);
-static Item *remove_additional_cond(Item* conds);
 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
-static bool test_if_ref(Item_field *left_item,Item *right_item);
+void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg,
+                           double *record_count_arg);
+static uint make_join_orderinfo(JOIN *join);
+static int
+join_read_record_no_init(JOIN_TAB *tab);
 
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
 
 /**
   This handles SELECT with and without UNION.
@@ -334,8 +351,8 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
       function is aggregated in the select where the outer field was
       resolved or in some more inner select then the Item_direct_ref
       class should be used.
-      Also it should be used if we are grouping by a subquery containing
-      the outer field.
+      It used used also if we are grouping by a subquery that refers
+      this outer field.
     The resolution is done here and not at the fix_fields() stage as
     it can be done only after sum functions are fixed and pulled up to
     selects where they are have to be aggregated.
@@ -352,13 +369,25 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
 
 bool
 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                 Item **ref_pointer_array, ORDER *group_list)
+                 Item **ref_pointer_array)
 {
   Item_outer_ref *ref;
   bool res= FALSE;
   bool direct_ref= FALSE;
 
-  List_iterator<Item_outer_ref> ref_it(select->inner_refs_list);
+  /*
+    Mark the references from  the inner_refs_list that are occurred in
+    the group by expressions. Those references will contain direct
+    references to the referred fields. The markers are set in 
+    the found_in_group_by field of the references from the list.
+  */
+  List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
+  for (ORDER *group= select->join->group_list; group;  group= group->next)
+  {
+    (*group->item)->walk(&Item::check_inner_refs_processor,
+                         TRUE, (uchar *) &ref_it);
+  } 
+    
   while ((ref= ref_it++))
   {
     Item *item= ref->outer_ref;
@@ -402,22 +431,9 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
         }
       }
     }
-    else
-    {
-      /*
-        Check if GROUP BY item trees contain the outer ref:
-        in this case we have to use Item_direct_ref instead of Item_ref.
-      */
-      for (ORDER *group= group_list; group; group= group->next)
-      {
-        if ((*group->item)->walk(&Item::find_item_processor, TRUE,
-                                 (uchar *) ref))
-        {
-          direct_ref= TRUE;
-          break;
-        }
-      }
-    }
+    else if (ref->found_in_group_by)
+      direct_ref= TRUE;
+
     new_ref= direct_ref ?
               new Item_direct_ref(ref->context, item_ref, ref->table_name,
                           ref->field_name, ref->alias_name_used) :
@@ -479,6 +495,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
   mysql_select assumes that all tables are already opened
 *****************************************************************************/
 
+
 /**
   Prepare of whole select (including sub queries in future).
 
@@ -565,24 +582,13 @@ JOIN::prepare(Item ***rref_pointer_array,
       DBUG_RETURN(-1);				/* purecov: inspected */
     thd->lex->allow_sum_func= save_allow_sum_func;
   }
-
-  if (!thd->lex->view_prepare_mode && !(select_options & SELECT_DESCRIBE))
-  {
-    Item_subselect *subselect;
-    /* Is it subselect? */
-    if ((subselect= select_lex->master_unit()->item))
-    {
-      Item_subselect::trans_res res;
-      if ((res= subselect->select_transformer(this)) !=
-	  Item_subselect::RES_OK)
-      {
-        select_lex->fix_prepare_information(thd, &conds, &having);
-	DBUG_RETURN((res == Item_subselect::RES_ERROR));
-      }
-    }
-  }
+  
+  int res= check_and_do_in_subquery_rewrites(this);
 
   select_lex->fix_prepare_information(thd, &conds, &having);
+  
+  if (res)
+    DBUG_RETURN(res);
 
   if (order)
   {
@@ -632,8 +638,7 @@ JOIN::prepare(Item ***rref_pointer_array,
   }
 
   if (select_lex->inner_refs_list.elements &&
-      fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array,
-                     group_list))
+      fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array))
     DBUG_RETURN(-1);
 
   if (group_list)
@@ -750,88 +755,6 @@ err:
 }
 
 
-/*
-  Remove the predicates pushed down into the subquery
-
-  SYNOPSIS
-    JOIN::remove_subq_pushed_predicates()
-      where   IN  Must be NULL
-              OUT The remaining WHERE condition, or NULL
-
-  DESCRIPTION
-    Given that this join will be executed using (unique|index)_subquery,
-    without "checking NULL", remove the predicates that were pushed down
-    into the subquery.
-
-    If the subquery compares scalar values, we can remove the condition that
-    was wrapped into trig_cond (it will be checked when needed by the subquery
-    engine)
-
-    If the subquery compares row values, we need to keep the wrapped
-    equalities in the WHERE clause: when the left (outer) tuple has both NULL
-    and non-NULL values, we'll do a full table scan and will rely on the
-    equalities corresponding to non-NULL parts of left tuple to filter out
-    non-matching records.
-
-    TODO: We can remove the equalities that will be guaranteed to be true by the
-    fact that subquery engine will be using index lookup. This must be done only
-    for cases where there are no conversion errors of significance, e.g. 257
-    that is searched in a byte. But this requires homogenization of the return 
-    codes of all Field*::store() methods.
-*/
-
-void JOIN::remove_subq_pushed_predicates(Item **where)
-{
-  if (conds->type() == Item::FUNC_ITEM &&
-      ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
-      ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
-      ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
-      test_if_ref ((Item_field *)((Item_func *)conds)->arguments()[1],
-                   ((Item_func *)conds)->arguments()[0]))
-  {
-    *where= 0;
-    return;
-  }
-}
-
-
-/*
-  Index lookup-based subquery: save some flags for EXPLAIN output
-
-  SYNOPSIS
-    save_index_subquery_explain_info()
-      join_tab  Subquery's join tab (there is only one as index lookup is
-                only used for subqueries that are single-table SELECTs)
-      where     Subquery's WHERE clause
-
-  DESCRIPTION
-    For index lookup-based subquery (i.e. one executed with
-    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
-    check its EXPLAIN output row should contain 
-      "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) 
-      "Using Where" (TAB_INFO_USING_WHERE)
-      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
-    and set appropriate flags in join_tab->packed_info.
-*/
-
-static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
-{
-  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
-  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
-    join_tab->packed_info |= TAB_INFO_USING_INDEX;
-  if (where)
-    join_tab->packed_info |= TAB_INFO_USING_WHERE;
-  for (uint i = 0; i < join_tab->ref.key_parts; i++)
-  {
-    if (join_tab->ref.cond_guards[i])
-    {
-      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
-      break;
-    }
-  }
-}
-
-
 /**
   global select optimisation.
 
@@ -847,6 +770,9 @@ static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
 int
 JOIN::optimize()
 {
+  ulonglong select_opts_for_readinfo;
+  uint no_jbuf_after;
+
   DBUG_ENTER("JOIN::optimize");
   // to prevent double initialization on EXPLAIN
   if (optimized)
@@ -854,6 +780,12 @@ JOIN::optimize()
   optimized= 1;
 
   thd_proc_info(thd, "optimizing");
+
+  /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+  if (convert_join_subqueries_to_semijoins(this))
+    DBUG_RETURN(1); /* purecov: inspected */
+  /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+
   row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
 	      unit->select_limit_cnt);
   /* select_limit is used to decide if we are likely to scan the whole table */
@@ -903,7 +835,7 @@ JOIN::optimize()
     sel->first_cond_optimization= 0;
 
     /* Convert all outer joins to inner joins if possible */
-    conds= simplify_joins(this, join_list, conds, TRUE);
+    conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
     build_bitmap_for_nested_joins(join_list, 0);
 
     sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
@@ -912,7 +844,7 @@ JOIN::optimize()
       thd->restore_active_arena(arena, &backup);
   }
 
-  conds= optimize_cond(this, conds, join_list, &cond_value);   
+  conds= optimize_cond(this, conds, join_list, &cond_value, &cond_equal);   
   if (thd->is_error())
   {
     error= 1;
@@ -921,7 +853,7 @@ JOIN::optimize()
   }
 
   {
-    having= optimize_cond(this, having, join_list, &having_value);
+    having= optimize_cond(this, having, join_list, &having_value, &having_equal);
     if (thd->is_error())
     {
       error= 1;
@@ -942,7 +874,7 @@ JOIN::optimize()
                            "Impossible HAVING" : "Impossible WHERE";
       tables= 0;
       error= 0;
-      DBUG_RETURN(0);
+      goto setup_subq_exit;
     }
   }
 
@@ -992,7 +924,7 @@ JOIN::optimize()
 	zero_result_cause= "No matching min/max row";
         tables= 0;
 	error=0;
-	DBUG_RETURN(0);
+        goto setup_subq_exit;
       }
       if (res > 1)
       {
@@ -1006,7 +938,7 @@ JOIN::optimize()
         zero_result_cause= "No matching min/max row";
         tables= 0;
         error=0;
-        DBUG_RETURN(0);
+        goto setup_subq_exit;
       }
       DBUG_PRINT("info",("Select tables optimized away"));
       zero_result_cause= "Select tables optimized away";
@@ -1025,19 +957,23 @@ JOIN::optimize()
       if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
       {
         COND *table_independent_conds=
-          make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0);
+          make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0, FALSE);
         DBUG_EXECUTE("where",
                      print_where(table_independent_conds,
                                  "where after opt_sum_query()",
                                  QT_ORDINARY););
         conds= table_independent_conds;
       }
+      goto setup_subq_exit;
     }
   }
   if (!tables_list)
   {
     DBUG_PRINT("info",("No tables"));
     error= 0;
+    /* Create all structures needed for materialized subquery execution. */
+    if (setup_subquery_materialization())
+      DBUG_RETURN(1);
     DBUG_RETURN(0);
   }
   error= -1;					// Error is sent to client
@@ -1081,7 +1017,7 @@ JOIN::optimize()
     zero_result_cause= "no matching row in const table";
     DBUG_PRINT("error",("Error: %s", zero_result_cause));
     error= 0;
-    DBUG_RETURN(0);
+    goto setup_subq_exit;
   }
   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
       best_read > (double) thd->variables.max_join_size &&
@@ -1108,7 +1044,7 @@ JOIN::optimize()
     DBUG_RETURN(1);
   }
   
-  reset_nj_counters(join_list);
+  reset_nj_counters(this, join_list);
   make_outerjoin_info(this);
 
   /*
@@ -1148,39 +1084,11 @@ JOIN::optimize()
     conds=new Item_int((longlong) 0,1);	// Always false
   }
 
-  /*
-    It's necessary to check const part of HAVING cond as
-    there is a chance that some cond parts may become
-    const items after make_join_statisctics(for example
-    when Item is a reference to cost table field from
-    outer join).
-    This check is performed only for those conditions
-    which do not use aggregate functions. In such case
-    temporary table may not be used and const condition
-    elements may be lost during further having
-    condition transformation in JOIN::exec.
-  */
-  if (having && const_table_map && !having->with_sum_func)
-  {
-    having->update_used_tables();
-    having= remove_eq_conds(thd, having, &having_value);
-    if (having_value == Item::COND_FALSE)
-    {
-      having= new Item_int((longlong) 0,1);
-      zero_result_cause= "Impossible HAVING noticed after reading const tables";
-      error= 0;
-      DBUG_RETURN(0);
-    }
-  }
-
-  /* Cache constant expressions in WHERE, HAVING, ON clauses. */
-  cache_const_exprs();
-
   if (make_join_select(this, select, conds))
   {
     zero_result_cause=
       "Impossible WHERE noticed after reading const tables";
-    DBUG_RETURN(0);				// error == 0
+    goto setup_subq_exit;
   }
 
   error= -1;					/* if goto err */
@@ -1405,76 +1313,38 @@ JOIN::optimize()
 	      test(select_options & OPTION_BUFFER_RESULT))) ||
              (rollup.state != ROLLUP::STATE_NONE && select_distinct));
 
-  // No cache for MATCH
-  make_join_readinfo(this,
-		     (select_options & (SELECT_DESCRIBE |
-					SELECT_NO_JOIN_CACHE)) |
-		     (select_lex->ftfunc_list->elements ?
-		      SELECT_NO_JOIN_CACHE : 0));
+  /*
+    If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
+    whose columns are required to be returned in a sorted order, then
+    the proper value for no_jbuf_after should be yielded by a call to
+    the make_join_orderinfo function.
+    Yet the current implementation of FORCE INDEX hints does not
+    allow us to do it in a clean manner.
+  */
+  no_jbuf_after= 1 ? tables : make_join_orderinfo(this);
+
+  select_opts_for_readinfo=
+    (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
+    (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);
+
+  // No cache for MATCH == 'Don't use join buffering when we use MATCH'.
+  if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
+    DBUG_RETURN(1);
 
   /* Perform FULLTEXT search before all regular searches */
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, test(order));
 
-  /*
-    is this simple IN subquery?
-  */
-  if (!group_list && !order &&
-      unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
-      tables == 1 && conds &&
-      !unit->is_union())
-  {
-    if (!having)
-    {
-      Item *where= conds;
-      if (join_tab[0].type == JT_EQ_REF &&
-	  join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-        remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_UNIQUE_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_uniquesubquery_engine(thd,
-                                                                  join_tab,
-                                                                  unit->item,
-                                                                  where)));
-      }
-      else if (join_tab[0].type == JT_REF &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name)
-      {
-	remove_subq_pushed_predicates(&where);
-        save_index_subquery_explain_info(join_tab, where);
-        join_tab[0].type= JT_INDEX_SUBQUERY;
-        error= 0;
-        DBUG_RETURN(unit->item->
-                    change_engine(new
-                                  subselect_indexsubquery_engine(thd,
-                                                                 join_tab,
-                                                                 unit->item,
-                                                                 where,
-                                                                 NULL,
-                                                                 0)));
-      }
-    } else if (join_tab[0].type == JT_REF_OR_NULL &&
-	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
-               having->name == in_having_cond)
-    {
-      join_tab[0].type= JT_INDEX_SUBQUERY;
-      error= 0;
-      conds= remove_additional_cond(conds);
-      save_index_subquery_explain_info(join_tab, conds);
-      DBUG_RETURN(unit->item->
-		  change_engine(new subselect_indexsubquery_engine(thd,
-								   join_tab,
-								   unit->item,
-								   conds,
-                                                                   having,
-								   1)));
-    }
+  /* Create all structures needed for materialized subquery execution. */
+  if (setup_subquery_materialization())
+    DBUG_RETURN(1);
+  
+  int res;
+  if ((res= rewrite_to_index_subquery_engine(this)) != -1)
+    DBUG_RETURN(res);
+  if (setup_subquery_caches())
+    DBUG_RETURN(-1);
 
-  }
   /*
     Need to tell handlers that to play it safe, it should fetch all
     columns of the primary key of the tables: this is because MySQL may
@@ -1538,7 +1408,7 @@ JOIN::optimize()
       for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
       {
         Item *item= *tmp_order->item;
-        if (item->walk(&Item::is_expensive_processor, 0, (uchar*)0))
+        if (item->is_expensive())
         {
           /* Force tmp table without sort */
           need_tmp=1; simple_order=simple_group=0;
@@ -1695,6 +1565,109 @@ JOIN::optimize()
 
   error= 0;
   DBUG_RETURN(0);
+
+setup_subq_exit:
+  /*
+    Even with zero matching rows, subqueries in the HAVING clause may
+    need to be evaluated if there are aggregate functions in the
+    query. If we have planned to materialize the subquery, we need to
+    set it up properly before prematurely leaving optimize().
+  */
+  if (setup_subquery_materialization())
+    DBUG_RETURN(1);
+  error= 0;
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Setup expression caches for subqueries that need them
+
+  @details
+  The function wraps correlated subquery expressions that return one value
+  into objects of the class Item_cache_wrapper setting up an expression
+  cache for each of them. The result values of the subqueries are to be
+  cached together with the corresponding sets of the parameters - outer
+  references of the subqueries.
+
+  @retval FALSE OK
+  @retval TRUE  Error
+*/
+
+bool JOIN::setup_subquery_caches()
+{
+  DBUG_ENTER("JOIN::setup_subquery_caches");
+
+  /*
+    We have to check all this condition together because items created in
+    one of this clauses can be moved to another one by optimizer
+  */
+  if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
+      select_lex->expr_cache_may_be_used[IN_HAVING] ||
+      select_lex->expr_cache_may_be_used[IN_ON] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    if (conds)
+      conds= conds->transform(&Item::expr_cache_insert_transformer,
+                              (uchar*) thd);
+    for (JOIN_TAB *tab= join_tab + const_tables;
+         tab < join_tab + tables ;
+         tab++)
+    {
+      if (tab->select_cond)
+        tab->select_cond=
+          tab->select_cond->transform(&Item::expr_cache_insert_transformer,
+                                      (uchar*) thd);
+      if (tab->cache_select && tab->cache_select->cond)
+        tab->cache_select->cond=
+          tab->cache_select->
+          cond->transform(&Item::expr_cache_insert_transformer,
+                          (uchar*) thd);
+
+    }
+
+    if (having)
+      having= having->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    if (tmp_having)
+    {
+      DBUG_ASSERT(having == NULL);
+      tmp_having= tmp_having->transform(&Item::expr_cache_insert_transformer,
+                                        (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
+      select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
+      select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    List_iterator<Item> li(all_fields);
+    Item *item;
+    while ((item= li++))
+    {
+      Item *new_item=
+        item->transform(&Item::expr_cache_insert_transformer, (uchar*) thd);
+      if (new_item != item)
+      {
+        thd->change_item_tree(li.ref(), new_item);
+      }
+    }
+    for (ORDER *group= group_list; group ; group= group->next)
+    {
+      *group->item=
+        (*group->item)->transform(&Item::expr_cache_insert_transformer,
+                                  (uchar*) thd);
+    }
+  }
+  if (select_lex->expr_cache_may_be_used[NO_MATTER])
+  {
+    for (ORDER *ord= order; ord; ord= ord->next)
+    {
+      *ord->item=
+        (*ord->item)->transform(&Item::expr_cache_insert_transformer,
+                                (uchar*) thd);
+    }
+  }
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -1732,6 +1705,7 @@ JOIN::reinit()
     free_io_cache(exec_tmp_table2);
     filesort_free_buffers(exec_tmp_table2,0);
   }
+  clear_sj_tmp_tables(this);
   if (items0)
     set_items_ref_array(items0);
 
@@ -1754,6 +1728,16 @@ JOIN::reinit()
       func->clear();
   }
 
+  if (no_rows_in_result_called)
+  {
+    /* Reset effect of possible no_rows_in_result() */
+    List_iterator_fast<Item> it(fields_list);
+    Item *item;
+
+    no_rows_in_result_called= 0;
+    while ((item= it++))
+      item->restore_to_before_no_rows_in_result();
+  }  
   DBUG_RETURN(0);
 }
 
@@ -1961,7 +1945,10 @@ JOIN::exec()
     DBUG_PRINT("info", ("%s", thd->proc_info));
     if (!curr_join->sort_and_group &&
         curr_join->const_tables != curr_join->tables)
-      curr_join->join_tab[curr_join->const_tables].sorted= 0;
+    {
+      JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+      first_tab->sorted= test(first_tab->loosescan_match_tab);
+    }
     if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0)))
     {
       error= tmp_error;
@@ -1973,7 +1960,9 @@ JOIN::exec()
       curr_join->having= curr_join->tmp_having= 0; // Allready done
     
     /* Change sum_fields reference to calculated fields in tmp_table */
+#ifdef HAVE_valgrind
     if (curr_join != this)
+#endif
       curr_join->all_fields= *curr_all_fields;
     if (!items1)
     {
@@ -1993,7 +1982,9 @@ JOIN::exec()
 				      fields_list.elements, all_fields))
 	  DBUG_VOID_RETURN;
       }
+#ifdef HAVE_valgrind
       if (curr_join != this)
+#endif
       {
         curr_join->tmp_all_fields1= tmp_all_fields1;
         curr_join->tmp_fields_list1= tmp_fields_list1;
@@ -2039,8 +2030,8 @@ JOIN::exec()
     */
 
     if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list,
-						   curr_join->order) || 
-				  curr_join->select_distinct)) ||
+                                                    curr_join->order) || 
+                                   curr_join->select_distinct)) ||
 	(curr_join->select_distinct &&
 	 curr_join->tmp_table_param.using_indirect_summary_function))
     {					/* Must copy to another table */
@@ -2126,7 +2117,10 @@ JOIN::exec()
       curr_join->group_list= 0;
       if (!curr_join->sort_and_group &&
           curr_join->const_tables != curr_join->tables)
-        curr_join->join_tab[curr_join->const_tables].sorted= 0;
+      {
+        JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
+        first_tab->sorted= test(first_tab->loosescan_match_tab);
+      }
       if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
 	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
 				0)))
@@ -2146,7 +2140,13 @@ JOIN::exec()
 				     tmp_fields_list2, tmp_all_fields2, 
 				     fields_list.elements, tmp_all_fields1))
 	  DBUG_VOID_RETURN;
+#ifdef HAVE_valgrind
+        /*
+          Some GCCs use memcpy() for struct assignment, even for x=x.
+          GCC bug 19410: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
+        */
         if (curr_join != this)
+#endif
         {
           curr_join->tmp_fields_list2= tmp_fields_list2;
           curr_join->tmp_all_fields2= tmp_all_fields2;
@@ -2206,7 +2206,9 @@ JOIN::exec()
       tmp_table_param.save_copy_field= curr_join->tmp_table_param.copy_field;
       tmp_table_param.save_copy_field_end=
 	curr_join->tmp_table_param.copy_field_end;
+#ifdef HAVE_valgrind
       if (curr_join != this)
+#endif
       {
         curr_join->tmp_all_fields3= tmp_all_fields3;
         curr_join->tmp_fields_list3= tmp_fields_list3;
@@ -2249,7 +2251,7 @@ JOIN::exec()
 
       Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
 						 used_tables,
-						 used_tables);
+						 (table_map)0, FALSE);
       if (sort_table_cond)
       {
 	if (!curr_table->select)
@@ -2265,14 +2267,14 @@ JOIN::exec()
 	    DBUG_VOID_RETURN;
 	  curr_table->select->cond->fix_fields(thd, 0);
 	}
-	curr_table->select_cond= curr_table->select->cond;
+        curr_table->set_select_cond(curr_table->select->cond, __LINE__);
 	curr_table->select_cond->top_level_item();
 	DBUG_EXECUTE("where",print_where(curr_table->select->cond,
 					 "select and having",
                                          QT_ORDINARY););
 	curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having,
 						   ~ (table_map) 0,
-						   ~used_tables);
+						   ~used_tables, FALSE);
 	DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
                                          "having after sort",
                                          QT_ORDINARY););
@@ -2404,10 +2406,11 @@ JOIN::destroy()
       anywhere else (as we need to keep the join is reusable).
     */
     tmp_table_param.cleanup();
-    tmp_table_param.copy_field= tmp_join->tmp_table_param.copy_field= 0;
+    tmp_join->tmp_table_param.copy_field= 0;
     DBUG_RETURN(tmp_join->destroy());
   }
   cond_equal= 0;
+  having_equal= 0;
 
   cleanup(1);
  /* Cleanup items referencing temporary table columns */
@@ -2418,6 +2421,7 @@ JOIN::destroy()
   if (exec_tmp_table2)
     free_tmp_table(thd, exec_tmp_table2);
   delete select;
+  destroy_sj_tmp_tables(this);
   delete_dynamic(&keyuse);
   delete procedure;
   DBUG_RETURN(error);
@@ -2514,10 +2518,9 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
       }
       else
       {
-        err= join->prepare(rref_pointer_array, tables, wild_num,
-                           conds, og_num, order, group, having, proc_param,
-                           select_lex, unit);
-        if (err)
+        if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+                                conds, og_num, order, group, having,
+                                proc_param, select_lex, unit)))
 	{
 	  goto err;
 	}
@@ -2528,14 +2531,20 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
   }
   else
   {
+    /*
+      When in EXPLAIN, delay deleting the joins so that they are still
+      available when we're producing EXPLAIN EXTENDED warning text.
+    */
+    if (select_options & SELECT_DESCRIBE)
+      free_join= 0;
+
     if (!(join= new JOIN(thd, fields, select_options, result)))
 	DBUG_RETURN(TRUE);
     thd_proc_info(thd, "init");
     thd->used_tables=0;                         // Updated by setup_fields
-    err= join->prepare(rref_pointer_array, tables, wild_num,
-                       conds, og_num, order, group, having, proc_param,
-                       select_lex, unit);
-    if (err)
+    if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+                            conds, og_num, order, group, having, proc_param,
+                            select_lex, unit)))
     {
       goto err;
     }
@@ -2573,6 +2582,52 @@ err:
   DBUG_RETURN(join->error);
 }
 
+
+/**
+  Setup for execution all subqueries of a query, for which the optimizer
+  chose hash semi-join.
+
+  @details Iterate over all subqueries of the query, and if they are under an
+  IN predicate, and the optimizer chose to compute it via hash semi-join:
+  - try to initialize all data structures needed for the materialized execution
+    of the IN predicate,
+  - if this fails, then perform the IN=>EXISTS transformation which was
+    previously blocked during JOIN::prepare.
+
+  This method is part of the "code generation" query processing phase.
+
+  This phase must be called after substitute_for_best_equal_field() because
+  that function may replace items with other items from a multiple equality,
+  and we need to reference the correct items in the index access method of the
+  IN predicate.
+
+  @return Operation status
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::setup_subquery_materialization()
+{
+  for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un;
+       un= un->next_unit())
+  {
+    for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+    {
+      Item_subselect *subquery_predicate= sl->master_unit()->item;
+      if (subquery_predicate &&
+          subquery_predicate->substype() == Item_subselect::IN_SUBS)
+      {
+        Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate;
+        if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
+            in_subs->setup_engine())
+          return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
 /*****************************************************************************
   Create JOIN_TABS, make a guess about the table types,
   Approximate how many records will be used in each table
@@ -2590,8 +2645,9 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
   if (select)
   {
     select->head=table;
+    table->reginfo.impossible_range=0;
     if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0,
-                                          limit, 0)) == 1)
+                                          limit, 0, FALSE)) == 1)
       DBUG_RETURN(select->quick->records);
     if (error == -1)
     {
@@ -2617,6 +2673,7 @@ typedef struct st_sargable_param
   uint num_values;           /* number of values in the above array      */
 } SARGABLE_PARAM;  
 
+
 /**
   Calculate the best possible join and initialize the join structure.
 
@@ -2640,6 +2697,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
   JOIN_TAB *stat,*stat_end,*s,**stat_ref;
   KEYUSE *keyuse,*start_keyuse;
   table_map outer_join=0;
+  table_map no_rows_const_tables= 0;
   SARGABLE_PARAM *sargables= 0;
   JOIN_TAB *stat_vector[MAX_TABLES+1];
   DBUG_ENTER("make_join_statistics");
@@ -2700,6 +2758,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 #endif
       {						// Empty table
         s->dependent= 0;                        // Ignore LEFT JOIN depend.
+        no_rows_const_tables |= table->map;
 	set_position(join,const_count++,s,(KEYUSE*) 0);
 	continue;
       }
@@ -2709,7 +2768,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
         s->embedding_map|= embedding->nested_join->nj_map;
       continue;
     }
-    if (embedding)
+    if (embedding && !(embedding->sj_on_expr && ! embedding->embedding))
     {
       /* s belongs to a nested join, maybe to several embedded joins */
       s->embedding_map= 0;
@@ -2736,6 +2795,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
         !table->fulltext_searched && !join->no_const_tables)
     {
       set_position(join,const_count++,s,(KEYUSE*) 0);
+      no_rows_const_tables |= table->map;
     }
   }
   stat_vector[i]=0;
@@ -2746,53 +2806,45 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     /* 
        Build transitive closure for relation 'to be dependent on'.
        This will speed up the plan search for many cases with outer joins,
-       as well as allow us to catch illegal cross references.
+       as well as allow us to catch illegal cross references/
        Warshall's algorithm is used to build the transitive closure.
-       As we may restart the outer loop upto 'table_count' times, the
-       complexity of the algorithm is O((number of tables)^3).
-       However, most of the iterations will be shortcircuited when
-       there are no pedendencies to propogate.
+       As we use bitmaps to represent the relation the complexity
+       of the algorithm is O((number of tables)^2).
+
+       The classic form of the Warshall's algorithm would look like: 
+       for (i= 0; i < table_count; i++)
+       {
+         for (j= 0; j < table_count; j++)
+         {
+           for (k= 0; k < table_count; k++)
+           {
+             if (bitmap_is_set(stat[j].dependent, i) &&
+                 bitmap_is_set(stat[i].dependent, k))
+               bitmap_set_bit(stat[j].dependent, k);
+         }
+       }  
     */
-    for (i= 0 ; i < table_count ; i++)
+    
+    for (s= stat ; s < stat_end ; s++)
     {
-      uint j;
-      table= stat[i].table;
-
-      if (!table->reginfo.join_tab->dependent)
-        continue;
-
-      /* Add my dependencies to other tables depending on me */
-      for (j= 0, s= stat ; j < table_count ; j++, s++)
+      table= s->table;
+      for (JOIN_TAB *t= stat ; t < stat_end ; t++)
       {
-        if (s->dependent & table->map)
-        {
-          table_map was_dependent= s->dependent;
-          s->dependent |= table->reginfo.join_tab->dependent;
-          /*
-            If we change dependencies for a table we already have
-            processed: Redo dependency propagation from this table.
-          */
-          if (i > j && s->dependent != was_dependent)
-          {
-            i = j-1;
-            break;
-          }
-        }
+        if (t->dependent & table->map)
+          t->dependent |= table->reginfo.join_tab->dependent;
       }
+      if (outer_join & s->table->map)
+        s->table->maybe_null= 1;
     }
-
+    /* Catch illegal cross references for outer joins */
     for (i= 0, s= stat ; i < table_count ; i++, s++)
     {
-      /* Catch illegal cross references for outer joins */
       if (s->dependent & s->table->map)
       {
         join->tables=0;			// Don't use join->table
         my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
         goto error;
       }
-
-      if (outer_join & s->table->map)
-        s->table->maybe_null= 1;
       s->key_dependent= s->dependent;
     }
   }
@@ -2803,26 +2855,34 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
                             ~outer_join, join->select_lex, &sargables))
       goto error;
 
-  /* Read tables with 0 or 1 rows (system tables) */
-  join->const_table_map= 0;
+  join->const_table_map= no_rows_const_tables;
+  join->const_tables= const_count;
+  eliminate_tables(join);
+  join->const_table_map &= ~no_rows_const_tables;
+  const_count= join->const_tables;
+  found_const_table_map= join->const_table_map;
 
+  /* Read tables with 0 or 1 rows (system tables) */
   for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
        p_pos < p_end ;
        p_pos++)
   {
-    int tmp;
     s= p_pos->table;
-    s->type=JT_SYSTEM;
-    join->const_table_map|=s->table->map;
-    if ((tmp=join_read_const_table(s, p_pos)))
-    {
-      if (tmp > 0)
-	goto error;		// Fatal error
-    }
-    else
+    if (! (s->table->map & join->eliminated_tables))
     {
-      found_const_table_map|= s->table->map;
-      s->table->pos_in_table_list->optimized_away= TRUE;
+      int tmp;
+      s->type=JT_SYSTEM;
+      join->const_table_map|=s->table->map;
+      if ((tmp=join_read_const_table(s, p_pos)))
+      {
+        if (tmp > 0)
+          goto error;		// Fatal error
+      }
+      else
+      {
+        found_const_table_map|= s->table->map;
+        s->table->pos_in_table_list->optimized_away= TRUE;
+      }
     }
   }
 
@@ -2848,7 +2908,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
         substitution of a const table the key value happens to be null
         then we can state that there are no matches for this equi-join.
       */  
-      if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map)
+      if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
+         !(table->map & join->eliminated_tables))
       {
         /* 
           When performing an outer join operation if there are no matching rows
@@ -2925,9 +2986,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
 	    keyuse++;
 	  } while (keyuse->table == table && keyuse->key == key);
 
+          TABLE_LIST *embedding= table->pos_in_table_list->embedding;
+          /*
+            TODO (low priority): currently we ignore the const tables that
+            are within a semi-join nest which is within an outer join nest.
+            The effect of this is that we don't do const substitution for
+            such tables.
+          */
 	  if (eq_part.is_prefix(table->key_info[key].key_parts) &&
               !table->fulltext_searched && 
-              !table->pos_in_table_list->embedding)
+              (!embedding || (embedding->sj_on_expr && !embedding->embedding)))
 	  {
             if (table->key_info[key].flags & HA_NOSAME)
             {
@@ -2961,7 +3029,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       }
     }
   } while (join->const_table_map & found_ref && ref_changed);
-
+ 
   /* 
     Update info on indexes that can be used for search lookups as
     reading const tables may has added new sargable predicates. 
@@ -3011,9 +3079,16 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       all select distinct fields participate in one index.
     */
     add_group_and_distinct_keys(join, s);
-
-    if (!s->const_keys.is_clear_all() &&
-        !s->table->pos_in_table_list->embedding)
+    
+    /*
+      Perform range analysis if there are keys it could use (1). 
+      Don't do range analysis if we're on the inner side of an outer join (2).
+      Do range analysis if we're on the inner side of a semi-join (3).
+    */
+    if (!s->const_keys.is_clear_all() &&                        // (1)
+        (!s->table->pos_in_table_list->embedding ||             // (2)
+         (s->table->pos_in_table_list->embedding &&             // (3)
+          s->table->pos_in_table_list->embedding->sj_on_expr))) // (3)
     {
       ha_rows records;
       SQL_SELECT *select;
@@ -3057,16 +3132,24 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
     }
   }
 
+  if (pull_out_semijoin_tables(join))
+    DBUG_RETURN(TRUE);
+
   join->join_tab=stat;
   join->map2table=stat_ref;
   join->all_tables= table_vector;
   join->const_tables=const_count;
   join->found_const_table_map=found_const_table_map;
 
+  if (join->const_tables != join->tables)
+    optimize_keyuse(join, keyuse_array);
+   
+  if (optimize_semijoin_nests(join, all_table_map))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+
   /* Find an optimal join order of the non-constant tables. */
   if (join->const_tables != join->tables)
   {
-    optimize_keyuse(join, keyuse_array);
     if (choose_plan(join, all_table_map & ~join->const_table_map))
       goto error;
   }
@@ -3113,25 +3196,53 @@ typedef struct key_field_t {
   */
   bool          null_rejecting; 
   bool         *cond_guard; /* See KEYUSE::cond_guard */
+  uint          sj_pred_no; /* See KEYUSE::sj_pred_no */
 } KEY_FIELD;
 
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS		1
-#define KEY_OPTIMIZE_REF_OR_NULL	2
-
 /**
   Merge new key definitions to old ones, remove those not used in both.
 
   This is called for OR between different levels.
 
-  To be able to do 'ref_or_null' we merge a comparison of a column
-  and 'column IS NULL' to one test.  This is useful for sub select queries
-  that are internally transformed to something like:.
+  That is, the function operates on an array of KEY_FIELD elements which has
+  two parts:
+
+                      $LEFT_PART             $RIGHT_PART
+             +-----------------------+-----------------------+
+            start                new_fields                 end
+         
+  $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
+  parts of the OR condition. Our task is to produce an array of KEY_FIELD 
+  elements that would correspond to "$LEFT_PART OR $RIGHT_PART". 
+  
+  The rules for combining elements are as follows:
+
+    (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
+     
+     = AND_ij (keyfieldA_i OR keyfieldB_j)
+  
+  We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
+  fields. For those referring to the same field, the logic is as follows:
+    
+    t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different 
+                                         we can't produce a single equality,
+                                         so produce nothing)
+
+    t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
+
+    t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
+                                          KEY_OPTIMIZE_REF_OR_NULL flag
+
+  The last one is for ref_or_null access. We have handling for this special
+  because it's needed for evaluating IN subqueries that are internally
+  transformed into 
 
   @code
-  SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL 
+    EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
   @endcode
 
+  See add_key_fields() for discussion of what is and_level.
+
   KEY_FIELD::null_rejecting is processed as follows: @n
   result has null_rejecting=true if it is set for both ORed references.
   for example:
@@ -3251,6 +3362,52 @@ merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
 }
 
 
+/*
+  Given a field, return its index in semi-join's select list, or UINT_MAX
+
+  DESCRIPTION
+    Given a field, we find its table; then see if the table is within a
+    semi-join nest and if the field was in select list of the subselect.
+    If it was, we return field's index in the select list. The value is used
+    by LooseScan strategy.
+*/
+
+static uint get_semi_join_select_list_index(Field *field)
+{
+  uint res= UINT_MAX;
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
+      emb_sj_nest->sj_on_expr)
+  {
+    Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
+    st_select_lex *subq_lex= subq_pred->unit->first_select();
+    if (subq_pred->left_expr->cols() == 1)
+    {
+      Item *sel_item= subq_lex->ref_pointer_array[0];
+      if (sel_item->type() == Item::FIELD_ITEM &&
+          ((Item_field*)sel_item)->field->eq(field))
+      {
+        res= 0;
+      }
+    }
+    else
+    {
+      for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+      {
+        Item *sel_item= subq_lex->ref_pointer_array[i];
+        if (sel_item->type() == Item::FIELD_ITEM &&
+            ((Item_field*)sel_item)->field->eq(field))
+        {
+          res= i;
+          break;
+        }
+      }
+    }
+  }
+  return res;
+}
+
+
 /**
   Add a possible key to array of possible keys if it's usable as a key
 
@@ -3410,11 +3567,19 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond,
     We use null_rejecting in add_not_null_conds() to add
     'othertbl.field IS NOT NULL' to tab->select_cond.
   */
-  (*key_fields)->null_rejecting= ((cond->functype() == Item_func::EQ_FUNC ||
-                                   cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
-                                  ((*value)->type() == Item::FIELD_ITEM) &&
-                                  ((Item_field*)*value)->field->maybe_null());
+  {
+    Item *real= (*value)->real_item();
+    if (((cond->functype() == Item_func::EQ_FUNC) ||
+         (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
+        (real->type() == Item::FIELD_ITEM) &&
+        ((Item_field*)real)->field->maybe_null())
+      (*key_fields)->null_rejecting= true;
+    else
+      (*key_fields)->null_rejecting= false;
+  }
   (*key_fields)->cond_guard= NULL;
+
+  (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
   (*key_fields)++;
 }
 
@@ -3488,11 +3653,30 @@ static bool
 is_local_field (Item *field)
 {
   return field->real_item()->type() == Item::FIELD_ITEM
-    && !(field->used_tables() & OUTER_REF_TABLE_BIT)
-    && !((Item_field *)field->real_item())->depended_from;
+     && !(field->used_tables() & OUTER_REF_TABLE_BIT)
+     && !((Item_field *)field->real_item())->depended_from;
 }
 
 
+/*
+  In this and other functions, and_level is a number that is ever-growing
+  and is different for the contents of every AND or OR clause. For example,
+  when processing clause
+
+     (a AND b AND c) OR (x AND y)
+  
+  we'll have
+   * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
+   * KEY_FIELD elements for (x AND y) are assigned and_level=2
+   * OR operation is performed, and whatever elements are left after it are
+     assigned and_level=3.
+
+  The primary reason for having and_level attribute is the OR operation which 
+  uses and_level to mark KEY_FIELDs that should get into the result of the OR
+  operation
+
+*/
+
 static void
 add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
                COND *cond, table_map usable_tables,
@@ -3672,8 +3856,9 @@ add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
         {
           if (!field->eq(item->field))
           {
+            Item *tmp_item= item;
             add_key_field(key_fields, *and_level, cond_func, field,
-                          TRUE, (Item **) &item, 1, usable_tables,
+                          TRUE, &tmp_item, 1, usable_tables,
                           sargables);
           }
         }
@@ -3732,6 +3917,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
 	  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
           keyuse.null_rejecting= key_field->null_rejecting;
           keyuse.cond_guard= key_field->cond_guard;
+          keyuse.sj_pred_no= key_field->sj_pred_no;
 	  if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
             return TRUE;
 	}
@@ -3769,7 +3955,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
           ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
            (functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
         cond_func= (Item_func_match *) arg0;
-      else if (arg0->const_item() &&
+      else if (arg0->const_item() && arg0->cols() == 1 &&
                 arg1->type() == Item::FUNC_ITEM &&
                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
                ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
@@ -3804,6 +3990,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
   keyuse.used_tables=cond_func->key_item()->used_tables();
   keyuse.optimize= 0;
   keyuse.keypart_map= 0;
+  keyuse.sj_pred_no= UINT_MAX;
   return insert_dynamic(keyuse_array,(uchar*) &keyuse);
 }
 
@@ -3868,20 +4055,34 @@ static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
                                   SARGABLE_PARAM **sargables)
 {
   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
+  List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
+  bool have_another = FALSE;
   table_map tables= 0;
   TABLE_LIST *table;
   DBUG_ASSERT(nested_join_table->nested_join);
 
-  while ((table= li++))
+  while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
+                                            (table= li++))))
   {
     if (table->nested_join)
-      add_key_fields_for_nj(join, table, end, and_level, sargables);
+    {
+      if (!table->on_expr)
+      {
+        /* It's a semi-join nest. Walk into it as if it wasn't a nest */
+        have_another= TRUE;
+        li2= li;
+        li= List_iterator<TABLE_LIST>(table->nested_join->join_list); 
+      }
+      else
+        add_key_fields_for_nj(join, table, end, and_level, sargables);
+    }
     else
       if (!table->on_expr)
         tables |= table->table->map;
   }
-  add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
-                 sargables);
+  if (nested_join_table->on_expr)
+    add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
+                   sargables);
 }
 
 
@@ -4048,7 +4249,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
 	  continue;
       }
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
       /* Valgrind complains about overlapped memcpy when save_pos==use. */
       if (save_pos != use)
 #endif
@@ -4065,6 +4266,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
     (void) set_dynamic(keyuse,(uchar*) &key_end,i);
     keyuse->elements=i;
   }
+  DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
   return FALSE;
 }
 
@@ -4258,14 +4460,17 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
 
 /** Save const tables first as used tables. */
 
-static void
-set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
 {
   join->positions[idx].table= table;
   join->positions[idx].key=key;
   join->positions[idx].records_read=1.0;	/* This is a const table */
   join->positions[idx].ref_depend_map= 0;
 
+  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
+  join->positions[idx].sj_strategy= SJ_OPT_NONE;
+  join->positions[idx].use_join_buffer= FALSE;
+
   /* Move the const table as down as possible in best_ref */
   JOIN_TAB **pos=join->best_ref+idx+1;
   JOIN_TAB *next=join->best_ref[idx];
@@ -4296,23 +4501,28 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   @param thd              thread for the connection that submitted the query
   @param remaining_tables set of tables not included into the partial plan yet
   @param idx              the length of the partial plan
+  @param disable_jbuf     TRUE<=> Don't use join buffering
   @param record_count     estimate for the number of records returned by the
                           partial plan
-  @param read_time        the cost of the partial plan
+  @param pos              OUT Table access plan
+  @param loose_scan_pos   OUT Table plan that uses loosescan, or set cost to 
+                              DBL_MAX if not possible.
 
   @return
     None
 */
 
-static void
+void
 best_access_path(JOIN      *join,
                  JOIN_TAB  *s,
-                 THD       *thd,
                  table_map remaining_tables,
                  uint      idx,
+                 bool      disable_jbuf,
                  double    record_count,
-                 double    read_time)
+                 POSITION *pos,
+                 POSITION *loose_scan_pos)
 {
+  THD *thd= join->thd;
   KEYUSE *best_key=         0;
   uint best_max_key_part=   0;
   my_bool found_constraint= 0;
@@ -4322,8 +4532,13 @@ best_access_path(JOIN      *join,
   table_map best_ref_depends_map= 0;
   double tmp;
   ha_rows rec;
-  DBUG_ENTER("best_access_path");
+  bool best_uses_jbuf= FALSE;
 
+  Loose_scan_opt loose_scan_opt;
+  DBUG_ENTER("best_access_path");
+  
+  loose_scan_opt.init(join, s, remaining_tables);
+  
   if (s->keyuse)
   {                                            /* Use key if possible */
     TABLE *table= s->table;
@@ -4348,6 +4563,10 @@ best_access_path(JOIN      *join,
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
 
+      loose_scan_opt.next_ref_key();
+      DBUG_PRINT("info", ("Considering ref access on key %s",
+                          keyuse->table->key_info[keyuse->key].name));
+
       do /* For each keypart */
       {
         uint keypart= keyuse->keypart;
@@ -4356,7 +4575,6 @@ best_access_path(JOIN      *join,
         
         do /* For each way to access the keypart */
         {
-
           /*
             if 1. expression doesn't refer to forward tables
                2. we won't get two ref-or-null's
@@ -4385,6 +4603,7 @@ best_access_path(JOIN      *join,
             if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
               ref_or_null_part |= keyuse->keypart_map;
           }
+          loose_scan_opt.add_keyuse(remaining_tables, keyuse);
           keyuse++;
         } while (keyuse->table == table && keyuse->key == key &&
                  keyuse->keypart == keypart);
@@ -4394,7 +4613,7 @@ best_access_path(JOIN      *join,
       /*
         Assume that that each key matches a proportional part of table.
       */
-      if (!found_part && !ft_key)
+      if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
         continue;                               // Nothing usable found
 
       if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
@@ -4414,10 +4633,10 @@ best_access_path(JOIN      *join,
       }
       else
       {
-        found_constraint= 1;
-        /*
-          Check if we found full key
-        */
+        found_constraint= test(found_part);
+        loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
+
+        /* Check if we found full key */
         if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
             !ref_or_null_part)
         {                                         /* use eq key */
@@ -4491,14 +4710,10 @@ best_access_path(JOIN      *join,
             tmp= records;
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1, min(tmp,s->worst_seeks)-1);
+            tmp*= record_count;
           }
         }
         else
@@ -4658,20 +4873,18 @@ best_access_path(JOIN      *join,
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->covering_keys.is_set(key))
-            {
-              /* we can use only index tree */
-              uint keys_per_block= table->file->stats.block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp= record_count*(tmp+keys_per_block-1)/keys_per_block;
-            }
+              tmp= table->file->keyread_time(key, 1, tmp);
             else
-              tmp= record_count*min(tmp,s->worst_seeks);
+              tmp= table->file->read_time(key, 1, min(tmp,s->worst_seeks)-1);
+            tmp*= record_count;
           }
           else
             tmp= best_time;                    // Do nothing
         }
+        loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
+
       } /* not ft_key */
-      if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+      if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
       {
         best_time= tmp + records/(double) TIME_FOR_COMPARE;
         best= tmp;
@@ -4680,7 +4893,7 @@ best_access_path(JOIN      *join,
         best_max_key_part= max_key_part;
         best_ref_depends_map= found_ref;
       }
-    }
+    } /* for each key */
     records= best_records;
   }
 
@@ -4693,7 +4906,7 @@ best_access_path(JOIN      *join,
     This is because table scans uses index and we would not win
     anything by using a table scan.
 
-    A word for word translation of the below if-statement in psergey's
+    A word for word translation of the below if-statement in sergefp's
     understanding: we check if we should use table scan if:
     (1) The found 'ref' access produces more records than a table scan
         (or index scan, or quick select), or 'ref' is more expensive than
@@ -4744,6 +4957,7 @@ best_access_path(JOIN      *join,
       than FULL: so if RANGE is present, it's always preferred to FULL.
       Here we estimate its cost.
     */
+
     if (s->quick)
     {
       /*
@@ -4758,12 +4972,14 @@ best_access_path(JOIN      *join,
       tmp= record_count *
         (s->quick->read_time +
          (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+
+      loose_scan_opt.check_range_access(join, idx, s->quick);
     }
     else
     {
       /* Estimate cost of reading table. */
       tmp= s->table->file->scan_time();
-      if (s->table->map & join->outer_join)     // Can't use join cache
+      if ((s->table->map & join->outer_join) || disable_jbuf)     // Can't use join cache
       {
         /*
           For each record we have to:
@@ -4809,15 +5025,21 @@ best_access_path(JOIN      *join,
       best_key= 0;
       /* range/index_merge/ALL/index access method are "independent", so: */
       best_ref_depends_map= 0;
+      best_uses_jbuf= test(!disable_jbuf && !((s->table->map & 
+                                               join->outer_join)));
     }
   }
-
+  
   /* Update the cost information for the current partial plan */
-  join->positions[idx].records_read= records;
-  join->positions[idx].read_time=    best;
-  join->positions[idx].key=          best_key;
-  join->positions[idx].table=        s;
-  join->positions[idx].ref_depend_map= best_ref_depends_map;
+  pos->records_read= records;
+  pos->read_time=    best;
+  pos->key=          best_key;
+  pos->table=        s;
+  pos->ref_depend_map= best_ref_depends_map;
+  pos->loosescan_key= MAX_KEY;
+  pos->use_join_buffer= best_uses_jbuf;
+   
+  loose_scan_opt.save_to_position(s, loose_scan_pos);
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -4852,7 +5074,7 @@ best_access_path(JOIN      *join,
     TRUE        Fatal error
 */
 
-static bool
+bool
 choose_plan(JOIN *join, table_map join_tables)
 {
   uint search_depth= join->thd->variables.optimizer_search_depth;
@@ -4861,19 +5083,34 @@ choose_plan(JOIN *join, table_map join_tables)
   DBUG_ENTER("choose_plan");
 
   join->cur_embedding_map= 0;
-  reset_nj_counters(join->join_list);
-  /*
-    if (SELECT_STRAIGHT_JOIN option is set)
-      reorder tables so dependent tables come after tables they depend 
-      on, otherwise keep tables in the order they were specified in the query 
-    else
-      Apply heuristic: pre-sort all access plans with respect to the number of
-      records accessed.
-  */
-  my_qsort(join->best_ref + join->const_tables,
-           join->tables - join->const_tables, sizeof(JOIN_TAB*),
-           straight_join ? join_tab_cmp_straight : join_tab_cmp);
-  
+  join->cur_dups_producing_tables= 0;
+  reset_nj_counters(join, join->join_list);
+  qsort2_cmp jtab_sort_func;
+
+  if (join->emb_sjm_nest)
+  {
+    /* We're optimizing semi-join materialization nest, so put the 
+       tables from this semi-join as first
+    */
+    jtab_sort_func= join_tab_cmp_embedded_first;
+  }
+  else
+  {
+    /*
+      if (SELECT_STRAIGHT_JOIN option is set)
+        reorder tables so dependent tables come after tables they depend 
+        on, otherwise keep tables in the order they were specified in the query 
+      else
+        Apply heuristic: pre-sort all access plans with respect to the number of
+        records accessed.
+    */
+    jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+  }
+  my_qsort2(join->best_ref + join->const_tables,
+            join->tables - join->const_tables, sizeof(JOIN_TAB*),
+            jtab_sort_func, (void*)join->emb_sjm_nest);
+  join->cur_sj_inner_tables= 0;
+
   if (straight_join)
   {
     optimize_straight_join(join, join_tables);
@@ -4937,7 +5174,7 @@ choose_plan(JOIN *join, table_map join_tables)
 */
 
 static int
-join_tab_cmp(const void* ptr1, const void* ptr2)
+join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
 {
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
@@ -4959,18 +5196,58 @@ join_tab_cmp(const void* ptr1, const void* ptr2)
 */
 
 static int
-join_tab_cmp_straight(const void* ptr1, const void* ptr2)
+join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
+{
+  JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
+  JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+
+  /*
+    We don't do subquery flattening if the parent or child select has
+    STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
+    is hardly useful.
+  */
+  DBUG_ASSERT(!jt1->emb_sj_nest);
+  DBUG_ASSERT(!jt2->emb_sj_nest);
+
+  if (jt1->dependent & jt2->table->map)
+    return 1;
+  if (jt2->dependent & jt1->table->map)
+    return -1;
+  return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
+}
+
+
+/*
+  Same as join_tab_cmp but tables from within the given semi-join nest go 
+  first. Used when the optimizing semi-join materialization nests.
+*/
+
+static int
+join_tab_cmp_embedded_first(const void *emb,  const void* ptr1, const void* ptr2)
 {
+  const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
 
+  if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
+    return -1;
+  if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
+    return 1;
+
   if (jt1->dependent & jt2->table->map)
     return 1;
   if (jt2->dependent & jt1->table->map)
     return -1;
+
+  if (jt1->found_records > jt2->found_records)
+    return 1;
+  if (jt1->found_records < jt2->found_records)
+    return -1; 
+  
   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
 }
 
+
 /**
   Heuristic procedure to automatically guess a reasonable degree of
   exhaustiveness for the greedy search procedure.
@@ -5054,15 +5331,20 @@ optimize_straight_join(JOIN *join, table_map join_tables)
   uint idx= join->const_tables;
   double    record_count= 1.0;
   double    read_time=    0.0;
- 
+  POSITION  loose_scan_pos;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     /* Find the best access method from 's' to the current partial plan */
-    best_access_path(join, s, join->thd, join_tables, idx,
-                     record_count, read_time);
+    best_access_path(join, s, join_tables, idx, FALSE, record_count,
+                     join->positions + idx, &loose_scan_pos);
+
     /* compute the cost of the new plan extended with 's' */
     record_count*= join->positions[idx].records_read;
     read_time+=    join->positions[idx].read_time;
+    advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
+                     &loose_scan_pos);
+
     join_tables&= ~(s->table->map);
     ++idx;
   }
@@ -5171,11 +5453,17 @@ greedy_search(JOIN      *join,
   uint      size_remain;    // cardinality of remaining_tables
   POSITION  best_pos;
   JOIN_TAB  *best_table; // the next plan node to be added to the curr QEP
+  uint      n_tables; // ==join->tables or # tables in the sj-mat nest we're optimizing
 
   DBUG_ENTER("greedy_search");
 
   /* number of tables that remain to be optimized */
-  size_remain= my_count_bits(remaining_tables);
+  n_tables= size_remain= my_count_bits(remaining_tables &
+                                       (join->emb_sjm_nest? 
+                                         (join->emb_sjm_nest->sj_inner_tables &
+                                          ~join->const_table_map)
+                                         :
+                                         ~(table_map)0));
 
   do {
     /* Find the extension of the current QEP with the lowest cost */
@@ -5195,7 +5483,7 @@ greedy_search(JOIN      *join,
         'join->best_positions' contains a complete optimal extension of the
         current partial QEP.
       */
-      DBUG_EXECUTE("opt", print_plan(join, join->tables,
+      DBUG_EXECUTE("opt", print_plan(join, n_tables,
                                      record_count, read_time, read_time,
                                      "optimal"););
       DBUG_RETURN(FALSE);
@@ -5223,6 +5511,7 @@ greedy_search(JOIN      *join,
     /* This has been already checked by best_extension_by_limited_search */
     DBUG_ASSERT(!is_interleave_error);
 
+
     /* find the position of 'best_table' in 'join->best_ref' */
     best_idx= idx;
     JOIN_TAB *pos= join->best_ref[best_idx];
@@ -5247,6 +5536,49 @@ greedy_search(JOIN      *join,
 }
 
 
+/*
+  Calculate a cost of given partial join order
+ 
+  SYNOPSIS
+    get_partial_join_cost()
+      join               IN    Join to use. join->positions holds the
+                               partial join order
+      idx                IN    # tables in the partial join order
+      read_time_arg      OUT   Store read time here 
+      record_count_arg   OUT   Store record count here
+
+  DESCRIPTION
+
+    This is needed for semi-join materialization code. The idea is that 
+    we detect sj-materialization after we've put all sj-inner tables into
+    the join prefix
+
+      prefix-tables semi-join-inner-tables  tN
+                                             ^--we're here
+
+    and we'll need to get the cost of prefix-tables prefix again.
+*/
+
+void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
+                           double *record_count_arg)
+{
+  double record_count= 1;
+  double read_time= 0.0;
+  for (uint i= join->const_tables; i < n_tables + join->const_tables ; i++)
+  {
+    if (join->best_positions[i].records_read)
+    {
+      record_count *= join->best_positions[i].records_read;
+      read_time += join->best_positions[i].read_time;
+    }
+  }
+  *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+  *record_count_arg= record_count;
+}
+
+
+
+
 /**
   Find a good, possibly optimal, query execution plan (QEP) by a possibly
   exhaustive search.
@@ -5393,21 +5725,33 @@ best_extension_by_limited_search(JOIN      *join,
   DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
 
+  table_map allowed_tables= ~(table_map)0;
+  if (join->emb_sjm_nest)
+    allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
+
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     table_map real_table_bit= s->table->map;
     if ((remaining_tables & real_table_bit) && 
+        (allowed_tables & real_table_bit) &&
         !(remaining_tables & s->dependent) && 
         (!idx || !check_interleaving_with_nj(s)))
     {
       double current_record_count, current_read_time;
+      POSITION *position= join->positions + idx;
 
       /* Find the best access method from 's' to the current partial plan */
-      best_access_path(join, s, thd, remaining_tables, idx,
-                       record_count, read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, remaining_tables, idx, FALSE, record_count, 
+                       join->positions + idx, &loose_scan_pos);
+
       /* Compute the cost of extending the plan with 's' */
-      current_record_count= record_count * join->positions[idx].records_read;
-      current_read_time=    read_time + join->positions[idx].read_time;
+
+      current_record_count= record_count * position->records_read;
+      current_read_time=    read_time + position->read_time;
+
+      advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
+                       &current_read_time, &loose_scan_pos);
 
       /* Expand only partial plans with lower cost than the best QEP so far */
       if ((current_read_time +
@@ -5421,6 +5765,7 @@ best_extension_by_limited_search(JOIN      *join,
                                         (double) TIME_FOR_COMPARE),
                                        "prune_by_cost"););
         restore_prev_nj_state(s);
+        restore_prev_sj_state(remaining_tables, s, idx);
         continue;
       }
 
@@ -5453,11 +5798,12 @@ best_extension_by_limited_search(JOIN      *join,
                                          current_read_time,
                                          "pruned_by_heuristic"););
           restore_prev_nj_state(s);
+          restore_prev_sj_state(remaining_tables, s, idx);
           continue;
         }
       }
 
-      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) )
+      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
         if (best_extension_by_limited_search(join,
@@ -5494,6 +5840,7 @@ best_extension_by_limited_search(JOIN      *join,
                                        "full_plan"););
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(remaining_tables, s, idx);
     }
   }
   DBUG_RETURN(FALSE);
@@ -5547,8 +5894,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
         (!idx|| !check_interleaving_with_nj(s)))
     {
       double records, best;
-      best_access_path(join, s, thd, rest_tables, idx, record_count, 
-                       read_time);
+      POSITION loose_scan_pos;
+      best_access_path(join, s, rest_tables, idx, FALSE, record_count, 
+                       join->positions + idx, &loose_scan_pos);
       records= join->positions[idx].records_read;
       best= join->positions[idx].read_time;
       /*
@@ -5557,6 +5905,9 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
       */
       double current_record_count=record_count*records;
       double current_read_time=read_time+best;
+      advance_sj_state(join, rest_tables, s, idx, &current_record_count, 
+                       &current_read_time, &loose_scan_pos);
+
       if (best_record_count > current_record_count ||
 	  best_read_time > current_read_time ||
 	  (idx == join->const_tables && s->table == join->sort_by_table))
@@ -5575,6 +5926,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
 	swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
       }
       restore_prev_nj_state(s);
+      restore_prev_sj_state(rest_tables, s, idx);
       if (join->select_options & SELECT_STRAIGHT_JOIN)
 	break;				// Don't test all combinations
     }
@@ -5587,13 +5939,14 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
   Find how much space the prevous read not const tables takes in cache.
 */
 
-static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
+void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
 {
   uint null_fields,blobs,fields,rec_length;
   Field **f_ptr,*field;
-  MY_BITMAP *read_set= join_tab->table->read_set;;
+  uint uneven_bit_fields;
+  MY_BITMAP *read_set= join_tab->table->read_set;
 
-  null_fields= blobs= fields= rec_length=0;
+  uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
   for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
   {
     if (bitmap_is_set(read_set, field->field_index))
@@ -5605,21 +5958,30 @@ static void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
 	blobs++;
       if (!(flags & NOT_NULL_FLAG))
 	null_fields++;
+      if (field->type() == MYSQL_TYPE_BIT &&
+          ((Field_bit*)field)->bit_len)
+        uneven_bit_fields++;
     }
   }
-  if (null_fields)
+  if (null_fields || uneven_bit_fields)
     rec_length+=(join_tab->table->s->null_fields+7)/8;
   if (join_tab->table->maybe_null)
     rec_length+=sizeof(my_bool);
   if (blobs)
   {
     uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
-			     (join_tab->table->s->reclength- rec_length));
+			     (join_tab->table->s->reclength-rec_length));
     rec_length+=(uint) max(4,blob_length);
-  }
+  }  
+  /*
+    psergey-todo: why we don't count here rowid that we might need to store
+    when using DuplicateElimination?
+  */
   join_tab->used_fields=fields;
   join_tab->used_fieldlength=rec_length;
   join_tab->used_blobs=blobs;
+  join_tab->used_null_fields= null_fields;
+  join_tab->used_uneven_bit_fields= uneven_bit_fields;
 }
 
 
@@ -5728,8 +6090,24 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref)
 }
 
 
-/**
-  Set up join struct according to best position.
+/*
+  Set up join struct according to the picked join order in
+  
+  SYNOPSIS
+    get_best_combination()
+      join  The join to process (the picked join order is mainly in
+            join->best_positions)
+
+  DESCRIPTION
+    Setup join structures according the picked join order
+    - finalize semi-join strategy choices (see
+        fix_semijoin_strategies_for_picked_join_order)
+    - create join->join_tab array and put there the JOIN_TABs in the join order
+    - create data structures describing ref access methods.
+
+  RETURN 
+    FALSE  OK
+    TRUE   Out of memory
 */
 
 static bool
@@ -5751,6 +6129,9 @@ get_best_combination(JOIN *join)
   join->full_join=0;
 
   used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
+
+  fix_semijoin_strategies_for_picked_join_order(join);
+  
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
   {
     TABLE *form;
@@ -5764,14 +6145,17 @@ get_best_combination(JOIN *join)
     if (j->type == JT_CONST)
       continue;					// Handled in make_join_stat..
 
+    j->loosescan_match_tab= NULL;  //non-nulls will be set later
     j->ref.key = -1;
     j->ref.key_parts=0;
 
     if (j->type == JT_SYSTEM)
       continue;
-    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key))
+    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key) || 
+        (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
     {
       j->type=JT_ALL;
+      j->index= join->best_positions[tablenr].loosescan_key;
       if (tablenr != join->const_tables)
 	join->full_join=1;
     }
@@ -5852,7 +6236,7 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
   j->ref.key_err=1;
   j->ref.has_record= FALSE;
   j->ref.null_rejecting= 0;
-  j->ref.use_count= 0;
+  j->ref.disable_cache= FALSE;
   keyuse=org_keyuse;
 
   store_key **ref_key= j->ref.key_copy;
@@ -5889,7 +6273,8 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
 	store_key_item tmp(thd, keyinfo->key_part[i].field,
                            key_buff + maybe_null,
                            maybe_null ?  key_buff : 0,
-                           keyinfo->key_part[i].length, keyuse->val);
+                           keyinfo->key_part[i].length, keyuse->val,
+                           FALSE);
 	if (thd->is_fatal_error)
 	  DBUG_RETURN(TRUE);
 	tmp.copy();
@@ -5970,7 +6355,7 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
 			    key_buff + maybe_null,
 			    maybe_null ? key_buff : 0,
 			    key_part->length,
-			    keyuse->val);
+			    keyuse->val, FALSE);
 }
 
 /**
@@ -6032,6 +6417,7 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   tables= 1;
   const_tables= 0;
   const_table_map= 0;
+  eliminated_tables= 0;
   tmp_table_param.field_count= tmp_table_param.sum_func_count=
     tmp_table_param.func_count= 0;
   /*
@@ -6047,9 +6433,12 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   row_limit= unit->select_limit_cnt;
   do_send_rows= row_limit ? 1 : 0;
 
-  join_tab->cache.buff=0;			/* No caching */
+  join_tab->use_join_cache= FALSE;
+  join_tab->cache=0;			        /* No caching */
   join_tab->table=temp_table;
+  join_tab->cache_select= 0;
   join_tab->select=0;
+  join_tab->set_select_cond(NULL, __LINE__);
   join_tab->select_cond=0;
   join_tab->quick=0;
   join_tab->type= JT_ALL;			/* Map through all records */
@@ -6064,6 +6453,11 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
   join_tab->read_first_record= join_init_read_record;
   join_tab->join= this;
   join_tab->ref.key_parts= 0;
+  join_tab->keep_current_rowid= FALSE;
+  join_tab->flush_weedout_table= join_tab->check_weed_out_table= NULL;
+  join_tab->do_firstmatch= NULL;
+  join_tab->loosescan_match_tab= NULL;
+  join_tab->emb_sj_nest= NULL;
   bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record));
   temp_table->status=0;
   temp_table->null_row=0;
@@ -6155,8 +6549,9 @@ static void add_not_null_conds(JOIN *join)
         {
           Item *item= tab->ref.items[keypart];
           Item *notnull;
-          DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
-          Item_field *not_null_item= (Item_field*)item;
+          Item *real= item->real_item();
+          DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
+          Item_field *not_null_item= (Item_field*)real;
           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
           /*
             For UPDATE queries such as:
@@ -6178,7 +6573,9 @@ static void add_not_null_conds(JOIN *join)
           DBUG_EXECUTE("where",print_where(notnull,
                                            referred_tab->table->alias,
                                            QT_ORDINARY););
-          add_cond_and_fix(&referred_tab->select_cond, notnull);
+          COND *new_cond= referred_tab->select_cond;
+          add_cond_and_fix(&new_cond, notnull);
+          referred_tab->set_select_cond(new_cond, __LINE__);
         }
       }
     }
@@ -6261,7 +6658,6 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
     This function can be called only after the execution plan
     has been chosen.
 */
-
 static void
 make_outerjoin_info(JOIN *join)
 {
@@ -6288,6 +6684,9 @@ make_outerjoin_info(JOIN *join)
     }    
     for ( ; embedding ; embedding= embedding->embedding)
     {
+      /* Ignore sj-nests: */
+      if (!embedding->on_expr)
+        continue;
       NESTED_JOIN *nested_join= embedding->nested_join;
       if (!nested_join->counter)
       {
@@ -6303,10 +6702,17 @@ make_outerjoin_info(JOIN *join)
       }
       if (!tab->first_inner)  
         tab->first_inner= nested_join->first_nested;
-      if (++nested_join->counter < nested_join->join_list.elements)
+      if (tab->table->reginfo.not_exists_optimize)
+        tab->first_inner->table->reginfo.not_exists_optimize= 1;         
+      if (++nested_join->counter < nested_join->n_tables)
         break;
       /* Table tab is the last inner table for nested join. */
       nested_join->first_nested->last_inner= tab;
+      if (tab->first_inner->table->reginfo.not_exists_optimize)
+      {
+        for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++)
+          join_tab->table->reginfo.not_exists_optimize= 1;
+      } 
     }
   }
   DBUG_VOID_RETURN;
@@ -6322,6 +6728,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
   {
     add_not_null_conds(join);
     table_map used_tables;
+    /*
+      Step #1: Extract constant condition
+       - Extract and check the constant part of the WHERE 
+       - Extract constant parts of ON expressions from outer 
+         joins and attach them appropriately.
+    */
     if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
     {                        /* there may be a select without a cond. */    
       if (join->tables > 1)
@@ -6330,11 +6742,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	  thd->lex->current_select->master_unit() ==
 	  &thd->lex->unit)		// not upper level SELECT
         join->const_table_map|=RAND_TABLE_BIT;
+
+      /*
+        Extract expressions that depend on constant tables
+        1. Const part of the join's WHERE clause can be checked immediately
+           and if it is not satisfied then the join has empty result
+        2. Constant parts of outer joins' ON expressions must be attached 
+           there inside the triggers.
+      */
       {						// Check const tables
         COND *const_cond=
 	  make_cond_for_table(cond,
                               join->const_table_map,
-                              (table_map) 0);
+                              (table_map) 0, TRUE);
         DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
         for (JOIN_TAB *tab= join->join_tab+join->const_tables;
              tab < join->join_tab+join->tables ; tab++)
@@ -6344,18 +6764,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
             JOIN_TAB *cond_tab= tab->first_inner;
             COND *tmp= make_cond_for_table(*tab->on_expr_ref,
                                            join->const_table_map,
-                                         (  table_map) 0);
+                                         (  table_map) 0, FALSE);
             if (!tmp)
               continue;
             tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
             if (!tmp)
               DBUG_RETURN(1);
             tmp->quick_fix_field();
-            cond_tab->select_cond= !cond_tab->select_cond ? tmp :
-	                            new Item_cond_and(cond_tab->select_cond,
-                                                      tmp);
+            COND *new_cond= !cond_tab->select_cond ? tmp :
+              new Item_cond_and(cond_tab->select_cond, tmp);
+            cond_tab->set_select_cond(new_cond, __LINE__);
             if (!cond_tab->select_cond)
 	      DBUG_RETURN(1);
+            cond_tab->select_cond->update_used_tables();
             cond_tab->select_cond->quick_fix_field();
           }       
         }
@@ -6366,20 +6787,42 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         }
       }
     }
+
+    /*
+      Step #2: Extract WHERE/ON parts
+    */
+    table_map save_used_tables= 0;
     used_tables=((select->const_tables=join->const_table_map) |
 		 OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
+    JOIN_TAB *tab;
+    table_map current_map;
     for (uint i=join->const_tables ; i < join->tables ; i++)
     {
-      JOIN_TAB *tab=join->join_tab+i;
+      tab= join->join_tab+i;
       /*
         first_inner is the X in queries like:
         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
       */
       JOIN_TAB *first_inner_tab= tab->first_inner; 
-      table_map current_map= tab->table->map;
+      current_map= tab->table->map;
       bool use_quick_range=0;
       COND *tmp;
 
+      /* 
+        Tables that are within SJ-Materialization nests cannot have their
+        conditions referring to preceding non-const tables.
+         - If we're looking at the first SJM table, reset used_tables
+           to refer to only allowed tables
+      */
+      if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info && 
+          tab->emb_sj_nest->sj_mat_info->is_used &&
+          !(used_tables & tab->emb_sj_nest->sj_inner_tables))
+      {
+        save_used_tables= used_tables;
+        used_tables= join->const_table_map | OUTER_REF_TABLE_BIT | 
+                     RAND_TABLE_BIT;
+      }
+
       /*
 	Following force including random expression in last table condition.
 	It solve problem with select like SELECT * FROM t1 WHERE rand() > 0.5
@@ -6409,7 +6852,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 
       tmp= NULL;
       if (cond)
-        tmp= make_cond_for_table(cond,used_tables,current_map);
+        tmp= make_cond_for_table(cond, used_tables, current_map, FALSE);
       if (cond && !tmp && tab->quick)
       {						// Outer join
         if (tab->type != JT_ALL)
@@ -6434,7 +6877,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
         }
 
       }
-      if (tmp || !cond || tab->type == JT_REF)
+      if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
+          tab->type == JT_EQ_REF || first_inner_tab)
       {
         DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
 	SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
@@ -6456,15 +6900,16 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           */
           if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
             DBUG_RETURN(1);
-          tab->select_cond=sel->cond=tmp;
+          sel->cond= tmp;
+          tab->set_select_cond(tmp, __LINE__);
           /* Push condition to storage engine if this is enabled
              and the condition is not guarded */
           tab->table->file->pushed_cond= NULL;
-	  if (thd->variables.optimizer_switch &
-              OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)
+	  if ((thd->variables.optimizer_switch &
+              OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) && !first_inner_tab)
           {
             COND *push_cond= 
-              make_cond_for_table(tmp, current_map, current_map);
+              make_cond_for_table(tmp, current_map, current_map, FALSE);
             if (push_cond)
             {
               /* Push condition to handler */
@@ -6474,7 +6919,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           }
         }
         else
-          tab->select_cond= sel->cond= NULL;
+        {
+          sel->cond= NULL;
+          tab->set_select_cond(NULL, __LINE__);
+        }
 
 	sel->head=tab->table;
         DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
@@ -6516,7 +6964,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	  */
 
 	  if ((cond &&
-              !tab->keys.is_subset(tab->const_keys) && i > 0) ||
+               (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
 	      (!tab->const_keys.is_clear_all() && i == join->const_tables &&
 	       join->unit->select_limit_cnt <
 	       join->best_positions[i].records_read &&
@@ -6541,7 +6989,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 				       (join->select_options &
 					OPTION_FOUND_ROWS ?
 					HA_POS_ERROR :
-					join->unit->select_limit_cnt), 0) < 0)
+					join->unit->select_limit_cnt), 0,
+                                        FALSE) < 0)
             {
 	      /*
 		Before reporting "Impossible WHERE" for the whole query
@@ -6554,7 +7003,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
                                          (join->select_options &
                                           OPTION_FOUND_ROWS ?
                                           HA_POS_ERROR :
-                                          join->unit->select_limit_cnt),0) < 0)
+                                          join->unit->select_limit_cnt),0,
+                                          FALSE) < 0)
 		DBUG_RETURN(1);			// Impossible WHERE
             }
             else
@@ -6581,19 +7031,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 	      2 : 1;
 	    sel->read_tables= used_tables & ~current_map;
 	  }
-	  if (i != join->const_tables && tab->use_quick != 2)
+	  if (i != join->const_tables && tab->use_quick != 2 &&
+              !tab->first_inner)
 	  {					/* Read with cache */
 	    if (cond &&
                 (tmp=make_cond_for_table(cond,
 					 join->const_table_map |
 					 current_map,
-					 current_map)))
+					 current_map, FALSE)))
 	    {
               DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
-	      tab->cache.select=(SQL_SELECT*)
+	      tab->cache_select=(SQL_SELECT*)
 		thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
-	      tab->cache.select->cond=tmp;
-	      tab->cache.select->read_tables=join->const_table_map;
+	      tab->cache_select->cond=tmp;
+	      tab->cache_select->read_tables=join->const_table_map;
 	    }
 	  }
 	}
@@ -6617,7 +7068,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           JOIN_TAB *cond_tab= join_tab->first_inner;
           COND *tmp= make_cond_for_table(*join_tab->on_expr_ref,
                                          join->const_table_map,
-                                         (table_map) 0);
+                                         (table_map) 0, FALSE);
           if (!tmp)
             continue;
           tmp= new Item_func_trig_cond(tmp, &cond_tab->not_null_compl);
@@ -6629,6 +7080,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           if (!cond_tab->select_cond)
 	    DBUG_RETURN(1);
           cond_tab->select_cond->quick_fix_field();
+          cond_tab->select_cond->update_used_tables();
+          if (cond_tab->select)
+            cond_tab->select->cond= cond_tab->select_cond; 
         }       
       }
 
@@ -6649,7 +7103,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
           current_map= tab->table->map;
           used_tables2|= current_map;
           COND *tmp_cond= make_cond_for_table(on_expr, used_tables2,
-                                             current_map);
+                                              current_map, FALSE);
           if (tmp_cond)
           {
             JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab;
@@ -6683,16 +7137,113 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
             if (!cond_tab->select_cond)
 	      DBUG_RETURN(1);
             cond_tab->select_cond->quick_fix_field();
+            cond_tab->select_cond->update_used_tables();
+            if (cond_tab->select)
+              cond_tab->select->cond= cond_tab->select_cond;
           }              
         }
         first_inner_tab= first_inner_tab->first_upper;       
       }
+
+      if (save_used_tables && !(used_tables & 
+                                ~(tab->emb_sj_nest->sj_inner_tables |
+                                  join->const_table_map | PSEUDO_TABLE_BITS)))
+      {
+        /*
+          We have reached the end of semi join nest. That is, the join order
+          looks like this:
+
+           outer_tbl1 SJ-Materialize(inner_tbl1 ... inner_tblN) outer_tbl ...
+                                                               ^
+                                                                \-we're here
+          At this point, we need to produce two conditions
+           - A condition that can be checked when we have all of the sj-inner
+             tables (inner_tbl1 ... inner_tblN). This will be used while doing
+             materialization.
+           - A condition that can be checked when we have all of the tables
+             in the prefix (both inner and outer).
+        */
+        tab->emb_sj_nest->sj_mat_info->join_cond= 
+          cond ?
+             make_cond_after_sjm(cond, cond, save_used_tables, used_tables):
+            NULL;
+        used_tables= save_used_tables | used_tables;
+        save_used_tables= 0;
+      }
+
     }
   }
   DBUG_RETURN(0);
 }
 
 
+/*
+  Determine {after which table we'll produce ordered set} 
+
+  SYNOPSIS
+    make_join_orderinfo()
+     join
+
+   
+  DESCRIPTION 
+    Determine if the set is already ordered for ORDER BY, so it can 
+    disable join cache because it will change the ordering of the results.
+    Code handles sort table that is at any location (not only first after 
+    the const tables) despite the fact that it's currently prohibited.
+    We must disable join cache if the first non-const table alone is
+    ordered. If there is a temp table the ordering is done as a last
+    operation and doesn't prevent join cache usage.
+
+  RETURN
+    Number of table after which the set will be ordered
+    join->tables if we don't need an ordered set 
+*/
+
+static uint make_join_orderinfo(JOIN *join)
+{
+  JOIN_TAB *tab;
+  if (join->need_tmp)
+    return join->tables;
+  tab= join->get_sort_by_join_tab();
+  return tab ? tab-join->join_tab : join->tables;
+}
+
+/*
+  Deny usage of join buffer for the specified table
+
+  SYNOPSIS
+    set_join_cache_denial()
+      tab    join table for which join buffer usage is to be denied  
+     
+  DESCRIPTION
+    The function denies usage of join buffer when joining the table 'tab'.
+    The table is marked as not employing any join buffer. If a join cache
+    object has been already allocated for the table this object is destroyed.
+
+  RETURN
+    none    
+*/
+
+static
+void set_join_cache_denial(JOIN_TAB *join_tab)
+{
+  if (join_tab->cache)
+  {
+    join_tab->cache->free();
+    join_tab->cache= 0;
+  }
+  if (join_tab->use_join_cache)
+  {
+    join_tab->use_join_cache= FALSE;
+    /*
+      It could be only sub_select(). It could not be sub_seject_sjm because we
+      don't do join buffering for the first table in sjm nest. 
+    */
+    join_tab[-1].next_select= sub_select;
+  }
+}
+
+
 /**
   The default implementation of unlock-row method of READ_RECORD,
   used in all access methods.
@@ -6705,7 +7256,6 @@ void rr_unlock_row(st_join_table *tab)
 }
 
 
-
 /**
   Pick the appropriate access method functions
 
@@ -6756,47 +7306,434 @@ pick_table_access_method(JOIN_TAB *tab)
 }
 
 
-static void
-make_join_readinfo(JOIN *join, ulonglong options)
+/* 
+  Revise usage of join buffer for the specified table and the whole nest   
+
+  SYNOPSIS
+    revise_cache_usage()
+      tab    join table for which join buffer usage is to be revised  
+
+  DESCRIPTION
+    The function revise the decision to use a join buffer for the table 'tab'.
+    If this table happened to be among the inner tables of a nested outer join/
+    semi-join the functions denies usage of join buffers for all of them
+
+  RETURN
+    none    
+*/
+
+static
+void revise_cache_usage(JOIN_TAB *join_tab)
+{
+  JOIN_TAB *tab;
+  JOIN_TAB *first_inner;
+
+  if (join_tab->first_inner)
+  {
+    JOIN_TAB *end_tab= join_tab;
+    for (first_inner= join_tab->first_inner; 
+         first_inner;
+         first_inner= first_inner->first_upper)           
+    {
+      for (tab= end_tab-1; tab >= first_inner; tab--)
+        set_join_cache_denial(tab);
+      end_tab= first_inner;
+    }
+  }
+  else if (join_tab->first_sj_inner_tab)
+  {
+    first_inner= join_tab->first_sj_inner_tab;
+    for (tab= join_tab-1; tab >= first_inner; tab--)
+    {
+      if (tab->first_sj_inner_tab == first_inner)
+        set_join_cache_denial(tab);
+    }
+  }
+  else set_join_cache_denial(join_tab);
+}
+
+
+/*
+  end_select-compatible function that writes the record into a sjm temptable
+  
+  SYNOPSIS
+    end_sj_materialize()
+      join            The join 
+      join_tab        Last join table
+      end_of_records  FALSE <=> This call is made to pass another record 
+                                combination
+                      TRUE  <=> EOF (no action)
+
+  DESCRIPTION
+    This function is used by semi-join materialization to capture suquery's
+    resultset and write it into the temptable (that is, materialize it).
+
+  NOTE
+    This function is used only for semi-join materialization. Non-semijoin
+    materialization uses different mechanism.
+
+  RETURN 
+    NESTED_LOOP_OK
+    NESTED_LOOP_ERROR
+*/
+
+static enum_nested_loop_state 
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+  int error;
+  THD *thd= join->thd;
+  SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
+  DBUG_ENTER("end_sj_materialize");
+  if (!end_of_records)
+  {
+    TABLE *table= sjm->table;
+
+    List_iterator<Item> it(sjm->sjm_table_cols);
+    Item *item;
+    while ((item= it++))
+    {
+      if (item->is_null())
+        DBUG_RETURN(NESTED_LOOP_OK);
+    }
+    fill_record(thd, table->field, sjm->sjm_table_cols, TRUE, FALSE);
+    if (thd->is_error())
+      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    if ((error= table->file->ha_write_row(table->record[0])))
+    {
+      /* create_myisam_from_heap will generate error if needed */
+      if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+          create_internal_tmp_table_from_heap(thd, table,
+                                              sjm->sjm_table_param.start_recinfo, 
+                                              &sjm->sjm_table_param.recinfo, error, 1))
+        DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    }
+  }
+  DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/* 
+  Check whether a join buffer can be used to join the specified table   
+
+  SYNOPSIS
+    check_join_cache_usage()
+      tab                 joined table to check join buffer usage for
+      join                join for which the check is performed
+      options             options of the join
+      no_jbuf_after       don't use join buffering after table with this number
+      icp_other_tables_ok OUT TRUE if condition pushdown supports
+                          other tables presence
+
+  DESCRIPTION
+    The function finds out whether the table 'tab' can be joined using a join
+    buffer. This check is performed after the best execution plan for 'join'
+    has been chosen. If the function decides that a join buffer can be employed
+    then it selects the most appropriate join cache object that contains this
+    join buffer.
+    The result of the check and the type of the the join buffer to be used
+    depend on:
+      - the access method to access rows of the joined table
+      - whether the join table is an inner table of an outer join or semi-join
+      - the join cache level set for the query
+      - the join 'options'.
+    In any case join buffer is not used if the number of the joined table is
+    greater than 'no_jbuf_after'. It's also never used if the value of
+    join_cache_level is equal to 0.
+    The other valid settings of join_cache_level lay in the interval 1..8.
+    If join_cache_level==1|2 then join buffer is used only for inner joins
+    with 'JT_ALL' access method.  
+    If join_cache_level==3|4 then join buffer is used for any join operation
+    (inner join, outer join, semi-join) with 'JT_ALL' access method.
+    If 'JT_ALL' access method is used to read rows of the joined table then
+    always a JOIN_CACHE_BNL object is employed.
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed. 
+    If an index is used to access rows of the joined table and the value of
+    join_cache_level==7|8 then a JOIN_CACHE_BKA_UNIQUE object is employed. 
+    If the value of join_cache_level is odd then creation of a non-linked 
+    join cache is forced.
+    If the function decides that a join buffer can be used to join the table
+    'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
+    the selected join cache object to the field 'cache' of the previous
+    join table. 
+    If the function creates a join cache object it tries to initialize it. The
+    failure to do this results in an invocation of the function that destructs
+    the created object.
+ 
+  NOTES
+    An inner table of a nested outer join or a nested semi-join can be currently
+    joined only when a linked cache object is employed. In these cases setting
+    join cache level to an odd number results in denial of usage of any join
+    buffer when joining the table.
+    For a nested outer join/semi-join, currently, we either use join buffers for
+    all inner tables or for none of them. 
+    Some engines (e.g. Falcon) currently allow to use only a join cache
+    of the type JOIN_CACHE_BKA_UNIQUE when the joined table is accessed through
+    an index. For these engines setting the value of join_cache_level to 5 or 6
+    results in that no join buffer is used to join the table. 
+  
+  TODO
+    Support BKA inside SJ-Materialization nests. When doing this, we'll need
+    to only store sj-inner tables in the join buffer.
+#if 0
+        JOIN_TAB *first_tab= join->join_tab+join->const_tables;
+        uint n_tables= i-join->const_tables;
+        / *
+          We normally put all preceding tables into the join buffer, except
+          for the constant tables.
+          If we're inside a semi-join materialization nest, e.g.
+
+             outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
+                                                       ^-- we're here
+
+          then we need to put into the join buffer only the tables from
+          within the nest.
+        * /
+        if (i >= first_sjm_table && i < last_sjm_table)
+        {
+          n_tables= i - first_sjm_table; // will be >0 if we got here
+          first_tab= join->join_tab + first_sjm_table;
+        }
+#endif
+
+  RETURN
+
+    cache level if cache is used, otherwise returns 0
+*/
+
+static
+uint check_join_cache_usage(JOIN_TAB *tab,
+                            JOIN *join, ulonglong options,
+                            uint no_jbuf_after,
+                            bool *icp_other_tables_ok)
+{
+  uint flags;
+  COST_VECT cost;
+  ha_rows rows;
+  uint bufsz= 4096;
+  JOIN_CACHE *prev_cache=0;
+  uint cache_level= join->thd->variables.join_cache_level;
+  bool force_unlinked_cache= test(cache_level & 1);
+  uint i= tab - join->join_tab;
+
+  *icp_other_tables_ok= TRUE;
+  if (cache_level == 0 || i == join->const_tables)
+    return 0;
+
+  if (options & SELECT_NO_JOIN_CACHE)
+    goto no_join_cache;
+  /* 
+    psergey-todo: why the below when execution code seems to handle the
+    "range checked for each record" case?
+  */
+  if (tab->use_quick == 2)
+    goto no_join_cache;
+  /*
+    Non-linked join buffers can't guarantee one match
+  */
+   if (force_unlinked_cache && 
+       (!tab->type == JT_ALL || cache_level <= 4) && 
+       ((tab->is_inner_table_of_semi_join_with_first_match() &&
+         !tab->is_single_inner_of_semi_join_with_first_match()) ||
+        (tab->is_inner_table_of_outer_join() &&
+         !tab->is_single_inner_of_outer_join())))
+    goto no_join_cache;
+
+  /*
+    Don't use join buffering if we're dictated not to by no_jbuf_after (this
+    ...)
+  */
+  if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || 
+      sj_is_materialize_strategy(join->best_positions[i].sj_strategy))
+    goto no_join_cache;
+
+  for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
+       first_inner= first_inner->first_upper)
+  {
+    if (first_inner != tab && !first_inner->use_join_cache)
+      goto no_join_cache;
+  }
+  if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
+      !tab->first_sj_inner_tab->use_join_cache)
+    goto no_join_cache;
+  if (!tab[-1].use_join_cache)
+  {
+    /* 
+      Check whether table tab and the previous one belong to the same nest of
+      inner tables and if so do not use join buffer when joining table tab. 
+    */
+    if (tab->first_inner)
+    {
+      for (JOIN_TAB *first_inner= tab[-1].first_inner;
+           first_inner;
+           first_inner= first_inner->first_upper)
+      {
+        if (first_inner == tab->first_inner)
+          goto no_join_cache;
+      }
+    }
+    else if (tab->first_sj_inner_tab &&
+             tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
+      goto no_join_cache; 
+  }       
+
+  if (!force_unlinked_cache)
+    prev_cache= tab[-1].cache;
+
+  switch (tab->type) {
+  case JT_ALL:
+    if (cache_level <= 2 && (tab->first_inner || tab->first_sj_inner_tab))
+      goto no_join_cache;
+    if ((options & SELECT_DESCRIBE) ||
+        (((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache))) &&
+         !tab->cache->init()))
+    {
+      *icp_other_tables_ok= FALSE;
+      return cache_level;
+    }
+    goto no_join_cache;
+  case JT_SYSTEM:
+  case JT_CONST:
+  case JT_REF:
+  case JT_EQ_REF:
+    if (cache_level <= 4)
+      return 0;
+    flags= HA_MRR_NO_NULL_ENDPOINTS;
+    if (tab->table->covering_keys.is_set(tab->ref.key))
+      flags|= HA_MRR_INDEX_ONLY;
+    rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+                                                  &bufsz, &flags, &cost);
+    if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
+        (!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
+        ((options & SELECT_DESCRIBE) ||
+         (((cache_level <= 6 && 
+           (tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache))) ||
+	  (cache_level > 6 &&  
+           (tab->cache= new JOIN_CACHE_BKA_UNIQUE(join, tab, flags, prev_cache)))
+           ) && !tab->cache->init())))
+      return cache_level;
+    goto no_join_cache;
+  default : ;
+  }
+
+no_join_cache:
+  if (cache_level>2)
+    revise_cache_usage(tab); 
+  return 0;
+}
+
+
+/*
+  Plan refinement stage: do various setup things for the executor
+
+  SYNOPSIS
+    make_join_readinfo()
+      join           Join being processed
+      options        Join's options (checking for SELECT_DESCRIBE, 
+                     SELECT_NO_JOIN_CACHE)
+      no_jbuf_after  Don't use join buffering after table with this number.
+
+  DESCRIPTION
+    Plan refinement stage: do various set ups for the executioner
+      - set up use of join buffering
+      - push index conditions
+      - increment relevant counters
+      - etc
+
+  RETURN 
+    FALSE - OK
+    TRUE  - Out of memory
+*/
+
+static bool
+make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
 {
   uint i;
   bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
-  bool ordered_set= 0;
   bool sorted= 1;
+  uint first_sjm_table= MAX_TABLES;
+  uint last_sjm_table= MAX_TABLES;
   DBUG_ENTER("make_join_readinfo");
 
+
+  if (!join->select_lex->sj_nests.is_empty() &&
+      setup_semijoin_dups_elimination(join, options, no_jbuf_after))
+    DBUG_RETURN(TRUE); /* purecov: inspected */
+
   for (i=join->const_tables ; i < join->tables ; i++)
   {
     JOIN_TAB *tab=join->join_tab+i;
     TABLE *table=tab->table;
+    bool icp_other_tables_ok;
     tab->read_record.table= table;
     tab->read_record.file=table->file;
     tab->read_record.unlock_row= rr_unlock_row;
     tab->next_select=sub_select;		/* normal select */
-
-    /*
-      Determine if the set is already ordered for ORDER BY, so it can 
-      disable join cache because it will change the ordering of the results.
-      Code handles sort table that is at any location (not only first after 
-      the const tables) despite the fact that it's currently prohibited.
-      We must disable join cache if the first non-const table alone is
-      ordered. If there is a temp table the ordering is done as a last
-      operation and doesn't prevent join cache usage.
-    */
-    if (!ordered_set && !join->need_tmp && 
-        (table == join->sort_by_table ||
-         (join->sort_by_table == (TABLE *) 1 && i != join->const_tables)))
-      ordered_set= 1;
-
     tab->sorted= sorted;
     sorted= 0;                                  // only first must be sorted
+    if (tab->loosescan_match_tab)
+    {
+      if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
+                                                         loosescan_key_len)))
+        return TRUE; /* purecov: inspected */
+      tab->sorted= TRUE;
+    }
+    if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy))
+    {
+      /* This is a start of semi-join nest */
+      first_sjm_table= i;
+      last_sjm_table= i + join->best_positions[i].n_sj_tables;
+      if (i == join->const_tables)
+        join->first_select= sub_select_sjm;
+      else
+       tab[-1].next_select= sub_select_sjm;
+
+      if (setup_sj_materialization(tab))
+        return TRUE;
+    }
     table->status=STATUS_NO_RECORD;
     pick_table_access_method (tab);
 
     switch (tab->type) {
+    case JT_SYSTEM:				// Only happens with left join 
+    case JT_CONST:				// Only happens with left join
+      /* Only happens with outer joins */
+      tab->read_first_record= tab->type == JT_SYSTEM ?
+                                join_read_system :join_read_const;
+      if (check_join_cache_usage(tab, join, options, no_jbuf_after,
+                                 &icp_other_tables_ok))
+      {
+        tab->use_join_cache= TRUE;
+        tab[-1].next_select=sub_select_cache;
+      }
+      else
+      if (table->covering_keys.is_set(tab->ref.key) &&
+          !table->no_keyread)
+      {
+        table->key_read=1;
+        table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else
+        push_index_cond(tab, tab->ref.key, icp_other_tables_ok);
+        break;
     case JT_EQ_REF:
       tab->read_record.unlock_row= join_read_key_unlock_row;
       /* fall through */
+      if (check_join_cache_usage(tab, join, options, no_jbuf_after,
+                                 &icp_other_tables_ok))
+      {
+        tab->use_join_cache= TRUE;
+        tab[-1].next_select=sub_select_cache;
+      }
+      if (table->covering_keys.is_set(tab->ref.key) &&
+	  !table->no_keyread)
+      {
+	table->key_read=1;
+	table->file->extra(HA_EXTRA_KEYREAD);
+      }
+      else
+        push_index_cond(tab, tab->ref.key, icp_other_tables_ok );
+      break;
     case JT_REF_OR_NULL:
     case JT_REF:
       if (tab->select)
@@ -6806,26 +7743,30 @@ make_join_readinfo(JOIN *join, ulonglong options)
       }
       delete tab->quick;
       tab->quick=0;
-      /* fall through */
-    case JT_CONST:				// Only happens with left join
+      if (check_join_cache_usage(tab, join, options, no_jbuf_after,
+                                 &icp_other_tables_ok))
+      {
+        tab->use_join_cache= TRUE;
+        tab[-1].next_select=sub_select_cache;
+      }
       if (table->covering_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
-        table->set_keyread(TRUE);
+        table->enable_keyread();
+      else
+        push_index_cond(tab, tab->ref.key, icp_other_tables_ok);
       break;
     case JT_ALL:
       /*
 	If previous table use cache
         If the incoming data set is already sorted don't use cache.
+        Also don't use cache if this is the first table in semi-join
+          materialization nest.
       */
-      if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
-          tab->use_quick != 2 && !tab->first_inner && !ordered_set)
+      if (check_join_cache_usage(tab, join, options, no_jbuf_after,
+                                 &icp_other_tables_ok))
       {
-	if ((options & SELECT_DESCRIBE) ||
-	    !join_init_cache(join->thd,join->join_tab+join->const_tables,
-			     i-join->const_tables))
-	{
-	  tab[-1].next_select=sub_select_cache; /* Patch previous */
-	}
+        tab->use_join_cache= TRUE;
+        tab[-1].next_select=sub_select_cache;
       }
       /* These init changes read_record */
       if (tab->use_quick == 2)
@@ -6849,7 +7790,10 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  {
 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
 	    if (statistics)
+	    {
 	      status_var_increment(join->thd->status_var.select_scan_count);
+	      join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
+	    }
 	  }
 	}
 	else
@@ -6863,7 +7807,10 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  {
 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
 	    if (statistics)
+	    {
 	      status_var_increment(join->thd->status_var.select_full_join_count);
+	      join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
+	    }
 	  }
 	}
 	if (!table->no_keyread)
@@ -6871,7 +7818,7 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	  if (tab->select && tab->select->quick &&
               tab->select->quick->index != MAX_KEY && //not index_merge
 	      table->covering_keys.is_set(tab->select->quick->index))
-            table->set_keyread(TRUE);
+            table->enable_keyread();
 	  else if (!table->covering_keys.is_clear_all() &&
 		   !(tab->select && tab->select->quick))
 	  {					// Only read index tree
@@ -6891,10 +7838,12 @@ make_join_readinfo(JOIN *join, ulonglong options)
 	    tab->type=JT_NEXT;		// Read with index_first / index_next
 	  }
 	}
+        if (tab->select && tab->select->quick &&
+            tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
+          push_index_cond(tab, tab->select->quick->index, icp_other_tables_ok);
       }
       break;
     case JT_FT:
-    case JT_SYSTEM: 
       break;
     default:
       DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
@@ -6905,7 +7854,32 @@ make_join_readinfo(JOIN *join, ulonglong options)
     }
   }
   join->join_tab[join->tables-1].next_select=0; /* Set by do_select */
-  DBUG_VOID_RETURN;
+  
+/*
+    If a join buffer is used to join a table the ordering by an index
+    for the first non-constant table cannot be employed anymore.
+  */
+  for (i=join->const_tables ; i < join->tables ; i++)
+  {
+    JOIN_TAB *tab=join->join_tab+i;
+    if (tab->use_join_cache)
+    {
+      JOIN_TAB *sort_by_tab= join->get_sort_by_join_tab();
+      if (sort_by_tab && !join->need_tmp)
+      {
+        join->need_tmp= 1;
+        join->simple_order= join->simple_group= 0;
+        if (sort_by_tab->type == JT_NEXT)
+        {
+          sort_by_tab->type= JT_ALL;
+          sort_by_tab->read_first_record= join_init_read_record;
+        }
+      }
+      break;
+    }
+  }
+
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -6952,12 +7926,15 @@ void JOIN_TAB::cleanup()
   select= 0;
   delete quick;
   quick= 0;
-  my_free(cache.buff);
-  cache.buff= 0;
+  if (cache)
+  {
+    cache->free();
+    cache= 0;
+  }
   limit= 0;
   if (table)
   {
-    table->set_keyread(FALSE);
+    table->disable_keyread();
     table->file->ha_index_or_rnd_end();
     /*
       We need to reset this for next select
@@ -6970,6 +7947,72 @@ void JOIN_TAB::cleanup()
 
 
 /**
+  Build a TABLE_REF structure for index lookup in the temporary table
+
+  @param thd             Thread handle
+  @param tmp_key         The temporary table key
+  @param it              The iterator of items for lookup in the key
+
+  @details
+  Build TABLE_REF object for lookup in the key 'tmp_key' using items
+  accessible via item iterator 'it'.
+
+  @retval TRUE  Error
+  @retval FALSE OK
+*/
+
+bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
+                                            KEY *tmp_key,
+                                            Item_iterator &it,
+                                            bool value)
+{
+  uint tmp_key_parts= tmp_key->key_parts;
+  DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
+
+  key= 0; /* The only temp table index. */
+  key_length= tmp_key->key_length;
+  if (!(key_buff=
+        (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+      !(key_copy=
+        (store_key**) thd->alloc((sizeof(store_key*) *
+                                  (tmp_key_parts + 1)))) ||
+      !(items=
+        (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+    DBUG_RETURN(TRUE);
+
+  key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
+
+  KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+  store_key **ref_key= key_copy;
+  uchar *cur_ref_buff= key_buff;
+
+  it.open();
+  for (uint i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+  {
+    Item *item= it.next();
+    DBUG_ASSERT(item);
+    items[i]= item;
+    int null_count= test(cur_key_part->field->real_maybe_null());
+    *ref_key= new store_key_item(thd, cur_key_part->field,
+                                 /* TIMOUR:
+                                    the NULL byte is taken into account in
+                                    cur_key_part->store_length, so instead of
+                                    cur_ref_buff + test(maybe_null), we could
+                                    use that information instead.
+                                 */
+                                 cur_ref_buff + null_count,
+                                 null_count ? cur_ref_buff : 0,
+                                 cur_key_part->length, items[i], value);
+    cur_ref_buff+= cur_key_part->store_length;
+  }
+  *ref_key= NULL; /* End marker. */
+  key_err= 1;
+  key_parts= tmp_key_parts;
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
   Partially cleanup JOIN after it has executed: close index or rnd read
   (table cursors), free quick selects.
 
@@ -7229,8 +8272,6 @@ eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
 static bool
 only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
 {
-  if (specialflag &  SPECIAL_SAFE_MODE)
-    return 0;			// skip this optimize /* purecov: inspected */
   tables&= ~PSEUDO_TABLE_BITS;
   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
   {
@@ -8226,6 +9267,7 @@ static COND *build_equal_items(THD *thd, COND *cond,
     if (cond->type() == Item::COND_ITEM &&
         ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
       cond_equal= &((Item_cond_and*) cond)->cond_equal;
+
     else if (cond->type() == Item::FUNC_ITEM &&
              ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
     {
@@ -8310,6 +9352,15 @@ static int compare_fields_by_table_order(Item_field *field1,
 }
 
 
+static TABLE_LIST* embedding_sjm(Item_field *item_field)
+{
+  TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+  if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
+    return nest;
+  else
+    return NULL;
+}
+
 /**
   Generate minimal set of simple equalities equivalent to a multiple equality.
 
@@ -8343,6 +9394,23 @@ static int compare_fields_by_table_order(Item_field *field1,
     So only t1.a=t3.c should be left in the lower level.
     If cond is equal to 0, then not more then one equality is generated
     and a pointer to it is returned as the result of the function.
+    
+    Equality substutution and semi-join materialization nests:
+
+       In case join order looks like this:
+
+          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 
+
+        We must not construct equalities like 
+
+           outer_tbl1.col = inner_tbl1.col 
+
+        because they would get attached to inner_tbl1 and will get evaluated
+        during materialization phase, when we don't have current value of
+        outer_tbl1.col.
+
+        Item_equal::get_first() also takes similar measures for dealing with
+        equality substitution in presense of SJM nests.
 
   @return
     - The condition with generated simple equalities or
@@ -8350,8 +9418,8 @@ static int compare_fields_by_table_order(Item_field *field1,
     - 0, otherwise.
 */
 
-static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
-                                  Item_equal *item_equal)
+Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+                           Item_equal *item_equal)
 {
   List<Item> eq_list;
   Item_func_eq *eq_item= 0;
@@ -8360,18 +9428,46 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
   Item *item_const= item_equal->get_const();
   Item_equal_iterator it(*item_equal);
   Item *head;
+  DBUG_ASSERT(!cond || cond->type() == Item::COND_ITEM);
+
+  TABLE_LIST *current_sjm= NULL;
+  Item *current_sjm_head= NULL;
+
+  /* 
+    Pick the "head" item: the constant one or the first in the join order
+    that's not inside some SJM nest.
+  */
   if (item_const)
     head= item_const;
   else
   {
-    head= item_equal->get_first();
+    TABLE_LIST *emb_nest;
+    Item_field *item_field;
+    head= item_field= item_equal->get_first(NULL);
     it++;
+    if ((emb_nest= embedding_sjm(item_field)))
+    {
+      current_sjm= emb_nest;
+      current_sjm_head= head;
+    }
   }
+
   Item_field *item_field;
+  /*
+    For each other item, generate "item=head" equality (except the tables that 
+    are within SJ-Materialization nests, for those "head" is defined
+    differently)
+  */
   while ((item_field= it++))
   {
     Item_equal *upper= item_field->find_item_equal(upper_levels);
     Item_field *item= item_field;
+    TABLE_LIST *field_sjm= embedding_sjm(item_field);
+
+    /* 
+      Check if "item_field=head" equality is already guaranteed to be true 
+      on upper AND-levels.
+    */
     if (upper)
     { 
       if (item_const && upper->get_const())
@@ -8386,43 +9482,58 @@ static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
         }
       }
     }
-    if (item == item_field)
+    
+    bool produce_equality= test(item == item_field);
+    if (!item_const && field_sjm && field_sjm != current_sjm)
+    {
+      /* Entering an SJM nest */
+      current_sjm_head= item_field;
+      if (!field_sjm->sj_mat_info->is_sj_scan)
+        produce_equality= FALSE;
+    }
+
+    if (produce_equality)
     {
       if (eq_item)
         eq_list.push_back(eq_item);
-      eq_item= new Item_func_eq(item_field, head);
+      
+      eq_item= new Item_func_eq(item_field, current_sjm? current_sjm_head: head);
+
       if (!eq_item)
         return 0;
       eq_item->set_cmp_func();
       eq_item->quick_fix_field();
-   }
+    }
+    current_sjm= field_sjm;
   }
 
-  if (!cond && !eq_list.head())
+  if (!cond)
   {
-    if (!eq_item)
-      return new Item_int((longlong) 1,1);
-    return eq_item;
-  }
-
-  if (eq_item)
+    if (eq_list.is_empty())
+    {
+      if (eq_item)
+        return eq_item;
+      return new Item_int((longlong) 1, 1);
+    }
+    /* eq_item is always set if list is not empty */
+    DBUG_ASSERT(eq_item);
     eq_list.push_back(eq_item);
-  if (!cond)
-    cond= new Item_cond_and(eq_list);
+    if (!(cond= new Item_cond_and(eq_list)))
+      return 0;                                 // Error
+  }
   else
   {
-    DBUG_ASSERT(cond->type() == Item::COND_ITEM);
-    if (eq_list.elements)
+    if (eq_item)
+      eq_list.push_back(eq_item);
+    if (!eq_list.is_empty())
       ((Item_cond *) cond)->add_at_head(&eq_list);
   }
-
   cond->quick_fix_field();
   cond->update_used_tables();
    
   return cond;
 }
 
-
 /**
   Substitute every field reference in a condition by the best equal field
   and eliminate all multiple equality predicates.
@@ -8456,6 +9567,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
                                              void *table_join_idx)
 {
   Item_equal *item_equal;
+  COND *org_cond= cond;                 // Return this in case of fatal error
 
   if (cond->type() == Item::COND_ITEM)
   {
@@ -8479,7 +9591,7 @@ static COND* substitute_for_best_equal_field(COND *cond,
     Item *item;
     while ((item= li++))
     {
-      Item *new_item =substitute_for_best_equal_field(item, cond_equal,
+      Item *new_item= substitute_for_best_equal_field(item, cond_equal,
                                                       table_join_idx);
       /*
         This works OK with PS/SP re-execution as changes are made to
@@ -8498,6 +9610,8 @@ static COND* substitute_for_best_equal_field(COND *cond,
         // This occurs when eliminate_item_equal() founds that cond is
         // always false and substitutes it with Item_int 0.
         // Due to this, value of item_equal will be 0, so just return it.
+        if (!cond)
+          return org_cond;                      // Error
         if (cond->type() != Item::COND_ITEM)
           break;
       }
@@ -8514,7 +9628,8 @@ static COND* substitute_for_best_equal_field(COND *cond,
     item_equal->sort(&compare_fields_by_table_order, table_join_idx);
     if (cond_equal && cond_equal->current_level.head() == item_equal)
       cond_equal= 0;
-    return eliminate_item_equal(0, cond_equal, item_equal);
+    cond= eliminate_item_equal(0, cond_equal, item_equal);
+    return cond ? cond : org_cond;
   }
   else
     cond->transform(&Item::replace_equal_field, 0);
@@ -8669,37 +9784,6 @@ change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-/**
-  Remove additional condition inserted by IN/ALL/ANY transformation.
-
-  @param conds   condition for processing
-
-  @return
-    new conditions
-*/
-
-static Item *remove_additional_cond(Item* conds)
-{
-  if (conds->name == in_additional_cond)
-    return 0;
-  if (conds->type() == Item::COND_ITEM)
-  {
-    Item_cond *cnd= (Item_cond*) conds;
-    List_iterator<Item> li(*(cnd->argument_list()));
-    Item *item;
-    while ((item= li++))
-    {
-      if (item->name == in_additional_cond)
-      {
-	li.remove();
-	if (cnd->argument_list()->elements == 1)
-	  return cnd->argument_list()->head();
-	return conds;
-      }
-    }
-  }
-  return conds;
-}
 
 static void
 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
@@ -8761,7 +9845,6 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   }
 }
 
-
 /**
   Simplify joins replacing outer joins by inner joins whenever it's
   possible.
@@ -8856,13 +9939,18 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
     consider any plan where one of the inner tables is before some of outer
     tables.
 
-
+  IMPLEMENTATION
     The function is implemented by a recursive procedure.  On the recursive
     ascent all attributes are calculated, all outer joins that can be
     converted are replaced and then all unnecessary braces are removed.
     As join list contains join tables in the reverse order sequential
     elimination of outer joins does not require extra recursive calls.
 
+  SEMI-JOIN NOTES
+    Remove all semi-joins that have are within another semi-join (i.e. have
+    an "ancestor" semi-join nest)
+
+  EXAMPLES
     Here is an example of a join query with invalid cross references:
     @code
       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b 
@@ -8872,14 +9960,15 @@ propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
   @param join_list   list representation of the join to be converted
   @param conds       conditions to add on expressions for converted joins
   @param top         true <=> conds is the where condition
-
+  @param in_sj       TRUE <=> processing semi-join nest's children
   @return
     - The new condition, if success
     - 0, otherwise
 */
 
 static COND *
-simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
+simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
+               bool in_sj)
 {
   TABLE_LIST *table;
   NESTED_JOIN *nested_join;
@@ -8915,7 +10004,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
            the corresponding on expression is added to E. 
 	*/ 
         expr= simplify_joins(join, &nested_join->join_list,
-                             expr, FALSE);
+                             expr, FALSE, in_sj || table->sj_on_expr);
 
         if (!table->prep_on_expr || expr != table->on_expr)
         {
@@ -8927,9 +10016,12 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
       }
       nested_join->used_tables= (table_map) 0;
       nested_join->not_null_tables=(table_map) 0;
-      conds= simplify_joins(join, &nested_join->join_list, conds, top);
+      conds= simplify_joins(join, &nested_join->join_list, conds, top, 
+                            in_sj || table->sj_on_expr);
       used_tables= nested_join->used_tables;
       not_null_tables= nested_join->not_null_tables;  
+      /* The following two might become unequal after table elimination: */
+      nested_join->n_tables= nested_join->join_list.elements;
     }
     else
     {
@@ -8955,7 +10047,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
       table->outer_join= 0;
       if (table->on_expr)
       {
-        /* Add on expression to the where condition. */
+        /* Add ON expression to the WHERE or upper-level ON condition. */
         if (conds)
         {
           conds= and_conds(conds, table->on_expr);
@@ -9015,7 +10107,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
           For example it might happen if RAND() function
           is used in JOIN ON clause.
 	*/  
-        if (!((prev_table->on_expr->used_tables() & ~RAND_TABLE_BIT) &
+        if (!((prev_table->on_expr->used_tables() &
+               ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) &
               ~prev_used_tables))
           prev_table->dep_tables|= used_tables;
       }
@@ -9023,14 +10116,25 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
     prev_table= table;
   }
     
-  /* Flatten nested joins that can be flattened. */
+  /* 
+    Flatten nested joins that can be flattened.
+    no ON expression and not a semi-join => can be flattened.
+  */
   TABLE_LIST *right_neighbor= NULL;
   li.rewind();
   while ((table= li++))
   {
     bool fix_name_res= FALSE;
     nested_join= table->nested_join;
-    if (nested_join && !table->on_expr)
+    if (table->sj_on_expr && !in_sj)
+    {
+       /*
+         If this is a semi-join that is not contained within another semi-join, 
+         leave it intact (otherwise it is flattened)
+       */
+      join->select_lex->sj_nests.push_back(table);
+    }
+    else if (nested_join && !table->on_expr)
     {
       TABLE_LIST *tbl;
       List_iterator<TABLE_LIST> it(nested_join->join_list);
@@ -9057,8 +10161,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 /**
   Assign each nested join structure a bit in nested_join_map.
 
-    Assign each nested join structure (except "confluent" ones - those that
-    embed only one element) a bit in nested_join_map.
+    Assign each nested join structure (except ones that embed only one element
+    and so are redundant) a bit in nested_join_map.
 
   @param join          Join being processed
   @param join_list     List of tables
@@ -9067,7 +10171,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
 
   @note
     This function is called after simplify_joins(), when there are no
-    redundant nested joins, #non_confluent_nested_joins <= #tables_in_join so
+    redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
     we will not run out of bits in nested_join_map.
 
   @return
@@ -9094,9 +10198,11 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
             with anything)
         2. we could run out bits in nested_join_map otherwise.
       */
-      if (nested_join->join_list.elements != 1)
+      if (nested_join->n_tables != 1)
       {
-        nested_join->nj_map= (nested_join_map) 1 << first_unused++;
+        /* Don't assign bits to sj-nests */
+        if (table->on_expr)
+          nested_join->nj_map= (nested_join_map) 1 << first_unused++;
         first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
                                                     first_unused);
       }
@@ -9116,21 +10222,28 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                     tables which will be ignored.
 */
 
-static void reset_nj_counters(List<TABLE_LIST> *join_list)
+static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
 {
   List_iterator<TABLE_LIST> li(*join_list);
   TABLE_LIST *table;
   DBUG_ENTER("reset_nj_counters");
+  uint n=0;
   while ((table= li++))
   {
     NESTED_JOIN *nested_join;
+    bool is_eliminated_nest= FALSE;
     if ((nested_join= table->nested_join))
     {
       nested_join->counter= 0;
-      reset_nj_counters(&nested_join->join_list);
+      nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
+      if (!nested_join->n_tables)
+        is_eliminated_nest= TRUE;
     }
+    if ((!table->table && !is_eliminated_nest) || 
+        (table->table && (table->table->map & ~join->eliminated_tables)))
+      n++;
   }
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(n);
 }
 
 
@@ -9242,28 +10355,31 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
     Do update counters for "pairs of brackets" that we've left (marked as
     X,Y,Z in the above picture)
   */
-  for (;next_emb; next_emb= next_emb->embedding)
+  for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
   {
-    next_emb->nested_join->counter++;
-    if (next_emb->nested_join->counter == 1)
+    if (!next_emb->sj_on_expr)
     {
-      /* 
-        next_emb is the first table inside a nested join we've "entered". In
-        the picture above, we're looking at the 'X' bracket. Don't exit yet as
-        X bracket might have Y pair bracket.
+      next_emb->nested_join->counter++;
+      if (next_emb->nested_join->counter == 1)
+      {
+        /* 
+          next_emb is the first table inside a nested join we've "entered". In
+          the picture above, we're looking at the 'X' bracket. Don't exit yet as
+          X bracket might have Y pair bracket.
+        */
+        join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      }
+      
+      if (next_emb->nested_join->n_tables !=
+          next_emb->nested_join->counter)
+        break;
+
+      /*
+        We're currently at Y or Z-bracket as depicted in the above picture.
+        Mark that we've left it and continue walking up the brackets hierarchy.
       */
-      join->cur_embedding_map |= next_emb->nested_join->nj_map;
+      join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
     }
-    
-    if (next_emb->nested_join->join_list.elements !=
-        next_emb->nested_join->counter)
-      break;
-
-    /*
-      We're currently at Y or Z-bracket as depicted in the above picture.
-      Mark that we've left it and continue walking up the brackets hierarchy.
-    */
-    join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
   }
   return FALSE;
 }
@@ -9325,33 +10441,127 @@ static void restore_prev_nj_state(JOIN_TAB *last)
 {
   TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
   JOIN *join= last->join;
-  for (;last_emb != NULL; last_emb= last_emb->embedding)
+  for (;last_emb != NULL && last_emb != join->emb_sjm_nest; 
+       last_emb= last_emb->embedding)
   {
-    NESTED_JOIN *nest= last_emb->nested_join;
-    DBUG_ASSERT(nest->counter > 0);
-    
-    bool was_fully_covered= nest->is_fully_covered();
-    
-    if (--nest->counter == 0)
-      join->cur_embedding_map&= ~nest->nj_map;
-    
-    if (!was_fully_covered)
-      break;
+    if (!last_emb->sj_on_expr)
+    {
+      NESTED_JOIN *nest= last_emb->nested_join;
+      DBUG_ASSERT(nest->counter > 0);
+      
+      bool was_fully_covered= nest->is_fully_covered();
+      
+      if (--nest->counter == 0)
+        join->cur_embedding_map&= ~nest->nj_map;
+      
+      if (!was_fully_covered)
+        break;
+      
+      join->cur_embedding_map|= nest->nj_map;
+    }
+  }
+}
+
+
+
+/*
+  Change access methods not to use join buffering and adjust costs accordingly
+
+  SYNOPSIS
+    optimize_wo_join_buffering()
+      join
+      first_tab               The first tab to do re-optimization for
+      last_tab                The last tab to do re-optimization for
+      last_remaining_tables   Bitmap of tables that are not in the
+                              [0...last_tab] join prefix
+      first_alt               TRUE <=> Use the LooseScan plan for the first_tab
+      no_jbuf_before          Don't allow to use join buffering before this
+                              table
+      reopt_rec_count     OUT New output record count
+      reopt_cost          OUT New join prefix cost
+      sj_inner_fanout     OUT Fanout in the [first_tab; last_tab] range that
+                              is produced by semi-join-inner tables.
+
+  DESCRIPTION
+    Given a join prefix [0; ... first_tab], change the access to the tables
+    in the [first_tab; last_tab] not to use join buffering. This is needed
+    because some semi-join strategies cannot be used together with the join
+    buffering.
+    In general case the best table order in [first_tab; last_tab] range with
+    join buffering is different from the best order without join buffering but
+    we don't try finding a better join order. (TODO ask Igor why did we
+    chose not to do this in the end. that's actually the difference from the 
+    forking approach)
+*/
+
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *reopt_rec_count, double *reopt_cost,
+                                double *sj_inner_fanout)
+{
+  double cost, rec_count, inner_fanout= 1.0;
+  table_map reopt_remaining_tables= last_remaining_tables;
+  uint i;
+
+  if (first_tab > join->const_tables)
+  {
+    cost=      join->positions[first_tab - 1].prefix_cost.total_cost();
+    rec_count= join->positions[first_tab - 1].prefix_record_count;
+  }
+  else
+  {
+    cost= 0.0;
+    rec_count= 1;
+  }
+
+  for (i= first_tab; i <= last_tab; i++)
+    reopt_remaining_tables |= join->positions[i].table->table->map;
+
+  for (i= first_tab; i <= last_tab; i++)
+  {
+    JOIN_TAB *rs= join->positions[i].table;
+    POSITION pos, loose_scan_pos;
     
-    join->cur_embedding_map|= nest->nj_map;
+    if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
+    {
+      /* Find the best access method that would not use join buffering */
+      best_access_path(join, rs, reopt_remaining_tables, i, 
+                       test(i < no_jbuf_before), rec_count,
+                       &pos, &loose_scan_pos);
+    }
+    else 
+      pos= join->positions[i];
+
+    if ((i == first_tab && first_alt))
+      pos= loose_scan_pos;
+
+    reopt_remaining_tables &= ~rs->table->map;
+    rec_count *= pos.records_read;
+    cost += pos.read_time;
+
+    if (rs->emb_sj_nest)
+      inner_fanout *= pos.records_read;
   }
+
+  *reopt_rec_count= rec_count;
+  *reopt_cost= cost;
+  *sj_inner_fanout= inner_fanout;
 }
 
 
 static COND *
 optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list,
-              Item::cond_result *cond_value)
+              Item::cond_result *cond_value, COND_EQUAL **cond_equal)
 {
   THD *thd= join->thd;
   DBUG_ENTER("optimize_cond");
 
   if (!conds)
+  {
     *cond_value= Item::COND_TRUE;
+    build_equal_items(join->thd, NULL, NULL, join_list, cond_equal);
+  }  
   else
   {
     /* 
@@ -9497,7 +10707,17 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value)
       return (COND*) 0;
     }
   }
-  else if (cond->const_item())
+  else if (cond->const_item() && !cond->is_expensive())
+  /*
+    DontEvaluateMaterializedSubqueryTooEarly:
+    TODO: 
+    Excluding all expensive functions is too restritive we should exclude only
+    materialized IN subquery predicates because they can't yet be evaluated
+    here (they need additional initialization that is done later on).
+
+    The proper way to exclude the subqueries would be to walk the cond tree and
+    check for materialized subqueries there.
+  */
   {
     *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
     return (COND*) 0;
@@ -9790,6 +11010,8 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field,
       table->s->db_create_options|= HA_OPTION_PACK_RECORD;
     else if (org_field->type() == FIELD_TYPE_DOUBLE)
       ((Field_double *) new_field)->not_fixed= TRUE;
+    new_field->vcol_info= 0;
+    new_field->stored_in_db= TRUE;
   }
   return new_field;
 }
@@ -10073,6 +11295,8 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
   case Item::REF_ITEM:
   case Item::NULL_ITEM:
   case Item::VARBIN_ITEM:
+  case Item::CACHE_ITEM:
+  case Item::EXPR_CACHE_ITEM:
     if (make_copy_field)
     {
       DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -10103,9 +11327,13 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
   uint field_count= table->s->fields;
   bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
               FALSE);
-  bitmap_init(&table->tmp_set,
+  bitmap_init(&table->def_vcol_set,
               (my_bitmap_map*) (bitmaps+ bitmap_buffer_size(field_count)),
               field_count, FALSE);
+  bitmap_init(&table->tmp_set,
+              (my_bitmap_map*) (bitmaps+ 2*bitmap_buffer_size(field_count)),
+              field_count, FALSE);
+
   /* write_set and all_set are copies of read_set */
   table->def_write_set= table->def_read_set;
   table->s->all_set= table->def_read_set;
@@ -10140,11 +11368,6 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
                               be used for name resolving; can be "".
 */
 
-#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
-#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
-#define RATIO_TO_PACK_ROWS	       2
-#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
-
 TABLE *
 create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
 		 ORDER *group, bool distinct, bool save_sum_fields,
@@ -10164,7 +11387,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   bool  using_unique_constraint= 0;
   bool  use_packed_rows= 0;
   bool  not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS);
-  char  *tmpname,path[FN_REFLEN];
+  char  *tmpname,path[FN_REFLEN], tmp_table_name[50];
   uchar	*pos, *group_buff, *bitmaps;
   uchar *null_flags;
   Field **reg_field, **from_field, **default_field;
@@ -10173,7 +11396,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   KEY *keyinfo;
   KEY_PART_INFO *key_part_info;
   Item **copy_func;
-  MI_COLUMNDEF *recinfo;
+  ENGINE_COLUMNDEF *recinfo;
   /*
     total_uneven_bit_length is uneven bit length for visible fields
     hidden_uneven_bit_length is uneven bit length for hidden fields
@@ -10189,26 +11412,27 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
               (ulong) rows_limit,test(group)));
 
   status_var_increment(thd->status_var.created_tmp_tables);
+  thd->query_plan_flags|= QPLAN_TMP_TABLE;
 
   if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
     temp_pool_slot = bitmap_lock_set_next(&temp_pool);
 
   if (temp_pool_slot != MY_BIT_NONE) // we got a slot
-    sprintf(path, "%s_%lx_%i", tmp_file_prefix,
+    sprintf(tmp_table_name, "%s_%lx_%i", tmp_file_prefix,
             current_pid, temp_pool_slot);
   else
   {
     /* if we run out of slots or we are not using tempool */
-    sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid,
+    sprintf(tmp_table_name, "%s%lx_%lx_%x", tmp_file_prefix,current_pid,
             thd->thread_id, thd->tmp_table++);
   }
 
   /*
     No need to change table name to lower case as we are only creating
-    MyISAM or HEAP tables here
+    MyISAM, Aria or HEAP tables here
   */
-  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-
+  fn_format(path, tmp_table_name, mysql_tmpdir, "",
+            MY_REPLACE_EXT|MY_UNPACK_FILENAME);
 
   if (group)
   {
@@ -10216,6 +11440,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       group=0;					// Can't use group key
     else for (ORDER *tmp=group ; tmp ; tmp=tmp->next)
     {
+      /*
+        marker == 4 means two things:
+        - store NULLs in the key, and
+        - convert BIT fields to 64-bit long, needed because MEMORY tables
+          can't index BIT fields.
+      */
       (*tmp->item)->marker=4;			// Store null in key
       if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB)
 	using_unique_constraint=1;
@@ -10254,10 +11484,10 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                         sizeof(*key_part_info)*(param->group_parts+1),
                         &param->start_recinfo,
                         sizeof(*param->recinfo)*(field_count*2+4),
-                        &tmpname, (uint) strlen(path)+1,
+                        &tmpname, (uint) strlen(tmp_table_name)+1,
                         &group_buff, (group && ! using_unique_constraint ?
                                       param->group_length : 0),
-                        &bitmaps, bitmap_buffer_size(field_count)*2,
+                        &bitmaps, bitmap_buffer_size(field_count)*3,
                         NullS))
   {
     if (temp_pool_slot != MY_BIT_NONE)
@@ -10273,7 +11503,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     DBUG_RETURN(NULL);				/* purecov: inspected */
   }
   param->items_to_copy= copy_func;
-  strmov(tmpname,path);
+  strmov(tmpname, tmp_table_name);
   /* make table according to fields */
 
   bzero((char*) table,sizeof(*table));
@@ -10302,7 +11532,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
   share->blob_field= blob_field;
   share->blob_ptr_size= portable_sizeof_char_ptr;
-  share->db_low_byte_first=1;                // True for HEAP and MyISAM
+  share->db_low_byte_first=1;                // True for HEAP, MyISAM and Maria
   share->table_charset= param->table_charset;
   share->primary_key= MAX_KEY;               // Indicate no primary key
   share->keys_for_keyread.init();
@@ -10329,6 +11559,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
         if (item->used_tables() & OUTER_REF_TABLE_BIT)
           item->update_used_tables();
         if (type == Item::SUBSELECT_ITEM ||
+            (item->get_cached_item() &&
+             item->get_cached_item()->type() == Item::SUBSELECT_ITEM ) ||
             (item->used_tables() & ~OUTER_REF_TABLE_BIT))
         {
 	  /*
@@ -10414,7 +11646,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                          group != 0,
                          !force_copy_fields &&
                            (not_all_columns || group !=0),
-                         item->marker == 4, force_copy_fields,
+                         /*
+                           If item->marker == 4 then we force create_tmp_field
+                           to create a 64-bit longs for BIT fields because HEAP
+                           tables can't index BIT fields directly. We do the same
+                           for distinct, as we want the distinct index to be
+                           usable in this case too.
+                         */
+                         item->marker == 4  || param->bit_fields_as_long, // psergey-feb17
+                         force_copy_fields,
                          param->convert_blob_length);
 
       if (!new_field)
@@ -10436,6 +11676,12 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
         *blob_field++= fieldnr;
 	blob_count++;
       }
+      if (new_field->real_type() == MYSQL_TYPE_STRING ||
+          new_field->real_type() == MYSQL_TYPE_VARCHAR)
+      {
+        string_count++;
+        string_total_length+= new_field->pack_length();
+      }
       if (item->marker == 4 && item->maybe_null)
       {
 	group_null_items++;
@@ -10478,9 +11724,10 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   /* future: storage engine selection can be made dynamic? */
   if (blob_count || using_unique_constraint
       || (thd->variables.big_tables && !(select_options & SELECT_SMALL_RESULT))
-      || (select_options & TMP_TABLE_FORCE_MYISAM))
+      || (select_options & TMP_TABLE_FORCE_MYISAM)
+      || thd->variables.tmp_table_size == 0)
   {
-    share->db_plugin= ha_lock_engine(0, myisam_hton);
+    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
     table->file= get_new_handler(share, &table->mem_root,
                                  share->db_type());
     if (group &&
@@ -10497,7 +11744,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   if (!table->file)
     goto err;
 
-
   if (!using_unique_constraint)
     reclength+= group_null_items;	// null flag is stored separately
 
@@ -10520,8 +11766,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   /* Use packed rows if there is blobs or a lot of space to gain */
   if (blob_count ||
       (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
-      (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
-       string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
+       (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
+        string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
     use_packed_rows= 1;
 
   share->reclength= reclength;
@@ -10552,7 +11798,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
 
     table->null_flags= (uchar*) table->record[0];
     share->null_fields= null_count+ hidden_null_count;
-    share->null_bytes= null_pack_length;
+    share->null_bytes= share->null_bytes_for_compare= null_pack_length;
   }
   null_count= (blob_count == 0) ? 1 : 0;
   hidden_field_count=param->hidden_field_count;
@@ -10634,13 +11880,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     /* Make entry for create table */
     recinfo->length=length;
     if (field->flags & BLOB_FLAG)
-      recinfo->type= (int) FIELD_BLOB;
+      recinfo->type= FIELD_BLOB;
     else if (use_packed_rows &&
              field->real_type() == MYSQL_TYPE_STRING &&
 	     length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
-      recinfo->type=FIELD_SKIP_ENDSPACE;
+      recinfo->type= FIELD_SKIP_ENDSPACE;
+    else if (field->real_type() == MYSQL_TYPE_VARCHAR)
+      recinfo->type= FIELD_VARCHAR;
     else
-      recinfo->type=FIELD_NORMAL;
+      recinfo->type= FIELD_NORMAL;
+
     if (!--hidden_field_count)
       null_count=(null_count+7) & ~7;		// move to next byte
 
@@ -10677,7 +11926,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     param->group_buff=group_buff;
     share->keys=1;
     share->uniques= test(using_unique_constraint);
-    table->key_info=keyinfo;
+    table->key_info= table->s->key_info= keyinfo;
     keyinfo->key_part=key_part_info;
     keyinfo->flags=HA_NOSAME;
     keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
@@ -10743,29 +11992,40 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     */
     DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));
 
-    null_pack_length-=hidden_null_pack_length;
-    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
-			 test(null_pack_length));
-    table->distinct= 1;
-    share->keys= 1;
     if (blob_count)
     {
-      using_unique_constraint=1;
+      /*
+        Special mode for index creation in MyISAM used to support unique
+        indexes on blobs with arbitrary length. Such indexes cannot be
+        used for lookups.
+      */
+      //// psergey-merge: using_unique_constraint=1;
       share->uniques= 1;
     }
+    null_pack_length-=hidden_null_pack_length;
+    keyinfo->key_parts= ((field_count-param->hidden_field_count)+
+			 (share->uniques ? test(null_pack_length) : 0));
+    table->distinct= 1;
+    share->keys= 1;
     if (!(key_part_info= (KEY_PART_INFO*)
           alloc_root(&table->mem_root,
                      keyinfo->key_parts * sizeof(KEY_PART_INFO))))
       goto err;
     bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO));
-    table->key_info=keyinfo;
+    table->key_info= table->s->key_info= keyinfo;
     keyinfo->key_part=key_part_info;
     keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
-    keyinfo->key_length=(uint16) reclength;
+    keyinfo->key_length= 0;  // Will compute the sum of the parts below.
     keyinfo->name= (char*) "distinct_key";
     keyinfo->algorithm= HA_KEY_ALG_UNDEF;
     keyinfo->rec_per_key=0;
-    if (null_pack_length)
+
+    /*
+      Create an extra field to hold NULL bits so that unique indexes on
+      blobs can distinguish NULL from 0. This extra field is not needed
+      when we do not use UNIQUE indexes for blobs.
+    */
+    if (null_pack_length && share->uniques)
     {
       key_part_info->null_bit=0;
       key_part_info->offset=hidden_null_pack_length;
@@ -10792,6 +12052,24 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
       key_part_info->field=    *reg_field;
       key_part_info->offset=   (*reg_field)->offset(table->record[0]);
       key_part_info->length=   (uint16) (*reg_field)->pack_length();
+      /* TODO:
+        The below method of computing the key format length of the
+        key part is a copy/paste from opt_range.cc, and table.cc.
+        This should be factored out, e.g. as a method of Field.
+        In addition it is not clear if any of the Field::*_length
+        methods is supposed to compute the same length. If so, it
+        might be reused.
+      */
+      key_part_info->store_length= key_part_info->length;
+
+      if ((*reg_field)->real_maybe_null())
+        key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+      if ((*reg_field)->type() == MYSQL_TYPE_BLOB || 
+          (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR)
+        key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+
+      keyinfo->key_length+= key_part_info->store_length;
+
       key_part_info->type=     (uint8) (*reg_field)->key_type();
       key_part_info->key_type =
 	((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
@@ -10804,13 +12082,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
   if (thd->is_fatal_error)				// If end of memory
     goto err;					 /* purecov: inspected */
   share->db_record_offset= 1;
-  if (share->db_type() == myisam_hton)
+  if (share->db_type() == TMP_ENGINE_HTON)
   {
-    if (create_myisam_tmp_table(table, param, select_options,
-                                thd->variables.big_tables))
+    if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
+                                  &param->recinfo, select_options,
+                                  thd->variables.big_tables))
       goto err;
   }
-  if (open_tmp_table(table))
+  DBUG_PRINT("info", ("skip_create_table: %d", (int)param->skip_create_table));
+  if (!param->skip_create_table && open_tmp_table(table))
     goto err;
 
   thd->mem_root= mem_root_save;
@@ -10826,6 +12106,7 @@ err:
 }
 
 
+
 /****************************************************************************/
 
 /**
@@ -10866,7 +12147,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
                         &share, sizeof(*share),
                         &field, (field_count + 1) * sizeof(Field*),
                         &blob_field, (field_count+1) *sizeof(uint),
-                        &bitmaps, bitmap_buffer_size(field_count)*2,
+                        &bitmaps, bitmap_buffer_size(field_count)*3,
                         NullS))
     return 0;
 
@@ -10918,7 +12199,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
   {
     table->null_flags= (uchar*) table->record[0];
     share->null_fields= null_count;
-    share->null_bytes= null_pack_length;
+    share->null_bytes= share->null_bytes_for_compare= null_pack_length;
   }
 
   table->in_use= thd;           /* field->reset() may access table->in_use */
@@ -10968,11 +12249,12 @@ error:
 }
 
 
-static bool open_tmp_table(TABLE *table)
+bool open_tmp_table(TABLE *table)
 {
   int error;
-  if ((error=table->file->ha_open(table, table->s->table_name.str,O_RDWR,
-                                  HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE)))
+  if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR,
+                                   HA_OPEN_TMP_TABLE |
+                                   HA_OPEN_INTERNAL_TABLE)))
   {
     table->file->print_error(error,MYF(0)); /* purecov: inspected */
     table->db_stat=0;
@@ -10983,15 +12265,212 @@ static bool open_tmp_table(TABLE *table)
 }
 
 
-static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
-				    ulonglong options, my_bool big_tables)
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that descrimes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal emporary table according to passed description. The is
+    assumed to have one unique index or constraint.
+
+    The passed array or ENGINE_COLUMNDEF structures must have this form:
+
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options, my_bool big_tables)
+{
+  int error;
+  MARIA_KEYDEF keydef;
+  MARIA_UNIQUEDEF uniquedef;
+  TABLE_SHARE *share= table->s;
+  MARIA_CREATE_INFO create_info;
+  DBUG_ENTER("create_internal_tmp_table");
+
+  if (share->keys)
+  {						// Get keys for ni_create
+    bool using_unique_constraint=0;
+    HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
+                                            sizeof(*seg) * keyinfo->key_parts);
+    if (!seg)
+      goto err;
+
+    bzero(seg, sizeof(*seg) * keyinfo->key_parts);
+    if (keyinfo->key_length >= table->file->max_key_length() ||
+	keyinfo->key_parts > table->file->max_key_parts() ||
+	share->uniques)
+    {
+      /* Can't create a key; Make a unique constraint instead of a key */
+      share->keys=    0;
+      share->uniques= 1;
+      using_unique_constraint=1;
+      bzero((char*) &uniquedef,sizeof(uniquedef));
+      uniquedef.keysegs=keyinfo->key_parts;
+      uniquedef.seg=seg;
+      uniquedef.null_are_equal=1;
+
+      /* Create extra column for hash value */
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type=   FIELD_CHECK;
+      (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
+      share->reclength+=      MARIA_UNIQUE_HASH_LENGTH;
+    }
+    else
+    {
+      /* Create an unique key */
+      bzero((char*) &keydef,sizeof(keydef));
+      keydef.flag=HA_NOSAME;
+      keydef.keysegs=  keyinfo->key_parts;
+      keydef.seg= seg;
+    }
+    for (uint i=0; i < keyinfo->key_parts ; i++,seg++)
+    {
+      Field *field=keyinfo->key_part[i].field;
+      seg->flag=     0;
+      seg->language= field->charset()->number;
+      seg->length=   keyinfo->key_part[i].length;
+      seg->start=    keyinfo->key_part[i].offset;
+      if (field->flags & BLOB_FLAG)
+      {
+	seg->type=
+	((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
+	 HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
+	seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size);
+	seg->flag= HA_BLOB_PART;
+	seg->length=0;			// Whole blob in unique constraint
+      }
+      else
+      {
+	seg->type= keyinfo->key_part[i].type;
+        /* Tell handler if it can do suffic space compression */
+	if (field->real_type() == MYSQL_TYPE_STRING &&
+	    keyinfo->key_part[i].length > 32)
+	  seg->flag|= HA_SPACE_PACK;
+      }
+      if (!(field->flags & NOT_NULL_FLAG))
+      {
+	seg->null_bit= field->null_bit;
+	seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
+	/*
+	  We are using a GROUP BY on something that contains NULL
+	  In this case we have to tell Aria that two NULL should
+	  on INSERT be regarded at the same value
+	*/
+	if (!using_unique_constraint)
+	  keydef.flag|= HA_NULL_ARE_EQUAL;
+      }
+    }
+  }
+  bzero((char*) &create_info,sizeof(create_info));
+
+  if (big_tables && !(options & SELECT_SMALL_RESULT))
+    create_info.data_file_length= ~(ulonglong) 0;
+
+  if ((error= maria_create(share->table_name.str,
+                           share->reclength < 64 &&
+                           !share->blob_fields ? STATIC_RECORD :
+                           BLOCK_RECORD,
+                           share->keys, &keydef,
+                           (uint) (*recinfo-start_recinfo),
+                           start_recinfo,
+                           share->uniques, &uniquedef,
+                           &create_info,
+                           HA_CREATE_TMP_TABLE)))
+  {
+    table->file->print_error(error,MYF(0));	/* purecov: inspected */
+    table->db_stat=0;
+    goto err;
+  }
+  status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
+  table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+  share->db_record_offset= 1;
+  DBUG_RETURN(0);
+ err:
+  DBUG_RETURN(1);
+}
+
+
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error,
+                                         bool ignore_last_dupp_key_error)
+{
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
+                                              ignore_last_dupp_key_error,
+                                              maria_hton,
+                                              "converting HEAP to Aria");
+}
+
+#else
+
+/*
+  Create internal (MyISAM or Maria) temporary table
+
+  SYNOPSIS
+    create_internal_tmp_table()
+      table           Table object that descrimes the table to be created
+      keyinfo         Description of the index (there is always one index)
+      start_recinfo   engine's column descriptions
+      recinfo INOUT   End of engine's column descriptions
+      options         Option bits
+   
+  DESCRIPTION
+    Create an internal emporary table according to passed description. The is
+    assumed to have one unique index or constraint.
+
+    The passed array or ENGINE_COLUMNDEF structures must have this form:
+
+      1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
+         when there are many nullable columns)
+      2. Table columns
+      3. One free ENGINE_COLUMNDEF element (*recinfo points here)
+   
+    This function may use the free element to create hash column for unique
+    constraint.
+
+   RETURN
+     FALSE - OK
+     TRUE  - Error
+*/
+
+/* Create internal MyISAM temporary table */
+
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo,
+                               ulonglong options, my_bool big_tables)
 {
   int error;
   MI_KEYDEF keydef;
   MI_UNIQUEDEF uniquedef;
-  KEY *keyinfo=param->keyinfo;
   TABLE_SHARE *share= table->s;
-  DBUG_ENTER("create_myisam_tmp_table");
+  DBUG_ENTER("create_internal_tmp_table");
 
   if (share->keys)
   {						// Get keys for ni_create
@@ -11016,10 +12495,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
       uniquedef.null_are_equal=1;
 
       /* Create extra column for hash value */
-      bzero((uchar*) param->recinfo,sizeof(*param->recinfo));
-      param->recinfo->type= FIELD_CHECK;
-      param->recinfo->length=MI_UNIQUE_HASH_LENGTH;
-      param->recinfo++;
+      bzero((uchar*) *recinfo,sizeof(**recinfo));
+      (*recinfo)->type= FIELD_CHECK;
+      (*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
+      (*recinfo)++;
       share->reclength+=MI_UNIQUE_HASH_LENGTH;
     }
     else
@@ -11075,8 +12554,8 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
     create_info.data_file_length= ~(ulonglong) 0;
 
   if ((error=mi_create(share->table_name.str, share->keys, &keydef,
-		       (uint) (param->recinfo-param->start_recinfo),
-		       param->start_recinfo,
+		       (uint) (*recinfo-start_recinfo),
+		       start_recinfo,
 		       share->uniques, &uniquedef,
 		       &create_info,
 		       HA_CREATE_TMP_TABLE)))
@@ -11093,58 +12572,46 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param,
 }
 
 
-void
-free_tmp_table(THD *thd, TABLE *entry)
-{
-  MEM_ROOT own_root= entry->mem_root;
-  const char *save_proc_info;
-  DBUG_ENTER("free_tmp_table");
-  DBUG_PRINT("enter",("table: %s",entry->alias));
-
-  save_proc_info=thd->proc_info;
-  thd_proc_info(thd, "removing tmp table");
-
-  // Release latches since this can take a long time
-  ha_release_temporary_latches(thd);
-
-  if (entry->file)
-  {
-    if (entry->db_stat)
-      entry->file->ha_drop_table(entry->s->table_name.str);
-    else
-      entry->file->ha_delete_table(entry->s->table_name.str);
-    delete entry->file;
-  }
-
-  /* free blobs */
-  for (Field **ptr=entry->field ; *ptr ; ptr++)
-    (*ptr)->free();
-  free_io_cache(entry);
-
-  if (entry->temp_pool_slot != MY_BIT_NONE)
-    bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+/**
+  If a HEAP table gets full, create a MyISAM table and copy all rows to this
+*/
 
-  plugin_unlock(0, entry->s->db_plugin);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error,
+                                         bool ignore_last_dupp_key_error)
+{
+  return create_internal_tmp_table_from_heap2(thd, table, 
+                                              start_recinfo, recinfo, error,
+                                              ignore_last_dupp_key_error,
+                                              myisam_hton,
+                                              "converting HEAP to MyISAM");
+}
 
-  free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
-  thd_proc_info(thd, save_proc_info);
+#endif /* WITH_MARIA_STORAGE_ENGINE */
 
-  DBUG_VOID_RETURN;
-}
 
-/**
-  If a HEAP table gets full, create a MyISAM table and copy all rows
-  to this.
+/*
+  If a HEAP table gets full, create a internal table in MyISAM or Maria
+  and copy all rows to this
 */
 
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
-			     int error, bool ignore_last_dupp_key_error)
+
+static bool
+create_internal_tmp_table_from_heap2(THD *thd, TABLE *table,
+                                     ENGINE_COLUMNDEF *start_recinfo,
+                                     ENGINE_COLUMNDEF **recinfo, 
+                                     int error,
+                                     bool ignore_last_dupp_key_error,
+                                     handlerton *hton,
+                                     const char *proc_info)
 {
   TABLE new_table;
   TABLE_SHARE share;
   const char *save_proc_info;
   int write_err;
-  DBUG_ENTER("create_myisam_from_heap");
+  DBUG_ENTER("create_internal_tmp_table_from_heap2");
 
   if (table->s->db_type() != heap_hton || 
       error != HA_ERR_RECORD_FILE_FULL)
@@ -11156,31 +12623,29 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
     table->file->print_error(error, MYF(ME_FATALERROR));
     DBUG_RETURN(1);
   }
-
-  // Release latches since this can take a long time
-  ha_release_temporary_latches(thd);
-
   new_table= *table;
   share= *table->s;
   new_table.s= &share;
-  new_table.s->db_plugin= ha_lock_engine(thd, myisam_hton);
+  new_table.s->db_plugin= ha_lock_engine(thd, hton);
   if (!(new_table.file= get_new_handler(&share, &new_table.mem_root,
                                         new_table.s->db_type())))
     DBUG_RETURN(1);				// End of memory
 
   save_proc_info=thd->proc_info;
-  thd_proc_info(thd, "converting HEAP to MyISAM");
+  thd_proc_info(thd, proc_info);
 
-  if (create_myisam_tmp_table(&new_table, param,
-			      thd->lex->select_lex.options | thd->variables.option_bits,
-                              thd->variables.big_tables))
+  if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
+                                recinfo, thd->lex->select_lex.options | 
+			        thd->variables.option_bits,
+                                thd->variables.big_tables))
     goto err2;
   if (open_tmp_table(&new_table))
     goto err1;
   if (table->file->indexes_are_disabled())
     new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
   table->file->ha_index_or_rnd_end();
-  table->file->ha_rnd_init(1);
+  if (table->file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
   if (table->no_rows)
   {
     new_table.file->extra(HA_EXTRA_NO_ROWS);
@@ -11205,7 +12670,7 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
     is safe as this is a temporary MyISAM table without timestamp/autoincrement
     or partitioning.
   */
-  while (!table->file->rnd_next(new_table.record[1]))
+  while (!table->file->ha_rnd_next(new_table.record[1]))
   {
     write_err= new_table.file->ha_write_row(new_table.record[1]);
     DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
@@ -11226,7 +12691,7 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
   delete table->file;
   table->file=0;
   plugin_unlock(0, table->s->db_plugin);
-  share.db_plugin= my_plugin_lock(0, &share.db_plugin);
+  share.db_plugin= my_plugin_lock(0, share.db_plugin);
   new_table.s= table->s;                       // Keep old share
   *table= new_table;
   *table->s= share;
@@ -11235,7 +12700,7 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
   table->use_all_columns();
   if (save_proc_info)
     thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
-                  "Copying to tmp table on disk" : save_proc_info));
+                     "Copying to tmp table on disk" : save_proc_info));
   DBUG_RETURN(0);
 
  err:
@@ -11253,6 +12718,43 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
 }
 
 
+void
+free_tmp_table(THD *thd, TABLE *entry)
+{
+  MEM_ROOT own_root= entry->mem_root;
+  const char *save_proc_info;
+  DBUG_ENTER("free_tmp_table");
+  DBUG_PRINT("enter",("table: %s",entry->alias));
+
+  save_proc_info=thd->proc_info;
+  thd_proc_info(thd, "removing tmp table");
+
+  if (entry->file)
+  {
+    if (entry->db_stat)
+      entry->file->ha_drop_table(entry->s->table_name.str);
+    else
+      entry->file->ha_delete_table(entry->s->table_name.str);
+    delete entry->file;
+  }
+
+  /* free blobs */
+  for (Field **ptr=entry->field ; *ptr ; ptr++)
+    (*ptr)->free();
+  free_io_cache(entry);
+
+  if (entry->temp_pool_slot != MY_BIT_NONE)
+    bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
+
+  plugin_unlock(0, entry->s->db_plugin);
+
+  free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
+  thd_proc_info(thd, save_proc_info);
+
+  DBUG_VOID_RETURN;
+}
+
+
 /**
   @details
   Rows produced by a join sweep may end up in a temporary table or be sent
@@ -11342,14 +12844,14 @@ Next_select_func setup_end_select_func(JOIN *join)
   @retval
     -1  if error should be sent
 */
-
 static int
 do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 {
   int rc= 0;
   enum_nested_loop_state error= NESTED_LOOP_OK;
-  JOIN_TAB *join_tab= NULL;
+  JOIN_TAB *join_tab;
   DBUG_ENTER("do_select");
+  LINT_INIT(join_tab);
   
   join->procedure=procedure;
   join->tmp_table= table;			/* Save for easy recursion */
@@ -11361,7 +12863,14 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     empty_record(table);
     if (table->group && join->tmp_table_param.sum_func_count &&
         table->s->keys && !table->file->inited)
-      table->file->ha_index_init(0, 0);
+    {
+      int tmp_error;
+      if ((tmp_error= table->file->ha_index_init(0, 0)))
+      {
+        table->file->print_error(tmp_error, MYF(0)); /* purecov: inspected */
+        DBUG_RETURN(-1);                             /* purecov: inspected */
+      }
+    }
   }
   /* Set up select_end */
   Next_select_func end_select= setup_end_select_func(join);
@@ -11394,9 +12903,12 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     }
     else if (join->send_row_on_empty_set())
     {
-      List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
-                                 fields);
-      rc= join->result->send_data(*columns_list);
+      if (!join->having || join->having->val_int())
+      {
+        List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
+                                   fields);
+        rc= join->result->send_data(*columns_list);
+      }
     }
     /*
       An error can happen when evaluating the conds 
@@ -11409,9 +12921,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   else
   {
     DBUG_ASSERT(join->tables);
-    error= sub_select(join,join_tab,0);
+    error= join->first_select(join,join_tab,0);
     if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
-      error= sub_select(join,join_tab,1);
+      error= join->first_select(join,join_tab,1);
     if (error == NESTED_LOOP_QUERY_LIMIT)
       error= NESTED_LOOP_OK;                    /* select_limit used */
   }
@@ -11468,33 +12980,226 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
 }
 
 
+int rr_sequential_and_unpack(READ_RECORD *info)
+{
+  int error;
+  if ((error= rr_sequential(info)))
+    return error;
+  
+  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+    (*cp->do_copy)(cp);
+
+  return error;
+}
+
+
+/*
+  Semi-join materialization join function
+
+  SYNOPSIS
+    sub_select_sjm()
+      join            The join
+      join_tab        The first table in the materialization nest
+      end_of_records  FALSE <=> This call is made to pass another record 
+                                combination
+                      TRUE  <=> EOF
+
+  DESCRIPTION
+    This is a join execution function that does materialization of a join
+    suborder before joining it to the rest of the join.
+
+    The table pointed by join_tab is the first of the materialized tables.
+    This function first creates the materialized table and then switches to
+    joining the materialized table with the rest of the join.
+
+    The materialized table can be accessed in two ways:
+     - index lookups
+     - full table scan
+
+  RETURN
+    One of enum_nested_loop_state values
+*/
+
 enum_nested_loop_state
-sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
 {
+  int res;
   enum_nested_loop_state rc;
 
+  DBUG_ENTER("sub_select_sjm");
+
+  if (!join_tab->emb_sj_nest)
+  {
+    /*
+      We're handling GROUP BY/ORDER BY, this is the first table, and we've
+      actually executed the join already and now we're just reading the
+      result of the join from the temporary table.
+      Bypass to regular join handling.
+      Yes, it would be nicer if sub_select_sjm wasn't called at all in this
+      case but there's no easy way to arrange this.
+    */
+    rc= sub_select(join, join_tab, end_of_records);
+    DBUG_RETURN(rc);
+  }
+
+  SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info;
+  if (end_of_records)
+  {
+    rc= (*join_tab[sjm->tables - 1].next_select)(join,
+                                                 join_tab + sjm->tables,
+                                                 end_of_records);
+    DBUG_RETURN(rc);
+  }
+  if (!sjm->materialized)
+  {
+    /*
+      Do the materialization. First, put end_sj_materialize after the last
+      inner table so we can catch record combinations of sj-inner tables.
+    */
+    Next_select_func next_func= join_tab[sjm->tables - 1].next_select;
+    join_tab[sjm->tables - 1].next_select= end_sj_materialize;
+
+    /*
+      Now run the join for the inner tables. The first call is to run the
+      join, the second one is to signal EOF (this is essential for some
+      join strategies, e.g. it will make join buffering flush the records)
+    */
+    if ((rc= sub_select(join, join_tab, FALSE)) < 0 ||
+        (rc= sub_select(join, join_tab, TRUE/*EOF*/)) < 0)
+    {
+      join_tab[sjm->tables - 1].next_select= next_func;
+      DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/
+    }
+    join_tab[sjm->tables - 1].next_select= next_func;
+
+    /*
+      Ok, materialization finished. Initialize the access to the temptable
+    */
+    sjm->materialized= TRUE;
+    join_tab->read_record.read_record= join_no_more_records;
+    if (sjm->is_sj_scan)
+    {
+      /* Initialize full scan */
+      JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
+      init_read_record(&last_tab->read_record, join->thd,
+                       sjm->table, NULL, TRUE, TRUE, FALSE);
+
+      DBUG_ASSERT(last_tab->read_record.read_record == rr_sequential);
+      last_tab->read_first_record= join_read_record_no_init;
+      last_tab->read_record.copy_field= sjm->copy_field;
+      last_tab->read_record.copy_field_end= sjm->copy_field +
+                                            sjm->sjm_table_cols.elements;
+      last_tab->read_record.read_record= rr_sequential_and_unpack;
+    }
+  }
+
+  if (sjm->is_sj_scan)
+  {
+    /* Do full scan of the materialized table */
+    JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
+
+    Item *save_cond= last_tab->select_cond;
+    last_tab->set_select_cond(sjm->join_cond, __LINE__);
+
+    rc= sub_select(join, last_tab, end_of_records);
+    last_tab->set_select_cond(save_cond, __LINE__);
+    DBUG_RETURN(rc);
+  }
+  else
+  {
+    /* Do index lookup in the materialized table */
+    if ((res= join_read_key2(join_tab->join->thd, join_tab,
+                             sjm->table, sjm->tab_ref)) == 1)
+      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
+    if (res || !sjm->in_equality->val_int())
+      DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
+  }
+  rc= (*join_tab[sjm->tables - 1].next_select)(join,
+                                               join_tab + sjm->tables,
+                                               end_of_records);
+  DBUG_RETURN(rc);
+}
+
+
+/*
+  Fill the join buffer with partial records, retrieve all full  matches for them   
+
+  SYNOPSIS
+    sub_select_cache()
+      join     pointer to the structure providing all context info for the query
+      join_tab the first next table of the execution plan to be retrieved
+      end_records  true when we need to perform final steps of the retrieval
+
+  DESCRIPTION
+    For a given table Ti= join_tab from the sequence of tables of the chosen 
+    execution plan T1,...,Ti,...,Tn the function just put the partial record
+    t1,...,t[i-1] into the join buffer associated with table Ti unless this
+    is the last record added into the buffer. In this case,  the function 
+    additionally finds all matching full records for all partial
+    records accumulated in the buffer, after which it cleans the buffer up.
+    If a partial join record t1,...,ti is extended utilizing a dynamic
+    range scan then it is not put into the join buffer. Rather all matching
+    records are found for it at once by the function sub_select.
+
+  NOTES
+    The function implements the algorithmic schema for both Blocked Nested
+    Loop Join and Batched Key Access Join. The difference can be seen only at
+    the level of of the implementation of the put_record and join_records
+    virtual methods for the cache object associated with the join_tab.
+    The put_record method accumulates records in the cache, while the 
+    join_records method builds all matching join records and send them into
+    the output stream.  
+      
+  RETURN
+    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
+*/ 
+
+enum_nested_loop_state
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+  enum_nested_loop_state rc;
+  JOIN_CACHE *cache= join_tab->cache;
+
+  DBUG_ENTER("sub_select_cache");
+
+  /* This function cannot be called if join_tab has no associated join buffer */
+  DBUG_ASSERT(cache != NULL);
+
+  join_tab->cache->reset_join(join);
+
   if (end_of_records)
   {
-    rc= flush_cached_records(join,join_tab,FALSE);
+    rc= cache->join_records(FALSE);
     if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
-      rc= sub_select(join,join_tab,end_of_records);
-    return rc;
+      rc= sub_select(join, join_tab, end_of_records);
+    DBUG_RETURN(rc);
   }
-  if (join->thd->killed)		// If aborted by user
+  if (join->thd->killed)
   {
+    /* The user has aborted the execution of the query */
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;                   /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);
   }
-  if (join_tab->use_quick != 2 || test_if_quick_select(join_tab) <= 0)
+  if (!test_if_use_dynamic_range_scan(join_tab))
   {
-    if (!store_record_in_cache(&join_tab->cache))
-      return NESTED_LOOP_OK;                     // There is more room in cache
-    return flush_cached_records(join,join_tab,FALSE);
+    if (!cache->put_record())
+      DBUG_RETURN(NESTED_LOOP_OK); 
+    /* 
+      We has decided that after the record we've just put into the buffer
+      won't add any more records. Now try to find all the matching 
+      extensions for all records in the buffer.
+    */ 
+    rc= cache->join_records(FALSE);
+    DBUG_RETURN(rc);
   }
-  rc= flush_cached_records(join, join_tab, TRUE);
+  /*
+     TODO: Check whether we really need the call below and we can't do
+           without it. If it's not the case remove it.
+  */ 
+  rc= cache->join_records(TRUE);
   if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= sub_select(join, join_tab, end_of_records);
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 /**
@@ -11620,14 +13325,24 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 enum_nested_loop_state
 sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 {
+  DBUG_ENTER("sub_select");
+
   join_tab->table->null_row=0;
   if (end_of_records)
-    return (*join_tab->next_select)(join,join_tab+1,end_of_records);
-
+  {
+    enum_nested_loop_state nls=
+      (*join_tab->next_select)(join,join_tab+1,end_of_records);
+    DBUG_RETURN(nls);
+  }
   int error;
   enum_nested_loop_state rc;
   READ_RECORD *info= &join_tab->read_record;
 
+  if (join_tab->flush_weedout_table)
+  {
+    do_sj_reset(join_tab->flush_weedout_table);
+  }
+
   join->return_tab= join_tab;
 
   if (join_tab->last_inner)
@@ -11642,13 +13357,53 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
     join_tab->last_inner->first_unmatched= join_tab;
   }
   join->thd->warning_info->reset_current_row_for_warning();
+  
+  if (join_tab->loosescan_match_tab)
+    join_tab->loosescan_match_tab->found_match= FALSE;
 
   error= (*join_tab->read_first_record)(join_tab);
-  rc= evaluate_join_record(join, join_tab, error);
 
-  while (rc == NESTED_LOOP_OK)
+  if (join_tab->keep_current_rowid)
+    join_tab->table->file->position(join_tab->table->record[0]);
+
+  rc= evaluate_join_record(join, join_tab, error);
+  
+  /* 
+    Note: psergey has added the 2nd part of the following condition; the 
+    change should probably be made in 5.1, too.
+  */
+  bool skip_over= FALSE;
+  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
   {
+    if (join_tab->loosescan_match_tab && 
+        join_tab->loosescan_match_tab->found_match)
+    {
+      KEY *key= join_tab->table->key_info + join_tab->index;
+      key_copy(join_tab->loosescan_buf, info->record, key, 
+               join_tab->loosescan_key_len);
+      skip_over= TRUE;
+    }
+
     error= info->read_record(info);
+
+    if (skip_over && !error) 
+    {
+      if(!key_cmp(join_tab->table->key_info[join_tab->index].key_part,
+                  join_tab->loosescan_buf, join_tab->loosescan_key_len))
+      {
+        /* 
+          This is the LooseScan action: skip over records with the same key
+          value if we already had a match for them.
+        */
+        continue;
+      }
+      join_tab->loosescan_match_tab->found_match= FALSE;
+      skip_over= FALSE;
+    }
+
+    if (join_tab->keep_current_rowid)
+      join_tab->table->file->position(join_tab->table->record[0]);
+    
     rc= evaluate_join_record(join, join_tab, error);
   }
 
@@ -11658,16 +13413,23 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 
   if (rc == NESTED_LOOP_NO_MORE_ROWS)
     rc= NESTED_LOOP_OK;
-  return rc;
+  DBUG_RETURN(rc);
 }
 
 
 /**
-  Process one record of the nested loop join.
-
-    This function will evaluate parts of WHERE/ON clauses that are
-    applicable to the partial record on hand and in case of success
-    submit this record to the next level of the nested loop.
+  @brief Process one row of the nested loop join.
+
+  This function will evaluate parts of WHERE/ON clauses that are
+  applicable to the partial row on hand and in case of success
+  submit this row to the next level of the nested loop.
+
+  @param  join     - The join object
+  @param  join_tab - The most inner join_tab being processed
+  @param  error > 0: Error, terminate processing
+                = 0: (Partial) row is available
+                < 0: No more rows available at this level
+  @return Nested loop state (Ok, No_more_rows, Error, Killed)
 */
 
 static enum_nested_loop_state
@@ -11679,16 +13441,21 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
   COND *select_cond= join_tab->select_cond;
   bool select_cond_result= TRUE;
 
+  DBUG_ENTER("evaluate_join_record");
+  DBUG_PRINT("enter",
+             ("evaluate_join_record join: %p join_tab: %p"
+              " cond: %p error: %d", join, join_tab, select_cond, error));
   if (error > 0 || (join->thd->is_error()))     // Fatal error
-    return NESTED_LOOP_ERROR;
+    DBUG_RETURN(NESTED_LOOP_ERROR);
   if (error < 0)
-    return NESTED_LOOP_NO_MORE_ROWS;
+    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
   if (join->thd->killed)			// Aborted by user
   {
     join->thd->send_kill_message();
-    return NESTED_LOOP_KILLED;               /* purecov: inspected */
+    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
   }
-  DBUG_PRINT("info", ("select cond 0x%lx", (ulong)select_cond));
+
+  update_virtual_fields(join->thd, join_tab->table);
 
   if (select_cond)
   {
@@ -11696,7 +13463,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
 
     /* check for errors evaluating the condition */
     if (join->thd->is_error())
-      return NESTED_LOOP_ERROR;
+      DBUG_RETURN(NESTED_LOOP_ERROR);
   }
 
   if (!select_cond || select_cond_result)
@@ -11706,6 +13473,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       condition is true => a match is found.
     */
     bool found= 1;
+    bool use_not_exists_opt= 0;
     while (join_tab->first_unmatched && found)
     {
       /*
@@ -11722,7 +13490,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
       {
         if (tab->table->reginfo.not_exists_optimize)
-          return NESTED_LOOP_NO_MORE_ROWS;
+          use_not_exists_opt= 1;
         /* Check all predicates that has just been activated. */
         /*
           Actually all predicates non-guarded by first_unmatched->found
@@ -11741,7 +13509,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
               not to the last table of the current nest level.
             */
             join->return_tab= tab;
-            return NESTED_LOOP_OK;
+            DBUG_RETURN(NESTED_LOOP_OK);
           }
         }
       }
@@ -11755,6 +13523,28 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       join_tab->first_unmatched= first_unmatched;
     }
 
+    if (use_not_exists_opt)
+      DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
+    JOIN_TAB *return_tab= join->return_tab;
+    join_tab->found_match= TRUE;
+
+    if (join_tab->check_weed_out_table && found)
+    {
+      int res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table);
+      if (res == -1)
+        DBUG_RETURN(NESTED_LOOP_ERROR);
+      else if (res == 1)
+        found= FALSE;
+    }
+    else if (join_tab->do_firstmatch)
+    {
+      /* 
+        We should return to the join_tab->do_firstmatch after we have 
+        enumerated all the suffixes for current prefix row combination
+      */
+      return_tab= join_tab->do_firstmatch;
+    }
+
     /*
       It was not just a return to lower loop level when one
       of the newly activated predicates is evaluated as false
@@ -11771,16 +13561,19 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       rc= (*join_tab->next_select)(join, join_tab+1, 0);
       join->thd->warning_info->inc_current_row_for_warning();
       if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-        return rc;
+        DBUG_RETURN(rc);
+      if (return_tab < join->return_tab)
+        join->return_tab= return_tab;
+
       if (join->return_tab < join_tab)
-        return NESTED_LOOP_OK;
+        DBUG_RETURN(NESTED_LOOP_OK);
       /*
         Test if this was a SELECT DISTINCT query on a table that
         was not in the field list;  In this case we can abort if
         we found a row, as no new rows can be added to the result.
       */
       if (not_used_in_distinct && found_records != join->found_records)
-        return NESTED_LOOP_NO_MORE_ROWS;
+        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
     }
     else
     {
@@ -11798,10 +13591,9 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
     join->thd->warning_info->inc_current_row_for_warning();
     join_tab->read_record.unlock_row(join_tab);
   }
-  return NESTED_LOOP_OK;
+  DBUG_RETURN(NESTED_LOOP_OK);
 }
 
-
 /**
 
   @details
@@ -11868,97 +13660,27 @@ evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
   return (*join_tab->next_select)(join, join_tab+1, 0);
 }
 
+#ifdef MERGE_JUNK
+//psergey3-merge: remove:
+  SQL_SELECT *select;
+  select= join_tab->select;
 
-static enum_nested_loop_state
-flush_cached_records(JOIN *join,JOIN_TAB *join_tab,bool skip_last)
-{
-  enum_nested_loop_state rc= NESTED_LOOP_OK;
-  int error;
-  READ_RECORD *info;
-
-  join_tab->table->null_row= 0;
-  if (!join_tab->cache.records)
-    return NESTED_LOOP_OK;                      /* Nothing to do */
-  if (skip_last)
-    (void) store_record_in_cache(&join_tab->cache); // Must save this for later
-  if (join_tab->use_quick == 2)
-  {
-    if (join_tab->select->quick)
-    {					/* Used quick select last. reset it */
-      delete join_tab->select->quick;
-      join_tab->select->quick=0;
-    }
-  }
- /* read through all records */
-  if ((error=join_init_read_record(join_tab)))
-  {
-    reset_cache_write(&join_tab->cache);
-    return error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
-  }
-
-  for (JOIN_TAB *tmp=join->join_tab; tmp != join_tab ; tmp++)
-  {
-    tmp->status=tmp->table->status;
-    tmp->table->status=0;
-  }
-
-  info= &join_tab->read_record;
-  do
-  {
-    if (join->thd->killed)
-    {
-      join->thd->send_kill_message();
-      return NESTED_LOOP_KILLED; // Aborted by user /* purecov: inspected */
-    }
-    SQL_SELECT *select=join_tab->select;
-    if (rc == NESTED_LOOP_OK)
-    {
-      bool skip_record= FALSE;
-      if (join_tab->cache.select &&
-          join_tab->cache.select->skip_record(join->thd, &skip_record))
+    int err= 0;
+         (err= join_tab->cache.select->skip_record(join->thd)) != 0 ))
       {
         reset_cache_write(&join_tab->cache);
         return NESTED_LOOP_ERROR;
       }
 
-      if (!skip_record)
-      {
-        uint i;
-        reset_cache_read(&join_tab->cache);
-        for (i=(join_tab->cache.records- (skip_last ? 1 : 0)) ; i-- > 0 ;)
-        {
-          read_cached_record(join_tab);
-          skip_record= FALSE;
-          if (select && select->skip_record(join->thd, &skip_record))
+	if (!select || (err= select->skip_record(join->thd)) != 0)
+          if (err < 0)
           {
             reset_cache_write(&join_tab->cache);
             return NESTED_LOOP_ERROR;
           }
-          if (!skip_record)
-          {
-            rc= (join_tab->next_select)(join,join_tab+1,0);
-            if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
-            {
-              reset_cache_write(&join_tab->cache);
-              return rc;
-            }
-          }
-        }
-      }
-    }
-  } while (!(error=info->read_record(info)));
-
-  if (skip_last)
-    read_cached_record(join_tab);		// Restore current record
-  reset_cache_write(&join_tab->cache);
-  if (error > 0)				// Fatal error
-    return NESTED_LOOP_ERROR;                   /* purecov: inspected */
-  for (JOIN_TAB *tmp2=join->join_tab; tmp2 != join_tab ; tmp2++)
-    tmp2->table->status=tmp2->status;
-  return NESTED_LOOP_OK;
-}
-
-
+      
+    rc= NESTED_LOOP_OK;
+#endif
 /*****************************************************************************
   The different ways to read a record
   Returns -1 if row was not found, 0 if row was found and 1 on errors
@@ -11989,10 +13711,10 @@ int safe_index_read(JOIN_TAB *tab)
 {
   int error;
   TABLE *table= tab->table;
-  if ((error=table->file->index_read_map(table->record[0],
-                                         tab->ref.key_buff,
-                                         make_prev_keypart_map(tab->ref.key_parts),
-                                         HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                             tab->ref.key_buff,
+                                             make_prev_keypart_map(tab->ref.key_parts),
+                                             HA_READ_KEY_EXACT)))
     return report_error(table, error);
   return 0;
 }
@@ -12019,6 +13741,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
       if (!table->maybe_null || error > 0)
 	DBUG_RETURN(error);
     }
+    /*
+      The optimizer trust the engine that when stats.records is 0, there
+      was no found rows
+    */
+    DBUG_ASSERT(table->file->stats.records > 0 || error);
   }
   else
   {
@@ -12026,11 +13753,11 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
 	!table->no_keyread &&
         (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
     {
-      table->set_keyread(TRUE);
+      table->enable_keyread();
       tab->index= tab->ref.key;
     }
     error=join_read_const(tab);
-    table->set_keyread(FALSE);
+    table->disable_keyread();
     if (error)
     {
       tab->info="unique row not found";
@@ -12043,6 +13770,17 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos)
   }
   if (*tab->on_expr_ref && !table->null_row)
   {
+#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
+    /*
+      This test could be very useful to find bugs in the optimizer
+      where we would call this function with an expression that can't be
+      evaluated yet. We can't have this enabled by default as long as
+      have items like Item_equal, that doesn't report they are const but
+      they can still be called even if they contain not const items.
+    */
+    (*tab->on_expr_ref)->update_used_tables();
+    DBUG_ASSERT((*tab->on_expr_ref)->const_item());
+#endif
     if ((table->null_row= test((*tab->on_expr_ref)->val_int() == 0)))
       mark_as_null_row(table);  
   }
@@ -12080,8 +13818,8 @@ join_read_system(JOIN_TAB *tab)
   int error;
   if (table->status & STATUS_GARBAGE)		// If first read
   {
-    if ((error=table->file->read_first_row(table->record[0],
-					   table->s->primary_key)))
+    if ((error= table->file->ha_read_first_row(table->record[0],
+                                               table->s->primary_key)))
     {
       if (error != HA_ERR_END_OF_FILE)
 	return report_error(table, error);
@@ -12089,6 +13827,7 @@ join_read_system(JOIN_TAB *tab)
       empty_record(table);			// Make empty record
       return -1;
     }
+    update_virtual_fields(tab->join->thd, table);
     store_record(table,record[1]);
   }
   else if (!table->status)			// Only happens with left join
@@ -12123,10 +13862,10 @@ join_read_const(JOIN_TAB *tab)
       error=HA_ERR_KEY_NOT_FOUND;
     else
     {
-      error=table->file->index_read_idx_map(table->record[0],tab->ref.key,
-                                            (uchar*) tab->ref.key_buff,
-                                            make_prev_keypart_map(tab->ref.key_parts),
-                                            HA_READ_KEY_EXACT);
+      error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key,
+                                                (uchar*) tab->ref.key_buff,
+                                                make_prev_keypart_map(tab->ref.key_parts),
+                                                HA_READ_KEY_EXACT);
     }
     if (error)
     {
@@ -12137,6 +13876,7 @@ join_read_const(JOIN_TAB *tab)
 	return report_error(table, error);
       return -1;
     }
+    update_virtual_fields(tab->join->thd, table);
     store_record(table,record[1]);
   }
   else if (!(table->status & ~STATUS_NULL_ROW))	// Only happens with left join
@@ -12148,21 +13888,49 @@ join_read_const(JOIN_TAB *tab)
   return table->status ? -1 : 0;
 }
 
+/*
+  eq_ref access method implementation: "read_first" function
+
+  SYNOPSIS
+    join_read_key()
+      tab  JOIN_TAB of the accessed table
+
+  DESCRIPTION
+    This is "read_fist" function for the eq_ref access method. The difference
+    from ref access function is that is that it has a one-element lookup 
+    cache (see cmp_buffer_with_ref)
+
+  RETURN
+    0  - Ok
+   -1  - Row not found 
+    1  - Error
+*/
+
 
 static int
 join_read_key(JOIN_TAB *tab)
 {
-  int error;
-  TABLE *table= tab->table;
+  return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
+}
 
+
+/*
+  eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
+  not from the join_tab. See join_read_key for detailed synopsis.
+*/
+int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+{
+  int error;
   if (!table->file->inited)
   {
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
+    table->file->ha_index_init(table_ref->key, (tab ? tab->sorted : TRUE));
   }
-  if (cmp_buffer_with_ref(tab) ||
+
+  /* TODO: Why don't we do "Late NULLs Filtering" here? */
+  if (cmp_buffer_with_ref(thd, table, table_ref) ||
       (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
   {
-    if (tab->ref.key_err)
+    if (table_ref->key_err)
     {
       table->status=STATUS_NOT_FOUND;
       return -1;
@@ -12171,28 +13939,28 @@ join_read_key(JOIN_TAB *tab)
       Moving away from the current record. Unlock the row
       in the handler if it did not match the partial WHERE.
     */
-    if (tab->ref.has_record && tab->ref.use_count == 0)
+    if (tab && tab->ref.has_record && tab->ref.use_count == 0)
     {
       tab->read_record.file->unlock_row();
-      tab->ref.has_record= FALSE;
+      table_ref->has_record= FALSE;
     }
-    error=table->file->index_read_map(table->record[0],
-                                      tab->ref.key_buff,
-                                      make_prev_keypart_map(tab->ref.key_parts),
-                                      HA_READ_KEY_EXACT);
+    error=table->file->ha_index_read_map(table->record[0],
+                                  table_ref->key_buff,
+                                  make_prev_keypart_map(table_ref->key_parts),
+                                  HA_READ_KEY_EXACT);
     if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
 
     if (! error)
     {
-      tab->ref.has_record= TRUE;
-      tab->ref.use_count= 1;
+      table_ref->has_record= TRUE;
+      table_ref->use_count= 1;
     }
   }
   else if (table->status == 0)
   {
-    DBUG_ASSERT(tab->ref.has_record);
-    tab->ref.use_count++;
+    DBUG_ASSERT(table_ref->has_record);
+    table_ref->use_count++;
   }
   table->null_row=0;
   return table->status ? -1 : 0;
@@ -12243,8 +14011,14 @@ join_read_always_key(JOIN_TAB *tab)
 
   /* Initialize the index first */
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
- 
+  {
+    if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      return(1);                              /* purecov: inspected */
+    }
+  }
+
   /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
   for (uint i= 0 ; i < tab->ref.key_parts ; i++)
   {
@@ -12254,10 +14028,10 @@ join_read_always_key(JOIN_TAB *tab)
 
   if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
     return -1;
-  if ((error=table->file->index_read_map(table->record[0],
-                                         tab->ref.key_buff,
-                                         make_prev_keypart_map(tab->ref.key_parts),
-                                         HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                             tab->ref.key_buff,
+                                             make_prev_keypart_map(tab->ref.key_parts),
+                                             HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12279,12 +14053,19 @@ join_read_last_key(JOIN_TAB *tab)
   TABLE *table= tab->table;
 
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, tab->sorted);
+  {
+    if ((error= table->file->ha_index_init(tab->ref.key, tab->sorted)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      return(1);                              /* purecov: inspected */
+    }
+  }
   if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
     return -1;
-  if ((error=table->file->index_read_last_map(table->record[0],
-                                              tab->ref.key_buff,
-                                              make_prev_keypart_map(tab->ref.key_parts))))
+  if ((error= table->file->ha_index_read_map(table->record[0],
+                                            tab->ref.key_buff,
+                                     make_prev_keypart_map(tab->ref.key_parts),
+                                            HA_READ_PREFIX_LAST)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12309,9 +14090,9 @@ join_read_next_same(READ_RECORD *info)
   TABLE *table= info->table;
   JOIN_TAB *tab=table->reginfo.join_tab;
 
-  if ((error=table->file->index_next_same(table->record[0],
-					  tab->ref.key_buff,
-					  tab->ref.key_length)))
+  if ((error= table->file->ha_index_next_same(table->record[0],
+                                              tab->ref.key_buff,
+                                              tab->ref.key_length)))
   {
     if (error != HA_ERR_END_OF_FILE)
       return report_error(table, error);
@@ -12329,7 +14110,7 @@ join_read_prev_same(READ_RECORD *info)
   TABLE *table= info->table;
   JOIN_TAB *tab=table->reginfo.join_tab;
 
-  if ((error=table->file->index_prev(table->record[0])))
+  if ((error= table->file->ha_index_prev(table->record[0])))
     return report_error(table, error);
   if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
                       tab->ref.key_length))
@@ -12352,7 +14133,7 @@ join_init_quick_read_record(JOIN_TAB *tab)
 
 int read_first_record_seq(JOIN_TAB *tab)
 {
-  if (tab->read_record.file->ha_rnd_init(1))
+  if (tab->read_record.file->ha_rnd_init_with_error(1))
     return 1;
   return (*tab->read_record.read_record)(&tab->read_record);
 }
@@ -12363,17 +14144,30 @@ test_if_quick_select(JOIN_TAB *tab)
   delete tab->select->quick;
   tab->select->quick=0;
   return tab->select->test_quick_select(tab->join->thd, tab->keys,
-					(table_map) 0, HA_POS_ERROR, 0);
+					(table_map) 0, HA_POS_ERROR, 0,
+                                        FALSE);
 }
 
 
-static int
-join_init_read_record(JOIN_TAB *tab)
+static 
+bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
+{
+    return (join_tab->use_quick == 2 && test_if_quick_select(join_tab) > 0);
+}
+
+int join_init_read_record(JOIN_TAB *tab)
 {
   if (tab->select && tab->select->quick && tab->select->quick->reset())
     return 1;
-  init_read_record(&tab->read_record, tab->join->thd, tab->table,
-		   tab->select,1,1, FALSE);
+  if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+                       tab->select,1,1, FALSE))
+    return 1;
+  return (*tab->read_record.read_record)(&tab->read_record);
+}
+
+static int
+join_read_record_no_init(JOIN_TAB *tab)
+{
   return (*tab->read_record.read_record)(&tab->read_record);
 }
 
@@ -12381,10 +14175,11 @@ join_init_read_record(JOIN_TAB *tab)
 static int
 join_read_first(JOIN_TAB *tab)
 {
-  int error;
+  int error= 0;
   TABLE *table=tab->table;
-  if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
-    table->set_keyread(TRUE);
+  if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+      !table->key_read)
+    table->enable_keyread();
   tab->table->status=0;
   tab->read_record.read_record=join_read_next;
   tab->read_record.table=table;
@@ -12392,8 +14187,10 @@ join_read_first(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, tab->sorted);
-  if ((error=tab->table->file->index_first(tab->table->record[0])))
+    error= table->file->ha_index_init(tab->index, tab->sorted);
+  if (!error)
+    error= table->file->prepare_index_scan();
+  if (error || (error=tab->table->file->ha_index_first(tab->table->record[0])))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       report_error(table, error);
@@ -12407,8 +14204,9 @@ static int
 join_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error=info->file->index_next(info->record)))
+  if ((error= info->file->ha_index_next(info->record)))
     return report_error(info->table, error);
+
   return 0;
 }
 
@@ -12417,9 +14215,10 @@ static int
 join_read_last(JOIN_TAB *tab)
 {
   TABLE *table=tab->table;
-  int error;
-  if (table->covering_keys.is_set(tab->index) && !table->no_keyread)
-    table->set_keyread(TRUE);
+  int error= 0;
+  if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
+      !table->key_read)
+    table->enable_keyread();
   tab->table->status=0;
   tab->read_record.read_record=join_read_prev;
   tab->read_record.table=table;
@@ -12427,9 +14226,12 @@ join_read_last(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, 1);
-  if ((error= tab->table->file->index_last(tab->table->record[0])))
+    error= table->file->ha_index_init(tab->index, 1);
+  if (!error)
+    error= table->file->prepare_index_scan();
+  if (error || (error= tab->table->file->ha_index_last(tab->table->record[0])))
     return report_error(table, error);
+
   return 0;
 }
 
@@ -12438,7 +14240,7 @@ static int
 join_read_prev(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->index_prev(info->record)))
+  if ((error= info->file->ha_index_prev(info->record)))
     return report_error(info->table, error);
   return 0;
 }
@@ -12450,11 +14252,15 @@ join_ft_read_first(JOIN_TAB *tab)
   int error;
   TABLE *table= tab->table;
 
-  if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, 1);
+  if (!table->file->inited &&
+      (error= table->file->ha_index_init(tab->ref.key, 1)))
+  {
+    table->file->print_error(error, MYF(0));  /* purecov: inspected */
+    return(1);                                /* purecov: inspected */
+  }
   table->file->ft_init();
 
-  if ((error= table->file->ft_read(table->record[0])))
+  if ((error= table->file->ha_ft_read(table->record[0])))
     return report_error(table, error);
   return 0;
 }
@@ -12463,7 +14269,7 @@ static int
 join_ft_read_next(READ_RECORD *info)
 {
   int error;
-  if ((error= info->file->ft_read(info->table->record[0])))
+  if ((error= info->file->ha_ft_read(info->table->record[0])))
     return report_error(info->table, error);
   return 0;
 }
@@ -12611,7 +14417,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	       bool end_of_records)
 {
@@ -12649,8 +14455,11 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  {
             List_iterator_fast<Item> it(*join->fields);
             Item *item;
+            DBUG_PRINT("info", ("no matching rows"));
+
 	    /* No matching rows for group function */
 	    join->clear();
+            join->no_rows_in_result_called= 1;
 
             while ((item= it++))
               item->no_rows_in_result();
@@ -12747,12 +14556,14 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     {
       int error;
       join->found_records++;
-      if ((error=table->file->ha_write_row(table->record[0])))
+      if ((error= table->file->ha_write_row(table->record[0])))
       {
         if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
 	  goto end;
-	if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
-				    error,1))
+	if (create_internal_tmp_table_from_heap(join->thd, table, 
+                                                join->tmp_table_param.start_recinfo,
+                                                &join->tmp_table_param.recinfo,
+                                                error,1))
 	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
 	table->s->uniques=0;			// To ensure rows are the same
       }
@@ -12802,15 +14613,15 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     if (item->maybe_null)
       group->buff[-1]= (char) group->field->is_null();
   }
-  if (!table->file->index_read_map(table->record[1],
-                                   join->tmp_table_param.group_buff,
-                                   HA_WHOLE_KEY,
-                                   HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_map(table->record[1],
+                                      join->tmp_table_param.group_buff,
+                                      HA_WHOLE_KEY,
+                                      HA_READ_KEY_EXACT))
   {						/* Update old record */
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error=table->file->ha_update_row(table->record[1],
-                                          table->record[0])))
+    if ((error= table->file->ha_update_row(table->record[1],
+                                           table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -12834,13 +14645,19 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   init_tmptable_sum_functions(join->sum_funcs);
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
-  if ((error=table->file->ha_write_row(table->record[0])))
+  if ((error= table->file->ha_write_row(table->record[0])))
   {
-    if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
-				error, 0))
+    if (create_internal_tmp_table_from_heap(join->thd, table,
+                                            join->tmp_table_param.start_recinfo,
+                                            &join->tmp_table_param.recinfo,
+                                            error, 0))
       DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
     /* Change method to update rows */
-    table->file->ha_index_init(0, 0);
+    if ((error= table->file->ha_index_init(0, 0)))
+    {
+      table->file->print_error(error, MYF(0));/* purecov: inspected */
+      DBUG_RETURN(NESTED_LOOP_ERROR);         /* purecov: inspected */
+    }
     join->join_tab[join->tables-1].next_select=end_unique_update;
   }
   join->send_records++;
@@ -12871,7 +14688,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
 
-  if (!(error=table->file->ha_write_row(table->record[0])))
+  if (!(error= table->file->ha_write_row(table->record[0])))
     join->send_records++;			// New group
   else
   {
@@ -12880,15 +14697,15 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
     }
-    if (table->file->rnd_pos(table->record[1],table->file->dup_ref))
+    if (table->file->ha_rnd_pos(table->record[1],table->file->dup_ref))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
     }
     restore_record(table,record[1]);
     update_tmptable_sum_func(join->sum_funcs,table);
-    if ((error=table->file->ha_update_row(table->record[1],
-                                          table->record[0])))
+    if ((error= table->file->ha_update_row(table->record[1],
+                                           table->record[0])))
     {
       table->file->print_error(error,MYF(0));	/* purecov: inspected */
       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
@@ -12899,7 +14716,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 
 
 	/* ARGSUSED */
-static enum_nested_loop_state
+enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		bool end_of_records)
 {
@@ -12932,9 +14749,11 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	if (!join->having || join->having->val_int())
 	{
           int error= table->file->ha_write_row(table->record[0]);
-          if (error && create_myisam_from_heap(join->thd, table,
-                                               &join->tmp_table_param,
-                                               error, 0))
+          if (error && 
+              create_internal_tmp_table_from_heap(join->thd, table,
+                                                  join->tmp_table_param.start_recinfo,
+                                                  &join->tmp_table_param.recinfo,
+                                                  error, 0))
 	    DBUG_RETURN(NESTED_LOOP_ERROR);
         }
         if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -12986,12 +14805,24 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     1 if right_item is used removable reference key on left_item
 */
 
-static bool test_if_ref(Item_field *left_item,Item *right_item)
+bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
 {
   Field *field=left_item->field;
-  // No need to change const test. We also have to keep tests on LEFT JOIN
-  if (!field->table->const_table && !field->table->maybe_null)
+  JOIN_TAB *join_tab= field->table->reginfo.join_tab;
+  // No need to change const test
+  if (!field->table->const_table && join_tab &&
+      (!join_tab->first_inner ||
+       *join_tab->first_inner->on_expr_ref == root_cond))
   {
+    // Cond guards
+    for (uint i = 0; i < join_tab->ref.key_parts; i++)
+    {
+      if (join_tab->ref.cond_guards[i])
+      {
+        return FALSE;
+      }
+    }
+    //
     Item *ref_item=part_of_refkey(field->table,field);
     if (ref_item && ref_item->eq(right_item,1))
     {
@@ -13022,10 +14853,175 @@ static bool test_if_ref(Item_field *left_item,Item *right_item)
 }
 
 
+
+/*
+  Extract a condition that can be checked after reading given table
+  
+  SYNOPSIS
+    make_cond_for_table()
+      cond         Condition to analyze
+      tables       Tables for which "current field values" are available
+      used_table   Table that we're extracting the condition for (may 
+                   also include PSEUDO_TABLE_BITS
+      exclude_expensive_cond  Do not push expensive conditions
+
+  DESCRIPTION
+    Extract the condition that can be checked after reading the table
+    specified in 'used_table', given that current-field values for tables
+    specified in 'tables' bitmap are available.
+
+    The function assumes that
+      - Constant parts of the condition has already been checked.
+      - Condition that could be checked for tables in 'tables' has already 
+        been checked.
+        
+    The function takes into account that some parts of the condition are
+    guaranteed to be true by employed 'ref' access methods (the code that
+    does this is located at the end, search down for "EQ_FUNC").
+
+
+  SEE ALSO 
+    make_cond_for_info_schema uses similar algorithm
+
+  RETURN
+    Extracted condition
+*/
+
+static Item *
+make_cond_for_table(Item *cond, table_map tables, table_map used_table,
+                    bool exclude_expensive_cond)
+{
+  return make_cond_for_table_from_pred(cond, cond, tables, used_table,
+                                       exclude_expensive_cond);
+}
+               
+static Item *
+make_cond_for_table_from_pred(Item *root_cond, Item *cond,
+                              table_map tables, table_map used_table,
+                              bool exclude_expensive_cond)
+
+{
+  if (used_table && !(cond->used_tables() & used_table) &&
+      /*
+        Exclude constant conditions not checked at optimization time if
+        the table we are pushing conditions to is the first one.
+        As a result, such conditions are not considered as already checked
+        and will be checked at execution time, attached to the first table.
+
+        psergey: TODO: "used_table & 1" doesn't make sense in nearly any
+        context. Look at setup_table_map(), table bits reflect the order 
+        the tables were encountered by the parser. Check what we should
+        replace this condition with.
+      */
+      !((used_table & 1) && cond->is_expensive()))
+    return (COND*) 0;				// Already checked
+  if (cond->type() == Item::COND_ITEM)
+  {
+    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+    {
+      /* Create new top level AND item */
+      Item_cond_and *new_cond=new Item_cond_and;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(root_cond, item, 
+                                                tables, used_table,
+                                                exclude_expensive_cond);
+	if (fix)
+	  new_cond->argument_list()->push_back(fix);
+      }
+      switch (new_cond->argument_list()->elements) {
+      case 0:
+	return (COND*) 0;			// Always true
+      case 1:
+	return new_cond->argument_list()->head();
+      default:
+	/*
+	  Item_cond_and do not need fix_fields for execution, its parameters
+	  are fixed or do not need fix_fields, too
+	*/
+	new_cond->quick_fix_field();
+	new_cond->used_tables_cache=
+	  ((Item_cond_and*) cond)->used_tables_cache &
+	  tables;
+	return new_cond;
+      }
+    }
+    else
+    {						// Or list
+      Item_cond_or *new_cond=new Item_cond_or;
+      if (!new_cond)
+	return (COND*) 0;			// OOM /* purecov: inspected */
+      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+      Item *item;
+      while ((item=li++))
+      {
+	Item *fix=make_cond_for_table_from_pred(root_cond, item,
+                                                tables, 0L,
+                                                exclude_expensive_cond);
+	if (!fix)
+	  return (COND*) 0;			// Always true
+	new_cond->argument_list()->push_back(fix);
+      }
+      /*
+	Item_cond_and do not need fix_fields for execution, its parameters
+	are fixed or do not need fix_fields, too
+      */
+      new_cond->quick_fix_field();
+      new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+      new_cond->top_level_item();
+      return new_cond;
+    }
+  }
+
+  /*
+    Because the following test takes a while and it can be done
+    table_count times, we mark each item that we have examined with the result
+    of the test
+  */
+  if (cond->marker == 3 || (cond->used_tables() & ~tables) ||
+      /*
+        When extracting constant conditions, treat expensive conditions as
+        non-constant, so that they are not evaluated at optimization time.
+      */
+      (!used_table && exclude_expensive_cond && cond->is_expensive()))
+    return (COND*) 0;				// Can't check this yet
+  if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
+    return cond;				// Not boolean op
+
+  if (cond->type() == Item::FUNC_ITEM && 
+      ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
+  {
+    Item *left_item=	((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
+    if (left_item->type() == Item::FIELD_ITEM &&
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+    if (right_item->type() == Item::FIELD_ITEM &&
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
+    {
+      cond->marker=3;			// Checked when read
+      return (COND*) 0;
+    }
+  }
+  cond->marker=2;
+  return cond;
+}
+
+
+
 static COND *
-make_cond_for_table(COND *cond, table_map tables, table_map used_table)
+make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, 
+                    table_map sjm_tables)
 {
-  if (used_table && !(cond->used_tables() & used_table))
+  if ((!(cond->used_tables() & ~tables) || 
+       !(cond->used_tables() & ~sjm_tables)))
     return (COND*) 0;				// Already checked
   if (cond->type() == Item::COND_ITEM)
   {
@@ -13039,7 +15035,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,used_table);
+	Item *fix=make_cond_after_sjm(root_cond, item, tables, sjm_tables);
 	if (fix)
 	  new_cond->argument_list()->push_back(fix);
       }
@@ -13069,7 +15065,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
       Item *item;
       while ((item=li++))
       {
-	Item *fix=make_cond_for_table(item,tables,0L);
+	Item *fix= make_cond_after_sjm(root_cond, item, tables, 0L);
 	if (!fix)
 	  return (COND*) 0;			// Always true
 	new_cond->argument_list()->push_back(fix);
@@ -13091,23 +15087,27 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
     of the test
   */
 
-  if (cond->marker == 3 || (cond->used_tables() & ~tables))
+  if (cond->marker == 3 || (cond->used_tables() & ~(tables | sjm_tables)))
     return (COND*) 0;				// Can't check this yet
   if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
     return cond;				// Not boolean op
 
+  /* 
+    Remove equalities that are guaranteed to be true by use of 'ref' access
+    method
+  */
   if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
   {
-    Item *left_item=	((Item_func*) cond)->arguments()[0];
-    Item *right_item= ((Item_func*) cond)->arguments()[1];
+    Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
+    Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
     if (left_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) left_item,right_item))
+	test_if_ref(root_cond, (Item_field*) left_item,right_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
     }
     if (right_item->type() == Item::FIELD_ITEM &&
-	test_if_ref((Item_field*) right_item,left_item))
+	test_if_ref(root_cond, (Item_field*) right_item,left_item))
     {
       cond->marker=3;			// Checked when read
       return (COND*) 0;
@@ -13117,6 +15117,7 @@ make_cond_for_table(COND *cond, table_map tables, table_map used_table)
   return cond;
 }
 
+
 static Item *
 part_of_refkey(TABLE *table,Field *field)
 {
@@ -13266,23 +15267,6 @@ ok:
   @param table                 Table to scan
   @param usable_keys           Allowed keys
 
-  @note
-     As far as 
-     1) clustered primary key entry data set is a set of all record
-        fields (key fields and not key fields) and
-     2) secondary index entry data is a union of its key fields and
-        primary key fields (at least InnoDB and its derivatives don't
-        duplicate primary key fields there, even if the primary and
-        the secondary keys have a common subset of key fields),
-     then secondary index entry data is always a subset of primary key entry.
-     Unfortunately, key_info[nr].key_length doesn't show the length
-     of key/pointer pair but a sum of key field lengths only, thus
-     we can't estimate index IO volume comparing only this key_length
-     value of secondary keys and clustered PK.
-     So, try secondary keys first, and choose PK only if there are no
-     usable secondary covering keys or found best secondary key include
-     all table fields (i.e. same as PK):
-
   @return
     MAX_KEY     no suitable key found
     key index   otherwise
@@ -13290,41 +15274,23 @@ ok:
 
 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
 {
+  double min_cost= DBL_MAX;
   uint best= MAX_KEY;
-  uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
-                             table->s->primary_key != MAX_KEY &&
-                             usable_keys->is_set(table->s->primary_key)) ?
-                            table->s->primary_key : MAX_KEY;
   if (!usable_keys->is_clear_all())
   {
-    uint min_length= (uint) ~0;
     for (uint nr=0; nr < table->s->keys ; nr++)
     {
-      if (nr == usable_clustered_pk)
-        continue;
       if (usable_keys->is_set(nr))
       {
-        if (table->key_info[nr].key_length < min_length)
+        double cost= table->file->keyread_time(nr, 1, table->file->records());
+        if (cost < min_cost)
         {
-          min_length=table->key_info[nr].key_length;
+          min_cost= cost;
           best=nr;
         }
       }
     }
   }
-  if (usable_clustered_pk != MAX_KEY)
-  {
-    /*
-     If the primary key is clustered and found shorter key covers all table
-     fields then primary key scan normally would be faster because amount of
-     data to scan is the same but PK is clustered.
-     It's safe to compare key parts with table fields since duplicate key
-     parts aren't allowed.
-     */
-    if (best == MAX_KEY ||
-        table->key_info[best].key_parts >= table->s->fields)
-      best= usable_clustered_pk;
-  }
   return best;
 }
 
@@ -13544,7 +15510,7 @@ find_field_in_item_list (Field *field, void *data)
 
 static bool
 test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
-			bool no_changes, key_map *map)
+			bool no_changes, const key_map *map)
 {
   int ref_key;
   uint ref_key_parts;
@@ -13554,6 +15520,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
   SQL_SELECT *select=tab->select;
   key_map usable_keys;
   QUICK_SELECT_I *save_quick= 0;
+  COND *orig_select_cond= 0;
   DBUG_ENTER("test_if_skip_sort_order");
   LINT_INIT(ref_key_parts);
 
@@ -13573,7 +15540,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     }
     usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
     if (usable_keys.is_clear_all())
-      DBUG_RETURN(0);					// No usable keys
+      goto use_filesort;					// No usable keys
   }
 
   ref_key= -1;
@@ -13583,7 +15550,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     ref_key=	   tab->ref.key;
     ref_key_parts= tab->ref.key_parts;
     if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
-      DBUG_RETURN(0);
+      goto use_filesort;
   }
   else if (select && select->quick)		// Range found by opt_range
   {
@@ -13598,7 +15565,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
     if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
-      DBUG_RETURN(0);
+      goto use_filesort;
     ref_key=	   select->quick->index;
     ref_key_parts= select->quick->used_key_parts;
   }
@@ -13620,10 +15587,15 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       */
       if (table->covering_keys.is_set(ref_key))
 	usable_keys.intersect(table->covering_keys);
+      if (tab->pre_idx_push_select_cond)
+        orig_select_cond= tab->set_cond(tab->pre_idx_push_select_cond);
+
       if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
 				       &usable_keys)) < MAX_KEY)
       {
 	/* Found key that can be used to retrieve data in sorted order */
+        //psergey-mrr:if (tab->pre_idx_push_select_cond)
+        //  tab->select_cond= tab->select->cond= tab->pre_idx_push_select_cond;
 	if (tab->ref.key >= 0)
 	{
           /*
@@ -13637,9 +15609,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
           KEYUSE *keyuse= tab->keyuse;
           while (keyuse->key != new_ref_key && keyuse->table == tab->table)
             keyuse++;
+
           if (create_ref_for_key(tab->join, tab, keyuse, 
                                  tab->join->const_table_map))
-            DBUG_RETURN(0);
+            goto use_filesort;
 
           pick_table_access_method(tab);
 	}
@@ -13661,9 +15634,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
                                         (tab->join->select_options &
                                          OPTION_FOUND_ROWS) ?
                                         HA_POS_ERROR :
-                                        tab->join->unit->select_limit_cnt,0) <=
+                                        tab->join->unit->select_limit_cnt,0,
+                                        TRUE) <=
               0)
-            DBUG_RETURN(0);
+            goto use_filesort;
 	}
         ref_key= new_ref_key;
       }
@@ -13675,9 +15649,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
       goto check_reverse_order;
   }
   {
-    uint best_key_parts= 0;
+    uint UNINIT_VAR(best_key_parts);
     uint saved_best_key_parts= 0;
-    int best_key_direction= 0;
+    int UNINIT_VAR(best_key_direction);
     int best_key= -1;
     JOIN *join= tab->join;
     ha_rows table_records= table->file->stats.records;
@@ -13713,7 +15687,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
                                     join->select_options & OPTION_FOUND_ROWS ?
                                     HA_POS_ERROR :
                                     join->unit->select_limit_cnt,
-                                    0) > 0;
+                                    TRUE, FALSE) > 0;
       }
       if (!no_changes)
       {
@@ -13722,7 +15696,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
            and best_key doesn't, then revert the decision.
         */
         if (!table->covering_keys.is_set(best_key))
-          table->set_keyread(FALSE);
+          table->disable_keyread();
         if (!quick_created)
 	{
           tab->index= best_key;
@@ -13734,8 +15708,20 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
             delete select->quick;
             select->quick= 0;
           }
-          if (table->covering_keys.is_set(best_key))
-            table->set_keyread(TRUE);
+          if (table->covering_keys.is_set(best_key) && ! table->key_read)
+            table->enable_keyread();
+          if (tab->pre_idx_push_select_cond)
+          {
+            COND *tmp_cond= tab->pre_idx_push_select_cond;
+            if (orig_select_cond)
+            {
+              tmp_cond= and_conds(tmp_cond, orig_select_cond);
+              tmp_cond->quick_fix_field();
+            }
+            tab->set_cond(tmp_cond);
+            /* orig_select_cond was merged, no need to restore original one. */
+            orig_select_cond= 0;
+          }
           table->file->ha_index_or_rnd_end();
           if (join->select_options & SELECT_DESCRIBE)
           {
@@ -13776,7 +15762,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
         saved_best_key_parts :  best_key_parts;
     }
     else
-      DBUG_RETURN(0); 
+      goto use_filesort; 
   } 
 
 check_reverse_order:                  
@@ -13799,7 +15785,7 @@ check_reverse_order:
         {
           tab->limit= 0;
           select->quick= save_quick;
-          DBUG_RETURN(0);                   // Use filesort
+          goto use_filesort;                   // Use filesort
         }
             
         /* ORDER BY range_key DESC */
@@ -13808,7 +15794,7 @@ check_reverse_order:
 	{
           select->quick= save_quick;
           tab->limit= 0;
-	  DBUG_RETURN(0);		// Reverse sort not supported
+	  goto use_filesort;		// Reverse sort not supported
 	}
 	select->set_quick(tmp);
       }
@@ -13827,8 +15813,14 @@ check_reverse_order:
     }
   }
   else if (select && select->quick)
-    select->quick->sorted= 1;
+    select->quick->need_sorted_output();
+  if (orig_select_cond)
+    tab->set_cond(orig_select_cond);
   DBUG_RETURN(1);
+use_filesort:
+  if (orig_select_cond)
+    tab->set_cond(orig_select_cond);
+  DBUG_RETURN(0);
 }
 
 
@@ -13915,7 +15907,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
         and in index_merge 'Only index' cannot be used
       */
       if (((uint) tab->ref.key != select->quick->index))
-        table->set_keyread(FALSE);
+        table->disable_keyread();
     }
     else
     {
@@ -13926,7 +15918,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 	field, quick will contain an empty record set.
       */
       if (!(select->quick= (tab->type == JT_FT ?
-			    new FT_SELECT(thd, table, tab->ref.key) :
+			    get_ft_select(thd, table, tab->ref.key) :
 			    get_quick_select_for_ref(thd, table, &tab->ref, 
                                                      tab->found_records))))
 	goto err;
@@ -13965,19 +15957,18 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     table->quick_keys.clear_all();  // as far as we cleanup select->quick
     table->sort.io_cache= tablesort_result_cache;
   }
-  tab->select_cond=0;
+  tab->set_select_cond(NULL, __LINE__);
   tab->last_inner= 0;
   tab->first_unmatched= 0;
   tab->type=JT_ALL;				// Read with normal read_record
   tab->read_first_record= join_init_read_record;
   tab->join->examined_rows+=examined_rows;
-  table->set_keyread(FALSE); // Restore if we used indexes
+  table->disable_keyread(); // Restore if we used indexes
   DBUG_RETURN(table->sort.found_records == HA_POS_ERROR);
 err:
   DBUG_RETURN(-1);
 }
 
-
 /*****************************************************************************
   Remove duplicates from tmp table
   This should be recoded to add a unique index to the table and remove
@@ -14080,8 +16071,10 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
   org_record=(char*) (record=table->record[0])+offset;
   new_record=(char*) table->record[1]+offset;
 
-  file->ha_rnd_init(1);
-  error=file->rnd_next(record);
+  if (file->ha_rnd_init_with_error(1))
+    DBUG_RETURN(1);
+
+  error= file->ha_rnd_next(record);
   for (;;)
   {
     if (thd->killed)
@@ -14094,7 +16087,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     {
       if (error == HA_ERR_RECORD_DELETED)
       {
-        error= file->rnd_next(record);
+        error= file->ha_rnd_next(record);
         continue;
       }
       if (error == HA_ERR_END_OF_FILE)
@@ -14103,9 +16096,9 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     }
     if (having && !having->val_int())
     {
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
-      error=file->rnd_next(record);
+      error= file->ha_rnd_next(record);
       continue;
     }
     if (copy_blobs(first_field))
@@ -14120,7 +16113,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
     bool found=0;
     for (;;)
     {
-      if ((error=file->rnd_next(record)))
+      if ((error= file->ha_rnd_next(record)))
       {
 	if (error == HA_ERR_RECORD_DELETED)
 	  continue;
@@ -14130,19 +16123,20 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
       }
       if (compare_record(table, first_field) == 0)
       {
-	if ((error=file->ha_delete_row(record)))
+	if ((error= file->ha_delete_row(record)))
 	  goto err;
       }
       else if (!found)
       {
 	found=1;
-	file->position(record);	// Remember position
+        if ((error= file->remember_rnd_pos()))
+          goto err;
       }
     }
     if (!found)
       break;					// End of file
-    /* Restart search on next row */
-    error=file->restart_rnd_next(record,file->ref);
+    /* Restart search on saved row */
+    error=file->restart_rnd_next(record);
   }
 
   file->extra(HA_EXTRA_NO_CACHE);
@@ -14208,7 +16202,9 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     DBUG_RETURN(1);
   }
 
-  file->ha_rnd_init(1);
+  if ((error= file->ha_rnd_init(1)))
+    goto err;
+
   key_pos=key_buffer;
   for (;;)
   {
@@ -14219,7 +16215,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
       error=0;
       goto err;
     }
-    if ((error=file->rnd_next(record)))
+    if ((error= file->ha_rnd_next(record)))
     {
       if (error == HA_ERR_RECORD_DELETED)
 	continue;
@@ -14229,7 +16225,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     }
     if (having && !having->val_int())
     {
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
       continue;
     }
@@ -14246,7 +16242,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
     if (my_hash_search(&hash, org_key_pos, key_length))
     {
       /* Duplicated found ; Remove the row */
-      if ((error=file->ha_delete_row(record)))
+      if ((error= file->ha_delete_row(record)))
 	goto err;
     }
     else
@@ -14312,274 +16308,52 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
 }
 
 
-/*****************************************************************************
-  Fill join cache with packed records
-  Records are stored in tab->cache.buffer and last record in
-  last record is stored with pointers to blobs to support very big
-  records
-******************************************************************************/
-
-static int
-join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count)
-{
-  reg1 uint i;
-  uint length, blobs;
-  size_t size;
-  CACHE_FIELD *copy,**blob_ptr;
-  JOIN_CACHE  *cache;
-  JOIN_TAB *join_tab;
-  DBUG_ENTER("join_init_cache");
+/*
+  eq_ref: Create the lookup key and check if it is the same as saved key
 
-  cache= &tables[table_count].cache;
-  cache->fields=blobs=0;
 
-  join_tab=tables;
-  for (i=0 ; i < table_count ; i++,join_tab++)
-  {
-    if (!join_tab->used_fieldlength)		/* Not calced yet */
-      calc_used_field_length(thd, join_tab);
-    cache->fields+=join_tab->used_fields;
-    blobs+=join_tab->used_blobs;
-  }
-  if (!(cache->field=(CACHE_FIELD*)
-	sql_alloc(sizeof(CACHE_FIELD)*(cache->fields+table_count*2)+(blobs+1)*
 
-		  sizeof(CACHE_FIELD*))))
-  {
-    my_free(cache->buff);		/* purecov: inspected */
-    cache->buff=0;				/* purecov: inspected */
-    DBUG_RETURN(1);				/* purecov: inspected */
-  }
-  copy=cache->field;
-  blob_ptr=cache->blob_ptr=(CACHE_FIELD**)
-    (cache->field+cache->fields+table_count*2);
 
-  length=0;
-  for (i=0 ; i < table_count ; i++)
-  {
-    bool have_bit_fields= FALSE;
-    uint null_fields=0,used_fields;
-    Field **f_ptr,*field;
-    MY_BITMAP *read_set= tables[i].table->read_set;
-    for (f_ptr=tables[i].table->field,used_fields=tables[i].used_fields ;
-	 used_fields ;
-	 f_ptr++)
-    {
-      field= *f_ptr;
-      if (bitmap_is_set(read_set, field->field_index))
-      {
-	used_fields--;
-	length+=field->fill_cache_field(copy);
-	if (copy->type == CACHE_BLOB)
-	  (*blob_ptr++)=copy;
-	if (field->real_maybe_null())
-	  null_fields++;
-        if (field->type() == MYSQL_TYPE_BIT &&
-            ((Field_bit*)field)->bit_len)
-          have_bit_fields= TRUE;    
-	copy++;
-      }
-    }
-    /* Copy null bits from table */
-    if (null_fields || have_bit_fields)
-    {						/* must copy null bits */
-      copy->str= tables[i].table->null_flags;
-      copy->length= tables[i].table->s->null_bytes;
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-    /* If outer join table, copy null_row flag */
-    if (tables[i].table->maybe_null)
-    {
-      copy->str= (uchar*) &tables[i].table->null_row;
-      copy->length=sizeof(tables[i].table->null_row);
-      copy->type=0;
-      copy->field=0;
-      length+=copy->length;
-      copy++;
-      cache->fields++;
-    }
-  }
-
-  cache->length=length+blobs*sizeof(char*);
-  cache->blobs=blobs;
-  *blob_ptr=0;					/* End sequentel */
-  size=max(thd->variables.join_buff_size, cache->length);
-  if (!(cache->buff=(uchar*) my_malloc(size,MYF(0))))
-    DBUG_RETURN(1);				/* Don't use cache */ /* purecov: inspected */
-  cache->end=cache->buff+size;
-  reset_cache_write(cache);
-  DBUG_RETURN(0);
-}
-
-
-static ulong
-used_blob_length(CACHE_FIELD **ptr)
-{
-  uint length,blob_length;
-  for (length=0 ; *ptr ; ptr++)
-  {
-    Field_blob *field_blob= (Field_blob *) (*ptr)->field;
-    (*ptr)->blob_length=blob_length= field_blob->get_length();
-    length+=blob_length;
-    field_blob->get_ptr(&(*ptr)->str);
-  }
-  return length;
-}
+  SYNOPSIS
+    cmp_buffer_with_ref()
+      tab      Join tab of the accessed table
+      table    The table to read.  This is usually tab->table, except for 
+               semi-join when we might need to make a lookup in a temptable
+               instead.
+      tab_ref  The structure with methods to collect index lookup tuple. 
+               This is usually table->ref, except for the case of when we're 
+               doing lookup into semi-join materialization table.
+
+  DESCRIPTION 
+    Used by eq_ref access method: create the index lookup key and check if 
+    we've used this key at previous lookup (If yes, we don't need to repeat
+    the lookup - the record has been already fetched)
 
+  RETURN 
+    TRUE   No cached record for the key, or failed to create the key (due to
+           out-of-domain error)
+    FALSE  The created key is the same as the previous one (and the record 
+           is already in table->record)
+*/
 
 static bool
-store_record_in_cache(JOIN_CACHE *cache)
+cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
 {
-  uint length;
-  uchar *pos;
-  CACHE_FIELD *copy,*end_field;
-  bool last_record;
-
-  pos=cache->pos;
-  end_field=cache->field+cache->fields;
-
-  length=cache->length;
-  if (cache->blobs)
-    length+=used_blob_length(cache->blob_ptr);
-  if ((last_record= (length + cache->length > (size_t) (cache->end - pos))))
-    cache->ptr_record=cache->records;
-
-  /*
-    There is room in cache. Put record there
-  */
-  cache->records++;
-  for (copy=cache->field ; copy < end_field; copy++)
+  bool no_prev_key;
+  if (!tab_ref->disable_cache)
   {
-    if (copy->type == CACHE_BLOB)
+    if (!(no_prev_key= tab_ref->key_err))
     {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->get_image(pos, copy->length+sizeof(char*), 
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->get_image(pos, copy->length, // blob length
-                              blob_field->charset());
-	memcpy(pos+copy->length,copy->str,copy->blob_length);  // Blob data
-	pos+=copy->length+copy->blob_length;
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-	uchar *str,*end;
-        Field *field= copy->field;
-        if (field && field->maybe_null() && field->is_null())
-          end= str= copy->str;
-        else
-          for (str=copy->str,end= str+copy->length;
-               end > str && end[-1] == ' ' ;
-               end--) ;
-	length=(uint) (end-str);
-	memcpy(pos+2, str, length);
-        int2store(pos, length);
-	pos+= length+2;
-      }
-      else
-      {
-	memcpy(pos,copy->str,copy->length);
-	pos+=copy->length;
-      }
+      /* Previous access found a row. Copy its key */
+      memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
     }
   }
-  cache->pos=pos;
-  return last_record || (size_t) (cache->end - pos) < cache->length;
-}
-
-
-static void
-reset_cache_read(JOIN_CACHE *cache)
-{
-  cache->record_nr=0;
-  cache->pos=cache->buff;
-}
-
-
-static void reset_cache_write(JOIN_CACHE *cache)
-{
-  reset_cache_read(cache);
-  cache->records= 0;
-  cache->ptr_record= (uint) ~0;
-}
-
-
-static void
-read_cached_record(JOIN_TAB *tab)
-{
-  uchar *pos;
-  uint length;
-  bool last_record;
-  CACHE_FIELD *copy,*end_field;
-
-  last_record=tab->cache.record_nr++ == tab->cache.ptr_record;
-  pos=tab->cache.pos;
-
-  for (copy=tab->cache.field,end_field=copy+tab->cache.fields ;
-       copy < end_field;
-       copy++)
-  {
-    if (copy->type == CACHE_BLOB)
-    {
-      Field_blob *blob_field= (Field_blob *) copy->field;
-      if (last_record)
-      {
-	blob_field->set_image(pos, copy->length+sizeof(char*),
-                              blob_field->charset());
-	pos+=copy->length+sizeof(char*);
-      }
-      else
-      {
-	blob_field->set_ptr(pos, pos+copy->length);
-	pos+=copy->length + blob_field->get_length();
-      }
-    }
-    else
-    {
-      if (copy->type == CACHE_STRIPPED)
-      {
-        length= uint2korr(pos);
-	memcpy(copy->str, pos+2, length);
-	memset(copy->str+length, ' ', copy->length-length);
-	pos+= 2 + length;
-      }
-      else
-      {
-	memcpy(copy->str,pos,copy->length);
-	pos+=copy->length;
-      }
-    }
-  }
-  tab->cache.pos=pos;
-  return;
-}
-
-
-static bool
-cmp_buffer_with_ref(JOIN_TAB *tab)
-{
-  bool diff;
-  if (!(diff=tab->ref.key_err))
-  {
-    memcpy(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length);
-  }
-  if ((tab->ref.key_err= cp_buffer_from_ref(tab->join->thd, tab->table,
-                                            &tab->ref)) ||
-      diff)
+  else 
+    no_prev_key= TRUE;
+  if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
+      no_prev_key)
     return 1;
-  return memcmp(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length)
+  return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
     != 0;
 }
 
@@ -14697,8 +16471,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     /* Lookup the current GROUP field in the FROM clause. */
     order_item_type= order_item->type();
     from_field= (Field*) not_found_field;
-    if ((is_group_field &&
-        order_item_type == Item::FIELD_ITEM) ||
+    if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
         order_item_type == Item::REF_ITEM)
     {
       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
@@ -14758,30 +16531,12 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     time.
 
     We check order_item->fixed because Item_func_group_concat can put
-    arguments for which fix_fields already was called.
-    
-    group_fix_field= TRUE is to resolve aliases from the SELECT list
-    without creating of Item_ref-s: JOIN::exec() wraps aliased items
-    in SELECT list with Item_copy items. To re-evaluate such a tree
-    that includes Item_copy items we have to refresh Item_copy caches,
-    but:
-      - filesort() never refresh Item_copy items,
-      - end_send_group() checks every record for group boundary by the
-        test_if_group_changed function that obtain data from these
-        Item_copy items, but the copy_fields function that
-        refreshes Item copy items is called after group boundaries only -
-        that is a vicious circle.
-    So we prevent inclusion of Item_copy items.
+    arguments for which fix_fields already was called.    
   */
-  bool save_group_fix_field= thd->lex->current_select->group_fix_field;
-  if (is_group_field)
-    thd->lex->current_select->group_fix_field= TRUE;
-  bool ret= (!order_item->fixed &&
+  if (!order_item->fixed &&
       (order_item->fix_fields(thd, order->item) ||
        (order_item= *order->item)->check_cols(1) ||
-       thd->is_fatal_error));
-  thd->lex->current_select->group_fix_field= save_group_fix_field;
-  if (ret)
+       thd->is_fatal_error))
     return TRUE; /* Wrong field. */
 
   uint el= all_fields.elements;
@@ -14853,6 +16608,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
   uint org_fields=all_fields.elements;
 
   thd->where="group statement";
+  enum_parsing_place save_place= thd->lex->current_select->parsing_place;
+  thd->lex->current_select->parsing_place= IN_GROUP_BY;
   for (ord= order; ord; ord= ord->next)
   {
     if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
@@ -14865,6 +16622,8 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
       return 1;
     }
   }
+  thd->lex->current_select->parsing_place= save_place;
+
   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
   {
     /*
@@ -15289,7 +17048,7 @@ alloc_group_fields(JOIN *join,ORDER *group)
   {
     for (; group ; group=group->next)
     {
-      Cached_item *tmp=new_Cached_item(join->thd, *group->item);
+      Cached_item *tmp=new_Cached_item(join->thd, *group->item, TRUE);
       if (!tmp || join->group_fields.push_front(tmp))
 	return TRUE;
     }
@@ -15299,6 +17058,38 @@ alloc_group_fields(JOIN *join,ORDER *group)
 }
 
 
+
+/*
+  Test if a single-row cache of items changed, and update the cache.
+
+  @details Test if a list of items that typically represents a result
+  row has changed. If the value of some item changed, update the cached
+  value for this item.
+  
+  @param list list of <item, cached_value> pairs stored as Cached_item.
+
+  @return -1 if no item changed
+  @return index of the first item that changed
+*/
+
+int test_if_item_cache_changed(List<Cached_item> &list)
+{
+  DBUG_ENTER("test_if_item_cache_changed");
+  List_iterator<Cached_item> li(list);
+  int idx= -1,i;
+  Cached_item *buff;
+
+  for (i=(int) list.elements-1 ; (buff=li++) ; i--)
+  {
+    if (buff->cmp())
+      idx=i;
+  }
+  DBUG_PRINT("info", ("idx: %d", idx));
+  DBUG_RETURN(idx);
+}
+
+
+
 static int
 test_if_group_changed(List<Cached_item> &list)
 {
@@ -15396,7 +17187,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
       pos= item;
       if (item->field->flags & BLOB_FLAG)
       {
-	if (!(pos= Item_copy::create(pos)))
+	if (!(pos= new Item_copy_string(pos)))
 	  goto err;
        /*
          Item_copy_string::copy for function can call 
@@ -15430,7 +17221,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
           DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
           copy->set(tmp, item->result_field);
           item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
           copy->to_ptr[copy->from_length]= 0;
 #endif
           copy++;
@@ -15439,6 +17230,8 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
     }
     else if ((real_pos->type() == Item::FUNC_ITEM ||
 	      real_pos->type() == Item::SUBSELECT_ITEM ||
+              (real_pos->get_cached_item() &&
+               real_pos->get_cached_item()->type() == Item::SUBSELECT_ITEM) ||
 	      real_pos->type() == Item::CACHE_ITEM ||
 	      real_pos->type() == Item::COND_ITEM) &&
 	     !real_pos->with_sum_func)
@@ -15450,7 +17243,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
 	 on how the value is to be used: In some cases this may be an
 	 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
       */
-      if (!(pos= Item_copy::create(pos)))
+      if (!(pos=new Item_copy_string(pos)))
 	goto err;
       if (i < border)                           // HAVING, ORDER and GROUP BY
       {
@@ -15503,8 +17296,8 @@ copy_fields(TMP_TABLE_PARAM *param)
     (*ptr->do_copy)(ptr);
 
   List_iterator_fast<Item> it(param->copy_funcs);
-  Item_copy *item;
-  while ((item = (Item_copy*) it++))
+  Item_copy_string *item;
+  while ((item = (Item_copy_string*) it++))
     item->copy();
 }
 
@@ -15917,16 +17710,21 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
     DBUG_RETURN(TRUE);
 
   if (!cond->fixed)
-    cond->fix_fields(thd, (Item**)&cond);
+  {
+    Item *tmp_item= (Item*) cond;
+    cond->fix_fields(thd, &tmp_item);
+    DBUG_ASSERT(cond == tmp_item);
+  }
   if (join_tab->select)
   {
     if (join_tab->select->cond)
       error=(int) cond->add(join_tab->select->cond);
-    join_tab->select_cond=join_tab->select->cond=cond;
+    join_tab->select->cond= cond;
+    join_tab->set_select_cond(cond, __LINE__);
   }
   else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
                                           &error)))
-    join_tab->select_cond=cond;
+    join_tab->set_select_cond(cond, __LINE__);
 
   DBUG_RETURN(error ? TRUE : FALSE);
 }
@@ -16381,8 +18179,10 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
       copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
       if ((write_error= table_arg->file->ha_write_row(table_arg->record[0])))
       {
-	if (create_myisam_from_heap(thd, table_arg, &tmp_table_param,
-                                    write_error, 0))
+	if (create_internal_tmp_table_from_heap(thd, table_arg, 
+                                                tmp_table_param.start_recinfo,
+                                                &tmp_table_param.recinfo,
+                                                write_error, 0))
 	  return 1;		     
       }
     }
@@ -16464,7 +18264,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       the UNION to provide precise EXPLAIN information will hardly be
       appreciated :)
     */
-    char table_name_buffer[NAME_LEN];
+    char table_name_buffer[SAFE_NAME_LEN];
     item_list.empty();
     /* id */
     item_list.push_back(new Item_null);
@@ -16528,7 +18328,15 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
   else
   {
     table_map used_tables=0;
-    for (uint i=0 ; i < join->tables ; i++)
+
+    uchar sjm_nests[MAX_TABLES];
+    uint sjm_nests_cur=0;
+    uint sjm_nests_end= 0;
+    uint end_table= join->tables;
+    bool printing_materialize_nest= FALSE;
+    uint select_id= join->select_lex->select_number;
+
+    for (uint i=0 ; i < end_table ; i++)
     {
       JOIN_TAB *tab=join->join_tab+i;
       TABLE *table=tab->table;
@@ -16536,8 +18344,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       char buff[512]; 
       char buff1[512], buff2[512], buff3[512];
       char keylen_str_buf[64];
+      my_bool key_read;
       String extra(buff, sizeof(buff),cs);
-      char table_name_buffer[NAME_LEN];
+      char table_name_buffer[SAFE_NAME_LEN];
       String tmp1(buff1,sizeof(buff1),cs);
       String tmp2(buff2,sizeof(buff2),cs);
       String tmp3(buff3,sizeof(buff3),cs);
@@ -16545,16 +18354,101 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       tmp1.length(0);
       tmp2.length(0);
       tmp3.length(0);
-
       quick_type= -1;
+
+      /* Don't show eliminated tables */
+      if (table->map & join->eliminated_tables)
+      {
+        used_tables|=table->map;
+        continue;
+      }
+
       item_list.empty();
       /* id */
-      item_list.push_back(new Item_uint((uint32)
-				       join->select_lex->select_number));
+      item_list.push_back(new Item_uint((uint32)select_id));
       /* select_type */
-      item_list.push_back(new Item_string(join->select_lex->type,
-					  strlen(join->select_lex->type),
-					  cs));
+      const char* stype= printing_materialize_nest? "SUBQUERY" : 
+                                                    join->select_lex->type;
+      item_list.push_back(new Item_string(stype, strlen(stype), cs));
+      
+      /* 
+        Special processing for SJ-Materialization nests: print the fake table
+        and delay printing of the SJM nest contents until later.
+      */
+      uint sj_strategy= join->best_positions[i].sj_strategy;
+      if (sj_is_materialize_strategy(sj_strategy) &&
+          !printing_materialize_nest)
+      {
+        /* table */
+        int len= my_snprintf(table_name_buffer, 
+                             sizeof(table_name_buffer)-1,
+                             "subselect%d", 
+                             tab->emb_sj_nest->sj_subq_pred->get_identifier());
+	item_list.push_back(new Item_string(table_name_buffer, len, cs));
+        /* partitions */
+        if (join->thd->lex->describe & DESCRIBE_PARTITIONS)
+          item_list.push_back(item_null);
+        /* type */
+        uint type= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? JT_ALL : JT_EQ_REF;
+        item_list.push_back(new Item_string(join_type_str[type],
+                                            strlen(join_type_str[type]),
+                                            cs));
+        /* possible_keys */
+	item_list.push_back(new Item_string("unique_key", 
+                                            strlen("unique_key"), cs));
+        if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+        {
+          item_list.push_back(item_null); /* key */
+          item_list.push_back(item_null); /* key_len */
+          item_list.push_back(item_null); /* ref */
+        }
+        else
+        {
+          /* key */
+          item_list.push_back(new Item_string("unique_key", strlen("unique_key"), cs));
+          /* key_len */
+          uint klen= tab->emb_sj_nest->sj_mat_info->table->key_info[0].key_length;
+          uint buflen= longlong10_to_str(klen, keylen_str_buf, 10) - keylen_str_buf;
+          item_list.push_back(new Item_string(keylen_str_buf, buflen, cs));
+          /* ref */
+          item_list.push_back(new Item_string("func", strlen("func"), cs));
+        }
+        /* rows */
+        ha_rows rows= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)?
+                       tab->emb_sj_nest->sj_mat_info->rows : 1;
+        item_list.push_back(new Item_int((longlong)rows, 
+                                         MY_INT64_NUM_DECIMAL_DIGITS));
+        /* filtered */
+        if (join->thd->lex->describe & DESCRIBE_EXTENDED)
+          item_list.push_back(new Item_float(1.0, 2));
+        
+        /* Extra */
+	if (need_tmp_table)
+	{
+	  need_tmp_table=0;
+	  extra.append(STRING_WITH_LEN("; Using temporary"));
+	}
+	if (need_order)
+	{
+	  need_order=0;
+	  extra.append(STRING_WITH_LEN("; Using filesort"));
+	}
+        /* Skip initial "; "*/
+        const char *str= extra.ptr();
+        uint32 extra_len= extra.length();
+        if (extra_len)
+        {
+          str += 2;
+          extra_len -= 2;
+        }
+	item_list.push_back(new Item_string(str, extra_len, cs));
+
+        /* Register the nest for further processing: */
+        sjm_nests[sjm_nests_end++]= i;
+        i += join->best_positions[i].n_sj_tables-1;
+        goto loop_end;
+      }
+
       if (tab->type == JT_ALL && tab->select && tab->select->quick)
       {
         quick_type= tab->select->quick->get_type();
@@ -16565,6 +18459,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         else
 	  tab->type = JT_RANGE;
       }
+
       /* table */
       if (table->derived_select_number)
       {
@@ -16633,8 +18528,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	item_list.push_back(new Item_string(key_info->name,
 					    strlen(key_info->name),
 					    system_charset_info));
-        length= longlong2str(tab->ref.key_length, keylen_str_buf, 10) - 
-                keylen_str_buf;
+        length= (longlong10_to_str(tab->ref.key_length, keylen_str_buf, 10) - 
+                 keylen_str_buf);
         item_list.push_back(new Item_string(keylen_str_buf, length,
                                             system_charset_info));
 	for (store_key **ref=tab->ref.key_copy ; *ref ; ref++)
@@ -16652,8 +18547,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         register uint length;
 	item_list.push_back(new Item_string(key_info->name,
 					    strlen(key_info->name),cs));
-        length= longlong2str(key_info->key_length, keylen_str_buf, 10) - 
-                keylen_str_buf;
+        length= (longlong10_to_str(key_info->key_length, keylen_str_buf, 10) - 
+                 keylen_str_buf);
         item_list.push_back(new Item_string(keylen_str_buf, 
                                             length,
                                             system_charset_info));
@@ -16731,6 +18626,17 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         /* Add "filtered" field to item_list. */
         if (join->thd->lex->describe & DESCRIBE_EXTENDED)
         {
+          /*
+            psergey-todo: 
+              in the code above, we cast to integer when asssigning to
+              examined_rows. 
+              In the code below, we may divide original value but result of
+              conversion of the same value to integer, which may produce a
+              value that's greater than 100%, which looks very odd.
+              I'm not fixing this right away because that might trigger a wave
+              of small EXPLAIN EXTENDED output changes, which I don't have time
+              to deal with right now.
+          */
           float f= 0.0; 
           if (examined_rows)
             f= (float) (100.0 * join->best_positions[i].records_read /
@@ -16740,7 +18646,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
 
       /* Build "Extra" field and add it to item_list. */
-      my_bool key_read=table->key_read;
+      key_read=table->key_read;
       if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
           table->covering_keys.is_set(tab->index))
 	key_read=1;
@@ -16770,6 +18676,18 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
       }
       else
       {
+        uint keyno= MAX_KEY;
+        if (tab->ref.key_parts)
+          keyno= tab->ref.key;
+        else if (tab->select && tab->select->quick)
+          keyno = tab->select->quick->index;
+
+        if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
+            table->file->pushed_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition"));
+        else if (tab->cache_idx_cond)
+          extra.append(STRING_WITH_LEN("; Using index condition(BKA)"));
+
         if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || 
             quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
             quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
@@ -16839,6 +18757,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	if (table->reginfo.not_exists_optimize)
 	  extra.append(STRING_WITH_LEN("; Not exists"));
+
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
+            !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
+             HA_MRR_USE_DEFAULT_IMPL))
+        {
+	  extra.append(STRING_WITH_LEN("; Using MRR"));
+        }
+
 	if (need_tmp_table)
 	{
 	  need_tmp_table=0;
@@ -16851,6 +18777,56 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
 	}
 	if (distinct & test_all_bits(used_tables,thd->used_tables))
 	  extra.append(STRING_WITH_LEN("; Distinct"));
+        if (tab->loosescan_match_tab)
+        {
+          extra.append(STRING_WITH_LEN("; LooseScan"));
+        }
+
+        if (tab->flush_weedout_table)
+          extra.append(STRING_WITH_LEN("; Start temporary"));
+        if (tab->check_weed_out_table)
+          extra.append(STRING_WITH_LEN("; End temporary"));
+        else if (tab->do_firstmatch)
+        {
+          if (tab->do_firstmatch == join->join_tab - 1)
+            extra.append(STRING_WITH_LEN("; FirstMatch"));
+          else
+          {
+            extra.append(STRING_WITH_LEN("; FirstMatch("));
+            TABLE *prev_table=tab->do_firstmatch->table;
+            if (prev_table->derived_select_number)
+            {
+              char namebuf[NAME_LEN];
+              /* Derived table name generation */
+              int len= my_snprintf(namebuf, sizeof(namebuf)-1,
+                                   "<derived%u>",
+                                   prev_table->derived_select_number);
+              extra.append(namebuf, len);
+            }
+            else
+              extra.append(prev_table->pos_in_table_list->alias);
+            extra.append(STRING_WITH_LEN(")"));
+          }
+        }
+
+        /*
+        if (sj_is_materialize_strategy(sj_strategy))
+        {
+          if (join->best_positions[i].n_sj_tables == 1)
+            extra.append(STRING_WITH_LEN("; Materialize"));
+          else
+          {
+            last_sjm_table= i + join->best_positions[i].n_sj_tables - 1;
+            extra.append(STRING_WITH_LEN("; Start materialize"));
+          }
+          if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+              extra.append(STRING_WITH_LEN("; Scan"));
+        }
+        else if (last_sjm_table == i)
+        {
+          extra.append(STRING_WITH_LEN("; End materialize"));
+        }
+        */
 
         for (uint part= 0; part < tab->ref.key_parts; part++)
         {
@@ -16860,6 +18836,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
             break;
           }
         }
+
         if (i > 0 && tab[-1].next_select == sub_select_cache)
           extra.append(STRING_WITH_LEN("; Using join buffer"));
         
@@ -16873,6 +18850,15 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
         }
 	item_list.push_back(new Item_string(str, len, cs));
       }
+    loop_end:
+       if (i+1 == end_table && sjm_nests_cur != sjm_nests_end)
+       {
+         printing_materialize_nest= TRUE;
+         i= sjm_nests[sjm_nests_cur++] - 1;
+         end_table= (i+1) + join->best_positions[i+1].n_sj_tables;
+         select_id= join->join_tab[i+1].emb_sj_nest->sj_subq_pred->get_identifier();
+       }
+      
       // For next iteration
       used_tables|=table->map;
       if (result->send_data(item_list))
@@ -16883,8 +18869,31 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
        unit;
        unit= unit->next_unit())
   {
-    if (mysql_explain_union(thd, unit, result))
-      DBUG_VOID_RETURN;
+    /*
+      This fix_fields() call is to handle an edge case like this:
+       
+        SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
+      
+      for such queries, we'll get here before having called
+      subquery_expr->fix_fields(), which will cause failure to
+    */
+    if (unit->item && !unit->item->fixed)
+    {
+      Item *ref= unit->item;
+      if (unit->item->fix_fields(thd, &ref))
+        DBUG_VOID_RETURN;
+      DBUG_ASSERT(ref == unit->item);
+    }
+
+    /* 
+      Display subqueries only if they are not parts of eliminated WHERE/ON
+      clauses.
+    */
+    if (!(unit->item && unit->item->eliminated))
+    {
+      if (mysql_explain_union(thd, unit, result))
+        DBUG_VOID_RETURN;
+    }
   }
   DBUG_VOID_RETURN;
 }
@@ -16925,7 +18934,6 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
     unit->fake_select_lex->options|= SELECT_DESCRIBE;
     if (!(res= unit->prepare(thd, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
       res= unit->exec();
-    res|= unit->cleanup();
   }
   else
   {
@@ -16948,6 +18956,53 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
 }
 
 
+static void print_table_array(THD *thd, 
+                              table_map eliminated_tables,
+                              String *str, TABLE_LIST **table, 
+                              TABLE_LIST **end,
+                              enum_query_type query_type)
+{
+  (*table)->print(thd, eliminated_tables, str, query_type);
+
+  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+  {
+    TABLE_LIST *curr= *tbl;
+    
+    /*
+      The "eliminated_tables &&" check guards againist the case of 
+      printing the query for CREATE VIEW. We do that without having run 
+      JOIN::optimize() and so will have nested_join->used_tables==0.
+    */
+    if (eliminated_tables &&
+        ((curr->table && (curr->table->map & eliminated_tables)) ||
+         (curr->nested_join && !(curr->nested_join->used_tables &
+                                ~eliminated_tables))))
+    {
+      continue;
+    }
+
+    if (curr->outer_join)
+    {
+      /* MySQL converts right to left joins */
+      str->append(STRING_WITH_LEN(" left join "));
+    }
+    else if (curr->straight)
+      str->append(STRING_WITH_LEN(" straight_join "));
+    else if (curr->sj_inner_tables)
+      str->append(STRING_WITH_LEN(" semi join "));
+    else
+      str->append(STRING_WITH_LEN(" join "));
+    curr->print(thd, eliminated_tables, str, query_type);
+    if (curr->on_expr)
+    {
+      str->append(STRING_WITH_LEN(" on("));
+      curr->on_expr->print(str, query_type);
+      str->append(')');
+    }
+  }
+}
+
+
 /**
   Print joins from the FROM clause.
 
@@ -16958,6 +19013,7 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
 */
 
 static void print_join(THD *thd,
+                       table_map eliminated_tables,
                        String *str,
                        List<TABLE_LIST> *tables,
                        enum_query_type query_type)
@@ -16990,32 +19046,36 @@ static void print_join(THD *thd,
   }
 
   DBUG_ASSERT(tables->elements >= 1);
-  (*table)->print(thd, str, query_type);
-
-  TABLE_LIST **end= table + non_const_tables;
-  for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
+  /*
+    Assert that the first table in the list isn't eliminated. This comes from
+    the fact that the first table can't be inner table of an outer join.
+  */
+  DBUG_ASSERT(!eliminated_tables || 
+              !(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
+                ((*table)->nested_join && !((*table)->nested_join->used_tables &
+                                           ~eliminated_tables))));
+  /* 
+    If the first table is a semi-join nest, swap it with something that is
+    not a semi-join nest.
+  */
+  if ((*table)->sj_inner_tables)
   {
-    TABLE_LIST *curr= *tbl;
-    if (curr->outer_join)
+    TABLE_LIST **end= table + tables->elements;
+    for (TABLE_LIST **t2= table; t2!=end; t2++)
     {
-      /* MySQL converts right to left joins */
-      str->append(STRING_WITH_LEN(" left join "));
-    }
-    else if (curr->straight)
-      str->append(STRING_WITH_LEN(" straight_join "));
-    else
-      str->append(STRING_WITH_LEN(" join "));
-    curr->print(thd, str, query_type);
-    if (curr->on_expr)
-    {
-      str->append(STRING_WITH_LEN(" on("));
-      curr->on_expr->print(str, query_type);
-      str->append(')');
+      if (!(*t2)->sj_inner_tables)
+      {
+        TABLE_LIST *tmp= *t2;
+        *t2= *table;
+        *table= tmp;
+        break;
+      }
     }
   }
+  print_table_array(thd, eliminated_tables, str, table, 
+                    table +  non_const_tables, query_type);
 }
 
-
 /**
   @brief Print an index hint
 
@@ -17059,12 +19119,13 @@ Index_hint::print(THD *thd, String *str)
   @param str   string where table should be printed
 */
 
-void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type)
+void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, 
+                       enum_query_type query_type)
 {
   if (nested_join)
   {
     str->append('(');
-    print_join(thd, str, &nested_join->join_list, query_type);
+    print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
     str->append(')');
   }
   else
@@ -17195,7 +19256,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
     else
       str->append(',');
 
-    if (master_unit()->item && item->is_autogenerated_name)
+    if (is_subquery_function() && item->is_autogenerated_name)
     {
       /*
         Do not print auto-generated aliases in subqueries. It has no purpose
@@ -17215,7 +19276,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
   {
     str->append(STRING_WITH_LEN(" from "));
     /* go through join tree */
-    print_join(thd, str, &top_join_list, query_type);
+    print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list, query_type);
   }
   else if (where)
   {
@@ -17482,9 +19543,47 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
             key (e.g. as in Innodb). 
             See Bug #28591 for details.
           */  
-          rec_per_key= used_key_parts &&
-                       used_key_parts <= keyinfo->key_parts ?
-                       keyinfo->rec_per_key[used_key_parts-1] : 1;
+          uint used_index_parts= keyinfo->key_parts;
+          uint used_pk_parts= 0;
+          if (used_key_parts > used_index_parts)
+            used_pk_parts= used_key_parts-used_index_parts;
+          rec_per_key= keyinfo->rec_per_key[used_key_parts-1];
+          /* Take into account the selectivity of the used pk prefix */
+          if (used_pk_parts)
+	  {
+            KEY *pkinfo=tab->table->key_info+table->s->primary_key;
+            /*
+              If the values of of records per key for the prefixes
+              of the primary key are considered unknown we assume
+              they are equal to 1.
+	    */
+            if (used_key_parts == pkinfo->key_parts ||
+                pkinfo->rec_per_key[0] == 0)
+              rec_per_key= 1;                 
+            if (rec_per_key > 1)
+	    {
+              rec_per_key*= pkinfo->rec_per_key[used_pk_parts-1];
+              rec_per_key/= pkinfo->rec_per_key[0];
+              /* 
+                The value of rec_per_key for the extended key has
+                to be adjusted accordingly if some components of
+                the secondary key are included in the primary key.
+	      */
+               for(uint i= 0; i < used_pk_parts; i++)
+	      {
+	        if (pkinfo->key_part[i].field->key_start.is_set(nr))
+	        {
+                  /* 
+                    We presume here that for any index rec_per_key[i] != 0
+                    if rec_per_key[0] != 0.
+	          */
+                  DBUG_ASSERT(pkinfo->rec_per_key[i]);
+                  rec_per_key*= pkinfo->rec_per_key[i-1];
+                  rec_per_key/= pkinfo->rec_per_key[i];
+                }
+	      }
+            }    
+          }
           set_if_bigger(rec_per_key, 1);
           /*
             With a grouping query each group containing on average
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 93885e23f76..1c24ed37581 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -17,6 +17,8 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 
+#ifndef SQL_SELECT_INCLUDED
+#define SQL_SELECT_INCLUDED
 /**
   @file
 
@@ -35,6 +37,16 @@
 #include "opt_range.h"                /* SQL_SELECT, QUICK_SELECT_I */
 
 
+#if defined(WITH_ARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+#include "../storage/maria/ha_maria.h"
+#define TMP_ENGINE_HTON maria_hton
+#else
+#define TMP_ENGINE_HTON myisam_hton
+#endif
+/* Values in optimize */
+#define KEY_OPTIMIZE_EXISTS		1
+#define KEY_OPTIMIZE_REF_OR_NULL	2
+
 typedef struct keyuse_t {
   TABLE *table;
   Item	*val;				/**< or value if no field */
@@ -58,6 +70,11 @@ typedef struct keyuse_t {
     NULL  - Otherwise (the source equality can't be turned off)
   */
   bool *cond_guard;
+  /*
+     0..64    <=> This was created from semi-join IN-equality # sj_pred_no.
+     MAX_UINT  Otherwise
+  */
+  uint         sj_pred_no;
 } KEYUSE;
 
 class store_key;
@@ -99,32 +116,17 @@ typedef struct st_table_ref
     in the join.
   */
   ha_rows       use_count;
-} TABLE_REF;
-
-
-
-#define CACHE_BLOB      1        /* blob field  */
-#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
-
-/**
-  CACHE_FIELD and JOIN_CACHE is used on full join to cache records in outer
-  table
-*/
 
-typedef struct st_cache_field {
-  uchar *str;
-  uint length, blob_length;
-  Field *field;
-  uint type;    /**< category of the of the copied field (CACHE_BLOB et al.) */
-} CACHE_FIELD;
+  /*
+    TRUE <=> disable the "cache" as doing lookup with the same key value may
+    produce different results (because of Index Condition Pushdown)
 
+  */
+  bool          disable_cache;
 
-typedef struct st_join_cache {
-  uchar *buff,*pos,*end;
-  uint records,record_nr,ptr_record,fields,length,blobs;
-  CACHE_FIELD *field,**blob_ptr;
-  SQL_SELECT *select;
-} JOIN_CACHE;
+  bool tmp_table_index_lookup_init(THD *thd, KEY *tmp_key, Item_iterator &it,
+                                   bool value);
+} TABLE_REF;
 
 
 /*
@@ -153,15 +155,27 @@ enum enum_nested_loop_state
 typedef enum_nested_loop_state
 (*Next_select_func)(JOIN *, struct st_join_table *, bool);
 Next_select_func setup_end_select_func(JOIN *join);
+int rr_sequential(READ_RECORD *info);
+
 
+class JOIN_CACHE;
+class SJ_TMP_TABLE;
 
 typedef struct st_join_table {
   st_join_table() {}                          /* Remove gcc warning */
   TABLE		*table;
   KEYUSE	*keyuse;			/**< pointer to first used key */
   SQL_SELECT	*select;
-  COND		*select_cond;
+  COND          *select_cond;
   QUICK_SELECT_I *quick;
+  /* 
+    The value of select_cond before we've attempted to do Index Condition
+    Pushdown. We may need to restore everything back if we first choose one
+    index but then reconsider (see test_if_skip_sort_order() for such
+    scenarios).
+    NULL means no index condition pushdown was performed.
+  */
+  Item          *pre_idx_push_select_cond;
   Item	       **on_expr_ref;   /**< pointer to the associated on expression   */
   COND_EQUAL    *cond_equal;    /**< multiple equalities for the on expression */
   st_join_table *first_inner;   /**< first inner table for including outerjoin */
@@ -213,6 +227,9 @@ typedef struct st_join_table {
   uint		use_quick,index;
   uint		status;				///< Save status for cache
   uint		used_fields,used_fieldlength,used_blobs;
+  uint          used_null_fields;
+  uint          used_rowid_fields;
+  uint          used_uneven_bit_fields;
   enum join_type type;
   bool		cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
   bool		sorted;
@@ -223,11 +240,70 @@ typedef struct st_join_table {
   */ 
   ha_rows       limit; 
   TABLE_REF	ref;
-  JOIN_CACHE	cache;
+  bool          use_join_cache;
+  JOIN_CACHE	*cache;
+  /*
+    Index condition for BKA access join
+  */
+  Item          *cache_idx_cond;
+  SQL_SELECT    *cache_select;
   JOIN		*join;
-  /** Bitmap of nested joins this table is part of */
+  /*
+    Embedding SJ-nest (may be not the direct parent), or NULL if none.
+    This variable holds the result of table pullout.
+  */
+  TABLE_LIST    *emb_sj_nest;
+
+  /* FirstMatch variables (final QEP) */
+  struct st_join_table *first_sj_inner_tab;
+  struct st_join_table *last_sj_inner_tab;
+
+  /* Variables for semi-join duplicate elimination */
+  SJ_TMP_TABLE  *flush_weedout_table;
+  SJ_TMP_TABLE  *check_weed_out_table;
+  
+  /*
+    If set, means we should stop join enumeration after we've got the first
+    match and return to the specified join tab. May point to
+    join->join_tab[-1] which means stop join execution after the first
+    match.
+  */
+  struct st_join_table  *do_firstmatch;
+ 
+  /* 
+     ptr  - We're doing a LooseScan, this join tab is the first (i.e. 
+            "driving") join tab), and ptr points to the last join tab
+            handled by the strategy. loosescan_match_tab->found_match
+            should be checked to see if the current value group had a match.
+     NULL - Not doing a loose scan on this join tab.
+  */
+  struct st_join_table *loosescan_match_tab;
+
+  /* Buffer to save index tuple to be able to skip duplicates */
+  uchar *loosescan_buf;
+  
+  /* Length of key tuple (depends on #keyparts used) to store in the above */
+  uint loosescan_key_len;
+
+  /* Used by LooseScan. TRUE<=> there has been a matching record combination */
+  bool found_match;
+  
+  /*
+    Used by DuplicateElimination. tab->table->ref must have the rowid
+    whenever we have a current record.
+  */
+  int  keep_current_rowid;
+
+  /* NestedOuterJoins: Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
 
+  /*
+    Semi-join strategy to be used for this join table. This is a copy of
+    POSITION::sj_strategy field. This field is set up by the
+    fix_semijion_strategies_for_picked_join_order.
+  */
+  uint sj_strategy;
+
   void cleanup();
   inline bool is_using_loose_index_scan()
   {
@@ -240,12 +316,929 @@ typedef struct st_join_table {
     return (is_using_loose_index_scan() &&
             ((QUICK_GROUP_MIN_MAX_SELECT *)select->quick)->is_agg_distinct());
   }
+  bool check_rowid_field()
+  {
+    if (keep_current_rowid && !used_rowid_fields)
+    {
+      used_rowid_fields= 1;
+      used_fieldlength+= table->file->ref_length;
+    }
+    return test(used_rowid_fields);
+  }
+  bool is_inner_table_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab != NULL;
+  }
+  bool is_inner_table_of_outer_join()
+  {
+    return first_inner != NULL;
+  }
+  bool is_single_inner_of_semi_join_with_first_match()
+  {
+    return first_sj_inner_tab == this && last_sj_inner_tab == this;            
+  }
+  bool is_single_inner_of_outer_join()
+  {
+    return first_inner == this && first_inner->last_inner == this;
+  }
+  bool is_first_inner_for_outer_join()
+  {
+    return first_inner && first_inner == this;
+  }
+  bool use_match_flag()
+  {
+    return is_first_inner_for_outer_join() || first_sj_inner_tab == this ; 
+  }
+  bool check_only_first_match()
+  {
+    return is_inner_table_of_semi_join_with_first_match() ||
+           (is_inner_table_of_outer_join() &&
+            table->reginfo.not_exists_optimize);
+  }
+  bool is_last_inner_table()
+  {
+    return (first_inner && first_inner->last_inner == this) ||
+           last_sj_inner_tab == this;
+  }
+  struct st_join_table *get_first_inner_table()
+  {
+    if (first_inner)
+      return first_inner;
+    return first_sj_inner_tab; 
+  }
+  void set_select_cond(COND *to, uint line)
+  {
+    DBUG_PRINT("info", ("select_cond changes %p -> %p at line %u tab %p",
+                        select_cond, to, line, this));
+    select_cond= to;
+  }
+  COND *set_cond(COND *new_cond)
+  {
+    COND *tmp_select_cond= select_cond;
+    set_select_cond(new_cond, __LINE__);
+    if (select)
+      select->cond= new_cond;
+    return tmp_select_cond;
+  }
 } JOIN_TAB;
 
+
+/* 
+  Categories of data fields of variable length written into join cache buffers.
+  The value of any of these fields is written into cache together with the
+  prepended length of the value.     
+*/
+#define CACHE_BLOB      1        /* blob field  */
+#define CACHE_STRIPPED  2        /* field stripped of trailing spaces */
+#define CACHE_VARSTR1   3        /* short string value (length takes 1 byte) */ 
+#define CACHE_VARSTR2   4        /* long string value (length takes 2 bytes) */
+
+/*
+  The CACHE_FIELD structure used to describe fields of records that
+  are written into a join cache buffer from record buffers and backward.
+*/
+typedef struct st_cache_field {
+  uchar *str;   /**< buffer from/to where the field is to be copied */ 
+  uint length;  /**< maximal number of bytes to be copied from/to str */
+  /* 
+    Field object for the moved field
+    (0 - for a flag field, see JOIN_CACHE::create_flag_fields).
+  */
+  Field *field;
+  uint type;    /**< category of the of the copied field (CACHE_BLOB et al.) */
+  /* 
+    The number of the record offset value for the field in the sequence
+    of offsets placed after the last field of the record. These
+    offset values are used to access fields referred to from other caches.
+    If the value is 0 then no offset for the field is saved in the
+    trailing sequence of offsets.
+  */ 
+  uint referenced_field_no; 
+  /* The remaining structure fields are used as containers for temp values */
+  uint blob_length; /**< length of the blob to be copied */
+  uint offset;      /**< field offset to be saved in cache buffer */
+} CACHE_FIELD;
+
+
+/*
+  JOIN_CACHE is the base class to support the implementations of both
+  Blocked-Based Nested Loops (BNL) Join Algorithm and Batched Key Access (BKA)
+  Join Algorithm. The first algorithm is supported by the derived class
+  JOIN_CACHE_BNL, while the second algorithm is supported by the derived
+  class JOIN_CACHE_BKA.
+  These two algorithms have a lot in common. Both algorithms first
+  accumulate the records of the left join operand in a join buffer and
+  then search for matching rows of the second operand for all accumulated
+  records.
+  For the first algorithm this strategy saves on logical I/O operations:
+  the entire set of records from the join buffer requires only one look-through
+  the records provided by the second operand. 
+  For the second algorithm the accumulation of records allows to optimize
+  fetching rows of the second operand from disk for some engines (MyISAM, 
+  InnoDB), or to minimize the number of round-trips between the Server and
+  the engine nodes (NDB Cluster).        
+*/ 
+
+class JOIN_CACHE :public Sql_alloc
+{
+
+private:
+
+  /* Size of the offset of a record from the cache */   
+  uint size_of_rec_ofs;    
+  /* Size of the length of a record in the cache */
+  uint size_of_rec_len;
+  /* Size of the offset of a field within a record in the cache */   
+  uint size_of_fld_ofs;
+
+protected:
+       
+  /* 3 functions below actually do not use the hidden parameter 'this' */ 
+
+  /* Calculate the number of bytes used to store an offset value */
+  uint offset_size(uint len)
+  { return (len < 256 ? 1 : len < 256*256 ? 2 : 4); }
+
+  /* Get the offset value that takes ofs_sz bytes at the position ptr */
+  ulong get_offset(uint ofs_sz, uchar *ptr)
+  {
+    switch (ofs_sz) {
+    case 1: return uint(*ptr);
+    case 2: return uint2korr(ptr);
+    case 4: return uint4korr(ptr);
+    }
+    return 0;
+  }
+
+  /* Set the offset value ofs that takes ofs_sz bytes at the position ptr */ 
+  void store_offset(uint ofs_sz, uchar *ptr, ulong ofs)
+  {
+    switch (ofs_sz) {
+    case 1: *ptr= (uchar) ofs; return;
+    case 2: int2store(ptr, (uint16) ofs); return;
+    case 4: int4store(ptr, (uint32) ofs); return;
+    }
+  }
+  
+  /* 
+    The total maximal length of the fields stored for a record in the cache.
+    For blob fields only the sizes of the blob lengths are taken into account. 
+  */
+  uint length;
+
+  /* 
+    Representation of the executed multi-way join through which all needed
+    context can be accessed.  
+  */   
+  JOIN *join;  
+
+  /* 
+    Cardinality of the range of join tables whose fields can be put into the
+    cache. (A table from the range not necessarily contributes to the cache.)
+  */
+  uint tables;
+
+  /* 
+    The total number of flag and data fields that can appear in a record
+    written into the cache. Fields with null values are always skipped 
+    to save space. 
+  */
+  uint fields;
+
+  /* 
+    The total number of flag fields in a record put into the cache. They are
+    used for table null bitmaps, table null row flags, and an optional match
+    flag. Flag fields go before other fields in a cache record with the match
+    flag field placed always at the very beginning of the record.
+  */
+  uint flag_fields;
+
+  /* The total number of blob fields that are written into the cache */ 
+  uint blobs;
+
+  /* 
+    The total number of fields referenced from field descriptors for other join
+    caches. These fields are used to construct key values to access matching
+    rows with index lookups. Currently the fields can be referenced only from
+    descriptors for bka caches. However they may belong to a cache of any type.
+  */   
+  uint referenced_fields;
+   
+  /* 
+    The current number of already created data field descriptors.
+    This number can be useful for implementations of the init methods.  
+  */
+  uint data_field_count; 
+
+  /* 
+    The current number of already created pointers to the data field
+    descriptors. This number can be useful for implementations of
+    the init methods.  
+  */
+  uint data_field_ptr_count; 
+  /* 
+    Array of the descriptors of fields containing 'fields' elements.
+    These are all fields that are stored for a record in the cache. 
+  */
+  CACHE_FIELD *field_descr;
+
+  /* 
+    Array of pointers to the blob descriptors that contains 'blobs' elements.
+  */
+  CACHE_FIELD **blob_ptr;
+
+  /* 
+    This flag indicates that records written into the join buffer contain
+    a match flag field. The flag must be set by the init method. 
+  */
+  bool with_match_flag; 
+  /*
+    This flag indicates that any record is prepended with the length of the
+    record which allows us to skip the record or part of it without reading.
+  */
+  bool with_length;
+
+  /* 
+    The maximal number of bytes used for a record representation in
+    the cache excluding the space for blob data. 
+    For future derived classes this representation may contains some
+    redundant info such as a key value associated with the record.     
+  */
+  uint pack_length;
+  /* 
+    The value of pack_length incremented by the total size of all 
+    pointers of a record in the cache to the blob data. 
+  */
+  uint pack_length_with_blob_ptrs;
+
+  /* Pointer to the beginning of the join buffer */
+  uchar *buff;         
+  /* 
+    Size of the entire memory allocated for the join buffer.
+    Part of this memory may be reserved for the auxiliary buffer.
+  */ 
+  ulong buff_size;
+  /* Size of the auxiliary buffer. */ 
+  ulong aux_buff_size;
+
+  /* The number of records put into the join buffer */ 
+  uint records;
+
+  /* 
+    Pointer to the current position in the join buffer.
+    This member is used both when writing to buffer and
+    when reading from it.
+  */
+  uchar *pos;
+  /* 
+    Pointer to the first free position in the join buffer,
+    right after the last record into it.
+  */
+  uchar *end_pos; 
+
+  /* 
+    Pointer to the beginning of first field of the current read/write record
+    from the join buffer. The value is adjusted by the get_record/put_record
+    functions.
+  */
+  uchar *curr_rec_pos;
+  /* 
+    Pointer to the beginning of first field of the last record
+    from the join buffer.
+  */
+  uchar *last_rec_pos;
+
+  /* 
+    Flag is set if the blob data for the last record in the join buffer
+    is in record buffers rather than in the join cache.
+  */
+  bool last_rec_blob_data_is_in_rec_buff;
+
+  /* 
+    Pointer to the position to the current record link. 
+    Record links are used only with linked caches. Record links allow to set
+    connections between parts of one join record that are stored in different
+    join buffers.
+    In the simplest case a record link is just a pointer to the beginning of
+    the record stored in the buffer.
+    In a more general case a link could be a reference to an array of pointers
+    to records in the buffer.   */
+  uchar *curr_rec_link;
+
+  void calc_record_fields();     
+  int alloc_fields(uint external_fields);
+  void create_flag_fields();
+  void create_remaining_fields(bool all_read_fields);
+  void set_constants();
+  int alloc_buffer();
+
+  uint get_size_of_rec_offset() { return size_of_rec_ofs; }
+  uint get_size_of_rec_length() { return size_of_rec_len; }
+  uint get_size_of_fld_offset() { return size_of_fld_ofs; }
+
+  uchar *get_rec_ref(uchar *ptr)
+  {
+    return buff+get_offset(size_of_rec_ofs, ptr-size_of_rec_ofs);
+  }
+  ulong get_rec_length(uchar *ptr)
+  { 
+    return (ulong) get_offset(size_of_rec_len, ptr);
+  }
+  ulong get_fld_offset(uchar *ptr)
+  { 
+    return (ulong) get_offset(size_of_fld_ofs, ptr);
+  }
+
+  void store_rec_ref(uchar *ptr, uchar* ref)
+  {
+    store_offset(size_of_rec_ofs, ptr-size_of_rec_ofs, (ulong) (ref-buff));
+  }
+
+  void store_rec_length(uchar *ptr, ulong len)
+  {
+    store_offset(size_of_rec_len, ptr, len);
+  }
+  void store_fld_offset(uchar *ptr, ulong ofs)
+  {
+    store_offset(size_of_fld_ofs, ptr, ofs);
+  }
+
+  /* Write record fields and their required offsets into the join buffer */ 
+  uint write_record_data(uchar *link, bool *is_full);
+
+  /* 
+    This method must determine for how much the auxiliary buffer should be
+    incremented when a new record is added to the join buffer.
+    If no auxiliary buffer is needed the function should return 0.
+  */
+  virtual uint aux_buffer_incr() { return 0; }
+
+  /* Shall calculate how much space is remaining in the join buffer */ 
+  virtual ulong rem_space() 
+  { 
+    return max(buff_size-(end_pos-buff)-aux_buff_size,0);
+  }
+
+  /* Shall skip record from the join buffer if its match flag is on */
+  virtual bool skip_record_if_match();
+
+  /*  Read all flag and data fields of a record from the join buffer */
+  uint read_all_record_fields();
+  
+  /* Read all flag fields of a record from the join buffer */
+  uint read_flag_fields();
+
+  /* Read a data record field from the join buffer */
+  uint read_record_field(CACHE_FIELD *copy, bool last_record);
+
+  /* Read a referenced field from the join buffer */
+  bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
+
+  /* 
+    True if rec_ptr points to the record whose blob data stay in
+    record buffers
+  */
+  bool blob_data_is_in_rec_buff(uchar *rec_ptr)
+  {
+    return rec_ptr == last_rec_pos && last_rec_blob_data_is_in_rec_buff;
+  }
+
+  /* Find matches from the next table for records from the join buffer */   
+  virtual enum_nested_loop_state join_matching_records(bool skip_last)=0;
+
+  /* Add null complements for unmatched outer records from buffer */
+  virtual enum_nested_loop_state join_null_complements(bool skip_last);
+
+  /* Restore the fields of the last record from the join buffer */
+  virtual void restore_last_record();
+
+  /*Set match flag for a record in join buffer if it has not been set yet */
+  bool set_match_flag_if_none(JOIN_TAB *first_inner, uchar *rec_ptr);
+
+  enum_nested_loop_state generate_full_extensions(uchar *rec_ptr);
+
+  /* Check matching to a partial join record from the join buffer */
+  bool check_match(uchar *rec_ptr);
+
+public:
+
+  /* Table to be joined with the partial join records from the cache */ 
+  JOIN_TAB *join_tab;
+
+  /* Pointer to the previous join cache if there is any */
+  JOIN_CACHE *prev_cache;
+  /* Pointer to the next join cache if there is any */
+  JOIN_CACHE *next_cache;
+
+  /* Shall initialize the join cache structure */ 
+  virtual int init()=0;  
+
+  /* The function shall return TRUE only for BKA caches */
+  virtual bool is_key_access() { return FALSE; }
+
+  /* Shall reset the join buffer for reading/writing */
+  virtual void reset(bool for_writing);
+
+  /* 
+    This function shall add a record into the join buffer and return TRUE
+    if it has been decided that it should be the last record in the buffer.
+  */ 
+  virtual bool put_record();
+
+  /* 
+    This function shall read the next record into the join buffer and return
+    TRUE if there is no more next records.
+  */ 
+  virtual bool get_record();
+
+  /* 
+    This function shall read the record at the position rec_ptr
+    in the join buffer
+  */ 
+  virtual void get_record_by_pos(uchar *rec_ptr);
+
+  /* Shall return the value of the match flag for the positioned record */
+  virtual bool get_match_flag_by_pos(uchar *rec_ptr);
+
+  /* Shall return the position of the current record */
+  virtual uchar *get_curr_rec() { return curr_rec_pos; }
+
+  /* Shall set the current record link */
+  virtual void set_curr_rec_link(uchar *link) { curr_rec_link= link; }
+
+  /* Shall return the current record link */
+  virtual uchar *get_curr_rec_link()
+  { 
+    return (curr_rec_link ? curr_rec_link : get_curr_rec());
+  }
+     
+  /* Join records from the join buffer with records from the next join table */    
+  enum_nested_loop_state join_records(bool skip_last);
+
+  virtual ~JOIN_CACHE() {}
+  void reset_join(JOIN *j) { join= j; }
+  void free()
+  { 
+    x_free(buff);
+    buff= 0;
+  }   
+  
+  friend class JOIN_CACHE_BNL;
+  friend class JOIN_CACHE_BKA;
+  friend class JOIN_CACHE_BKA_UNIQUE;
+};
+
+
+class JOIN_CACHE_BNL :public JOIN_CACHE
+{
+
+protected:
+
+  /* Using BNL find matches from the next table for records from join buffer */
+  enum_nested_loop_state join_matching_records(bool skip_last);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BNL join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab)
+  { 
+    join= j;
+    join_tab= tab;
+    prev_cache= next_cache= 0;
+  }
+
+  /* 
+    This constructor creates a linked BNL join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the previous
+    cache object to which this cache is linked.
+  */   
+  JOIN_CACHE_BNL(JOIN *j, JOIN_TAB *tab, JOIN_CACHE *prev)
+  { 
+    join= j;
+    join_tab= tab;
+    prev_cache= prev;
+    next_cache= 0;
+    if (prev)
+      prev->next_cache= this;
+  }
+
+  /* Initialize the BNL cache */       
+  int init();
+
+};
+
+class JOIN_CACHE_BKA :public JOIN_CACHE
+{
+protected:
+
+  /* Flag to to be passed to the MRR interface */ 
+  uint mrr_mode;
+
+  /* MRR buffer assotiated with this join cache */
+  HANDLER_BUFFER mrr_buff;
+
+  /* Shall initialize the MRR buffer */
+  virtual void init_mrr_buff()
+  {
+    mrr_buff.buffer= end_pos;
+    mrr_buff.buffer_end= buff+buff_size;
+  }
+
+  /*
+    The number of the cache fields that are used in building keys to access
+    the table join_tab
+  */
+  uint local_key_arg_fields;
+  /* 
+    The total number of the fields in the previous caches that are used
+    in building keys t access the table join_tab
+  */
+  uint external_key_arg_fields;
+
+  /* 
+    This flag indicates that the key values will be read directly from the join
+    buffer. It will save us building key values in the key buffer.
+  */
+  bool use_emb_key;
+  /* The length of an embedded key value */ 
+  uint emb_key_length;
+
+  /* Check the possibility to read the access keys directly from join buffer */  
+  bool check_emb_key_usage();
+
+  /* Calculate the increment of the MM buffer for a record write */
+  uint aux_buffer_incr();
+
+  /* Using BKA find matches from the next table for records from join buffer */
+  enum_nested_loop_state join_matching_records(bool skip_last);
+
+  /* Prepare to search for records that match records from the join buffer */
+  enum_nested_loop_state init_join_matching_records(RANGE_SEQ_IF *seq_funcs,
+                                                    uint ranges);
+
+  /* Finish searching for records that match records from the join buffer */
+  enum_nested_loop_state end_join_matching_records(enum_nested_loop_state rc);
+
+public:
+  
+  /* 
+    This constructor creates an unlinked BKA join cache. The cache is to be
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags)
+  { 
+    join= j;
+    join_tab= tab;
+    prev_cache= next_cache= 0;
+    mrr_mode= flags;
+  }
+
+  /* 
+    This constructor creates a linked BKA join cache. The cache is to be 
+    used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the cache
+    object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA(JOIN *j, JOIN_TAB *tab, uint flags,  JOIN_CACHE* prev)
+  { 
+    join= j;
+    join_tab= tab;
+    prev_cache= prev;
+    next_cache= 0;
+    if (prev)
+      prev->next_cache= this;
+    mrr_mode= flags;
+  }
+
+  /* Initialize the BKA cache */       
+  int init();
+
+  bool is_key_access() { return TRUE; }
+
+  /* Shall get the key built over the next record from the join buffer */
+  virtual uint get_next_key(uchar **key);
+
+  /* Check if the record combination matches the index condition */
+  bool skip_index_tuple(range_seq_t rseq, char *range_info);
+};
+
+/*
+  The class JOIN_CACHE_BKA_UNIQUE supports the variant of the BKA join algorithm
+  that submits only distinct keys to the MRR interface. The records in the join
+  buffer of a cache of this class that have the same access key are linked into
+  a chain attached to a key entry structure that either itself contains the key
+  value, or, in the case when the keys are embedded, refers to its occurance in
+  one of the records from the chain.
+  To build the chains with the same keys a hash table is employed. It is placed
+  at the very end of the join buffer. The array of hash entries is allocated
+  first at the very bottom of the join buffer, then go key entries. A hash entry
+  contains a header of the list of the key entries with the same hash value. 
+  Each key entry is a structure of the following type:
+    struct st_join_cache_key_entry {
+      union { 
+        uchar[] value;
+        cache_ref *value_ref; // offset from the beginning of the buffer
+      } hash_table_key;
+      key_ref next_key; // offset backward from the beginning of hash table
+      cache_ref *last_rec // offset from the beginning of the buffer
+    }
+  The references linking the records in a chain are always placed at the very
+  beginning of the record info stored in the join buffer. The records are 
+  linked in a circular list. A new record is always added to the end of this 
+  list. When a key is passed to the MRR interface it can be passed either with
+  an association link containing a reference to the header of the record chain
+  attached to the corresponding key entry in the hash table, or without any
+  association link. When the next record is returned by a call to the MRR 
+  function multi_range_read_next without any association (because if was not
+  passed  together with the key) then the key value is extracted from the
+  returned record and searched for it in the hash table. If there is any records
+  with such key the chain of them will be yielded as the result of this search.
+
+  The following picture represents a typical layout for the info stored in the
+  join buffer of a join cache object of the JOIN_CACHE_BKA_UNIQUE class.
+    
+  buff
+  V
+  +----------------------------------------------------------------------------+
+  |     |[*]record_1_1|                                                        |
+  |     ^ |                                                                    |
+  |     | +--------------------------------------------------+                 |
+  |     |                           |[*]record_2_1|          |                 |
+  |     |                           ^ |                      V                 |
+  |     |                           | +------------------+   |[*]record_1_2|   |
+  |     |                           +--------------------+-+   |               |
+  |+--+ +---------------------+                          | |   +-------------+ |
+  ||  |                       |                          V |                 | |
+  |||[*]record_3_1|         |[*]record_1_3|              |[*]record_2_2|     | |
+  ||^                       ^                            ^                   | |
+  ||+----------+            |                            |                   | |
+  ||^          |            |<---------------------------+-------------------+ |
+  |++          | | ... mrr  |   buffer ...           ... |     |               |
+  |            |            |                            |                     |
+  |      +-----+--------+   |                      +-----|-------+             |
+  |      V     |        |   |                      V     |       |             |
+  ||key_3|[/]|[*]|      |   |                |key_2|[/]|[*]|     |             |
+  |                   +-+---|-----------------------+            |             |
+  |                   V |   |                       |            |             |
+  |             |key_1|[*]|[*]|         |   | ... |[*]|   ...  |[*]|  ...  |   |
+  +----------------------------------------------------------------------------+
+                                        ^           ^            ^
+                                        |           i-th entry   j-th entry
+                                        hash table
+
+  i-th hash entry:
+    circular record chain for key_1:
+      record_1_1
+      record_1_2
+      record_1_3 (points to record_1_1)
+    circular record chain for key_3:
+      record_3_1 (points to itself)
+
+  j-th hash entry:
+    circular record chain for key_2:
+      record_2_1
+      record_2_2 (points to record_2_1)
+
+*/
+
+class JOIN_CACHE_BKA_UNIQUE :public JOIN_CACHE_BKA
+{
+
+private:
+
+  /* Size of the offset of a key entry in the hash table */
+  uint size_of_key_ofs;
+
+  /* 
+    Length of a key value.
+    It is assumed that all key values have the same length.
+  */
+  uint key_length;
+  /* 
+    Length of the key entry in the hash table.
+    A key entry either contains the key value, or it contains a reference
+    to the key value if use_emb_key flag is set for the cache.
+  */ 
+  uint key_entry_length;
+ 
+  /* The beginning of the hash table in the join buffer */
+  uchar *hash_table;
+  /* Number of hash entries in the hash table */
+  uint hash_entries;
+
+  /* Number of key entries in the hash table (number of distinct keys) */
+  uint key_entries;
+
+  /* The position of the last key entry in the hash table */
+  uchar *last_key_entry;
+
+  /* The position of the currently retrieved key entry in the hash table */
+  uchar *curr_key_entry;
+
+  /* 
+    The offset of the record fields from the beginning of the record
+    representation. The record representation starts with a reference to
+    the next record in the key record chain followed by the length of
+    the trailing record data followed by a reference to the record segment
+     in the previous cache, if any, followed by the record fields.
+  */ 
+  uint rec_fields_offset;
+  /* The offset of the data fields from the beginning of the record fields */
+  uint data_fields_offset;
+  
+  uint get_hash_idx(uchar* key, uint key_len);
+
+  void cleanup_hash_table();
+  
+protected:
+
+  uint get_size_of_key_offset() { return size_of_key_ofs; }
+
+  /* 
+    Get the position of the next_key_ptr field pointed to by 
+    a linking reference stored at the position key_ref_ptr. 
+    This reference is actually the offset backward from the
+    beginning of hash table.
+  */  
+  uchar *get_next_key_ref(uchar *key_ref_ptr)
+  {
+    return hash_table-get_offset(size_of_key_ofs, key_ref_ptr);
+  }
+
+  /* 
+    Store the linking reference to the next_key_ptr field at 
+    the position key_ref_ptr. The position of the next_key_ptr
+    field is pointed to by ref. The stored reference is actually
+    the offset backward from the beginning of the hash table.
+  */  
+  void store_next_key_ref(uchar *key_ref_ptr, uchar *ref)
+  {
+    store_offset(size_of_key_ofs, key_ref_ptr, (ulong) (hash_table-ref));
+  }     
+  
+  /* 
+    Check whether the reference to the next_key_ptr field at the position
+    key_ref_ptr contains  a nil value.
+  */
+  bool is_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    return memcmp(key_ref_ptr, &nil, size_of_key_ofs ) == 0;
+  } 
+
+  /* 
+    Set the reference to the next_key_ptr field at the position
+    key_ref_ptr equal to nil.
+  */
+  void store_null_key_ref(uchar *key_ref_ptr)
+  {
+    ulong nil= 0;
+    store_offset(size_of_key_ofs, key_ref_ptr, nil);
+  } 
+
+  uchar *get_next_rec_ref(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  void store_next_rec_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  }     
+ 
+  /*
+    Get the position of the embedded key value for the current
+    record pointed to by get_curr_rec().
+  */ 
+  uchar *get_curr_emb_key()
+  {
+    return get_curr_rec()+data_fields_offset;
+  }
+
+  /*
+    Get the position of the embedded key value pointed to by a reference
+    stored at ref_ptr. The stored reference is actually the offset from
+    the beginning of the join buffer.
+  */  
+  uchar *get_emb_key(uchar *ref_ptr)
+  {
+    return buff+get_offset(get_size_of_rec_offset(), ref_ptr);
+  }
+
+  /* 
+    Store the reference to an embedded key at the position key_ref_ptr.
+    The position of the embedded key is pointed to by ref. The stored
+    reference is actually the offset from the beginning of the join buffer.
+  */  
+  void store_emb_key_ref(uchar *ref_ptr, uchar *ref)
+  {
+    store_offset(get_size_of_rec_offset(), ref_ptr, (ulong) (ref-buff));
+  }
+  
+  /* 
+    Calculate how much space in the buffer would not be occupied by
+    records, key entries and additional memory for the MMR buffer.
+  */ 
+  ulong rem_space() 
+  { 
+    return max(last_key_entry-end_pos-aux_buff_size,0);
+  }
+
+  /* 
+    Initialize the MRR buffer allocating some space within the join buffer.
+    The entire space between the last record put into the join buffer and the
+    last key entry added to the hash table is used for the MRR buffer.
+  */
+  void init_mrr_buff()
+  {
+    mrr_buff.buffer= end_pos;
+    mrr_buff.buffer_end= last_key_entry;
+  }
+
+  /* Skip record from JOIN_CACHE_BKA_UNIQUE buffer if its match flag is on */
+  bool skip_record_if_match();
+
+  /* Using BKA_UNIQUE find matches for records from join buffer */
+  enum_nested_loop_state join_matching_records(bool skip_last);
+
+  /* Search for a key in the hash table of the join buffer */
+  bool key_search(uchar *key, uint key_len, uchar **key_ref_ptr);
+
+public:
+
+  /* 
+    This constructor creates an unlinked BKA_UNIQUE join cache. The cache is
+    to be used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags)
+    :JOIN_CACHE_BKA(j, tab, flags) {}
+
+  /* 
+    This constructor creates a linked BKA_UNIQUE join cache. The cache is
+    to be used to join table 'tab' to the result of joining the previous tables 
+    specified by the 'j' parameter. The parameter 'prev' specifies the cache
+    object to which this cache is linked.
+    The MRR mode initially is set to 'flags'.
+  */   
+  JOIN_CACHE_BKA_UNIQUE(JOIN *j, JOIN_TAB *tab, uint flags,  JOIN_CACHE* prev)
+    :JOIN_CACHE_BKA(j, tab, flags, prev) {}
+
+  /* Initialize the BKA_UNIQUE cache */       
+  int init();
+
+  /* Reset the JOIN_CACHE_BKA_UNIQUE  buffer for reading/writing */
+  void reset(bool for_writing);
+
+  /* Add a record into the JOIN_CACHE_BKA_UNIQUE buffer */
+  bool put_record();
+
+  /* Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer */
+  bool get_record();
+
+  /*
+    Shall check whether all records in a key chain have 
+    their match flags set on
+  */   
+  virtual bool check_all_match_flags_for_key(uchar *key_chain_ptr);
+
+  uint get_next_key(uchar **key); 
+  
+  /* Get the head of the record chain attached to the current key entry */ 
+  uchar *get_curr_key_chain()
+  {
+    return get_next_rec_ref(curr_key_entry+key_entry_length-
+                            get_size_of_rec_offset());
+  }
+  
+  /* Check if the record combination matches the index condition */
+  bool skip_index_tuple(range_seq_t rseq, char *range_info);
+};
+
+
 enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
                                         end_of_records);
 enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
                                   end_of_records);
+enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, 
+                                      bool end_of_records);
+
+enum_nested_loop_state
+end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+	       bool end_of_records);
+enum_nested_loop_state
+end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
+		bool end_of_records);
+
 
 /**
   Information about a position of table within a join order. Used in join
@@ -276,6 +1269,89 @@ typedef struct st_position
 
   /* If ref-based access is used: bitmap of tables this table depends on  */
   table_map ref_depend_map;
+
+  bool use_join_buffer; 
+  
+  
+  /* These form a stack of partial join order costs and output sizes */
+  COST_VECT prefix_cost;
+  double    prefix_record_count;
+
+  /*
+    Current optimization state: Semi-join strategy to be used for this
+    and preceding join tables.
+    
+    Join optimizer sets this for the *last* join_tab in the
+    duplicate-generating range. That is, in order to interpret this field, 
+    one needs to traverse join->[best_]positions array from right to left.
+    When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
+    field (depending on the strategy) tells how many preceding positions 
+    this applies to. The values of covered_preceding_positions->sj_strategy
+    must be ignored.
+  */
+  uint sj_strategy;
+  /*
+    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
+    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
+    are covered by the specified semi-join strategy
+  */
+  uint n_sj_tables;
+
+/* LooseScan strategy members */
+
+  /* The first (i.e. driving) table we're doing loose scan for */
+  uint        first_loosescan_table;
+  /* 
+     Tables that need to be in the prefix before we can calculate the cost
+     of using LooseScan strategy.
+  */
+  table_map   loosescan_need_tables;
+
+  /*
+    keyno  -  Planning to do LooseScan on this key. If keyuse is NULL then 
+              this is a full index scan, otherwise this is a ref+loosescan
+              scan (and keyno matches the KEUSE's)
+    MAX_KEY - Not doing a LooseScan
+  */
+  uint loosescan_key;  // final (one for strategy instance )
+  uint loosescan_parts; /* Number of keyparts to be kept distinct */
+  
+/* FirstMatch strategy */
+  /*
+    Index of the first inner table that we intend to handle with this
+    strategy
+  */
+  uint first_firstmatch_table;
+  /*
+    Tables that were not in the join prefix when we've started considering 
+    FirstMatch strategy.
+  */
+  table_map first_firstmatch_rtbl;
+  /* 
+    Tables that need to be in the prefix before we can calculate the cost
+    of using FirstMatch strategy.
+   */
+  table_map firstmatch_need_tables;
+
+
+/* Duplicate Weedout strategy */
+  /* The first table that the strategy will need to handle */
+  uint  first_dupsweedout_table;
+  /*
+    Tables that we will need to have in the prefix to do the weedout step
+    (all inner and all outer that the involved semi-joins are correlated with)
+  */
+  table_map dupsweedout_tables;
+
+/* SJ-Materialization-Scan strategy */
+  /* The last inner table (valid once we're after it) */
+  uint      sjm_scan_last_inner;
+  /*
+    Tables that we need to have in the prefix to calculate the correct cost.
+    Basically, we need all inner tables and outer tables mentioned in the
+    semi-join's ON expression so we can correctly account for fanout.
+  */
+  table_map sjm_scan_need_tables;
 } POSITION;
 
 
@@ -289,6 +1365,19 @@ typedef struct st_rollup
 } ROLLUP;
 
 
+#define SJ_OPT_NONE 0
+#define SJ_OPT_DUPS_WEEDOUT 1
+#define SJ_OPT_LOOSE_SCAN   2
+#define SJ_OPT_FIRST_MATCH  3
+#define SJ_OPT_MATERIALIZE  4
+#define SJ_OPT_MATERIALIZE_SCAN  5
+
+inline bool sj_is_materialize_strategy(uint strategy)
+{
+  return strategy >= SJ_OPT_MATERIALIZE;
+}
+
+
 class JOIN :public Sql_alloc
 {
   JOIN(const JOIN &rhs);                        /**< not implemented */
@@ -297,9 +1386,18 @@ public:
   JOIN_TAB *join_tab,**best_ref;
   JOIN_TAB **map2table;    ///< mapping between table indexes and JOIN_TABs
   JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution
-  TABLE    **all_tables,*sort_by_table;
-  uint	   tables,const_tables;
+  TABLE    **all_tables;
+  /**
+    The table which has an index that allows to produce the requried ordering.
+    A special value of 0x1 means that the ordering will be produced by
+    passing 1st non-const table to filesort(). NULL means no such table exists.
+  */
+  TABLE    *sort_by_table;
+  uint	   tables;        /**< Number of tables in the join */
+  uint     outer_tables;  /**< Number of tables that are not inside semijoin */
+  uint     const_tables;
   uint	   send_group_parts;
+  bool	   group;          /**< If query contains GROUP BY clause */
   /**
     Indicates that grouping will be performed on the result set during
     query execution. This field belongs to query execution.
@@ -307,9 +1405,17 @@ public:
     @see make_group_fields, alloc_group_fields, JOIN::exec
   */
   bool     sort_and_group; 
-  bool     first_record,full_join,group, no_field_update;
+  bool     first_record,full_join, no_field_update;
   bool	   do_send_rows;
-  table_map const_table_map,found_const_table_map;
+  table_map const_table_map;
+  /*
+    Constant tables for which we have found a row (as opposed to those for
+    which we didn't).
+  */
+  table_map found_const_table_map;
+  
+  /* Tables removed by table elimination. Set to 0 before the elimination. */
+  table_map eliminated_tables;
   /*
      Bitmap of all inner tables from outer joins
   */
@@ -325,14 +1431,47 @@ public:
       - on each fetch iteration we add num_rows to fetch to fetch_limit
   */
   ha_rows  fetch_limit;
-  POSITION positions[MAX_TABLES+1],best_positions[MAX_TABLES+1];
+  /* Finally picked QEP. This is result of join optimization */
+  POSITION best_positions[MAX_TABLES+1];
+
+/******* Join optimization state members start *******/
+  /*
+    pointer - we're doing optimization for a semi-join materialization nest.
+    NULL    - otherwise
+  */
+  TABLE_LIST *emb_sjm_nest;
   
-  /* *
+  /* Current join optimization state */
+  POSITION positions[MAX_TABLES+1];
+  
+  /*
     Bitmap of nested joins embedding the position at the end of the current 
     partial join (valid only during join optimizer run).
   */
   nested_join_map cur_embedding_map;
+  
+  /*
+    Bitmap of inner tables of semi-join nests that have a proper subset of
+    their tables in the current join prefix. That is, of those semi-join
+    nests that have their tables both in and outside of the join prefix.
+  */
+  table_map cur_sj_inner_tables;
+  
+  /*
+    Bitmap of semi-join inner tables that are in the join prefix and for
+    which there's no provision for how to eliminate semi-join duplicates
+    they produce.
+  */
+  table_map cur_dups_producing_tables;
 
+  /* We also maintain a stack of join optimization states in * join->positions[] */
+/******* Join optimization state members end *******/
+  Next_select_func first_select;
+  /*
+    The cost of best complete join plan found so far during optimization,
+    after optimization phase - cost of picked join order (not taking into
+    account the changes made by test_if_skip_sort_order()).
+  */
   double   best_read;
   List<Item> *fields;
   List<Cached_item> group_fields, group_fields_cache;
@@ -363,24 +1502,31 @@ public:
     the number of rows in it may vary from one subquery execution to another.
   */
   bool no_const_tables; 
+  /*
+    This flag is set if we call no_rows_in_result() as par of end_group().
+    This is used as a simple speed optimization to avoiding calling
+    restore_no_rows_in_result() in ::reinit()
+  */
+  bool no_rows_in_result_called;
   
   /**
     Copy of this JOIN to be used with temporary tables.
 
-    tmp_join is used when the JOIN needs to be "reusable" (e.g. in a subquery
-    that gets re-executed several times) and we know will use temporary tables
-    for materialization. The materialization to a temporary table overwrites the
-    JOIN structure to point to the temporary table after the materialization is
-    done. This is where tmp_join is used : it's a copy of the JOIN before the
-    materialization and is used in restoring before re-execution by overwriting
-    the current JOIN structure with the saved copy.
-    Because of this we should pay extra care of not freeing up helper structures
-    that are referenced by the original contents of the JOIN. We can check for
-    this by making sure the "current" join is not the temporary copy, e.g.
-    !tmp_join || tmp_join != join
+    tmp_join is used when the JOIN needs to be "reusable" (e.g. in a
+    subquery that gets re-executed several times) and we know will use
+    temporary tables for materialization. The materialization to a
+    temporary table overwrites the JOIN structure to point to the
+    temporary table after the materialization is done. This is where
+    tmp_join is used : it's a copy of the JOIN before the
+    materialization and is used in restoring before re-execution by
+    overwriting the current JOIN structure with the saved copy.
+    Because of this we should pay extra care of not freeing up helper
+    structures that are referenced by the original contents of the
+    JOIN. We can check for this by making sure the "current" join is
+    not the temporary copy, e.g.  !tmp_join || tmp_join != join
  
-    We should free these sub-structures at JOIN::destroy() if the "current" join
-    has a copy is not that copy.
+    We should free these sub-structures at JOIN::destroy() if the
+    "current" join has a copy is not that copy.
   */
   JOIN *tmp_join;
   ROLLUP rollup;				///< Used with rollup
@@ -429,6 +1575,7 @@ public:
   TABLE_LIST *tables_list;           ///<hold 'tables' parameter of mysql_select
   List<TABLE_LIST> *join_list;       ///< list of joined tables in reverse order
   COND_EQUAL *cond_equal;
+  COND_EQUAL *having_equal;
   SQL_SELECT *select;                ///<created in optimisation phase
   JOIN_TAB *return_tab;              ///<used only for outer joins
   Item **ref_pointer_array; ///<used pointer reference for this select
@@ -440,6 +1587,13 @@ public:
   bool union_part; ///< this subselect is part of union 
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
 
+  Array<Item_in_subselect> sj_subselects;
+
+  /* Temporary tables used to weed-out semi-join duplicates */
+  List<TABLE> sj_tmp_tables;
+  /* SJM nests that are executed with SJ-Materialization strategy */
+  List<SJ_MATERIALIZATION_INFO> sjm_info_list;
+
   /* 
     storage for caching buffers allocated during query execution. 
     These buffers allocations need to be cached as the thread memory pool is
@@ -457,7 +1611,7 @@ public:
 
   JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
        select_result *result_arg)
-    :fields_list(fields_arg)
+    :fields_list(fields_arg), sj_subselects(thd_arg->mem_root, 4)
   {
     init(thd_arg, fields_arg, select_options_arg, result_arg);
   }
@@ -469,6 +1623,7 @@ public:
     all_tables= 0;
     tables= 0;
     const_tables= 0;
+    eliminated_tables= 0;
     join_list= 0;
     implicit_grouping= FALSE;
     sort_and_group= 0;
@@ -507,7 +1662,9 @@ public:
     zero_result_cause= 0;
     optimized= 0;
     cond_equal= 0;
+    having_equal= 0;
     group_optimized_away= 0;
+    no_rows_in_result_called= 0;
 
     all_fields= fields_arg;
     if (&fields_list != &fields_arg)      /* Avoid valgrind-warning */
@@ -518,6 +1675,7 @@ public:
     rollup.state= ROLLUP::STATE_NONE;
 
     no_const_tables= FALSE;
+    first_select= sub_select;
   }
 
   int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
@@ -530,6 +1688,8 @@ public:
   int destroy();
   void restore_tmp();
   bool alloc_func_list();
+  bool flatten_subqueries();
+  bool setup_subquery_materialization();
   bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
 			  bool before_group_by, bool recompute= FALSE);
 
@@ -551,7 +1711,6 @@ public:
 			  Item_sum ***func);
   int rollup_send_data(uint idx);
   int rollup_write_data(uint idx, TABLE *table);
-  void remove_subq_pushed_predicates(Item **where);
   /**
     Release memory and, if possible, the open tables held by this execution
     plan (and nested plans). It's used to release some tables before
@@ -576,6 +1735,21 @@ public:
                                         select_lex == unit->fake_select_lex));
   }
   void cache_const_exprs();
+  inline table_map all_tables_map()
+  {
+    return (table_map(1) << tables) - 1;
+  }
+  /* 
+    Return the table for which an index scan can be used to satisfy 
+    the sort order needed by the ORDER BY/(implicit) GROUP BY clause 
+  */
+  JOIN_TAB *get_sort_by_join_tab()
+  {
+    return (need_tmp || !sort_by_table || skip_sort_order ||
+            ((group || tmp_table_param.sum_func_count) && !group_list)) ?
+              NULL : join_tab+const_tables;
+  }
+  bool setup_subquery_caches();
 private:
   /**
     TRUE if the query contains an aggregate function but has no GROUP
@@ -595,11 +1769,6 @@ extern const char *join_type_str[];
 
 /* Extern functions in sql_select.cc */
 bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag);
-TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
-			ORDER *group, bool distinct, bool save_sum_fields,
-			ulonglong select_options, ha_rows rows_limit,
-			const char* alias);
-void free_tmp_table(THD *thd, TABLE *entry);
 void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, 
                        List<Item> &fields, bool reset_with_sum_func);
 bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
@@ -608,8 +1777,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
 		       uint elements, List<Item> &fields);
 void copy_fields(TMP_TABLE_PARAM *param);
 bool copy_funcs(Item **func_ptr, const THD *thd);
-bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
-			     int error, bool ignore_last_dupp_error);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 Field* create_tmp_field_from_field(THD *thd, Field* org_field,
                                    const char *name, TABLE *table,
@@ -723,12 +1890,17 @@ class store_key_item :public store_key
 {
  protected:
   Item *item;
+  /*
+    Flag that forces usage of save_val() method which save value of the
+    item instead of save_in_field() method which saves result.
+  */
+  bool use_value;
 public:
   store_key_item(THD *thd, Field *to_field_arg, uchar *ptr,
-                 uchar *null_ptr_arg, uint length, Item *item_arg)
+                 uchar *null_ptr_arg, uint length, Item *item_arg, bool val)
     :store_key(thd, to_field_arg, ptr,
 	       null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-	       &err : (uchar*) 0, length), item(item_arg)
+	       &err : (uchar*) 0, length), item(item_arg), use_value(val)
   {}
   const char *name() const { return "func"; }
 
@@ -738,7 +1910,11 @@ public:
     TABLE *table= to_field->table;
     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table,
                                                      table->write_set);
-    int res= item->save_in_field(to_field, 1);
+    int res= FALSE;
+    if (use_value)
+      item->save_val(to_field);
+    else
+      res= item->save_in_field(to_field, 1);
     /*
      Item::save_in_field() may call Item::val_xxx(). And if this is a subquery
      we need to check for errors executing it and react accordingly
@@ -762,7 +1938,7 @@ public:
 		       Item *item_arg)
     :store_key_item(thd, to_field_arg,ptr,
 		    null_ptr_arg ? null_ptr_arg : item_arg->maybe_null ?
-		    &err : (uchar*) 0, length, item_arg), inited(0)
+		    &err : (uchar*) 0, length, item_arg, FALSE), inited(0)
   {
   }
   const char *name() const { return "const"; }
@@ -805,7 +1981,9 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
 		List<Item> &fields, List<Item> &all_fields, ORDER *order,
 		bool *hidden_group_fields);
 bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                   Item **ref_pointer_array, ORDER *group_list= NULL);
+                   Item **ref_pointer_array);
+int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
+                   struct st_table_ref *table_ref);
 
 bool handle_select(THD *thd, LEX *lex, select_result *result,
                    ulong setup_tables_done_option);
@@ -836,6 +2014,26 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
 
 TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list);
 
+int test_if_item_cache_changed(List<Cached_item> &list);
+void calc_used_field_length(THD *thd, JOIN_TAB *join_tab);
+int join_init_read_record(JOIN_TAB *tab);
+void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
+inline Item * and_items(Item* cond, Item *item)
+{
+  return (cond? (new Item_cond_and(cond, item)) : item);
+}
+bool choose_plan(JOIN *join,table_map join_tables);
+void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
+                           double *record_count_arg);
+void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, 
+                                table_map last_remaining_tables, 
+                                bool first_alt, uint no_jbuf_before,
+                                double *reopt_rec_count, double *reopt_cost,
+                                double *sj_inner_fanout);
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+bool test_if_ref(COND *root_cond, 
+                 Item_field *left_item,Item *right_item);
 
 inline bool optimizer_flag(THD *thd, uint flag)
 { 
@@ -849,5 +2047,35 @@ bool const_expression_in_where(COND *cond, Item *comp_item,
                                Field *comp_field= NULL,
                                Item **const_item= NULL);
 
+/* Table elimination entry point function */
+void eliminate_tables(JOIN *join);
+
+/* Index Condition Pushdown entry point function */
+void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok);
+
+/****************************************************************************
+  Temporary table support for SQL Runtime
+ ***************************************************************************/
+
+#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
+#define AVG_STRING_LENGTH_TO_PACK_ROWS   64
+#define RATIO_TO_PACK_ROWS	       2
+#define MIN_STRING_LENGTH_TO_PACK_ROWS   10
+
+TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
+			ORDER *group, bool distinct, bool save_sum_fields,
+			ulonglong select_options, ha_rows rows_limit,
+			char* alias);
+void free_tmp_table(THD *thd, TABLE *entry);
+bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
+                                         ENGINE_COLUMNDEF *start_recinfo,
+                                         ENGINE_COLUMNDEF **recinfo, 
+                                         int error, bool ignore_last_dupp_key_error);
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, 
+                               ENGINE_COLUMNDEF *start_recinfo,
+                               ENGINE_COLUMNDEF **recinfo, 
+                               ulonglong options);
+bool open_tmp_table(TABLE *table);
+void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
 
 #endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_servers.cc b/sql/sql_servers.cc
index bc845ed2cdd..7d33abc16cd 100644
--- a/sql/sql_servers.cc
+++ b/sql/sql_servers.cc
@@ -211,8 +211,9 @@ static bool servers_load(THD *thd, TABLE_LIST *tables)
   free_root(&mem, MYF(0));
   init_sql_alloc(&mem, ACL_ALLOC_BLOCK_SIZE, 0);
 
-  init_read_record(&read_record_info,thd,table=tables[0].table,NULL,1,0, 
-                   FALSE);
+  if (init_read_record(&read_record_info,thd,table=tables[0].table,NULL,1,0, 
+                       FALSE))
+    DBUG_RETURN(1);
   while (!(read_record_info.read_record(&read_record_info)))
   {
     /* return_val is already TRUE, so no need to set */
@@ -544,10 +545,10 @@ int insert_server_record(TABLE *table, FOREIGN_SERVER *server)
                          system_charset_info);
 
   /* read index until record is that specified in server_name */
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                              (uchar *)table->field[0]->ptr,
-                                              HA_WHOLE_KEY,
-                                              HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 HA_WHOLE_KEY,
+                                                 HA_READ_KEY_EXACT)))
   {
     /* if not found, err */
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
@@ -888,10 +889,10 @@ update_server_record(TABLE *table, FOREIGN_SERVER *server)
                          server->server_name_length,
                          system_charset_info);
 
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                              (uchar *)table->field[0]->ptr,
-                                              ~(longlong)0,
-                                              HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 ~(longlong)0,
+                                                 HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       table->file->print_error(error, MYF(0));
@@ -947,10 +948,10 @@ delete_server_record(TABLE *table,
   /* set the field that's the PK to the value we're looking for */
   table->field[0]->store(server_name, server_name_length, system_charset_info);
 
-  if ((error= table->file->index_read_idx_map(table->record[0], 0,
-                                          (uchar *)table->field[0]->ptr,
-                                          HA_WHOLE_KEY,
-                                          HA_READ_KEY_EXACT)))
+  if ((error= table->file->ha_index_read_idx_map(table->record[0], 0,
+                                                 (uchar *)table->field[0]->ptr,
+                                                 HA_WHOLE_KEY,
+                                                 HA_READ_KEY_EXACT)))
   {
     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
       table->file->print_error(error, MYF(0));
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 6b24e3db7bc..c7cdab06d7b 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -22,6 +22,7 @@
 #include "sql_acl.h"                        // fill_schema_*_privileges
 #include "sql_select.h"                         // For select_describe
 #include "sql_base.h"                       // close_tables_for_reopen
+#include "create_options.h"
 #include "sql_show.h"
 #include "sql_table.h"                        // filename_to_tablename,
                                               // primary_key_name,
@@ -95,6 +96,9 @@ static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **),
                                grant_names, NULL};
 #endif
 
+/* Match the values of enum ha_choice */
+static const char *ha_choice_values[] = {"", "0", "1"};
+
 static void store_key_options(THD *thd, String *packet, TABLE *table,
                               KEY *key_info);
 
@@ -118,11 +122,21 @@ static int make_version_string(char *buf, int buf_length, uint version)
   return my_snprintf(buf, buf_length, "%d.%d", version>>8,version&0xff);
 }
 
+
+static const LEX_STRING maturity_name[]={
+  { C_STRING_WITH_LEN("Unknown") },
+  { C_STRING_WITH_LEN("Experimental") },
+  { C_STRING_WITH_LEN("Alpha") },
+  { C_STRING_WITH_LEN("Beta") },
+  { C_STRING_WITH_LEN("Gamma") },
+  { C_STRING_WITH_LEN("Stable") }};
+
+
 static my_bool show_plugins(THD *thd, plugin_ref plugin,
                             void *arg)
 {
   TABLE *table= (TABLE*) arg;
-  struct st_mysql_plugin *plug= plugin_decl(plugin);
+  struct st_maria_plugin *plug= plugin_decl(plugin);
   struct st_plugin_dl *plugin_dl= plugin_dlib(plugin);
   CHARSET_INFO *cs= system_charset_info;
   char version_buf[20];
@@ -136,7 +150,6 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
         make_version_string(version_buf, sizeof(version_buf), plug->version),
         cs);
 
-
   switch (plugin_state(plugin)) {
   /* case PLUGIN_IS_FREED: does not happen */
   case PLUGIN_IS_DELETED:
@@ -168,7 +181,7 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
     table->field[5]->set_notnull();
     table->field[6]->store(version_buf,
           make_version_string(version_buf, sizeof(version_buf),
-                              plugin_dl->version),
+                              plugin_dl->mariaversion),
           cs);
     table->field[6]->set_notnull();
   }
@@ -197,20 +210,40 @@ static my_bool show_plugins(THD *thd, plugin_ref plugin,
 
   switch (plug->license) {
   case PLUGIN_LICENSE_GPL:
-    table->field[9]->store(PLUGIN_LICENSE_GPL_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_GPL_STRING,
                            strlen(PLUGIN_LICENSE_GPL_STRING), cs);
     break;
   case PLUGIN_LICENSE_BSD:
-    table->field[9]->store(PLUGIN_LICENSE_BSD_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_BSD_STRING,
                            strlen(PLUGIN_LICENSE_BSD_STRING), cs);
     break;
   default:
-    table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING, 
+    table->field[9]->store(PLUGIN_LICENSE_PROPRIETARY_STRING,
                            strlen(PLUGIN_LICENSE_PROPRIETARY_STRING), cs);
     break;
   }
   table->field[9]->set_notnull();
 
+  if ((uint) plug->maturity <= MariaDB_PLUGIN_MATURITY_STABLE)
+     table->field[10]->store(maturity_name[plug->maturity].str,
+                             maturity_name[plug->maturity].length,
+                             cs);
+   else
+   {
+     DBUG_ASSERT(0);
+     table->field[10]->store("Unknown", 7, cs);
+   }
+   table->field[10]->set_notnull();
+
+  if (plug->version_info)
+  {
+    table->field[11]->store(plug->version_info,
+                            strlen(plug->version_info), cs);
+    table->field[11]->set_notnull();
+  }
+  else
+    table->field[11]->set_null();
+
   return schema_table_store_record(thd, table);
 }
 
@@ -417,8 +450,6 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
       wild_length= strlen(wild);
   }
 
-
-
   bzero((char*) &table_list,sizeof(table_list));
 
   if (!(dirp = my_dir(path,MYF(dir ? MY_WANT_STAT : 0))))
@@ -432,11 +463,11 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
 
   for (i=0 ; i < (uint) dirp->number_off_files  ; i++)
   {
-    char uname[NAME_LEN + 1];                   /* Unencoded name */
+    char uname[SAFE_NAME_LEN + 1];              /* Unencoded name */
     file=dirp->dir_entry+i;
     if (dir)
     {                                           /* Return databases */
-      if ((file->name[0] == '.' && 
+      if ((file->name[0] == '.' &&
           ((file->name[1] == '.' && file->name[2] == '\0') ||
             file->name[1] == '\0')))
         continue;                               /* . or .. */
@@ -467,7 +498,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
           if (my_wildcmp(files_charset_info,
                          uname, uname + file_name_len,
                          wild, wild + wild_length,
-                         wild_prefix, wild_one,wild_many))
+                         wild_prefix, wild_one, wild_many))
             continue;
 	}
 	else if (wild_compare(uname, wild, 0))
@@ -515,7 +546,7 @@ find_files(THD *thd, List<LEX_STRING> *files, const char *db,
         continue;
     }
 #endif
-    if (!(file_name= 
+    if (!(file_name=
           thd->make_lex_string(file_name, uname, file_name_len, TRUE)) ||
         files->push_back(file_name))
     {
@@ -788,6 +819,7 @@ bool mysqld_show_create_db(THD *thd, char *dbname,
 		sctx->master_access);
   if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
   {
+    status_var_increment(thd->status_var.access_denied_errors);
     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
              sctx->priv_user, sctx->host_or_ip, dbname);
     general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
@@ -870,7 +902,7 @@ mysqld_list_fields(THD *thd, TABLE_LIST *table_list, const char *wild)
   Field **ptr,*field;
   for (ptr=table->field ; (field= *ptr); ptr++)
   {
-    if (!wild || !wild[0] || 
+    if (!wild || !wild[0] ||
         !wild_case_compare(system_charset_info, field->field_name,wild))
     {
       if (table_list->view)
@@ -951,7 +983,7 @@ append_identifier(THD *thd, String *packet, const char *name, uint length)
 
   /*
     The identifier must be quoted as it includes a quote character or
-   it's a keyword
+    it's a keyword
   */
 
   (void) packet->reserve(length*2 + 2);
@@ -1078,10 +1110,10 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
       if (field_type == MYSQL_TYPE_BIT)
       {
         longlong dec= field->val_int();
-        char *ptr= longlong2str(dec, tmp + 2, 2);
+        char *ptr= longlong2str(dec, tmp + 2, 2, 1);
         uint32 length= (uint32) (ptr - tmp);
         tmp[0]= 'b';
-        tmp[1]= '\'';        
+        tmp[1]= '\'';
         tmp[length]= '\'';
         type.length(length + 1);
         quoted= 0;
@@ -1113,6 +1145,30 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
 }
 
 
+/**
+  Appends list of options to string
+
+  @param thd             thread handler
+  @param packet          string to append
+  @param opt             list of options
+*/
+
+static void append_create_options(THD *thd, String *packet,
+				  engine_option_value *opt)
+{
+  for(; opt; opt= opt->next)
+  {
+    DBUG_ASSERT(opt->value.str);
+    packet->append(' ');
+    append_identifier(thd, packet, opt->name.str, opt->name.length);
+    packet->append('=');
+    if (opt->quoted_value)
+      append_unescaped(packet, opt->value.str, opt->value.length);
+    else
+      packet->append(opt->value.str, opt->value.length);
+  }
+}
+
 /*
   Build a CREATE TABLE statement for a table.
 
@@ -1127,11 +1183,11 @@ static bool get_field_default_value(THD *thd, Field *timestamp_field,
                       to tailor the format of the statement.  Can be
                       NULL, in which case only SQL_MODE is considered
                       when building the statement.
-  
+
   NOTE
     Currently always return 0, but might return error code in the
     future.
-    
+
   RETURN
     0       OK
  */
@@ -1232,7 +1288,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
     field->sql_type(type);
     packet->append(type.ptr(), type.length(), system_charset_info);
 
-    if (field->has_charset() && 
+    if (field->has_charset() &&
         !(thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)))
     {
       if (field->charset() != share->table_charset)
@@ -1240,8 +1296,8 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
 	packet->append(STRING_WITH_LEN(" CHARACTER SET "));
 	packet->append(field->charset()->csname);
       }
-      /* 
-	For string types dump collation name only if 
+      /*
+	For string types dump collation name only if
 	collation is not primary for the given charset
       */
       if (!(field->charset()->state & MY_CS_PRIMARY))
@@ -1251,6 +1307,19 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       }
     }
 
+    if (field->vcol_info)
+    {
+      packet->append(STRING_WITH_LEN(" AS ("));
+      packet->append(field->vcol_info->expr_str.str,
+                     field->vcol_info->expr_str.length,
+                     system_charset_info);
+      packet->append(STRING_WITH_LEN(")"));
+      if (field->stored_in_db)
+        packet->append(STRING_WITH_LEN(" PERSISTENT"));
+      else
+        packet->append(STRING_WITH_LEN(" VIRTUAL"));
+    }
+
     if (flags & NOT_NULL_FLAG)
       packet->append(STRING_WITH_LEN(" NOT NULL"));
     else if (field->type() == MYSQL_TYPE_TIMESTAMP)
@@ -1262,18 +1331,19 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" NULL"));
     }
 
-    if (get_field_default_value(thd, table->timestamp_field,
+    if (!field->vcol_info &&
+        get_field_default_value(thd, table->timestamp_field,
                                 field, &def_value, 1))
     {
       packet->append(STRING_WITH_LEN(" DEFAULT "));
       packet->append(def_value.ptr(), def_value.length(), system_charset_info);
     }
 
-    if (!limited_mysql_mode && table->timestamp_field == field && 
+    if (!limited_mysql_mode && table->timestamp_field == field &&
         field->unireg_check != Field::TIMESTAMP_DN_FIELD)
       packet->append(STRING_WITH_LEN(" ON UPDATE CURRENT_TIMESTAMP"));
 
-    if (field->unireg_check == Field::NEXT_NUMBER && 
+    if (field->unireg_check == Field::NEXT_NUMBER &&
         !(thd->variables.sql_mode & MODE_NO_FIELD_OPTIONS))
       packet->append(STRING_WITH_LEN(" AUTO_INCREMENT"));
 
@@ -1282,12 +1352,15 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" COMMENT "));
       append_unescaped(packet, field->comment.str, field->comment.length);
     }
+    append_create_options(thd, packet, field->option_list);
   }
 
   key_info= table->key_info;
   bzero((char*) &create_info, sizeof(create_info));
-  /* Allow update_create_info to update row type */
+  /* Allow update_create_info to update row type, page checksums and options */
   create_info.row_type= share->row_type;
+  create_info.page_checksum= share->page_checksum;
+  create_info.options= share->db_create_options;
   file->update_create_info(&create_info);
   primary_key= share->primary_key;
 
@@ -1351,6 +1424,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       append_identifier(thd, packet, parser_name->str, parser_name->length);
       packet->append(STRING_WITH_LEN(" */ "));
     }
+    append_create_options(thd, packet, key_info->option_list);
   }
 
   /*
@@ -1467,20 +1541,30 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(buff, (uint) (end - buff));
     }
 
-    if (share->db_create_options & HA_OPTION_PACK_KEYS)
+    if (create_info.options & HA_OPTION_PACK_KEYS)
       packet->append(STRING_WITH_LEN(" PACK_KEYS=1"));
-    if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
+    if (create_info.options & HA_OPTION_NO_PACK_KEYS)
       packet->append(STRING_WITH_LEN(" PACK_KEYS=0"));
     /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
-    if (share->db_create_options & HA_OPTION_CHECKSUM)
+    if (create_info.options & HA_OPTION_CHECKSUM)
       packet->append(STRING_WITH_LEN(" CHECKSUM=1"));
-    if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
+    if (create_info.page_checksum != HA_CHOICE_UNDEF)
+    {
+      packet->append(STRING_WITH_LEN(" PAGE_CHECKSUM="));
+      packet->append(ha_choice_values[create_info.page_checksum], 1);
+    }
+    if (create_info.options & HA_OPTION_DELAY_KEY_WRITE)
       packet->append(STRING_WITH_LEN(" DELAY_KEY_WRITE=1"));
     if (create_info.row_type != ROW_TYPE_DEFAULT)
     {
       packet->append(STRING_WITH_LEN(" ROW_FORMAT="));
       packet->append(ha_row_type[(uint) create_info.row_type]);
     }
+    if (share->transactional != HA_CHOICE_UNDEF)
+    {
+      packet->append(STRING_WITH_LEN(" TRANSACTIONAL="));
+      packet->append(ha_choice_values[(uint) share->transactional], 1);
+    }
     if (table->s->key_block_size)
     {
       char *end;
@@ -1499,6 +1583,7 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
       packet->append(STRING_WITH_LEN(" CONNECTION="));
       append_unescaped(packet, share->connect_string.str, share->connect_string.length);
     }
+    append_create_options(thd, packet, share->option_list);
     append_directory(thd, packet, "DATA",  create_info.data_file_name);
     append_directory(thd, packet, "INDEX", create_info.index_file_name);
   }
@@ -1591,7 +1676,7 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
 
 /*
   Append DEFINER clause to the given buffer.
-  
+
   SYNOPSIS
     append_definer()
     thd           [in] thread handle
@@ -1620,7 +1705,7 @@ static void append_algorithm(TABLE_LIST *table, String *buff)
 
 /*
   Append DEFINER clause to the given buffer.
-  
+
   SYNOPSIS
     append_definer()
     thd           [in] thread handle
@@ -1809,8 +1894,8 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
 			"%s:%u", tmp_sctx->host_or_ip, tmp->peer_port);
 	}
 	else
-	  thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ? 
-                                      tmp_sctx->host_or_ip : 
+	  thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ?
+                                      tmp_sctx->host_or_ip :
                                       tmp_sctx->host ? tmp_sctx->host : "");
         if ((thd_info->db=tmp->db))             // Safe test
           thd_info->db=thd->strdup(thd_info->db);
@@ -1869,7 +1954,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
   TABLE *table= tables->table;
   CHARSET_INFO *cs= system_charset_info;
   char *user;
-  time_t now= my_time(0);
+  ulonglong unow= my_micro_time();
   DBUG_ENTER("fill_process_list");
 
   user= thd->security_ctx->master_access & PROCESS_ACL ?
@@ -1928,8 +2013,8 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
         table->field[4]->store(command_name[tmp->command].str,
                                command_name[tmp->command].length, cs);
       /* MYSQL_TIME */
-      table->field[5]->store((longlong)(tmp->start_time ?
-                                      now - tmp->start_time : 0), FALSE);
+      const ulonglong utime= tmp->start_utime ? unow - tmp->start_utime : 0;
+      table->field[5]->store(utime / 1000000, TRUE);
       /* STATE */
       if ((val= thread_state_info(tmp)))
       {
@@ -1953,6 +2038,9 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
       }
       mysql_mutex_unlock(&tmp->LOCK_thd_data);
 
+      /* TIME_MS */
+      table->field[8]->store((double)(utime / 1000.0));
+
       if (schema_table_store_record(thd, table))
       {
         mysql_mutex_unlock(&LOCK_thread_count);
@@ -2065,7 +2153,7 @@ void reset_status_vars()
     /* Note that SHOW_LONG_NOFLUSH variables are not reset */
     if (ptr->type == SHOW_LONG)
       *(ulong*) ptr->value= 0;
-  }  
+  }
 }
 
 /*
@@ -2141,11 +2229,7 @@ void remove_status_vars(SHOW_VAR *list)
   }
 }
 
-inline void make_upper(char *buf)
-{
-  for (; *buf; buf++)
-    *buf= my_toupper(system_charset_info, *buf);
-}
+
 
 static bool show_status_array(THD *thd, const char *wild,
                               SHOW_VAR *variables,
@@ -2169,7 +2253,7 @@ static bool show_status_array(THD *thd, const char *wild,
   CHARSET_INFO *charset= system_charset_info;
   DBUG_ENTER("show_status_array");
 
-  thd->count_cuted_fields= CHECK_FIELD_WARN;  
+  thd->count_cuted_fields= CHECK_FIELD_WARN;
   null_lex_str.str= 0;				// For sys_var->value_ptr()
   null_lex_str.length= 0;
 
@@ -2184,7 +2268,7 @@ static bool show_status_array(THD *thd, const char *wild,
     strnmov(prefix_end, variables->name, len);
     name_buffer[sizeof(name_buffer)-1]=0;       /* Safety */
     if (ucase_names)
-      make_upper(name_buffer);
+      my_caseup_str(system_charset_info, name_buffer);
 
     restore_record(table, s->default_values);
     table->field[0]->store(name_buffer, strlen(name_buffer),
@@ -2228,21 +2312,21 @@ static bool show_status_array(THD *thd, const char *wild,
         */
         switch (show_type) {
         case SHOW_DOUBLE_STATUS:
-          value= ((char *) status_var + (ulong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_DOUBLE:
           /* 6 is the default precision for '%f' in sprintf() */
           end= buff + my_fcvt(*(double *) value, 6, buff, NULL);
           break;
         case SHOW_LONG_STATUS:
-          value= ((char *) status_var + (ulong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_LONG:
         case SHOW_LONG_NOFLUSH: // the difference lies in refresh_status()
           end= int10_to_str(*(long*) value, buff, 10);
           break;
         case SHOW_LONGLONG_STATUS:
-          value= ((char *) status_var + (ulonglong) value);
+          value= ((char *) status_var + (intptr) value);
           /* fall through */
         case SHOW_LONGLONG:
           end= longlong10_to_str(*(longlong*) value, buff, 10);
@@ -2289,14 +2373,6 @@ static bool show_status_array(THD *thd, const char *wild,
             end= pos + ls->length;
           break;
         }
-        case SHOW_KEY_CACHE_LONG:
-          value= (char*) dflt_key_cache + (ulong)value;
-          end= int10_to_str(*(long*) value, buff, 10);
-          break;
-        case SHOW_KEY_CACHE_LONGLONG:
-          value= (char*) dflt_key_cache + (ulong)value;
-	  end= longlong10_to_str(*(longlong*) value, buff, 10);
-	  break;
         case SHOW_UNDEF:
           break;                                        // Return empty string
         case SHOW_SYS:                                  // Cannot happen
@@ -2304,6 +2380,7 @@ static bool show_status_array(THD *thd, const char *wild,
           DBUG_ASSERT(0);
           break;
         }
+        pthread_mutex_unlock(&LOCK_global_system_variables);
         table->field[1]->store(pos, (uint32) (end - pos), charset);
         thd->count_cuted_fields= CHECK_FIELD_IGNORE;
         table->field[1]->set_notnull();
@@ -2323,6 +2400,323 @@ end:
   DBUG_RETURN(res);
 }
 
+#ifdef COMPLEAT_PATCH_NOT_ADDED_YET
+/*
+  Aggregate values for mapped_user entries by their role.
+
+  SYNOPSIS
+  aggregate_user_stats
+  all_user_stats - input to aggregate
+  agg_user_stats - returns aggregated values
+
+  RETURN
+  0 - OK
+  1 - error
+*/
+
+static int aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats)
+{
+  DBUG_ENTER("aggregate_user_stats");
+  if (hash_init(agg_user_stats, system_charset_info,
+                max(all_user_stats->records, 1),
+                0, 0, (hash_get_key)get_key_user_stats,
+                (hash_free_key)free_user_stats, 0))
+  {
+    sql_print_error("Malloc in aggregate_user_stats failed");
+    DBUG_RETURN(1);
+  }
+
+  for (uint i= 0; i < all_user_stats->records; i++)
+  {
+    USER_STATS *user= (USER_STATS*)hash_element(all_user_stats, i);
+    USER_STATS *agg_user;
+    uint name_length= strlen(user->priv_user);
+
+    if (!(agg_user= (USER_STATS*) hash_search(agg_user_stats,
+                                              (uchar*)user->priv_user,
+                                              name_length)))
+    {
+      // First entry for this role.
+      if (!(agg_user= (USER_STATS*) my_malloc(sizeof(USER_STATS),
+                                              MYF(MY_WME | MY_ZEROFILL))))
+      {
+        sql_print_error("Malloc in aggregate_user_stats failed");
+        DBUG_RETURN(1);
+      }
+
+      init_user_stats(agg_user, user->priv_user, name_length,
+                      user->priv_user,
+                      user->total_connections, user->concurrent_connections,
+                      user->connected_time, user->busy_time, user->cpu_time,
+                      user->bytes_received, user->bytes_sent,
+                      user->binlog_bytes_written,
+                      user->rows_sent, user->rows_read,
+                      user->rows_inserted, user->rows_deleted,
+                      user->rows_updated, 
+                      user->select_commands, user->update_commands,
+                      user->other_commands,
+                      user->commit_trans, user->rollback_trans,
+                      user->denied_connections, user->lost_connections,
+                      user->access_denied_errors, user->empty_queries);
+
+      if (my_hash_insert(agg_user_stats, (uchar*) agg_user))
+      {
+        /* Out of memory */
+        my_free(agg_user, 0);
+        sql_print_error("Malloc in aggregate_user_stats failed");
+        DBUG_RETURN(1);
+      }
+    }
+    else
+    {
+      /* Aggregate with existing values for this role. */
+      add_user_stats(agg_user,
+                     user->total_connections, user->concurrent_connections,
+                     user->connected_time, user->busy_time, user->cpu_time,
+                     user->bytes_received, user->bytes_sent,
+                     user->binlog_bytes_written,
+                     user->rows_sent, user->rows_read,
+                     user->rows_inserted, user->rows_deleted,
+                     user->rows_updated,
+                     user->select_commands, user->update_commands,
+                     user->other_commands,
+                     user->commit_trans, user->rollback_trans,
+                     user->denied_connections, user->lost_connections,
+                     user->access_denied_errors, user->empty_queries);
+    }
+  }
+  DBUG_PRINT("exit", ("aggregated %lu input into %lu output entries",
+                      all_user_stats->records, agg_user_stats->records));
+  DBUG_RETURN(0);
+}
+#endif
+
+/*
+  Write result to network for SHOW USER_STATISTICS
+
+  SYNOPSIS
+  send_user_stats
+  all_user_stats - values to return
+  table - I_S table
+
+  RETURN
+  0 - OK
+  1 - error
+*/
+
+int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
+{
+  DBUG_ENTER("send_user_stats");
+
+  for (uint i= 0; i < all_user_stats->records; i++)
+  {
+    uint j= 0;
+    USER_STATS *user_stats= (USER_STATS*) hash_element(all_user_stats, i);
+    
+    table->field[j++]->store(user_stats->user, user_stats->user_name_length,
+                             system_charset_info);
+    table->field[j++]->store((longlong)user_stats->total_connections,TRUE);
+    table->field[j++]->store((longlong)user_stats->concurrent_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->connected_time, TRUE);
+    table->field[j++]->store((double)user_stats->busy_time);
+    table->field[j++]->store((double)user_stats->cpu_time);
+    table->field[j++]->store((longlong)user_stats->bytes_received, TRUE);
+    table->field[j++]->store((longlong)user_stats->bytes_sent, TRUE);
+    table->field[j++]->store((longlong)user_stats->binlog_bytes_written, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_read, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_sent, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_deleted, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_inserted, TRUE);
+    table->field[j++]->store((longlong)user_stats->rows_updated, TRUE);
+    table->field[j++]->store((longlong)user_stats->select_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->update_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->other_commands, TRUE);
+    table->field[j++]->store((longlong)user_stats->commit_trans, TRUE);
+    table->field[j++]->store((longlong)user_stats->rollback_trans, TRUE);
+    table->field[j++]->store((longlong)user_stats->denied_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->lost_connections, TRUE);
+    table->field[j++]->store((longlong)user_stats->access_denied_errors, TRUE);
+    table->field[j++]->store((longlong)user_stats->empty_queries, TRUE);
+    if (schema_table_store_record(thd, table))
+    {
+      DBUG_PRINT("error", ("store record error"));
+      DBUG_RETURN(1);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  Process SHOW USER_STATISTICS
+
+  SYNOPSIS
+  mysqld_show_user_stats
+  thd - current thread
+  wild - limit results to the entry for this user
+  with_roles - when true, display role for mapped users
+
+  RETURN
+  0 - OK
+  1 - error
+*/
+
+int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+{
+  TABLE *table= tables->table;
+  int result;
+  DBUG_ENTER("fill_schema_user_stats");
+
+  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+    DBUG_RETURN(1);
+
+  /*
+    Iterates through all the global stats and sends them to the client.
+    Pattern matching on the client IP is supported.
+  */
+
+  pthread_mutex_lock(&LOCK_global_user_client_stats);
+  result= send_user_stats(thd, &global_user_stats, table) != 0;
+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
+
+  DBUG_PRINT("exit", ("result: %d", result));
+  DBUG_RETURN(result);
+}
+
+/*
+   Process SHOW CLIENT_STATISTICS
+
+   SYNOPSIS
+     mysqld_show_client_stats
+       thd - current thread
+       wild - limit results to the entry for this client
+
+   RETURN
+     0 - OK
+     1 - error
+*/
+
+int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+{
+  TABLE *table= tables->table;
+  int result;
+  DBUG_ENTER("fill_schema_client_stats");
+
+  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
+    DBUG_RETURN(1);
+
+  /*
+    Iterates through all the global stats and sends them to the client.
+    Pattern matching on the client IP is supported.
+  */
+
+  pthread_mutex_lock(&LOCK_global_user_client_stats);
+  result= send_user_stats(thd, &global_client_stats, table) != 0;
+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
+
+  DBUG_PRINT("exit", ("result: %d", result));
+  DBUG_RETURN(result);
+}
+
+
+/* Fill information schema table with table statistics */
+
+int fill_schema_table_stats(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  TABLE *table= tables->table;
+  DBUG_ENTER("fill_schema_table_stats");
+
+  pthread_mutex_lock(&LOCK_global_table_stats);
+  for (uint i= 0; i < global_table_stats.records; i++)
+  {
+    char *end_of_schema;
+    TABLE_STATS *table_stats= 
+      (TABLE_STATS*)hash_element(&global_table_stats, i);
+    TABLE_LIST tmp_table;
+    size_t schema_length, table_name_length;
+
+    end_of_schema= strend(table_stats->table);
+    schema_length= (size_t) (end_of_schema - table_stats->table);
+    table_name_length= strlen(table_stats->table + schema_length + 1);
+
+    bzero((char*) &tmp_table,sizeof(tmp_table));
+    tmp_table.db=         table_stats->table;
+    tmp_table.table_name= end_of_schema+1;
+    tmp_table.grant.privilege= 0;
+    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
+                     &tmp_table.grant.privilege, 0, 0,
+                     is_schema_db(tmp_table.db)) ||
+        check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX,
+                    1))
+      continue;
+
+    table->field[0]->store(table_stats->table, schema_length,
+                           system_charset_info);
+    table->field[1]->store(table_stats->table + schema_length+1,
+                           table_name_length, system_charset_info);
+    table->field[2]->store((longlong)table_stats->rows_read, TRUE);
+    table->field[3]->store((longlong)table_stats->rows_changed, TRUE);
+    table->field[4]->store((longlong)table_stats->rows_changed_x_indexes,
+                           TRUE);
+    if (schema_table_store_record(thd, table))
+    {
+      VOID(pthread_mutex_unlock(&LOCK_global_table_stats));
+      DBUG_RETURN(1);
+    }
+  }
+  pthread_mutex_unlock(&LOCK_global_table_stats);
+  DBUG_RETURN(0);
+}
+
+
+/* Fill information schema table with index statistics */
+
+int fill_schema_index_stats(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  TABLE *table= tables->table;
+  DBUG_ENTER("fill_schema_index_stats");
+
+  pthread_mutex_lock(&LOCK_global_index_stats);
+  for (uint i= 0; i < global_index_stats.records; i++)
+  {
+    INDEX_STATS *index_stats =
+      (INDEX_STATS*) hash_element(&global_index_stats, i);
+    TABLE_LIST tmp_table;
+    char *index_name;
+    size_t schema_name_length, table_name_length, index_name_length;
+
+    bzero((char*) &tmp_table,sizeof(tmp_table));
+    tmp_table.db=         index_stats->index;
+    tmp_table.table_name= strend(index_stats->index)+1;
+    tmp_table.grant.privilege= 0;
+    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
+                      &tmp_table.grant.privilege, 0, 0,
+                      is_schema_db(tmp_table.db)) ||
+        check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
+      continue;
+
+    index_name=         strend(tmp_table.table_name)+1; 
+    schema_name_length= (tmp_table.table_name - index_stats->index) -1;
+    table_name_length=  (index_name - tmp_table.table_name)-1;
+    index_name_length=  (index_stats->index_name_length - schema_name_length -
+                         table_name_length - 3);
+
+    table->field[0]->store(tmp_table.db, schema_name_length,
+                           system_charset_info);
+    table->field[1]->store(tmp_table.table_name, table_name_length,
+                           system_charset_info);
+    table->field[2]->store(index_name, index_name_length, system_charset_info);
+    table->field[3]->store((longlong)index_stats->rows_read, TRUE);
+
+    if (schema_table_store_record(thd, table))
+    { 
+      VOID(pthread_mutex_unlock(&LOCK_global_index_stats));
+      DBUG_RETURN(1);
+    }
+  }
+  pthread_mutex_unlock(&LOCK_global_index_stats);
+  DBUG_RETURN(0);
+}
+
 
 /* collect status for all running threads */
 
@@ -2335,10 +2729,10 @@ void calc_sum_of_all_status(STATUS_VAR *to)
 
   I_List_iterator<THD> it(threads);
   THD *tmp;
-  
+
   /* Get global values as base */
   *to= global_status_var;
-  
+
   /* Add to this status from existing threads */
   while ((tmp= it++))
     add_to_status(to, &tmp->status_var);
@@ -2377,9 +2771,10 @@ bool schema_table_store_record(THD *thd, TABLE *table)
   int error;
   if ((error= table->file->ha_write_row(table->record[0])))
   {
-    if (create_myisam_from_heap(thd, table, 
-                                table->pos_in_table_list->schema_table_param,
-                                error, 0))
+    TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param;
+    if (create_internal_tmp_table_from_heap(thd, table, param->start_recinfo, 
+                                            &param->recinfo, error, 0))
+
       return 1;
   }
   return 0;
@@ -2399,17 +2794,17 @@ int make_table_list(THD *thd, SELECT_LEX *sel,
 
 
 /**
-  @brief    Get lookup value from the part of 'WHERE' condition 
+  @brief    Get lookup value from the part of 'WHERE' condition
 
-  @details This function gets lookup value from 
-           the part of 'WHERE' condition if it's possible and 
+  @details This function gets lookup value from
+           the part of 'WHERE' condition if it's possible and
            fill appropriate lookup_field_vals struct field
            with this value.
 
   @param[in]      thd                   thread handler
   @param[in]      item_func             part of WHERE condition
   @param[in]      table                 I_S table
-  @param[in, out] lookup_field_vals     Struct which holds lookup values 
+  @param[in, out] lookup_field_vals     Struct which holds lookup values
 
   @return
     0             success
@@ -2417,7 +2812,7 @@ int make_table_list(THD *thd, SELECT_LEX *sel,
 */
 
 bool get_lookup_value(THD *thd, Item_func *item_func,
-                      TABLE_LIST *table, 
+                      TABLE_LIST *table,
                       LOOKUP_FIELD_VALUES *lookup_field_vals)
 {
   ST_SCHEMA_TABLE *schema_table= table->schema_table;
@@ -2483,16 +2878,16 @@ bool get_lookup_value(THD *thd, Item_func *item_func,
 
 
 /**
-  @brief    Calculates lookup values from 'WHERE' condition 
+  @brief    Calculates lookup values from 'WHERE' condition
 
   @details This function calculates lookup value(database name, table name)
-           from 'WHERE' condition if it's possible and 
+           from 'WHERE' condition if it's possible and
            fill lookup_field_vals struct fields with these values.
 
   @param[in]      thd                   thread handler
   @param[in]      cond                  WHERE condition
   @param[in]      table                 I_S table
-  @param[in, out] lookup_field_vals     Struct which holds lookup values 
+  @param[in, out] lookup_field_vals     Struct which holds lookup values
 
   @return
     0             success
@@ -2641,7 +3036,7 @@ static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table)
   @param[in]      thd                   thread handler
   @param[in]      cond                  WHERE condition
   @param[in]      tables                I_S table
-  @param[in, out] lookup_field_values   Struct which holds lookup values 
+  @param[in, out] lookup_field_values   Struct which holds lookup values
 
   @return
     0             success
@@ -2721,7 +3116,7 @@ enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table)
     idx_field_vals        idx_field_vals->db_name contains db name or
                           wild string
     with_i_schema         returns 1 if we added 'IS' name to list
-                          otherwise returns 0 
+                          otherwise returns 0
 
   RETURN
     zero                  success
@@ -2745,7 +3140,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
       LIKE clause (see also get_index_field_values() function)
     */
     if (!lookup_field_vals->db_value.str ||
-        !wild_case_compare(system_charset_info, 
+        !wild_case_compare(system_charset_info,
                            INFORMATION_SCHEMA_NAME.str,
                            lookup_field_vals->db_value.str))
     {
@@ -2789,7 +3184,7 @@ int make_db_list(THD *thd, List<LEX_STRING> *files,
 }
 
 
-struct st_add_schema_table 
+struct st_add_schema_table
 {
   List<LEX_STRING> *files;
   const char *wild;
@@ -2853,7 +3248,7 @@ int schema_tables_add(THD *thd, List<LEX_STRING> *files, const char *wild)
       else if (wild_compare(tmp_schema_table->table_name, wild, 0))
         continue;
     }
-    if ((file_name= 
+    if ((file_name=
          thd->make_lex_string(file_name, tmp_schema_table->table_name,
                               strlen(tmp_schema_table->table_name), TRUE)) &&
         !files->push_back(file_name))
@@ -2915,7 +3310,7 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
       }
     }
     else
-    {    
+    {
       if (table_names->push_back(&lookup_field_vals->table_value))
         return 1;
       /*
@@ -2980,7 +3375,7 @@ make_table_name_list(THD *thd, List<LEX_STRING> *table_names, LEX *lex,
     @retval       1           error
 */
 
-static int 
+static int
 fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
                               ST_SCHEMA_TABLE *schema_table,
                               bool can_deadlock,
@@ -3007,7 +3402,7 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
     Let us set fake sql_command so views won't try to merge
     themselves into main statement. If we don't do this,
     SELECT * from information_schema.xxxx will cause problems.
-    SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()' 
+    SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()'
   */
   lex->sql_command= SQLCOM_SHOW_FIELDS;
   res= open_normal_and_derived_tables(thd, show_table_list,
@@ -3023,11 +3418,11 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
     get_all_tables() returns 1 on failure and 0 on success thus
     return only these and not the result code of ::process_table()
 
-    We should use show_table_list->alias instead of 
+    We should use show_table_list->alias instead of
     show_table_list->table_name because table_name
     could be changed during opening of I_S tables. It's safe
-    to use alias because alias contains original table name 
-    in this case(this part of code is used only for 
+    to use alias because alias contains original table name
+    in this case(this part of code is used only for
     'show columns' & 'show statistics' commands).
   */
    table_name= thd->make_lex_string(&tmp_lex_string1, show_table_list->alias,
@@ -3037,7 +3432,7 @@ fill_schema_show_cols_or_idxs(THD *thd, TABLE_LIST *tables,
                                    show_table_list->db_length, FALSE);
    else
      db_name= &show_table_list->view_db;
-      
+
 
    error= test(schema_table->process_table(thd, show_table_list,
                                            table, res, db_name,
@@ -3428,7 +3823,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
   List<LEX_STRING> db_names;
   List_iterator_fast<LEX_STRING> it(db_names);
   COND *partial_cond= 0;
-  uint derived_tables= lex->derived_tables; 
+  uint derived_tables= lex->derived_tables;
   int error= 1;
   Open_tables_backup open_tables_state_backup;
   bool save_view_prepare_mode= lex->view_prepare_mode;
@@ -3497,7 +3892,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
 
   if (!lookup_field_vals.wild_db_value && !lookup_field_vals.wild_table_value)
   {
-    /* 
+    /*
       if lookup value is empty string then
       it's impossible table name or db name
     */
@@ -3515,7 +3910,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
       !lookup_field_vals.wild_db_value)
     tables->has_db_lookup_value= TRUE;
   if (lookup_field_vals.table_value.length &&
-      !lookup_field_vals.wild_table_value) 
+      !lookup_field_vals.wild_table_value)
     tables->has_table_lookup_value= TRUE;
 
   if (tables->has_db_lookup_value && tables->has_table_lookup_value)
@@ -3566,7 +3961,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
         {
           /*
             If table is I_S.tables and open_table_method is 0 (eg SKIP_OPEN)
-            we can skip table opening and we don't have lookup value for 
+            we can skip table opening and we don't have lookup value for
             table name or lookup value is wild string(table name list is
             already created by make_table_name_list() function).
           */
@@ -3647,10 +4042,10 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond)
             else
             {
               /*
-                We should use show_table_list->alias instead of 
+                We should use show_table_list->alias instead of
                 show_table_list->table_name because table_name
                 could be changed during opening of I_S tables. It's safe
-                to use alias because alias contains original table name 
+                to use alias because alias contains original table name
                 in this case.
               */
               thd->make_lex_string(&tmp_lex_string, show_table_list->alias,
@@ -3718,9 +4113,9 @@ int fill_schema_schemata(THD *thd, TABLE_LIST *tables, COND *cond)
 
   if (get_lookup_field_values(thd, cond, tables, &lookup_field_vals))
     DBUG_RETURN(0);
-  DBUG_PRINT("INDEX VALUES",("db_name='%s', table_name='%s'",
-                             lookup_field_vals.db_value.str,
-                             lookup_field_vals.table_value.str));
+  DBUG_PRINT("INDEX VALUES",("db_name: %s  table_name: %s",
+                             val_or_null(lookup_field_vals.db_value.str),
+                             val_or_null(lookup_field_vals.table_value.str)));
   if (make_db_list(thd, &db_names, &lookup_field_vals,
                    &with_i_schema))
     DBUG_RETURN(1);
@@ -3806,7 +4201,8 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
   }
   else
   {
-    char option_buff[350],*ptr;
+    char option_buff[350];
+    String str(option_buff,sizeof(option_buff), system_charset_info);
     TABLE *show_table= tables->table;
     TABLE_SHARE *share= show_table->s;
     handler *file= show_table->file;
@@ -3839,47 +4235,58 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
     table->field[4]->store(tmp_buff, strlen(tmp_buff), cs);
     table->field[5]->store((longlong) share->frm_version, TRUE);
 
-    ptr=option_buff;
+    str.length(0);
     if (share->min_rows)
     {
-      ptr=strmov(ptr," min_rows=");
-      ptr=longlong10_to_str(share->min_rows,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" min_rows="));
+      str.qs_append(share->min_rows);
     }
     if (share->max_rows)
     {
-      ptr=strmov(ptr," max_rows=");
-      ptr=longlong10_to_str(share->max_rows,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" max_rows="));
+      str.qs_append(share->max_rows);
     }
     if (share->avg_row_length)
     {
-      ptr=strmov(ptr," avg_row_length=");
-      ptr=longlong10_to_str(share->avg_row_length,ptr,10);
+      str.qs_append(STRING_WITH_LEN(" avg_row_length="));
+      str.qs_append(share->avg_row_length);
     }
     if (share->db_create_options & HA_OPTION_PACK_KEYS)
-      ptr=strmov(ptr," pack_keys=1");
+      str.qs_append(STRING_WITH_LEN(" pack_keys=1"));
     if (share->db_create_options & HA_OPTION_NO_PACK_KEYS)
-      ptr=strmov(ptr," pack_keys=0");
+      str.qs_append(STRING_WITH_LEN(" pack_keys=0"));
     /* We use CHECKSUM, instead of TABLE_CHECKSUM, for backward compability */
     if (share->db_create_options & HA_OPTION_CHECKSUM)
-      ptr=strmov(ptr," checksum=1");
+      str.qs_append(STRING_WITH_LEN(" checksum=1"));
+    if (share->page_checksum != HA_CHOICE_UNDEF)
+    {
+      str.qs_append(STRING_WITH_LEN(" page_checksum="));
+      str.qs_append(ha_choice_values[(uint) share->page_checksum]);
+    }
     if (share->db_create_options & HA_OPTION_DELAY_KEY_WRITE)
-      ptr=strmov(ptr," delay_key_write=1");
+      str.qs_append(STRING_WITH_LEN(" delay_key_write=1"));
     if (share->row_type != ROW_TYPE_DEFAULT)
-      ptr=strxmov(ptr, " row_format=", 
-                  ha_row_type[(uint) share->row_type],
-                  NullS);
+    {
+      str.qs_append(STRING_WITH_LEN(" row_format="));
+      str.qs_append(ha_row_type[(uint) share->row_type]);
+    }
     if (share->key_block_size)
     {
-      ptr= strmov(ptr, " KEY_BLOCK_SIZE=");
-      ptr= longlong10_to_str(share->key_block_size, ptr, 10);
+      str.qs_append(STRING_WITH_LEN(" key_block_size="));
+      str.qs_append(share->key_block_size);
     }
 #ifdef WITH_PARTITION_STORAGE_ENGINE
     if (is_partitioned)
-      ptr= strmov(ptr, " partitioned");
+      str.qs_append(STRING_WITH_LEN(" partitioned"));
 #endif
-    table->field[19]->store(option_buff+1,
-                            (ptr == option_buff ? 0 : 
-                             (uint) (ptr-option_buff)-1), cs);
+    if (share->transactional != HA_CHOICE_UNDEF)
+    {
+      str.qs_append(STRING_WITH_LEN(" transactional="));
+      str.qs_append(ha_choice_values[(uint) share->transactional]);
+    }
+    append_create_options(thd, &str, share->option_list);
+    if (str.length())
+      table->field[19]->store(str.ptr()+1, str.length()-1, cs);
 
     tmp_buff= (share->table_charset ?
                share->table_charset->name : "default");
@@ -3916,7 +4323,7 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
         tmp_buff= "Compact";
         break;
       case ROW_TYPE_PAGE:
-        tmp_buff= "Paged";
+        tmp_buff= "Page";
         break;
       }
       table->field[6]->store(tmp_buff, strlen(tmp_buff), cs);
@@ -3961,7 +4368,7 @@ static int get_schema_tables_record(THD *thd, TABLE_LIST *tables,
         table->field[16]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
         table->field[16]->set_notnull();
       }
-      if (file->ha_table_flags() & (ulong) HA_HAS_CHECKSUM)
+      if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
       {
         table->field[18]->store((longlong) file->checksum(), TRUE);
         table->field[18]->set_notnull();
@@ -4201,6 +4608,8 @@ static int get_schema_column_record(THD *thd, TABLE_LIST *tables,
         field->unireg_check != Field::TIMESTAMP_DN_FIELD)
       table->field[16]->store(STRING_WITH_LEN("on update CURRENT_TIMESTAMP"),
                               cs);
+    if (field->vcol_info)
+      table->field[16]->store(STRING_WITH_LEN("VIRTUAL"), cs);
 
     table->field[18]->store(field->comment.str, field->comment.length, cs);
     if (schema_table_store_record(thd, table))
@@ -4222,7 +4631,7 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond)
        cs++)
   {
     CHARSET_INFO *tmp_cs= cs[0];
-    if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) && 
+    if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) &&
         (tmp_cs->state & MY_CS_AVAILABLE) &&
         !(tmp_cs->state & MY_CS_HIDDEN) &&
         !(wild && wild[0] &&
@@ -4258,7 +4667,7 @@ static my_bool iter_schema_engines(THD *thd, plugin_ref plugin,
   if (plugin_state(plugin) != PLUGIN_IS_READY)
   {
 
-    struct st_mysql_plugin *plug= plugin_decl(plugin);
+    struct st_maria_plugin *plug= plugin_decl(plugin);
     if (!(wild && wild[0] &&
           wild_case_compare(scs, plug->name,wild)))
     {
@@ -4339,7 +4748,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          cl ++)
     {
       CHARSET_INFO *tmp_cl= cl[0];
-      if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || 
+      if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) ||
           !my_charset_same(tmp_cs, tmp_cl))
 	continue;
       if (!(wild && wild[0] &&
@@ -4375,7 +4784,7 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond)
   {
     CHARSET_INFO **cl;
     CHARSET_INFO *tmp_cs= cs[0];
-    if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || 
+    if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) ||
         !(tmp_cs->state & MY_CS_PRIMARY))
       continue;
     for (cl= all_charsets;
@@ -4576,7 +4985,7 @@ bool store_schema_proc(THD *thd, TABLE *table, TABLE *proc_table,
   MYSQL_TIME time;
   LEX *lex= thd->lex;
   CHARSET_INFO *cs= system_charset_info;
-  char sp_db_buff[NAME_LEN + 1], sp_name_buff[NAME_LEN + 1],
+  char sp_db_buff[SAFE_NAME_LEN + 1], sp_name_buff[NAME_LEN + 1],
     definer_buff[USERNAME_LENGTH + HOSTNAME_LENGTH + 2],
     returns_buff[MAX_FIELD_WIDTH];
 
@@ -4733,7 +5142,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
     DBUG_RETURN(1);
   }
   proc_table->file->ha_index_init(0, 1);
-  if ((res= proc_table->file->index_first(proc_table->record[0])))
+  if ((res= proc_table->file->ha_index_first(proc_table->record[0])))
   {
     res= (res == HA_ERR_END_OF_FILE) ? 0 : 1;
     goto err;
@@ -4746,7 +5155,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond)
     res= 1;
     goto err;
   }
-  while (!proc_table->file->index_next(proc_table->record[0]))
+  while (!proc_table->file->ha_index_next(proc_table->record[0]))
   {
     if (schema_table_idx == SCH_PROCEDURES ?
         store_schema_proc(thd, table, proc_table, wild, full_access, definer): 
@@ -5033,7 +5442,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
     TABLE *show_table= tables->table;
     KEY *key_info=show_table->key_info;
     uint primary_key= show_table->s->primary_key;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
     for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -5062,7 +5471,7 @@ static int get_schema_constraints_record(THD *thd, TABLE_LIST *tables,
     List_iterator_fast<FOREIGN_KEY_INFO> it(f_key_list);
     while ((f_key_info=it++))
     {
-      if (store_constraints(thd, table, db_name, table_name, 
+      if (store_constraints(thd, table, db_name, table_name,
                             f_key_info->forein_id->str,
                             strlen(f_key_info->forein_id->str),
                             "FOREIGN KEY", 11))
@@ -5220,7 +5629,7 @@ static int get_schema_key_column_usage_record(THD *thd,
     TABLE *show_table= tables->table;
     KEY *key_info=show_table->key_info;
     uint primary_key= show_table->s->primary_key;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
     for (uint i=0 ; i < show_table->s->keys ; i++, key_info++)
@@ -5237,8 +5646,8 @@ static int get_schema_key_column_usage_record(THD *thd,
           restore_record(table, s->default_values);
           store_key_column_usage(table, db_name, table_name,
                                  key_info->name,
-                                 strlen(key_info->name), 
-                                 key_part->field->field_name, 
+                                 strlen(key_info->name),
+                                 key_part->field->field_name,
                                  strlen(key_part->field->field_name),
                                  (longlong) f_idx);
           if (schema_table_store_record(thd, table))
@@ -5274,7 +5683,7 @@ static int get_schema_key_column_usage_record(THD *thd,
                                system_charset_info);
         table->field[9]->set_notnull();
         table->field[10]->store(f_key_info->referenced_table->str,
-                                f_key_info->referenced_table->length, 
+                                f_key_info->referenced_table->length,
                                 system_charset_info);
         table->field[10]->set_notnull();
         table->field[11]->store(r_info->str, r_info->length,
@@ -5401,7 +5810,7 @@ static void store_schema_partitions_record(THD *thd, TABLE *schema_table,
     table->field[20]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
     table->field[20]->set_notnull();
   }
-  if (file->ha_table_flags() & (ulong) HA_HAS_CHECKSUM)
+  if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
   {
     table->field[21]->store((longlong) stat_info.check_sum, TRUE);
     table->field[21]->set_notnull();
@@ -5547,7 +5956,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
         tmp_res.append(partition_keywords[PKW_KEY].str,
                        partition_keywords[PKW_KEY].length);
       else
-        tmp_res.append(partition_keywords[PKW_HASH].str, 
+        tmp_res.append(partition_keywords[PKW_HASH].str,
                        partition_keywords[PKW_HASH].length);
       table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs);
       break;
@@ -5582,7 +5991,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
         tmp_res.append(partition_keywords[PKW_KEY].str,
                        partition_keywords[PKW_KEY].length);
       else
-        tmp_res.append(partition_keywords[PKW_HASH].str, 
+        tmp_res.append(partition_keywords[PKW_HASH].str,
                        partition_keywords[PKW_HASH].length);
       table->field[8]->store(tmp_res.ptr(), tmp_res.length(), cs);
       table->field[8]->set_notnull();
@@ -5695,7 +6104,7 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables,
           /* SUBPARTITION_ORDINAL_POSITION */
           table->field[6]->store((longlong) ++subpart_pos, TRUE);
           table->field[6]->set_notnull();
-          
+
           store_schema_partitions_record(thd, table, show_table, subpart_elem,
                                          file, part_id);
           part_id++;
@@ -5864,7 +6273,7 @@ copy_event_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table)
   else
     sch_table->field[ISE_ON_COMPLETION]->
                                 store(STRING_WITH_LEN("PRESERVE"), scs);
-    
+
   number_to_datetime(et.created, &time, 0, &not_used);
   DBUG_ASSERT(not_used==0);
   sch_table->field[ISE_CREATED]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
@@ -5985,7 +6394,7 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond)
     tmp1= &tmp;
   }
   else
-  { 
+  {
     option_type= OPT_SESSION;
     tmp1= &thd->status_var;
   }
@@ -6040,7 +6449,7 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
   {
     List<FOREIGN_KEY_INFO> f_key_list;
     TABLE *show_table= tables->table;
-    show_table->file->info(HA_STATUS_VARIABLE | 
+    show_table->file->info(HA_STATUS_VARIABLE |
                            HA_STATUS_NO_LOCK |
                            HA_STATUS_TIME);
 
@@ -6058,20 +6467,20 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
       table->field[3]->store(STRING_WITH_LEN("def"), cs);
       table->field[4]->store(f_key_info->referenced_db->str, 
                              f_key_info->referenced_db->length, cs);
-      table->field[10]->store(f_key_info->referenced_table->str, 
+      table->field[10]->store(f_key_info->referenced_table->str,
                              f_key_info->referenced_table->length, cs);
       if (f_key_info->referenced_key_name)
       {
-        table->field[5]->store(f_key_info->referenced_key_name->str, 
+        table->field[5]->store(f_key_info->referenced_key_name->str,
                                f_key_info->referenced_key_name->length, cs);
         table->field[5]->set_notnull();
       }
       else
         table->field[5]->set_null();
       table->field[6]->store(STRING_WITH_LEN("NONE"), cs);
-      table->field[7]->store(f_key_info->update_method->str, 
+      table->field[7]->store(f_key_info->update_method->str,
                              f_key_info->update_method->length, cs);
-      table->field[8]->store(f_key_info->delete_method->str, 
+      table->field[8]->store(f_key_info->delete_method->str,
                              f_key_info->delete_method->length, cs);
       if (schema_table_store_record(thd, table))
         DBUG_RETURN(1);
@@ -6080,12 +6489,87 @@ get_referential_constraints_record(THD *thd, TABLE_LIST *tables,
   DBUG_RETURN(0);
 }
 
-struct schema_table_ref 
+struct schema_table_ref
 {
   const char *table_name;
   ST_SCHEMA_TABLE *schema_table;
 };
 
+ST_FIELD_INFO user_stats_fields_info[]=
+{
+  {"USER", USERNAME_LENGTH, MYSQL_TYPE_STRING, 0, 0, "User", SKIP_OPEN_TABLE},
+  {"TOTAL_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE},
+  {"CONCURRENT_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE},
+  {"CONNECTED_TIME", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE},
+  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE},
+  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE},
+  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE},
+  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE},
+  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE},
+  {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE},
+  {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE},
+  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE},
+  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE},
+  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE},
+  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE},
+  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE},
+  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE},
+  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE},
+  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE},
+  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE},
+  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
+};
+
+ST_FIELD_INFO client_stats_fields_info[]=
+{
+  {"CLIENT", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Client",SKIP_OPEN_TABLE},
+  {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE},
+  {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE},
+  {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE},
+  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE},
+  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE},
+  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE},
+  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE},
+  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE},
+  {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE},
+  {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE},
+  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE},
+  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE},
+  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE},
+  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE},
+  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE},
+  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE},
+  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE},
+  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE},
+  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE},
+  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
+};
+
+
+ST_FIELD_INFO table_stats_fields_info[]=
+{
+  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed",SKIP_OPEN_TABLE},
+  {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed_x_#indexes",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
+};
+
+ST_FIELD_INFO index_stats_fields_info[]=
+{
+  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE},
+  {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name",SKIP_OPEN_TABLE},
+  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0,0}
+};
 
 /*
   Find schema_tables elment by name
@@ -6147,7 +6631,7 @@ ST_SCHEMA_TABLE *find_schema_table(THD *thd, const char* table_name)
   }
 
   schema_table_a.table_name= table_name;
-  if (plugin_foreach(thd, find_schema_table_in_plugin, 
+  if (plugin_foreach(thd, find_schema_table_in_plugin,
                      MYSQL_INFORMATION_SCHEMA_PLUGIN, &schema_table_a))
     DBUG_RETURN(schema_table_a.schema_table);
 
@@ -6169,7 +6653,7 @@ ST_SCHEMA_TABLE *get_schema_table(enum enum_schema_tables schema_table_idx)
     into it two numbers, based on modulus of base-10 numbers.  In the ones
     position is the number of decimals.  Tens position is unused.  In the
     hundreds and thousands position is a two-digit decimal number representing
-    length.  Encode this value with  (decimals*100)+length  , where
+    length.  Encode this value with  (length*100)+decimals  , where
     0<decimals<10 and 0<=length<100 .
 
   @param
@@ -6222,7 +6706,7 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
       break;
     case MYSQL_TYPE_FLOAT:
     case MYSQL_TYPE_DOUBLE:
-      if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC, 
+      if ((item= new Item_float(fields_info->field_name, 0.0, NOT_FIXED_DEC,
                            fields_info->field_length)) == NULL)
         DBUG_RETURN(NULL);
       break;
@@ -6232,6 +6716,12 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list)
       {
         DBUG_RETURN(0);
       }
+      /*
+        Create a type holder, as we want the type of the item to defined
+        the type of the object, not the value
+      */
+      if (!(item= new Item_type_holder(thd, item)))
+        DBUG_RETURN(0);
       item->unsigned_flag= (fields_info->field_flags & MY_I_S_UNSIGNED);
       item->decimals= fields_info->field_length%10;
       item->max_length= (fields_info->field_length/100)%100;
@@ -6621,7 +7111,7 @@ bool get_schema_tables_result(JOIN *join,
 
   thd->no_warnings_for_error= 1;
   for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
-  {  
+  {
     if (!tab->table || !tab->table->pos_in_table_list)
       break;
 
@@ -6720,6 +7210,90 @@ int hton_fill_schema_table(THD *thd, TABLE_LIST *tables, COND *cond)
 }
 
 
+static
+int store_key_cache_table_record(THD *thd, TABLE *table,
+                                 const char *name, uint name_length,
+                                 KEY_CACHE *key_cache,
+                                 uint partitions, uint partition_no)
+{
+  KEY_CACHE_STATISTICS keycache_stats;
+  uint err;
+  DBUG_ENTER("store_key_cache_table_record");
+
+  get_key_cache_statistics(key_cache, partition_no, &keycache_stats);
+
+  if (!key_cache->key_cache_inited || keycache_stats.mem_size == 0)
+    DBUG_RETURN(0);
+
+  restore_record(table, s->default_values);
+  table->field[0]->store(name, name_length, system_charset_info);
+  if (partitions == 0)
+    table->field[1]->set_null();
+  else
+  {
+    table->field[1]->set_notnull(); 
+    table->field[1]->store((long) partitions, TRUE);
+  }
+
+  if (partition_no == 0)
+    table->field[2]->set_null();
+  else
+  {
+    table->field[2]->set_notnull();
+    table->field[2]->store((long) partition_no, TRUE);
+  }
+  table->field[3]->store(keycache_stats.mem_size, TRUE);
+  table->field[4]->store(keycache_stats.block_size, TRUE);
+  table->field[5]->store(keycache_stats.blocks_used, TRUE);
+  table->field[6]->store(keycache_stats.blocks_unused, TRUE);
+  table->field[7]->store(keycache_stats.blocks_changed, TRUE);
+  table->field[8]->store(keycache_stats.read_requests, TRUE);
+  table->field[9]->store(keycache_stats.reads, TRUE);
+  table->field[10]->store(keycache_stats.write_requests, TRUE);
+  table->field[11]->store(keycache_stats.writes, TRUE);
+
+  err= schema_table_store_record(thd, table);
+  DBUG_RETURN(err);
+}
+
+
+int fill_key_cache_tables(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+  TABLE *table= tables->table;
+  I_List_iterator<NAMED_LIST> it(key_caches);
+  NAMED_LIST *element;
+  DBUG_ENTER("fill_key_cache_tables");
+
+  while ((element= it++))
+  {
+    KEY_CACHE *key_cache= (KEY_CACHE *) element->data;
+
+    if (!key_cache->key_cache_inited)
+      continue;
+
+    uint partitions= key_cache->partitions;    
+    DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
+
+    if (partitions)
+    {
+      for (uint i= 0; i < partitions; i++)
+      {
+        if (store_key_cache_table_record(thd, table,
+                                         element->name, element->name_length, 
+                                         key_cache, partitions, i+1))
+	  DBUG_RETURN(1);
+      }
+    }
+
+    if (store_key_cache_table_record(thd, table, 
+                                     element->name, element->name_length,
+                                     key_cache, partitions, 0))
+      DBUG_RETURN(1);
+  }
+  DBUG_RETURN(0);
+}
+
+
 ST_FIELD_INFO schema_fields_info[]=
 {
   {"CATALOG_NAME", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
@@ -6747,17 +7321,17 @@ ST_FIELD_INFO tables_fields_info[]=
   {"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", OPEN_FULL_TABLE},
   {"TABLE_ROWS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", OPEN_FULL_TABLE},
-  {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", OPEN_FULL_TABLE},
-  {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", OPEN_FULL_TABLE},
   {"MAX_DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", OPEN_FULL_TABLE},
-  {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
+  {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", OPEN_FULL_TABLE},
   {"DATA_FREE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", OPEN_FULL_TABLE},
-  {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0, 
+  {"AUTO_INCREMENT", MY_INT64_NUM_DECIMAL_DIGITS , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Auto_increment", OPEN_FULL_TABLE},
   {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", OPEN_FULL_TABLE},
   {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", OPEN_FULL_TABLE},
@@ -6841,7 +7415,7 @@ ST_FIELD_INFO engines_fields_info[]=
 {
   {"ENGINE", 64, MYSQL_TYPE_STRING, 0, 0, "Engine", SKIP_OPEN_TABLE},
   {"SUPPORT", 8, MYSQL_TYPE_STRING, 0, 0, "Support", SKIP_OPEN_TABLE},
-  {"COMMENT", 80, MYSQL_TYPE_STRING, 0, 0, "Comment", SKIP_OPEN_TABLE},
+  {"COMMENT", 160, MYSQL_TYPE_STRING, 0, 0, "Comment", SKIP_OPEN_TABLE},
   {"TRANSACTIONS", 3, MYSQL_TYPE_STRING, 0, 1, "Transactions", SKIP_OPEN_TABLE},
   {"XA", 3, MYSQL_TYPE_STRING, 0, 1, "XA", SKIP_OPEN_TABLE},
   {"SAVEPOINTS", 3 ,MYSQL_TYPE_STRING, 0, 1, "Savepoints", SKIP_OPEN_TABLE},
@@ -7075,8 +7649,8 @@ ST_FIELD_INFO table_names_fields_info[]=
 {
   {"TABLE_CATALOG", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
   {"TABLE_SCHEMA",NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
-  {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Tables_in_",
-   SKIP_OPEN_TABLE},
+  {"TABLE_NAME", NAME_CHAR_LEN + MYSQL50_TABLE_NAME_PREFIX_LENGTH,
+   MYSQL_TYPE_STRING, 0, 0, "Tables_in_", SKIP_OPEN_TABLE},
   {"TABLE_TYPE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_type",
    OPEN_FRM_ONLY},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
@@ -7196,6 +7770,8 @@ ST_FIELD_INFO processlist_fields_info[]=
   {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
   {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
    SKIP_OPEN_TABLE},
+  {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL,
+   0, 0, "Time_ms", SKIP_OPEN_TABLE},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
 };
 
@@ -7214,6 +7790,8 @@ ST_FIELD_INFO plugin_fields_info[]=
   {"PLUGIN_AUTHOR", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"PLUGIN_DESCRIPTION", 65535, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {"PLUGIN_LICENSE", 80, MYSQL_TYPE_STRING, 0, 1, "License", SKIP_OPEN_TABLE},
+  {"PLUGIN_MATURITY", 12, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
+  {"PLUGIN_AUTH_VERSION", 80, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
 };
 
@@ -7239,9 +7817,9 @@ ST_FIELD_INFO files_fields_info[]=
   {"EXTENT_SIZE", 4, MYSQL_TYPE_LONGLONG, 0, 0, 0, SKIP_OPEN_TABLE},
   {"INITIAL_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
-  {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0, 
+  {"MAXIMUM_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
-  {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0, 
+  {"AUTOEXTEND_SIZE", 21, MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
   {"CREATION_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
   {"LAST_UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -7253,20 +7831,20 @@ ST_FIELD_INFO files_fields_info[]=
   {"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format", SKIP_OPEN_TABLE},
   {"TABLE_ROWS", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Rows", SKIP_OPEN_TABLE},
-  {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Avg_row_length", SKIP_OPEN_TABLE},
-  {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_length", SKIP_OPEN_TABLE},
-  {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Max_data_length", SKIP_OPEN_TABLE},
-  {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Index_length", SKIP_OPEN_TABLE},
-  {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"DATA_FREE", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Data_free", SKIP_OPEN_TABLE},
   {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create_time", SKIP_OPEN_TABLE},
   {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update_time", SKIP_OPEN_TABLE},
   {"CHECK_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Check_time", SKIP_OPEN_TABLE},
-  {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0, 
+  {"CHECKSUM", 21 , MYSQL_TYPE_LONGLONG, 0,
    (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), "Checksum", SKIP_OPEN_TABLE},
   {"STATUS", 20, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
   {"EXTRA", 255, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE},
@@ -7351,6 +7929,35 @@ ST_FIELD_INFO tablespaces_fields_info[]=
 };
 
 
+ST_FIELD_INFO keycache_fields_info[]=
+{
+  {"KEY_CACHE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE},
+  {"SEGMENTS", 3, MYSQL_TYPE_LONG, 0, 
+   (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED) , 0, SKIP_OPEN_TABLE},
+  {"SEGMENT_NUMBER", 3, MYSQL_TYPE_LONG, 0,
+   (MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
+  {"FULL_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE},
+  {"BLOCK_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), 0, SKIP_OPEN_TABLE },
+  {"USED_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+    (MY_I_S_UNSIGNED), "Key_blocks_used", SKIP_OPEN_TABLE},
+  {"UNUSED_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_blocks_unused", SKIP_OPEN_TABLE},
+  {"DIRTY_BLOCKS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_blocks_not_flushed", SKIP_OPEN_TABLE},
+  {"READ_REQUESTS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_read_requests", SKIP_OPEN_TABLE},
+  {"READS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_reads", SKIP_OPEN_TABLE},
+  {"WRITE_REQUESTS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_write_requests", SKIP_OPEN_TABLE},
+  {"WRITES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
+   (MY_I_S_UNSIGNED), "Key_writes", SKIP_OPEN_TABLE},
+  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+};
+
+
 /*
   Description of ST_FIELD_INFO in table.h
 
@@ -7360,13 +7967,15 @@ ST_FIELD_INFO tablespaces_fields_info[]=
 
 ST_SCHEMA_TABLE schema_tables[]=
 {
-  {"CHARACTER_SETS", charsets_fields_info, create_schema_table, 
+  {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
    fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
-  {"COLLATIONS", collation_fields_info, create_schema_table, 
+  {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, 
+   fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
+  {"COLLATIONS", collation_fields_info, create_schema_table,
    fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
   {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
    create_schema_table, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0},
-  {"COLUMNS", columns_fields_info, create_schema_table, 
+  {"COLUMNS", columns_fields_info, create_schema_table,
    get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0,
    OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
   {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
@@ -7386,6 +7995,10 @@ ST_SCHEMA_TABLE schema_tables[]=
    fill_status, make_old_format, 0, 0, -1, 0, 0},
   {"GLOBAL_VARIABLES", variables_fields_info, create_schema_table,
    fill_variables, make_old_format, 0, 0, -1, 0, 0},
+  {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table,
+   fill_schema_index_stats, make_old_format, 0, -1, -1, 0, 0},
+  {"KEY_CACHES", keycache_fields_info, create_schema_table,
+   fill_key_cache_tables, make_old_format, 0, -1,-1, 0, 0}, 
   {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table,
    get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0,
    OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
@@ -7401,7 +8014,7 @@ ST_SCHEMA_TABLE schema_tables[]=
   {"PROCESSLIST", processlist_fields_info, create_schema_table,
    fill_schema_processlist, make_old_format, 0, -1, -1, 0, 0},
   {"PROFILING", query_profile_statistics_info, create_schema_table,
-    fill_query_profile_statistics_info, make_profile_table_for_show, 
+    fill_query_profile_statistics_info, make_profile_table_for_show,
     NULL, -1, -1, false, 0},
   {"REFERENTIAL_CONSTRAINTS", referential_constraints_fields_info,
    create_schema_table, get_all_tables, 0, get_referential_constraints_record,
@@ -7416,12 +8029,12 @@ ST_SCHEMA_TABLE schema_tables[]=
    fill_status, make_old_format, 0, 0, -1, 0, 0},
   {"SESSION_VARIABLES", variables_fields_info, create_schema_table,
    fill_variables, make_old_format, 0, 0, -1, 0, 0},
-  {"STATISTICS", stat_fields_info, create_schema_table, 
+  {"STATISTICS", stat_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_stat_record, 1, 2, 0,
    OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
-  {"STATUS", variables_fields_info, create_schema_table, fill_status, 
+  {"STATUS", variables_fields_info, create_schema_table, fill_status,
    make_old_format, 0, 0, -1, 1, 0},
-  {"TABLES", tables_fields_info, create_schema_table, 
+  {"TABLES", tables_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_tables_record, 1, 2, 0,
    OPTIMIZE_I_S_TABLE},
   {"TABLESPACES", tablespaces_fields_info, create_schema_table,
@@ -7433,14 +8046,18 @@ ST_SCHEMA_TABLE schema_tables[]=
    get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
   {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
    fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
+  {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
+   fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
   {"TRIGGERS", triggers_fields_info, create_schema_table,
    get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
    OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
   {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, 
    fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
+  {"USER_STATISTICS", user_stats_fields_info, create_schema_table, 
+   fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
   {"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
    make_old_format, 0, 0, -1, 1, 0},
-  {"VIEWS", view_fields_info, create_schema_table, 
+  {"VIEWS", view_fields_info, create_schema_table,
    get_all_tables, 0, get_schema_views_record, 1, 2, 0,
    OPEN_VIEW_ONLY|OPTIMIZE_I_S_TABLE},
   {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
@@ -7466,8 +8083,8 @@ int initialize_schema_table(st_plugin_int *plugin)
   {
     schema_table->create_table= create_schema_table;
     schema_table->old_format= make_old_format;
-    schema_table->idx_field1= -1, 
-    schema_table->idx_field2= -1; 
+    schema_table->idx_field1= -1,
+    schema_table->idx_field2= -1;
 
     /* Make the name available to the init() function. */
     schema_table->table_name= plugin->name.str;
@@ -7480,7 +8097,7 @@ int initialize_schema_table(st_plugin_int *plugin)
       my_free(schema_table);
       DBUG_RETURN(1);
     }
-    
+
     /* Make sure the plugin name is not set inside the init() function. */
     schema_table->table_name= plugin->name.str;
   }
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 45ef14c1319..6c185b4a61d 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -49,7 +49,9 @@ struct TABLE;
    the callback function 'unpack_addon_fields'.
 */
 
-typedef struct st_sort_addon_field {  /* Sort addon packed field */
+typedef struct st_sort_addon_field
+{
+  /* Sort addon packed field */
   Field *field;          /* Original field */
   uint   offset;         /* Offset from the last sorted field */
   uint   null_offset;    /* Offset to to null bit from the last sorted field */
@@ -57,14 +59,6 @@ typedef struct st_sort_addon_field {  /* Sort addon packed field */
   uint8  null_bit;       /* Null bit mask for the field */
 } SORT_ADDON_FIELD;
 
-typedef struct st_buffpek {		/* Struktur om sorteringsbuffrarna */
-  my_off_t file_pos;			/* Where we are in the sort file */
-  uchar *base,*key;			/* key pointers */
-  ha_rows count;			/* Number of rows in table */
-  ulong mem_count;			/* numbers of keys in memory */
-  ulong max_keys;			/* Max keys in buffert */
-} BUFFPEK;
-
 struct BUFFPEK_COMPARE_CONTEXT
 {
   qsort_cmp2 key_compare;
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 4b7dab243d2..058af3c6826 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -27,6 +27,9 @@
 
 #include "sql_string.h"
 
+#ifdef MYSQL_CLIENT
+#error Attempt to use server-side sql_string on client. Use client/sql_string.cc
+#endif 
 /*****************************************************************************
 ** String functions
 *****************************************************************************/
@@ -58,13 +61,13 @@ bool String::real_alloc(uint32 length)
 
 bool String::realloc(uint32 alloc_length)
 {
-  uint32 len=ALIGN_SIZE(alloc_length+1);
-  DBUG_ASSERT(len > alloc_length);
-  if (len <= alloc_length)
-    return TRUE;                                 /* Overflow */
-  if (Alloced_length < len)
+  if (Alloced_length <= alloc_length)
   {
     char *new_ptr;
+    uint32 len= ALIGN_SIZE(alloc_length+1);
+    DBUG_ASSERT(len > alloc_length);
+    if (len <= alloc_length)
+      return TRUE;                                 /* Overflow */
     if (alloced)
     {
       if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
@@ -635,10 +638,10 @@ void String::qs_append(int i)
   str_length+= (int) (end-buff);
 }
 
-void String::qs_append(uint i)
+void String::qs_append(ulonglong i)
 {
   char *buff= Ptr + str_length;
-  char *end= int10_to_str(i, buff, 10);
+  char *end= longlong10_to_str(i, buff,10);
   str_length+= (int) (end-buff);
 }
 
@@ -914,6 +917,65 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs,
 }
 
 /*
+  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+*/
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+                 uint *errors)
+{
+  /*
+    If any of the character sets is not ASCII compatible,
+    immediately switch to slow mb_wc->wc_mb method.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return copy_and_convert_extended(to, to_length, to_cs,
+                                     from, from_length, from_cs, errors);
+
+  uint32 length= min(to_length, from_length), length2= length;
+
+#if defined(__i386__)
+  /*
+    Special loop for i386, it allows to refer to a
+    non-aligned memory block as UINT32, which makes
+    it possible to copy four bytes at once. This
+    gives about 10% performance improvement comparing
+    to byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080)
+      break;
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif
+
+  for (; ; *to++= *from++, length--)
+  {
+    if (!length)
+    {
+      *errors= 0;
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length;
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + copy_and_convert_extended(to, to_length,
+                                                       to_cs,
+                                                       from, from_length,
+                                                       from_cs,
+                                                       errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}
+
+
+/*
   copy a string,
   with optional character set conversion,
   with optional left padding (for binary -> UCS2 conversion)
diff --git a/sql/sql_string.h b/sql/sql_string.h
index d21b5353b76..880edce0e77 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -26,6 +26,10 @@
 #include "my_sys.h"              /* alloc_root, my_free, my_realloc */
 #include "m_string.h"                           /* TRASH */
 
+#ifdef MYSQL_CLIENT
+#error Attempt to use server-side sql_string on client. Use client/sql_string.h
+#endif 
+
 class String;
 typedef struct charset_info_st CHARSET_INFO;
 typedef struct st_io_cache IO_CACHE;
@@ -73,6 +77,11 @@ public:
     Ptr=(char*) str; str_length=(uint) strlen(str); Alloced_length=0; alloced=0;
     str_charset=cs;
   }
+  /*
+    NOTE: If one intend to use the c_ptr() method, the following two
+    contructors need the size of memory for STR to be at least LEN+1 (to make
+    room for zero termination).
+  */
   String(const char *str,uint32 len, CHARSET_INFO *cs)
   { 
     Ptr=(char*) str; str_length=len; Alloced_length=0; alloced=0;
@@ -338,6 +347,10 @@ public:
     int4store(Ptr + position,value);
   }
 
+  void qs_append(const char *str)
+  {
+    qs_append(str, strlen(str));
+  }
   void qs_append(const char *str, uint32 len);
   void qs_append(double d);
   void qs_append(double *d);
@@ -347,7 +360,15 @@ public:
      str_length++;
   }
   void qs_append(int i);
-  void qs_append(uint i);
+  void qs_append(uint i)
+  {
+    qs_append((ulonglong)i);
+  }
+  void qs_append(ulong i)
+  {
+    qs_append((ulonglong)i);
+  }
+  void qs_append(ulonglong i);
 
   /* Inline (general) functions used by the protocol functions */
 
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 72fb4ea930b..98b3e4ebaf2 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -45,6 +45,7 @@
 #include <hash.h>
 #include <myisam.h>
 #include <my_dir.h>
+#include "create_options.h"
 #include "sp_head.h"
 #include "sp.h"
 #include "sql_trigger.h"
@@ -70,14 +71,8 @@ static int copy_data_between_tables(TABLE *from,TABLE *to,
 
 static bool prepare_blob_field(THD *thd, Create_field *sql_field);
 static bool check_engine(THD *, const char *, HA_CREATE_INFO *);
-static int
-mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
-                           Alter_info *alter_info,
-                           bool tmp_table,
-                           uint *db_options,
-                           handler *file, KEY **key_info_buffer,
-                           uint *key_count, int select_field_count);
-
+static int mysql_prepare_create_table(THD *, HA_CREATE_INFO *, Alter_info *,
+                              bool, uint *, handler *, KEY **, uint *, int);
 
 /**
   @brief Helper function for explain_filename
@@ -2697,7 +2692,12 @@ int prepare_create_field(Create_field *sql_field,
                           (sql_field->decimals << FIELDFLAG_DEC_SHIFT));
     break;
   }
-  if (!(sql_field->flags & NOT_NULL_FLAG))
+  if (sql_field->flags & NOT_NULL_FLAG)
+    DBUG_PRINT("info", ("1"));
+  if (sql_field->vcol_info)
+    DBUG_PRINT("info", ("2"));
+  if (!(sql_field->flags & NOT_NULL_FLAG) ||
+      (sql_field->vcol_info))  /* Make virtual columns allow NULL values */
     sql_field->pack_flag|= FIELDFLAG_MAYBE_NULL;
   if (sql_field->flags & NO_DEFAULT_VALUE_FLAG)
     sql_field->pack_flag|= FIELDFLAG_NO_DEFAULT;
@@ -3050,6 +3050,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
             null_fields--;
 	  sql_field->flags=		dup_field->flags;
           sql_field->interval=          dup_field->interval;
+          sql_field->vcol_info=         dup_field->vcol_info;
+          sql_field->stored_in_db=      dup_field->stored_in_db;
 	  it2.remove();			// Remove first (create) definition
 	  select_field_pos--;
 	  break;
@@ -3059,7 +3061,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     /* Don't pack rows in old tables if the user has requested this */
     if ((sql_field->flags & BLOB_FLAG) ||
 	(sql_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED))
+         create_info->row_type != ROW_TYPE_FIXED))
       (*db_options)|= HA_OPTION_PACK_RECORD;
     it2.rewind();
   }
@@ -3082,7 +3084,28 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     sql_field->offset= record_offset;
     if (MTYP_TYPENR(sql_field->unireg_check) == Field::NEXT_NUMBER)
       auto_increment++;
-    record_offset+= sql_field->pack_length;
+    if (parse_option_list(thd, &sql_field->option_struct,
+                          sql_field->option_list,
+                          create_info->db_type->field_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
+    /*
+      For now skip fields that are not physically stored in the database
+      (virtual fields) and update their offset later 
+      (see the next loop).
+    */
+    if (sql_field->stored_in_db)
+      record_offset+= sql_field->pack_length;
+  }
+  /* Update virtual fields' offset*/
+  it.rewind();
+  while ((sql_field=it++))
+  {
+    if (!sql_field->stored_in_db)
+    {
+      sql_field->offset= record_offset;
+      record_offset+= sql_field->pack_length;
+    }
   }
   if (timestamps_with_niladic > 1)
   {
@@ -3131,6 +3154,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     if (key->type == Key::FOREIGN_KEY)
     {
       fk_key_count++;
+      if (((Foreign_key *)key)->validate(alter_info->create_list))
+        DBUG_RETURN(TRUE);
       Foreign_key *fk_key= (Foreign_key*) key;
       if (fk_key->ref_columns.elements &&
 	  fk_key->ref_columns.elements != fk_key->columns.elements)
@@ -3260,6 +3285,12 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     key_info->key_part=key_part_info;
     key_info->usable_key_parts= key_number;
     key_info->algorithm= key->key_create_info.algorithm;
+    key_info->option_list= key->option_list;
+    if (parse_option_list(thd, &key_info->option_struct,
+                          key_info->option_list,
+                          create_info->db_type->index_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
 
     if (key->type == Key::FULLTEXT)
     {
@@ -3424,6 +3455,17 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
 	  }
 	}
 #endif
+        if (!sql_field->stored_in_db)
+        {
+          /* Key fields must always be physically stored. */
+          my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
+          DBUG_RETURN(TRUE);
+        }
+        if (key->type == Key::PRIMARY && sql_field->vcol_info)
+        {
+          my_error(ER_PRIMARY_KEY_BASED_ON_VIRTUAL_COLUMN, MYF(0));
+          DBUG_RETURN(TRUE);
+        }
 	if (!(sql_field->flags & NOT_NULL_FLAG))
 	{
 	  if (key->type == Key::PRIMARY)
@@ -3508,7 +3550,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
 	else if (!(file->ha_table_flags() & HA_NO_PREFIX_CHAR_KEYS))
 	  length=column->length;
       }
-      else if (length == 0)
+      else if (length == 0 && (sql_field->flags & NOT_NULL_FLAG))
       {
 	my_error(ER_WRONG_KEY_COLUMN, MYF(0), column->field_name.str);
 	  DBUG_RETURN(TRUE);
@@ -3676,6 +3718,12 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
     }
   }
 
+  if (parse_option_list(thd, &create_info->option_struct,
+                          create_info->option_list,
+                          file->partition_ht()->table_options, FALSE,
+                          thd->mem_root))
+      DBUG_RETURN(TRUE);
+
   DBUG_RETURN(FALSE);
 }
 
@@ -3885,8 +3933,9 @@ bool mysql_create_table_no_lock(THD *thd,
   set_table_default_charset(thd, create_info, (char*) db);
 
   db_options= create_info->table_options;
-  if (create_info->row_type == ROW_TYPE_DYNAMIC)
-    db_options|=HA_OPTION_PACK_RECORD;
+  if (create_info->row_type != ROW_TYPE_FIXED &&
+      create_info->row_type != ROW_TYPE_DEFAULT)
+    db_options|= HA_OPTION_PACK_RECORD;
   alias= table_case_name(create_info, table_name);
   if (!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root,
                               create_info->db_type)))
@@ -4091,6 +4140,15 @@ bool mysql_create_table_no_lock(THD *thd,
     goto err;
   }
 
+  /* Give warnings for not supported table options */
+  if (create_info->transactional && !file->ht->commit)
+    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+                        ER_ILLEGAL_HA_CREATE_OPTION,
+                        ER(ER_ILLEGAL_HA_CREATE_OPTION),
+                        file->engine_name()->str,
+                        "TRANSACTIONAL=1");
+
+
   if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
   {
     if (!access(path,F_OK))
@@ -4145,7 +4203,6 @@ bool mysql_create_table_no_lock(THD *thd,
           goto warn;
         my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name);
         goto err;
-        break;
       default:
         DBUG_PRINT("info", ("error: %u from storage engine", retcode));
         my_error(retcode, MYF(0),table_name);
@@ -4379,7 +4436,7 @@ mysql_rename_table(handlerton *base, const char *old_db,
   char from[FN_REFLEN + 1], to[FN_REFLEN + 1],
     lc_from[FN_REFLEN + 1], lc_to[FN_REFLEN + 1];
   char *from_base= from, *to_base= to;
-  char tmp_name[NAME_LEN+1];
+  char tmp_name[SAFE_NAME_LEN+1];
   handler *file;
   int error=0;
   DBUG_ENTER("mysql_rename_table");
@@ -4765,6 +4822,7 @@ mysql_compare_tables(TABLE *table,
   KEY_PART_INFO *key_part;
   KEY_PART_INFO *end;
   THD *thd= table->in_use;
+  uint i;
   /*
     Remember if the new definition has new VARCHAR column;
     create_info->varchar will be reset in mysql_prepare_create_table.
@@ -4791,6 +4849,9 @@ mysql_compare_tables(TABLE *table,
   Alter_info tmp_alter_info(*alter_info, thd->mem_root);
   uint db_options= 0; /* not used */
 
+  /* Set default value for return value (to ensure it's always set) */
+  *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+
   /* Create the prepared information. */
   if (mysql_prepare_create_table(thd, create_info,
                                  &tmp_alter_info,
@@ -4839,6 +4900,8 @@ mysql_compare_tables(TABLE *table,
       create_info->used_fields & HA_CREATE_USED_CHARSET ||
       create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET ||
       (table->s->row_type != create_info->row_type) ||
+      create_info->used_fields & HA_CREATE_USED_PAGE_CHECKSUM ||
+      create_info->used_fields & HA_CREATE_USED_TRANSACTIONAL ||
       create_info->used_fields & HA_CREATE_USED_PACK_KEYS ||
       create_info->used_fields & HA_CREATE_USED_MAX_ROWS ||
       (alter_info->flags & (ALTER_RECREATE | ALTER_FOREIGN_KEY)) ||
@@ -4846,10 +4909,16 @@ mysql_compare_tables(TABLE *table,
       !table->s->mysql_version ||
       (table->s->frm_version < FRM_VER_TRUE_VARCHAR && varchar))
   {
-    *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+    DBUG_PRINT("info", ("Basic checks -> ALTER_TABLE_DATA_CHANGED"));
     DBUG_RETURN(0);
   }
 
+  if ((create_info->fileds_option_struct=
+       (void**)thd->calloc(sizeof(void*) * table->s->fields)) == NULL ||
+      (create_info->indexes_option_struct=
+       (void**)thd->calloc(sizeof(void*) * table->s->keys)) == NULL)
+    DBUG_RETURN(1);
+
   /*
     Use transformed info to evaluate possibility of fast ALTER TABLE
     but use the preserved field to persist modifications.
@@ -4861,12 +4930,15 @@ mysql_compare_tables(TABLE *table,
     Go through fields and check if the original ones are compatible
     with new table.
   */
-  for (f_ptr= table->field, new_field= new_field_it++,
+  for (i= 0, f_ptr= table->field, new_field= new_field_it++,
        tmp_new_field= tmp_new_field_it++;
        (field= *f_ptr);
-       f_ptr++, new_field= new_field_it++,
+       i++, f_ptr++, new_field= new_field_it++,
        tmp_new_field= tmp_new_field_it++)
   {
+    DBUG_ASSERT(i < table->s->fields);
+    create_info->fileds_option_struct[i]= tmp_new_field->option_struct;
+
     /* Make sure we have at least the default charset in use. */
     if (!new_field->charset)
       new_field->charset= create_info->default_table_charset;
@@ -4875,16 +4947,30 @@ mysql_compare_tables(TABLE *table,
     if ((tmp_new_field->flags & NOT_NULL_FLAG) !=
 	(uint) (field->flags & NOT_NULL_FLAG))
     {
+      DBUG_PRINT("info", ("NULL behaviour difference in field '%s' -> "
+                          "ALTER_TABLE_DATA_CHANGED", new_field->field_name));
+      DBUG_RETURN(0);
+    }
+
+    /*
+      Check if the altered column is computed and either
+      is stored or is used in the partitioning expression.
+      TODO: Mark such a column with an alter flag only if
+      the defining expression has changed.
+    */
+    if (field->vcol_info && 
+        (field->stored_in_db || field->vcol_info->is_in_partitioning_expr()))
+    {
       *need_copy_table= ALTER_TABLE_DATA_CHANGED;
       DBUG_RETURN(0);
     }
 
     /* Don't pack rows in old tables if the user has requested this. */
-    if (create_info->row_type == ROW_TYPE_DYNAMIC ||
-	(tmp_new_field->flags & BLOB_FLAG) ||
-	(tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
-	create_info->row_type != ROW_TYPE_FIXED))
-      create_info->table_options|= HA_OPTION_PACK_RECORD;
+      if (create_info->row_type == ROW_TYPE_DYNAMIC ||
+          (tmp_new_field->flags & BLOB_FLAG) ||
+          (tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
+           create_info->row_type != ROW_TYPE_FIXED))
+        create_info->table_options|= HA_OPTION_PACK_RECORD;
 
     /* Check if field was renamed */
     field->flags&= ~FIELD_IS_RENAMED;
@@ -4896,7 +4982,8 @@ mysql_compare_tables(TABLE *table,
     /* Evaluate changes bitmap and send to check_if_incompatible_data() */
     if (!(tmp= field->is_equal(tmp_new_field)))
     {
-      *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+      DBUG_PRINT("info", ("!field_is_equal('%s') -> ALTER_TABLE_DATA_CHANGED",
+                          new_field->field_name));
       DBUG_RETURN(0);
     }
     // Clear indexed marker
@@ -5005,7 +5092,9 @@ mysql_compare_tables(TABLE *table,
   for (new_key= *key_info_buffer; new_key < new_key_end; new_key++)
   {
     /* Search an old key with the same name. */
-    for (table_key= table->key_info; table_key < table_key_end; table_key++)
+    for (i= 0, table_key= table->key_info;
+         table_key < table_key_end;
+         i++, table_key++)
     {
       if (! strcmp(table_key->name, new_key->name))
         break;
@@ -5024,21 +5113,31 @@ mysql_compare_tables(TABLE *table,
       }
       DBUG_PRINT("info", ("index added: '%s'", new_key->name));
     }
+    else
+    {
+      DBUG_ASSERT(i < table->s->keys);
+      create_info->indexes_option_struct[i]= new_key->option_struct;
+    }
   }
 
   /* Check if changes are compatible with current handler without a copy */
   if (table->file->check_if_incompatible_data(create_info, changes))
   {
-    *need_copy_table= ALTER_TABLE_DATA_CHANGED;
+    DBUG_PRINT("info", ("check_if_incompatible_data() -> "
+                        "ALTER_TABLE_DATA_CHANGED"));
     DBUG_RETURN(0);
   }
 
   if (*index_drop_count || *index_add_count)
   {
+    DBUG_PRINT("info", ("Index dropped=%u added=%u -> "
+                        "ALTER_TABLE_INDEX_CHANGED",
+                        *index_drop_count, *index_add_count));
     *need_copy_table= ALTER_TABLE_INDEX_CHANGED;
     DBUG_RETURN(0);
   }
 
+  DBUG_PRINT("info", (" -> ALTER_TABLE_METADATA_ONLY"));
   *need_copy_table= ALTER_TABLE_METADATA_ONLY; // Tables are compatible
   DBUG_RETURN(0);
 }
@@ -5157,6 +5256,7 @@ blob_length_by_type(enum_field_types type)
     Sets create_info->varchar if the table has a VARCHAR column.
     Prepares alter_info->create_list and alter_info->key_list with
     columns and keys of the new table.
+
   @retval TRUE   error, out of memory or a semantical error in ALTER
                  TABLE instructions
   @retval FALSE  success
@@ -5183,7 +5283,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   uint used_fields= create_info->used_fields;
   KEY *key_info=table->key_info;
   bool rc= TRUE;
-
+  Create_field *def;
+  Field **f_ptr,*field;
   DBUG_ENTER("mysql_prepare_alter_table");
 
   create_info->varchar= FALSE;
@@ -5204,6 +5305,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   }
   if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE))
     create_info->key_block_size= table->s->key_block_size;
+  if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL))
+    create_info->transactional= table->s->transactional;
 
   if (!create_info->tablespace && create_info->storage_media != HA_SM_MEMORY)
   {
@@ -5217,18 +5320,18 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       create_info->tablespace= tablespace;
   }
   restore_record(table, s->default_values);     // Empty record for DEFAULT
-  Create_field *def;
 
+  create_info->option_list= merge_engine_table_options(table->s->option_list,
+                                        create_info->option_list, thd->mem_root);
   /*
     First collect all fields from table which isn't in drop_list
   */
-  Field **f_ptr,*field;
   for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
   {
+    Alter_drop *drop;
     if (field->type() == MYSQL_TYPE_STRING)
       create_info->varchar= TRUE;
     /* Check if field should be dropped */
-    Alter_drop *drop;
     drop_it.rewind();
     while ((drop=drop_it++))
     {
@@ -5261,6 +5364,13 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
     if (def)
     {						// Field is changed
       def->field=field;
+      if (field->stored_in_db != def->stored_in_db)
+      {
+        my_error(ER_UNSUPPORTED_ACTION_ON_VIRTUAL_COLUMN,
+                 MYF(0),
+                 "Changing the STORED status");
+        goto err;
+      }
       if (!def->after)
       {
 	new_create_list.push_back(def);
@@ -5302,7 +5412,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
   {
     if (def->change && ! def->field)
     {
-      my_error(ER_BAD_FIELD_ERROR, MYF(0), def->change, table->s->table_name.str);
+      my_error(ER_BAD_FIELD_ERROR, MYF(0), def->change,
+               table->s->table_name.str);
       goto err;
     }
     /*
@@ -5337,7 +5448,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       }
       if (!find)
       {
-	my_error(ER_BAD_FIELD_ERROR, MYF(0), def->after, table->s->table_name.str);
+	my_error(ER_BAD_FIELD_ERROR, MYF(0), def->after,
+                 table->s->table_name.str);
         goto err;
       }
       find_it.after(def);			// Put element after this
@@ -5387,6 +5499,8 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
 	continue;				// Wrong field (from UNIREG)
       const char *key_part_name=key_part->field->field_name;
       Create_field *cfield;
+      uint key_part_length;
+
       field_it.rewind();
       while ((cfield=field_it++))
       {
@@ -5402,7 +5516,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       }
       if (!cfield)
 	continue;				// Field is removed
-      uint key_part_length=key_part->length;
+      key_part_length= key_part->length;
       if (cfield->field)			// Not new field
       {
         /*
@@ -5475,7 +5589,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
       key= new Key(key_type, key_name, strlen(key_name),
                    &key_create_info,
                    test(key_info->flags & HA_GENERATED_KEY),
-                   key_parts);
+                   key_parts, key_info->option_list);
       new_key_list.push_back(key);
     }
   }
@@ -5483,6 +5597,9 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
     Key *key;
     while ((key=key_it++))			// Add new keys
     {
+      if (key->type == Key::FOREIGN_KEY &&
+          ((Foreign_key *)key)->validate(new_create_list))
+        goto err;
       if (key->type != Key::FOREIGN_KEY)
         new_key_list.push_back(key);
       if (key->name.str &&
@@ -5611,6 +5728,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   uint *index_add_buffer= NULL;
   uint candidate_key_count= 0;
   bool no_pk;
+  ulong explicit_used_fields= 0;
+  enum ha_extra_function extra_func= thd->locked_tables ? HA_EXTRA_NOT_USED
+                                                        : HA_EXTRA_FORCE_REOPEN;
   DBUG_ENTER("mysql_alter_table");
 
   /*
@@ -5825,19 +5945,21 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   }
 
   /*
-   If this is an ALTER TABLE and no explicit row type specified reuse
-   the table's row type.
-   Note : this is the same as if the row type was specified explicitly.
+    If this is an ALTER TABLE and no explicit row type specified reuse
+    the table's row type.
+    Note: this is the same as if the row type was specified explicitly and
+    we must thus set HA_CREATE_USED_ROW_FORMAT!
   */
   if (create_info->row_type == ROW_TYPE_NOT_USED)
   {
     /* ALTER TABLE without explicit row type */
     create_info->row_type= table->s->row_type;
-  }
-  else
-  {
-    /* ALTER TABLE with specific row type */
-    create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
+    /*
+      We have to mark the row type as used, as otherwise the engine may
+      change the row format in update_create_info().
+    */
+    create_info->used_fields|= HA_CREATE_USED_ROW_FORMAT;
+    explicit_used_fields|= HA_CREATE_USED_ROW_FORMAT;
   }
 
   DBUG_PRINT("info", ("old type: %s  new type: %s",
@@ -5859,13 +5981,13 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
     case LEAVE_AS_IS:
       break;
     case ENABLE:
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       DBUG_EXECUTE_IF("sleep_alter_enable_indexes", my_sleep(6000000););
       error= table->file->ha_enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
       break;
     case DISABLE:
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       error=table->file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
       break;
@@ -5893,7 +6015,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         simple rename did nothing and therefore we can safely return
         without additional clean-up.
       */
-      if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+      if (wait_while_table_is_used(thd, table, extra_func))
         goto err;
       close_all_tables_for_name(thd, table->s, TRUE);
       /*
@@ -5991,6 +6113,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   if (mysql_prepare_alter_table(thd, table, create_info, alter_info))
     goto err;
   
+  /* Remove markers set for update_create_info */
+  create_info->used_fields&= ~explicit_used_fields;
+
   if (need_copy_table == ALTER_TABLE_METADATA_ONLY)
     need_copy_table= alter_info->change_level;
 
@@ -6340,7 +6465,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   else
   {
     if (!table->s->tmp_table &&
-        wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN))
+        wait_while_table_is_used(thd, table, extra_func))
       goto err_new_table_cleanup;
     thd_proc_info(thd, "manage keys");
     alter_table_manage_keys(table, table->file->indexes_are_disabled(),
@@ -6384,6 +6509,12 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
       /* Add the indexes. */
       if ((error= table->file->add_index(table, key_info, index_add_count)))
       {
+#warning fix the following
+#ifdef UNKNOWN_MONTY_ADDITION
+        /* Only report error if handler has not already reported an error */
+        if (!thd->main_da.is_error())
+#endif
+        {
         /*
           Exchange the key_info for the error message. If we exchange
           key number by key name in the message later, we need correct info.
@@ -6392,6 +6523,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
         table->key_info= key_info;
         table->file->print_error(error, MYF(0));
         table->key_info= save_key_info;
+        }
         goto err_new_table_cleanup;
       }
     }
@@ -6548,11 +6680,11 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   else if (mysql_rename_table(new_db_type, new_db, tmp_name, new_db,
                               new_alias, FN_FROM_IS_TMP) ||
            ((new_name != table_name || new_db != db) && // we also do rename
-           (need_copy_table != ALTER_TABLE_METADATA_ONLY ||
-            mysql_rename_table(save_old_db_type, db, table_name, new_db,
-                               new_alias, NO_FRM_RENAME)) &&
-           Table_triggers_list::change_table_name(thd, db, table_name,
-                                                  new_db, new_alias)))
+            (need_copy_table != ALTER_TABLE_METADATA_ONLY ||
+             mysql_rename_table(save_old_db_type, db, table_name, new_db,
+                                new_alias, NO_FRM_RENAME)) &&
+            Table_triggers_list::change_table_name(thd, db, table_name,
+                                                   new_db, new_alias)))
   {
     /* Try to get everything back. */
     error=1;
@@ -6633,27 +6765,6 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
   if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
     DBUG_RETURN(TRUE);
 
-  if (ha_check_storage_engine_flag(old_db_type, HTON_FLUSH_AFTER_RENAME))
-  {
-    /*
-      For the alter table to be properly flushed to the logs, we
-      have to open the new table.  If not, we get a problem on server
-      shutdown. But we do not need to attach MERGE children.
-    */
-    char path[FN_REFLEN];
-    TABLE *t_table;
-    build_table_filename(path + 1, sizeof(path) - 1, new_db, table_name, "", 0);
-    t_table= open_temporary_table(thd, path, new_db, tmp_name, 0);
-    if (t_table)
-    {
-      intern_close_table(t_table);
-      my_free(t_table);
-    }
-    else
-      sql_print_warning("Could not open table %s.%s after rename\n",
-                        new_db,table_name);
-    ha_flush_logs(old_db_type);
-  }
   table_list->table=0;				// For query cache
   query_cache_invalidate3(thd, table_list, 0);
 
@@ -6739,7 +6850,9 @@ err_with_mdl:
   thd->mdl_context.release_all_locks_for_name(mdl_ticket);
   DBUG_RETURN(TRUE);
 }
-/* mysql_alter_table */
+
+
+/* Copy all rows from one table to another */
 
 
 
@@ -6799,9 +6912,9 @@ copy_data_between_tables(TABLE *from,TABLE *to,
                          enum enum_enable_or_disable keys_onoff,
                          bool error_if_not_empty)
 {
-  int error;
-  Copy_field *copy,*copy_end;
-  ulong found_count,delete_count;
+  int error= 1, errpos= 0;
+  Copy_field *copy= NULL, *copy_end;
+  ha_rows found_count= 0, delete_count= 0;
   THD *thd= current_thd;
   uint length= 0;
   SORT_FIELD *sortorder;
@@ -6811,18 +6924,22 @@ copy_data_between_tables(TABLE *from,TABLE *to,
   List<Item>   all_fields;
   ha_rows examined_rows;
   bool auto_increment_field_copied= 0;
-  ulong save_sql_mode;
+  ulong save_sql_mode= thd->variables.sql_mode;
   ulonglong prev_insert_id;
+  List_iterator<Create_field> it(create);
+  Create_field *def;
   DBUG_ENTER("copy_data_between_tables");
 
   if (mysql_trans_prepare_alter_copy_data(thd))
-    DBUG_RETURN(-1);
-  
+    goto err;
+  errpos=1;
+
   if (!(copy= new Copy_field[to->s->fields]))
-    DBUG_RETURN(-1);				/* purecov: inspected */
+    goto err;		/* purecov: inspected */
 
   if (to->file->ha_external_lock(thd, F_WRLCK))
-    DBUG_RETURN(-1);
+    goto err;
+  errpos= 2;
 
   /* We need external lock before we can disable/enable keys */
   alter_table_manage_keys(to, from->file->indexes_are_disabled(), keys_onoff);
@@ -6834,11 +6951,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   from->file->info(HA_STATUS_VARIABLE);
   to->file->ha_start_bulk_insert(from->file->stats.records);
+  errpos= 3;
 
-  save_sql_mode= thd->variables.sql_mode;
-
-  List_iterator<Create_field> it(create);
-  Create_field *def;
   copy_end=copy;
   for (Field **ptr=to->field ; *ptr ; ptr++)
   {
@@ -6862,8 +6976,6 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   }
 
-  found_count=delete_count=0;
-
   if (order)
   {
     if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
@@ -6883,7 +6995,6 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       tables.table= from;
       tables.alias= tables.table_name= from->s->table_name.str;
       tables.db= from->s->db.str;
-      error= 1;
 
       if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
           setup_order(thd, thd->lex->select_lex.ref_pointer_array,
@@ -6899,7 +7010,10 @@ copy_data_between_tables(TABLE *from,TABLE *to,
 
   /* Tell handler that we have values for all columns in the to table */
   to->use_all_columns();
-  init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1, 1, FALSE);
+  to->mark_virtual_columns_for_write(TRUE);
+  if (init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1, 1, FALSE))
+    goto err;
+  errpos= 4;
   if (ignore)
     to->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
   thd->warning_info->reset_current_row_for_warning();
@@ -6912,6 +7026,7 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       error= 1;
       break;
     }
+    update_virtual_fields(thd, from);
     /* Return error if source table isn't empty. */
     if (error_if_not_empty)
     {
@@ -6931,6 +7046,12 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       copy_ptr->do_copy(copy_ptr);
     }
     prev_insert_id= to->file->next_insert_id;
+    update_virtual_fields(thd, to, TRUE);
+    if (thd->is_error())
+    {
+      error= 1;
+      break;
+    }
     error=to->file->ha_write_row(to->record[0]);
     to->auto_increment_field_not_null= FALSE;
     if (error)
@@ -6963,28 +7084,31 @@ copy_data_between_tables(TABLE *from,TABLE *to,
       found_count++;
     thd->warning_info->inc_current_row_for_warning();
   }
-  end_read_record(&info);
+
+err:
+  if (errpos >= 4)
+    end_read_record(&info);
   free_io_cache(from);
-  delete [] copy;				// This is never 0
+  delete [] copy;
 
-  if (to->file->ha_end_bulk_insert() && error <= 0)
+  if (error > 0)
+    to->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
+  if (errpos >= 3 && to->file->ha_end_bulk_insert() && error <= 0)
   {
     to->file->print_error(my_errno,MYF(0));
     error= 1;
   }
   to->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
 
-  if (mysql_trans_commit_alter_copy_data(thd))
+  if (errpos >= 1 && mysql_trans_commit_alter_copy_data(thd))
     error= 1;
 
- err:
   thd->variables.sql_mode= save_sql_mode;
   thd->abort_on_warning= 0;
-  free_io_cache(from);
   *copied= found_count;
   *deleted=delete_count;
   to->file->ha_release_auto_increment();
-  if (to->file->ha_external_lock(thd,F_UNLCK))
+  if (errpos >= 2 && to->file->ha_external_lock(thd,F_UNLCK))
     error=1;
   if (error < 0 && to->file->extra(HA_EXTRA_PREPARE_FOR_RENAME))
     error= 1;
@@ -7054,7 +7178,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
   /* Open one table after the other to keep lock time as short as possible. */
   for (table= tables; table; table= table->next_local)
   {
-    char table_name[NAME_LEN*2+2];
+    char table_name[SAFE_NAME_LEN*2+2];
     TABLE *t;
 
     strxmov(table_name, table->db ,".", table->table_name, NullS);
@@ -7073,11 +7197,14 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
     }
     else
     {
-      if (t->file->ha_table_flags() & HA_HAS_CHECKSUM &&
-	  !(check_opt->flags & T_EXTEND))
+      /* Call ->checksum() if the table checksum matches 'old_mode' settings */
+      if (!(check_opt->flags & T_EXTEND) &&
+          (((t->file->ha_table_flags() & HA_HAS_OLD_CHECKSUM) &&
+            thd->variables.old_mode) ||
+           ((t->file->ha_table_flags() & HA_HAS_NEW_CHECKSUM) &&
+            !thd->variables.old_mode)))
 	protocol->store((ulonglong)t->file->checksum());
-      else if (!(t->file->ha_table_flags() & HA_HAS_CHECKSUM) &&
-	       (check_opt->flags & T_QUICK))
+      else if (check_opt->flags & T_QUICK)
 	protocol->store_null();
       else
       {
@@ -7104,7 +7231,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
               goto err;
             }
 	    ha_checksum row_crc= 0;
-            int error= t->file->rnd_next(t->record[0]);
+            int error= t->file->ha_rnd_next(t->record[0]);
             if (unlikely(error))
             {
               if (error == HA_ERR_RECORD_DELETED)
@@ -7125,6 +7252,9 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables,
 	    {
 	      Field *f= t->field[i];
 
+              if (! thd->variables.old_mode &&
+                  f->is_real_null(0))
+                continue;
              /*
                BLOB and VARCHAR have pointers in their field, we must convert
                to string; GEOMETRY is implemented on top of BLOB.
@@ -7200,7 +7330,7 @@ static bool check_engine(THD *thd, const char *table_name,
     if (create_info->used_fields & HA_CREATE_USED_ENGINE)
     {
       my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
-               ha_resolve_storage_engine_name(*new_engine), "TEMPORARY");
+               hton_name(*new_engine)->str, "TEMPORARY");
       *new_engine= 0;
       return TRUE;
     }
diff --git a/sql/sql_table.h b/sql/sql_table.h
index eb0b1aa94dd..bc32235aaff 100644
--- a/sql/sql_table.h
+++ b/sql/sql_table.h
@@ -108,9 +108,6 @@ enum enum_explain_filename_mode
 /* depends on errmsg.txt Database `db`, Table `t` ... */
 #define EXPLAIN_FILENAME_MAX_EXTRA_LENGTH 63
 
-#define MYSQL50_TABLE_NAME_PREFIX         "#mysql50#"
-#define MYSQL50_TABLE_NAME_PREFIX_LENGTH  9
-
 #define WFRM_WRITE_SHADOW 1
 #define WFRM_INSTALL_SHADOW 2
 #define WFRM_PACK_FRM 4
diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc
index 18650c11c09..47b5f6c26fc 100644
--- a/sql/sql_tablespace.cc
+++ b/sql/sql_tablespace.cc
@@ -38,7 +38,7 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
       push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                           ER_WARN_USING_OTHER_HANDLER,
                           ER(ER_WARN_USING_OTHER_HANDLER),
-                          ha_resolve_storage_engine_name(hton),
+                          hton_name(hton)->str,
                           ts_info->tablespace_name ? ts_info->tablespace_name
                                                 : ts_info->logfile_group_name);
   }
@@ -47,13 +47,14 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
   {
     if ((error= hton->alter_tablespace(hton, thd, ts_info)))
     {
-      if (error == HA_ADMIN_NOT_IMPLEMENTED)
+      if (error == 1)
       {
-        my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "");
+        DBUG_RETURN(1);
       }
-      else if (error == 1)
+
+      if (error == HA_ADMIN_NOT_IMPLEMENTED)
       {
-        DBUG_RETURN(1);
+        my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "");
       }
       else
       {
@@ -67,7 +68,7 @@ int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info)
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                         ER_ILLEGAL_HA_CREATE_OPTION,
                         ER(ER_ILLEGAL_HA_CREATE_OPTION),
-                        ha_resolve_storage_engine_name(hton),
+                        hton_name(hton)->str,
                         "TABLESPACE or LOGFILE GROUP");
   }
   error= write_bin_log(thd, FALSE, thd->query(), thd->query_length());
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 9d0614f8529..9dda1113294 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -238,6 +238,44 @@ TEST_join(JOIN *join)
 }
 
 
+#define FT_KEYPART   (MAX_REF_PARTS+10)
+
+void print_keyuse(KEYUSE *keyuse)
+{
+  char buff[256];
+  char buf2[64]; 
+  const char *fieldname;
+  String str(buff,(uint32) sizeof(buff), system_charset_info);
+  str.length(0);
+  keyuse->val->print(&str, QT_ORDINARY);
+  str.append('\0');
+  if (keyuse->keypart == FT_KEYPART)
+    fieldname= "FT_KEYPART";
+  else
+    fieldname= keyuse->table->key_info[keyuse->key].key_part[keyuse->keypart].field->field_name;
+  longlong2str(keyuse->used_tables, buf2, 16, 0); 
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE: %s.%s=%s  optimize= %d used_tables=%s "
+          "ref_table_rows= %lu keypart_map= %0lx\n",
+          keyuse->table->alias, fieldname, str.ptr(),
+          keyuse->optimize, buf2, (ulong)keyuse->ref_table_rows, 
+          keyuse->keypart_map);
+  DBUG_UNLOCK_FILE;
+  //key_part_map keypart_map; --?? there can be several? 
+}
+
+
+/* purecov: begin inspected */
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "KEYUSE array (%d elements)\n", keyuse_array->elements);
+  DBUG_UNLOCK_FILE;
+  for(uint i=0; i < keyuse_array->elements; i++)
+    print_keyuse((KEYUSE*)dynamic_array_ptr(keyuse_array, i));
+}
+
+
 /* 
   Print the current state during query optimization.
 
@@ -338,6 +376,27 @@ print_plan(JOIN* join, uint idx, double record_count, double read_time,
   DBUG_UNLOCK_FILE;
 }
 
+
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
+{
+  DBUG_LOCK_FILE;
+  fprintf(DBUG_FILE, "\nsemi-join nest{\n");
+  fprintf(DBUG_FILE, "  tables { \n");
+  for (uint i= 0;i < sjm->tables; i++)
+  {
+    fprintf(DBUG_FILE, "    %s%s\n", 
+            sjm->positions[i].table->table->alias,
+            (i == sjm->tables -1)? "": ",");
+  }
+  fprintf(DBUG_FILE, "  }\n");
+  fprintf(DBUG_FILE, "  materialize_cost= %g\n",
+          sjm->materialization_cost.total_cost());
+  fprintf(DBUG_FILE, "  rows= %g\n", sjm->rows);
+  fprintf(DBUG_FILE, "}\n");
+  DBUG_UNLOCK_FILE;
+}
+/* purecov: end */
+
 #endif
 
 C_MODE_START
@@ -471,11 +530,15 @@ static int print_key_cache_status(const char *name, KEY_CACHE *key_cache)
   }
   else
   {
+    KEY_CACHE_STATISTICS stats;
+    get_key_cache_statistics(key_cache, 0, &stats);
+
     printf("%s\n\
 Buffer_size:    %10lu\n\
 Block_size:     %10lu\n\
 Division_limit: %10lu\n\
-Age_limit:      %10lu\n\
+Age_threshold:  %10lu\n\
+Partitions:     %10lu\n\
 blocks used:    %10lu\n\
 not flushed:    %10lu\n\
 w_requests:     %10s\n\
@@ -483,15 +546,17 @@ writes:         %10s\n\
 r_requests:     %10s\n\
 reads:          %10s\n\n",
 	   name,
-	   (ulong) key_cache->param_buff_size,
+	   (ulong)key_cache->param_buff_size,
            (ulong)key_cache->param_block_size,
 	   (ulong)key_cache->param_division_limit,
            (ulong)key_cache->param_age_threshold,
-	   key_cache->blocks_used,key_cache->global_blocks_changed,
-	   llstr(key_cache->global_cache_w_requests,llbuff1),
-           llstr(key_cache->global_cache_write,llbuff2),
-	   llstr(key_cache->global_cache_r_requests,llbuff3),
-           llstr(key_cache->global_cache_read,llbuff4));
+           key_cache->param_partitions,
+	   (ulong)stats.blocks_used,
+           (ulong)stats.blocks_changed,
+	   llstr(stats.write_requests,llbuff1),
+           llstr(stats.writes,llbuff2),
+	   llstr(stats.read_requests,llbuff3),
+           llstr(stats.reads,llbuff4));
   }
   return 0;
 }
diff --git a/sql/sql_test.h b/sql/sql_test.h
index d7fcc126cb2..82d4aad838a 100644
--- a/sql/sql_test.h
+++ b/sql/sql_test.h
@@ -30,6 +30,8 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length);
 void TEST_join(JOIN *join);
 void print_plan(JOIN* join,uint idx, double record_count, double read_time,
                 double current_read_time, const char *info);
+void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array);
+void print_sjm(SJ_MATERIALIZATION_INFO *sjm);
 void dump_TABLE_LIST_graph(SELECT_LEX *select_lex, TABLE_LIST* tl);
 #endif
 void mysql_print_status();
diff --git a/sql/sql_time.cc b/sql/sql_time.cc
index 9606304ea16..de3bd35b46c 100644
--- a/sql/sql_time.cc
+++ b/sql/sql_time.cc
@@ -630,7 +630,7 @@ bool parse_date_time_format(timestamp_type format_type,
       return 0;
     break;
   default:
-    DBUG_ASSERT(1);
+    DBUG_ASSERT(0);
     break;
   }
   return 1;					// Error
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index f0f9011f61f..5c158bae398 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -1417,7 +1417,7 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db,
           To remove this prefix we use check_n_cut_mysql50_prefix().
         */
 
-        char fname[NAME_LEN + 1];
+        char fname[SAFE_NAME_LEN + 1];
         DBUG_ASSERT((!my_strcasecmp(table_alias_charset, lex.query_tables->db, db) ||
                      (check_n_cut_mysql50_prefix(db, fname, sizeof(fname)) &&
                       !my_strcasecmp(table_alias_charset, lex.query_tables->db, fname))) &&
@@ -1934,7 +1934,7 @@ bool Table_triggers_list::change_table_name(THD *thd, const char *db,
     */
     if (my_strcasecmp(table_alias_charset, db, new_db))
     {
-      char dbname[NAME_LEN + 1];
+      char dbname[SAFE_NAME_LEN + 1];
       if (check_n_cut_mysql50_prefix(db, dbname, sizeof(dbname)) && 
           !my_strcasecmp(table_alias_charset, dbname, new_db))
       {
diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc
index a61abdafe3e..99f3808c98e 100644
--- a/sql/sql_truncate.cc
+++ b/sql/sql_truncate.cc
@@ -328,6 +328,7 @@ static bool open_and_lock_table_for_truncate(THD *thd, TABLE_LIST *table_ref,
           upgrade_shared_lock_to_exclusive(table_ref->mdl_request.ticket,
                                            timeout))
         DBUG_RETURN(TRUE);
+
       tdc_remove_table(thd, TDC_RT_REMOVE_ALL, table_ref->db,
                        table_ref->table_name, FALSE);
     }
diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc
index ba1d0ceadeb..605b2367f48 100644
--- a/sql/sql_udf.cc
+++ b/sql/sql_udf.cc
@@ -178,7 +178,13 @@ void udf_init()
   }
 
   table= tables.table;
-  init_read_record(&read_record_info, new_thd, table, NULL,1,0,FALSE);
+  if (init_read_record(&read_record_info, new_thd, table, NULL,1,0,FALSE))
+  {
+    sql_print_error("Could not initialize init_read_record; udf's not "
+                    "loaded");
+    goto end;
+  }
+
   table->use_all_columns();
   while (!(error= read_record_info.read_record(&read_record_info)))
   {
@@ -235,7 +241,7 @@ void udf_init()
     }
     tmp->dlhandle = dl;
     {
-      char buf[NAME_LEN+16], *missing;
+      char buf[SAFE_NAME_LEN+16], *missing;
       if ((missing= init_syms(tmp, buf)))
       {
         sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), missing);
@@ -489,7 +495,7 @@ int mysql_create_function(THD *thd,udf_func *udf)
   }
   udf->dlhandle=dl;
   {
-    char buf[NAME_LEN+16], *missing;
+    char buf[SAFE_NAME_LEN+16], *missing;
     if ((missing= init_syms(udf, buf)))
     {
       my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), missing);
@@ -606,10 +612,10 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name)
     goto err;
   table->use_all_columns();
   table->field[0]->store(exact_name_str, exact_name_len, &my_charset_bin);
-  if (!table->file->index_read_idx_map(table->record[0], 0,
-                                       (uchar*) table->field[0]->ptr,
-                                       HA_WHOLE_KEY,
-                                       HA_READ_KEY_EXACT))
+  if (!table->file->ha_index_read_idx_map(table->record[0], 0,
+                                          (uchar*) table->field[0]->ptr,
+                                          HA_WHOLE_KEY,
+                                          HA_READ_KEY_EXACT))
   {
     int error;
     if ((error = table->file->ha_delete_row(table->record[0])))
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index acc0f704c44..faf0a81c40d 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -60,15 +60,18 @@ bool select_union::send_data(List<Item> &values)
     unit->offset_limit_cnt--;
     return 0;
   }
-  fill_record(thd, table->field, values, 1);
+  fill_record(thd, table->field, values, TRUE, FALSE);
   if (thd->is_error())
     return 1;
 
   if ((error= table->file->ha_write_row(table->record[0])))
   {
-    /* create_myisam_from_heap will generate error if needed */
+    /* create_internal_tmp_table_from_heap will generate error if needed */
     if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
-        create_myisam_from_heap(thd, table, &tmp_table_param, error, 1))
+        create_internal_tmp_table_from_heap(thd, table,
+                                            tmp_table_param.start_recinfo, 
+                                            &tmp_table_param.recinfo, error,
+                                            1))
       return 1;
   }
   return 0;
@@ -103,6 +106,8 @@ bool select_union::flush()
       is_union_distinct  if set, the temporary table will eliminate
                          duplicates on insert
       options            create options
+      table_alias        name of the temporary table
+      bit_fields_as_long convert bit fields to ulonglong
 
   DESCRIPTION
     Create a temporary table that is used to store the result of a UNION,
@@ -116,11 +121,13 @@ bool select_union::flush()
 bool
 select_union::create_result_table(THD *thd_arg, List<Item> *column_types,
                                   bool is_union_distinct, ulonglong options,
-                                  const char *alias)
+                                  const char *alias,
+                                   bool bit_fields_as_long)
 {
   DBUG_ASSERT(table == 0);
   tmp_table_param.init();
   tmp_table_param.field_count= column_types->elements;
+  tmp_table_param.bit_fields_as_long= bit_fields_as_long;
 
   if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
                                  (ORDER*) 0, is_union_distinct, 1,
@@ -132,6 +139,22 @@ select_union::create_result_table(THD *thd_arg, List<Item> *column_types,
 }
 
 
+/**
+  Reset and empty the temporary table that stores the materialized query result.
+
+  @note The cleanup performed here is exactly the same as for the two temp
+  tables of JOIN - exec_tmp_table_[1 | 2].
+*/
+
+void select_union::cleanup()
+{
+  table->file->extra(HA_EXTRA_RESET_STATE);
+  table->file->ha_delete_all_rows();
+  free_io_cache(table);
+  filesort_free_buffers(table,0);
+}
+
+
 /*
   initialization procedures before fake_select_lex preparation()
 
@@ -378,7 +401,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
       create_options= create_options | TMP_TABLE_FORCE_MYISAM;
 
     if (union_result->create_result_table(thd, &types, test(union_distinct),
-                                          create_options, ""))
+                                          create_options, "", FALSE))
       goto err;
     bzero((char*) &result_table_list, sizeof(result_table_list));
     result_table_list.db= (char*) "";
@@ -742,8 +765,8 @@ void st_select_lex_unit::reinit_exec_mechanism()
     TRUE  - error
 */
 
-bool st_select_lex_unit::change_result(select_subselect *new_result,
-                                       select_subselect *old_result)
+bool st_select_lex_unit::change_result(select_result_interceptor *new_result,
+                                       select_result_interceptor *old_result)
 {
   bool res= FALSE;
   for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 68440f6623a..76c90e40b17 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -46,12 +46,12 @@
 
 bool compare_record(TABLE *table)
 {
-  if (table->s->blob_fields + table->s->varchar_fields == 0)
+  if (table->s->can_cmp_whole_record)
     return cmp_record(table,record[1]);
   /* Compare null bits */
   if (memcmp(table->null_flags,
 	     table->null_flags+table->s->rec_buff_length,
-	     table->s->null_bytes))
+	     table->s->null_bytes_for_compare))
     return TRUE;				// Diff in NULL value
   /* Compare updated fields */
   for (Field **ptr= table->field ; *ptr ; ptr++)
@@ -160,7 +160,7 @@ static void prepare_record_for_error_message(int error, TABLE *table)
   /* Tell the engine about the new set. */
   table->file->column_bitmaps_signal();
   /* Read record that is identified by table->file->ref. */
-  (void) table->file->rnd_pos(table->record[1], table->file->ref);
+  (void) table->file->ha_rnd_pos(table->record[1], table->file->ref);
   /* Copy the newly read columns into the new record. */
   for (field_p= table->field; (field= *field_p); field_p++)
     if (bitmap_is_set(&unique_map, field->field_index))
@@ -285,8 +285,7 @@ int mysql_update(THD *thd,
       table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
     else
     {
-      if (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-          table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH)
+      if (((uint) table->timestamp_field_type) & TIMESTAMP_AUTO_SET_ON_UPDATE)
         bitmap_set_bit(table->write_set,
                        table->timestamp_field->field_index);
     }
@@ -320,10 +319,8 @@ int mysql_update(THD *thd,
     update force the table handler to retrieve write-only fields to be able
     to compare records and detect data change.
   */
-  if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ &&
-      table->timestamp_field &&
-      (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-       table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH))
+  if ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) &&
+      (((uint) table->timestamp_field_type) & TIMESTAMP_AUTO_SET_ON_UPDATE))
     bitmap_union(table->read_set, table->write_set);
   // Don't count on usage of 'only index' when calculating which key to use
   table->covering_keys.clear_all();
@@ -467,7 +464,10 @@ int mysql_update(THD *thd,
       */
 
       if (used_index == MAX_KEY || (select && select->quick))
-        init_read_record(&info, thd, table, select, 0, 1, FALSE);
+      {
+        if (init_read_record(&info, thd, table, select, 0, 1, FALSE))
+          goto err;
+      }
       else
         init_read_record_idx(&info, thd, table, 1, used_index, reverse);
 
@@ -476,15 +476,9 @@ int mysql_update(THD *thd,
 
       while (!(error=info.read_record(&info)) && !thd->killed)
       {
+        update_virtual_fields(thd, table);
         thd->examined_row_count++;
-        bool skip_record= FALSE;
-        if (select && select->skip_record(thd, &skip_record))
-        {
-          error= 1;
-          table->file->unlock_row();
-          break;
-        }
-        if (!skip_record)
+	if (!select || (error= select->skip_record(thd)) > 0)
 	{
           if (table->file->was_semi_consistent_read())
 	    continue;  /* repeat the read of the same row if it still exists */
@@ -503,7 +497,15 @@ int mysql_update(THD *thd,
 	  }
 	}
 	else
+        {
 	  table->file->unlock_row();
+          if (error < 0)
+          {
+            /* Fatal error from select->skip_record() */
+            error= 1;
+            break;
+          }
+        }
       }
       if (thd->killed && !error)
 	error= 1;				// Aborted
@@ -541,7 +543,8 @@ int mysql_update(THD *thd,
   if (select && select->quick && select->quick->reset())
     goto err;
   table->file->try_semi_consistent_read(1);
-  init_read_record(&info, thd, table, select, 0, 1, FALSE);
+  if (init_read_record(&info, thd, table, select, 0, 1, FALSE))
+    goto err;
 
   updated= found= 0;
   /*
@@ -590,9 +593,9 @@ int mysql_update(THD *thd,
 
   while (!(error=info.read_record(&info)) && !thd->killed)
   {
+    update_virtual_fields(thd, table);
     thd->examined_row_count++;
-    bool skip_record;
-    if (!select || (!select->skip_record(thd, &skip_record) && !skip_record))
+    if (!select || select->skip_record(thd) > 0)
     {
       if (table->file->was_semi_consistent_read())
         continue;  /* repeat the read of the same row if it still exists */
@@ -860,7 +863,7 @@ int mysql_update(THD *thd,
 err:
   delete select;
   free_underlaid_joins(thd, select_lex);
-  table->set_keyread(FALSE);
+  table->disable_keyread();
   thd->abort_on_warning= 0;
   DBUG_RETURN(1);
 }
@@ -1277,10 +1280,9 @@ int multi_update::prepare(List<Item> &not_used_values,
         update force the table handler to retrieve write-only fields to be able
         to compare records and detect data change.
         */
-      if (table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ &&
-          table->timestamp_field &&
-          (table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_UPDATE ||
-           table->timestamp_field_type == TIMESTAMP_AUTO_SET_ON_BOTH))
+      if ((table->file->ha_table_flags() & HA_PARTIAL_COLUMN_READ) &&
+          (((uint) table->timestamp_field_type) &
+           TIMESTAMP_AUTO_SET_ON_UPDATE))
         bitmap_union(table->read_set, table->write_set);
     }
   }
@@ -1765,15 +1767,17 @@ bool multi_update::send_data(List<Item> &not_used_values)
       /* Store regular updated fields in the row. */
       fill_record(thd,
                   tmp_table->field + 1 + unupdated_check_opt_tables.elements,
-                  *values_for_table[offset], 1);
+                  *values_for_table[offset], TRUE, FALSE);
 
       /* Write row, ignoring duplicated updates to a row */
       error= tmp_table->file->ha_write_row(tmp_table->record[0]);
       if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)
       {
         if (error &&
-            create_myisam_from_heap(thd, tmp_table,
-                                    tmp_table_param + offset, error, 1))
+            create_internal_tmp_table_from_heap(thd, tmp_table,
+                                         tmp_table_param[offset].start_recinfo,
+                                         &tmp_table_param[offset].recinfo,
+                                         error, 1))
         {
           do_update= 0;
           DBUG_RETURN(1);			// Not a table_is_full error
@@ -1851,7 +1855,7 @@ int multi_update::do_updates()
   TABLE_LIST *cur_table;
   int local_error= 0;
   ha_rows org_updated;
-  TABLE *table, *tmp_table;
+  TABLE *table, *tmp_table, *err_table;
   List_iterator_fast<TABLE> check_opt_it(unupdated_check_opt_tables);
   DBUG_ENTER("multi_update::do_updates");
 
@@ -1869,14 +1873,21 @@ int multi_update::do_updates()
     org_updated= updated;
     tmp_table= tmp_tables[cur_table->shared];
     tmp_table->file->extra(HA_EXTRA_CACHE);	// Change to read cache
-    (void) table->file->ha_rnd_init(0);
+    if ((local_error= table->file->ha_rnd_init(0)))
+    {
+      err_table= table;
+      goto err;
+    }
     table->file->extra(HA_EXTRA_NO_CACHE);
 
     check_opt_it.rewind();
     while(TABLE *tbl= check_opt_it++)
     {
-      if (tbl->file->ha_rnd_init(1))
+      if ((local_error= tbl->file->ha_rnd_init(1)))
+      {
+        err_table= tbl;
         goto err;
+      }
       tbl->file->extra(HA_EXTRA_CACHE);
     }
 
@@ -1884,9 +1895,11 @@ int multi_update::do_updates()
       Setup copy functions to copy fields from temporary table
     */
     List_iterator_fast<Item> field_it(*fields_for_table[offset]);
-    Field **field= tmp_table->field + 
-                   1 + unupdated_check_opt_tables.elements; // Skip row pointers
+    Field **field;
     Copy_field *copy_field_ptr= copy_field, *copy_field_end;
+
+    /* Skip row pointers */
+    field= tmp_table->field + 1 + unupdated_check_opt_tables.elements;
     for ( ; *field ; field++)
     {
       Item_field *item= (Item_field* ) field_it++;
@@ -1894,8 +1907,11 @@ int multi_update::do_updates()
     }
     copy_field_end=copy_field_ptr;
 
-    if ((local_error = tmp_table->file->ha_rnd_init(1)))
+    if ((local_error= tmp_table->file->ha_rnd_init(1)))
+    {
+      err_table= tmp_table;
       goto err;
+    }
 
     can_compare_record= (!(table->file->ha_table_flags() &
                            HA_PARTIAL_COLUMN_READ) ||
@@ -1905,13 +1921,17 @@ int multi_update::do_updates()
     for (;;)
     {
       if (thd->killed && trans_safe)
-	goto err;
-      if ((local_error=tmp_table->file->rnd_next(tmp_table->record[0])))
+      {
+        thd->fatal_error();
+	goto err2;
+      }
+      if ((local_error= tmp_table->file->ha_rnd_next(tmp_table->record[0])))
       {
 	if (local_error == HA_ERR_END_OF_FILE)
 	  break;
 	if (local_error == HA_ERR_RECORD_DELETED)
 	  continue;				// May happen on dup key
+        err_table= tmp_table;
 	goto err;
       }
 
@@ -1921,12 +1941,15 @@ int multi_update::do_updates()
       uint field_num= 0;
       do
       {
-        if((local_error=
-              tbl->file->rnd_pos(tbl->record[0],
-                                (uchar *) tmp_table->field[field_num]->ptr)))
+        if ((local_error=
+             tbl->file->ha_rnd_pos(tbl->record[0],
+                                   (uchar*) tmp_table->field[field_num]->ptr)))
+        {
+          err_table= tbl;
           goto err;
+        }
         field_num++;
-      } while((tbl= check_opt_it++));
+      } while ((tbl= check_opt_it++));
 
       table->status|= STATUS_UPDATED;
       store_record(table,record[1]);
@@ -1951,7 +1974,10 @@ int multi_update::do_updates()
           if (error == VIEW_CHECK_SKIP)
             continue;
           else if (error == VIEW_CHECK_ERROR)
-            goto err;
+          {
+            thd->fatal_error();
+            goto err2;
+          }
         }
 	if ((local_error=table->file->ha_update_row(table->record[1],
 						    table->record[0])) &&
@@ -1959,7 +1985,10 @@ int multi_update::do_updates()
 	{
 	  if (!ignore ||
               table->file->is_fatal_error(local_error, HA_CHECK_DUP_KEY))
+          {
+            err_table= table;
 	    goto err;
+          }
 	}
         if (local_error != HA_ERR_RECORD_IS_THE_SAME)
           updated++;
@@ -1994,8 +2023,8 @@ int multi_update::do_updates()
 
 err:
   {
-    prepare_record_for_error_message(local_error, table);
-    table->file->print_error(local_error,MYF(ME_FATALERROR));
+    prepare_record_for_error_message(local_error, err_table);
+    err_table->file->print_error(local_error,MYF(ME_FATALERROR));
   }
 
 err2:
diff --git a/sql/sql_view.cc b/sql/sql_view.cc
index 5fdf7b8d850..3d8b9323b2a 100644
--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -1069,10 +1069,12 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
   bool parse_status;
   bool result, view_is_mergeable;
   TABLE_LIST *UNINIT_VAR(view_main_select_tables);
-
   DBUG_ENTER("mysql_make_view");
   DBUG_PRINT("info", ("table: 0x%lx (%s)", (ulong) table, table->table_name));
 
+  LINT_INIT(parse_status);
+  LINT_INIT(view_select);
+
   if (table->view)
   {
     /*
@@ -1210,7 +1212,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
   table->view= lex= thd->lex= (LEX*) new(thd->mem_root) st_lex_local;
 
   {
-    char old_db_buf[NAME_LEN+1];
+    char old_db_buf[SAFE_NAME_LEN+1];
     LEX_STRING old_db= { old_db_buf, sizeof(old_db_buf) };
     bool dbchanged;
     Parser_state parser_state;
@@ -1236,7 +1238,7 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table,
       + MODE_PIPES_AS_CONCAT          affect expression parsing
       + MODE_ANSI_QUOTES              affect expression parsing
       + MODE_IGNORE_SPACE             affect expression parsing
-      - MODE_NOT_USED                 not used :)
+      - MODE_IGNORE_BAD_TABLE_OPTIONS affect only CREATE/ALTER TABLE parsing
       * MODE_ONLY_FULL_GROUP_BY       affect execution
       * MODE_NO_UNSIGNED_SUBTRACTION  affect execution
       - MODE_NO_DIR_IN_CREATE         affect table creation only
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 83e9078f5cb..608a105efaa 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -57,6 +57,7 @@
 #include "sql_partition_admin.h"               // Alter_table_*_partition_stmt
 #include "sql_signal.h"
 #include "event_parse_data.h"
+#include "create_options.h"
 #include <myisam.h>
 #include <myisammrg.h>
 #include "keycaches.h"
@@ -70,6 +71,9 @@
 
 int yylex(void *yylval, void *yythd);
 
+const LEX_STRING null_lex_str= {0,0};
+const LEX_STRING empty_lex_str= { (char*) "", 0 };
+
 #define yyoverflow(A,B,C,D,E,F)               \
   {                                           \
     ulong val= *(F);                          \
@@ -707,6 +711,7 @@ static bool add_create_index_prepare (LEX *lex, Table_ident *table)
   lex->alter_info.flags= ALTER_ADD_INDEX;
   lex->col_list.empty();
   lex->change= NullS;
+  lex->option_list= NULL;
   return FALSE;
 }
 
@@ -716,7 +721,7 @@ static bool add_create_index (LEX *lex, Key::Keytype type,
 {
   Key *key;
   key= new Key(type, name, info ? info : &lex->key_create_info, generated, 
-               lex->col_list);
+               lex->col_list, lex->option_list);
   if (key == NULL)
     return TRUE;
 
@@ -755,6 +760,7 @@ static bool add_create_index (LEX *lex, Key::Keytype type,
   enum enum_tx_isolation tx_isolation;
   enum Cast_target cast_type;
   enum Item_udftype udf_type;
+  enum ha_choice choice;
   CHARSET_INFO *charset;
   thr_lock_type lock_type;
   interval_type interval, interval_time_st;
@@ -813,6 +819,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ALGORITHM_SYM
 %token  ALL                           /* SQL-2003-R */
 %token  ALTER                         /* SQL-2003-R */
+%token  ALWAYS_SYM
 %token  ANALYZE_SYM
 %token  AND_AND_SYM                   /* OPERATOR */
 %token  AND_SYM                       /* SQL-2003-R */
@@ -864,6 +871,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  CIPHER_SYM
 %token  CLASS_ORIGIN_SYM              /* SQL-2003-N */
 %token  CLIENT_SYM
+%token  CLIENT_STATS_SYM
 %token  CLOSE_SYM                     /* SQL-2003-R */
 %token  COALESCE                      /* SQL-2003-N */
 %token  CODE_SYM
@@ -987,6 +995,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  FUNCTION_SYM                  /* SQL-2003-R */
 %token  GE
 %token  GENERAL
+%token  GENERATED_SYM
 %token  GEOMETRYCOLLECTION
 %token  GEOMETRY_SYM
 %token  GET_FORMAT                    /* MYSQL-FUNC */
@@ -1017,6 +1026,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  IMPORT
 %token  INDEXES
 %token  INDEX_SYM
+%token	INDEX_STATS_SYM
 %token  INFILE
 %token  INITIAL_SIZE_SYM
 %token  INNER_SYM                     /* SQL-2003-R */
@@ -1164,13 +1174,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  OWNER_SYM
 %token  PACK_KEYS_SYM
 %token  PAGE_SYM
+%token  PAGE_CHECKSUM_SYM
 %token  PARAM_MARKER
 %token  PARSER_SYM
+%token  PARSE_VCOL_EXPR_SYM
 %token  PARTIAL                       /* SQL-2003-N */
 %token  PARTITIONING_SYM
 %token  PARTITIONS_SYM
 %token  PARTITION_SYM                 /* SQL-2003-R */
 %token  PASSWORD
+%token  PERSISTENT_SYM
 %token  PHASE_SYM
 %token  PLUGINS_SYM
 %token  PLUGIN_SYM
@@ -1310,6 +1323,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TABLES
 %token  TABLESPACE
 %token  TABLE_REF_PRIORITY
+%token  TABLE_STATS_SYM
 %token  TABLE_SYM                     /* SQL-2003-R */
 %token  TABLE_CHECKSUM_SYM
 %token  TABLE_NAME_SYM                /* SQL-2003-N */
@@ -1330,6 +1344,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TO_SYM                        /* SQL-2003-R */
 %token  TRAILING                      /* SQL-2003-R */
 %token  TRANSACTION_SYM
+%token  TRANSACTIONAL_SYM
 %token  TRIGGERS_SYM
 %token  TRIGGER_SYM                   /* SQL-2003-R */
 %token  TRIM                          /* SQL-2003-N */
@@ -1357,6 +1372,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  UPGRADE_SYM
 %token  USAGE                         /* SQL-2003-N */
 %token  USER                          /* SQL-2003-R */
+%token  USER_STATS_SYM
 %token  USE_FRM
 %token  USE_SYM
 %token  USING                         /* SQL-2003-R */
@@ -1371,7 +1387,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  VARIANCE_SYM
 %token  VARYING                       /* SQL-2003-R */
 %token  VAR_SAMP_SYM
+%token  VIA_SYM
 %token  VIEW_SYM                      /* SQL-2003-N */
+%token  VIRTUAL_SYM
 %token  WAIT_SYM
 %token  WARNINGS
 %token  WEEK_SYM
@@ -1439,7 +1457,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         table_option opt_if_not_exists opt_no_write_to_binlog
         opt_temporary all_or_any opt_distinct
         opt_ignore_leaves fulltext_options spatial_type union_option
-        start_transaction_opts
+        start_transaction_opts field_def
         union_opt select_derived_init option_type2
         opt_natural_language_mode opt_query_expansion
         opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment
@@ -1457,6 +1475,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %type <ulonglong_number>
         ulonglong_num real_ulonglong_num size_number
 
+%type <choice> choice
+
 %type <lock_type>
         replace_lock_option opt_low_priority insert_lock_option load_data_lock
 
@@ -1599,6 +1619,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         part_column_list
         server_def server_options_list server_option
         definer_opt no_definer definer
+        parse_vcol_expr vcol_opt_specifier vcol_opt_attribute
+        vcol_opt_attribute_list vcol_attribute
 END_OF_INPUT
 
 %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt
@@ -1733,6 +1755,7 @@ statement:
         | lock
         | optimize
         | keycache
+        | parse_vcol_expr
         | partition_entry
         | preload
         | prepare
@@ -2059,7 +2082,7 @@ create:
               push_warning_printf(YYTHD, MYSQL_ERROR::WARN_LEVEL_WARN,
                                   ER_WARN_USING_OTHER_HANDLER,
                                   ER(ER_WARN_USING_OTHER_HANDLER),
-                                  ha_resolve_storage_engine_name(lex->create_info.db_type),
+                                  hton_name(lex->create_info.db_type)->str,
                                   $5->table.str);
             }
             create_table_set_open_action_and_adjust_tables(lex);
@@ -2531,6 +2554,7 @@ sp_init_param:
 
             lex->interval_list.empty();
             lex->uint_geom_type= 0;
+            lex->vcol_info= 0;
           }
         ;
 
@@ -4251,7 +4275,11 @@ create2:
         ;
 
 create2a:
-          create_field_list ')' opt_create_table_options
+          create_field_list ')'
+          {
+            Lex->create_info.option_list= NULL;
+          }
+          opt_create_table_options
           opt_create_partitioning
           create3 {}
         |  opt_create_partitioning
@@ -5102,6 +5130,11 @@ create_table_option:
              Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM;
              Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM;
           }
+        | PAGE_CHECKSUM_SYM opt_equal choice
+          {
+            Lex->create_info.used_fields|= HA_CREATE_USED_PAGE_CHECKSUM;
+            Lex->create_info.page_checksum= $3;
+          }
         | DELAY_KEY_WRITE_SYM opt_equal ulong_num
           {
             Lex->create_info.table_options|= $3 ? HA_OPTION_DELAY_KEY_WRITE : HA_OPTION_NO_DELAY_KEY_WRITE;
@@ -5172,6 +5205,35 @@ create_table_option:
             Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE;
             Lex->create_info.key_block_size= $3;
           }
+        | TRANSACTIONAL_SYM opt_equal choice
+          {
+	    Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL;
+            Lex->create_info.transactional= $3;
+          }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->create_info.option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->create_info.option_list,
+                                  &Lex->option_list_last);
+          }
         ;
 
 default_charset:
@@ -5253,6 +5315,7 @@ row_types:
         | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; }
         | REDUNDANT_SYM  { $$= ROW_TYPE_REDUNDANT; }
         | COMPACT_SYM    { $$= ROW_TYPE_COMPACT; }
+        | PAGE_SYM       { $$= ROW_TYPE_PAGE; }
         ;
 
 merge_insert_types:
@@ -5300,25 +5363,33 @@ column_def:
         ;
 
 key_def:
-          normal_key_type opt_ident key_alg '(' key_list ')' normal_key_options
+          normal_key_type opt_ident key_alg '(' key_list ')'
+          { Lex->option_list= NULL; }
+          normal_key_options
           {
             if (add_create_index (Lex, $1, $2))
               MYSQL_YYABORT;
           }
         | fulltext opt_key_or_index opt_ident init_key_options 
-            '(' key_list ')' fulltext_key_options
+            '(' key_list ')'
+          { Lex->option_list= NULL; }
+            fulltext_key_options
           {
             if (add_create_index (Lex, $1, $3))
               MYSQL_YYABORT;
           }
         | spatial opt_key_or_index opt_ident init_key_options 
-            '(' key_list ')' spatial_key_options
+            '(' key_list ')'
+          { Lex->option_list= NULL; }
+            spatial_key_options
           {
             if (add_create_index (Lex, $1, $3))
               MYSQL_YYABORT;
           }
         | opt_constraint constraint_key_type opt_ident key_alg
-          '(' key_list ')' normal_key_options
+          '(' key_list ')'
+          { Lex->option_list= NULL; }
+          normal_key_options
           {
             if (add_create_index (Lex, $2, $3.str ? $3 : $1))
               MYSQL_YYABORT;
@@ -5335,6 +5406,7 @@ key_def:
             if (key == NULL)
               MYSQL_YYABORT;
             lex->alter_info.key_list.push_back(key);
+            lex->option_list= NULL;
             if (add_create_index (lex, Key::MULTIPLE, $1.str ? $1 : $4,
                                   &default_key_create_info, 1))
               MYSQL_YYABORT;
@@ -5374,8 +5446,10 @@ field_spec:
             lex->default_value= lex->on_update_value= 0;
             lex->comment=null_lex_str;
             lex->charset=NULL;
+	    lex->vcol_info= 0;
+            lex->option_list= NULL;
           }
-          type opt_attribute
+          field_def
           {
             LEX *lex=Lex;
             if (add_field_to_list(lex->thd, &$1, (enum enum_field_types) $3,
@@ -5383,8 +5457,98 @@ field_spec:
                                   lex->default_value, lex->on_update_value, 
                                   &lex->comment,
                                   lex->change,&lex->interval_list,lex->charset,
-                                  lex->uint_geom_type))
+                                  lex->uint_geom_type,
+                                  lex->vcol_info, lex->option_list))
+              MYSQL_YYABORT;
+          }
+        ;
+
+field_def:
+          type opt_attribute {}
+        | type opt_generated_always AS '(' virtual_column_func ')'
+          vcol_opt_specifier
+          vcol_opt_attribute
+          {
+            $$= (enum enum_field_types)MYSQL_TYPE_VIRTUAL;
+            Lex->vcol_info->set_field_type((enum enum_field_types) $1);
+          }
+        ;
+
+opt_generated_always:
+          /* empty */
+        | GENERATED_SYM ALWAYS_SYM {}
+        ;
+
+vcol_opt_specifier:
+          /* empty */
+          {
+            Lex->vcol_info->set_stored_in_db_flag(FALSE);
+          }
+        | VIRTUAL_SYM
+          {
+            Lex->vcol_info->set_stored_in_db_flag(FALSE);
+          }
+        | PERSISTENT_SYM
+          {
+            Lex->vcol_info->set_stored_in_db_flag(TRUE);
+          }
+        ;
+
+vcol_opt_attribute:
+          /* empty */ {}
+        | vcol_opt_attribute_list {}
+        ;
+
+vcol_opt_attribute_list:
+          vcol_opt_attribute_list vcol_attribute {}
+        | vcol_attribute
+        ;
+
+vcol_attribute:
+          UNIQUE_SYM
+          {
+            LEX *lex=Lex;
+            lex->type|= UNIQUE_FLAG;
+            lex->alter_info.flags|= ALTER_ADD_INDEX;
+          }
+        | UNIQUE_SYM KEY_SYM
+          {
+            LEX *lex=Lex;
+            lex->type|= UNIQUE_KEY_FLAG;
+            lex->alter_info.flags|= ALTER_ADD_INDEX;
+          }
+        | COMMENT_SYM TEXT_STRING_sys { Lex->comment= $2; }
+        ;
+
+parse_vcol_expr:
+          PARSE_VCOL_EXPR_SYM '(' virtual_column_func ')'
+          {
+            /*
+              "PARSE_VCOL_EXPR" can only be used by the SQL server
+              when reading a '*.frm' file.
+              Prevent the end user from invoking this command.
+            */
+            if (!Lex->parse_vcol_expr)
+            {
+              my_message(ER_SYNTAX_ERROR, ER(ER_SYNTAX_ERROR), MYF(0));
               MYSQL_YYABORT;
+            }
+          }
+        ;
+
+virtual_column_func:
+          remember_name expr remember_end
+          {
+            Lex->vcol_info= new Virtual_column_info();
+            if (!Lex->vcol_info)
+            {
+              mem_alloc_error(sizeof(Virtual_column_info));
+              MYSQL_YYABORT;
+            }
+            uint expr_len= (uint)($3 - $1) - 1;
+            Lex->vcol_info->expr_str.str= (char* ) sql_memdup($1 + 1, expr_len);
+            Lex->vcol_info->expr_str.length= expr_len;
+            Lex->vcol_info->expr_item= $2;
           }
         ;
 
@@ -5715,6 +5879,29 @@ attribute:
               Lex->charset=$2;
             }
           }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->option_list, &Lex->option_list_last);
+          }
         ;
 
 
@@ -6084,6 +6271,29 @@ all_key_opt:
           KEY_BLOCK_SIZE opt_equal ulong_num
           { Lex->key_create_info.block_size= $3; }
 	| COMMENT_SYM TEXT_STRING_sys { Lex->key_create_info.comment= $2; }
+        | IDENT_sys equal TEXT_STRING_sys
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, true, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal ident
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, false, &Lex->option_list,
+                                  &Lex->option_list_last);
+          }
+        | IDENT_sys equal real_ulonglong_num
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, $3, &Lex->option_list,
+                                  &Lex->option_list_last, YYTHD->mem_root);
+          }
+        | IDENT_sys equal DEFAULT
+          {
+            new (YYTHD->mem_root)
+              engine_option_value($1, &Lex->option_list, &Lex->option_list_last);
+          }
         ;
 
 normal_key_opt:
@@ -6615,6 +6825,7 @@ alter_list_item:
             LEX *lex=Lex;
             lex->change= $3.str;
             lex->alter_info.flags|= ALTER_CHANGE_COLUMN;
+            lex->option_list= NULL;
           }
           field_spec opt_place
           {
@@ -6628,8 +6839,10 @@ alter_list_item:
             lex->comment=null_lex_str;
             lex->charset= NULL;
             lex->alter_info.flags|= ALTER_CHANGE_COLUMN;
+	    lex->vcol_info= 0;
+            lex->option_list= NULL;
           }
-          type opt_attribute
+          field_def
           {
             LEX *lex=Lex;
             if (add_field_to_list(lex->thd,&$3,
@@ -6638,7 +6851,8 @@ alter_list_item:
                                   lex->default_value, lex->on_update_value,
                                   &lex->comment,
                                   $3.str, &lex->interval_list, lex->charset,
-                                  lex->uint_geom_type))
+                                  lex->uint_geom_type,
+                                  lex->vcol_info, lex->option_list))
               MYSQL_YYABORT;
           }
           opt_place
@@ -6748,8 +6962,7 @@ alter_list_item:
           }
         | create_table_options_space_separated
           {
-            LEX *lex=Lex;
-            lex->alter_info.flags|= ALTER_OPTIONS;
+            Lex->alter_info.flags|= ALTER_OPTIONS;
           }
         | FORCE_SYM
           {
@@ -6872,7 +7085,7 @@ slave_until:
           {
             LEX *lex=Lex;
             if (((lex->mi.log_file_name || lex->mi.pos) &&
-                (lex->mi.relay_log_name || lex->mi.relay_log_pos)) ||
+                 (lex->mi.relay_log_name || lex->mi.relay_log_pos)) ||
                 !((lex->mi.log_file_name && lex->mi.pos) ||
                   (lex->mi.relay_log_name && lex->mi.relay_log_pos)))
             {
@@ -7193,8 +7406,6 @@ cache_keys_spec:
           {
             Lex->select_lex.alloc_index_hints(YYTHD);
             Select->set_index_hint_type(INDEX_HINT_USE, 
-                                        old_mode ? 
-                                        INDEX_HINT_MASK_JOIN : 
                                         INDEX_HINT_MASK_ALL);
           }
           cache_key_list_or_empty
@@ -8708,7 +8919,7 @@ function_call_generic:
 
             builder= find_qualified_function_builder(thd);
             DBUG_ASSERT(builder);
-            item= builder->create(thd, $1, $3, true, $5);
+            item= builder->create_with_db(thd, $1, $3, true, $5);
 
             if (! ($$= item))
             {
@@ -10016,6 +10227,7 @@ ulonglong_num:
 real_ulonglong_num:
           NUM           { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
         | ULONGLONG_NUM { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
+        | HEX_NUM       { $$= strtoull($1.str, (char**) 0, 16); }
         | LONG_NUM      { int error; $$= (ulonglong) my_strtoll10($1.str, (char**) 0, &error); }
         | dec_num_error { MYSQL_YYABORT; }
         ;
@@ -10030,6 +10242,11 @@ dec_num:
         | FLOAT_NUM
         ;
 
+choice:
+	ulong_num { $$= $1 != 0 ? HA_CHOICE_YES : HA_CHOICE_NO; }
+	| DEFAULT { $$= HA_CHOICE_UNDEF; }
+	;
+
 procedure_clause:
           /* empty */
         | PROCEDURE_SYM ident /* Procedure name */
@@ -10433,16 +10650,12 @@ replace:
 insert_lock_option:
           /* empty */
           {
-#ifdef HAVE_QUERY_CACHE
             /*
-              If it is SP we do not allow insert optimisation whan result of
+              If it is SP we do not allow insert optimisation when result of
               insert visible only after the table unlocking but everyone can
               read table.
             */
             $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
-#else
-            $$= TL_WRITE_CONCURRENT_INSERT;
-#endif
           }
         | LOW_PRIORITY  { $$= TL_WRITE_LOW_PRIORITY; }
         | DELAYED_SYM
@@ -11081,6 +11294,34 @@ show_param:
           {
             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
           }
+        | CLIENT_STATS_SYM
+          {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SHOW_CLIENT_STATS;
+           if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
+             MYSQL_YYABORT;
+          }
+        | USER_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_USER_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
+               MYSQL_YYABORT;
+          }
+        | TABLE_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_TABLE_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS))
+               MYSQL_YYABORT;
+          }
+        | INDEX_STATS_SYM
+          {
+             LEX *lex= Lex;
+             lex->sql_command= SQLCOM_SHOW_INDEX_STATS;
+             if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
+               MYSQL_YYABORT;
+          }
         | CREATE PROCEDURE_SYM sp_name
           {
             LEX *lex= Lex;
@@ -11313,6 +11554,14 @@ flush_option:
           { Lex->type|= REFRESH_STATUS; }
         | SLAVE
           { Lex->type|= REFRESH_SLAVE; }
+  	| CLIENT_STATS_SYM
+          { Lex->type|= REFRESH_CLIENT_STATS; }
+  	| USER_STATS_SYM
+         { Lex->type|= REFRESH_USER_STATS; }
+  	| TABLE_STATS_SYM
+          { Lex->type|= REFRESH_TABLE_STATS; }
+  	| INDEX_STATS_SYM
+          { Lex->type|= REFRESH_INDEX_STATS; }
         | MASTER_SYM
           { Lex->type|= REFRESH_MASTER; }
         | DES_KEY_FILE
@@ -11462,15 +11711,11 @@ load_data_lock:
           /* empty */ { $$= TL_WRITE_DEFAULT; }
         | CONCURRENT
           {
-#ifdef HAVE_QUERY_CACHE
             /*
-              Ignore this option in SP to avoid problem with query cache
+              Ignore this option in SP to avoid problem with query cache and
+              triggers with non default priority locks
             */
-            if (Lex->sphead != 0)
-              $$= TL_WRITE_DEFAULT;
-            else
-#endif
-              $$= TL_WRITE_CONCURRENT_INSERT;
+            $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
           }
         | LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; }
         ;
@@ -12306,6 +12551,9 @@ user:
             $$->user = $1;
             $$->host.str= (char *) "%";
             $$->host.length= 1;
+            $$->password= null_lex_str; 
+            $$->plugin= empty_lex_str;
+            $$->auth= empty_lex_str;
 
             if (check_string_char_length(&$$->user, ER(ER_USERNAME),
                                          USERNAME_CHAR_LENGTH,
@@ -12318,6 +12566,9 @@ user:
             if (!($$=(LEX_USER*) thd->alloc(sizeof(st_lex_user))))
               MYSQL_YYABORT;
             $$->user = $1; $$->host=$3;
+            $$->password= null_lex_str; 
+            $$->plugin= empty_lex_str;
+            $$->auth= empty_lex_str;
 
             if (check_string_char_length(&$$->user, ER(ER_USERNAME),
                                          USERNAME_CHAR_LENGTH,
@@ -12405,6 +12656,7 @@ keyword_sp:
         | AGAINST                  {}
         | AGGREGATE_SYM            {}
         | ALGORITHM_SYM            {}
+        | ALWAYS_SYM               {}
         | ANY_SYM                  {}
         | AT_SYM                   {}
         | AUTHORS_SYM              {}
@@ -12423,6 +12675,7 @@ keyword_sp:
         | CHAIN_SYM                {}
         | CHANGED                  {}
         | CIPHER_SYM               {}
+        | CLIENT_STATS_SYM         {}
         | CLIENT_SYM               {}
         | CLASS_ORIGIN_SYM         {}
         | COALESCE                 {}
@@ -12481,6 +12734,7 @@ keyword_sp:
         | FILE_SYM                 {}
         | FIRST_SYM                {}
         | FIXED_SYM                {}
+        | GENERATED_SYM            {}
         | GEOMETRY_SYM             {}
         | GEOMETRYCOLLECTION       {}
         | GET_FORMAT               {}
@@ -12490,6 +12744,7 @@ keyword_sp:
         | HOSTS_SYM                {}
         | HOUR_SYM                 {}
         | IDENTIFIED_SYM           {}
+        | INDEX_STATS_SYM          {}
         | INVOKER_SYM              {}
         | IMPORT                   {}
         | INDEXES                  {}
@@ -12568,6 +12823,7 @@ keyword_sp:
         | PARTITIONING_SYM         {}
         | PARTITIONS_SYM           {}
         | PASSWORD                 {}
+        | PERSISTENT_SYM           {}
         | PHASE_SYM                {}
         | PLUGIN_SYM               {}
         | PLUGINS_SYM              {}
@@ -12616,6 +12872,7 @@ keyword_sp:
         | SIMPLE_SYM               {}
         | SHARE_SYM                {}
         | SHUTDOWN                 {}
+        | SLOW_SYM                 {}
         | SNAPSHOT_SYM             {}
         | SOUNDS_SYM               {}
         | SOURCE_SYM               {}
@@ -12637,6 +12894,7 @@ keyword_sp:
         | SWAPS_SYM                {}
         | SWITCHES_SYM             {}
         | TABLE_NAME_SYM           {}
+        | TABLE_STATS_SYM          {}
         | TABLES                   {}
         | TABLE_CHECKSUM_SYM       {}
         | TABLESPACE               {}
@@ -12645,6 +12903,7 @@ keyword_sp:
         | TEXT_SYM                 {}
         | THAN_SYM                 {}
         | TRANSACTION_SYM          {}
+        | TRANSACTIONAL_SYM        {}
         | TRIGGERS_SYM             {}
         | TIMESTAMP                {}
         | TIMESTAMP_ADD            {}
@@ -12661,9 +12920,11 @@ keyword_sp:
         | UNKNOWN_SYM              {}
         | UNTIL_SYM                {}
         | USER                     {}
+        | USER_STATS_SYM           {}
         | USE_FRM                  {}
         | VARIABLES                {}
         | VIEW_SYM                 {}
+        | VIRTUAL_SYM              {}
         | VALUE_SYM                {}
         | WARNINGS                 {}
         | WAIT_SYM                 {}
@@ -13170,6 +13431,11 @@ table_lock:
 lock_option:
           READ_SYM               { $$= TL_READ_NO_INSERT; }
         | WRITE_SYM              { $$= TL_WRITE_DEFAULT; }
+        | WRITE_SYM CONCURRENT
+          {
+            $$= (Lex->sphead ? TL_WRITE_DEFAULT : TL_WRITE_CONCURRENT_INSERT);
+          }
+
         | LOW_PRIORITY WRITE_SYM { $$= TL_WRITE_LOW_PRIORITY; }
         | READ_SYM LOCAL_SYM     { $$= TL_READ; }
         ;
@@ -13577,6 +13843,18 @@ grant_user:
           }
         | user IDENTIFIED_SYM BY PASSWORD TEXT_STRING
           { $$= $1; $1->password= $5; }
+        | user IDENTIFIED_SYM VIA_SYM ident_or_text
+          {
+            $$= $1;
+            $1->plugin= $4;
+            $1->auth= empty_lex_str;
+          }
+        | user IDENTIFIED_SYM VIA_SYM ident_or_text USING TEXT_STRING_sys
+          {
+            $$= $1;
+            $1->plugin= $4;
+            $1->auth= $6;
+          }
         | user
           { $$= $1; $1->password= null_lex_str; }
         ;
@@ -14293,6 +14571,7 @@ sf_tail:
             lex->length= lex->dec= NULL;
             lex->interval_list.empty();
             lex->type= 0;
+            lex->vcol_info= 0;
           }
           type_with_opt_collate /* $11 */
           { /* $12 */
diff --git a/sql/structs.h b/sql/structs.h
index 5ffcc4dc62e..327b28fe79c 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -65,7 +65,8 @@ typedef struct st_key_part_info {	/* Info about a key part */
   Field *field;
   uint	offset;				/* offset in record (from 0) */
   uint	null_offset;			/* Offset to null_bit in record */
-  uint16 length;                        /* Length of keypart value in bytes */
+  /* Length of key part in bytes, excluding NULL flag and length bytes */
+  uint16 length;
   /* 
     Number of bytes required to store the keypart value. This may be
     different from the "length" field as it also counts
@@ -80,6 +81,7 @@ typedef struct st_key_part_info {	/* Info about a key part */
   uint8 null_bit;			/* Position to null_bit */
 } KEY_PART_INFO ;
 
+class engine_option_value;
 
 typedef struct st_key {
   uint	key_length;			/* Tot length of key */
@@ -88,6 +90,7 @@ typedef struct st_key {
   uint  extra_length;
   uint	usable_key_parts;		/* Should normally be = key_parts */
   uint  block_size;
+  uint  name_length;
   enum  ha_key_alg algorithm;
   /*
     Note that parser is used when the table is opened for use, and
@@ -100,6 +103,8 @@ typedef struct st_key {
   };
   KEY_PART_INFO *key_part;
   char	*name;				/* Name of key */
+  /* Unique name for cache;  db + \0 + table_name + \0 + key_name + \0 */
+  uchar *cache_name;
   /*
     Array of AVG(#records with the same field value) for 1st ... Nth key part.
     0 means 'not known'.
@@ -111,6 +116,9 @@ typedef struct st_key {
   } handler;
   TABLE *table;
   LEX_STRING comment;
+  /** reference to the list of options or NULL */
+  engine_option_value *option_list;
+  void *option_struct;                  /* structure with parsed options */
 } KEY;
 
 
@@ -156,7 +164,7 @@ extern const char *show_comp_option_name[];
 typedef int *(*update_var)(THD *, struct st_mysql_show_var *);
 
 typedef struct	st_lex_user {
-  LEX_STRING user, host, password;
+  LEX_STRING user, host, password, plugin, auth;
 } LEX_USER;
 
 /*
@@ -218,6 +226,111 @@ typedef struct  user_conn {
   USER_RESOURCES user_resources;
 } USER_CONN;
 
+typedef struct st_user_stats
+{
+  char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+  // Account name the user is mapped to when this is a user from mapped_user.
+  // Otherwise, the same value as user.
+  char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+  uint user_name_length;
+  uint total_connections;
+  uint concurrent_connections;
+  time_t connected_time;  // in seconds
+  double busy_time;       // in seconds
+  double cpu_time;        // in seconds
+  ulonglong bytes_received;
+  ulonglong bytes_sent;
+  ulonglong binlog_bytes_written;
+  ha_rows   rows_read, rows_sent;
+  ha_rows rows_updated, rows_deleted, rows_inserted;
+  ulonglong select_commands, update_commands, other_commands;
+  ulonglong commit_trans, rollback_trans;
+  ulonglong denied_connections, lost_connections;
+  ulonglong access_denied_errors;
+  ulonglong empty_queries;
+} USER_STATS;
+
+/* Lookup function for hash tables with USER_STATS entries */
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+                                     my_bool not_used __attribute__((unused)));
+
+/* Free all memory for a hash table with USER_STATS entries */
+extern void free_user_stats(USER_STATS* user_stats);
+
+/* Intialize an instance of USER_STATS */
+extern void
+init_user_stats(USER_STATS *user_stats,
+                const char *user,
+                size_t user_length,
+                const char *priv_user,
+                uint total_connections,
+                uint concurrent_connections,
+                time_t connected_time,
+                double busy_time,
+                double cpu_time,
+                ulonglong bytes_received,
+                ulonglong bytes_sent,
+                ulonglong binlog_bytes_written,
+                ha_rows rows_sent,
+                ha_rows rows_read,
+                ha_rows rows_inserted,
+                ha_rows rows_deleted,
+                ha_rows rows_updated,
+                ulonglong select_commands,
+                ulonglong update_commands,
+                ulonglong other_commands,
+                ulonglong commit_trans,
+                ulonglong rollback_trans,
+                ulonglong denied_connections,
+                ulonglong lost_connections,
+                ulonglong access_denied_errors,
+                ulonglong empty_queries);
+
+/* Increment values of an instance of USER_STATS */
+extern void
+add_user_stats(USER_STATS *user_stats,
+               uint total_connections,
+               uint concurrent_connections,
+               time_t connected_time,
+               double busy_time,
+               double cpu_time,
+               ulonglong bytes_received,
+               ulonglong bytes_sent,
+               ulonglong binlog_bytes_written,
+               ha_rows rows_sent,
+               ha_rows rows_read,
+               ha_rows rows_inserted,
+               ha_rows rows_deleted,
+               ha_rows rows_updated,
+               ulonglong select_commands,
+               ulonglong update_commands,
+               ulonglong other_commands,
+               ulonglong commit_trans,
+               ulonglong rollback_trans,
+               ulonglong denied_connections,
+               ulonglong lost_connections,
+               ulonglong access_denied_errors,
+               ulonglong empty_queries);
+
+typedef struct st_table_stats
+{
+  char table[NAME_LEN * 2 + 2];  // [db] + '\0' + [table] + '\0'
+  uint table_name_length;
+  ulonglong rows_read, rows_changed;
+  ulonglong rows_changed_x_indexes;
+  /* Stores enum db_type, but forward declarations cannot be done */
+  int engine_type;
+} TABLE_STATS;
+
+typedef struct st_index_stats
+{
+  // [db] + '\0' + [table] + '\0' + [index] + '\0'
+  char index[NAME_LEN * 3 + 3];
+  uint index_name_length;                       /* Length of 'index' */
+  ulonglong rows_read;
+} INDEX_STATS;
+
+
 	/* Bits in form->update */
 #define REG_MAKE_DUPP		1	/* Make a copy of record when read */
 #define REG_NEW_RECORD		2	/* Write a new record if not found */
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index ca3447dcc82..1f00818e274 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -69,6 +69,7 @@ extern void close_thread_tables(THD *thd);
 #define PFS_TRAILING_PROPERTIES \
   NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(NULL), ON_UPDATE(NULL), \
   0, NULL, sys_var::PARSE_EARLY
+#warning move PARSE_EARLY where it belongs
 
 static Sys_var_mybool Sys_pfs_enabled(
        "performance_schema",
@@ -1364,6 +1365,14 @@ static const char *optimizer_switch_names[]=
 {
   "index_merge", "index_merge_union", "index_merge_sort_union",
   "index_merge_intersection", "engine_condition_pushdown",
+  "index_condition_pushdown",
+  "firstmatch","loosescan","materialization", "semijoin",
+  "partial_match_rowid_merge",
+  "partial_match_table_scan",
+  "subquery_cache",
+#ifndef DBUG_OFF
+  "table_elimination",
+#endif
   "default", NullS
 };
 /** propagates changes to @@engine_condition_pushdown */
@@ -1371,7 +1380,7 @@ static bool fix_optimizer_switch(sys_var *self, THD *thd,
                                  enum_var_type type)
 {
   SV *sv= (type == OPT_GLOBAL) ? &global_system_variables : &thd->variables;
-  sv->engine_condition_pushdown= 
+  sv->engine_condition_pushdown=
     test(sv->optimizer_switch & OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN);
   return false;
 }
@@ -1379,7 +1388,10 @@ static Sys_var_flagset Sys_optimizer_switch(
        "optimizer_switch",
        "optimizer_switch=option=val[,option=val...], where option is one of "
        "{index_merge, index_merge_union, index_merge_sort_union, "
-       "index_merge_intersection, engine_condition_pushdown}"
+       "index_merge_intersection, engine_condition_pushdown, "
+       "index_condition_pushdown, firstmatch, loosescan, materialization, "
+       "semijoin, partial_match_rowid_merge, partial_match_table_scan, "
+       "subquery_cache} "
        " and val is one of {on, off, default}",
        SESSION_VAR(optimizer_switch), CMD_LINE(REQUIRED_ARG),
        optimizer_switch_names, DEFAULT(OPTIMIZER_SWITCH_DEFAULT),
@@ -1548,11 +1560,6 @@ static Sys_var_ulong Sys_range_alloc_block_size(
        VALID_RANGE(RANGE_ALLOC_BLOCK_SIZE, ULONG_MAX),
        DEFAULT(RANGE_ALLOC_BLOCK_SIZE), BLOCK_SIZE(1024));
 
-static Sys_var_ulong Sys_multi_range_count(
-       "multi_range_count", "Number of key ranges to request at once",
-       SESSION_VAR(multi_range_count), CMD_LINE(REQUIRED_ARG),
-       VALID_RANGE(1, ULONG_MAX), DEFAULT(256), BLOCK_SIZE(1));
-
 static bool fix_thd_mem_root(sys_var *self, THD *thd, enum_var_type type)
 {
   if (type != OPT_GLOBAL)
@@ -1901,7 +1908,8 @@ static bool fix_sql_mode(sys_var *self, THD *thd, enum_var_type type)
 */
 static const char *sql_mode_names[]=
 {
-  "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE", ",",
+  "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE",
+  "IGNORE_BAD_TABLE_OPTIONS",
   "ONLY_FULL_GROUP_BY", "NO_UNSIGNED_SUBTRACTION", "NO_DIR_IN_CREATE",
   "POSTGRESQL", "ORACLE", "MSSQL", "DB2", "MAXDB", "NO_KEY_OPTIONS",
   "NO_TABLE_OPTIONS", "NO_FIELD_OPTIONS", "MYSQL323", "MYSQL40", "ANSI",
@@ -2067,7 +2075,7 @@ static Sys_var_tx_isolation Sys_tx_isolation(
 static Sys_var_ulonglong Sys_tmp_table_size(
        "tmp_table_size",
        "If an internal in-memory temporary table exceeds this size, MySQL "
-       "will automatically convert it to an on-disk MyISAM table",
+       "will automatically convert it to an on-disk MyISAM or Aria table",
        SESSION_VAR(tmp_table_size), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024),
        BLOCK_SIZE(1));
@@ -3107,3 +3115,146 @@ static Sys_var_tz Sys_time_zone(
        SESSION_VAR(time_zone), NO_CMD_LINE,
        DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG);
 
+const char *plugin_maturity_names[]=
+{ "unknown", "experimental", "alpha", "beta", "gamma", "stable", 0 };
+static Sys_var_enum Sys_plugin_maturity(
+       "plugin_maturity",
+       "The lowest desirable plugin maturity. Plugins less mature than "
+       "that will not be installed or loaded.",
+       READ_ONLY GLOBAL_VAR(server_maturity), CMD_LINE(REQUIRED_ARG),
+       plugin_maturity_names, DEFAULT(MariaDB_PLUGIN_MATURITY_UNKNOWN));
+
+#error *may be* turn the below into sysvars
+
+  {"deadlock-search-depth-short", OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
+   "Short search depth for the two-step deadlock detection",
+   &global_system_variables.wt_deadlock_search_depth_short,
+   &max_system_variables.wt_deadlock_search_depth_short,
+   0, GET_ULONG, REQUIRED_ARG, 4, 0, 32, 0, 0, 0},
+  {"deadlock-search-depth-long", OPT_DEADLOCK_SEARCH_DEPTH_LONG,
+   "Long search depth for the two-step deadlock detection",
+   &global_system_variables.wt_deadlock_search_depth_long,
+   &max_system_variables.wt_deadlock_search_depth_long,
+   0, GET_ULONG, REQUIRED_ARG, 15, 0, 33, 0, 0, 0},
+  {"deadlock-timeout-short", OPT_DEADLOCK_TIMEOUT_SHORT,
+   "Short timeout for the two-step deadlock detection (in microseconds)",
+   &global_system_variables.wt_timeout_short,
+   &max_system_variables.wt_timeout_short,
+   0, GET_ULONG, REQUIRED_ARG, 10000, 0, ULONG_MAX, 0, 0, 0},
+  {"deadlock-timeout-long", OPT_DEADLOCK_TIMEOUT_LONG,
+   "Long timeout for the two-step deadlock detection (in microseconds)",
+   &global_system_variables.wt_timeout_long,
+   &max_system_variables.wt_timeout_long,
+   0, GET_ULONG, REQUIRED_ARG, 50000000, 0, ULONG_MAX, 0, 0, 0},
+
+  {"debug-crc-break", OPT_DEBUG_CRC,
+   "Call my_debug_put_break_here() if crc matches this number (for debug).",
+   &my_crc_dbug_check, &my_crc_dbug_check,
+   0, GET_ULONG, REQUIRED_ARG, 0, 0, ~(ulong) 0L, 0, 0, 0},
+
+  {"extra-port", OPT_EXTRA_PORT,
+   "Extra port number to use for tcp-connections in a one-thread-per-connection manner. 0 means don't use another port",
+   &mysqld_extra_port,
+   &mysqld_extra_port, 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"extra-max-connections", OPT_MAX_CONNECTIONS,
+   "The number of connections on 'extra-port.",
+   &extra_max_connections,
+   &extra_max_connections, 0, GET_ULONG, REQUIRED_ARG, 1, 1, 100000,
+   0, 1, 0},
+
+#ifdef SAFE_MUTEX
+  {"mutex-deadlock-detector", OPT_MUTEX_DEADLOCK_DETECTOR,
+   "Enable checking of wrong mutex usage.",
+   &safe_mutex_deadlock_detector,
+   &safe_mutex_deadlock_detector,
+   0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+#endif
+
+  {"safemalloc-mem-limit", OPT_SAFEMALLOC_MEM_LIMIT,
+   "Simulate memory shortage when compiled with the --with-debug=full option.",
+   0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+
+#ifndef DBUG_OFF
+#ifdef SAFEMALLOC
+  {"skip-safemalloc", OPT_SKIP_SAFEMALLOC,
+   "Don't use the memory allocation checking.", 0, 0, 0, GET_NO_ARG, NO_ARG,
+   0, 0, 0, 0, 0, 0}, sf_malloc_quick=1;
+#endif
+#endif
+
+  {"log-slow-time", OPT_LONG_QUERY_TIME,
+   "Log all queries that have taken more than long_query_time seconds to execute to file. "
+   "The argument will be treated as a decimal value with microsecond precission.",
+   &long_query_time, &long_query_time, 0, GET_DOUBLE,
+   REQUIRED_ARG, 10, 0, LONG_TIMEOUT, 0, 0, 0},
+  {"key_cache_segments", OPT_KEY_CACHE_PARTITIONS,
+   "The number of segments in a key cache",
+   &dflt_key_cache_var.param_partitions, 0,
+   0, (GET_ULONG | GET_ASK_ADDR), REQUIRED_ARG, DEFAULT_KEY_CACHE_PARTITIONS,
+   0, MAX_KEY_CACHE_PARTITIONS, 0, 1, 0},  
+  {"log-slow-filter", OPT_LOG_SLOW_FILTER,
+   "Log only the queries that followed certain execution plan. Multiple flags "
+   "allowed in a comma-separated string. [admin, filesort, filesort_on_disk, "
+   "full_join, full_scan, query_cache, query_cache_miss, tmp_table, "
+   "tmp_table_on_disk]. Sets log-slow-admin-command to ON",
+   0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, QPLAN_ALWAYS_SET, 0, 0},
+  {"log-slow-rate_limit", OPT_LOG_SLOW_RATE_LIMIT,
+   "If set, only write to slow log every 'log_slow_rate_limit' query (use "
+   "this to reduce output on slow query log)",
+   &global_system_variables.log_slow_rate_limit,
+   &max_system_variables.log_slow_rate_limit, 0, GET_ULONG,
+   REQUIRED_ARG, 1, 1, ~0L, 0, 1L, 0},
+  {"log-slow-verbosity", OPT_LOG_SLOW_VERBOSITY,
+   "Choose how verbose the messages to your slow log will be. Multiple flags "
+   "allowed in a comma-separated string. [query_plan, innodb]",
+   0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
+  global_system_variables.log_slow_verbosity= LOG_SLOW_VERBOSITY_INIT;
+  global_system_variables.log_slow_filter=    QPLAN_ALWAYS_SET;
+  {"log-slow-file", OPT_SLOW_QUERY_LOG_FILE,
+    "Log slow queries to given log file. Defaults logging to hostname-slow.log",
+   &opt_slow_logname, &opt_slow_logname, 0, GET_STR,
+   REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+   {"join_cache_level", OPT_JOIN_CACHE_LEVEL,
+   "Controls what join operations can be executed with join buffers. Odd numbers are used for plain join buffers while even numbers are used for linked buffers",
+   &global_system_variables.join_cache_level,
+   &max_system_variables.join_cache_level,
+   0, GET_ULONG, REQUIRED_ARG, 1, 0, 8, 0, 1, 0},
+  {"mrr_buffer_size", OPT_MRR_BUFFER_SIZE,
+   "Size of buffer to use when using MRR with range access",
+   (uchar**) &global_system_variables.mrr_buff_size,
+   (uchar**) &max_system_variables.mrr_buff_size, 0,
+   GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD,
+   INT_MAX32, MALLOC_OVERHEAD, 1 /* Small to be able to do tests */ , 0},
+
+  {"sync-sys", OPT_SYNC,
+   "Enable/disable system sync calls. Should only be turned off when running "
+   "tests or debugging!!",
+   &opt_sync, &opt_sync, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+  {"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE,
+   "The size of the buffers used [NOT] IN evaluation via partial matching.",
+   (uchar**) &global_system_variables.rowid_merge_buff_size,
+   (uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG,
+   REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0},
+  {"userstat", OPT_USERSTAT,
+   "Control USER_STATISTICS, CLIENT_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running",
+   (uchar**) &opt_userstat_running, (uchar**) &opt_userstat_running,
+   0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
+  case OPT_LOG_SLOW_FILTER:
+    global_system_variables.log_slow_filter=
+      find_bit_type_or_exit(argument, &log_slow_filter_typelib,
+                            opt->name, &error);
+    /*
+      If we are using filters, we set opt_slow_admin_statements to be always
+      true so we can maintain everything with filters
+    */
+    opt_log_slow_admin_statements= 1;
+    if (error)
+      return 1;
+    break;
+  case OPT_LOG_SLOW_VERBOSITY:
+    global_system_variables.log_slow_verbosity=
+      find_bit_type_or_exit(argument, &log_slow_verbosity_typelib,
+                            opt->name, &error);
+    if (error)
+      return 1;
+    break;
diff --git a/sql/table.cc b/sql/table.cc
index 030de6719c7..2bde110f57f 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -31,8 +31,10 @@
                                  // fix_partition_func, partition_info
 #include "sql_acl.h"             // *_ACL, acl_getroot_no_password
 #include "sql_base.h"            // release_table_share
+#include "create_options.h"
 #include <m_ctype.h>
 #include "my_md5.h"
+#include "my_bit.h"
 #include "sql_select.h"
 #include "mdl.h"                 // MDL_wait_for_graph_visitor
 
@@ -51,6 +53,12 @@ LEX_STRING GENERAL_LOG_NAME= {C_STRING_WITH_LEN("general_log")};
 /* SLOW_LOG name */
 LEX_STRING SLOW_LOG_NAME= {C_STRING_WITH_LEN("slow_log")};
 
+/* 
+  Keyword added as a prefix when parsing the defining expression for a
+  virtual column read from the column definition saved in the frm file
+*/
+LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") };
+
 	/* Functions defined in this file */
 
 void open_table_error(TABLE_SHARE *share, int error, int db_errno,
@@ -500,34 +508,34 @@ inline bool is_system_table_name(const char *name, uint length)
   CHARSET_INFO *ci= system_charset_info;
 
   return (
-           /* mysql.proc table */
-           (length == 4 &&
-             my_tolower(ci, name[0]) == 'p' && 
-             my_tolower(ci, name[1]) == 'r' &&
-             my_tolower(ci, name[2]) == 'o' &&
-             my_tolower(ci, name[3]) == 'c') ||
-
-           (length > 4 &&
-             (
-               /* one of mysql.help* tables */
-               (my_tolower(ci, name[0]) == 'h' &&
-                 my_tolower(ci, name[1]) == 'e' &&
-                 my_tolower(ci, name[2]) == 'l' &&
-                 my_tolower(ci, name[3]) == 'p') ||
-
-               /* one of mysql.time_zone* tables */
-               (my_tolower(ci, name[0]) == 't' &&
-                 my_tolower(ci, name[1]) == 'i' &&
-                 my_tolower(ci, name[2]) == 'm' &&
-                 my_tolower(ci, name[3]) == 'e') ||
-
-               /* mysql.event table */
-               (my_tolower(ci, name[0]) == 'e' &&
-                 my_tolower(ci, name[1]) == 'v' &&
-                 my_tolower(ci, name[2]) == 'e' &&
-                 my_tolower(ci, name[3]) == 'n' &&
-                 my_tolower(ci, name[4]) == 't')
-             )
+          /* mysql.proc table */
+          (length == 4 &&
+           my_tolower(ci, name[0]) == 'p' && 
+           my_tolower(ci, name[1]) == 'r' &&
+           my_tolower(ci, name[2]) == 'o' &&
+           my_tolower(ci, name[3]) == 'c') ||
+
+          (length > 4 &&
+           (
+            /* one of mysql.help* tables */
+            (my_tolower(ci, name[0]) == 'h' &&
+             my_tolower(ci, name[1]) == 'e' &&
+             my_tolower(ci, name[2]) == 'l' &&
+             my_tolower(ci, name[3]) == 'p') ||
+
+            /* one of mysql.time_zone* tables */
+            (my_tolower(ci, name[0]) == 't' &&
+             my_tolower(ci, name[1]) == 'i' &&
+             my_tolower(ci, name[2]) == 'm' &&
+             my_tolower(ci, name[3]) == 'e') ||
+
+            /* mysql.event table */
+            (my_tolower(ci, name[0]) == 'e' &&
+             my_tolower(ci, name[1]) == 'v' &&
+             my_tolower(ci, name[2]) == 'e' &&
+             my_tolower(ci, name[3]) == 'n' &&
+             my_tolower(ci, name[4]) == 't')
+            )
            )
          );
 }
@@ -537,11 +545,11 @@ inline bool is_system_table_name(const char *name, uint length)
   Check if a string contains path elements
 */  
 
-static inline bool has_disabled_path_chars(const char *str)
+static bool has_disabled_path_chars(const char *str)
 {
   for (; *str; str++)
-    switch (*str)
-    {
+  {
+    switch (*str) {
       case FN_EXTCHAR:
       case '/':
       case '\\':
@@ -549,6 +557,7 @@ static inline bool has_disabled_path_chars(const char *str)
       case '@':
         return TRUE;
     }
+  }
   return FALSE;
 }
 
@@ -738,8 +747,15 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   const char **interval_array;
   enum legacy_db_type legacy_db_type;
   my_bitmap_map *bitmaps;
+  bool null_bits_are_used;
+  uint vcol_screen_length, options_len;
+  char *vcol_screen_pos;
+  uchar *options, *buff= 0;
   DBUG_ENTER("open_binary_frm");
 
+  LINT_INIT(options);
+  LINT_INIT(options_len);
+
   new_field_pack_flag= head[27];
   new_frm_ver= (head[2] - FRM_VER);
   field_pack_length= new_frm_ver < 2 ? 11 : 17;
@@ -787,6 +803,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   if (!head[32])				// New frm file in 3.23
   {
     share->avg_row_length= uint4korr(head+34);
+    share->transactional= (ha_choice) (head[39] & 3);
+    share->page_checksum= (ha_choice) ((head[39] >> 2) & 3);
     share->row_type= (row_type) head[40];
     share->table_charset= get_charset((((uint) head[41]) << 8) + 
                                         (uint) head[38],MYF(0));
@@ -847,7 +865,6 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 
   for (i=0 ; i < keys ; i++, keyinfo++)
   {
-    keyinfo->table= 0;                           // Updated in open_frm
     if (new_frm_ver >= 3)
     {
       keyinfo->flags=	   (uint) uint2korr(strpos) ^ HA_NOSAME;
@@ -913,6 +930,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   }
 
   share->reclength = uint2korr((head+16));
+  share->stored_rec_length= share->reclength;
   if (*(head+26) == 1)
     share->system= 1;				/* one-record-database */
 #ifdef HAVE_CRYPTED_FRM
@@ -930,25 +948,19 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   if ((n_length= uint4korr(head+55)))
   {
     /* Read extra data segment */
-    uchar *buff, *next_chunk, *buff_end;
+    uchar *next_chunk, *buff_end;
     DBUG_PRINT("info", ("extra segment size is %u bytes", n_length));
-    if (!(next_chunk= buff= (uchar*) my_malloc(n_length, MYF(MY_WME))))
+    if (!(next_chunk= buff= (uchar*) my_malloc(n_length+1, MYF(MY_WME))))
       goto err;
     if (mysql_file_pread(file, buff, n_length, record_offset + share->reclength,
                          MYF(MY_NABP)))
-    {
-      my_free(buff);
-      goto err;
-    }
+      goto free_and_err;
     share->connect_string.length= uint2korr(buff);
     if (!(share->connect_string.str= strmake_root(&share->mem_root,
                                                   (char*) next_chunk + 2,
                                                   share->connect_string.
                                                   length)))
-    {
-      my_free(buff);
-      goto err;
-    }
+      goto free_and_err;
     next_chunk+= share->connect_string.length + 2;
     buff_end= buff + n_length;
     if (next_chunk + 2 < buff_end)
@@ -967,8 +979,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                 plugin_data(tmp_plugin, handlerton *)))
         {
           /* bad file, legacy_db_type did not match the name */
-          my_free(buff);
-          goto err;
+          goto free_and_err;
         }
         /*
           tmp_plugin is locked with a local lock.
@@ -976,7 +987,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           replacing it with a globally locked version of tmp_plugin
         */
         plugin_unlock(NULL, share->db_plugin);
-        share->db_plugin= my_plugin_lock(NULL, &tmp_plugin);
+        share->db_plugin= my_plugin_lock(NULL, tmp_plugin);
         DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)",
                             str_db_type_length, next_chunk + 2,
                             ha_legacy_type(share->db_type())));
@@ -997,8 +1008,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           error= 8;
           my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
                    "--skip-partition");
-          my_free(buff);
-          goto err;
+          goto free_and_err;
         }
         plugin_unlock(NULL, share->db_plugin);
         share->db_plugin= ha_lock_engine(NULL, partition_hton);
@@ -1013,8 +1023,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         error= 8;
         name.str[name.length]=0;
         my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), name.str);
-        my_free(buff);
-        goto err;
+        goto free_and_err;
         /* purecov: end */
       }
       next_chunk+= str_db_type_length + 2;
@@ -1029,35 +1038,17 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         if (!(share->partition_info_str= (char*)
               memdup_root(&share->mem_root, next_chunk + 4,
                           partition_info_str_len + 1)))
-        {
-          my_free(buff);
-          goto err;
-        }
+          goto free_and_err;
       }
 #else
       if (partition_info_str_len)
       {
         DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined"));
-        my_free(buff);
-        goto err;
+        goto free_and_err;
       }
 #endif
       next_chunk+= 5 + partition_info_str_len;
     }
-#if MYSQL_VERSION_ID < 50200
-    if (share->mysql_version >= 50106 && share->mysql_version <= 50109)
-    {
-      /*
-         Partition state array was here in version 5.1.6 to 5.1.9, this code
-         makes it possible to load a 5.1.6 table in later versions. Can most
-         likely be removed at some point in time. Will only be used for
-         upgrades within 5.1 series of versions. Upgrade to 5.2 can only be
-         done from newer 5.1 versions.
-      */
-      next_chunk+= 4;
-    }
-    else
-#endif
     if (share->mysql_version >= 50110)
     {
       /* New auto_partitioned indicator introduced in 5.1.11 */
@@ -1065,6 +1056,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       share->auto_partitioned= *next_chunk;
 #endif
       next_chunk++;
+      DBUG_ASSERT(next_chunk <= buff_end);
     }
     keyinfo= share->key_info;
     for (i= 0; i < keys; i++, keyinfo++)
@@ -1076,8 +1068,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         {
           DBUG_PRINT("error",
                      ("fulltext key uses parser that is not defined in .frm"));
-          my_free(buff);
-          goto err;
+          goto free_and_err;
         }
         parser_name.str= (char*) next_chunk;
         parser_name.length= strlen((char*) next_chunk);
@@ -1087,11 +1078,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         if (! keyinfo->parser)
         {
           my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), parser_name.str);
-          my_free(buff);
-          goto err;
+          goto free_and_err;
         }
       }
     }
+
     if (forminfo[46] == (uchar)255)
     {
       //reading long table comment
@@ -1099,19 +1090,27 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       {
           DBUG_PRINT("error",
                      ("long table comment is not defined in .frm"));
-          my_free(buff);
-          goto err;
+          goto free_and_err;
       }
       share->comment.length = uint2korr(next_chunk);
       if (! (share->comment.str= strmake_root(&share->mem_root,
              (char*)next_chunk + 2, share->comment.length)))
-      {
-          my_free(buff);
-          goto err;
-      }
+          goto free_and_err;
       next_chunk+= 2 + share->comment.length;
     }
-    my_free(buff);
+
+    DBUG_ASSERT(next_chunk <= buff_end);
+    if (share->db_create_options & HA_OPTION_TEXT_CREATE_OPTIONS)
+    {
+      /*
+        store options position, but skip till the time we will
+        know number of fields
+      */
+      options_len= uint4korr(next_chunk);
+      options= next_chunk + 4;
+      next_chunk+= options_len + 4;
+    }
+    DBUG_ASSERT(next_chunk <= buff_end);
   }
   share->key_block_size= uint2korr(head+62);
 
@@ -1121,11 +1120,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->rec_buff_length= rec_buff_length;
   if (!(record= (uchar *) alloc_root(&share->mem_root,
                                      rec_buff_length)))
-    goto err;                                   /* purecov: inspected */
+    goto free_and_err;                          /* purecov: inspected */
   share->default_values= record;
   if (mysql_file_pread(file, record, (size_t) share->reclength,
                        record_offset, MYF(MY_NABP)))
-    goto err;                                   /* purecov: inspected */
+    goto free_and_err;                          /* purecov: inspected */
 
   mysql_file_seek(file, pos+288, MY_SEEK_SET, MYF(0));
 #ifdef HAVE_CRYPTED_FRM
@@ -1133,7 +1132,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   {
     crypted->decode((char*) forminfo+256,288-256);
     if (sint2korr(forminfo+284) != 0)		// Should be 0
-      goto err;                                 // Wrong password
+      goto free_and_err;                        // Wrong password
   }
 #endif
 
@@ -1145,6 +1144,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   int_length= uint2korr(forminfo+274);
   share->null_fields= uint2korr(forminfo+282);
   com_length= uint2korr(forminfo+284);
+  vcol_screen_length= uint2korr(forminfo+286);
+  share->vfields= 0;
+  share->stored_fields= share->fields;
   if (forminfo[46] != (uchar)255)
   {
     share->comment.length=  (int) (forminfo[46]);
@@ -1152,7 +1154,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                                      share->comment.length);
   }
 
-  DBUG_PRINT("info",("i_count: %d  i_parts: %d  index: %d  n_length: %d  int_length: %d  com_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length));
+  DBUG_PRINT("info",("i_count: %d  i_parts: %d  index: %d  n_length: %d  int_length: %d  com_length: %d  vcol_screen_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length, vcol_screen_length));
+
 
   if (!(field_ptr = (Field **)
 	alloc_root(&share->mem_root,
@@ -1160,14 +1163,16 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 			   interval_count*sizeof(TYPELIB)+
 			   (share->fields+interval_parts+
 			    keys+3)*sizeof(char *)+
-			   (n_length+int_length+com_length)))))
-    goto err;                                   /* purecov: inspected */
+			   (n_length+int_length+com_length+
+			       vcol_screen_length)))))
+    goto free_and_err;                           /* purecov: inspected */
 
   share->field= field_ptr;
   read_length=(uint) (share->fields * field_pack_length +
-		      pos+ (uint) (n_length+int_length+com_length));
+		      pos+ (uint) (n_length+int_length+com_length+
+		                   vcol_screen_length));
   if (read_string(file,(uchar**) &disk_buff,read_length))
-    goto err;                                   /* purecov: inspected */
+    goto free_and_err;                           /* purecov: inspected */
 #ifdef HAVE_CRYPTED_FRM
   if (crypted)
   {
@@ -1186,11 +1191,15 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   memcpy((char*) names, strpos+(share->fields*field_pack_length),
 	 (uint) (n_length+int_length));
   comment_pos= names+(n_length+int_length);
-  memcpy(comment_pos, disk_buff+read_length-com_length, com_length);
+  memcpy(comment_pos, disk_buff+read_length-com_length-vcol_screen_length, 
+         com_length);
+  vcol_screen_pos= names+(n_length+int_length+com_length);
+  memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length, 
+         vcol_screen_length);
 
   fix_type_pointers(&interval_array, &share->fieldnames, 1, &names);
   if (share->fieldnames.count != share->fields)
-    goto err;
+    goto free_and_err;
   fix_type_pointers(&interval_array, share->intervals, interval_count,
 		    &names);
 
@@ -1204,7 +1213,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       uint count= (uint) (interval->count + 1) * sizeof(uint);
       if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root,
                                                         count)))
-        goto err;
+        goto free_and_err;
       for (count= 0; count < interval->count; count++)
       {
         char *val= (char*) interval->type_names[count];
@@ -1220,9 +1229,10 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
  /* Allocate handler */
   if (!(handler_file= get_new_handler(share, thd->mem_root,
                                       share->db_type())))
-    goto err;
+    goto free_and_err;
 
   record= share->default_values-1;              /* Fieldstart = 1 */
+  null_bits_are_used= share->null_fields != 0;
   if (share->null_field_first)
   {
     null_flags= null_pos= (uchar*) record+1;
@@ -1254,10 +1264,14 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   for (i=0 ; i < share->fields; i++, strpos+=field_pack_length, field_ptr++)
   {
     uint pack_flag, interval_nr, unireg_type, recpos, field_length;
+    uint vcol_info_length=0;
+    uint vcol_expr_length=0;
     enum_field_types field_type;
     CHARSET_INFO *charset=NULL;
     Field::geometry_type geom_type= Field::GEOM_GEOMETRY;
     LEX_STRING comment;
+    Virtual_column_info *vcol_info= 0;
+    bool fld_stored_in_db= TRUE;
 
     if (new_frm_ver >= 3)
     {
@@ -1278,7 +1292,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 	charset= &my_charset_bin;
 #else
 	error= 4;  // unsupported field type
-	goto err;
+	goto free_and_err;
 #endif
       }
       else
@@ -1290,9 +1304,21 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
         {
           error= 5; // Unknown or unavailable charset
           errarg= (int) csid;
-          goto err;
+          goto free_and_err;
         }
       }
+
+      if ((uchar)field_type == (uchar)MYSQL_TYPE_VIRTUAL)
+      {
+        DBUG_ASSERT(interval_nr); // Expect non-null expression
+        /* 
+          The interval_id byte in the .frm file stores the length of the
+          expression statement for a virtual column.
+        */
+        vcol_info_length= interval_nr;
+        interval_nr= 0;
+      }
+
       if (!comment_length)
       {
 	comment.str= (char*) "";
@@ -1304,6 +1330,34 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 	comment.length= comment_length;
 	comment_pos+=   comment_length;
       }
+
+      if (vcol_info_length)
+      {
+        /*
+          Get virtual column data stored in the .frm file as follows:
+          byte 1      = 1 (always 1 to allow for future extensions)
+          byte 2      = sql_type
+          byte 3      = flags (as of now, 0 - no flags, 1 - field is physically stored)
+          byte 4-...  = virtual column expression (text data)
+        */
+        vcol_info= new Virtual_column_info();
+        if ((uint)vcol_screen_pos[0] != 1)
+        {
+          error= 4;
+          goto free_and_err;
+        }
+        field_type= (enum_field_types) (uchar) vcol_screen_pos[1];
+        fld_stored_in_db= (bool) (uint) vcol_screen_pos[2];
+        vcol_expr_length= vcol_info_length-(uint)FRM_VCOL_HEADER_SIZE;
+        if (!(vcol_info->expr_str.str=
+              (char *)memdup_root(&share->mem_root,
+                                  vcol_screen_pos+(uint)FRM_VCOL_HEADER_SIZE,
+                                  vcol_expr_length)))
+          goto free_and_err;
+        vcol_info->expr_str.length= vcol_expr_length;
+        vcol_screen_pos+= vcol_info_length;
+        share->vfields++;
+      }
     }
     else
     {
@@ -1389,13 +1443,16 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
     if (!reg_field)				// Not supported field type
     {
       error= 4;
-      goto err;			/* purecov: inspected */
+      goto free_and_err;			/* purecov: inspected */
     }
 
     reg_field->field_index= i;
     reg_field->comment=comment;
+    reg_field->vcol_info= vcol_info;
+    reg_field->stored_in_db= fld_stored_in_db;
     if (field_type == MYSQL_TYPE_BIT && !f_bit_as_char(pack_flag))
     {
+      null_bits_are_used= 1;
       if ((null_bit_pos+= field_length & 7) > 7)
       {
         null_pos++;
@@ -1416,7 +1473,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
       share->timestamp_field_offset= i;
 
     if (use_hash)
-      if (my_hash_insert(&share->name_hash, (uchar*) field_ptr) )
+    {
+      if (my_hash_insert(&share->name_hash,
+                         (uchar*) field_ptr))
       {
         /*
           Set return code 8 here to indicate that an error has
@@ -1424,10 +1483,20 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           sent (OOM).
         */
         error= 8; 
-        goto err;
+        goto free_and_err;
       }
+    }
+    if (!reg_field->stored_in_db)
+    {
+      share->stored_fields--;
+      if (share->stored_rec_length>=recpos)
+        share->stored_rec_length= recpos-1;
+    }
   }
   *field_ptr=0;					// End marker
+  /* Sanity checks: */
+  DBUG_ASSERT(share->fields>=share->stored_fields);
+  DBUG_ASSERT(share->reclength>=share->stored_rec_length);
 
   /* Fix key->name and key_part->field */
   if (key_parts)
@@ -1442,6 +1511,19 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
     {
       uint usable_parts= 0;
       keyinfo->name=(char*) share->keynames.type_names[key];
+      keyinfo->name_length= strlen(keyinfo->name);
+      keyinfo->cache_name=
+        (uchar*) alloc_root(&share->mem_root,
+                            share->table_cache_key.length+
+                            keyinfo->name_length + 1);
+      if (keyinfo->cache_name)           // If not out of memory
+      {
+        uchar *pos= keyinfo->cache_name;
+        memcpy(pos, share->table_cache_key.str, share->table_cache_key.length);
+        memcpy(pos + share->table_cache_key.length, keyinfo->name,
+               keyinfo->name_length+1);
+      }
+
       /* Fix fulltext keys for old .frm files */
       if (share->key_info[key].flags & HA_FULLTEXT)
 	share->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT;
@@ -1478,7 +1560,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 	if (!key_part->fieldnr)
         {
           error= 4;                             // Wrong file
-          goto err;
+          goto free_and_err;
         }
         field= key_part->field= share->field[key_part->fieldnr-1];
         key_part->type= field->key_type();
@@ -1574,21 +1656,20 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
                                 share->table_name.str,
                                 share->table_name.str);
             share->crashed= 1;                // Marker for CHECK TABLE
-            goto to_be_deleted;
+            continue;
           }
 #endif
           key_part->key_part_flag|= HA_PART_KEY_SEG;
         }
-
-	to_be_deleted:
-
         /*
-          If the field can be NULL, don't optimize away the test
-          key_part_column = expression from the WHERE clause
-          as we need to test for NULL = NULL.
+          Sometimes we can compare key parts for equality with memcmp.
+          But not always.
         */
-        if (field->real_maybe_null())
-          key_part->key_part_flag|= HA_NULL_PART;
+        if (!(key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART |
+                                         HA_BIT_PART)) &&
+            key_part->type != HA_KEYTYPE_FLOAT &&
+            key_part->type == HA_KEYTYPE_DOUBLE)
+          key_part->key_part_flag|= HA_CAN_MEMCMP;
       }
       keyinfo->usable_key_parts= usable_parts; // Filesort
 
@@ -1637,6 +1718,16 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
           null_length, 255);
   }
 
+  if (share->db_create_options & HA_OPTION_TEXT_CREATE_OPTIONS)
+  {
+    DBUG_ASSERT(options_len);
+    if (engine_table_options_frm_read(options, options_len, share))
+      goto free_and_err;
+  }
+  if (parse_engine_table_options(thd, handler_file->partition_ht(), share))
+    goto free_and_err;
+  my_free(buff, MYF(MY_ALLOW_ZERO_PTR));
+
   if (share->found_next_number_field)
   {
     reg_field= *share->found_next_number_field;
@@ -1678,6 +1769,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   share->null_bytes= (null_pos - (uchar*) null_flags +
                       (null_bit_pos + 7) / 8);
   share->last_null_bit_pos= null_bit_pos;
+  share->null_bytes_for_compare= null_bits_are_used ? share->null_bytes : 0;
+  share->can_cmp_whole_record= (share->blob_fields == 0 &&
+                                share->varchar_fields == 0);
 
   share->db_low_byte_first= handler_file->low_byte_first();
   share->column_bitmap_size= bitmap_buffer_size(share->fields);
@@ -1695,6 +1789,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
 #endif
   DBUG_RETURN (0);
 
+ free_and_err:
+  my_free(buff, MYF(MY_ALLOW_ZERO_PTR));
  err:
   share->error= error;
   share->open_errno= my_errno;
@@ -1714,6 +1810,316 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
   DBUG_RETURN(error);
 } /* open_binary_frm */
 
+/*
+  @brief
+    Clear GET_FIXED_FIELDS_FLAG in all fields of a table
+
+  @param
+    table     The table for whose fields the flags are to be cleared
+
+  @note
+    This routine is used for error handling purposes.
+
+  @return
+    none
+*/
+
+static void clear_field_flag(TABLE *table)
+{
+  Field **ptr;
+  DBUG_ENTER("clear_field_flag");
+
+  for (ptr= table->field; *ptr; ptr++)
+    (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  @brief 
+    Perform semantic analysis of the defining expression for a virtual column
+
+  @param
+    thd           The thread object
+  @param
+    table         The table containing the virtual column
+  @param
+    vcol_field    The virtual field whose defining expression is to be analyzed
+
+  @details
+    The function performs semantic analysis of the defining expression for
+    the virtual column vcol_field. The expression is used to compute the
+    values of this column.
+
+  @note
+   The function exploits the fact  that the fix_fields method sets the flag 
+   GET_FIXED_FIELDS_FLAG for all fields in the item tree.
+   This flag must always be unset before returning from this function
+   since it is used for other purposes as well.
+ 
+  @retval
+    TRUE           An error occurred, something was wrong with the function
+  @retval
+    FALSE          Otherwise
+*/
+
+bool fix_vcol_expr(THD *thd,
+                   TABLE *table,
+                   Field *vcol_field)
+{
+  Virtual_column_info *vcol_info= vcol_field->vcol_info;
+  Item* func_expr= vcol_info->expr_item;
+  uint dir_length, home_dir_length;
+  bool result= TRUE;
+  TABLE_LIST tables;
+  TABLE_LIST *save_table_list, *save_first_table, *save_last_table;
+  int error;
+  Name_resolution_context *context;
+  const char *save_where;
+  char* db_name;
+  char db_name_string[FN_REFLEN];
+  bool save_use_only_table_context;
+  Field **ptr, *field;
+  enum_mark_columns save_mark_used_columns= thd->mark_used_columns;
+  DBUG_ASSERT(func_expr);
+  DBUG_ENTER("fix_vcol_expr");
+
+  /*
+    Set-up the TABLE_LIST object to be a list with a single table
+    Set the object to zero to create NULL pointers and set alias
+    and real name to table name and get database name from file name.
+  */
+
+  bzero((void*)&tables, sizeof(TABLE_LIST));
+  tables.alias= tables.table_name= (char*) table->s->table_name.str;
+  tables.table= table;
+  tables.next_local= 0;
+  tables.next_name_resolution_table= 0;
+  strmov(db_name_string, table->s->normalized_path.str);
+  dir_length= dirname_length(db_name_string);
+  db_name_string[dir_length - 1]= 0;
+  home_dir_length= dirname_length(db_name_string);
+  db_name= &db_name_string[home_dir_length];
+  tables.db= db_name;
+
+  thd->mark_used_columns= MARK_COLUMNS_NONE;
+
+  context= thd->lex->current_context();
+  table->map= 1; //To ensure correct calculation of const item
+  table->get_fields_in_item_tree= TRUE;
+  save_table_list= context->table_list;
+  save_first_table= context->first_name_resolution_table;
+  save_last_table= context->last_name_resolution_table;
+  context->table_list= &tables;
+  context->first_name_resolution_table= &tables;
+  context->last_name_resolution_table= NULL;
+  func_expr->walk(&Item::change_context_processor, 0, (uchar*) context);
+  save_where= thd->where;
+  thd->where= "virtual column function";
+
+  /* Save the context before fixing the fields*/
+  save_use_only_table_context= thd->lex->use_only_table_context;
+  thd->lex->use_only_table_context= TRUE;
+  /* Fix fields referenced to by the virtual column function */
+  error= func_expr->fix_fields(thd, (Item**)0);
+  /* Restore the original context*/
+  thd->lex->use_only_table_context= save_use_only_table_context;
+  context->table_list= save_table_list;
+  context->first_name_resolution_table= save_first_table;
+  context->last_name_resolution_table= save_last_table;
+
+  if (unlikely(error))
+  {
+    DBUG_PRINT("info", 
+    ("Field in virtual column expression does not belong to the table"));
+    goto end;
+  }
+  thd->where= save_where;
+  if (unlikely(func_expr->result_type() == ROW_RESULT))
+  {
+     my_error(ER_ROW_EXPR_FOR_VCOL, MYF(0));
+     goto end;
+  }
+#ifdef PARANOID
+  /*
+    Walk through the Item tree checking if all items are valid
+   to be part of the virtual column
+  */
+  error= func_expr->walk(&Item::check_vcol_func_processor, 0, NULL);
+  if (error)
+  {
+    my_error(ER_VIRTUAL_COLUMN_FUNCTION_IS_NOT_ALLOWED, MYF(0), field_name);
+    goto end;
+  }
+#endif
+  if (unlikely(func_expr->const_item()))
+  {
+    my_error(ER_CONST_EXPR_IN_VCOL, MYF(0));
+    goto end;
+  }
+  /* Ensure that this virtual column is not based on another virtual field. */
+  ptr= table->field;
+  while ((field= *(ptr++))) 
+  {
+    if ((field->flags & GET_FIXED_FIELDS_FLAG) &&
+        (field->vcol_info))
+    {
+      my_error(ER_VCOL_BASED_ON_VCOL, MYF(0));
+      goto end;
+    }
+  }
+  result= FALSE;
+
+end:
+
+  /* Clear GET_FIXED_FIELDS_FLAG for the fields of the table */
+  clear_field_flag(table);
+
+  table->get_fields_in_item_tree= FALSE;
+  thd->mark_used_columns= save_mark_used_columns;
+  table->map= 0; //Restore old value
+ 
+ DBUG_RETURN(result);
+}
+
+/*
+  @brief
+    Unpack the definition of a virtual column from its linear representation
+
+  @parm
+    thd                  The thread object
+  @param
+    table                The table containing the virtual column
+  @param
+    field                The field for the virtual
+  @param  
+    vcol_expr            The string representation of the defining expression
+  @param[out]
+    error_reported       The flag to inform the caller that no other error
+                         messages are to be generated
+
+  @details
+    The function takes string representation 'vcol_expr' of the defining
+    expression for the virtual field 'field' of the table 'table' and
+    parses it, building an item object for it. The pointer to this item is
+    placed into in field->vcol_info.expr_item. After this the function performs
+    semantic analysis of the item by calling the the function fix_vcol_expr.
+    Since the defining expression is part of the table definition the item for
+    it is created in table->memroot within the special arena TABLE::expr_arena.
+
+  @note
+    Before passing 'vcol_expr" to the parser the function embraces it in 
+    parenthesis and prepands it a special keyword.
+  
+   @retval
+    FALSE           If a success
+   @retval
+    TRUE            Otherwise
+*/
+bool unpack_vcol_info_from_frm(THD *thd,
+                               TABLE *table,
+                               Field *field,
+                               LEX_STRING *vcol_expr,
+                               bool *error_reported)
+{
+  bool rc;
+  char *vcol_expr_str;
+  int str_len;
+  CHARSET_INFO *old_character_set_client;
+  Query_arena *backup_stmt_arena_ptr;
+  Query_arena backup_arena;
+  Query_arena *vcol_arena= 0;
+  Parser_state parser_state;
+  DBUG_ENTER("unpack_vcol_info_from_frm");
+  DBUG_ASSERT(vcol_expr);
+
+  old_character_set_client= thd->variables.character_set_client;
+  backup_stmt_arena_ptr= thd->stmt_arena;
+
+  /* 
+    Step 1: Construct the input string for the parser.
+    The string to be parsed has to be of the following format:
+    "PARSE_VCOL_EXPR (<expr_string_from_frm>)".
+  */
+  
+  if (!(vcol_expr_str= (char*) alloc_root(&table->mem_root,
+                                          vcol_expr->length + 
+                                            parse_vcol_keyword.length + 3)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  memcpy(vcol_expr_str,
+         (char*) parse_vcol_keyword.str,
+         parse_vcol_keyword.length);
+  str_len= parse_vcol_keyword.length;
+  memcpy(vcol_expr_str + str_len, "(", 1);
+  str_len++;
+  memcpy(vcol_expr_str + str_len, 
+         (char*) vcol_expr->str, 
+         vcol_expr->length);
+  str_len+= vcol_expr->length;
+  memcpy(vcol_expr_str + str_len, ")", 1);
+  str_len++;
+  memcpy(vcol_expr_str + str_len, "\0", 1);
+  str_len++;
+
+  if (parser_state.init(thd, vcol_expr_str, str_len))
+    goto err;
+
+  /* 
+    Step 2: Setup thd for parsing.
+  */
+  vcol_arena= table->expr_arena;
+  if (!vcol_arena)
+  {
+    Query_arena expr_arena(&table->mem_root, Query_arena::INITIALIZED);
+    if (!(vcol_arena= (Query_arena *) alloc_root(&table->mem_root,
+                                                 sizeof(Query_arena))))
+      goto err;
+    *vcol_arena= expr_arena;
+    table->expr_arena= vcol_arena;
+  }
+  thd->set_n_backup_active_arena(vcol_arena, &backup_arena);
+  thd->stmt_arena= vcol_arena;
+
+  thd->lex->parse_vcol_expr= TRUE;
+
+  /* 
+    Step 3: Use the parser to build an Item object from vcol_expr_str.
+  */
+  if (parse_sql(thd, &parser_state, NULL))
+  {
+    goto err;
+  }
+  /* From now on use vcol_info generated by the parser. */
+  field->vcol_info= thd->lex->vcol_info;
+
+  /* Validate the Item tree. */
+  if (fix_vcol_expr(thd, table, field))
+  {
+    *error_reported= TRUE;
+    field->vcol_info= 0;
+    goto err;
+  }
+  rc= FALSE;
+  goto end;
+
+err:
+  rc= TRUE;
+  thd->lex->parse_vcol_expr= FALSE;
+  thd->free_items();
+end:
+  thd->stmt_arena= backup_stmt_arena_ptr;
+  if (vcol_arena)
+    thd->restore_active_arena(vcol_arena, &backup_arena);
+  thd->variables.character_set_client= old_character_set_client;
+
+  DBUG_RETURN(rc);
+}
+
+/*
+  Read data from a binary .frm file from MySQL 3.23 - 5.0 into TABLE_SHARE
+*/
 
 /*
   Open a table based on a TABLE_SHARE
@@ -1748,11 +2154,14 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
   uint records, i, bitmap_size;
   bool error_reported= FALSE;
   uchar *record, *bitmaps;
-  Field **field_ptr;
+  Field **field_ptr, **vfield_ptr;
+  bool save_view_prepare_mode= thd->lex->view_prepare_mode;
   DBUG_ENTER("open_table_from_share");
   DBUG_PRINT("enter",("name: '%s.%s'  form: 0x%lx", share->db.str,
                       share->table_name.str, (long) outparam));
 
+  thd->lex->view_prepare_mode= FALSE; // not a view
+
   error= 1;
   bzero((char*) outparam, sizeof(*outparam));
   outparam->in_use= thd;
@@ -1809,7 +2218,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
       outparam->record[1]= outparam->record[0];   // Safety
   }
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   /*
     We need this because when we read var-length rows, we are not updating
     bytes after end of varchar
@@ -1899,6 +2308,34 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
     }
   }
 
+  /*
+    Process virtual columns, if any.
+  */
+  if (!(vfield_ptr = (Field **) alloc_root(&outparam->mem_root,
+                                          (uint) ((share->vfields+1)*
+                                                  sizeof(Field*)))))
+    goto err;
+
+  outparam->vfield= vfield_ptr;
+  
+  for (field_ptr= outparam->field; *field_ptr; field_ptr++)
+  {
+    if ((*field_ptr)->vcol_info)
+    {
+      if (unpack_vcol_info_from_frm(thd,
+                                    outparam,
+                                    *field_ptr,
+                                    &(*field_ptr)->vcol_info->expr_str,
+                                    &error_reported))
+      {
+        error= 4; // in case no error is reported
+        goto err;
+      }
+      *(vfield_ptr++)= *field_ptr;
+    }
+  }
+  *vfield_ptr= 0;                              // End marker
+
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   if (share->partition_info_str_len && outparam->file)
   {
@@ -1945,8 +2382,8 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
     {
       if (work_part_info_used)
         tmp= fix_partition_func(thd, outparam, is_create_table);
-      outparam->part_info->item_free_list= part_func_arena.free_list;
     }
+    outparam->part_info->item_free_list= part_func_arena.free_list;
 partititon_err:
     if (tmp)
     {
@@ -1964,17 +2401,33 @@ partititon_err:
   }
 #endif
 
+  /* Check virtual columns against table's storage engine. */
+  if (share->vfields && 
+        ((outparam->file && 
+          !outparam->file->check_if_supported_virtual_columns()) ||
+	 (!outparam->file && share->db_type() && 
+	   share->db_type()->db_type == DB_TYPE_CSV_DB))) // Workaround for CSV
+  {
+    my_error(ER_UNSUPPORTED_ACTION_ON_VIRTUAL_COLUMN,
+             MYF(0), 
+             "Specified storage engine");
+    error_reported= TRUE;
+    goto err;
+  }
+
   /* Allocate bitmaps */
 
   bitmap_size= share->column_bitmap_size;
-  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*3)))
+  if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*4)))
     goto err;
   bitmap_init(&outparam->def_read_set,
               (my_bitmap_map*) bitmaps, share->fields, FALSE);
   bitmap_init(&outparam->def_write_set,
               (my_bitmap_map*) (bitmaps+bitmap_size), share->fields, FALSE);
-  bitmap_init(&outparam->tmp_set,
+  bitmap_init(&outparam->def_vcol_set,
               (my_bitmap_map*) (bitmaps+bitmap_size*2), share->fields, FALSE);
+  bitmap_init(&outparam->tmp_set,
+              (my_bitmap_map*) (bitmaps+bitmap_size*3), share->fields, FALSE);
   outparam->default_column_bitmaps();
 
   /* The table struct is now initialized;  Open the table */
@@ -2029,7 +2482,7 @@ partititon_err:
     }
   }
 
-#if defined(HAVE_purify) && !defined(DBUG_OFF)
+#if defined(HAVE_valgrind) && !defined(DBUG_OFF)
   bzero((char*) bitmaps, bitmap_size*3);
 #endif
 
@@ -2038,6 +2491,7 @@ partititon_err:
                                HA_HAS_OWN_BINLOGGING);
   thd->status_var.opened_tables++;
 
+  thd->lex->view_prepare_mode= save_view_prepare_mode;
   DBUG_RETURN (0);
 
  err:
@@ -2050,6 +2504,7 @@ partititon_err:
 #endif
   outparam->file= 0;				// For easier error checking
   outparam->db_stat=0;
+  thd->lex->view_prepare_mode= save_view_prepare_mode;
   free_root(&outparam->mem_root, MYF(0));       // Safe to call on bzero'd root
   my_free((void *) outparam->alias);
   DBUG_RETURN (error);
@@ -2072,13 +2527,21 @@ int closefrm(register TABLE *table, bool free_share)
   DBUG_PRINT("enter", ("table: 0x%lx", (long) table));
 
   if (table->db_stat)
+  {
+    if (table->s->deleting)
+      table->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
     error=table->file->close();
+  }
   my_free((void *) table->alias);
   table->alias= 0;
+  if (table->expr_arena)
+    table->expr_arena->free_items();
   if (table->field)
   {
     for (Field **ptr=table->field ; *ptr ; ptr++)
+    {
       delete *ptr;
+    }
     table->field= 0;
   }
   delete table->file;
@@ -2546,6 +3009,7 @@ File create_frm(THD *thd, const char *name, const char *db,
   int create_flags= O_RDWR | O_TRUNC;
   ulong key_comment_total_bytes= 0;
   uint i;
+  DBUG_ENTER("create_frm");
 
   if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
     create_flags|= O_EXCL | O_NOFOLLOW;
@@ -2612,11 +3076,8 @@ File create_frm(THD *thd, const char *name, const char *db,
     csid= (create_info->default_table_charset ?
            create_info->default_table_charset->number : 0);
     fileinfo[38]= (uchar) csid;
-    /*
-      In future versions, we will store in fileinfo[39] the values of the
-      TRANSACTIONAL and PAGE_CHECKSUM clauses of CREATE TABLE.
-    */
-    fileinfo[39]= 0;
+    fileinfo[39]= (uchar) ((uint) create_info->transactional |
+                           ((uint) create_info->page_checksum << 2));
     fileinfo[40]= (uchar) create_info->row_type;
     /* Next few bytes where for RAID support */
     fileinfo[41]= (uchar) (csid >> 8);
@@ -2652,7 +3113,7 @@ File create_frm(THD *thd, const char *name, const char *db,
     else
       my_error(ER_CANT_CREATE_TABLE,MYF(0),table,my_errno);
   }
-  return (file);
+  DBUG_RETURN(file);
 } /* create_frm */
 
 
@@ -2669,6 +3130,9 @@ void update_create_info_from_table(HA_CREATE_INFO *create_info, TABLE *table)
   create_info->default_table_charset= share->table_charset;
   create_info->table_charset= 0;
   create_info->comment= share->comment;
+  create_info->transactional= share->transactional;
+  create_info->page_checksum= share->page_checksum;
+  create_info->option_list= share->option_list;
 
   DBUG_VOID_RETURN;
 }
@@ -2790,15 +3254,15 @@ bool check_db_name(LEX_STRING *org_name)
   uint name_length= org_name->length;
   bool check_for_path_chars;
 
-  if (!name_length || name_length > NAME_LEN)
-    return 1;
-
   if ((check_for_path_chars= check_mysql50_prefix(name)))
   {
     name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
     name_length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
   }
 
+  if (!name_length || name_length > NAME_LEN)
+    return 1;
+
   if (lower_case_table_names && name != any_db)
     my_casedn_str(files_charset_info, name);
 
@@ -2817,6 +3281,15 @@ bool check_table_name(const char *name, size_t length, bool check_for_path_chars
   // name length in symbols
   size_t name_length= 0;
   const char *end= name+length;
+
+
+  if (!check_for_path_chars &&
+      (check_for_path_chars= check_mysql50_prefix(name)))
+  {
+    name+= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
+    length-= MYSQL50_TABLE_NAME_PREFIX_LENGTH;
+  }
+
   if (!length || length > NAME_LEN)
     return 1;
 #if defined(USE_MB) && defined(USE_MB_IDENT)
@@ -2835,7 +3308,7 @@ bool check_table_name(const char *name, size_t length, bool check_for_path_chars
       int len=my_ismbchar(system_charset_info, name, end);
       if (len)
       {
-        name += len;
+        name+= len;
         name_length++;
         continue;
       }
@@ -3656,7 +4129,7 @@ bool TABLE_LIST::prep_check_option(THD *thd, uint8 check_opt_type)
     const char *save_where= thd->where;
     thd->where= "check option";
     if ((!check_option->fixed &&
-        check_option->fix_fields(thd, &check_option)) ||
+         check_option->fix_fields(thd, &check_option)) ||
         check_option->check_cols(1))
     {
       DBUG_RETURN(TRUE);
@@ -4047,11 +4520,8 @@ bool TABLE_LIST::prepare_view_securety_context(THD *thd)
   {
     DBUG_PRINT("info", ("This table is suid view => load contest"));
     DBUG_ASSERT(view && view_sctx);
-    if (acl_getroot_no_password(view_sctx,
-                                definer.user.str,
-                                definer.host.str,
-                                definer.host.str,
-                                thd->db))
+    if (acl_getroot(view_sctx, definer.user.str, definer.host.str,
+                                definer.host.str, thd->db))
     {
       if ((thd->lex->sql_command == SQLCOM_SHOW_CREATE) ||
           (thd->lex->sql_command == SQLCOM_SHOW_FIELDS))
@@ -4379,15 +4849,15 @@ void Field_iterator_table_ref::set_field_iterator()
     DBUG_ASSERT(table_ref->is_natural_join ||
                 table_ref->nested_join ||
                 (table_ref->join_columns &&
-                /* This is a merge view. */
-                ((table_ref->field_translation &&
-                  table_ref->join_columns->elements ==
-                  (ulong)(table_ref->field_translation_end -
-                          table_ref->field_translation)) ||
-                 /* This is stored table or a tmptable view. */
-                 (!table_ref->field_translation &&
-                  table_ref->join_columns->elements ==
-                  table_ref->table->s->fields))));
+                 /* This is a merge view. */
+                 ((table_ref->field_translation &&
+                   table_ref->join_columns->elements ==
+                   (ulong)(table_ref->field_translation_end -
+                           table_ref->field_translation)) ||
+                  /* This is stored table or a tmptable view. */
+                  (!table_ref->field_translation &&
+                   table_ref->join_columns->elements ==
+                   table_ref->table->s->fields))));
     field_it= &natural_join_it;
     DBUG_PRINT("info",("field_it for '%s' is Field_iterator_natural_join",
                        table_ref->alias));
@@ -4646,9 +5116,10 @@ void TABLE::clear_column_bitmaps()
     Reset column read/write usage. It's identical to:
     bitmap_clear_all(&table->def_read_set);
     bitmap_clear_all(&table->def_write_set);
+    bitmap_clear_all(&table->def_vcol_set);
   */
-  bzero((char*) def_read_set.bitmap, s->column_bitmap_size*2);
-  column_bitmaps_set(&def_read_set, &def_write_set);
+  bzero((char*) def_read_set.bitmap, s->column_bitmap_size*3);
+  column_bitmaps_set(&def_read_set, &def_write_set, &def_vcol_set);
 }
 
 
@@ -4691,7 +5162,7 @@ void TABLE::mark_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::mark_columns_used_by_index");
 
-  set_keyread(TRUE);
+  enable_keyread();
   bitmap_clear_all(bitmap);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, bitmap);
@@ -4712,7 +5183,7 @@ void TABLE::add_read_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::add_read_columns_used_by_index");
 
-  set_keyread(TRUE);
+  enable_keyread();
   bitmap_copy(bitmap, read_set);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, write_set);
@@ -4735,7 +5206,7 @@ void TABLE::restore_column_maps_after_mark_index()
 {
   DBUG_ENTER("TABLE::restore_column_maps_after_mark_index");
 
-  set_keyread(FALSE);
+  disable_keyread();
   default_column_bitmaps();
   file->column_bitmaps_signal();
   DBUG_VOID_RETURN;
@@ -4753,7 +5224,14 @@ void TABLE::mark_columns_used_by_index_no_reset(uint index,
   KEY_PART_INFO *key_part_end= (key_part +
                                 key_info[index].key_parts);
   for (;key_part != key_part_end; key_part++)
+  {
     bitmap_set_bit(bitmap, key_part->fieldnr-1);
+    if (key_part->field->vcol_info &&
+        key_part->field->vcol_info->expr_item)
+      key_part->field->vcol_info->
+               expr_item->walk(&Item::register_field_in_bitmap, 
+                               1, (uchar *) bitmap);
+  }
 }
 
 
@@ -4880,6 +5358,8 @@ void TABLE::mark_columns_needed_for_update()
       file->column_bitmaps_signal();
     }
   }
+  /* Mark all virtual columns needed for update */
+  mark_virtual_columns_for_write(FALSE);
   DBUG_VOID_RETURN;
 }
 
@@ -4906,6 +5386,232 @@ void TABLE::mark_columns_needed_for_insert()
   }
   if (found_next_number_field)
     mark_auto_increment_column();
+  /* Mark virtual columns for insert */
+  mark_virtual_columns_for_write(TRUE);
+}
+
+
+/*
+   @brief Mark a column as virtual used by the query
+
+   @param field           the field for the column to be marked
+
+   @details
+     The function marks the column for 'field' as virtual (computed)
+     in the bitmap vcol_set.
+     If the column is marked for the first time the expression to compute
+     the column is traversed and all columns that are occurred there are
+     marked in the read_set of the table.
+
+   @retval
+     TRUE       if column is marked for the first time
+   @retval
+     FALSE      otherwise
+*/
+
+bool st_table::mark_virtual_col(Field *field)
+{
+  bool res;
+  DBUG_ASSERT(field->vcol_info);
+  if (!(res= bitmap_fast_test_and_set(vcol_set, field->field_index)))
+  {
+    Item *vcol_item= field->vcol_info->expr_item;
+    DBUG_ASSERT(vcol_item);
+    vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+  }
+  return res;
+}
+
+
+/* 
+  @brief Mark virtual columns for update/insert commands
+    
+  @param insert_fl    <-> virtual columns are marked for insert command 
+
+  @details
+    The function marks virtual columns used in a update/insert commands
+    in the vcol_set bitmap.
+    For an insert command a virtual column is always marked in write_set if
+    it is a stored column.
+    If a virtual column is from  write_set it is always marked in vcol_set.
+    If a stored virtual column is not from write_set but it is computed
+    through columns from write_set it is also marked in vcol_set, and,
+    besides, it is added to write_set. 
+
+  @return       void
+
+  @note
+    Let table t1 have columns a,b,c and let column c be a stored virtual 
+    column computed through columns a and b. Then for the query
+      UPDATE t1 SET a=1
+    column c will be placed into vcol_set and into write_set while
+    column b will be placed into read_set.
+    If column c was a virtual column, but not a stored virtual column
+    then it would not be added to any of the sets. Column b would not
+    be added to read_set either.           
+*/
+
+void st_table::mark_virtual_columns_for_write(bool insert_fl)
+{
+  Field **vfield_ptr, *tmp_vfield;
+  bool bitmap_updated= FALSE;
+
+  for (vfield_ptr= vfield; *vfield_ptr; vfield_ptr++)
+  {
+    tmp_vfield= *vfield_ptr;
+    if (bitmap_is_set(write_set, tmp_vfield->field_index))
+      bitmap_updated= mark_virtual_col(tmp_vfield);
+    else if (tmp_vfield->stored_in_db)
+    {
+      bool mark_fl= insert_fl;
+      if (!mark_fl)
+      {
+        MY_BITMAP *save_read_set;
+        Item *vcol_item= tmp_vfield->vcol_info->expr_item;
+        DBUG_ASSERT(vcol_item);
+        bitmap_clear_all(&tmp_set);
+        save_read_set= read_set;
+        read_set= &tmp_set;
+        vcol_item->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+        read_set= save_read_set;
+        bitmap_intersect(&tmp_set, write_set);
+        mark_fl= !bitmap_is_clear_all(&tmp_set);
+      }
+      if (mark_fl)
+      {
+        bitmap_set_bit(write_set, tmp_vfield->field_index);
+        mark_virtual_col(tmp_vfield);
+        bitmap_updated= TRUE;
+      }
+    } 
+  }
+  if (bitmap_updated)
+    file->column_bitmaps_signal();
+}
+
+
+/**
+  Allocate space for keys
+
+  @param key_count  number of keys to allocate.
+
+  @details
+  Allocates space enough to fit 'key_count' keys for this table.
+
+  @return FALSE space was successfully allocated.
+  @return TRUE an error occur.
+*/
+
+bool TABLE::alloc_keys(uint key_count)
+{
+  DBUG_ASSERT(!s->keys);
+  key_info= s->key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*key_count);
+  max_keys= key_count;
+  return !(key_info);
+}
+
+
+/**
+  Add a key to a temporary  table
+
+  @param key            the number of the key
+  @param key_parts      number of components of the key
+  @param next_field_no  the call-back function that returns the number of
+                        the field used as the next component of the key
+  @param arg            the argument for the above function
+  @param unique         Is it unique index
+
+  @details
+  The function adds a new key to the table that is assumed to be
+  temprary table. The call-back function must at each call must return
+  the number of the field that used as next component of this key
+
+  @return FALSE is a success
+  @return TRUE if a failure
+*/
+
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+                        uint (*next_field_no) (uchar *), uchar *arg,
+                        bool unique)
+{
+  DBUG_ASSERT(key < max_keys);
+
+  char buf[NAME_CHAR_LEN];
+  KEY* keyinfo;
+  Field **reg_field;
+  uint i;
+  bool key_start= TRUE;
+  KEY_PART_INFO* key_part_info=
+      (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
+  if (!key_part_info)
+    return TRUE;
+  keyinfo= key_info + key;
+  keyinfo->key_part= key_part_info;
+  keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+  keyinfo->key_length=0;
+  keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+  keyinfo->flags= HA_GENERATED_KEY;
+  if (unique)
+    keyinfo->flags|= HA_NOSAME;
+  sprintf(buf, "key%i", key);
+  if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+    return TRUE;
+  keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+                                            sizeof(ulong)*key_parts);
+  if (!keyinfo->rec_per_key)
+    return TRUE;
+  bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+  for (i= 0; i < key_parts; i++)
+  {
+    reg_field= field + next_field_no(arg);
+    if (key_start)
+      (*reg_field)->key_start.set_bit(key);
+    key_start= FALSE;
+      (*reg_field)->part_of_key.set_bit(key);
+    (*reg_field)->flags|= PART_KEY_FLAG;
+    key_part_info->null_bit= (*reg_field)->null_bit;
+    key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+                                          (uchar*) record[0]);
+    key_part_info->field=    *reg_field;
+    key_part_info->offset=   (*reg_field)->offset(record[0]);
+    key_part_info->length=   (uint16) (*reg_field)->pack_length();
+    keyinfo->key_length+= key_part_info->length;
+    key_part_info->key_part_flag= 0;
+    /* TODO:
+      The below method of computing the key format length of the
+      key part is a copy/paste from opt_range.cc, and table.cc.
+      This should be factored out, e.g. as a method of Field.
+      In addition it is not clear if any of the Field::*_length
+      methods is supposed to compute the same length. If so, it
+      might be reused.
+    */
+    key_part_info->store_length= key_part_info->length;
+
+    if ((*reg_field)->real_maybe_null())
+    {
+      key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+      keyinfo->key_length+= HA_KEY_NULL_LENGTH;
+      if (unique)
+        keyinfo->flags|= HA_NULL_ARE_EQUAL;     // def. that NULL == NULL
+    }
+    if ((*reg_field)->type() == MYSQL_TYPE_BLOB || 
+        (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR)
+    {
+      key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+      keyinfo->key_length+= HA_KEY_BLOB_LENGTH; // ???
+    }
+
+    key_part_info->type=     (uint8) (*reg_field)->key_type();
+    key_part_info->key_type =
+      ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
+       (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
+       (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
+      0 : FIELDFLAG_BINARY;
+    key_part_info++;
+  }
+  set_if_bigger(s->max_key_length, keyinfo->key_length);
+  s->keys++;
+  return FALSE;
 }
 
 
@@ -5231,6 +5937,56 @@ bool is_simple_order(ORDER *order)
   return TRUE;
 }
 
+/*
+  @brief Compute values for virtual columns used in query
+
+  @param  thd              Thread handle
+  @param  table            The TABLE object
+  @param  for_write        Requests to compute only fields needed for write   
+  
+  @details
+    The function computes the values of the virtual columns of the table and
+    stores them in the table record buffer.
+    Only fields from vcol_set are computed, and, when the flag for_write is not
+    set to TRUE, a virtual field is computed only if it's not stored.
+    The flag for_write is set to TRUE for row insert/update operations. 
+ 
+  @retval
+    0    Success
+  @retval
+    >0   Error occurred when storing a virtual field value
+*/
+
+int update_virtual_fields(THD *thd, TABLE *table, bool for_write)
+{
+  DBUG_ENTER("update_virtual_fields");
+  Field **vfield_ptr, *vfield;
+  int error= 0;
+  if (!table || !table->vfield)
+    DBUG_RETURN(0);
+
+  thd->reset_arena_for_cached_items(table->expr_arena);
+  /* Iterate over virtual fields in the table */
+  for (vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
+  {
+    vfield= (*vfield_ptr);
+    DBUG_ASSERT(vfield->vcol_info && vfield->vcol_info->expr_item);
+    /* Only update those fields that are marked in the vcol_set bitmap */
+    if (bitmap_is_set(table->vcol_set, vfield->field_index) &&
+        (for_write || !vfield->stored_in_db))
+    {
+      /* Compute the actual value of the virtual fields */
+      error= vfield->vcol_info->expr_item->save_in_field(vfield, 0);
+      DBUG_PRINT("info", ("field '%s' - updated", vfield->field_name));
+    }
+    else
+    {
+      DBUG_PRINT("info", ("field '%s' - skipped", vfield->field_name));
+    }
+  }
+  thd->reset_arena_for_cached_items(0);
+  DBUG_RETURN(0);
+}
 
 /*****************************************************************************
 ** Instansiate templates
diff --git a/sql/table.h b/sql/table.h
index 93b91b3a3d4..2ba4b9dccd3 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -176,6 +176,9 @@ protected:
   CHARSET_INFO *m_connection_cl;
 };
 
+class Query_arena;
+
+/*************************************************************************/
 
 /**
  View_creation_ctx -- creation context of view objects.
@@ -573,6 +576,9 @@ struct TABLE_SHARE
   I_P_List <TABLE, TABLE_share> used_tables;
   I_P_List <TABLE, TABLE_share> free_tables;
 
+  engine_option_value *option_list;     /* text options for table */
+  void *option_struct;                  /* structure with parsed options */
+
   /* The following is copied to each TABLE on OPEN */
   Field **field;
   Field **found_next_number_field;
@@ -612,6 +618,8 @@ struct TABLE_SHARE
   ulong   avg_row_length;		/* create information */
   ulong   version, mysql_version;
   ulong   reclength;			/* Recordlength */
+  /* Stored record length. No generated-only virtual fields are included */
+  ulong   stored_rec_length;            
 
   plugin_ref db_plugin;			/* storage engine plugin */
   inline handlerton *db_type() const	/* table_type for handler */
@@ -622,11 +630,23 @@ struct TABLE_SHARE
   enum row_type row_type;		/* How rows are stored */
   enum tmp_table_type tmp_table;
 
+  /** Transactional or not. */
+  enum ha_choice transactional;
+  /** Per-page checksums or not. */
+  enum ha_choice page_checksum;
+
   uint ref_count;                       /* How many TABLE objects uses this */
   uint blob_ptr_size;			/* 4 or 8 */
   uint key_block_size;			/* create key_block_size, if used */
   uint null_bytes, last_null_bit_pos;
+  /*
+    Same as null_bytes, except that if there is only a 'delete-marker' in
+    the record then this value is 0.
+  */
+  uint null_bytes_for_compare;
   uint fields;				/* Number of fields */
+  /* Number of stored fields, generated-only virtual fields are not included */
+  uint stored_fields;                   
   uint rec_buff_length;                 /* Size of table->record[] buffer */
   uint keys, key_parts;
   uint max_key_length, max_unique_length, total_key_length;
@@ -647,12 +667,16 @@ struct TABLE_SHARE
   uint error, open_errno, errarg;       /* error from open_table_def() */
   uint column_bitmap_size;
   uchar frm_version;
+  uint vfields;                         /* Number of computed (virtual) fields */
   bool null_field_first;
   bool system;                          /* Set if system table (one record) */
   bool crypted;                         /* If .frm file is crypted */
   bool db_low_byte_first;		/* Portable row format */
   bool crashed;
   bool is_view;
+  bool deleting;                        /* going to delete this table */
+  bool can_cmp_whole_record;
+#warning look at can_cmp_whole_record
   ulong table_map_id;                   /* for row-based replication */
 
   /*
@@ -948,6 +972,7 @@ public:
   Field *next_number_field;		/* Set if next_number is activated */
   Field *found_next_number_field;	/* Set on open */
   Field_timestamp *timestamp_field;
+  Field **vfield;                       /* Pointer to virtual fields*/
 
   /* Table's triggers, 0 if there are no of them */
   Table_triggers_list *triggers;
@@ -958,8 +983,8 @@ public:
   const char	*alias;            	  /* alias or table name */
   uchar		*null_flags;
   my_bitmap_map	*bitmap_init_value;
-  MY_BITMAP     def_read_set, def_write_set, tmp_set; /* containers */
-  MY_BITMAP     *read_set, *write_set;          /* Active column sets */
+  MY_BITMAP     def_read_set, def_write_set, def_vcol_set, tmp_set; 
+  MY_BITMAP     *read_set, *write_set, *vcol_set; /* Active column sets */
   /*
    The ID of the query that opened and is using this table. Has different
    meanings depending on the table type.
@@ -1024,10 +1049,11 @@ public:
   uint          temp_pool_slot;		/* Used by intern temp tables */
   uint		status;                 /* What's in record[0] */
   uint		db_stat;		/* mode of file as in handler.h */
+  uint          max_keys;               /* Size of allocated key_info array. */
   /* number of select if it is derived table */
   uint          derived_select_number;
   int		current_lock;           /* Type of lock on table */
-  my_bool copy_blobs;			/* copy_blobs when storing */
+  bool copy_blobs;			/* copy_blobs when storing */
 
   /*
     0 or JOIN_TYPE_{LEFT|RIGHT}. Currently this is only compared to 0.
@@ -1039,56 +1065,64 @@ public:
     If true, the current table row is considered to have all columns set to 
     NULL, including columns declared as "not null" (see maybe_null).
   */
-  my_bool null_row;
+  bool null_row;
 
   /*
     TODO: Each of the following flags take up 8 bits. They can just as easily
     be put into one single unsigned long and instead of taking up 18
     bytes, it would take up 4.
   */
-  my_bool force_index;
+  bool force_index;
 
   /**
     Flag set when the statement contains FORCE INDEX FOR ORDER BY
     See TABLE_LIST::process_index_hints().
   */
-  my_bool force_index_order;
+  bool force_index_order;
 
   /**
     Flag set when the statement contains FORCE INDEX FOR GROUP BY
     See TABLE_LIST::process_index_hints().
   */
-  my_bool force_index_group;
-  my_bool distinct,const_table,no_rows;
+  bool force_index_group;
+  bool distinct,const_table,no_rows;
 
   /**
      If set, the optimizer has found that row retrieval should access index 
      tree only.
    */
-  my_bool key_read;
-  my_bool no_keyread;
-  my_bool locked_by_logger;
-  my_bool no_replicate;
-  my_bool locked_by_name;
-  my_bool fulltext_searched;
-  my_bool no_cache;
+  bool key_read;
+  bool no_keyread;
+  bool locked_by_logger;
+  bool no_replicate;
+  bool locked_by_name;
+  bool fulltext_searched;
+  bool no_cache;
   /* To signal that the table is associated with a HANDLER statement */
-  my_bool open_by_handler;
+  bool open_by_handler;
   /*
     To indicate that a non-null value of the auto_increment field
     was provided by the user or retrieved from the current record.
     Used only in the MODE_NO_AUTO_VALUE_ON_ZERO mode.
   */
-  my_bool auto_increment_field_not_null;
-  my_bool insert_or_update;             /* Can be used by the handler */
-  my_bool alias_name_used;		/* true if table_name is alias */
-  my_bool get_fields_in_item_tree;      /* Signal to fix_field */
-  my_bool m_needs_reopen;
+  bool auto_increment_field_not_null;
+  bool insert_or_update;             /* Can be used by the handler */
+  bool alias_name_used;		/* true if table_name is alias */
+  bool get_fields_in_item_tree;      /* Signal to fix_field */
+  bool m_needs_reopen;
 
   REGINFO reginfo;			/* field connections */
   MEM_ROOT mem_root;
   GRANT_INFO grant;
   FILESORT_INFO sort;
+  /*
+    The arena which the items for expressions from the table definition
+    are associated with.  
+    Currently only the items of the expressions for virtual columns are
+    associated with this arena.
+    TODO: To attach the partitioning expressions to this arena.  
+  */
+  Query_arena *expr_arena;
 #ifdef WITH_PARTITION_STORAGE_ENGINE
   partition_info *part_info;            /* Partition related information */
   bool no_partitions_used; /* If true, all partitions have been pruned away */
@@ -1107,6 +1141,8 @@ public:
   void mark_columns_needed_for_update(void);
   void mark_columns_needed_for_delete(void);
   void mark_columns_needed_for_insert(void);
+  bool mark_virtual_col(Field *field);
+  void mark_virtual_columns_for_write(bool insert_fl);
   inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
                                  MY_BITMAP *write_set_arg)
   {
@@ -1115,12 +1151,30 @@ public:
     if (file)
       file->column_bitmaps_signal();
   }
+  inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
+                                 MY_BITMAP *write_set_arg,
+                                 MY_BITMAP *vcol_set_arg)
+  {
+    read_set= read_set_arg;
+    write_set= write_set_arg;
+    vcol_set= vcol_set_arg;
+    if (file)
+      file->column_bitmaps_signal();
+  }
   inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
                                            MY_BITMAP *write_set_arg)
   {
     read_set= read_set_arg;
     write_set= write_set_arg;
   }
+  inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
+                                           MY_BITMAP *write_set_arg,
+                                           MY_BITMAP *vcol_set_arg)
+  {
+    read_set= read_set_arg;
+    write_set= write_set_arg;
+    vcol_set= vcol_set_arg;
+  }
   inline void use_all_columns()
   {
     column_bitmaps_set(&s->all_set, &s->all_set);
@@ -1129,24 +1183,33 @@ public:
   {
     read_set= &def_read_set;
     write_set= &def_write_set;
+    vcol_set= &def_vcol_set;
   }
   /** Should this instance of the table be reopened? */
   inline bool needs_reopen()
   { return !db_stat || m_needs_reopen; }
 
-  inline void set_keyread(bool flag)
+  bool alloc_keys(uint key_count);
+  bool add_tmp_key(uint key, uint key_parts,
+                   uint (*next_field_no) (uchar *), uchar *arg,
+                   bool unique);
+  inline void enable_keyread()
   {
-    DBUG_ASSERT(file);
-    if (flag && !key_read)
-    {
-      key_read= 1;
-      file->extra(HA_EXTRA_KEYREAD);
-    }
-    else if (!flag && key_read)
+    DBUG_ENTER("enable_keyread");
+    DBUG_ASSERT(key_read == 0);
+    key_read= 1;
+    file->extra(HA_EXTRA_KEYREAD);
+    DBUG_VOID_RETURN;
+  }
+  inline void disable_keyread()
+  {
+    DBUG_ENTER("disable_keyread");
+    if (key_read)
     {
       key_read= 0;
       file->extra(HA_EXTRA_NO_KEYREAD);
     }
+    DBUG_VOID_RETURN;
   }
 
   bool update_const_key_parts(COND *conds);
@@ -1334,6 +1397,11 @@ enum enum_open_type
 };
 
 
+class SJ_MATERIALIZATION_INFO;
+class Index_hint;
+class Item_in_subselect;
+
+
 /*
   Table reference in the FROM clause.
 
@@ -1406,6 +1474,20 @@ struct TABLE_LIST
   char		*db, *alias, *table_name, *schema_table_name;
   char          *option;                /* Used by cache index  */
   Item		*on_expr;		/* Used with outer join */
+
+  Item          *sj_on_expr;
+  /*
+    (Valid only for semi-join nests) Bitmap of tables that are within the
+    semi-join (this is different from bitmap of all nest's children because
+    tables that were pulled out of the semi-join nest remain listed as
+    nest's children).
+  */
+  table_map     sj_inner_tables;
+  /* Number of IN-compared expressions */
+  uint          sj_in_exprs; 
+  Item_in_subselect  *sj_subq_pred;
+  SJ_MATERIALIZATION_INFO *sj_mat_info;
+
   /*
     The structure of ON expression presented in the member above
     can be changed during certain optimizations. This member
@@ -1643,6 +1725,7 @@ struct TABLE_LIST
   */
   bool          is_fqtn;
 
+  bool          deleting;               /* going to delete this table */
 
   /* View creation context. */
 
@@ -1695,7 +1778,8 @@ struct TABLE_LIST
   {
     return derived || view || schema_table || !table;
   }
-  void print(THD *thd, String *str, enum_query_type query_type);
+  void print(THD *thd, table_map eliminated_tables, String *str, 
+             enum_query_type query_type);
   bool check_single_table(TABLE_LIST **table, table_map map,
                           TABLE_LIST *view);
   bool set_insert_values(MEM_ROOT *mem_root);
@@ -1928,7 +2012,11 @@ public:
 typedef struct st_nested_join
 {
   List<TABLE_LIST>  join_list;       /* list of elements in the nested join */
-  table_map         used_tables;     /* bitmap of tables in the nested join */
+  /* 
+    Bitmap of tables within this nested join (including those embedded within
+    its children), including tables removed by table elimination.
+  */
+  table_map         used_tables;
   table_map         not_null_tables; /* tables that rejects nulls           */
   /**
     Used for pointing out the first table in the plan being covered by this
@@ -1943,7 +2031,20 @@ typedef struct st_nested_join
     Before each use the counters are zeroed by reset_nj_counters.
   */
   uint              counter;
+  /*
+    Number of elements in join_list that were not (or contain table(s) that 
+    weren't) removed by table elimination.
+  */
+  uint              n_tables;
   nested_join_map   nj_map;          /* Bit used to identify this nested join*/
+  /*
+    (Valid only for semi-join nests) Bitmap of tables outside the semi-join
+    that are used within the semi-join's ON condition.
+  */
+  table_map         sj_depends_on;
+  /* Outer non-trivially correlated tables */
+  table_map         sj_corr_tables;
+  List<Item>        sj_outer_expr_list;
   /**
      True if this join nest node is completely covered by the query execution
      plan. This means two things.
diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc
index 79a6fd79d4c..bbcfeeda3e8 100644
--- a/sql/thr_malloc.cc
+++ b/sql/thr_malloc.cc
@@ -67,11 +67,13 @@ void init_sql_alloc(MEM_ROOT *mem_root, uint block_size, uint pre_alloc)
 }
 
 
+#ifndef MYSQL_CLIENT
 void *sql_alloc(size_t Size)
 {
   MEM_ROOT *root= *my_pthread_getspecific_ptr(MEM_ROOT**,THR_MALLOC);
   return alloc_root(root,Size);
 }
+#endif
 
 
 void *sql_calloc(size_t size)
diff --git a/sql/tztime.cc b/sql/tztime.cc
index a2f319d4307..1a99417fdca 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -456,7 +456,7 @@ prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
 
     if (end_t == MY_TIME_T_MAX ||
         ((cur_off_and_corr > 0) &&
-        (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
+         (end_t >= MY_TIME_T_MAX - cur_off_and_corr)))
       /* end of t space */
       break;
 
@@ -1716,7 +1716,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
 
   tz_leapcnt= 0;
 
-  res= table->file->index_first(table->record[0]);
+  res= table->file->ha_index_first(table->record[0]);
 
   while (!res)
   {
@@ -1738,7 +1738,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
                 tz_leapcnt, (ulong) tz_lsis[tz_leapcnt-1].ls_trans,
                 tz_lsis[tz_leapcnt-1].ls_corr));
 
-    res= table->file->index_next(table->record[0]);
+    res= table->file->ha_index_next(table->record[0]);
   }
 
   (void)table->file->ha_index_end();
@@ -1854,6 +1854,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   uint tzid, ttid;
   my_time_t ttime;
   char buff[MAX_FIELD_WIDTH];
+  uchar keybuff[32];
+  Field *field;
   String abbr(buff, sizeof(buff), &my_charset_latin1);
   char *alloc_buff, *tz_name_buff;
   /*
@@ -1906,8 +1908,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   (void)table->file->ha_index_init(0, 1);
 
-  if (table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                  HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_map(table->record[0], table->field[0]->ptr,
+                                     HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
 #ifdef EXTRA_DEBUG
     /*
@@ -1931,11 +1933,16 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   table= tz_tables->table;
   tz_tables= tz_tables->next_local;
-  table->field[0]->store((longlong) tzid, TRUE);
+  field= table->field[0];
+  field->store((longlong) tzid, TRUE);
+  DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
+  field->get_key_image(keybuff,
+                       min(field->key_length(), sizeof(keybuff)),
+                       Field::itRAW);
   (void)table->file->ha_index_init(0, 1);
 
-  if (table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                  HA_WHOLE_KEY, HA_READ_KEY_EXACT))
+  if (table->file->ha_index_read_map(table->record[0], keybuff,
+                                     HA_WHOLE_KEY, HA_READ_KEY_EXACT))
   {
     sql_print_error("Can't find description of time zone '%u'", tzid);
     goto end;
@@ -1958,11 +1965,16 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   */
   table= tz_tables->table;
   tz_tables= tz_tables->next_local;
-  table->field[0]->store((longlong) tzid, TRUE);
+  field= table->field[0];
+  field->store((longlong) tzid, TRUE);
+  DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
+  field->get_key_image(keybuff,
+                       min(field->key_length(), sizeof(keybuff)),
+                       Field::itRAW);
   (void)table->file->ha_index_init(0, 1);
 
-  res= table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                   (key_part_map)1, HA_READ_KEY_EXACT);
+  res= table->file->ha_index_read_map(table->record[0], keybuff,
+                                      (key_part_map)1, HA_READ_KEY_EXACT);
   while (!res)
   {
     ttid= (uint)table->field[1]->val_int();
@@ -2009,8 +2021,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
 
     tmp_tz_info.typecnt= ttid + 1;
 
-    res= table->file->index_next_same(table->record[0],
-                                      table->field[0]->ptr, 4);
+    res= table->file->ha_index_next_same(table->record[0], keybuff, 4);
   }
 
   if (res != HA_ERR_END_OF_FILE)
@@ -2032,8 +2043,8 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
   table->field[0]->store((longlong) tzid, TRUE);
   (void)table->file->ha_index_init(0, 1);
 
-  res= table->file->index_read_map(table->record[0], table->field[0]->ptr,
-                                   (key_part_map)1, HA_READ_KEY_EXACT);
+  res= table->file->ha_index_read_map(table->record[0], keybuff,
+                                      (key_part_map)1, HA_READ_KEY_EXACT);
   while (!res)
   {
     ttime= (my_time_t)table->field[1]->val_int();
@@ -2062,8 +2073,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
       ("time_zone_transition table: tz_id: %u  tt_time: %lu  tt_id: %u",
        tzid, (ulong) ttime, ttid));
 
-    res= table->file->index_next_same(table->record[0],
-                                      table->field[0]->ptr, 4);
+    res= table->file->ha_index_next_same(table->record[0], keybuff, 4);
   }
 
   /*
diff --git a/sql/udf_example.c b/sql/udf_example.c
index 36828bdf94d..e0b72c99d7a 100644
--- a/sql/udf_example.c
+++ b/sql/udf_example.c
@@ -211,7 +211,7 @@ char *is_const(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long
 **		try to keep the error message less than 80 bytes long!
 **
 ** This function should return 1 if something goes wrong. In this case
-** message should contain something usefull!
+** message should contain something useful!
 **************************************************************************/
 
 #define MAXMETAPH 8
diff --git a/sql/udf_example.def b/sql/udf_example.def
index 3d569941cc8..41150b24e8f 100644
--- a/sql/udf_example.def
+++ b/sql/udf_example.def
@@ -3,8 +3,10 @@ VERSION		1.0
 EXPORTS
   lookup
   lookup_init
+  lookup_deinit
   reverse_lookup
   reverse_lookup_init
+  reverse_lookup_deinit
   metaphon_init
   metaphon_deinit
   metaphon
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 690b310bde5..f5ab50c7c90 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -426,7 +426,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
   if (end <= begin ||
       merge_buffer_size < (ulong) (key_length * (end - begin + 1)) ||
       init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
-                 buffpek_compare, &compare_context))
+                 buffpek_compare, &compare_context, 0, 0))
     return 1;
   /* we need space for one key when a piece of merge buffer is re-read */
   merge_buffer_size-= key_length;
@@ -471,7 +471,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
     */
     top->key+= key_length;
     if (--top->mem_count)
-      queue_replaced(&queue);
+      queue_replace_top(&queue);
     else /* next piece should be read */
     {
       /* save old_key not to overwrite it in read_to_buffer */
@@ -481,14 +481,14 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
       if (bytes_read == (uint) (-1))
         goto end;
       else if (bytes_read > 0)      /* top->key, top->mem_count are reset */
-        queue_replaced(&queue);     /* in read_to_buffer */
+        queue_replace_top(&queue);             /* in read_to_buffer */
       else
       {
         /*
           Tree for old 'top' element is empty: remove it from the queue and
           give all its memory to the nearest tree.
         */
-        queue_remove(&queue, 0);
+        queue_remove_top(&queue);
         reuse_freed_buff(&queue, top, key_length);
       }
     }
@@ -606,9 +606,10 @@ bool Unique::get(TABLE *table)
   outfile=table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
                                 MYF(MY_ZEROFILL));
 
-  if (!outfile || (! my_b_inited(outfile) &&
-      open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-		       MYF(MY_WME))))
+  if (!outfile ||
+      (! my_b_inited(outfile) &&
+       open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
+                        MYF(MY_WME))))
     return 1;
   reinit_io_cache(outfile,WRITE_CACHE,0L,0,0);
 
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 6433b8bc7c2..b4fbd2aea3e 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -28,11 +28,15 @@
 #include "sql_partition.h"                      // struct partition_info
 #include "sql_table.h"                          // check_duplicate_warning
 #include "sql_class.h"                  // THD, Internal_error_handler
+#include "create_options.h"
 #include <m_ctype.h>
 #include <assert.h>
 
 #define FCOMP			17		/* Bytes for a packed field */
 
+/* threshold for safe_alloca */
+#define ALLOCA_THRESHOLD       2048
+
 static uchar * pack_screens(List<Create_field> &create_fields,
 			    uint *info_length, uint *screens, bool small_file);
 static uint pack_keys(uchar *keybuff,uint key_count, KEY *key_info,
@@ -116,6 +120,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
   ulong key_buff_length;
   File file;
   ulong filepos, data_offset;
+  uint options_len;
   uchar fileinfo[64],forminfo[288],*keybuff;
   uchar *screen_buff;
   char buff[128];
@@ -166,8 +171,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
   reclength=uint2korr(forminfo+266);
 
   /* Calculate extra data segment length */
-  str_db_type.str= (char *) ha_resolve_storage_engine_name(create_info->db_type);
-  str_db_type.length= strlen(str_db_type.str);
+  str_db_type= *hton_name(create_info->db_type);
   /* str_db_type */
   create_info->extra_size= (2 + str_db_type.length +
                             2 + create_info->connect_string.length);
@@ -191,6 +195,19 @@ bool mysql_create_frm(THD *thd, const char *file_name,
     if (key_info[i].parser_name)
       create_info->extra_size+= key_info[i].parser_name->length + 1;
   }
+
+  options_len= engine_table_options_frm_length(create_info->option_list,
+                                               create_fields,
+                                               keys, key_info);
+  DBUG_PRINT("info", ("Options length: %u", options_len));
+  if (options_len)
+  {
+    create_info->table_options|= HA_OPTION_TEXT_CREATE_OPTIONS;
+    create_info->extra_size+= (options_len + 4);
+  }
+  else
+    create_info->table_options&= ~HA_OPTION_TEXT_CREATE_OPTIONS;
+
   /*
     This gives us the byte-position of the character at
     (character-position, not byte-position) TABLE_COMMENT_MAXLEN.
@@ -334,6 +351,7 @@ bool mysql_create_frm(THD *thd, const char *file_name,
     if (mysql_file_write(file, (uchar*) buff, 6, MYF_RW))
       goto err;
   }
+
   for (i= 0; i < keys; i++)
   {
     if (key_info[i].parser_name)
@@ -353,6 +371,24 @@ bool mysql_create_frm(THD *thd, const char *file_name,
       goto err;
   }
 
+  if (options_len)
+  {
+    uchar *optbuff= (uchar *)my_safe_alloca(options_len + 4, ALLOCA_THRESHOLD);
+    my_bool error;
+    DBUG_PRINT("info", ("Create options length: %u", options_len));
+    if (!optbuff)
+      goto err;
+    int4store(optbuff, options_len);
+    engine_table_options_frm_image(optbuff + 4,
+                                   create_info->option_list,
+                                   create_fields,
+                                   keys, key_info);
+    error= my_write(file, optbuff, options_len + 4, MYF_RW);
+    my_safe_afree(optbuff, options_len + 4, ALLOCA_THRESHOLD);
+    if (error)
+      goto err;
+  }
+
   mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
   if (mysql_file_write(file, forminfo, 288, MYF_RW) ||
       mysql_file_write(file, screen_buff, info_length, MYF_RW) ||
@@ -572,7 +608,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
     int2store(pos+6, key->block_size);
     pos+=8;
     key_parts+=key->key_parts;
-    DBUG_PRINT("loop", ("flags: %lu  key_parts: %d at 0x%lx",
+    DBUG_PRINT("loop", ("flags: %lu  key_parts: %d  key_part: 0x%lx",
                         key->flags, key->key_parts,
                         (long) key->key_part));
     for (key_part=key->key_part,key_part_end=key_part+key->key_parts ;
@@ -642,7 +678,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 {
   uint length,int_count,int_length,no_empty, int_parts;
   uint time_stamp_pos,null_fields;
-  ulong reclength, totlength, n_length, com_length;
+  ulong reclength, totlength, n_length, com_length, vcol_info_length;
   DBUG_ENTER("pack_header");
 
   if (create_fields.elements > MAX_FIELDS)
@@ -653,8 +689,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 
   totlength= 0L;
   reclength= data_offset;
-  no_empty=int_count=int_parts=int_length=time_stamp_pos=null_fields=
-    com_length=0;
+  no_empty=int_count=int_parts=int_length=time_stamp_pos=null_fields=0;
+  com_length=vcol_info_length=0;
   n_length=2L;
 
 	/* Check fields */
@@ -686,6 +722,30 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
                      ER_TOO_LONG_FIELD_COMMENT, warn_buff);
       field->comment.length= tmp_len;
     }
+    if (field->vcol_info)
+    {
+      tmp_len=
+        system_charset_info->cset->charpos(system_charset_info,
+                                           field->vcol_info->expr_str.str,
+                                           field->vcol_info->expr_str.str +
+                                           field->vcol_info->expr_str.length,
+                                           VIRTUAL_COLUMN_EXPRESSION_MAXLEN);
+
+      if (tmp_len < field->vcol_info->expr_str.length)
+      {
+        my_error(ER_WRONG_STRING_LENGTH, MYF(0),
+                 field->vcol_info->expr_str.str,"VIRTUAL COLUMN EXPRESSION",
+                 (uint) VIRTUAL_COLUMN_EXPRESSION_MAXLEN);
+        DBUG_RETURN(1);
+      }
+      /*
+        Sum up the length of the expression string and the length of the
+        mandatory header to the total length of info on the defining 
+        expressions saved in the frm file for virtual columns.
+      */
+      vcol_info_length+= field->vcol_info->expr_str.length+
+                         (uint)FRM_VCOL_HEADER_SIZE;
+    }
 
     totlength+= field->length;
     com_length+= field->comment.length;
@@ -705,8 +765,6 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 	!time_stamp_pos)
       time_stamp_pos= (uint) field->offset+ (uint) data_offset + 1;
     length=field->pack_length;
-    /* Ensure we don't have any bugs when generating offsets */
-    DBUG_ASSERT(reclength == field->offset + data_offset);
     if ((uint) field->offset+ (uint) data_offset+ length > reclength)
       reclength=(uint) (field->offset+ data_offset + length);
     n_length+= (ulong) strlen(field->field_name)+1;
@@ -773,7 +831,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
   /* Hack to avoid bugs with small static rows in MySQL */
   reclength=max(file->min_record_length(table_options),reclength);
   if (info_length+(ulong) create_fields.elements*FCOMP+288+
-      n_length+int_length+com_length > 65535L || int_count > 255)
+      n_length+int_length+com_length+vcol_info_length > 65535L || 
+      int_count > 255)
   {
     my_message(ER_TOO_MANY_FIELDS, ER(ER_TOO_MANY_FIELDS), MYF(0));
     DBUG_RETURN(1);
@@ -781,7 +840,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 
   bzero((char*)forminfo,288);
   length=(info_length+create_fields.elements*FCOMP+288+n_length+int_length+
-	  com_length);
+	  com_length+vcol_info_length);
   int2store(forminfo,length);
   forminfo[256] = (uint8) screens;
   int2store(forminfo+258,create_fields.elements);
@@ -798,7 +857,8 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
   int2store(forminfo+280,22);			/* Rows needed */
   int2store(forminfo+282,null_fields);
   int2store(forminfo+284,com_length);
-  /* Up to forminfo+288 is free to use for additional information */
+  int2store(forminfo+286,vcol_info_length);
+  /* forminfo+288 is free to use for additional information */
   DBUG_RETURN(0);
 } /* pack_header */
 
@@ -837,7 +897,7 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
                         ulong data_offset)
 {
   reg2 uint i;
-  uint int_count, comment_length=0;
+  uint int_count, comment_length= 0, vcol_info_length=0;
   uchar buff[MAX_FIELD_WIDTH];
   Create_field *field;
   DBUG_ENTER("pack_fields");
@@ -850,6 +910,7 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
   while ((field=it++))
   {
     uint recpos;
+    uint cur_vcol_expr_len= 0;
     buff[0]= (uchar) field->row;
     buff[1]= (uchar) field->col;
     buff[2]= (uchar) field->sc_length;
@@ -879,6 +940,17 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
     {
       buff[11]= buff[14]= 0;			// Numerical
     }
+    if (field->vcol_info)
+    {
+      /* 
+        Use the interval_id place in the .frm file to store the length of
+        the additional data saved for the virtual field
+      */
+      buff[12]= cur_vcol_expr_len= field->vcol_info->expr_str.length +
+                (uint)FRM_VCOL_HEADER_SIZE;
+      vcol_info_length+= cur_vcol_expr_len+(uint)FRM_VCOL_HEADER_SIZE;
+      buff[13]= (uchar) MYSQL_TYPE_VIRTUAL;
+    }
     int2store(buff+15, field->comment.length);
     comment_length+= field->comment.length;
     set_if_bigger(int_count,field->interval_id);
@@ -973,6 +1045,34 @@ static bool pack_fields(File file, List<Create_field> &create_fields,
 	  DBUG_RETURN(1);
     }
   }
+  if (vcol_info_length)
+  {
+    it.rewind();
+    int_count=0;
+    while ((field=it++))
+    {
+      /*
+        Pack each virtual field as follows:
+        byte 1      = 1 (always 1 to allow for future extensions)
+        byte 2      = sql_type
+        byte 3      = flags (as of now, 0 - no flags, 1 - field is physically stored)
+        byte 4-...  = virtual column expression (text data)
+      */
+      if (field->vcol_info && field->vcol_info->expr_str.length)
+      {
+        buff[0]= (uchar)1;
+        buff[1]= (uchar) field->sql_type;
+        buff[2]= (uchar) field->stored_in_db;
+        if (my_write(file, buff, 3, MYF_RW))
+          DBUG_RETURN(1);
+        if (my_write(file,
+                     (uchar*) field->vcol_info->expr_str.str,
+                     field->vcol_info->expr_str.length,
+                     MYF_RW))
+          DBUG_RETURN(1);
+      }
+    }
+  }
   DBUG_RETURN(0);
 }
 
diff --git a/sql/unireg.h b/sql/unireg.h
index b897c887c89..028cb0546d8 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -163,6 +163,13 @@ typedef struct st_ha_create_information HA_CREATE_INFO;
 
 #define DEFAULT_KEY_CACHE_NAME "default"
 
+/* The length of the header part for each virtual column in the .frm file */
+#define FRM_VCOL_HEADER_SIZE 3
+
+/* Maximum length of the defining expression for a virtual columns */
+#define VIRTUAL_COLUMN_EXPRESSION_MAXLEN 255 - FRM_VCOL_HEADER_SIZE
+
+
 /* Include prototypes for unireg */
 
 #include "mysqld_error.h"
author	Sergei Golubchik <sergii@pisem.net>	2010-11-25 18:17:28 +0100
committer	Sergei Golubchik <sergii@pisem.net>	2010-11-25 18:17:28 +0100
commit	65ca700def99289cc31a7040537f5aa6e12bf485 (patch)
tree	97b3a07299b626c519da0e80c122b5b79b933914 /sql
parent	2ab57de38d13d927ddff2d51aed4af34e13998f5 (diff)
parent	6e5bcca7935d3c62f84bb640e5357664a210ee12 (diff)
download	mariadb-git-65ca700def99289cc31a7040537f5aa6e12bf485.tar.gz