Diffstat (limited to 'sql/handler.h')
-rw-r--r--  sql/handler.h | 967
1 files changed, 814 insertions, 153 deletions
diff --git a/sql/handler.h b/sql/handler.h
index 0b970a1349d..ee1731af563 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1,8 +1,8 @@
#ifndef HANDLER_INCLUDED
#define HANDLER_INCLUDED
-
/*
- Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+ Copyright (c) 2009-2011 Monty Program Ab
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@@ -36,6 +36,10 @@
#include <ft_global.h>
#include <keycache.h>
+#if MAX_KEY > 128
+#error MAX_KEY is too large. Values up to 128 are supported.
+#endif
+
// the following is for checking tables
#define HA_ADMIN_ALREADY_DONE 1
@@ -116,7 +120,8 @@
#define HA_CAN_FULLTEXT (1 << 21)
#define HA_CAN_SQL_HANDLER (1 << 22)
#define HA_NO_AUTO_INCREMENT (1 << 23)
-#define HA_HAS_CHECKSUM (1 << 24)
+/* Has automatic checksums and uses the old checksum format */
+#define HA_HAS_OLD_CHECKSUM (1 << 24)
/* Table data are stored in separate files (for lower_case_table_names) */
#define HA_FILE_BASED (1 << 26)
#define HA_NO_VARCHAR (1 << 27)
@@ -156,6 +161,7 @@
ordered.
*/
#define HA_DUPLICATE_KEY_NOT_IN_ORDER (LL(1) << 36)
+
/*
Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an
incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE
@@ -163,6 +169,25 @@
*/
#define HA_CAN_REPAIR (LL(1) << 37)
+/* Has automatic checksums and uses the new checksum format */
+#define HA_HAS_NEW_CHECKSUM (LL(1) << 38)
+#define HA_CAN_VIRTUAL_COLUMNS (LL(1) << 39)
+#define HA_MRR_CANT_SORT (LL(1) << 40)
+#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (LL(1) << 41)
+
+/*
+ Table condition pushdown must be performed regardless of
+ 'engine_condition_pushdown' setting.
+
+ This flag is aimed at storage engines that come with "special" predicates
+ that can only be evaluated inside the storage engine.
+ For example, when one does
+ select * from sphinx_table where query='{fulltext_query}'
+ then the "query=..." condition must be always pushed down into storage
+ engine.
+*/
+#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN (LL(1) << 42)
+
/*
Set of all binlog flags. Currently only contain the capabilities
flags.
@@ -178,6 +203,18 @@
#define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */
/*
+ Index scan will not return records in rowid order. Not guaranteed to be
+ set for unordered (e.g. HASH) indexes.
+*/
+#define HA_KEY_SCAN_NOT_ROR 128
+#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
+/*
+ Data is clustered on this key. This means that when you read the key
+ you also get the row data without any additional disk reads.
+*/
+#define HA_CLUSTERED_INDEX 512
+
+/*
bits in alter_table_flags:
*/
/*
@@ -235,12 +272,6 @@
#define HA_FAST_CHANGE_PARTITION (1L << 13)
#define HA_PARTITION_ONE_PHASE (1L << 14)
-/*
- Index scan will not return records in rowid order. Not guaranteed to be
- set for unordered (e.g. HASH) indexes.
-*/
-#define HA_KEY_SCAN_NOT_ROR 128
-
/* operations for disable/enable indexes */
#define HA_KEY_SWITCH_NONUNIQ 0
#define HA_KEY_SWITCH_ALL 1
@@ -287,8 +318,6 @@
#define HA_LEX_CREATE_TMP_TABLE 1
#define HA_LEX_CREATE_IF_NOT_EXISTS 2
#define HA_LEX_CREATE_TABLE_LIKE 4
-#define HA_OPTION_NO_CHECKSUM (1L << 17)
-#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
#define HA_MAX_REC_LENGTH 65535
/* Table caching type */
@@ -329,6 +358,11 @@ enum legacy_db_type
DB_TYPE_FIRST_DYNAMIC=42,
DB_TYPE_DEFAULT=127 // Must be last
};
+/*
+ Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have
+ a hard-coded type value here.
+ */
+#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN
enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED,
ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED,
@@ -377,30 +411,10 @@ enum enum_binlog_command {
#define HA_CREATE_USED_PASSWORD (1L << 17)
#define HA_CREATE_USED_CONNECTION (1L << 18)
#define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19)
-/** Unused. Reserved for future versions. */
+/* The following two are used by Maria engine: */
#define HA_CREATE_USED_TRANSACTIONAL (1L << 20)
-/** Unused. Reserved for future versions. */
#define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21)
-
-/*
- This is master database for most of system tables. However there
- can be other databases which can hold system tables. Respective
- storage engines define their own system database names.
-*/
-extern const char *mysqld_system_database;
-
-/*
- Structure to hold list of system_database.system_table.
- This is used at both mysqld and storage engine layer.
-*/
-struct st_system_tablename
-{
- const char *db;
- const char *tablename;
-};
-
-
typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MYSQL_XID_PREFIX "MySQLXid"
#define MYSQL_XID_PREFIX_LEN 8 // must be a multiple of 8
@@ -579,6 +593,7 @@ struct TABLE;
enum enum_schema_tables
{
SCH_CHARSETS= 0,
+ SCH_CLIENT_STATS,
SCH_COLLATIONS,
SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
SCH_COLUMNS,
@@ -588,6 +603,8 @@ enum enum_schema_tables
SCH_FILES,
SCH_GLOBAL_STATUS,
SCH_GLOBAL_VARIABLES,
+ SCH_INDEX_STATS,
+ SCH_KEY_CACHES,
SCH_KEY_COLUMN_USAGE,
SCH_OPEN_TABLES,
SCH_PARAMETERS,
@@ -608,8 +625,10 @@ enum enum_schema_tables
SCH_TABLE_CONSTRAINTS,
SCH_TABLE_NAMES,
SCH_TABLE_PRIVILEGES,
+ SCH_TABLE_STATS,
SCH_TRIGGERS,
SCH_USER_PRIVILEGES,
+ SCH_USER_STATS,
SCH_VARIABLES,
SCH_VIEWS
};
@@ -648,6 +667,108 @@ struct handler_log_file_data {
enum log_status status;
};
+/*
+ Definitions for engine-specific table/field/index options in the CREATE TABLE.
+
+ Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER,
+ HA_FOPTION_ENUM, HA_IOPTION_STRING, etc).
+
+ Every macro takes the option name and the name of the underlying field of
+ the appropriate C structure. The "appropriate C structure" is
+ ha_table_option_struct for table level options,
+ ha_field_option_struct for field level options,
+ ha_index_option_struct for key level options. The engine either
+ defines a structure of this name, or uses #define's to map
+ these "appropriate" names to the actual structure type name.
+
+ ULL options use a ulonglong as the backing store.
+ HA_*OPTION_NUMBER() takes the option name, the structure field name,
+ the default value for the option, min, max, and blk_siz values.
+
+ STRING options use a char* as a backing store.
+ HA_*OPTION_STRING takes the option name and the structure field name.
+ The default value will be 0.
+
+ ENUM options use a uint as a backing store (not enum!!!).
+ HA_*OPTION_ENUM takes the option name, the structure field name,
+ the default value for the option as a number, and a string with the
+ permitted values for this enum - one string with comma separated values,
+ for example: "gzip,bzip2,lzma"
+
+ BOOL options use a bool as a backing store.
+ HA_*OPTION_BOOL takes the option name, the structure field name,
+ and the default value for the option.
+ From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0.
+
+ The name of the option is limited to 255 bytes,
+ the value (for string options) to 32767 bytes.
+
+ See ha_example.cc for an example.
+*/
+
+struct ha_table_option_struct;
+struct ha_field_option_struct;
+struct ha_index_option_struct;
+
+enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */
+ HA_OPTION_TYPE_STRING, /* char * */
+ HA_OPTION_TYPE_ENUM, /* uint */
+ HA_OPTION_TYPE_BOOL}; /* bool */
+
+#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz) \
+ { HA_OPTION_TYPE_ULL, name, sizeof(name)-1, \
+ offsetof(struc, field), def, min, max, blk_siz, 0 }
+#define HA_xOPTION_STRING(name, struc, field) \
+ { HA_OPTION_TYPE_STRING, name, sizeof(name)-1, \
+ offsetof(struc, field), 0, 0, 0, 0, 0 }
+#define HA_xOPTION_ENUM(name, struc, field, values, def) \
+ { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1, \
+ offsetof(struc, field), def, 0, \
+ sizeof(values)-1, 0, values }
+#define HA_xOPTION_BOOL(name, struc, field, def) \
+ { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1, \
+ offsetof(struc, field), def, 0, 1, 0, 0 }
+#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz)
+#define HA_TOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_table_option_struct, field)
+#define HA_TOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def)
+#define HA_TOPTION_BOOL(name, field, def) \
+ HA_xOPTION_BOOL(name, ha_table_option_struct, field, def)
+#define HA_TOPTION_END HA_xOPTION_END
+
+#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, blk_siz)
+#define HA_FOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_field_option_struct, field)
+#define HA_FOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def)
+#define HA_FOPTION_BOOL(name, field, def) \
+ HA_xOPTION_BOOL(name, ha_field_option_struct, field, def)
+#define HA_FOPTION_END HA_xOPTION_END
+
+#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz)
+#define HA_IOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_index_option_struct, field)
+#define HA_IOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_BOOL(name, field, values, def) \
+ HA_xOPTION_BOOL(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_END HA_xOPTION_END
+
+typedef struct st_ha_create_table_option {
+ enum ha_option_type type;
+ const char *name;
+ size_t name_length;
+ ptrdiff_t offset;
+ ulonglong def_value;
+ ulonglong min_value, max_value, block_size;
+ const char *values;
+} ha_create_table_option;
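For illustration, a storage engine could describe its CREATE TABLE options with these macros roughly as follows; the structure fields and option names below are hypothetical, and ha_example.cc remains the real reference mentioned above:

  struct ha_table_option_struct            /* engine-side backing store */
  {
    const char *compression;               /* STRING option */
    ulonglong   block_size;                /* NUMBER option */
    uint        pack_mode;                 /* ENUM option (uint, not enum) */
    bool        checksums;                 /* BOOL option */
  };

  static ha_create_table_option my_table_option_list[]=
  {
    HA_TOPTION_STRING("COMPRESSION", compression),
    HA_TOPTION_NUMBER("BLOCK_SIZE", block_size, 8192, 1024, 65536, 1024),
    HA_TOPTION_ENUM("PACK_MODE", pack_mode, "none,prefix,full", 0),
    HA_TOPTION_BOOL("CHECKSUMS", checksums, 1),
    HA_TOPTION_END
  };

Such an array would then be assigned to handlerton::table_options at engine initialization, so that options given in CREATE/ALTER TABLE are parsed into ha_table_option_struct for the engine.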
enum handler_iterator_type
{
@@ -709,8 +830,9 @@ struct handlerton
SHOW_COMP_OPTION state;
/*
- Historical number used for frm file to determine the correct storage engine.
- This is going away and new engines will just use "name" for this.
+ Historical number used for frm file to determine the correct
+ storage engine. This is going away and new engines will just use
+ "name" for this.
*/
enum legacy_db_type db_type;
/*
@@ -760,12 +882,113 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
- int (*commit)(handlerton *hton, THD *thd, bool all);
+ int (*commit)(handlerton *hton, THD *thd, bool all);
+ /*
+ The commit_ordered() method is called prior to the commit() method, after
+ the transaction manager has decided to commit (not rollback) the
+ transaction. Unlike commit(), commit_ordered() is called only when the
+ full transaction is committed, not for each commit of statement
+ transaction in a multi-statement transaction.
+
+ Note that, like prepare(), commit_ordered() is only called when 2-phase
+ commit takes place. Ie. when no binary log and only a single engine
+ participates in a transaction, one commit() is called, no
+ commit_ordered(). So engines must be prepared for this.
+
+ The calls to commit_ordered() in multiple parallel transactions are
+ guaranteed to happen in the same order in every participating
+ handler. This can be used to ensure the same commit order among multiple
+ handlers (eg. in table handler and binlog). So if transaction T1 calls
+ into commit_ordered() of handler A before T2, then T1 will also call
+ commit_ordered() of handler B before T2.
+
+ Engines that implement this method should during this call make the
+ transaction visible to other transactions, thereby making the order of
+ transaction commits be defined by the order of commit_ordered() calls.
+
+ The intention is that commit_ordered() should do the minimal amount of
+ work that needs to happen in consistent commit order among handlers. To
+ preserve ordering, calls need to be serialised on a global mutex, so
+ doing any time-consuming or blocking operations in commit_ordered() will
+ limit scalability.
+
+ Handlers can rely on commit_ordered() calls to be serialised (no two
+ calls can run in parallel, so no extra locking on the handler part is
+ required to ensure this).
+
+ Note that commit_ordered() can be called from a different thread than the
+ one handling the transaction! So it can not do anything that depends on
+ thread local storage, in particular it can not call my_error() and
+ friends (instead it can store the error code and delay the call of
+ my_error() to the commit() method).
+
+ Similarly, since commit_ordered() returns void, any return error code
+ must be saved and returned from the commit() method instead.
+
+ The commit_ordered method is optional, and can be left unset if not
+ needed in a particular handler (then there will be no ordering guarantees
+ wrt. other engines and binary log).
+ */
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
+ /*
+ The prepare_ordered method is optional. If set, it will be called after
+ successful prepare() in all handlers participating in 2-phase
+ commit. Like commit_ordered(), it is called only when the full
+ transaction is committed, not for each commit of statement transaction.
+
+ The calls to prepare_ordered() among multiple parallel transactions are
+ ordered consistently with calls to commit_ordered(). This means that
+ calls to prepare_ordered() effectively define the commit order, and that
+ each handler will see the same sequence of transactions calling into
+ prepare_ordered() and commit_ordered().
+
+ Thus, prepare_ordered() can be used to define commit order for handlers
+ that need to do this in the prepare step (like binlog). It can also be
+ used to release transaction's locks early in an order consistent with the
+ order transactions will be eventually committed.
+
+ Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+ ordering, so the intention is that they should execute fast, with only
+ the minimal amount of work needed to define commit order. Handlers can
+ rely on this serialisation, and do not need to do any extra locking to
+ avoid two prepare_ordered() calls running in parallel.
+
+ Like commit_ordered(), prepare_ordered() is not guaranteed to be called
+ in the context of the thread handling the rest of the transaction. So it
+ cannot invoke code that relies on thread local storage, in particular it
+ cannot call my_error().
+
+ prepare_ordered() cannot cause a rollback by returning an error, all
+ possible errors must be handled in prepare() (the prepare_ordered()
+ method returns void). In case of some fatal error, a record of the error
+ must be made internally by the engine and returned from commit() later.
+
+ Note that for user-level XA SQL commands, no consistent ordering among
+ prepare_ordered() and commit_ordered() is guaranteed (as that would
+ require blocking all other commits for an indefinite time).
+
+ When 2-phase commit is not used (eg. only one engine (and no binlog) in
+ transaction), neither prepare() nor prepare_ordered() is called.
+ */
+ void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
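A minimal sketch of the intended split of work described above (the my_* names are hypothetical and not part of this patch): commit_ordered() only publishes the transaction in commit order and records any error, which commit() later reports:

  static void my_commit_ordered(handlerton *hton, THD *thd, bool all)
  {
    my_trx *trx= my_get_trx(hton, thd);     /* engine's per-connection trx */
    /* Fast and non-blocking: make the transaction visible in commit order. */
    trx->deferred_error= my_make_visible(trx);
    /* No my_error() here: this may run in another thread. */
  }

  static int my_commit(handlerton *hton, THD *thd, bool all)
  {
    my_trx *trx= my_get_trx(hton, thd);
    if (trx->deferred_error)
      return trx->deferred_error;           /* error saved by commit_ordered() */
    return my_flush_logs_and_release(trx);  /* the slow, durable part */
  }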
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
+ /*
+ Disable or enable checkpointing internal to the storage engine. This is
+ used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
+ the engine will never start any recovery from a time between
+ FLUSH TABLES ... ; UNLOCK TABLES.
+
+ While checkpointing is disabled, the engine should pause any background
+ write activity (such as tablespace checkpointing) that requires consistency
+ between different files (such as transaction log and tablespace files) for
+ crash recovery to succeed. The idea is to use this to make safe
+ multi-volume LVM snapshot backups.
+ */
+ int (*checkpoint_state)(handlerton *hton, bool disabled);
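A hedged sketch of an engine hook for this member (my_* names are hypothetical): pause background checkpoint activity while disabled, resume it afterwards:

  static int my_checkpoint_state(handlerton *hton, bool disabled)
  {
    if (disabled)
      return my_pause_background_checkpoints(); /* wait until files are consistent */
    my_resume_background_checkpoints();
    return 0;
  }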
void *(*create_cursor_read_view)(handlerton *hton, THD *thd);
void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
@@ -820,50 +1043,29 @@ struct handlerton
int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
const char *name);
- /**
- List of all system tables specific to the SE.
- Array element would look like below,
- { "<database_name>", "<system table name>" },
- The last element MUST be,
- { (const char*)NULL, (const char*)NULL }
-
- @see ha_example_system_tables in ha_example.cc
-
- This interface is optional, so every SE need not implement it.
- */
- const char* (*system_database)();
-
- /**
- Check if the given db.tablename is a system table for this SE.
-
- @param db Database name to check.
- @param table_name table name to check.
- @param is_sql_layer_system_table if the supplied db.table_name is a SQL
- layer system table.
-
- @see example_is_supported_system_table in ha_example.cc
+ uint32 license; /* Flag for Engine License */
+ /*
+ Optional clauses in the CREATE/ALTER TABLE
+ */
+ ha_create_table_option *table_options; // table level options
+ ha_create_table_option *field_options; // these are specified per field
+ ha_create_table_option *index_options; // these are specified per index
- is_sql_layer_system_table is supplied to make more efficient
- checks possible for SEs that support all SQL layer tables.
+};
- This interface is optional, so every SE need not implement it.
- */
- bool (*is_supported_system_table)(const char *db,
- const char *table_name,
- bool is_sql_layer_system_table);
- uint32 license; /* Flag for Engine License */
- void *data; /* Location for engines to keep personal structures */
-};
+inline LEX_STRING *hton_name(const handlerton *hton)
+{
+ return &(hton2plugin[hton->slot]->name);
+}
/* Possible flags of a handlerton (there can be 32 of them) */
#define HTON_NO_FLAGS 0
#define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0)
#define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter
-#define HTON_CAN_RECREATE (1 << 2) //Delete all is used fro truncate
+#define HTON_CAN_RECREATE (1 << 2) //Delete all is used for truncate
#define HTON_HIDDEN (1 << 3) //Engine does not appear in lists
-#define HTON_FLUSH_AFTER_RENAME (1 << 4)
#define HTON_NOT_USER_SELECTABLE (1 << 5)
#define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported
#define HTON_SUPPORT_LOG_TABLES (1 << 7) //Engine supports log tables
@@ -909,6 +1111,7 @@ struct THD_TRANS
void reset() { no_2pc= FALSE; modified_non_trans_table= FALSE; }
bool is_empty() const { return ha_list == NULL; }
+ THD_TRANS() {} /* Remove gcc warning */
};
@@ -1019,9 +1222,9 @@ typedef struct {
ulonglong delete_length;
ha_rows records;
ulong mean_rec_length;
- ulong create_time;
- ulong check_time;
- ulong update_time;
+ time_t create_time;
+ time_t check_time;
+ time_t update_time;
ulonglong check_sum;
} PARTITION_STATS;
@@ -1034,7 +1237,7 @@ class partition_info;
struct st_partition_iter;
#define NOT_A_PARTITION_ID ((uint32)-1)
-enum enum_ha_unused { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
typedef struct st_ha_create_information
{
@@ -1066,11 +1269,16 @@ typedef struct st_ha_create_information
uint options; /* OR of HA_CREATE_ options */
uint merge_insert_method;
uint extra_size; /* length of extra data segment */
- enum enum_ha_unused unused1;
- bool frm_only; /* 1 if no ha_create_table() */
- bool varchar; /* 1 if table has a VARCHAR */
- enum ha_storage_media storage_media; /* DEFAULT, DISK or MEMORY */
- enum enum_ha_unused unused2;
+ enum ha_choice transactional;
+ bool frm_only; ///< 1 if no ha_create_table()
+ bool varchar; ///< 1 if table has a VARCHAR
+ enum ha_storage_media storage_media; ///< DEFAULT, DISK or MEMORY
+ enum ha_choice page_checksum; ///< If we have page_checksums
+ engine_option_value *option_list; ///< list of table create options
+ /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
+ ha_table_option_struct *option_struct; ///< structure with parsed table options
+ ha_field_option_struct **fields_option_struct; ///< array of field option structures
+ ha_index_option_struct **indexes_option_struct; ///< array of index option structures
} HA_CREATE_INFO;
@@ -1145,11 +1353,250 @@ typedef struct st_ha_check_opt
st_ha_check_opt() {} /* Remove gcc warning */
uint flags; /* isam layer flags (e.g. for myisamchk) */
uint sql_flags; /* sql layer flags - for something myisamchk cannot do */
- KEY_CACHE *key_cache; /* new key cache when changing key cache */
+ time_t start_time; /* When check/repair starts */
+ KEY_CACHE *key_cache; /* new key cache when changing key cache */
void init();
} HA_CHECK_OPT;
+/********************************************************************************
+ * MRR
+ ********************************************************************************/
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+ /*
+ Get key information
+
+ SYNOPSIS
+ get_key_info()
+ init_params The seq_init_param parameter
+ length OUT length of the keys in this range sequence
+ map OUT key_part_map of the keys in this range sequence
+
+ DESCRIPTION
+ This function is set only when using HA_MRR_FIXED_KEY mode. In that mode,
+ all ranges are single-point equality ranges that use the same set of key
+ parts. This function allows the MRR implementation to get the length of
+ a key, and which keyparts it uses.
+ */
+ void (*get_key_info)(void *init_params, uint *length, key_part_map *map);
+
+ /*
+ Initialize the traversal of range sequence
+
+ SYNOPSIS
+ init()
+ init_params The seq_init_param parameter
+ n_ranges The number of ranges obtained
+ flags A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+ RETURN
+ An opaque value to be used as RANGE_SEQ_IF::next() parameter
+ */
+ range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+ /*
+ Get the next range in the range sequence
+
+ SYNOPSIS
+ next()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range OUT Information about the next range
+
+ RETURN
+ FALSE - Ok, the range structure filled with info about the next range
+ TRUE - No more ranges
+ */
+ bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+
+ /*
+ Check whether range_info orders to skip the next record
+
+ SYNOPSIS
+ skip_record()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range_info Information about the next range
+ (Ignored if MRR_NO_ASSOCIATION is set)
+ rowid Rowid of the record to be checked (ignored if set to 0)
+
+ RETURN
+ 1 - Record with this range_info and/or this rowid shall be filtered
+ out from the stream of records returned by multi_range_read_next()
+ 0 - The record shall be left in the stream
+ */
+ bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);
+
+ /*
+ Check if the record combination matches the index condition
+ SYNOPSIS
+ skip_index_tuple()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range_info Information about the next range
+
+ RETURN
+ 0 - The record combination satisfies the index condition
+ 1 - Otherwise
+ */
+ bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
+} RANGE_SEQ_IF;
+
+typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
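As an illustration of this interface (hypothetical names, not part of the patch), a caller could expose a plain array of ranges like this:

  struct array_range_seq { KEY_MULTI_RANGE *ranges; uint count; uint current; };

  static range_seq_t array_seq_init(void *init_param, uint n_ranges, uint flags)
  {
    array_range_seq *seq= (array_range_seq*) init_param;
    seq->current= 0;
    return seq;                              /* opaque value handed to next() */
  }

  static bool array_seq_next(range_seq_t seq_arg, KEY_MULTI_RANGE *range)
  {
    array_range_seq *seq= (array_range_seq*) seq_arg;
    if (seq->current == seq->count)
      return TRUE;                           /* no more ranges */
    *range= seq->ranges[seq->current++];
    return FALSE;
  }

  /* RANGE_SEQ_IF seq_if= { NULL, array_seq_init, array_seq_next, NULL, NULL };
     could then be passed to handler::multi_range_read_init(). */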
+
+class COST_VECT
+{
+public:
+ double io_count; /* number of I/O */
+ double avg_io_cost; /* cost of an average I/O oper. */
+ double cpu_cost; /* cost of operations in CPU */
+ double mem_cost; /* cost of used memory */
+ double import_cost; /* cost of remote operations */
+
+ enum { IO_COEFF=1 };
+ enum { CPU_COEFF=1 };
+ enum { MEM_COEFF=1 };
+ enum { IMPORT_COEFF=1 };
+
+ COST_VECT() {} // keep gcc happy
+
+ double total_cost()
+ {
+ return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+ MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+ }
+
+ void zero()
+ {
+ avg_io_cost= 1.0;
+ io_count= cpu_cost= mem_cost= import_cost= 0.0;
+ }
+
+ void multiply(double m)
+ {
+ io_count *= m;
+ cpu_cost *= m;
+ import_cost *= m;
+ /* Don't multiply mem_cost */
+ }
+
+ void add(const COST_VECT* cost)
+ {
+ double io_count_sum= io_count + cost->io_count;
+ add_io(cost->io_count, cost->avg_io_cost);
+ io_count= io_count_sum;
+ cpu_cost += cost->cpu_cost;
+ }
+ void add_io(double add_io_cnt, double add_avg_cost)
+ {
+ /* In edge cases add_io_cnt may be zero */
+ if (add_io_cnt > 0)
+ {
+ double io_count_sum= io_count + add_io_cnt;
+ avg_io_cost= (io_count * avg_io_cost +
+ add_io_cnt * add_avg_cost) / io_count_sum;
+ io_count= io_count_sum;
+ }
+ }
+
+ /*
+ To be used when we go from old single value-based cost calculations to
+ the new COST_VECT-based.
+ */
+ void convert_from_cost(double cost)
+ {
+ zero();
+ avg_io_cost= 1.0;
+ io_count= cost;
+ }
+};
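A small usage sketch (illustrative numbers only) of how costs are accumulated and compared with this class:

  static double example_access_cost()
  {
    COST_VECT cost;
    cost.zero();                      /* counters to 0, avg_io_cost to 1.0 */
    cost.add_io(12, 1.0);             /* 12 I/Os at the default average cost */
    cost.cpu_cost+= 0.05 * 1000;      /* e.g. per-row CPU cost for 1000 rows */

    COST_VECT old_style;
    old_style.convert_from_cost(3.0); /* wrap an old single-number estimate */
    cost.add(&old_style);             /* I/O cost averaged in, CPU cost summed */
    return cost.total_cost();         /* scalar used to compare access paths */
  }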
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ COST_VECT *cost);
+
+/*
+ Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+ The ranges may not use the full key but all of them will use the same number
+ of key parts.
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY 2
+
+/*
+ Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+ 'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/*
+ The MRR user will provide ranges in key order, and MRR implementation
+ must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/*
+ The passed memory buffer is of maximum possible size, the caller can't
+ assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+ Flag set <=> default MRR implementation is used
+ (The choice is made by **_info[_const]() function which may set this
+ flag. SQL layer remembers the flag value and then passes it to
+ multi_range_read_init().)
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
+/*
+ Used only as parameter to multi_range_read_info():
+ Flag set <=> the caller guarantees that the bounds of the scanned ranges
+ will not have NULL values.
+*/
+#define HA_MRR_NO_NULL_ENDPOINTS 128
+
+/*
+ The MRR user has materialized range keys somewhere in the user's buffer.
+ This can be used for optimization of the procedure that sorts these keys
+ since in this case key values don't have to be copied into the MRR buffer.
+
+ In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the
+ pointer in range->start_key.key will point to a key value that will remain
+ there until the end of the MRR scan.
+*/
+#define HA_MRR_MATERIALIZED_KEYS 256
+
+/*
+ The following bits are reserved for use by MRR implementation. The intended
+ use scenario:
+
+ * sql layer calls handler->multi_range_read_info[_const]()
+ - MRR implementation figures out what kind of scan it will perform, saves
+ the result in *mrr_mode parameter.
+ * sql layer remembers what was returned in *mrr_mode
+
+ * the optimizer picks the query plan (which may or may not include the MRR
+ scan that was estimated by the multi_range_read_info[_const] call)
+
+ * if the query is an EXPLAIN statement, sql layer will call
+ handler->multi_range_read_explain_info(mrr_mode) to get a text description
+ of the picked MRR scan; the description will be a part of EXPLAIN output.
+*/
+#define HA_MRR_IMPLEMENTATION_FLAG1 512
+#define HA_MRR_IMPLEMENTATION_FLAG2 1024
+#define HA_MRR_IMPLEMENTATION_FLAG3 2048
+#define HA_MRR_IMPLEMENTATION_FLAG4 4096
+#define HA_MRR_IMPLEMENTATION_FLAG5 8192
+#define HA_MRR_IMPLEMENTATION_FLAG6 16384
+
+#define HA_MRR_IMPLEMENTATION_FLAGS \
+ (512 | 1024 | 2048 | 4096 | 8192 | 16384)
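To show how these mode bits travel between the SQL layer and the engine, here is a hedged caller-side sketch (run_mrr_scan and its parameters are hypothetical):

  static int run_mrr_scan(handler *file, uint keyno, RANGE_SEQ_IF *seq_if,
                          void *seq_param, uint n_ranges, HANDLER_BUFFER *buf)
  {
    uint mrr_mode= HA_MRR_SINGLE_POINT | HA_MRR_NO_NULL_ENDPOINTS;
    uint bufsz= 4096;
    COST_VECT cost;
    ha_rows rows= file->multi_range_read_info_const(keyno, seq_if, seq_param,
                                                    n_ranges, &bufsz, &mrr_mode,
                                                    &cost);
    if (rows == HA_POS_ERROR)
      return 1;
    /* mrr_mode may now carry HA_MRR_USE_DEFAULT_IMPL and/or implementation
       flags; the remembered value is passed back unchanged. */
    return file->multi_range_read_init(seq_if, seq_param, n_ranges, mrr_mode, buf);
  }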
/*
This is a buffer area that the handler can use to store rows.
@@ -1160,8 +1607,8 @@ typedef struct st_ha_check_opt
typedef struct st_handler_buffer
{
- const uchar *buffer; /* Buffer one can start using */
- const uchar *buffer_end; /* End of buffer */
+ /* const? */uchar *buffer; /* Buffer one can start using */
+ /* const? */uchar *buffer_end; /* End of buffer */
uchar *end_of_used_area; /* End of area that was used by handler */
} HANDLER_BUFFER;
@@ -1187,19 +1634,26 @@ public:
ha_rows records;
ha_rows deleted; /* Deleted records */
ulong mean_rec_length; /* physical reclength */
- ulong create_time; /* When table was created */
- ulong check_time;
- ulong update_time;
+ time_t create_time; /* When table was created */
+ time_t check_time;
+ time_t update_time;
uint block_size; /* index block size */
+ /*
+ number of buffer bytes that native mrr implementation needs,
+ */
+ uint mrr_length_per_rec;
+
ha_statistics():
data_file_length(0), max_data_file_length(0),
index_file_length(0), delete_length(0), auto_increment_value(0),
records(0), deleted(0), mean_rec_length(0), create_time(0),
- check_time(0), update_time(0), block_size(0)
+ check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
{}
};
+extern "C" enum icp_result handler_index_cond_check(void* h_arg);
+
uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map);
/*
bitmap with first N+1 bits set
@@ -1258,27 +1712,42 @@ public:
ha_statistics stats;
- /** The following are for read_multi_range */
- bool multi_range_sorted;
- KEY_MULTI_RANGE *multi_range_curr;
- KEY_MULTI_RANGE *multi_range_end;
- HANDLER_BUFFER *multi_range_buffer;
+ /** MultiRangeRead-related members: */
+ range_seq_t mrr_iter; /* Iterator to traverse the range sequence */
+ RANGE_SEQ_IF mrr_funcs; /* Range sequence traversal functions */
+ HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
+ uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+ /* TRUE <=> source MRR ranges and the output are ordered */
+ bool mrr_is_output_sorted;
+
+ /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
+ bool mrr_have_range;
+ /** Current range (the one we're now returning rows from) */
+ KEY_MULTI_RANGE mrr_cur_range;
/** The following are for read_range() */
key_range save_end_range, *end_range;
KEY_PART_INFO *range_key_part;
int key_compare_result_on_equal;
bool eq_range;
+ bool internal_tmp_table; /* If internal tmp table */
uint errkey; /* Last dup key */
uint key_used_on_scan;
uint active_index;
+ /*
+ TRUE <=> the engine guarantees that returned records are within the range
+ being scanned.
+ */
+ bool in_range_check_pushed_down;
+
/** Length of ref (1-8 or the clustered key length) */
uint ref_length;
FT_INFO *ft_handler;
enum {NONE=0, INDEX, RND} inited;
bool locked;
bool implicit_emptied; /* Can be !=0 only if HEAP */
+ bool mark_trx_done;
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
@@ -1301,6 +1770,16 @@ public:
Interval returned by get_auto_increment() and being consumed by the
inserter.
*/
+ /* Statistics variables */
+ ulonglong rows_read;
+ ulonglong rows_tmp_read;
+ ulonglong rows_changed;
+ /* One bigger than needed to avoid to test if key == MAX_KEY */
+ ulonglong index_rows_read[MAX_KEY+1];
+
+ Item *pushed_idx_cond;
+ uint pushed_idx_cond_keyno; /* The index which the above condition is for */
+
Discrete_interval auto_inc_interval_for_cur_row;
/**
Number of reserved auto-increment intervals. Serves as a heuristic
@@ -1327,14 +1806,19 @@ public:
handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
:table_share(share_arg), table(0),
estimation_rows_to_insert(0), ht(ht_arg),
- ref(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+ ref(0), end_range(NULL), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+ in_range_check_pushed_down(FALSE),
ref_length(sizeof(my_off_t)),
ft_handler(0), inited(NONE),
- locked(FALSE), implicit_emptied(0),
+ locked(FALSE), implicit_emptied(0), mark_trx_done(FALSE),
pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+ pushed_idx_cond(NULL),
+ pushed_idx_cond_keyno(MAX_KEY),
auto_inc_intervals_count(0),
m_psi(NULL)
- {}
+ {
+ reset_statistics();
+ }
virtual ~handler(void)
{
DBUG_ASSERT(locked == FALSE);
@@ -1348,29 +1832,38 @@ public:
}
/* ha_ methods: public wrappers for private virtual API */
- int ha_open(TABLE *table, const char *name, int mode, int test_if_locked);
+ int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked);
int ha_index_init(uint idx, bool sorted)
{
int result;
DBUG_ENTER("ha_index_init");
DBUG_ASSERT(inited==NONE);
if (!(result= index_init(idx, sorted)))
- inited=INDEX;
+ {
+ inited= INDEX;
+ active_index= idx;
+ end_range= NULL;
+ }
DBUG_RETURN(result);
}
int ha_index_end()
{
DBUG_ENTER("ha_index_end");
DBUG_ASSERT(inited==INDEX);
- inited=NONE;
+ inited= NONE;
+ active_index= MAX_KEY;
+ end_range= NULL;
DBUG_RETURN(index_end());
}
- int ha_rnd_init(bool scan)
+ /* This is called after index_init() if we need to do an index scan */
+ virtual int prepare_index_scan() { return 0; }
+ int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
{
int result;
DBUG_ENTER("ha_rnd_init");
DBUG_ASSERT(inited==NONE || (inited==RND && scan));
inited= (result= rnd_init(scan)) ? NONE: RND;
+ end_range= NULL;
DBUG_RETURN(result);
}
int ha_rnd_end()
@@ -1378,9 +1871,18 @@ public:
DBUG_ENTER("ha_rnd_end");
DBUG_ASSERT(inited==RND);
inited=NONE;
+ end_range= NULL;
DBUG_RETURN(rnd_end());
}
+ int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
int ha_reset();
+ /* Tell handler (not storage engine) this is start of a new statement */
+ void ha_start_of_new_statement()
+ {
+ ft_handler= 0;
+ mark_trx_done= FALSE;
+ }
+
/* this is necessary in many places, e.g. in HANDLER command */
int ha_index_or_rnd_end()
{
@@ -1409,13 +1911,17 @@ public:
int ha_repair(THD* thd, HA_CHECK_OPT* check_opt);
void ha_start_bulk_insert(ha_rows rows)
{
+ DBUG_ENTER("handler::ha_start_bulk_insert");
estimation_rows_to_insert= rows;
start_bulk_insert(rows);
+ DBUG_VOID_RETURN;
}
int ha_end_bulk_insert()
{
+ DBUG_ENTER("handler::ha_end_bulk_insert");
estimation_rows_to_insert= 0;
- return end_bulk_insert();
+ int ret= end_bulk_insert();
+ DBUG_RETURN(ret);
}
int ha_bulk_update_row(const uchar *old_data, uchar *new_data,
uint *dup_key_found);
@@ -1453,15 +1959,31 @@ public:
virtual void print_error(int error, myf errflag);
virtual bool get_error_message(int error, String *buf);
uint get_dup_key(int error);
+ void reset_statistics()
+ {
+ rows_read= rows_changed= rows_tmp_read= 0;
+ bzero(index_rows_read, sizeof(index_rows_read));
+ }
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
{
table= table_arg;
table_share= share;
+ reset_statistics();
}
virtual double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
virtual double read_time(uint index, uint ranges, ha_rows rows)
{ return rows2double(ranges+rows); }
+
+ /**
+ Calculate cost of 'keyread' scan for given index and number of records.
+
+ @param index index to read
+ @param ranges #of ranges to read
+ @param rows #of records to read
+ */
+ virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
bool has_transactions()
{ return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0; }
@@ -1518,8 +2040,13 @@ public:
as there may be several calls to this routine.
*/
virtual void column_bitmaps_signal();
- uint get_index(void) const { return active_index; }
- virtual int close(void)=0;
+ /*
+ We have to check for inited as some engines, like innodb, set
+ active_index during table scan.
+ */
+ uint get_index(void) const
+ { return inited == INDEX ? active_index : MAX_KEY; }
+ int ha_close(void);
/**
@retval 0 Bulk update used by handler
@@ -1565,22 +2092,23 @@ public:
}
/**
@brief
- Positions an index cursor to the index specified in the handle. Fetches the
- row if available. If the key value is null, begin at the first key of the
- index.
+ Positions an index cursor to the index specified in the
+ handle. Fetches the row if available. If the key value is null,
+ begin at the first key of the index.
*/
+protected:
virtual int index_read_map(uchar * buf, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
uint key_len= calculate_key_len(table, active_index, key, keypart_map);
- return index_read(buf, key, key_len, find_flag);
+ return index_read(buf, key, key_len, find_flag);
}
/**
@brief
- Positions an index cursor to the index specified in the handle. Fetches the
- row if available. If the key value is null, begin at the first key of the
- index.
+ Positions an index cursor to the index specified in the
+ handle. Fetches the row if available. If the key value is null,
+ begin at the first key of the index.
*/
virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key,
key_part_map keypart_map,
@@ -1594,30 +2122,85 @@ public:
virtual int index_last(uchar * buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
- /**
- @brief
- The following functions works like index_read, but it find the last
- row with the current key value or prefix.
- */
- virtual int index_read_last_map(uchar * buf, const uchar * key,
- key_part_map keypart_map)
+ virtual int close(void)=0;
+ inline void update_rows_read()
{
- uint key_len= calculate_key_len(table, active_index, key, keypart_map);
- return index_read_last(buf, key, key_len);
+ if (likely(!internal_tmp_table))
+ rows_read++;
+ else
+ rows_tmp_read++;
+ }
+ inline void update_index_statistics()
+ {
+ index_rows_read[active_index]++;
+ update_rows_read();
}
- virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE *ranges, uint range_count,
- bool sorted, HANDLER_BUFFER *buffer);
- virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+public:
+
+ /* Similar functions like the above, but does statistics counting */
+ inline int ha_index_read_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ inline int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ inline int ha_index_next(uchar * buf);
+ inline int ha_index_prev(uchar * buf);
+ inline int ha_index_first(uchar * buf);
+ inline int ha_index_last(uchar * buf);
+ inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
+ /*
+ TODO: should we make for those functions non-virtual ha_func_name wrappers,
+ too?
+ */
+ virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *mrr_mode, COST_VECT *cost);
+ virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *mrr_mode, COST_VECT *cost);
+ virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mrr_mode,
+ HANDLER_BUFFER *buf);
+ virtual int multi_range_read_next(range_id_t *range_info);
+ /*
+ Return string representation of the MRR plan.
+
+ This is intended to be used for EXPLAIN, via the following scenario:
+ 1. SQL layer calls handler->multi_range_read_info().
+ 1.1. Storage engine figures out whether it will use some non-default
+ MRR strategy, sets appropriate bits in *mrr_mode, and returns
+ control to SQL layer
+ 2. SQL layer remembers the returned mrr_mode
+ 3. SQL layer compares various options and chooses the final query plan. As
+ a part of that, it makes a choice of whether to use the MRR strategy
+ picked in 1.1
+ 4. EXPLAIN code converts the query plan to its text representation. If MRR
+ strategy is part of the plan, it calls
+ multi_range_read_explain_info(mrr_mode) to get a text representation of
+ the picked MRR strategy.
+
+ @param mrr_mode Mode which was returned by multi_range_read_info[_const]
+ @param str INOUT string to be printed for EXPLAIN
+ @param str_end End of the string buffer. The function is free to put the
+ string into [str..str_end] memory range.
+ */
+ virtual int multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+ { return 0; }
+
virtual int read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_range, bool sorted);
virtual int read_range_next();
int compare_key(key_range *range);
+ int compare_key2(key_range *range);
virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
void ft_end() { ft_handler=NULL; }
virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
{ return NULL; }
+private:
virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; }
virtual int rnd_next(uchar *buf)=0;
virtual int rnd_pos(uchar * buf, uchar *pos)=0;
@@ -1627,20 +2210,33 @@ public:
It will return the row with the PK given in the record argument.
*/
virtual int rnd_pos_by_record(uchar *record)
- {
- position(record);
- return rnd_pos(record, ref);
- }
+ {
+ position(record);
+ return rnd_pos(record, ref);
+ }
virtual int read_first_row(uchar *buf, uint primary_key);
+public:
+
+ /* Same as above, but with statistics */
+ inline int ha_ft_read(uchar *buf);
+ inline int ha_rnd_next(uchar *buf);
+ inline int ha_rnd_pos(uchar *buf, uchar *pos);
+ inline int ha_rnd_pos_by_record(uchar *buf);
+ inline int ha_read_first_row(uchar *buf, uint primary_key);
+
/**
- The following function is only needed for tables that may be temporary
- tables during joins.
+ The following 3 functions are only needed for tables that may be
+ internal temporary tables during joins.
*/
- virtual int restart_rnd_next(uchar *buf, uchar *pos)
+ virtual int remember_rnd_pos()
+ { return HA_ERR_WRONG_COMMAND; }
+ virtual int restart_rnd_next(uchar *buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int rnd_same(uchar *buf, uint inx)
{ return HA_ERR_WRONG_COMMAND; }
- virtual ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key)
+
+ virtual ha_rows records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
{ return (ha_rows) 10; }
/*
If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref
@@ -1734,6 +2330,7 @@ public:
{ return(NULL);} /* gets tablespace name from handler */
/** used in ALTER TABLE; 1 if changing storage engine is allowed */
virtual bool can_switch_engines() { return 1; }
+ virtual int can_continue_handler_scan() { return 0; }
/**
Get the list of foreign keys in this table.
@@ -1781,7 +2378,8 @@ public:
*/
virtual const char **bas_ext() const =0;
- virtual int get_default_no_partitions(HA_CREATE_INFO *info) { return 1;}
+ virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
+ { return 1;}
virtual void set_auto_partitions(partition_info *part_info) { return; }
virtual bool get_no_parts(const char *name,
uint *no_parts)
@@ -1847,18 +2445,18 @@ public:
virtual uint max_supported_key_part_length() const { return 255; }
virtual uint min_record_length(uint options) const { return 1; }
- virtual bool low_byte_first() const { return 1; }
virtual uint checksum() const { return 0; }
virtual bool is_crashed() const { return 0; }
- virtual bool auto_repair() const { return 0; }
+ virtual bool auto_repair(int error) const { return 0; }
+ void update_global_table_stats();
+ void update_global_index_stats();
#define CHF_CREATE_FLAG 0
#define CHF_DELETE_FLAG 1
#define CHF_RENAME_FLAG 2
#define CHF_INDEX_FLAG 3
-
/**
@note lock_count() can return > 1 if the table is MERGE or partitioned.
*/
@@ -1926,9 +2524,28 @@ public:
/*
- @retval TRUE Primary key (if there is one) is clustered
- key covering all fields
- @retval FALSE otherwise
+ Check if the primary key (if there is one) is a clustered and a
+ reference key. This means:
+
+ - Data is stored together with the primary key (no secondary lookup
+ needed to find the row data). The optimizer uses this to find out
+ the cost of fetching data.
+ - The primary key is part of each secondary key and is used
+ to find the row data in the primary index when reading through
+ secondary indexes.
+ - When doing a HA_KEYREAD_ONLY read, we also get all the primary key parts
+ into the row. This is a critical property used by index_merge.
+
+ All the above is usually true for engines that store the row
+ data in the primary key index (e.g. in a b-tree), and use the primary
+ key value as a position(). InnoDB is an example of such an engine.
+
+ For such a clustered primary key, the following should also hold:
+ index_flags() should contain HA_CLUSTERED_INDEX
+ table_flags() should contain HA_TABLE_SCAN_ON_INDEX
+
+ @retval TRUE yes
+ @retval FALSE No.
*/
virtual bool primary_key_is_clustered() { return FALSE; }
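For a clustered-primary-key engine the overrides would typically look roughly like this (ha_myclustered is a hypothetical engine class, not part of this patch):

  bool ha_myclustered::primary_key_is_clustered() { return TRUE; }

  ulong ha_myclustered::index_flags(uint idx, uint part, bool all_parts) const
  {
    ulong flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE;
    if (idx == table_share->primary_key)
      flags|= HA_CLUSTERED_INDEX;          /* key read also returns the row */
    return flags;
  }

  /* table_flags() should additionally include HA_TABLE_SCAN_ON_INDEX. */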
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
@@ -1969,6 +2586,14 @@ public:
Pops the top if condition stack, if stack is not empty.
*/
virtual void cond_pop() { return; };
+ virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; }
+ /** Reset information about pushed index conditions */
+ virtual void cancel_pushed_idx_cond()
+ {
+ pushed_idx_cond= NULL;
+ pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= false;
+ }
virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
uint table_changes)
{ return COMPATIBLE_DATA_NO; }
@@ -1986,9 +2611,26 @@ public:
return 0;
}
+ LEX_STRING *engine_name() { return hton_name(ht); }
+
+ /*
+ @brief
+ Check whether the engine supports virtual columns
+
+ @retval
+ FALSE if the engine does not support virtual columns
+ @retval
+ TRUE if the engine supports virtual columns
+ */
+
+ virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
+
+ TABLE* get_table() { return table; }
protected:
+ /* deprecated, don't use in new engines */
+ inline void ha_statistic_increment(ulong SSV::*offset) const { }
+
/* Service methods for use by storage engines. */
- void ha_statistic_increment(ulong SSV::*offset) const;
void **ha_data(THD *) const;
THD *ha_thd(void) const;
@@ -2038,10 +2680,18 @@ protected:
tables.
*/
virtual int delete_table(const char *name);
+
private:
/* Private helpers */
- inline void mark_trx_read_write();
-private:
+ void mark_trx_read_write_part2();
+ inline void mark_trx_read_write()
+ {
+ if (!mark_trx_done)
+ mark_trx_read_write_part2();
+ }
+ inline void increment_statistics(ulong SSV::*offset) const;
+ inline void decrement_statistics(ulong SSV::*offset) const;
+
/*
Low-level primitives for storage engines. These should be
overridden by the storage engine class. To call these methods, use
@@ -2049,8 +2699,9 @@ private:
*/
virtual int open(const char *name, int mode, uint test_if_locked)=0;
- virtual int index_init(uint idx, bool sorted) { active_index= idx; return 0; }
- virtual int index_end() { active_index= MAX_KEY; return 0; }
+ /* Note: ha_index_read_idx_map() may bypass index_init() */
+ virtual int index_init(uint idx, bool sorted) { return 0; }
+ virtual int index_end() { return 0; }
/**
rnd_init() can be called two times without rnd_end() in between
(it only makes sense if scan=1).
@@ -2131,8 +2782,6 @@ private:
virtual int index_read(uchar * buf, const uchar * key, uint key_len,
enum ha_rkey_function find_flag)
{ return HA_ERR_WRONG_COMMAND; }
- virtual int index_read_last(uchar * buf, const uchar * key, uint key_len)
- { return (my_errno= HA_ERR_WRONG_COMMAND); }
/**
This method is similar to update_row, however the handler doesn't need
to execute the updates at this point in time. The handler can be certain
@@ -2179,14 +2828,16 @@ private:
@remark The table is locked in exclusive mode.
*/
virtual int truncate()
- { return HA_ERR_WRONG_COMMAND; }
+ {
+ int error= delete_all_rows();
+ return error ? error : reset_auto_increment(0);
+ }
/**
Reset the auto-increment counter to the given value, i.e. the next row
- inserted will get the given value. HA_ERR_WRONG_COMMAND is returned by
- storage engines that don't support this operation.
+ inserted will get the given value.
*/
virtual int reset_auto_increment(ulonglong value)
- { return HA_ERR_WRONG_COMMAND; }
+ { return 0; }
virtual int optimize(THD* thd, HA_CHECK_OPT* check_opt)
{ return HA_ADMIN_NOT_IMPLEMENTED; }
virtual int analyze(THD* thd, HA_CHECK_OPT* check_opt)
@@ -2215,8 +2866,20 @@ private:
{ return HA_ERR_WRONG_COMMAND; }
virtual int rename_partitions(const char *path)
{ return HA_ERR_WRONG_COMMAND; }
+ friend class ha_partition;
+public:
+ /* XXX to be removed, see ha_partition::partition_ht() */
+ virtual handlerton *partition_ht() const
+ { return ht; }
+ inline int ha_write_tmp_row(uchar *buf);
+ inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data);
+
+ friend enum icp_result handler_index_cond_check(void* h_arg);
};
+#include "multi_range_read.h"
+
+bool key_uses_partial_cols(TABLE *table, uint keyno);
/* Some extern variables used with handlers */
@@ -2245,7 +2908,7 @@ static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type)
static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type)
{
- return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
+ return db_type == NULL ? "UNKNOWN" : hton_name(db_type)->str;
}
static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag)
@@ -2271,6 +2934,7 @@ int ha_panic(enum ha_panic_function flag);
void ha_close_connection(THD* thd);
bool ha_flush_logs(handlerton *db_type);
void ha_drop_database(char* path);
+void ha_checkpoint_state(bool disable);
int ha_create_table(THD *thd, const char *path,
const char *db, const char *table_name,
HA_CREATE_INFO *create_info,
@@ -2290,13 +2954,12 @@ int ha_discover(THD* thd, const char* dbname, const char* name,
int ha_find_files(THD *thd,const char *db,const char *path,
const char *wild, bool dir, List<LEX_STRING>* files);
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name);
-bool ha_check_if_supported_system_table(handlerton *hton, const char* db,
- const char* table_name);
/* key cache */
-extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache);
+extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *);
int ha_resize_key_cache(KEY_CACHE *key_cache);
int ha_change_key_cache_param(KEY_CACHE *key_cache);
+int ha_repartition_key_cache(KEY_CACHE *key_cache);
int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
/* report to InnoDB that control passes to the client */
@@ -2353,10 +3016,8 @@ const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
bool mysql_xa_recover(THD *thd);
-
inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
}
-
-#endif /* HANDLER_INCLUDED */
+#endif