Diffstat (limited to 'sql/handler.h')
-rw-r--r--  sql/handler.h | 967
1 files changed, 814 insertions, 153 deletions
diff --git a/sql/handler.h b/sql/handler.h
index 0b970a1349d..ee1731af563 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1,8 +1,8 @@
#ifndef HANDLER_INCLUDED
#define HANDLER_INCLUDED
-
/*
- Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates.
+ Copyright (c) 2009-2011 Monty Program Ab
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@@ -36,6 +36,10 @@
#include <ft_global.h>
#include <keycache.h>
+#if MAX_KEY > 128
+#error MAX_KEY is too large. Values up to 128 are supported.
+#endif
+
// the following is for checking tables
#define HA_ADMIN_ALREADY_DONE 1
@@ -116,7 +120,8 @@
#define HA_CAN_FULLTEXT (1 << 21)
#define HA_CAN_SQL_HANDLER (1 << 22)
#define HA_NO_AUTO_INCREMENT (1 << 23)
-#define HA_HAS_CHECKSUM (1 << 24)
+/* Has automatic checksums and uses the old checksum format */
+#define HA_HAS_OLD_CHECKSUM (1 << 24)
/* Table data are stored in separate files (for lower_case_table_names) */
#define HA_FILE_BASED (1 << 26)
#define HA_NO_VARCHAR (1 << 27)
@@ -156,6 +161,7 @@
ordered.
*/
#define HA_DUPLICATE_KEY_NOT_IN_ORDER (LL(1) << 36)
+
/*
Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an
incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE
@@ -163,6 +169,25 @@
*/
#define HA_CAN_REPAIR (LL(1) << 37)
+/* Has automatic checksums and uses the new checksum format */
+#define HA_HAS_NEW_CHECKSUM (LL(1) << 38)
+#define HA_CAN_VIRTUAL_COLUMNS (LL(1) << 39)
+#define HA_MRR_CANT_SORT (LL(1) << 40)
+#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (LL(1) << 41)
+
+/*
+ Table condition pushdown must be performed regardless of
+ 'engine_condition_pushdown' setting.
+
+ This flag is aimed at storage engines that come with "special" predicates
+ that can only be evaluated inside the storage engine.
+ For example, when one does
+ select * from sphinx_table where query='{fulltext_query}'
+ then the "query=..." condition must be always pushed down into storage
+ engine.
+*/
+#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN (LL(1) << 42)
+
/*
Set of all binlog flags. Currently only contain the capabilities
flags.
@@ -178,6 +203,18 @@
#define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */
/*
+ Index scan will not return records in rowid order. Not guaranteed to be
+ set for unordered (e.g. HASH) indexes.
+*/
+#define HA_KEY_SCAN_NOT_ROR 128
+#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
+/*
+ Data is clustered on this key. This means that when you read the key
+ you also get the row data without any additional disk reads.
+*/
+#define HA_CLUSTERED_INDEX 512
+
+/*
bits in alter_table_flags:
*/
/*
@@ -235,12 +272,6 @@
#define HA_FAST_CHANGE_PARTITION (1L << 13)
#define HA_PARTITION_ONE_PHASE (1L << 14)
-/*
- Index scan will not return records in rowid order. Not guaranteed to be
- set for unordered (e.g. HASH) indexes.
-*/
-#define HA_KEY_SCAN_NOT_ROR 128
-
/* operations for disable/enable indexes */
#define HA_KEY_SWITCH_NONUNIQ 0
#define HA_KEY_SWITCH_ALL 1
@@ -287,8 +318,6 @@
#define HA_LEX_CREATE_TMP_TABLE 1
#define HA_LEX_CREATE_IF_NOT_EXISTS 2
#define HA_LEX_CREATE_TABLE_LIKE 4
-#define HA_OPTION_NO_CHECKSUM (1L << 17)
-#define HA_OPTION_NO_DELAY_KEY_WRITE (1L << 18)
#define HA_MAX_REC_LENGTH 65535
/* Table caching type */
@@ -329,6 +358,11 @@ enum legacy_db_type
DB_TYPE_FIRST_DYNAMIC=42,
DB_TYPE_DEFAULT=127 // Must be last
};
+/*
+ Better name for DB_TYPE_UNKNOWN. Should be used for engines that do not have
+ a hard-coded type value here.
+ */
+#define DB_TYPE_AUTOASSIGN DB_TYPE_UNKNOWN
enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED,
ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED,
@@ -377,30 +411,10 @@ enum enum_binlog_command {
#define HA_CREATE_USED_PASSWORD (1L << 17)
#define HA_CREATE_USED_CONNECTION (1L << 18)
#define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19)
-/** Unused. Reserved for future versions. */
+/* The following two are used by Maria engine: */
#define HA_CREATE_USED_TRANSACTIONAL (1L << 20)
-/** Unused. Reserved for future versions. */
#define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21)
-
-/*
- This is master database for most of system tables. However there
- can be other databases which can hold system tables. Respective
- storage engines define their own system database names.
-*/
-extern const char *mysqld_system_database;
-
-/*
- Structure to hold list of system_database.system_table.
- This is used at both mysqld and storage engine layer.
-*/
-struct st_system_tablename
-{
- const char *db;
- const char *tablename;
-};
-
-
typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MYSQL_XID_PREFIX "MySQLXid"
#define MYSQL_XID_PREFIX_LEN 8 // must be a multiple of 8
@@ -579,6 +593,7 @@ struct TABLE;
enum enum_schema_tables
{
SCH_CHARSETS= 0,
+ SCH_CLIENT_STATS,
SCH_COLLATIONS,
SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
SCH_COLUMNS,
@@ -588,6 +603,8 @@ enum enum_schema_tables
SCH_FILES,
SCH_GLOBAL_STATUS,
SCH_GLOBAL_VARIABLES,
+ SCH_INDEX_STATS,
+ SCH_KEY_CACHES,
SCH_KEY_COLUMN_USAGE,
SCH_OPEN_TABLES,
SCH_PARAMETERS,
@@ -608,8 +625,10 @@ enum enum_schema_tables
SCH_TABLE_CONSTRAINTS,
SCH_TABLE_NAMES,
SCH_TABLE_PRIVILEGES,
+ SCH_TABLE_STATS,
SCH_TRIGGERS,
SCH_USER_PRIVILEGES,
+ SCH_USER_STATS,
SCH_VARIABLES,
SCH_VIEWS
};
@@ -648,6 +667,108 @@ struct handler_log_file_data {
enum log_status status;
};
+/*
+ Definitions for engine-specific table/field/index options in the CREATE TABLE.
+
+ Options are declared with HA_*OPTION_* macros (HA_TOPTION_NUMBER,
+ HA_FOPTION_ENUM, HA_IOPTION_STRING, etc).
+
+ Every macro takes the option name and the name of the underlying field of
+ the appropriate C structure. The "appropriate C structure" is
+ ha_table_option_struct for table level options,
+ ha_field_option_struct for field level options,
+ ha_index_option_struct for key level options. The engine either
+ defines a structure of this name, or uses #define's to map
+ these "appropriate" names to the actual structure type name.
+
+ ULL options use a ulonglong as the backing store.
+ HA_*OPTION_NUMBER() takes the option name, the structure field name,
+ the default value for the option, min, max, and blk_siz values.
+
+ STRING options use a char* as a backing store.
+ HA_*OPTION_STRING takes the option name and the structure field name.
+ The default value will be 0.
+
+ ENUM options use a uint as a backing store (not enum!!!).
+ HA_*OPTION_ENUM takes the option name, the structure field name,
+ the default value for the option as a number, and a string with the
+ permitted values for this enum - one string with comma separated values,
+ for example: "gzip,bzip2,lzma"
+
+ BOOL options use a bool as a backing store.
+ HA_*OPTION_BOOL takes the option name, the structure field name,
+ and the default value for the option.
+ From the SQL, BOOL options accept YES/NO, ON/OFF, and 1/0.
+
+ The name of the option is limited to 255 bytes,
+ the value (for string options) to 32767 bytes.
+
+ See ha_example.cc for an example.
+*/
+
+struct ha_table_option_struct;
+struct ha_field_option_struct;
+struct ha_index_option_struct;
+
+enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */
+ HA_OPTION_TYPE_STRING, /* char * */
+ HA_OPTION_TYPE_ENUM, /* uint */
+ HA_OPTION_TYPE_BOOL}; /* bool */
+
+#define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz) \
+ { HA_OPTION_TYPE_ULL, name, sizeof(name)-1, \
+ offsetof(struc, field), def, min, max, blk_siz, 0 }
+#define HA_xOPTION_STRING(name, struc, field) \
+ { HA_OPTION_TYPE_STRING, name, sizeof(name)-1, \
+ offsetof(struc, field), 0, 0, 0, 0, 0 }
+#define HA_xOPTION_ENUM(name, struc, field, values, def) \
+ { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1, \
+ offsetof(struc, field), def, 0, \
+ sizeof(values)-1, 0, values }
+#define HA_xOPTION_BOOL(name, struc, field, def) \
+ { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1, \
+ offsetof(struc, field), def, 0, 1, 0, 0 }
+#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+#define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz)
+#define HA_TOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_table_option_struct, field)
+#define HA_TOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def)
+#define HA_TOPTION_BOOL(name, field, def) \
+ HA_xOPTION_BOOL(name, ha_table_option_struct, field, def)
+#define HA_TOPTION_END HA_xOPTION_END
+
+#define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_field_option_struct, field, def, min, max, blk_siz)
+#define HA_FOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_field_option_struct, field)
+#define HA_FOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def)
+#define HA_FOPTION_BOOL(name, field, def) \
+ HA_xOPTION_BOOL(name, ha_field_option_struct, field, def)
+#define HA_FOPTION_END HA_xOPTION_END
+
+#define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz) \
+ HA_xOPTION_NUMBER(name, ha_index_option_struct, field, def, min, max, blk_siz)
+#define HA_IOPTION_STRING(name, field) \
+ HA_xOPTION_STRING(name, ha_index_option_struct, field)
+#define HA_IOPTION_ENUM(name, field, values, def) \
+ HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_BOOL(name, field, values, def) \
+ HA_xOPTION_BOOL(name, ha_index_option_struct, field, values, def)
+#define HA_IOPTION_END HA_xOPTION_END
+
+typedef struct st_ha_create_table_option {
+ enum ha_option_type type;
+ const char *name;
+ size_t name_length;
+ ptrdiff_t offset;
+ ulonglong def_value;
+ ulonglong min_value, max_value, block_size;
+ const char *values;
+} ha_create_table_option;
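For illustration, a storage engine could describe its CREATE TABLE options with these macros roughly as follows; the structure fields and option names below are hypothetical, and ha_example.cc remains the real reference mentioned above:

  struct ha_table_option_struct            /* engine-side backing store */
  {
    const char *compression;               /* STRING option */
    ulonglong   block_size;                /* NUMBER option */
    uint        pack_mode;                 /* ENUM option (uint, not enum) */
    bool        checksums;                 /* BOOL option */
  };

  static ha_create_table_option my_table_option_list[]=
  {
    HA_TOPTION_STRING("COMPRESSION", compression),
    HA_TOPTION_NUMBER("BLOCK_SIZE", block_size, 8192, 1024, 65536, 1024),
    HA_TOPTION_ENUM("PACK_MODE", pack_mode, "none,prefix,full", 0),
    HA_TOPTION_BOOL("CHECKSUMS", checksums, 1),
    HA_TOPTION_END
  };

Such an array would then be assigned to handlerton::table_options at engine initialization, so that options given in CREATE/ALTER TABLE are parsed into ha_table_option_struct for the engine.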
enum handler_iterator_type
{
@@ -709,8 +830,9 @@ struct handlerton
SHOW_COMP_OPTION state;
/*
- Historical number used for frm file to determine the correct storage engine.
- This is going away and new engines will just use "name" for this.
+ Historical number used for frm file to determine the correct
+ storage engine. This is going away and new engines will just use
+ "name" for this.
*/
enum legacy_db_type db_type;
/*
@@ -760,12 +882,113 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
- int (*commit)(handlerton *hton, THD *thd, bool all);
+ int (*commit)(handlerton *hton, THD *thd, bool all);
+ /*
+ The commit_ordered() method is called prior to the commit() method, after
+ the transaction manager has decided to commit (not rollback) the
+ transaction. Unlike commit(), commit_ordered() is called only when the
+ full transaction is committed, not for each commit of statement
+ transaction in a multi-statement transaction.
+
+ Note that, like prepare(), commit_ordered() is only called when 2-phase
+ commit takes place. Ie. when no binary log and only a single engine
+ participates in a transaction, one commit() is called, no
+ commit_ordered(). So engines must be prepared for this.
+
+ The calls to commit_ordered() in multiple parallel transactions are
+ guaranteed to happen in the same order in every participating
+ handler. This can be used to ensure the same commit order among multiple
+ handlers (eg. in table handler and binlog). So if transaction T1 calls
+ into commit_ordered() of handler A before T2, then T1 will also call
+ commit_ordered() of handler B before T2.
+
+ Engines that implement this method should during this call make the
+ transaction visible to other transactions, thereby making the order of
+ transaction commits be defined by the order of commit_ordered() calls.
+
+ The intention is that commit_ordered() should do the minimal amount of
+ work that needs to happen in consistent commit order among handlers. To
+ preserve ordering, calls need to be serialised on a global mutex, so
+ doing any time-consuming or blocking operations in commit_ordered() will
+ limit scalability.
+
+ Handlers can rely on commit_ordered() calls to be serialised (no two
+ calls can run in parallel, so no extra locking on the handler part is
+ required to ensure this).
+
+ Note that commit_ordered() can be called from a different thread than the
+ one handling the transaction! So it can not do anything that depends on
+ thread local storage, in particular it can not call my_error() and
+ friends (instead it can store the error code and delay the call of
+ my_error() to the commit() method).
+
+ Similarly, since commit_ordered() returns void, any return error code
+ must be saved and returned from the commit() method instead.
+
+ The commit_ordered method is optional, and can be left unset if not
+ needed in a particular handler (then there will be no ordering guarantees
+ wrt. other engines and binary log).
+ */
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
+ /*
+ The prepare_ordered method is optional. If set, it will be called after
+ successful prepare() in all handlers participating in 2-phase
+ commit. Like commit_ordered(), it is called only when the full
+ transaction is committed, not for each commit of statement transaction.
+
+ The calls to prepare_ordered() among multiple parallel transactions are
+ ordered consistently with calls to commit_ordered(). This means that
+ calls to prepare_ordered() effectively define the commit order, and that
+ each handler will see the same sequence of transactions calling into
+ prepare_ordered() and commit_ordered().
+
+ Thus, prepare_ordered() can be used to define commit order for handlers
+ that need to do this in the prepare step (like binlog). It can also be
+ used to release transaction's locks early in an order consistent with the
+ order transactions will be eventually committed.
+
+ Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+ ordering, so the intention is that they should execute fast, with only
+ the minimal amount of work needed to define commit order. Handlers can
+ rely on this serialisation, and do not need to do any extra locking to
+ avoid two prepare_ordered() calls running in parallel.
+
+ Like commit_ordered(), prepare_ordered() is not guaranteed to be called
+ in the context of the thread handling the rest of the transaction. So it
+ cannot invoke code that relies on thread local storage, in particular it
+ cannot call my_error().
+
+ prepare_ordered() cannot cause a rollback by returning an error, all
+ possible errors must be handled in prepare() (the prepare_ordered()
+ method returns void). In case of some fatal error, a record of the error
+ must be made internally by the engine and returned from commit() later.
+
+ Note that for user-level XA SQL commands, no consistent ordering among
+ prepare_ordered() and commit_ordered() is guaranteed (as that would
+ require blocking all other commits for an indefinite time).
+
+ When 2-phase commit is not used (eg. only one engine (and no binlog) in
+ transaction), neither prepare() nor prepare_ordered() is called.
+ */
+ void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
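A minimal sketch of the intended split of work described above (the my_* names are hypothetical and not part of this patch): commit_ordered() only publishes the transaction in commit order and records any error, which commit() later reports:

  static void my_commit_ordered(handlerton *hton, THD *thd, bool all)
  {
    my_trx *trx= my_get_trx(hton, thd);     /* engine's per-connection trx */
    /* Fast and non-blocking: make the transaction visible in commit order. */
    trx->deferred_error= my_make_visible(trx);
    /* No my_error() here: this may run in another thread. */
  }

  static int my_commit(handlerton *hton, THD *thd, bool all)
  {
    my_trx *trx= my_get_trx(hton, thd);
    if (trx->deferred_error)
      return trx->deferred_error;           /* error saved by commit_ordered() */
    return my_flush_logs_and_release(trx);  /* the slow, durable part */
  }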
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
+ /*
+ Disable or enable checkpointing internal to the storage engine. This is
+ used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
+ the engine will never start any recovery from a time between
+ FLUSH TABLES ... ; UNLOCK TABLES.
+
+ While checkpointing is disabled, the engine should pause any background
+ write activity (such as tablespace checkpointing) that requires consistency
+ between different files (such as transaction log and tablespace files) for
+ crash recovery to succeed. The idea is to use this to make safe
+ multi-volume LVM snapshot backups.
+ */
+ int (*checkpoint_state)(handlerton *hton, bool disabled);
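A hedged sketch of an engine hook for this member (my_* names are hypothetical): pause background checkpoint activity while disabled, resume it afterwards:

  static int my_checkpoint_state(handlerton *hton, bool disabled)
  {
    if (disabled)
      return my_pause_background_checkpoints(); /* wait until files are consistent */
    my_resume_background_checkpoints();
    return 0;
  }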
void *(*create_cursor_read_view)(handlerton *hton, THD *thd);
void (*set_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
void (*close_cursor_read_view)(handlerton *hton, THD *thd, void *read_view);
@@ -820,50 +1043,29 @@ struct handlerton
int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
const char *name);
- /**
- List of all system tables specific to the SE.
- Array element would look like below,
- { "<database_name>", "<system table name>" },
- The last element MUST be,
- { (const char*)NULL, (const char*)NULL }
-
- @see ha_example_system_tables in ha_example.cc
-
- This interface is optional, so every SE need not implement it.
- */
- const char* (*system_database)();
-
- /**
- Check if the given db.tablename is a system table for this SE.
-
- @param db Database name to check.
- @param table_name table name to check.
- @param is_sql_layer_system_table if the supplied db.table_name is a SQL
- layer system table.
-
- @see example_is_supported_system_table in ha_example.cc
+ uint32 license; /* Flag for Engine License */
+ /*
+ Optional clauses in the CREATE/ALTER TABLE
+ */
+ ha_create_table_option *table_options; // table level options
+ ha_create_table_option *field_options; // these are specified per field
+ ha_create_table_option *index_options; // these are specified per index
- is_sql_layer_system_table is supplied to make more efficient
- checks possible for SEs that support all SQL layer tables.
+};
- This interface is optional, so every SE need not implement it.
- */
- bool (*is_supported_system_table)(const char *db,
- const char *table_name,
- bool is_sql_layer_system_table);
- uint32 license; /* Flag for Engine License */
- void *data; /* Location for engines to keep personal structures */
-};
+inline LEX_STRING *hton_name(const handlerton *hton)
+{
+ return &(hton2plugin[hton->slot]->name);
+}
/* Possible flags of a handlerton (there can be 32 of them) */
#define HTON_NO_FLAGS 0
#define HTON_CLOSE_CURSORS_AT_COMMIT (1 << 0)
#define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter
-#define HTON_CAN_RECREATE (1 << 2) //Delete all is used fro truncate
+#define HTON_CAN_RECREATE (1 << 2) //Delete all is used for truncate
#define HTON_HIDDEN (1 << 3) //Engine does not appear in lists
-#define HTON_FLUSH_AFTER_RENAME (1 << 4)
#define HTON_NOT_USER_SELECTABLE (1 << 5)
#define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported
#define HTON_SUPPORT_LOG_TABLES (1 << 7) //Engine supports log tables
@@ -909,6 +1111,7 @@ struct THD_TRANS
void reset() { no_2pc= FALSE; modified_non_trans_table= FALSE; }
bool is_empty() const { return ha_list == NULL; }
+ THD_TRANS() {} /* Remove gcc warning */
};
@@ -1019,9 +1222,9 @@ typedef struct {
ulonglong delete_length;
ha_rows records;
ulong mean_rec_length;
- ulong create_time;
- ulong check_time;
- ulong update_time;
+ time_t create_time;
+ time_t check_time;
+ time_t update_time;
ulonglong check_sum;
} PARTITION_STATS;
@@ -1034,7 +1237,7 @@ class partition_info;
struct st_partition_iter;
#define NOT_A_PARTITION_ID ((uint32)-1)
-enum enum_ha_unused { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
typedef struct st_ha_create_information
{
@@ -1066,11 +1269,16 @@ typedef struct st_ha_create_information
uint options; /* OR of HA_CREATE_ options */
uint merge_insert_method;
uint extra_size; /* length of extra data segment */
- enum enum_ha_unused unused1;
- bool frm_only; /* 1 if no ha_create_table() */
- bool varchar; /* 1 if table has a VARCHAR */
- enum ha_storage_media storage_media; /* DEFAULT, DISK or MEMORY */
- enum enum_ha_unused unused2;
+ enum ha_choice transactional;
+ bool frm_only; ///< 1 if no ha_create_table()
+ bool varchar; ///< 1 if table has a VARCHAR
+ enum ha_storage_media storage_media; ///< DEFAULT, DISK or MEMORY
+ enum ha_choice page_checksum; ///< If we have page_checksums
+ engine_option_value *option_list; ///< list of table create options
+ /* the following three are only for ALTER TABLE, check_if_incompatible_data() */
+ ha_table_option_struct *option_struct; ///< structure with parsed table options
+ ha_field_option_struct **fields_option_struct; ///< array of field option structures
+ ha_index_option_struct **indexes_option_struct; ///< array of index option structures
} HA_CREATE_INFO;
@@ -1145,11 +1353,250 @@ typedef struct st_ha_check_opt
st_ha_check_opt() {} /* Remove gcc warning */
uint flags; /* isam layer flags (e.g. for myisamchk) */
uint sql_flags; /* sql layer flags - for something myisamchk cannot do */
- KEY_CACHE *key_cache; /* new key cache when changing key cache */
+ time_t start_time; /* When check/repair starts */
+ KEY_CACHE *key_cache; /* new key cache when changing key cache */
void init();
} HA_CHECK_OPT;
+/********************************************************************************
+ * MRR
+ ********************************************************************************/
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+ /*
+ Get key information
+
+ SYNOPSIS
+ get_key_info()
+ init_params The seq_init_param parameter
+ length OUT length of the keys in this range sequence
+ map OUT key_part_map of the keys in this range sequence
+
+ DESCRIPTION
+ This function is set only when using HA_MRR_FIXED_KEY mode. In that mode,
+ all ranges are single-point equality ranges that use the same set of key
+ parts. This function allows the MRR implementation to get the length of
+ a key, and which keyparts it uses.
+ */
+ void (*get_key_info)(void *init_params, uint *length, key_part_map *map);
+
+ /*
+ Initialize the traversal of range sequence
+
+ SYNOPSIS
+ init()
+ init_params The seq_init_param parameter
+ n_ranges The number of ranges obtained
+ flags A combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+ RETURN
+ An opaque value to be used as RANGE_SEQ_IF::next() parameter
+ */
+ range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+ /*
+ Get the next range in the range sequence
+
+ SYNOPSIS
+ next()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range OUT Information about the next range
+
+ RETURN
+ FALSE - Ok, the range structure filled with info about the next range
+ TRUE - No more ranges
+ */
+ bool (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+
+ /*
+ Check whether range_info orders to skip the next record
+
+ SYNOPSIS
+ skip_record()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range_info Information about the next range
+ (Ignored if MRR_NO_ASSOCIATION is set)
+ rowid Rowid of the record to be checked (ignored if set to 0)
+
+ RETURN
+ 1 - Record with this range_info and/or this rowid shall be filtered
+ out from the stream of records returned by multi_range_read_next()
+ 0 - The record shall be left in the stream
+ */
+ bool (*skip_record) (range_seq_t seq, range_id_t range_info, uchar *rowid);
+
+ /*
+ Check if the record combination matches the index condition
+ SYNOPSIS
+ skip_index_tuple()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range_info Information about the next range
+
+ RETURN
+ 0 - The record combination satisfies the index condition
+ 1 - Otherwise
+ */
+ bool (*skip_index_tuple) (range_seq_t seq, range_id_t range_info);
+} RANGE_SEQ_IF;
+
+typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
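As an illustration of this interface (hypothetical names, not part of the patch), a caller could expose a plain array of ranges like this:

  struct array_range_seq { KEY_MULTI_RANGE *ranges; uint count; uint current; };

  static range_seq_t array_seq_init(void *init_param, uint n_ranges, uint flags)
  {
    array_range_seq *seq= (array_range_seq*) init_param;
    seq->current= 0;
    return seq;                              /* opaque value handed to next() */
  }

  static bool array_seq_next(range_seq_t seq_arg, KEY_MULTI_RANGE *range)
  {
    array_range_seq *seq= (array_range_seq*) seq_arg;
    if (seq->current == seq->count)
      return TRUE;                           /* no more ranges */
    *range= seq->ranges[seq->current++];
    return FALSE;
  }

  /* RANGE_SEQ_IF seq_if= { NULL, array_seq_init, array_seq_next, NULL, NULL };
     could then be passed to handler::multi_range_read_init(). */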
+
+class COST_VECT
+{
+public:
+ double io_count; /* number of I/O */
+ double avg_io_cost; /* cost of an average I/O oper. */
+ double cpu_cost; /* cost of operations in CPU */
+ double mem_cost; /* cost of used memory */
+ double import_cost; /* cost of remote operations */
+
+ enum { IO_COEFF=1 };
+ enum { CPU_COEFF=1 };
+ enum { MEM_COEFF=1 };
+ enum { IMPORT_COEFF=1 };
+
+ COST_VECT() {} // keep gcc happy
+
+ double total_cost()
+ {
+ return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+ MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+ }
+
+ void zero()
+ {
+ avg_io_cost= 1.0;
+ io_count= cpu_cost= mem_cost= import_cost= 0.0;
+ }
+
+ void multiply(double m)
+ {
+ io_count *= m;
+ cpu_cost *= m;
+ import_cost *= m;
+ /* Don't multiply mem_cost */
+ }
+
+ void add(const COST_VECT* cost)
+ {
+ double io_count_sum= io_count + cost->io_count;
+ add_io(cost->io_count, cost->avg_io_cost);
+ io_count= io_count_sum;
+ cpu_cost += cost->cpu_cost;
+ }
+ void add_io(double add_io_cnt, double add_avg_cost)
+ {
+ /* In edge cases add_io_cnt may be zero */
+ if (add_io_cnt > 0)
+ {
+ double io_count_sum= io_count + add_io_cnt;
+ avg_io_cost= (io_count * avg_io_cost +
+ add_io_cnt * add_avg_cost) / io_count_sum;
+ io_count= io_count_sum;
+ }
+ }
+
+ /*
+ To be used when we go from old single value-based cost calculations to
+ the new COST_VECT-based.
+ */
+ void convert_from_cost(double cost)
+ {
+ zero();
+ avg_io_cost= 1.0;
+ io_count= cost;
+ }
+};
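A small usage sketch (illustrative numbers only) of how costs are accumulated and compared with this class:

  static double example_access_cost()
  {
    COST_VECT cost;
    cost.zero();                      /* counters to 0, avg_io_cost to 1.0 */
    cost.add_io(12, 1.0);             /* 12 I/Os at the default average cost */
    cost.cpu_cost+= 0.05 * 1000;      /* e.g. per-row CPU cost for 1000 rows */

    COST_VECT old_style;
    old_style.convert_from_cost(3.0); /* wrap an old single-number estimate */
    cost.add(&old_style);             /* I/O cost averaged in, CPU cost summed */
    return cost.total_cost();         /* scalar used to compare access paths */
  }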
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ COST_VECT *cost);
+
+/*
+ Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+ The ranges may not use the full key but all of them will use the same number
+ of key parts.
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY 2
+
+/*
+ Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+ 'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/*
+ The MRR user will provide ranges in key order, and MRR implementation
+ must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/*
+ The passed memory buffer is of maximum possible size, the caller can't
+ assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+ Flag set <=> default MRR implementation is used
+ (The choice is made by **_info[_const]() function which may set this
+ flag. SQL layer remembers the flag value and then passes it to
+ multi_range_read_init().)
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
+/*
+ Used only as parameter to multi_range_read_info():
+ Flag set <=> the caller guarantees that the bounds of the scanned ranges
+ will not have NULL values.
+*/
+#define HA_MRR_NO_NULL_ENDPOINTS 128
+
+/*
+ The MRR user has materialized range keys somewhere in the user's buffer.
+ This can be used for optimization of the procedure that sorts these keys
+ since in this case key values don't have to be copied into the MRR buffer.
+
+ In other words, it is guaranteed that after RANGE_SEQ_IF::next() call the
+ pointer in range->start_key.key will point to a key value that will remain
+ there until the end of the MRR scan.
+*/
+#define HA_MRR_MATERIALIZED_KEYS 256
+
+/*
+ The following bits are reserved for use by MRR implementation. The intended
+ use scenario:
+
+ * sql layer calls handler->multi_range_read_info[_const]()
+ - MRR implementation figures out what kind of scan it will perform, saves
+ the result in *mrr_mode parameter.
+ * sql layer remembers what was returned in *mrr_mode
+
+ * the optimizer picks the query plan (which may or may not include the MRR
+ scan that was estimated by the multi_range_read_info[_const] call)
+
+ * if the query is an EXPLAIN statement, sql layer will call
+ handler->multi_range_read_explain_info(mrr_mode) to get a text description
+ of the picked MRR scan; the description will be a part of EXPLAIN output.
+*/
+#define HA_MRR_IMPLEMENTATION_FLAG1 512
+#define HA_MRR_IMPLEMENTATION_FLAG2 1024
+#define HA_MRR_IMPLEMENTATION_FLAG3 2048
+#define HA_MRR_IMPLEMENTATION_FLAG4 4096
+#define HA_MRR_IMPLEMENTATION_FLAG5 8192
+#define HA_MRR_IMPLEMENTATION_FLAG6 16384
+
+#define HA_MRR_IMPLEMENTATION_FLAGS \
+ (512 | 1024 | 2048 | 4096 | 8192 | 16384)
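To show how these mode bits travel between the SQL layer and the engine, here is a hedged caller-side sketch (run_mrr_scan and its parameters are hypothetical):

  static int run_mrr_scan(handler *file, uint keyno, RANGE_SEQ_IF *seq_if,
                          void *seq_param, uint n_ranges, HANDLER_BUFFER *buf)
  {
    uint mrr_mode= HA_MRR_SINGLE_POINT | HA_MRR_NO_NULL_ENDPOINTS;
    uint bufsz= 4096;
    COST_VECT cost;
    ha_rows rows= file->multi_range_read_info_const(keyno, seq_if, seq_param,
                                                    n_ranges, &bufsz, &mrr_mode,
                                                    &cost);
    if (rows == HA_POS_ERROR)
      return 1;
    /* mrr_mode may now carry HA_MRR_USE_DEFAULT_IMPL and/or implementation
       flags; the remembered value is passed back unchanged. */
    return file->multi_range_read_init(seq_if, seq_param, n_ranges, mrr_mode, buf);
  }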
/*
This is a buffer area that the handler can use to store rows.
@@ -1160,8 +1607,8 @@ typedef struct st_ha_check_opt
typedef struct st_handler_buffer
{
- const uchar *buffer; /* Buffer one can start using */
- const uchar *buffer_end; /* End of buffer */
+ /* const? */uchar *buffer; /* Buffer one can start using */
+ /* const? */uchar *buffer_end; /* End of buffer */
uchar *end_of_used_area; /* End of area that was used by handler */
} HANDLER_BUFFER;
@@ -1187,19 +1634,26 @@ public:
ha_rows records;
ha_rows deleted; /* Deleted records */
ulong mean_rec_length; /* physical reclength */
- ulong create_time; /* When table was created */
- ulong check_time;
- ulong update_time;
+ time_t create_time; /* When table was created */
+ time_t check_time;
+ time_t update_time;
uint block_size; /* index block size */
+ /*
+ number of buffer bytes that native mrr implementation needs,
+ */
+ uint mrr_length_per_rec;
+
ha_statistics():
data_file_length(0), max_data_file_length(0),
index_file_length(0), delete_length(0), auto_increment_value(0),
records(0), deleted(0), mean_rec_length(0), create_time(0),
- check_time(0), update_time(0), block_size(0)
+ check_time(0), update_time(0), block_size(0), mrr_length_per_rec(0)
{}
};
+extern "C" enum icp_result handler_index_cond_check(void* h_arg);
+
uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map);
/*
bitmap with first N+1 bits set
@@ -1258,27 +1712,42 @@ public:
ha_statistics stats;
- /** The following are for read_multi_range */
- bool multi_range_sorted;
- KEY_MULTI_RANGE *multi_range_curr;
- KEY_MULTI_RANGE *multi_range_end;
- HANDLER_BUFFER *multi_range_buffer;
+ /** MultiRangeRead-related members: */
+ range_seq_t mrr_iter; /* Iterator to traverse the range sequence */
+ RANGE_SEQ_IF mrr_funcs; /* Range sequence traversal functions */
+ HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
+ uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+ /* TRUE <=> source MRR ranges and the output are ordered */
+ bool mrr_is_output_sorted;
+
+ /** TRUE <=> we're currently traversing a range in mrr_cur_range. */
+ bool mrr_have_range;
+ /** Current range (the one we're now returning rows from) */
+ KEY_MULTI_RANGE mrr_cur_range;
/** The following are for read_range() */
key_range save_end_range, *end_range;
KEY_PART_INFO *range_key_part;
int key_compare_result_on_equal;
bool eq_range;
+ bool internal_tmp_table; /* If internal tmp table */
uint errkey; /* Last dup key */
uint key_used_on_scan;
uint active_index;
+ /*
+ TRUE <=> the engine guarantees that returned records are within the range
+ being scanned.
+ */
+ bool in_range_check_pushed_down;
+
/** Length of ref (1-8 or the clustered key length) */
uint ref_length;
FT_INFO *ft_handler;
enum {NONE=0, INDEX, RND} inited;
bool locked;
bool implicit_emptied; /* Can be !=0 only if HEAP */
+ bool mark_trx_done;
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
@@ -1301,6 +1770,16 @@ public:
Interval returned by get_auto_increment() and being consumed by the
inserter.
*/
+ /* Statistics variables */
+ ulonglong rows_read;
+ ulonglong rows_tmp_read;
+ ulonglong rows_changed;
+ /* One bigger than needed to avoid to test if key == MAX_KEY */
+ ulonglong index_rows_read[MAX_KEY+1];
+
+ Item *pushed_idx_cond;
+ uint pushed_idx_cond_keyno; /* The index which the above condition is for */
+
Discrete_interval auto_inc_interval_for_cur_row;
/**
Number of reserved auto-increment intervals. Serves as a heuristic
@@ -1327,14 +1806,19 @@ public:
handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
:table_share(share_arg), table(0),
estimation_rows_to_insert(0), ht(ht_arg),
- ref(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+ ref(0), end_range(NULL), key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
+ in_range_check_pushed_down(FALSE),
ref_length(sizeof(my_off_t)),
ft_handler(0), inited(NONE),
- locked(FALSE), implicit_emptied(0),
+ locked(FALSE), implicit_emptied(0), mark_trx_done(FALSE),
pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+ pushed_idx_cond(NULL),
+ pushed_idx_cond_keyno(MAX_KEY),
auto_inc_intervals_count(0),
m_psi(NULL)
- {}
+ {
+ reset_statistics();
+ }
virtual ~handler(void)
{
DBUG_ASSERT(locked == FALSE);
@@ -1348,29 +1832,38 @@ public:
}
/* ha_ methods: public wrappers for private virtual API */
- int ha_open(TABLE *table, const char *name, int mode, int test_if_locked);
+ int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked);
int ha_index_init(uint idx, bool sorted)
{
int result;
DBUG_ENTER("ha_index_init");
DBUG_ASSERT(inited==NONE);
if (!(result= index_init(idx, sorted)))
- inited=INDEX;
+ {
+ inited= INDEX;
+ active_index= idx;
+ end_range= NULL;
+ }
DBUG_RETURN(result);
}
int ha_index_end()
{
DBUG_ENTER("ha_index_end");
DBUG_ASSERT(inited==INDEX);
- inited=NONE;
+ inited= NONE;
+ active_index= MAX_KEY;
+ end_range= NULL;
DBUG_RETURN(index_end());
}
- int ha_rnd_init(bool scan)
+ /* This is called after index_init() if we need to do an index scan */
+ virtual int prepare_index_scan() { return 0; }
+ int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
{
int result;
DBUG_ENTER("ha_rnd_init");
DBUG_ASSERT(inited==NONE || (inited==RND && scan));
inited= (result= rnd_init(scan)) ? NONE: RND;
+ end_range= NULL;
DBUG_RETURN(result);
}
int ha_rnd_end()
@@ -1378,9 +1871,18 @@ public:
DBUG_ENTER("ha_rnd_end");
DBUG_ASSERT(inited==RND);
inited=NONE;
+ end_range= NULL;
DBUG_RETURN(rnd_end());
}
+ int ha_rnd_init_with_error(bool scan) __attribute__ ((warn_unused_result));
int ha_reset();
+ /* Tell handler (not storage engine) this is start of a new statement */
+ void ha_start_of_new_statement()
+ {
+ ft_handler= 0;
+ mark_trx_done= FALSE;
+ }
+
/* this is necessary in many places, e.g. in HANDLER command */
int ha_index_or_rnd_end()
{
@@ -1409,13 +1911,17 @@ public:
int ha_repair(THD* thd, HA_CHECK_OPT* check_opt);
void ha_start_bulk_insert(ha_rows rows)
{
+ DBUG_ENTER("handler::ha_start_bulk_insert");
estimation_rows_to_insert= rows;
start_bulk_insert(rows);
+ DBUG_VOID_RETURN;
}
int ha_end_bulk_insert()
{
+ DBUG_ENTER("handler::ha_end_bulk_insert");
estimation_rows_to_insert= 0;
- return end_bulk_insert();
+ int ret= end_bulk_insert();
+ DBUG_RETURN(ret);
}
int ha_bulk_update_row(const uchar *old_data, uchar *new_data,
uint *dup_key_found);
@@ -1453,15 +1959,31 @@ public:
virtual void print_error(int error, myf errflag);
virtual bool get_error_message(int error, String *buf);
uint get_dup_key(int error);
+ void reset_statistics()
+ {
+ rows_read= rows_changed= rows_tmp_read= 0;
+ bzero(index_rows_read, sizeof(index_rows_read));
+ }
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
{
table= table_arg;
table_share= share;
+ reset_statistics();
}
virtual double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
virtual double read_time(uint index, uint ranges, ha_rows rows)
{ return rows2double(ranges+rows); }
+
+ /**
+ Calculate cost of 'keyread' scan for given index and number of records.
+
+ @param index index to read
+ @param ranges #of ranges to read
+ @param rows #of records to read
+ */
+ virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
bool has_transactions()
{ return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0; }
@@ -1518,8 +2040,13 @@ public:
as there may be several calls to this routine.
*/
virtual void column_bitmaps_signal();
- uint get_index(void) const { return active_index; }
- virtual int close(void)=0;
+ /*
+ We have to check for inited as some engines, like innodb, set
+ active_index during table scan.
+ */
+ uint get_index(void) const
+ { return inited == INDEX ? active_index : MAX_KEY; }
+ int ha_close(void);
/**
@retval 0 Bulk update used by handler
@@ -1565,22 +2092,23 @@ public:
}
/**
@brief
- Positions an index cursor to the index specified in the handle. Fetches the
- row if available. If the key value is null, begin at the first key of the
- index.
+ Positions an index cursor to the index specified in the
+ handle. Fetches the row if available. If the key value is null,
+ begin at the first key of the index.
*/
+protected:
virtual int index_read_map(uchar * buf, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
uint key_len= calculate_key_len(table, active_index, key, keypart_map);
- return index_read(buf, key, key_len, find_flag);
+ return index_read(buf, key, key_len, find_flag);
}
/**
@brief
- Positions an index cursor to the index specified in the handle. Fetches the
- row if available. If the key value is null, begin at the first key of the
- index.
+ Positions an index cursor to the index specified in the
+ handle. Fetches the row if available. If the key value is null,
+ begin at the first key of the index.
*/
virtual int index_read_idx_map(uchar * buf, uint index, const uchar * key,
key_part_map keypart_map,
@@ -1594,30 +2122,85 @@ public:
virtual int index_last(uchar * buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int index_next_same(uchar *buf, const uchar *key, uint keylen);
- /**
- @brief
- The following functions works like index_read, but it find the last
- row with the current key value or prefix.
- */
- virtual int index_read_last_map(uchar * buf, const uchar * key,
- key_part_map keypart_map)
+ virtual int close(void)=0;
+ inline void update_rows_read()
{
- uint key_len= calculate_key_len(table, active_index, key, keypart_map);
- return index_read_last(buf, key, key_len);
+ if (likely(!internal_tmp_table))
+ rows_read++;
+ else
+ rows_tmp_read++;
+ }
+ inline void update_index_statistics()
+ {
+ index_rows_read[active_index]++;
+ update_rows_read();
}
- virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE *ranges, uint range_count,
- bool sorted, HANDLER_BUFFER *buffer);
- virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+public:
+
+ /* Similar functions like the above, but does statistics counting */
+ inline int ha_index_read_map(uchar * buf, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ inline int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key,
+ key_part_map keypart_map,
+ enum ha_rkey_function find_flag);
+ inline int ha_index_next(uchar * buf);
+ inline int ha_index_prev(uchar * buf);
+ inline int ha_index_first(uchar * buf);
+ inline int ha_index_last(uchar * buf);
+ inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen);
+ /*
+ TODO: should we make for those functions non-virtual ha_func_name wrappers,
+ too?
+ */
+ virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *mrr_mode, COST_VECT *cost);
+ virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *mrr_mode, COST_VECT *cost);
+ virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mrr_mode,
+ HANDLER_BUFFER *buf);
+ virtual int multi_range_read_next(range_id_t *range_info);
+ /*
+ Return string representation of the MRR plan.
+
+ This is intended to be used for EXPLAIN, via the following scenario:
+ 1. SQL layer calls handler->multi_range_read_info().
+ 1.1. Storage engine figures out whether it will use some non-default
+ MRR strategy, sets appropriate bits in *mrr_mode, and returns
+ control to SQL layer
+ 2. SQL layer remembers the returned mrr_mode
+ 3. SQL layer compares various options and chooses the final query plan. As
+ a part of that, it makes a choice of whether to use the MRR strategy
+ picked in 1.1
+ 4. EXPLAIN code converts the query plan to its text representation. If MRR
+ strategy is part of the plan, it calls
+ multi_range_read_explain_info(mrr_mode) to get a text representation of
+ the picked MRR strategy.
+
+ @param mrr_mode Mode which was returned by multi_range_read_info[_const]
+ @param str INOUT string to be printed for EXPLAIN
+ @param str_end End of the string buffer. The function is free to put the
+ string into [str..str_end] memory range.
+ */
+ virtual int multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+ { return 0; }
+
virtual int read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_range, bool sorted);
virtual int read_range_next();
int compare_key(key_range *range);
+ int compare_key2(key_range *range);
virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
void ft_end() { ft_handler=NULL; }
virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
{ return NULL; }
+private:
virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; }
virtual int rnd_next(uchar *buf)=0;
virtual int rnd_pos(uchar * buf, uchar *pos)=0;
@@ -1627,20 +2210,33 @@ public:
It will return the row with the PK given in the record argument.
*/
virtual int rnd_pos_by_record(uchar *record)
- {
- position(record);
- return rnd_pos(record, ref);
- }
+ {
+ position(record);
+ return rnd_pos(record, ref);
+ }
virtual int read_first_row(uchar *buf, uint primary_key);
+public:
+
+ /* Same as above, but with statistics */
+ inline int ha_ft_read(uchar *buf);
+ inline int ha_rnd_next(uchar *buf);
+ inline int ha_rnd_pos(uchar *buf, uchar *pos);
+ inline int ha_rnd_pos_by_record(uchar *buf);
+ inline int ha_read_first_row(uchar *buf, uint primary_key);
+
/**
- The following function is only needed for tables that may be temporary
- tables during joins.
+ The following 3 functions are only needed for tables that may be
+ internal temporary tables during joins.
*/
- virtual int restart_rnd_next(uchar *buf, uchar *pos)
+ virtual int remember_rnd_pos()
+ { return HA_ERR_WRONG_COMMAND; }
+ virtual int restart_rnd_next(uchar *buf)
{ return HA_ERR_WRONG_COMMAND; }
virtual int rnd_same(uchar *buf, uint inx)
{ return HA_ERR_WRONG_COMMAND; }
- virtual ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key)
+
+ virtual ha_rows records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
{ return (ha_rows) 10; }
/*
If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, then it sets ref
@@ -1734,6 +2330,7 @@ public:
{ return(NULL);} /* gets tablespace name from handler */
/** used in ALTER TABLE; 1 if changing storage engine is allowed */
virtual bool can_switch_engines() { return 1; }
+ virtual int can_continue_handler_scan() { return 0; }
/**
Get the list of foreign keys in this table.
@@ -1781,7 +2378,8 @@ public:
*/
virtual const char **bas_ext() const =0;
- virtual int get_default_no_partitions(HA_CREATE_INFO *info) { return 1;}
+ virtual int get_default_no_partitions(HA_CREATE_INFO *create_info)
+ { return 1;}
virtual void set_auto_partitions(partition_info *part_info) { return; }
virtual bool get_no_parts(const char *name,
uint *no_parts)
@@ -1847,18 +2445,18 @@ public:
virtual uint max_supported_key_part_length() const { return 255; }
virtual uint min_record_length(uint options) const { return 1; }
- virtual bool low_byte_first() const { return 1; }
virtual uint checksum() const { return 0; }
virtual bool is_crashed() const { return 0; }
- virtual bool auto_repair() const { return 0; }
+ virtual bool auto_repair(int error) const { return 0; }
+ void update_global_table_stats();
+ void update_global_index_stats();
#define CHF_CREATE_FLAG 0
#define CHF_DELETE_FLAG 1
#define CHF_RENAME_FLAG 2
#define CHF_INDEX_FLAG 3
-
/**
@note lock_count() can return > 1 if the table is MERGE or partitioned.
*/
@@ -1926,9 +2524,28 @@ public:
/*
- @retval TRUE Primary key (if there is one) is clustered
- key covering all fields
- @retval FALSE otherwise
+ Check if the primary key (if there is one) is a clustered and a
+ reference key. This means:
+
+ - Data is stored together with the primary key (no secondary lookup
+ needed to find the row data). The optimizer uses this to find out
+ the cost of fetching data.
+ - The primary key is part of each secondary key and is used
+ to find the row data in the primary index when reading through
+ secondary indexes.
+ - When doing a HA_KEYREAD_ONLY read, we also get all the primary key parts
+ into the row. This is a critical property used by index_merge.
+
+ All the above is usually true for engines that store the row
+ data in the primary key index (e.g. in a b-tree), and use the primary
+ key value as a position(). InnoDB is an example of such an engine.
+
+ For such a clustered primary key, the following should also hold:
+ index_flags() should contain HA_CLUSTERED_INDEX
+ table_flags() should contain HA_TABLE_SCAN_ON_INDEX
+
+ @retval TRUE yes
+ @retval FALSE No.
*/
virtual bool primary_key_is_clustered() { return FALSE; }
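For a clustered-primary-key engine the overrides would typically look roughly like this (ha_myclustered is a hypothetical engine class, not part of this patch):

  bool ha_myclustered::primary_key_is_clustered() { return TRUE; }

  ulong ha_myclustered::index_flags(uint idx, uint part, bool all_parts) const
  {
    ulong flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE;
    if (idx == table_share->primary_key)
      flags|= HA_CLUSTERED_INDEX;          /* key read also returns the row */
    return flags;
  }

  /* table_flags() should additionally include HA_TABLE_SCAN_ON_INDEX. */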
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
@@ -1969,6 +2586,14 @@ public:
Pops the top if condition stack, if stack is not empty.
*/
virtual void cond_pop() { return; };
+ virtual Item *idx_cond_push(uint keyno, Item* idx_cond) { return idx_cond; }
+ /** Reset information about pushed index conditions */
+ virtual void cancel_pushed_idx_cond()
+ {
+ pushed_idx_cond= NULL;
+ pushed_idx_cond_keyno= MAX_KEY;
+ in_range_check_pushed_down= false;
+ }
virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
uint table_changes)
{ return COMPATIBLE_DATA_NO; }
@@ -1986,9 +2611,26 @@ public:
return 0;
}
+ LEX_STRING *engine_name() { return hton_name(ht); }
+
+ /*
+ @brief
+ Check whether the engine supports virtual columns
+
+ @retval
+ FALSE if the engine does not support virtual columns
+ @retval
+ TRUE if the engine supports virtual columns
+ */
+
+ virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
+
+ TABLE* get_table() { return table; }
protected:
+ /* deprecated, don't use in new engines */
+ inline void ha_statistic_increment(ulong SSV::*offset) const { }
+
/* Service methods for use by storage engines. */
- void ha_statistic_increment(ulong SSV::*offset) const;
void **ha_data(THD *) const;
THD *ha_thd(void) const;
@@ -2038,10 +2680,18 @@ protected:
tables.
*/
virtual int delete_table(const char *name);
+
private:
/* Private helpers */
- inline void mark_trx_read_write();
-private:
+ void mark_trx_read_write_part2();
+ inline void mark_trx_read_write()
+ {
+ if (!mark_trx_done)
+ mark_trx_read_write_part2();
+ }
+ inline void increment_statistics(ulong SSV::*offset) const;
+ inline void decrement_statistics(ulong SSV::*offset) const;
+
/*
Low-level primitives for storage engines. These should be
overridden by the storage engine class. To call these methods, use
@@ -2049,8 +2699,9 @@ private:
*/
virtual int open(const char *name, int mode, uint test_if_locked)=0;
- virtual int index_init(uint idx, bool sorted) { active_index= idx; return 0; }
- virtual int index_end() { active_index= MAX_KEY; return 0; }
+ /* Note: ha_index_read_idx_map() may bypass index_init() */
+ virtual int index_init(uint idx, bool sorted) { return 0; }
+ virtual int index_end() { return 0; }
/**
rnd_init() can be called two times without rnd_end() in between
(it only makes sense if scan=1).
@@ -2131,8 +2782,6 @@ private:
virtual int index_read(uchar * buf, const uchar * key, uint key_len,
enum ha_rkey_function find_flag)
{ return HA_ERR_WRONG_COMMAND; }
- virtual int index_read_last(uchar * buf, const uchar * key, uint key_len)
- { return (my_errno= HA_ERR_WRONG_COMMAND); }
/**
This method is similar to update_row, however the handler doesn't need
to execute the updates at this point in time. The handler can be certain
@@ -2179,14 +2828,16 @@ private:
@remark The table is locked in exclusive mode.
*/
virtual int truncate()
- { return HA_ERR_WRONG_COMMAND; }
+ {
+ int error= delete_all_rows();
+ return error ? error : reset_auto_increment(0);
+ }
/**
Reset the auto-increment counter to the given value, i.e. the next row
- inserted will get the given value. HA_ERR_WRONG_COMMAND is returned by
- storage engines that don't support this operation.
+ inserted will get the given value.
*/
virtual int reset_auto_increment(ulonglong value)
- { return HA_ERR_WRONG_COMMAND; }
+ { return 0; }
virtual int optimize(THD* thd, HA_CHECK_OPT* check_opt)
{ return HA_ADMIN_NOT_IMPLEMENTED; }
virtual int analyze(THD* thd, HA_CHECK_OPT* check_opt)
@@ -2215,8 +2866,20 @@ private:
{ return HA_ERR_WRONG_COMMAND; }
virtual int rename_partitions(const char *path)
{ return HA_ERR_WRONG_COMMAND; }
+ friend class ha_partition;
+public:
+ /* XXX to be removed, see ha_partition::partition_ht() */
+ virtual handlerton *partition_ht() const
+ { return ht; }
+ inline int ha_write_tmp_row(uchar *buf);
+ inline int ha_update_tmp_row(const uchar * old_data, uchar * new_data);
+
+ friend enum icp_result handler_index_cond_check(void* h_arg);
};
+#include "multi_range_read.h"
+
+bool key_uses_partial_cols(TABLE *table, uint keyno);
/* Some extern variables used with handlers */
@@ -2245,7 +2908,7 @@ static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type)
static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type)
{
- return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
+ return db_type == NULL ? "UNKNOWN" : hton_name(db_type)->str;
}
static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag)
@@ -2271,6 +2934,7 @@ int ha_panic(enum ha_panic_function flag);
void ha_close_connection(THD* thd);
bool ha_flush_logs(handlerton *db_type);
void ha_drop_database(char* path);
+void ha_checkpoint_state(bool disable);
int ha_create_table(THD *thd, const char *path,
const char *db, const char *table_name,
HA_CREATE_INFO *create_info,
@@ -2290,13 +2954,12 @@ int ha_discover(THD* thd, const char* dbname, const char* name,
int ha_find_files(THD *thd,const char *db,const char *path,
const char *wild, bool dir, List<LEX_STRING>* files);
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name);
-bool ha_check_if_supported_system_table(handlerton *hton, const char* db,
- const char* table_name);
/* key cache */
-extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache);
+extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *);
int ha_resize_key_cache(KEY_CACHE *key_cache);
int ha_change_key_cache_param(KEY_CACHE *key_cache);
+int ha_repartition_key_cache(KEY_CACHE *key_cache);
int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
/* report to InnoDB that control passes to the client */
@@ -2353,10 +3016,8 @@ const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
bool mysql_xa_recover(THD *thd);
-
inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
}
-
-#endif /* HANDLER_INCLUDED */
+#endif