summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/filesort.cc355
-rw-r--r--sql/filesort.h12
-rw-r--r--sql/ha_partition.cc30
-rw-r--r--sql/ha_partition.h1
-rw-r--r--sql/handler.h15
-rw-r--r--sql/records.cc275
-rw-r--r--sql/records.h31
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_sort.h19
9 files changed, 659 insertions, 80 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 00dfa08bba8..86626c85ebf 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -55,6 +55,11 @@ static bool write_keys(Sort_param *param, SORT_INFO *fs_info,
uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos);
static void register_used_fields(Sort_param *param);
+static void register_tmp_table_fields(SORT_INFO *fs_info);
+static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param,
+ SORT_INFO *fs_info);
+static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info);
+static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info);
static bool save_index(Sort_param *param, uint count,
SORT_INFO *table_sort);
static uint suffix_length(ulong string_length);
@@ -63,7 +68,8 @@ static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
Field **ptabfield,
uint sortlength,
- LEX_STRING *addon_buf);
+ LEX_STRING *addon_buf,
+ uint *ptmp_fields);
static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
uchar *buff, uchar *buff_end);
static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info,
@@ -72,7 +78,8 @@ static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info,
void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
ulong max_length_for_sort_data,
- ha_rows maxrows, bool sort_positions)
+ ha_rows maxrows, bool sort_positions,
+ uint *tmp_fields)
{
DBUG_ASSERT(addon_field == 0 && addon_buf.length == 0);
@@ -86,7 +93,8 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
to sorted fields and get its total length in addon_buf.length
*/
addon_field= get_addon_fields(max_length_for_sort_data,
- table->field, sort_length, &addon_buf);
+ table->field, sort_length, &addon_buf,
+ tmp_fields);
}
if (addon_field)
{
@@ -189,10 +197,11 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
sort->found_rows= HA_POS_ERROR;
param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
- &multi_byte_charset),
+ &multi_byte_charset),
table,
thd->variables.max_length_for_sort_data,
- max_rows, filesort->sort_positions);
+ max_rows, filesort->sort_positions,
+ &sort->tmp_fields);
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
@@ -273,7 +282,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
num_rows= find_all_keys(thd, &param, select,
sort,
&buffpek_pointers,
- &tempfile,
+ &tempfile,
pq.is_initialized() ? &pq : NULL,
&sort->found_rows);
if (num_rows == HA_POS_ERROR)
@@ -345,7 +354,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
}
error= 0;
- err:
+err:
my_free(param.tmp_buffer);
if (!subselect || !subselect->is_uncacheable())
{
@@ -700,7 +709,7 @@ static void dbug_print_record(TABLE *table, bool print_rowid)
static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
SORT_INFO *fs_info,
- IO_CACHE *buffpek_pointers,
+ IO_CACHE *buffpek_pointers,
IO_CACHE *tempfile,
Bounded_queue<uchar, uchar> *pq,
ha_rows *found_rows)
@@ -709,8 +718,10 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
uint idx,indexpos,ref_length;
uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
my_off_t record;
+ TABLE *fs_tmp_table;
TABLE *sort_form;
handler *file;
+ handler *ref_file;
MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
Item *sort_cond;
ha_rows retval;
@@ -728,9 +739,24 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
quick_select=select && select->quick;
record=0;
*found_rows= 0;
+
+ if (!quick_select)
+ {
+ /*
+ Potentially create a temp table to avoid rnd_pos() calls on the
+ table to be sorted
+ */
+ if (create_fs_tmp_table_if_needed(thd, param, fs_info))
+ goto err;
+ fs_tmp_table= fs_info->fs_tmp_table;
+ }
+ else
+ fs_tmp_table= NULL;
+ ref_file= (fs_tmp_table ? fs_tmp_table->file : file);
+
flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
if (flag)
- ref_pos= &file->ref[0];
+ ref_pos= &ref_file->ref[0];
next_pos=ref_pos;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
@@ -760,6 +786,8 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
register_used_fields(param);
if (quick_select)
select->quick->add_used_key_part_to_set();
+ else
+ register_tmp_table_fields(fs_info);
sort_cond= (!select ? 0 :
(!select->pre_idx_push_select_cond ?
@@ -786,18 +814,25 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
}
else /* Not quick-select */
{
+ error= file->ha_rnd_next(sort_form->record[0]);
+ if (!flag)
{
- error= file->ha_rnd_next(sort_form->record[0]);
- if (!flag)
- {
- my_store_ptr(ref_pos,ref_length,record); // Position to row
- record+= sort_form->s->db_record_offset;
- }
- else if (!error)
- file->position(sort_form->record[0]);
+ my_store_ptr(ref_pos,ref_length,record); // Position to row
+ record+= sort_form->s->db_record_offset;
+ }
+ else if (!error)
+ {
+ /*
+ If filesort is using a temp table, write the row to the temp table,
+ and save its row position
+ */
+ if (fs_tmp_table)
+ error= write_fs_tmp_table_row(thd, fs_info);
+ else
+ file->position(sort_form->record[0]);
}
if (error && error != HA_ERR_RECORD_DELETED)
- break;
+ break;
}
if (thd->check_killed())
@@ -904,7 +939,19 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
DBUG_RETURN(retval);
err:
+ if (!quick_select)
+ {
+ if (file->inited)
+ {
+ (void)file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */
+ if (!next_pos)
+ file->ha_rnd_end();
+ }
+ if (fs_tmp_table && fs_tmp_table->file->inited)
+ fs_tmp_table->file->ha_rnd_end();
+ }
sort_form->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);
+ free_fs_tmp_table(thd, fs_info);
DBUG_RETURN(HA_POS_ERROR);
} /* find_all_keys */
@@ -1295,6 +1342,199 @@ static void register_used_fields(Sort_param *param)
}
+/**
+ Register the filesort temp table fields in the sorted table's read set
+
+ @param fs_info Filesort information that includes the filesort
+ temp table and an array of its fields.
+*/
+
+static void register_tmp_table_fields(SORT_INFO *fs_info)
+{
+  DBUG_ENTER("register_tmp_table_fields");
+
+  /* Nothing to register unless filesort is using a temp table */
+  if (!fs_info->fs_tmp_table)
+    DBUG_VOID_RETURN;
+
+  /* The field array ends at the entry whose from_field is NULL */
+  for (Copy_field *cf= fs_info->tmp_field; cf->from_field; cf++)
+  {
+    /* Mark the source column as read in the table that owns it */
+    Field *src= cf->from_field;
+    bitmap_fast_test_and_set(src->table->read_set, src->field_index);
+  }
+
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+ Potentially create a filesort temp table to avoid rnd_pos() calls on the
+ table to be sorted
+
+ @param param Sort information and parameters.
+ @param fs_info Filesort information that includes the filesort
+ temp table and an array of its fields.
+
+ @retval
+ 0 Temp table creation succeeded or temp table is
+ unnecessary.
+ @retval
+ 1 Memory allocation failed or temp table creation failed.
+*/
+
+static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param,
+                                         SORT_INFO *fs_info)
+{
+  TABLE *table= param->sort_form;
+  DBUG_ENTER("create_fs_tmp_table_if_needed");
+
+  if (fs_info->tmp_fields && table->file->ha_is_rnd_pos_expensive())
+  {
+    /*
+      Create a filesort temp table to avoid expensive rnd_pos() calls
+      on the table to be sorted
+    */
+    Copy_field *tmp_field;
+    Copy_field *tmp_field_end;
+    List<Item> tmp_field_list;
+    Item_field *item_field;
+    Field **pfield;
+    Field *field;
+
+    /*
+      Allocate memory for the temp table field array, plus one element
+      for the end marker.  The array is zero-filled so that no Copy_field
+      member is left indeterminate before Copy_field::set() is called.
+    */
+    tmp_field= (Copy_field *)
+      my_malloc(sizeof(Copy_field) * (fs_info->tmp_fields + 1),
+                MYF(MY_WME | MY_THREAD_SPECIFIC | MY_ZEROFILL));
+    if (!tmp_field)
+      DBUG_RETURN(1);
+    fs_info->tmp_field= tmp_field;
+    tmp_field_end= tmp_field + fs_info->tmp_fields;
+
+    /* Initialize the field array elements */
+    for (pfield= table->field; (field= *pfield); pfield++)
+    {
+      if (!bitmap_is_set(table->read_set, field->field_index))
+        continue;
+      if (tmp_field == tmp_field_end)
+      {
+        /*
+          More fields are referenced than were counted in tmp_fields.
+          Fail rather than write past the end of the array.
+        */
+        free_fs_tmp_table(thd, fs_info);
+        DBUG_RETURN(1);
+      }
+      /*
+        All fields referenced in the query are to be written
+        to the temp table
+      */
+      tmp_field->from_field= field;
+      tmp_field++;
+    }
+    tmp_field->from_field= 0;                   // Put end marker
+
+    /* Create the temp table field list */
+    for (tmp_field= fs_info->tmp_field; tmp_field->from_field; tmp_field++)
+    {
+      item_field= new (thd->mem_root) Item_field(thd, tmp_field->from_field);
+      /* Do not push a NULL item if the allocation failed */
+      if (!item_field ||
+          tmp_field_list.push_back(item_field, thd->mem_root))
+      {
+        free_fs_tmp_table(thd, fs_info);
+        DBUG_RETURN(1);
+      }
+    }
+
+    TMP_TABLE_PARAM tmp_table_param;
+    tmp_table_param.init();
+    tmp_table_param.field_count= fs_info->tmp_fields;
+    tmp_table_param.table_charset= table->s->table_charset;
+    tmp_table_param.skip_create_table= TRUE;
+
+    /* Create the filesort temp table */
+    TABLE *fs_tmp_table= create_tmp_table(thd, &tmp_table_param,
+                                          tmp_field_list,
+                                          NULL,
+                                          FALSE,
+                                          FALSE,
+                                          thd->variables.option_bits |
+                                          TMP_TABLE_ALL_COLUMNS,
+                                          param->max_rows, &empty_clex_str,
+                                          FALSE, FALSE);
+
+    if (!fs_tmp_table)
+    {
+      free_fs_tmp_table(thd, fs_info);
+      DBUG_RETURN(1);
+    }
+
+    /*
+      Fill in the pointers to the temp table fields in the field array.
+      Stop at the end marker as well as at the last temp table field, so
+      that the walk never leaves the array.
+    */
+    for (tmp_field= fs_info->tmp_field, pfield= fs_tmp_table->field;
+         (field= *pfield) && tmp_field->from_field;
+         tmp_field++, pfield++)
+      tmp_field->set(field, tmp_field->from_field, FALSE);
+    fs_info->fs_tmp_table= fs_tmp_table;
+
+    /* Fix up the sort buffer parameters */
+    param->update_ref_length(fs_tmp_table->file->ref_length);
+
+    fs_tmp_table->prepare_for_position();
+  }
+
+  DBUG_RETURN(0);
+}
+
+
+/**
+ Copy column values from the current row of the table being sorted
+ to the current filesort temp table row. Write the row to the
+ filesort temp table.
+
+ @param fs_info Filesort information that includes the filesort
+ temp table and an array of its fields.
+
+ @retval
+ 0 Temp table row was created and successfully written.
+ @retval
+ <> 0 Temp table write failed.
+*/
+
+static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info)
+{
+  TABLE *tmp_tbl= fs_info->fs_tmp_table;
+  handler *tmp_file= tmp_tbl->file;
+  uchar *row= tmp_tbl->record[0];
+  DBUG_ENTER("write_fs_tmp_table_row");
+
+  /*
+    Run every copy descriptor up to the end marker; each one moves a
+    column value from the table being sorted into the temp table row
+  */
+  Copy_field *cf= fs_info->tmp_field;
+  while (cf->from_field)
+  {
+    (*cf->do_copy)(cf);
+    cf++;
+  }
+
+  /* Write the row; remember its position only if the write succeeded */
+  int rc= tmp_file->ha_write_tmp_row(row);
+  if (!rc)
+    tmp_file->position(row);
+
+  DBUG_RETURN(rc);
+}
+
+
+/**
+ Free the filesort temp table and its information structures.
+
+ @param thd Thread handle.
+ @param fs_info Filesort information that includes the filesort
+ temp table and an array of its fields.
+*/
+
+static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info)
+{
+  TABLE *tmp_tbl= fs_info->fs_tmp_table;
+
+  /* Drop the temp table itself, if one was created */
+  if (tmp_tbl != NULL)
+  {
+    free_tmp_table(thd, tmp_tbl);
+    fs_info->fs_tmp_table= NULL;
+  }
+
+  /* Release the field array and reset the field bookkeeping */
+  my_free(fs_info->tmp_field);
+  fs_info->tmp_fields= 0;
+  fs_info->tmp_field= NULL;
+}
+
+
static bool save_index(Sort_param *param, uint count,
SORT_INFO *table_sort)
{
@@ -2010,6 +2250,8 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
@param ptabfield Array of references to the table fields
@param sortlength Total length of sorted fields
@param [out] addon_buf Buffer to us for appended fields
+ @param [out] ptmp_fields Pointer to the number of temp table fields,
+ if any
@note
The null bits for the appended values are supposed to be put together
@@ -2023,20 +2265,25 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
static SORT_ADDON_FIELD *
get_addon_fields(ulong max_length_for_sort_data,
- Field **ptabfield, uint sortlength, LEX_STRING *addon_buf)
+ Field **ptabfield, uint sortlength, LEX_STRING *addon_buf,
+ uint *ptmp_fields)
{
Field **pfield;
Field *field;
- SORT_ADDON_FIELD *addonf;
- uint length= 0;
- uint fields= 0;
+ SORT_ADDON_FIELD *addonf= NULL;
+ uint addon_length= 0;
+ uint addon_fields= 0;
uint null_fields= 0;
+ uint nonaddon_fields= 0;
+ bool has_blob_field= FALSE;
MY_BITMAP *read_set= (*ptabfield)->table->read_set;
DBUG_ENTER("get_addon_fields");
/*
- If there is a reference to a field in the query add it
- to the the set of appended fields.
+ If there is a reference to a field in the query that is not a blob/text
+ field, add it to the set of appended fields.
+ We cannot use addons if there is a blob/text field.
+ All referenced fields are written to the temp table.
Note for future refinement:
This this a too strong condition.
Actually we need only the fields referred in the
@@ -2051,34 +2298,52 @@ get_addon_fields(ulong max_length_for_sort_data,
{
if (!bitmap_is_set(read_set, field->field_index))
continue;
- if (field->flags & BLOB_FLAG)
- DBUG_RETURN(0);
- length+= field->max_packed_col_length(field->pack_length());
- if (field->maybe_null())
- null_fields++;
- fields++;
- }
- if (!fields)
- DBUG_RETURN(0);
- length+= (null_fields+7)/8;
+ if (has_blob_field)
+ nonaddon_fields++;
+ else if (field->flags & BLOB_FLAG)
+ {
+ has_blob_field= TRUE;
+ nonaddon_fields= (addon_fields + 1);
+ null_fields= 0;
+ addon_fields= 0;
+ addon_length= 0;
+ }
+ else
+ {
+ addon_length+= field->max_packed_col_length(field->pack_length());
+ addon_fields++;
+ if (field->maybe_null())
+ null_fields++;
+ }
+ }
+ if (nonaddon_fields)
+ *ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields
+ else
+ *ptmp_fields= 0; // Temp table is unnecessary
+ if (!addon_fields)
+ DBUG_RETURN(NULL);
- if (length+sortlength > max_length_for_sort_data ||
+ addon_length+= (null_fields+7)/8;
+
+ if (addon_length+sortlength > max_length_for_sort_data ||
!my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC),
- &addonf, sizeof(SORT_ADDON_FIELD) * (fields+1),
- &addon_buf->str, length,
+ &addonf, sizeof(SORT_ADDON_FIELD) * (addon_fields+1),
+ &addon_buf->str, addon_length,
NullS))
+ {
+ *ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields
+ DBUG_RETURN(NULL);
+ }
- DBUG_RETURN(0);
-
- addon_buf->length= length;
- length= (null_fields+7)/8;
+ addon_buf->length= addon_length;
+ addon_length= (null_fields+7)/8;
null_fields= 0;
for (pfield= ptabfield; (field= *pfield) ; pfield++)
{
if (!bitmap_is_set(read_set, field->field_index))
continue;
addonf->field= field;
- addonf->offset= length;
+ addonf->offset= addon_length;
if (field->maybe_null())
{
addonf->null_offset= null_fields/8;
@@ -2091,13 +2356,13 @@ get_addon_fields(ulong max_length_for_sort_data,
addonf->null_bit= 0;
}
addonf->length= field->max_packed_col_length(field->pack_length());
- length+= addonf->length;
+ addon_length+= addonf->length;
addonf++;
}
addonf->field= 0; // Put end marker
- DBUG_PRINT("info",("addon_length: %d",length));
- DBUG_RETURN(addonf-fields);
+ DBUG_PRINT("info",("addon_length: %d",addon_length));
+ DBUG_RETURN(addonf-addon_fields);
}
diff --git a/sql/filesort.h b/sql/filesort.h
index bd1d81f91ef..60b21ffab47 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -27,6 +27,7 @@ class Filesort_tracker;
struct SORT_FIELD;
typedef struct st_order ORDER;
class JOIN;
+class Copy_field;
/**
@@ -87,7 +88,8 @@ class SORT_INFO
public:
SORT_INFO()
- :addon_field(0), record_pointers(0)
+ :addon_field(0), record_pointers(0),
+ fs_tmp_table(NULL), tmp_field(NULL), tmp_fields(0)
{
buffpek.str= 0;
my_b_clear(&io_cache);
@@ -101,6 +103,9 @@ public:
my_free(record_pointers);
my_free(buffpek.str);
my_free(addon_field);
+ fs_tmp_table= NULL; // Freed in end_read_record()
+ tmp_field= NULL; // Freed in end_read_record()
+ tmp_fields= 0;
}
void reset()
@@ -119,6 +124,11 @@ public:
/* To unpack back */
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
uchar *record_pointers; /* If sorted in memory */
+ TABLE *fs_tmp_table; /* Optional temp table used by filesort to */
+ /* eliminate rnd_pos() calls to the table */
+ /* being sorted */
+ Copy_field *tmp_field; /* Filesort temp table field array */
+ uint tmp_fields; /* Number of filesort temp table fields */
/*
How many rows in final result.
Also how many rows in record_pointers, if used
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 4ec6f3dfa38..852bcefed3c 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -5185,6 +5185,36 @@ int ha_partition::rnd_pos_by_record(uchar *record)
}
+/*
+ Determine whether a call to rnd_pos() is expensive
+
+ SYNOPSIS
+ is_rnd_pos_expensive()
+
+ RETURN VALUE
+ FALSE No inherent inefficiencies in rnd_pos()
+ TRUE rnd_pos() call is inefficient
+
+ DESCRIPTION
+ Some engines, such as Spider, have an inefficient implementation of
+ rnd_pos(), because they need to do a remote access to fetch the
+ single table row. Determine whether the rnd_pos() implementation
+ for any of the partitions is expensive.
+*/
+
+bool ha_partition::is_rnd_pos_expensive()
+{
+  DBUG_ENTER("ha_partition::is_rnd_pos_expensive");
+  bool expensive= FALSE;
+
+  /* Stop at the first partition whose handler reports an expensive rnd_pos() */
+  for (uint part= 0; part < m_tot_parts && !expensive; part++)
+    expensive= m_file[part]->ha_is_rnd_pos_expensive();
+
+  DBUG_RETURN(expensive);
+}
+
+
/****************************************************************************
MODULE index scan
****************************************************************************/
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 30dd24b6014..1d3f9c14c79 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -691,6 +691,7 @@ public:
virtual int rnd_next(uchar * buf);
virtual int rnd_pos(uchar * buf, uchar * pos);
virtual int rnd_pos_by_record(uchar *record);
+ virtual bool is_rnd_pos_expensive();
virtual void position(const uchar * record);
/*
diff --git a/sql/handler.h b/sql/handler.h
index a96e98c2f84..98e02eaa244 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -3536,6 +3536,17 @@ public:
position(record);
return rnd_pos(record, ref);
}
+ /**
+ Report whether a call to rnd_pos() is expensive for this engine.
+
+ Some engines are unable to provide an efficient implementation
+ for rnd_pos(). Spider is such an engine, as a call to rnd_pos()
+ needs to access a table on a remote data node to retrieve the
+ single table row.
+
+ @retval FALSE rnd_pos() has no inherent inefficiencies (the default
+ returned by this base implementation)
+ @retval TRUE rnd_pos() is inefficient; returned only by an engine
+ that overrides this virtual method
+ */
+ virtual bool is_rnd_pos_expensive()
+ {
+ /* Engine's rnd_pos() implementation has no inherent inefficiencies */
+ return FALSE;
+ }
virtual int read_first_row(uchar *buf, uint primary_key);
public:
@@ -3545,6 +3556,10 @@ public:
int ha_rnd_next(uchar *buf);
int ha_rnd_pos(uchar *buf, uchar *pos);
inline int ha_rnd_pos_by_record(uchar *buf);
+ inline bool ha_is_rnd_pos_expensive()
+ {
+ return is_rnd_pos_expensive(); /* Forward to the virtual is_rnd_pos_expensive() */
+ }
inline int ha_read_first_row(uchar *buf, uint primary_key);
/**
diff --git a/sql/records.cc b/sql/records.cc
index ac84ca84ab6..b016cd97783 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -38,16 +38,21 @@
static int rr_quick(READ_RECORD *info);
int rr_sequential(READ_RECORD *info);
static int rr_from_tempfile(READ_RECORD *info);
+static int rr_from_tempfile_and_copy(READ_RECORD *info);
static int rr_unpack_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_buffer(READ_RECORD *info);
int rr_from_pointers(READ_RECORD *info);
+int rr_from_pointers_and_copy(READ_RECORD *info);
static int rr_from_cache(READ_RECORD *info);
+static int rr_from_cache_and_copy(READ_RECORD *info);
static int init_rr_cache(THD *thd, READ_RECORD *info);
static int rr_cmp(uchar *a,uchar *b);
static int rr_index_first(READ_RECORD *info);
static int rr_index_last(READ_RECORD *info);
static int rr_index(READ_RECORD *info);
static int rr_index_desc(READ_RECORD *info);
+static int init_copy(READ_RECORD *info);
+static void end_copy(READ_RECORD *info);
/**
@@ -77,6 +82,11 @@ bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
bzero((char*) info,sizeof(*info));
info->thd= thd;
info->table= table;
+ info->copy_table= NULL;
+ info->tmp_field= NULL;
+ info->tmp_fields= 0;
+ info->free_tmp_table= FALSE;
+ info->addon_field= NULL;
info->record= table->record[0];
info->print_error= print_error;
info->unlock_row= rr_unlock_row;
@@ -188,13 +198,39 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
bool disable_rr_cache)
{
IO_CACHE *tempfile;
- SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
+ SORT_ADDON_FIELD *addon_field;
+ bool has_fs_tmp_table;
DBUG_ENTER("init_read_record");
bzero((char*) info,sizeof(*info));
info->thd=thd;
+ if (filesort)
+ {
+ if (filesort->fs_tmp_table)
+ {
+ has_fs_tmp_table= TRUE;
+ info->copy_table= table;
+ table= filesort->fs_tmp_table;
+ }
+ else
+ {
+ has_fs_tmp_table= FALSE;
+ info->copy_table= NULL;
+ }
+ info->tmp_field= filesort->tmp_field;
+ info->tmp_fields= filesort->tmp_fields;
+ addon_field= filesort->addon_field;
+ }
+ else
+ {
+ has_fs_tmp_table= FALSE;
+ info->copy_table= NULL;
+ info->tmp_field= NULL;
+ info->tmp_fields= 0;
+ addon_field= NULL;
+ }
+ info->free_tmp_table= has_fs_tmp_table;
info->table=table;
- info->forms= &info->table; /* Only one table */
info->addon_field= addon_field;
if ((table->s->tmp_table == INTERNAL_TMP_TABLE ||
@@ -230,13 +266,18 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
{
DBUG_PRINT("info",("using rr_from_tempfile"));
info->read_record_func=
- addon_field ? rr_unpack_from_tempfile : rr_from_tempfile;
+ addon_field ? rr_unpack_from_tempfile :
+ has_fs_tmp_table ? rr_from_tempfile_and_copy :
+ rr_from_tempfile;
info->io_cache= tempfile;
reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
- info->ref_pos=table->file->ref;
+ info->ref_pos= table->file->ref;
if (!table->file->inited)
if (table->file->ha_rnd_init_with_error(0))
DBUG_RETURN(1);
+ if (has_fs_tmp_table)
+ if (init_copy(info))
+ DBUG_RETURN(1);
/*
addon_field is checked because if we use addon fields,
@@ -245,22 +286,26 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
*/
if (!disable_rr_cache &&
!addon_field &&
- thd->variables.read_rnd_buff_size &&
- !(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
- (table->db_stat & HA_READ_ONLY ||
- table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
- (ulonglong) table->s->reclength* (table->file->stats.records+
- table->file->stats.deleted) >
- (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
- info->io_cache->end_of_file/info->ref_length * table->s->reclength >
- (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
- !table->s->blob_fields &&
+ thd->variables.read_rnd_buff_size &&
+ !(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
+ (table->db_stat & HA_READ_ONLY ||
+ table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
+ (ulonglong) table->s->reclength*
+ (table->file->stats.records+
+ table->file->stats.deleted) >
+ (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
+ info->io_cache->end_of_file/info->ref_length *
+ table->s->reclength >
+ (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
+ !table->s->blob_fields &&
info->ref_length <= MAX_REFLENGTH)
{
if (! init_rr_cache(thd, info))
{
- DBUG_PRINT("info",("using rr_from_cache"));
- info->read_record_func= rr_from_cache;
+ info->read_record_func=
+ has_fs_tmp_table ? rr_from_cache_and_copy :
+ rr_from_cache;
+ DBUG_PRINT("info",("using rr_from_cache"));
}
}
}
@@ -272,13 +317,19 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
else if (filesort && filesort->record_pointers)
{
DBUG_PRINT("info",("using record_pointers"));
- if (table->file->ha_rnd_init_with_error(0))
- DBUG_RETURN(1);
+ if (!table->file->inited)
+ if (table->file->ha_rnd_init_with_error(0))
+ DBUG_RETURN(1);
info->cache_pos= filesort->record_pointers;
info->cache_end= (info->cache_pos+
filesort->return_rows * info->ref_length);
info->read_record_func=
- addon_field ? rr_unpack_from_buffer : rr_from_pointers;
+ addon_field ? rr_unpack_from_buffer :
+ has_fs_tmp_table ? rr_from_pointers_and_copy :
+ rr_from_pointers;
+ if (has_fs_tmp_table)
+ if (init_copy(info))
+ DBUG_RETURN(1);
}
else if (table->file->keyread_enabled())
{
@@ -300,11 +351,11 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
DBUG_RETURN(1);
/* We can use record cache if we don't update dynamic length tables */
if (!table->no_cache &&
- (use_record_cache > 0 ||
- (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
- !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
- (use_record_cache < 0 &&
- !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
+ (use_record_cache > 0 ||
+ (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
+ !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
+ (use_record_cache < 0 &&
+ !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
(void) table->file->extra_opt(HA_EXTRA_CACHE,
thd->variables.read_buff_size);
}
@@ -333,6 +384,15 @@ void end_read_record(READ_RECORD *info)
(void) info->table->file->extra(HA_EXTRA_NO_CACHE);
if (info->read_record_func != rr_quick) // otherwise quick_range does it
(void) info->table->file->ha_index_or_rnd_end();
+ if (info->free_tmp_table)
+ {
+ free_tmp_table(info->thd, info->table);
+ end_copy(info);
+ my_free(info->tmp_field);
+ info->tmp_field= NULL;
+ info->tmp_fields= 0;
+ info->free_tmp_table= FALSE;
+ }
info->table=0;
}
}
@@ -521,7 +581,35 @@ static int rr_from_tempfile(READ_RECORD *info)
/**
Read a result set record from a temporary file after sorting.
- The function first reads the next sorted record from the temporary file.
+ The function first reads the next sorted record from the temporary file
+ into a buffer. If successful, it copies the fields to the
+ table being sorted.
+
+ @param info Reference to the context including record descriptors
+
+ @retval
+ 0 Record successfully read.
+ @retval
+ -1 No more records to read or record read failed.
+*/
+
+/* Defined static to match its forward declaration at the top of the file */
+static int rr_from_tempfile_and_copy(READ_RECORD *info)
+{
+  int error;
+
+  /* Fetch the next sorted row; a non-zero result is passed to the caller */
+  if ((error= rr_from_tempfile(info)))
+    return error;
+
+  /* Run every copy descriptor prepared by init_copy() on the fetched row */
+  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+    (*cp->do_copy)(cp);
+
+  return 0;
+}
+
+
+/**
+ Read a result set record from a temporary file after sorting.
+
+ The function first reads the next sorted record from the temporary file
into a buffer. If a success it calls a callback function that unpacks
the fields values use in the result set from this buffer into their
positions in the regular record buffer.
@@ -569,6 +657,35 @@ int rr_from_pointers(READ_RECORD *info)
return tmp;
}
+
+/**
+ Read a result set record through the sorted record pointers.
+
+ The function first reads the next sorted record with
+ rr_from_pointers(). If successful, it runs the copy descriptors
+ in the range [copy_field, copy_field_end) on that record.
+
+ @param info Reference to the context including record descriptors
+
+ @retval
+ 0 Record successfully read and its fields copied.
+ @retval
+ <> 0 Value returned by rr_from_pointers(); nothing was copied.
+*/
+
+int rr_from_pointers_and_copy(READ_RECORD *info)
+{
+  const int rc= rr_from_pointers(info);
+  if (rc)
+    return rc;
+
+  Copy_field *cf= info->copy_field;
+  while (cf != info->copy_field_end)
+  {
+    cf->do_copy(cf);
+    cf++;
+  }
+
+  return 0;
+}
+
+
/**
Read a result set record from a buffer after sorting.
@@ -703,6 +820,114 @@ static int rr_from_cache(READ_RECORD *info)
} /* rr_from_cache */
+/**
+ Read a result set record from cache after sorting.
+
+ The function first reads the next sorted record from cache.
+ If successful, it copies the fields to the table being sorted.
+
+ @param info Reference to the context including record descriptors
+
+ @retval
+ 0 Record successfully read.
+ @retval
+ -1 No more records to read or record read failed.
+*/
+
+/* Defined static to match its forward declaration at the top of the file */
+static int rr_from_cache_and_copy(READ_RECORD *info)
+{
+  int error;
+
+  /* Fetch the next sorted row; a non-zero result is passed to the caller */
+  if ((error= rr_from_cache(info)))
+    return error;
+
+  /* Run every copy descriptor prepared by init_copy() on the fetched row */
+  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+    (*cp->do_copy)(cp);
+
+  return 0;
+}
+
+
+/**
+ Set up for copying the fields of the current row
+ from the filesort temp table to the table being sorted.
+
+ @param info Reference to the context including record descriptors
+
+ @retval
+ 0 Success.
+ @retval
+ 1 Memory allocation failure.
+*/
+
+static int init_copy(READ_RECORD *info)
+{
+  TABLE *table= info->copy_table;
+  Copy_field *tmp_field;
+  Copy_field *copy_field;
+  MY_BITMAP *write_set;
+
+  /*
+    Allocate the memory for the copy_field descriptors.  It is zero-filled
+    so that no Copy_field member is left indeterminate before set().
+  */
+  copy_field= (Copy_field *) my_malloc(sizeof(Copy_field) * info->tmp_fields,
+                                       MYF(MY_WME | MY_THREAD_SPECIFIC |
+                                           MY_ZEROFILL));
+  if (!copy_field)
+    return 1;
+
+  /* Allocate the memory for the updated table write set */
+  if (!(write_set= (MY_BITMAP *)
+        my_malloc(sizeof(MY_BITMAP),
+                  MYF(MY_WME | MY_THREAD_SPECIFIC))))
+  {
+    my_free(copy_field);
+    return 1;
+  }
+
+  /*
+    Initialize the column bitmap for the updated table write set.
+    No buffer is supplied, so my_bitmap_init() allocates one and can fail.
+  */
+  if (my_bitmap_init(write_set, NULL, table->s->fields, FALSE))
+  {
+    my_free(write_set);
+    my_free(copy_field);
+    return 1;
+  }
+
+  /*
+    Every allocation succeeded.  Only now store the pointers in info and
+    swap the write set, so that a failure above leaves neither a dangling
+    info->copy_field nor a half-replaced write set behind.
+  */
+  info->copy_field= copy_field;
+  info->save_write_set= table->write_set;
+  table->column_bitmaps_set_no_signal(table->read_set, write_set);
+
+  /*
+    Each column value present in the temp table needs to be copied
+    to the table being sorted
+  */
+  for (tmp_field= info->tmp_field; tmp_field->from_field; tmp_field++)
+  {
+    bitmap_fast_test_and_set(table->write_set,
+                             tmp_field->from_field->field_index);
+    copy_field->set(tmp_field->from_field, tmp_field->to_field, FALSE);
+    copy_field++;
+  }
+  table->file->column_bitmaps_signal();
+  info->copy_field_end= copy_field;
+
+  return 0;
+}
+
+
+/**
+ Do cleanup at the completion of copying field values from the
+ filesort temp table to the table being sorted.
+
+ @param info Reference to the context including record descriptors
+*/
+
+static void end_copy(READ_RECORD *info)
+{
+  TABLE *table= info->copy_table;
+
+  /*
+    The table's write set was replaced only if init_copy() got as far as
+    saving the original one.  Without this check a failed or skipped
+    init_copy() would make this function free the table's own write set
+    and install a NULL one.
+  */
+  if (table && info->save_write_set)
+  {
+    MY_BITMAP *write_set= table->write_set;
+
+    table->column_bitmaps_set(table->read_set, info->save_write_set);
+
+    my_bitmap_free(write_set);
+    my_free(write_set);
+  }
+
+  my_free(info->copy_field);
+  info->copy_table= NULL;
+  info->save_write_set= NULL;
+  info->copy_field= info->copy_field_end= NULL;
+}
+
+
static int rr_cmp(uchar *a,uchar *b)
{
if (a[0] != b[0])
diff --git a/sql/records.h b/sql/records.h
index 940c88ca0c7..0ffd08e320c 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -27,6 +27,8 @@ class SQL_SELECT;
class Copy_field;
class SORT_INFO;
+#include "my_bitmap.h"
+
struct READ_RECORD;
void end_read_record(READ_RECORD *info);
@@ -53,7 +55,9 @@ struct READ_RECORD
TABLE *table; /* Head-form */
//handler *file;
- TABLE **forms; /* head and ref forms */
+ TABLE *copy_table; /* Original table that a */
+ /* filesort temp table */
+ /* row is copied to */
Unlock_row_func unlock_row;
Read_func read_record_func;
THD *thd;
@@ -61,25 +65,38 @@ struct READ_RECORD
uint cache_records;
uint ref_length,struct_length,reclength,rec_cache_size,error_offset;
uint index;
- uchar *ref_pos; /* pointer to form->refpos */
+ uint tmp_fields; /* Number of filesort temp
+ table fields */
+ MY_BITMAP *save_write_set; /* Original write set when */
+ /* records are read from */
+ /* the filesort temp table */
+ /* and copied to the */
+ /* original table */
+ uchar *ref_pos; /* Pointer to form->refpos */
uchar *record;
uchar *rec_buf; /* to read field values after filesort */
uchar *cache,*cache_pos,*cache_end,*read_positions;
- struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
+ struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
+ Copy_field *tmp_field; /* Filesort temp table */
+ /* field array */
struct st_io_cache *io_cache;
- bool print_error, ignore_not_found_rows;
+ bool print_error, ignore_not_found_rows, free_tmp_table;
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
int read_record() { return read_record_func(this); }
- /*
+ /*
SJ-Materialization runtime may need to read fields from the materialized
- table and unpack them into original table fields:
+ table and unpack them into original table fields.
+ Read following a filesort may need to read fields from its temp table
+ and unpack them into the corresponding original table fields.
*/
Copy_field *copy_field;
Copy_field *copy_field_end;
+
public:
- READ_RECORD() : table(NULL), cache(NULL) {}
+ READ_RECORD()
+ : table(NULL), cache(NULL), copy_field(NULL), copy_field_end(NULL) {}
~READ_RECORD() { end_read_record(this); }
};
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 4722f3f5989..df333ae87cb 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -5767,7 +5767,6 @@ class user_var_entry
user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name,
bool create_if_not_exists);
-class SORT_INFO;
class multi_delete :public select_result_interceptor
{
TABLE_LIST *delete_tables, *table_being_deleted;
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index d57239671a8..2ca9c383035 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -93,7 +93,24 @@ public:
}
void init_for_filesort(uint sortlen, TABLE *table,
ulong max_length_for_sort_data,
- ha_rows maxrows, bool sort_positions);
+ ha_rows maxrows, bool sort_positions,
+ uint *tmp_fields);
+  void update_ref_length(uint new_ref_length)
+  {
+    /* No adjustment is made while addon fields are in use */
+    if (addon_field)
+      return;
+
+    /* Replace the old reference length by the new one in each total */
+    res_length=  res_length  - ref_length + new_ref_length;
+    sort_length= sort_length - ref_length + new_ref_length;
+    rec_length=  rec_length  - ref_length + new_ref_length;
+    ref_length=  new_ref_length;
+  }
};