diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/filesort.cc | 355 | ||||
-rw-r--r-- | sql/filesort.h | 12 | ||||
-rw-r--r-- | sql/ha_partition.cc | 30 | ||||
-rw-r--r-- | sql/ha_partition.h | 1 | ||||
-rw-r--r-- | sql/handler.h | 15 | ||||
-rw-r--r-- | sql/records.cc | 275 | ||||
-rw-r--r-- | sql/records.h | 31 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_sort.h | 19 |
9 files changed, 659 insertions, 80 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc index 00dfa08bba8..86626c85ebf 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -55,6 +55,11 @@ static bool write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos); static void register_used_fields(Sort_param *param); +static void register_tmp_table_fields(SORT_INFO *fs_info); +static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param, + SORT_INFO *fs_info); +static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info); +static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info); static bool save_index(Sort_param *param, uint count, SORT_INFO *table_sort); static uint suffix_length(ulong string_length); @@ -63,7 +68,8 @@ static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data, Field **ptabfield, uint sortlength, - LEX_STRING *addon_buf); + LEX_STRING *addon_buf, + uint *ptmp_fields); static void unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff, uchar *buff_end); static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, @@ -72,7 +78,8 @@ static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, void Sort_param::init_for_filesort(uint sortlen, TABLE *table, ulong max_length_for_sort_data, - ha_rows maxrows, bool sort_positions) + ha_rows maxrows, bool sort_positions, + uint *tmp_fields) { DBUG_ASSERT(addon_field == 0 && addon_buf.length == 0); @@ -86,7 +93,8 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, to sorted fields and get its total length in addon_buf.length */ addon_field= get_addon_fields(max_length_for_sort_data, - table->field, sort_length, &addon_buf); + table->field, sort_length, &addon_buf, + tmp_fields); } if (addon_field) { @@ -189,10 +197,11 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, 
sort->found_rows= HA_POS_ERROR; param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length, - &multi_byte_charset), + &multi_byte_charset), table, thd->variables.max_length_for_sort_data, - max_rows, filesort->sort_positions); + max_rows, filesort->sort_positions, + &sort->tmp_fields); sort->addon_buf= param.addon_buf; sort->addon_field= param.addon_field; @@ -273,7 +282,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, num_rows= find_all_keys(thd, &param, select, sort, &buffpek_pointers, - &tempfile, + &tempfile, pq.is_initialized() ? &pq : NULL, &sort->found_rows); if (num_rows == HA_POS_ERROR) @@ -345,7 +354,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, } error= 0; - err: +err: my_free(param.tmp_buffer); if (!subselect || !subselect->is_uncacheable()) { @@ -700,7 +709,7 @@ static void dbug_print_record(TABLE *table, bool print_rowid) static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, SORT_INFO *fs_info, - IO_CACHE *buffpek_pointers, + IO_CACHE *buffpek_pointers, IO_CACHE *tempfile, Bounded_queue<uchar, uchar> *pq, ha_rows *found_rows) @@ -709,8 +718,10 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, uint idx,indexpos,ref_length; uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH]; my_off_t record; + TABLE *fs_tmp_table; TABLE *sort_form; handler *file; + handler *ref_file; MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set; Item *sort_cond; ha_rows retval; @@ -728,9 +739,24 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, quick_select=select && select->quick; record=0; *found_rows= 0; + + if (!quick_select) + { + /* + Potentially create a temp table to avoid rnd_pos() calls on the + table to be sorted + */ + if (create_fs_tmp_table_if_needed(thd, param, fs_info)) + goto err; + fs_tmp_table= fs_info->fs_tmp_table; + } + else + fs_tmp_table= NULL; + ref_file= (fs_tmp_table ? 
fs_tmp_table->file : file); + flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select); if (flag) - ref_pos= &file->ref[0]; + ref_pos= &ref_file->ref[0]; next_pos=ref_pos; DBUG_EXECUTE_IF("show_explain_in_find_all_keys", @@ -760,6 +786,8 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, register_used_fields(param); if (quick_select) select->quick->add_used_key_part_to_set(); + else + register_tmp_table_fields(fs_info); sort_cond= (!select ? 0 : (!select->pre_idx_push_select_cond ? @@ -786,18 +814,25 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, } else /* Not quick-select */ { + error= file->ha_rnd_next(sort_form->record[0]); + if (!flag) { - error= file->ha_rnd_next(sort_form->record[0]); - if (!flag) - { - my_store_ptr(ref_pos,ref_length,record); // Position to row - record+= sort_form->s->db_record_offset; - } - else if (!error) - file->position(sort_form->record[0]); + my_store_ptr(ref_pos,ref_length,record); // Position to row + record+= sort_form->s->db_record_offset; + } + else if (!error) + { + /* + If filesort is using a temp table, write the row to the temp table, + and save its row position + */ + if (fs_tmp_table) + error= write_fs_tmp_table_row(thd, fs_info); + else + file->position(sort_form->record[0]); } if (error && error != HA_ERR_RECORD_DELETED) - break; + break; } if (thd->check_killed()) @@ -904,7 +939,19 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, DBUG_RETURN(retval); err: + if (!quick_select) + { + if (file->inited) + { + (void)file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */ + if (!next_pos) + file->ha_rnd_end(); + } + if (fs_tmp_table && fs_tmp_table->file->inited) + fs_tmp_table->file->ha_rnd_end(); + } sort_form->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set); + free_fs_tmp_table(thd, fs_info); DBUG_RETURN(HA_POS_ERROR); } /* find_all_keys */ @@ -1295,6 +1342,199 @@ static void 
register_used_fields(Sort_param *param) } +/** + Register the filesort temp table fields in the sorted table's read set + + @param fs_info Filesort information that includes the filesort + temp table and an array of its fields. +*/ + +static void register_tmp_table_fields(SORT_INFO *fs_info) +{ + DBUG_ENTER("register_tmp_table_fields"); + + if (fs_info->fs_tmp_table) + { + Copy_field *tmp_field= fs_info->tmp_field; + + for (; tmp_field->from_field; tmp_field++) + { + /* Register the corresponding field in the original table */ + bitmap_fast_test_and_set(tmp_field->from_field->table->read_set, + tmp_field->from_field->field_index); + } + } + + DBUG_VOID_RETURN; +} + + +/** + Potentially create a filesort temp table to avoid rnd_pos() calls on the + table to be sorted + + @param param Sort information and parameters. + @param fs_info Filesort information that includes the filesort + temp table and an array of its fields. + + @retval + 0 Temp table creation succeeded or temp table is + unnecessary. + @retval + 1 Memory allocation failed or temp table creation failed. 
+*/ + +static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param, + SORT_INFO *fs_info) +{ + TABLE *table= param->sort_form; + DBUG_ENTER("create_fs_tmp_table_if_needed"); + + if (fs_info->tmp_fields && table->file->ha_is_rnd_pos_expensive()) + { + /* + Create a filesort temp table to avoid expensive rnd_pos() calls + on the table to be sorted + */ + Copy_field *tmp_field; + List<Item> tmp_field_list; + Item_field *item_field; + Field **pfield; + Field *field; + + /* Allocate memory for the temp table field array */ + tmp_field= (Copy_field *) + my_malloc(sizeof(Copy_field) * (fs_info->tmp_fields + 1), + MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (!tmp_field) + DBUG_RETURN(1); + fs_info->tmp_field= tmp_field; + + /* Initialize the field array elements */ + for (pfield= table->field; (field= *pfield); pfield++) + { + if (!bitmap_is_set(table->read_set, field->field_index)) + continue; + /* + All fields referenced in the query are to be written + to the temp table + */ + tmp_field->from_field= field; + tmp_field++; + } + tmp_field->from_field= 0; // Put end marker + + /* Create the temp table field list */ + for (tmp_field= fs_info->tmp_field; tmp_field->from_field; tmp_field++) + { + item_field= new (thd->mem_root) Item_field(thd, tmp_field->from_field); + if (tmp_field_list.push_back(item_field, thd->mem_root)) + { + free_fs_tmp_table(thd, fs_info); + DBUG_RETURN(1); + } + } + + TMP_TABLE_PARAM tmp_table_param; + tmp_table_param.init(); + tmp_table_param.field_count= fs_info->tmp_fields; + tmp_table_param.table_charset= table->s->table_charset; + tmp_table_param.skip_create_table= TRUE; + + /* Create the filesort temp table */ + TABLE *fs_tmp_table= create_tmp_table(thd, &tmp_table_param, + tmp_field_list, + NULL, + FALSE, + FALSE, + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS, + param->max_rows, &empty_clex_str, + FALSE, FALSE); + + if (!fs_tmp_table) + { + free_fs_tmp_table(thd, fs_info); + DBUG_RETURN(1); + } + + /* Fill in the pointers to the 
temp table fields in the field array */ + for (tmp_field= fs_info->tmp_field, pfield= fs_tmp_table->field; + (field= *pfield); + tmp_field++, pfield++) + tmp_field->set(field, tmp_field->from_field, FALSE); + fs_info->fs_tmp_table= fs_tmp_table; + + /* Fix up the sort buffer parameters */ + param->update_ref_length(fs_tmp_table->file->ref_length); + + fs_tmp_table->prepare_for_position(); + } + + DBUG_RETURN(0); +} + + +/** + Copy column values from the current row of the table being sorted + to the current filesort temp table row. Write the row to the + filesort temp table. + + @param fs_info Filesort information that includes the filesort + temp table and an array of its fields. + + @retval + 0 Temp table row was created and successfully written. + @retval + <> 0 Temp table write failed. +*/ + +static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info) +{ + TABLE *fs_tmp_table= fs_info->fs_tmp_table; + Copy_field *tmp_field; + int error; + DBUG_ENTER("write_fs_tmp_table_row"); + + /* + Copy each column value present in the temp table + from the table being sorted + */ + for (tmp_field= fs_info->tmp_field; tmp_field->from_field; tmp_field++) + tmp_field->do_copy(tmp_field); + + /* Write the temp table row */ + error= fs_tmp_table->file->ha_write_tmp_row(fs_tmp_table->record[0]); + if (error) + DBUG_RETURN(error); + + /* Save the written row's position in the temp table */ + fs_tmp_table->file->position(fs_tmp_table->record[0]); + DBUG_RETURN(0); +} + + +/** + Free the filesort temp table and its information structures. + + @param thd Thread handle. + @param fs_info Filesort information that includes the filesort + temp table and an array of its fields. 
+*/ + +static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info) +{ + if (fs_info->fs_tmp_table) + { + free_tmp_table(thd, fs_info->fs_tmp_table); + fs_info->fs_tmp_table= NULL; + } + my_free(fs_info->tmp_field); + fs_info->tmp_field= NULL; + fs_info->tmp_fields= 0; +} + + static bool save_index(Sort_param *param, uint count, SORT_INFO *table_sort) { @@ -2010,6 +2250,8 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, @param ptabfield Array of references to the table fields @param sortlength Total length of sorted fields @param [out] addon_buf Buffer to us for appended fields + @param [out] ptmp_fields Pointer to the number of temp table fields, + if any @note The null bits for the appended values are supposed to be put together @@ -2023,20 +2265,25 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, static SORT_ADDON_FIELD * get_addon_fields(ulong max_length_for_sort_data, - Field **ptabfield, uint sortlength, LEX_STRING *addon_buf) + Field **ptabfield, uint sortlength, LEX_STRING *addon_buf, + uint *ptmp_fields) { Field **pfield; Field *field; - SORT_ADDON_FIELD *addonf; - uint length= 0; - uint fields= 0; + SORT_ADDON_FIELD *addonf= NULL; + uint addon_length= 0; + uint addon_fields= 0; uint null_fields= 0; + uint nonaddon_fields= 0; + bool has_blob_field= FALSE; MY_BITMAP *read_set= (*ptabfield)->table->read_set; DBUG_ENTER("get_addon_fields"); /* - If there is a reference to a field in the query add it - to the the set of appended fields. + If there is a reference to a field in the query that is not a blob/text + field, add it to the the set of appended fields. + We cannot use addons if there is a blob/text field. + All referenced fields are written to the temp table. Note for future refinement: This this a too strong condition. 
Actually we need only the fields referred in the @@ -2051,34 +2298,52 @@ get_addon_fields(ulong max_length_for_sort_data, { if (!bitmap_is_set(read_set, field->field_index)) continue; - if (field->flags & BLOB_FLAG) - DBUG_RETURN(0); - length+= field->max_packed_col_length(field->pack_length()); - if (field->maybe_null()) - null_fields++; - fields++; - } - if (!fields) - DBUG_RETURN(0); - length+= (null_fields+7)/8; + if (has_blob_field) + nonaddon_fields++; + else if (field->flags & BLOB_FLAG) + { + has_blob_field= TRUE; + nonaddon_fields= (addon_fields + 1); + null_fields= 0; + addon_fields= 0; + addon_length= 0; + } + else + { + addon_length+= field->max_packed_col_length(field->pack_length()); + addon_fields++; + if (field->maybe_null()) + null_fields++; + } + } + if (nonaddon_fields) + *ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields + else + *ptmp_fields= 0; // Temp table is unnecessary + if (!addon_fields) + DBUG_RETURN(NULL); - if (length+sortlength > max_length_for_sort_data || + addon_length+= (null_fields+7)/8; + + if (addon_length+sortlength > max_length_for_sort_data || !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), - &addonf, sizeof(SORT_ADDON_FIELD) * (fields+1), - &addon_buf->str, length, + &addonf, sizeof(SORT_ADDON_FIELD) * (addon_fields+1), + &addon_buf->str, addon_length, NullS)) + { + *ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields + DBUG_RETURN(NULL); + } - DBUG_RETURN(0); - - addon_buf->length= length; - length= (null_fields+7)/8; + addon_buf->length= addon_length; + addon_length= (null_fields+7)/8; null_fields= 0; for (pfield= ptabfield; (field= *pfield) ; pfield++) { if (!bitmap_is_set(read_set, field->field_index)) continue; addonf->field= field; - addonf->offset= length; + addonf->offset= addon_length; if (field->maybe_null()) { addonf->null_offset= null_fields/8; @@ -2091,13 +2356,13 @@ get_addon_fields(ulong max_length_for_sort_data, addonf->null_bit= 0; } addonf->length= 
field->max_packed_col_length(field->pack_length()); - length+= addonf->length; + addon_length+= addonf->length; addonf++; } addonf->field= 0; // Put end marker - DBUG_PRINT("info",("addon_length: %d",length)); - DBUG_RETURN(addonf-fields); + DBUG_PRINT("info",("addon_length: %d",addon_length)); + DBUG_RETURN(addonf-addon_fields); } diff --git a/sql/filesort.h b/sql/filesort.h index bd1d81f91ef..60b21ffab47 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -27,6 +27,7 @@ class Filesort_tracker; struct SORT_FIELD; typedef struct st_order ORDER; class JOIN; +class Copy_field; /** @@ -87,7 +88,8 @@ class SORT_INFO public: SORT_INFO() - :addon_field(0), record_pointers(0) + :addon_field(0), record_pointers(0), + fs_tmp_table(NULL), tmp_field(NULL), tmp_fields(0) { buffpek.str= 0; my_b_clear(&io_cache); @@ -101,6 +103,9 @@ public: my_free(record_pointers); my_free(buffpek.str); my_free(addon_field); + fs_tmp_table= NULL; // Freed in end_read_record() + tmp_field= NULL; // Freed in end_read_record() + tmp_fields= 0; } void reset() @@ -119,6 +124,11 @@ public: /* To unpack back */ void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); uchar *record_pointers; /* If sorted in memory */ + TABLE *fs_tmp_table; /* Optional temp table used by filesort to */ + /* eliminate rnd_pos() calls to the table + /* being sorted */ + Copy_field *tmp_field; /* Filesort temp table field array */ + uint tmp_fields; /* Number of filesort temp table fields */ /* How many rows in final result. 
Also how many rows in record_pointers, if used diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 4ec6f3dfa38..852bcefed3c 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -5185,6 +5185,36 @@ int ha_partition::rnd_pos_by_record(uchar *record) } +/* + Determine whether a call to rnd_pos() is expensive + + SYNOPSIS + is_rnd_pos_expensive() + + RETURN VALUE + FALSE No inherent inefficiencies in rnd_pos() + TRUE rnd_pos() call is inefficient + + DESCRIPTION + Some engines, such as Spider, have an inefficient implementation of + rnd_pos(), because they need to do a remote access to fetch the + single table row. Determine whether the rnd_pos() implementation + for any of the partitions is expensive. +*/ + +bool ha_partition::is_rnd_pos_expensive() +{ + DBUG_ENTER("ha_partition::is_rnd_pos_expensive"); + uint i; + + for (i= 0; i < m_tot_parts; i++) + if (m_file[i]->ha_is_rnd_pos_expensive()) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + /**************************************************************************** MODULE index scan ****************************************************************************/ diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 30dd24b6014..1d3f9c14c79 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -691,6 +691,7 @@ public: virtual int rnd_next(uchar * buf); virtual int rnd_pos(uchar * buf, uchar * pos); virtual int rnd_pos_by_record(uchar *record); + virtual bool is_rnd_pos_expensive(); virtual void position(const uchar * record); /* diff --git a/sql/handler.h b/sql/handler.h index a96e98c2f84..98e02eaa244 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -3536,6 +3536,17 @@ public: position(record); return rnd_pos(record, ref); } + /** + Some engines are unable to provide an efficient implementation + for rnd_pos(). Spider is such an engine, as a call to rnd_pos() + needs to access a table on a remote data node to retrieve the + single table row. 
+ */ + virtual bool is_rnd_pos_expensive() + { + /* Engine's rnd_pos() implementation has no inherent inefficiencies */ + return FALSE; + } virtual int read_first_row(uchar *buf, uint primary_key); public: @@ -3545,6 +3556,10 @@ public: int ha_rnd_next(uchar *buf); int ha_rnd_pos(uchar *buf, uchar *pos); inline int ha_rnd_pos_by_record(uchar *buf); + inline bool ha_is_rnd_pos_expensive() + { + return is_rnd_pos_expensive(); + } inline int ha_read_first_row(uchar *buf, uint primary_key); /** diff --git a/sql/records.cc b/sql/records.cc index ac84ca84ab6..b016cd97783 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -38,16 +38,21 @@ static int rr_quick(READ_RECORD *info); int rr_sequential(READ_RECORD *info); static int rr_from_tempfile(READ_RECORD *info); +static int rr_from_tempfile_and_copy(READ_RECORD *info); static int rr_unpack_from_tempfile(READ_RECORD *info); static int rr_unpack_from_buffer(READ_RECORD *info); int rr_from_pointers(READ_RECORD *info); +int rr_from_pointers_and_copy(READ_RECORD *info); static int rr_from_cache(READ_RECORD *info); +static int rr_from_cache_and_copy(READ_RECORD *info); static int init_rr_cache(THD *thd, READ_RECORD *info); static int rr_cmp(uchar *a,uchar *b); static int rr_index_first(READ_RECORD *info); static int rr_index_last(READ_RECORD *info); static int rr_index(READ_RECORD *info); static int rr_index_desc(READ_RECORD *info); +static int init_copy(READ_RECORD *info); +static void end_copy(READ_RECORD *info); /** @@ -77,6 +82,11 @@ bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, bzero((char*) info,sizeof(*info)); info->thd= thd; info->table= table; + info->copy_table= NULL; + info->tmp_field= NULL; + info->tmp_fields= 0; + info->free_tmp_table= FALSE; + info->addon_field= NULL; info->record= table->record[0]; info->print_error= print_error; info->unlock_row= rr_unlock_row; @@ -188,13 +198,39 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, bool disable_rr_cache) { IO_CACHE 
*tempfile; - SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0; + SORT_ADDON_FIELD *addon_field; + bool has_fs_tmp_table; DBUG_ENTER("init_read_record"); bzero((char*) info,sizeof(*info)); info->thd=thd; + if (filesort) + { + if (filesort->fs_tmp_table) + { + has_fs_tmp_table= TRUE; + info->copy_table= table; + table= filesort->fs_tmp_table; + } + else + { + has_fs_tmp_table= FALSE; + info->copy_table= NULL; + } + info->tmp_field= filesort->tmp_field; + info->tmp_fields= filesort->tmp_fields; + addon_field= filesort->addon_field; + } + else + { + has_fs_tmp_table= FALSE; + info->copy_table= NULL; + info->tmp_field= NULL; + info->tmp_fields= 0; + addon_field= NULL; + } + info->free_tmp_table= has_fs_tmp_table; info->table=table; - info->forms= &info->table; /* Only one table */ info->addon_field= addon_field; if ((table->s->tmp_table == INTERNAL_TMP_TABLE || @@ -230,13 +266,18 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, { DBUG_PRINT("info",("using rr_from_tempfile")); info->read_record_func= - addon_field ? rr_unpack_from_tempfile : rr_from_tempfile; + addon_field ? rr_unpack_from_tempfile : + has_fs_tmp_table ? 
rr_from_tempfile_and_copy : + rr_from_tempfile; info->io_cache= tempfile; reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0); - info->ref_pos=table->file->ref; + info->ref_pos= table->file->ref; if (!table->file->inited) if (table->file->ha_rnd_init_with_error(0)) DBUG_RETURN(1); + if (has_fs_tmp_table) + if (init_copy(info)) + DBUG_RETURN(1); /* addon_field is checked because if we use addon fields, @@ -245,22 +286,26 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, */ if (!disable_rr_cache && !addon_field && - thd->variables.read_rnd_buff_size && - !(table->file->ha_table_flags() & HA_FAST_KEY_READ) && - (table->db_stat & HA_READ_ONLY || - table->reginfo.lock_type <= TL_READ_NO_INSERT) && - (ulonglong) table->s->reclength* (table->file->stats.records+ - table->file->stats.deleted) > - (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE && - info->io_cache->end_of_file/info->ref_length * table->s->reclength > - (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE && - !table->s->blob_fields && + thd->variables.read_rnd_buff_size && + !(table->file->ha_table_flags() & HA_FAST_KEY_READ) && + (table->db_stat & HA_READ_ONLY || + table->reginfo.lock_type <= TL_READ_NO_INSERT) && + (ulonglong) table->s->reclength* + (table->file->stats.records+ + table->file->stats.deleted) > + (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE && + info->io_cache->end_of_file/info->ref_length * + table->s->reclength > + (my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE && + !table->s->blob_fields && info->ref_length <= MAX_REFLENGTH) { if (! init_rr_cache(thd, info)) { - DBUG_PRINT("info",("using rr_from_cache")); - info->read_record_func= rr_from_cache; + info->read_record_func= + has_fs_tmp_table ? 
rr_from_cache_and_copy : + rr_from_cache; + DBUG_PRINT("info",("using rr_from_cache")); } } } @@ -272,13 +317,19 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, else if (filesort && filesort->record_pointers) { DBUG_PRINT("info",("using record_pointers")); - if (table->file->ha_rnd_init_with_error(0)) - DBUG_RETURN(1); + if (!table->file->inited) + if (table->file->ha_rnd_init_with_error(0)) + DBUG_RETURN(1); info->cache_pos= filesort->record_pointers; info->cache_end= (info->cache_pos+ filesort->return_rows * info->ref_length); info->read_record_func= - addon_field ? rr_unpack_from_buffer : rr_from_pointers; + addon_field ? rr_unpack_from_buffer : + has_fs_tmp_table ? rr_from_pointers_and_copy : + rr_from_pointers; + if (has_fs_tmp_table) + if (init_copy(info)) + DBUG_RETURN(1); } else if (table->file->keyread_enabled()) { @@ -300,11 +351,11 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, DBUG_RETURN(1); /* We can use record cache if we don't update dynamic length tables */ if (!table->no_cache && - (use_record_cache > 0 || - (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY || - !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) || - (use_record_cache < 0 && - !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE)))) + (use_record_cache > 0 || + (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY || + !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) || + (use_record_cache < 0 && + !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE)))) (void) table->file->extra_opt(HA_EXTRA_CACHE, thd->variables.read_buff_size); } @@ -333,6 +384,15 @@ void end_read_record(READ_RECORD *info) (void) info->table->file->extra(HA_EXTRA_NO_CACHE); if (info->read_record_func != rr_quick) // otherwise quick_range does it (void) info->table->file->ha_index_or_rnd_end(); + if (info->free_tmp_table) + { + free_tmp_table(info->thd, info->table); + end_copy(info); + my_free(info->tmp_field); + info->tmp_field= 
NULL; + info->tmp_fields= 0; + info->free_tmp_table= FALSE; + } info->table=0; } } @@ -521,7 +581,35 @@ static int rr_from_tempfile(READ_RECORD *info) /** Read a result set record from a temporary file after sorting. - The function first reads the next sorted record from the temporary file. + The function first reads the next sorted record from the temporary file + into a buffer. If successful, it copies the fields to the + table being sorted. + + @param info Reference to the context including record descriptors + + @retval + 0 Record successfully read. + @retval + -1 No more records to read or record read failed. +*/ + +int rr_from_tempfile_and_copy(READ_RECORD *info) +{ + int error; + if ((error= rr_from_tempfile(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} + + +/** + Read a result set record from a temporary file after sorting. + + The function first reads the next sorted record from the temporary file into a buffer. If a success it calls a callback function that unpacks the fields values use in the result set from this buffer into their positions in the regular record buffer. @@ -569,6 +657,35 @@ int rr_from_pointers(READ_RECORD *info) return tmp; } + +/** + Read a result set record from a temporary file after sorting. + + The function first reads the next sorted record from the temporary file + into a buffer. If successful, it copies the fields to the + table being sorted. + + @param info Reference to the context including record descriptors + + @retval + 0 Record successfully read. + @retval + -1 No more records to read or record read failed. +*/ + +int rr_from_pointers_and_copy(READ_RECORD *info) +{ + int error; + if ((error= rr_from_pointers(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} + + /** Read a result set record from a buffer after sorting. 
@@ -703,6 +820,114 @@ static int rr_from_cache(READ_RECORD *info) } /* rr_from_cache */ +/** + Read a result set record from cache after sorting. + + The function first reads the next sorted record from cache. + If successful, it copies the fields to the table being sorted. + + @param info Reference to the context including record descriptors + + @retval + 0 Record successfully read. + @retval + -1 No more records to read or record read failed. +*/ + +int rr_from_cache_and_copy(READ_RECORD *info) +{ + int error; + if ((error= rr_from_cache(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} + + +/** + Set up for copying the fields of the current row + from the filesort temp table to the table being sorted. + + @param info Reference to the context including record descriptors + + @retval + 0 Success. + @retval + 1 Memory allocation failure. +*/ + +static int init_copy(READ_RECORD *info) +{ + TABLE *table= info->copy_table; + Copy_field *tmp_field; + Copy_field *copy_field; + MY_BITMAP *write_set; + my_bitmap_map *column_bitmap= NULL; + + /* Allocate the memory for the copy_field descriptors */ + copy_field= (Copy_field *) my_malloc(sizeof(Copy_field) * info->tmp_fields, + MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (!copy_field) + return 1; + info->copy_field= copy_field; + + /* Allocate the memory for the updated table write set */ + if (!(write_set= (MY_BITMAP *) + my_malloc(sizeof(MY_BITMAP), + MYF(MY_WME | MY_THREAD_SPECIFIC)))) + { + my_free(copy_field); + return 1; + } + /* Initialize the column bitmap for the updated table write set */ + my_bitmap_init(write_set, column_bitmap, table->s->fields, FALSE); + info->save_write_set= table->write_set; + table->column_bitmaps_set_no_signal(table->read_set, write_set); + + /* + Each column value present in the temp table needs to be copied + to the table being sorted + */ + for (tmp_field= info->tmp_field; tmp_field->from_field; 
tmp_field++) + { + bitmap_fast_test_and_set(table->write_set, + tmp_field->from_field->field_index); + copy_field->set(tmp_field->from_field, tmp_field->to_field, FALSE); + copy_field++; + } + table->file->column_bitmaps_signal(); + info->copy_field_end= copy_field; + + return 0; +} + + +/** + Do cleanup at the completion of copying field values from the + filesort temp table to the table being sorted. + + @param info Reference to the context including record descriptors +*/ + +static void end_copy(READ_RECORD *info) +{ + TABLE *table= info->copy_table; + MY_BITMAP *write_set= table->write_set; + + table->column_bitmaps_set(table->read_set, info->save_write_set); + + my_bitmap_free(write_set); + my_free(write_set); + my_free(info->copy_field); + info->copy_table= NULL; + info->save_write_set= NULL; + info->copy_field= info->copy_field_end= NULL; +} + + static int rr_cmp(uchar *a,uchar *b) { if (a[0] != b[0]) diff --git a/sql/records.h b/sql/records.h index 940c88ca0c7..0ffd08e320c 100644 --- a/sql/records.h +++ b/sql/records.h @@ -27,6 +27,8 @@ class SQL_SELECT; class Copy_field; class SORT_INFO; +#include "my_bitmap.h" + struct READ_RECORD; void end_read_record(READ_RECORD *info); @@ -53,7 +55,9 @@ struct READ_RECORD TABLE *table; /* Head-form */ //handler *file; - TABLE **forms; /* head and ref forms */ + TABLE *copy_table; /* Original table that a */ + /* filesort temp table */ + /* row is copied to */ Unlock_row_func unlock_row; Read_func read_record_func; THD *thd; @@ -61,25 +65,38 @@ struct READ_RECORD uint cache_records; uint ref_length,struct_length,reclength,rec_cache_size,error_offset; uint index; - uchar *ref_pos; /* pointer to form->refpos */ + uint tmp_fields; /* Number of filesort temp + table fields */ + MY_BITMAP *save_write_set; /* Original write set when */ + /* records are read from /* + /* the filesort temp table */ + /* and copied to the */ + /* original table */ + uchar *ref_pos; /* Pointer to form->refpos */ uchar *record; uchar *rec_buf; /* 
to read field values after filesort */ uchar *cache,*cache_pos,*cache_end,*read_positions; - struct st_sort_addon_field *addon_field; /* Pointer to the fields info */ + struct st_sort_addon_field *addon_field; /* Pointer to the fields info */ + Copy_field *tmp_field; /* Filesort temp table */ + /* field array */ struct st_io_cache *io_cache; - bool print_error, ignore_not_found_rows; + bool print_error, ignore_not_found_rows, free_tmp_table; void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); int read_record() { return read_record_func(this); } - /* + /* SJ-Materialization runtime may need to read fields from the materialized - table and unpack them into original table fields: + table and unpack them into original table fields. + Read following a filesort may need to read fields from its temp table + and unpack them into the corresponding original table fields. */ Copy_field *copy_field; Copy_field *copy_field_end; + public: - READ_RECORD() : table(NULL), cache(NULL) {} + READ_RECORD() + : table(NULL), cache(NULL), copy_field(NULL), copy_field_end(NULL) {} ~READ_RECORD() { end_read_record(this); } }; diff --git a/sql/sql_class.h b/sql/sql_class.h index 4722f3f5989..df333ae87cb 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5767,7 +5767,6 @@ class user_var_entry user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name, bool create_if_not_exists); -class SORT_INFO; class multi_delete :public select_result_interceptor { TABLE_LIST *delete_tables, *table_being_deleted; diff --git a/sql/sql_sort.h b/sql/sql_sort.h index d57239671a8..2ca9c383035 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -93,7 +93,24 @@ public: } void init_for_filesort(uint sortlen, TABLE *table, ulong max_length_for_sort_data, - ha_rows maxrows, bool sort_positions); + ha_rows maxrows, bool sort_positions, + uint *tmp_fields); + void update_ref_length(uint new_ref_length) + { + if (!addon_field) + { + if (ref_length) + { + res_length-= ref_length; + sort_length-= 
ref_length; + rec_length-= ref_length; + } + res_length+= new_ref_length; + sort_length+= new_ref_length; + rec_length+= new_ref_length; + ref_length= new_ref_length; + } + } }; |