diff options
author | Mattias Jonsson <mattias.jonsson@sun.com> | 2008-10-01 12:14:55 +0200 |
---|---|---|
committer | Mattias Jonsson <mattias.jonsson@sun.com> | 2008-10-01 12:14:55 +0200 |
commit | 22e0319620e3c9d2d2a552a8bc68e8233f06e544 (patch) | |
tree | 9758fa11760f3312e5b60eba313d85008667d54d /sql/ha_partition.cc | |
parent | 6816cf6a6518bb5108fe7907f71e58be994db6e5 (diff) | |
parent | be63f0af4f46048eda18bd66300949a810f0fd1e (diff) | |
download | mariadb-git-22e0319620e3c9d2d2a552a8bc68e8233f06e544.tar.gz |
merge
Diffstat (limited to 'sql/ha_partition.cc')
-rw-r--r-- | sql/ha_partition.cc | 349 |
1 files changed, 240 insertions, 109 deletions
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 4d521e9d3a0..8f96ca08085 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -160,7 +160,8 @@ const uint ha_partition::NO_CURRENT_PART_ID= 0xFFFFFFFF; ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share) :handler(hton, share), m_part_info(NULL), m_create_handler(FALSE), - m_is_sub_partitioned(0), is_clone(FALSE) + m_is_sub_partitioned(0), is_clone(FALSE), auto_increment_lock(FALSE), + auto_increment_safe_stmt_log_lock(FALSE) { DBUG_ENTER("ha_partition::ha_partition(table)"); init_handler_variables(); @@ -182,7 +183,8 @@ ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share) ha_partition::ha_partition(handlerton *hton, partition_info *part_info) :handler(hton, NULL), m_part_info(part_info), m_create_handler(TRUE), - m_is_sub_partitioned(m_part_info->is_sub_partitioned()), is_clone(FALSE) + m_is_sub_partitioned(m_part_info->is_sub_partitioned()), is_clone(FALSE), + auto_increment_lock(FALSE), auto_increment_safe_stmt_log_lock(FALSE) { DBUG_ENTER("ha_partition::ha_partition(part_info)"); init_handler_variables(); @@ -1248,7 +1250,7 @@ int ha_partition::prepare_new_partition(TABLE *tbl, assumes that external_lock() is last call that may fail here. Otherwise see description for cleanup_new_partition(). */ - if ((error= file->ha_external_lock(current_thd, m_lock_type))) + if ((error= file->ha_external_lock(ha_thd(), m_lock_type))) goto error; DBUG_RETURN(0); @@ -1336,8 +1338,8 @@ void ha_partition::cleanup_new_partition(uint part_count) int ha_partition::change_partitions(HA_CREATE_INFO *create_info, const char *path, - ulonglong *copied, - ulonglong *deleted, + ulonglong * const copied, + ulonglong * const deleted, const uchar *pack_frm_data __attribute__((unused)), size_t pack_frm_len @@ -1354,7 +1356,7 @@ int ha_partition::change_partitions(HA_CREATE_INFO *create_info, int error= 1; bool first; uint temp_partitions= m_part_info->temp_partitions.elements; - THD *thd= current_thd; + THD *thd= ha_thd(); DBUG_ENTER("ha_partition::change_partitions"); /* @@ -1628,7 +1630,8 @@ int ha_partition::change_partitions(HA_CREATE_INFO *create_info, partitions. */ -int ha_partition::copy_partitions(ulonglong *copied, ulonglong *deleted) +int ha_partition::copy_partitions(ulonglong * const copied, + ulonglong * const deleted) { uint reorg_part= 0; int result= 0; @@ -1674,13 +1677,13 @@ int ha_partition::copy_partitions(ulonglong *copied, ulonglong *deleted) table since it doesn't fit into any partition any longer due to changed partitioning ranges or list values. */ - deleted++; + (*deleted)++; } else { THD *thd= ha_thd(); /* Copy record to new handler */ - copied++; + (*copied)++; tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */ result= m_new_file[new_part]->ha_write_row(m_rec0); reenable_binlog(thd); @@ -1804,7 +1807,7 @@ uint ha_partition::del_ren_cre_table(const char *from, handler **file, **abort_file; DBUG_ENTER("del_ren_cre_table()"); - if (get_from_handler_file(from, current_thd->mem_root)) + if (get_from_handler_file(from, ha_thd()->mem_root)) DBUG_RETURN(TRUE); DBUG_ASSERT(m_file_buffer); DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to)); @@ -1931,7 +1934,7 @@ int ha_partition::set_up_table_before_create(TABLE *tbl, { int error= 0; const char *partition_name; - THD *thd= current_thd; + THD *thd= ha_thd(); DBUG_ENTER("set_up_table_before_create"); if (!part_elem) @@ -2327,7 +2330,7 @@ bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root) tot_partition_words= (m_tot_parts + 3) / 4; engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*)); for (i= 0; i < m_tot_parts; i++) - engine_array[i]= ha_resolve_by_legacy_type(current_thd, + engine_array[i]= ha_resolve_by_legacy_type(ha_thd(), (enum legacy_db_type) *(uchar *) ((file_buffer) + 12 + i)); address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words; @@ -2398,8 +2401,10 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked) uint alloc_len; handler **file; char name_buff[FN_REFLEN]; + bool is_not_tmp_table= (table_share->tmp_table == NO_TMP_TABLE); DBUG_ENTER("ha_partition::open"); + DBUG_ASSERT(table->s == table_share); ref_length= 0; m_mode= mode; m_open_test_lock= test_if_locked; @@ -2408,9 +2413,9 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(1); m_start_key.length= 0; m_rec0= table->record[0]; - m_rec_length= table->s->reclength; + m_rec_length= table_share->reclength; alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS); - alloc_len+= table->s->max_key_length; + alloc_len+= table_share->max_key_length; if (!m_ordered_rec_buffer) { if (!(m_ordered_rec_buffer= (uchar*)my_malloc(alloc_len, MYF(MY_WME)))) @@ -2483,6 +2488,29 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked) goto err_handler; /* + Use table_share->ha_data to share auto_increment_value among all handlers + for the same table. + */ + if (is_not_tmp_table) + pthread_mutex_lock(&table_share->mutex); + if (!table_share->ha_data) + { + HA_DATA_PARTITION *ha_data; + /* currently only needed for auto_increment */ + table_share->ha_data= ha_data= (HA_DATA_PARTITION*) + alloc_root(&table_share->mem_root, + sizeof(HA_DATA_PARTITION)); + if (!ha_data) + { + pthread_mutex_unlock(&table_share->mutex); + goto err_handler; + } + DBUG_PRINT("info", ("table_share->ha_data 0x%p", ha_data)); + bzero(ha_data, sizeof(HA_DATA_PARTITION)); + } + if (is_not_tmp_table) + pthread_mutex_unlock(&table_share->mutex); + /* Some handlers update statistics as part of the open call. This will in some cases corrupt the statistics of the partition handler and thus to ensure we have correct statistics we call info from open after @@ -2539,6 +2567,7 @@ int ha_partition::close(void) handler **file; DBUG_ENTER("ha_partition::close"); + DBUG_ASSERT(table->s == table_share); delete_queue(&m_queue); if (!is_clone) bitmap_free(&(m_part_info->used_partitions)); @@ -2607,6 +2636,7 @@ int ha_partition::external_lock(THD *thd, int lock_type) handler **file; DBUG_ENTER("ha_partition::external_lock"); + DBUG_ASSERT(!auto_increment_lock && !auto_increment_safe_stmt_log_lock); file= m_file; m_lock_type= lock_type; @@ -2825,8 +2855,9 @@ int ha_partition::write_row(uchar * buf) uint32 part_id; int error; longlong func_value; - bool autoincrement_lock= FALSE; + bool have_auto_increment= table->next_number_field && buf == table->record[0]; my_bitmap_map *old_map; + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; THD *thd= ha_thd(); timestamp_auto_set_type orig_timestamp_type= table->timestamp_field_type; #ifdef NOT_NEEDED @@ -2844,28 +2875,16 @@ int ha_partition::write_row(uchar * buf) If we have an auto_increment column and we are writing a changed row or a new row, then update the auto_increment value in the record. */ - if (table->next_number_field && buf == table->record[0]) + if (have_auto_increment) { - /* - Some engines (InnoDB for example) can change autoincrement - counter only after 'table->write_row' operation. - So if another thread gets inside the ha_partition::write_row - before it is complete, it gets same auto_increment value, - which means DUP_KEY error (bug #27405) - Here we separate the access using table_share->mutex, and - use autoincrement_lock variable to avoid unnecessary locks. - Probably not an ideal solution. - */ - if (table_share->tmp_table == NO_TMP_TABLE) + if (!ha_data->auto_inc_initialized && + !table->s->next_number_keypart) { /* - Bug#30878 crash when alter table from non partitioned table - to partitioned. - Checking if tmp table then there is no need to lock, - and the table_share->mutex may not be initialised. + If auto_increment in table_share is not initialized, start by + initializing it. */ - autoincrement_lock= TRUE; - pthread_mutex_lock(&table_share->mutex); + info(HA_STATUS_AUTO); } error= update_auto_increment(); @@ -2903,11 +2922,11 @@ int ha_partition::write_row(uchar * buf) DBUG_PRINT("info", ("Insert in partition %d", part_id)); tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */ error= m_file[part_id]->ha_write_row(buf); + if (have_auto_increment && !table->s->next_number_keypart) + set_auto_increment_if_higher(table->next_number_field->val_int()); reenable_binlog(thd); exit: table->timestamp_field_type= orig_timestamp_type; - if (autoincrement_lock) - pthread_mutex_unlock(&table_share->mutex); DBUG_RETURN(error); } @@ -2931,13 +2950,6 @@ exit: Keep in mind that the server can do updates based on ordering if an ORDER BY clause was used. Consecutive ordering is not guarenteed. - Currently new_data will not have an updated auto_increament record, or - and updated timestamp field. You can do these for partition by doing these: - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - if (table->next_number_field && record == table->record[0]) - update_auto_increment(); - Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc. new_data is always record[0] old_data is normally record[1] but may be anything @@ -2969,17 +2981,23 @@ int ha_partition::update_row(const uchar *old_data, uchar *new_data) goto exit; } - /* - TODO: - set_internal_auto_increment= - max(set_internal_auto_increment, new_data->auto_increment) - */ m_last_part= new_part_id; if (new_part_id == old_part_id) { DBUG_PRINT("info", ("Update in partition %d", new_part_id)); tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */ error= m_file[new_part_id]->ha_update_row(old_data, new_data); + /* + if updating an auto_increment column, update + table_share->ha_data->next_auto_inc_val if needed. + (not to be used if auto_increment on secondary field in a multi- + column index) + mysql_update does not set table->next_number_field, so we use + table->found_next_number_field instead. + */ + if (table->found_next_number_field && new_data == table->record[0] && + !table->s->next_number_keypart) + set_auto_increment_if_higher(table->found_next_number_field->val_int()); reenable_binlog(thd); goto exit; } @@ -2989,6 +3007,9 @@ int ha_partition::update_row(const uchar *old_data, uchar *new_data) old_part_id, new_part_id)); tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */ error= m_file[new_part_id]->ha_write_row(new_data); + if (table->found_next_number_field && new_data == table->record[0] && + !table->s->next_number_keypart) + set_auto_increment_if_higher(table->found_next_number_field->val_int()); reenable_binlog(thd); if (error) goto exit; @@ -3084,8 +3105,17 @@ int ha_partition::delete_all_rows() { int error; handler **file; + THD *thd= ha_thd(); DBUG_ENTER("ha_partition::delete_all_rows"); + if (thd->lex->sql_command == SQLCOM_TRUNCATE) + { + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; + lock_auto_increment(); + ha_data->next_auto_inc_val= 0; + ha_data->auto_inc_initialized= FALSE; + unlock_auto_increment(); + } file= m_file; do { @@ -4596,21 +4626,54 @@ int ha_partition::handle_ordered_prev(uchar *buf) int ha_partition::info(uint flag) { - handler *file, **file_array; - DBUG_ENTER("ha_partition:info"); + DBUG_ENTER("ha_partition::info"); if (flag & HA_STATUS_AUTO) { - ulonglong auto_increment_value= 0; + bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0); + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; DBUG_PRINT("info", ("HA_STATUS_AUTO")); - file_array= m_file; - do + if (!table->found_next_number_field) + stats.auto_increment_value= 0; + else if (ha_data->auto_inc_initialized) { - file= *file_array; - file->info(HA_STATUS_AUTO); - set_if_bigger(auto_increment_value, file->stats.auto_increment_value); - } while (*(++file_array)); - stats.auto_increment_value= auto_increment_value; + lock_auto_increment(); + stats.auto_increment_value= ha_data->next_auto_inc_val; + unlock_auto_increment(); + } + else + { + lock_auto_increment(); + /* to avoid two concurrent initializations, check again when locked */ + if (ha_data->auto_inc_initialized) + stats.auto_increment_value= ha_data->next_auto_inc_val; + else + { + handler *file, **file_array; + ulonglong auto_increment_value= 0; + file_array= m_file; + DBUG_PRINT("info", + ("checking all partitions for auto_increment_value")); + do + { + file= *file_array; + file->info(HA_STATUS_AUTO); + set_if_bigger(auto_increment_value, + file->stats.auto_increment_value); + } while (*(++file_array)); + + DBUG_ASSERT(auto_increment_value); + stats.auto_increment_value= auto_increment_value; + if (auto_inc_is_first_in_idx) + { + set_if_bigger(ha_data->next_auto_inc_val, auto_increment_value); + ha_data->auto_inc_initialized= TRUE; + DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu", + (ulong) ha_data->next_auto_inc_val)); + } + } + unlock_auto_increment(); + } } if (flag & HA_STATUS_VARIABLE) { @@ -4634,6 +4697,7 @@ int ha_partition::info(uint flag) check_time: Time of last check (only applicable to MyISAM) We report last time of all underlying handlers */ + handler *file, **file_array; stats.records= 0; stats.deleted= 0; stats.data_file_length= 0; @@ -4715,6 +4779,7 @@ int ha_partition::info(uint flag) So we calculate these constants by using the variables on the first handler. */ + handler *file; file= m_file[0]; file->info(HA_STATUS_CONST); @@ -4736,6 +4801,7 @@ int ha_partition::info(uint flag) } if (flag & HA_STATUS_TIME) { + handler *file, **file_array; DBUG_PRINT("info", ("info: HA_STATUS_TIME")); /* This flag is used to set the latest update time of the table. @@ -5796,19 +5862,33 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2) MODULE auto increment ****************************************************************************/ -void ha_partition::restore_auto_increment(ulonglong) -{ - DBUG_ENTER("ha_partition::restore_auto_increment"); - DBUG_VOID_RETURN; +int ha_partition::reset_auto_increment(ulonglong value) +{ + handler **file= m_file; + int res; + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; + DBUG_ENTER("ha_partition::reset_auto_increment"); + lock_auto_increment(); + ha_data->auto_inc_initialized= FALSE; + ha_data->next_auto_inc_val= 0; + do + { + if ((res= (*file)->ha_reset_auto_increment(value)) != 0) + break; + } while (*(++file)); + unlock_auto_increment(); + DBUG_RETURN(res); } -/* +/** This method is called by update_auto_increment which in turn is called - by the individual handlers as part of write_row. We will always let - the first handler keep track of the auto increment value for all - partitions. + by the individual handlers as part of write_row. We use the + table_share->ha_data->next_auto_inc_val, or search all + partitions for the highest auto_increment_value if not initialized or + if auto_increment field is a secondary part of a key, we must search + every partition when holding a mutex to be sure of correctness. */ void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment, @@ -5816,59 +5896,88 @@ void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment, ulonglong *first_value, ulonglong *nb_reserved_values) { - ulonglong first_value_part, last_value_part, nb_reserved_values_part, - last_value= ~ (ulonglong) 0; - handler **pos, **end; - bool retry= TRUE; DBUG_ENTER("ha_partition::get_auto_increment"); - -again: - for (pos=m_file, end= m_file+ m_tot_parts; pos != end ; pos++) + DBUG_PRINT("info", ("offset: %lu inc: %lu desired_values: %lu " + "first_value: %lu", (ulong) offset, (ulong) increment, + (ulong) nb_desired_values, (ulong) *first_value)); + DBUG_ASSERT(increment && nb_desired_values); + *first_value= 0; + if (table->s->next_number_keypart) { - first_value_part= *first_value; - (*pos)->get_auto_increment(offset, increment, nb_desired_values, - &first_value_part, &nb_reserved_values_part); - if (first_value_part == ~(ulonglong)(0)) // error in one partition - { - *first_value= first_value_part; - sql_print_error("Partition failed to reserve auto_increment value"); - DBUG_VOID_RETURN; - } /* - Partition has reserved an interval. Intersect it with the intervals - already reserved for the previous partitions. + next_number_keypart is != 0 if the auto_increment column is a secondary + column in the index (it is allowed in MyISAM) */ - last_value_part= (nb_reserved_values_part == ULONGLONG_MAX) ? - ULONGLONG_MAX : (first_value_part + nb_reserved_values_part * increment); - set_if_bigger(*first_value, first_value_part); - set_if_smaller(last_value, last_value_part); + DBUG_PRINT("info", ("next_number_keypart != 0")); + ulonglong nb_reserved_values_part; + ulonglong first_value_part, max_first_value; + handler **file= m_file; + first_value_part= max_first_value= *first_value; + /* Must lock and find highest value among all partitions. */ + lock_auto_increment(); + do + { + /* Only nb_desired_values = 1 makes sense */ + (*file)->get_auto_increment(offset, increment, 1, + &first_value_part, &nb_reserved_values_part); + if (first_value_part == ~(ulonglong)(0)) // error in one partition + { + *first_value= first_value_part; + /* log that the error was between table/partition handler */ + sql_print_error("Partition failed to reserve auto_increment value"); + unlock_auto_increment(); + DBUG_VOID_RETURN; + } + DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part)); + set_if_bigger(max_first_value, first_value_part); + } while (*(++file)); + *first_value= max_first_value; + *nb_reserved_values= 1; + unlock_auto_increment(); } - if (last_value < *first_value) /* empty intersection, error */ + else { + THD *thd= ha_thd(); + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; /* - When we have an empty intersection, it means that one or more - partitions may have a significantly different autoinc next value. - We should not fail here - it just means that we should try to - find a new reservation making use of the current *first_value - wbich should now be compatible with all partitions. + This is initialized in the beginning of the first write_row call. */ - if (retry) - { - retry= FALSE; - last_value= ~ (ulonglong) 0; - release_auto_increment(); - goto again; - } + DBUG_ASSERT(ha_data->auto_inc_initialized); /* - We should not get here. + Get a lock for handling the auto_increment in table_share->ha_data + for avoiding two concurrent statements getting the same number. + */ + + lock_auto_increment(); + + /* + In a multi-row insert statement like INSERT SELECT and LOAD DATA + where the number of candidate rows to insert is not known in advance + we must hold a lock/mutex for the whole statement if we have statement + based replication. Because the statement-based binary log contains + only the first generated value used by the statement, and slaves assumes + all other generated values used by this statement were consecutive to + this first one, we must exclusively lock the generator until the statement + is done. */ - sql_print_error("Failed to calculate auto_increment value for partition"); - - *first_value= ~(ulonglong)(0); + if (!auto_increment_safe_stmt_log_lock && + thd->lex->sql_command != SQLCOM_INSERT && + mysql_bin_log.is_open() && + !thd->current_stmt_binlog_row_based && + (thd->options & OPTION_BIN_LOG)) + { + DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock")); + auto_increment_safe_stmt_log_lock= TRUE; + } + + /* this gets corrected (for offset/increment) in update_auto_increment */ + *first_value= ha_data->next_auto_inc_val; + ha_data->next_auto_inc_val+= nb_desired_values * increment; + + unlock_auto_increment(); + DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value)); + *nb_reserved_values= nb_desired_values; } - if (increment) // If not check for values - *nb_reserved_values= (last_value == ULONGLONG_MAX) ? - ULONGLONG_MAX : ((last_value - *first_value) / increment); DBUG_VOID_RETURN; } @@ -5876,9 +5985,31 @@ void ha_partition::release_auto_increment() { DBUG_ENTER("ha_partition::release_auto_increment"); - for (uint i= 0; i < m_tot_parts; i++) + if (table->s->next_number_keypart) { - m_file[i]->ha_release_auto_increment(); + for (uint i= 0; i < m_tot_parts; i++) + m_file[i]->ha_release_auto_increment(); + } + else if (next_insert_id) + { + HA_DATA_PARTITION *ha_data= (HA_DATA_PARTITION*) table_share->ha_data; + ulonglong next_auto_inc_val; + lock_auto_increment(); + next_auto_inc_val= ha_data->next_auto_inc_val; + if (next_insert_id < next_auto_inc_val && + auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val) + ha_data->next_auto_inc_val= next_insert_id; + DBUG_PRINT("info", ("ha_data->next_auto_inc_val: %lu", + (ulong) ha_data->next_auto_inc_val)); + + /* Unlock the multi row statement lock taken in get_auto_increment */ + if (auto_increment_safe_stmt_log_lock) + { + auto_increment_safe_stmt_log_lock= FALSE; + DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock")); + } + + unlock_auto_increment(); } DBUG_VOID_RETURN; } |