/***************************************************************************** Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA *****************************************************************************/ /**************************************************//** @file ut/ut0stage.h Supplementary code to performance schema stage instrumentation. Created Nov 12, 2014 Vasil Dimov *******************************************************/ #ifndef ut0stage_h #define ut0stage_h #include #include #include "my_global.h" /* needed for headers from mysql/psi/ */ #include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */ #include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */ #include "dict0mem.h" /* dict_index_t */ #include "row0log.h" /* row_log_estimate_work() */ #include "srv0srv.h" /* ut_stage_alter_t */ #ifdef HAVE_PSI_STAGE_INTERFACE typedef void PSI_stage_progress; /** Class used to report ALTER TABLE progress via performance_schema. The only user of this class is the ALTER TABLE code and it calls the methods in the following order constructor begin_phase_read_pk() multiple times: n_pk_recs_inc() // once per record read inc() // once per page read end_phase_read_pk() if any new indexes are being added, for each one: begin_phase_sort() multiple times: inc() // once per record sorted begin_phase_insert() multiple times: inc() // once per record inserted being_phase_log_index() multiple times: inc() // once per log-block applied begin_phase_flush() multiple times: inc() // once per page flushed begin_phase_log_table() multiple times: inc() // once per log-block applied begin_phase_end() destructor This class knows the specifics of each phase and tries to increment the progress in an even manner across the entire ALTER TABLE lifetime. */ class ut_stage_alter_t { public: /** Constructor. @param[in] pk primary key of the old table */ explicit ut_stage_alter_t( const dict_index_t* pk) : m_progress(NULL), m_pk(pk), m_n_pk_recs(0), m_n_pk_pages(0), m_n_recs_processed(0), m_n_flush_pages(0), m_cur_phase(NOT_STARTED) { } /** Destructor. */ ~ut_stage_alter_t(); /** Flag an ALTER TABLE start (read primary key phase). @param[in] n_sort_indexes number of indexes that will be sorted during ALTER TABLE, used for estimating the total work to be done */ void begin_phase_read_pk( ulint n_sort_indexes); /** Increment the number of records in PK (table) with 1. This is used to get more accurate estimate about the number of records per page which is needed because some phases work on per-page basis while some work on per-record basis and we want to get the progress as even as possible. */ void n_pk_recs_inc(); /** Flag either one record or one page processed, depending on the current phase. @param[in] inc_val flag this many units processed at once */ void inc( ulint inc_val = 1); /** Flag the end of reading of the primary key. Here we know the exact number of pages and records and calculate the number of records per page and refresh the estimate. */ void end_phase_read_pk(); /** Flag the beginning of the sort phase. @param[in] sort_multi_factor since merge sort processes one page more than once we only update the estimate once per this many pages processed. */ void begin_phase_sort( double sort_multi_factor); /** Flag the beginning of the insert phase. */ void begin_phase_insert(); /** Flag the beginning of the flush phase. @param[in] n_flush_pages this many pages are going to be flushed */ void begin_phase_flush( ulint n_flush_pages); /** Flag the beginning of the log index phase. */ void begin_phase_log_index(); /** Flag the beginning of the log table phase. */ void begin_phase_log_table(); /** Flag the beginning of the end phase. */ void begin_phase_end(); private: /** Update the estimate of total work to be done. */ void reestimate(); /** Change the current phase. @param[in] new_stage pointer to the new stage to change to */ void change_phase( const PSI_stage_info* new_stage); /** Performance schema accounting object. */ /* TODO: MySQL 5.7 PSI */ PSI_stage_progress* m_progress; /** Old table PK. Used for calculating the estimate. */ const dict_index_t* m_pk; /** Number of records in the primary key (table), including delete marked records. */ ulint m_n_pk_recs; /** Number of leaf pages in the primary key. */ ulint m_n_pk_pages; /** Estimated number of records per page in the primary key. */ double m_n_recs_per_page; /** Number of indexes that are being added. */ ulint m_n_sort_indexes; /** During the sort phase, increment the counter once per this many pages processed. This is because sort processes one page more than once. */ ulint m_sort_multi_factor; /** Number of records processed during sort & insert phases. We need to increment the counter only once page, or once per recs-per-page records. */ ulint m_n_recs_processed; /** Number of pages to flush. */ ulint m_n_flush_pages; /** Current phase. */ enum { NOT_STARTED = 0, READ_PK = 1, SORT = 2, INSERT = 3, FLUSH = 4, /* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h LOG_INDEX = 5, LOG_TABLE = 6, */ LOG_INNODB_INDEX = 5, LOG_INNODB_TABLE = 6, END = 7, } m_cur_phase; }; /** Destructor. */ inline ut_stage_alter_t::~ut_stage_alter_t() { if (m_progress == NULL) { return; } /* TODO: MySQL 5.7 PSI: Set completed = estimated before we quit. mysql_stage_set_work_completed( m_progress, mysql_stage_get_work_estimated(m_progress)); mysql_end_stage(); */ } /** Flag an ALTER TABLE start (read primary key phase). @param[in] n_sort_indexes number of indexes that will be sorted during ALTER TABLE, used for estimating the total work to be done */ inline void ut_stage_alter_t::begin_phase_read_pk( ulint n_sort_indexes) { m_n_sort_indexes = n_sort_indexes; m_cur_phase = READ_PK; /* TODO: MySQL 5.7 PSI m_progress = mysql_set_stage( srv_stage_alter_table_read_pk_internal_sort.m_key); mysql_stage_set_work_completed(m_progress, 0); */ reestimate(); } /** Increment the number of records in PK (table) with 1. This is used to get more accurate estimate about the number of records per page which is needed because some phases work on per-page basis while some work on per-record basis and we want to get the progress as even as possible. */ inline void ut_stage_alter_t::n_pk_recs_inc() { m_n_pk_recs++; } /** Flag either one record or one page processed, depending on the current phase. */ inline void ut_stage_alter_t::inc(ulint) { if (m_progress == NULL) { return; } ulint multi_factor = 1; bool should_proceed = true; switch (m_cur_phase) { case NOT_STARTED: ut_error; case READ_PK: m_n_pk_pages++; #if 0 /* TODO: MySQL 5.7 PSI */ ut_ad(inc_val == 1); /* Overall the read pk phase will read all the pages from the PK and will do work, proportional to the number of added indexes, thus when this is called once per read page we increment with 1 + m_n_sort_indexes */ inc_val = 1 + m_n_sort_indexes; #endif break; case SORT: multi_factor = m_sort_multi_factor; /* fall through */ case INSERT: { /* Increment the progress every nth record. During sort and insert phases, this method is called once per record processed. We need fractional point numbers here because "records per page" is such a number naturally and to avoid rounding skew we want, for example: if there are (double) N records per page, then the work_completed should be incremented on the inc() calls round(k*N), for k=1,2,3... */ const double every_nth = m_n_recs_per_page * multi_factor; const ulint k = static_cast( round(m_n_recs_processed / every_nth)); const ulint nth = static_cast( round(k * every_nth)); should_proceed = m_n_recs_processed == nth; m_n_recs_processed++; break; } case FLUSH: break; /* JAN: TODO: MySQL 5.7 case LOG_INDEX: break; case LOG_TABLE: break; */ case LOG_INNODB_INDEX: case LOG_INNODB_TABLE: break; case END: break; } if (should_proceed) { /* TODO: MySQL 5.7 PSI mysql_stage_inc_work_completed(m_progress, inc_val); */ reestimate(); } } /** Flag the end of reading of the primary key. Here we know the exact number of pages and records and calculate the number of records per page and refresh the estimate. */ inline void ut_stage_alter_t::end_phase_read_pk() { reestimate(); if (m_n_pk_pages == 0) { /* The number of pages in the PK could be 0 if the tree is empty. In this case we set m_n_recs_per_page to 1 to avoid division by zero later. */ m_n_recs_per_page = 1.0; } else { m_n_recs_per_page = std::max( static_cast(m_n_pk_recs) / m_n_pk_pages, 1.0); } } /** Flag the beginning of the sort phase. @param[in] sort_multi_factor since merge sort processes one page more than once we only update the estimate once per this many pages processed. */ inline void ut_stage_alter_t::begin_phase_sort( double sort_multi_factor) { if (sort_multi_factor <= 1.0) { m_sort_multi_factor = 1; } else { m_sort_multi_factor = static_cast( round(sort_multi_factor)); } change_phase(&srv_stage_alter_table_merge_sort); } /** Flag the beginning of the insert phase. */ inline void ut_stage_alter_t::begin_phase_insert() { change_phase(&srv_stage_alter_table_insert); } /** Flag the beginning of the flush phase. @param[in] n_flush_pages this many pages are going to be flushed */ inline void ut_stage_alter_t::begin_phase_flush( ulint n_flush_pages) { m_n_flush_pages = n_flush_pages; reestimate(); change_phase(&srv_stage_alter_table_flush); } /** Flag the beginning of the log index phase. */ inline void ut_stage_alter_t::begin_phase_log_index() { change_phase(&srv_stage_alter_table_log_index); } /** Flag the beginning of the log table phase. */ inline void ut_stage_alter_t::begin_phase_log_table() { change_phase(&srv_stage_alter_table_log_table); } /** Flag the beginning of the end phase. */ inline void ut_stage_alter_t::begin_phase_end() { change_phase(&srv_stage_alter_table_end); } /** Update the estimate of total work to be done. */ inline void ut_stage_alter_t::reestimate() { if (m_progress == NULL) { return; } /* During the log table phase we calculate the estimate as work done so far + log size remaining. */ if (m_cur_phase == LOG_INNODB_TABLE) { /* TODO: MySQL 5.7 PSI mysql_stage_set_work_estimated( m_progress, mysql_stage_get_work_completed(m_progress) + row_log_estimate_work(m_pk)); */ return; } /* During the other phases we use a formula, regardless of how much work has been done so far. */ /* For number of pages in the PK - if the PK has not been read yet, use stat_n_leaf_pages (approximate), otherwise use the exact number we gathered. */ const ulint n_pk_pages = m_cur_phase != READ_PK ? m_n_pk_pages : m_pk->stat_n_leaf_pages; /* If flush phase has not started yet and we do not know how many pages are to be flushed, then use a wild guess - the number of pages in the PK / 2. */ if (m_n_flush_pages == 0) { m_n_flush_pages = n_pk_pages / 2; } ulonglong estimate __attribute__((unused)) = n_pk_pages * (1 /* read PK */ + m_n_sort_indexes /* row_merge_buf_sort() inside the read PK per created index */ + m_n_sort_indexes * 2 /* sort & insert per created index */) + m_n_flush_pages + row_log_estimate_work(m_pk); /* Prevent estimate < completed */ /* TODO: MySQL 5.7 PSI estimate = std::max(estimate, mysql_stage_get_work_completed(m_progress)); mysql_stage_set_work_estimated(m_progress, estimate); */ } /** Change the current phase. @param[in] new_stage pointer to the new stage to change to */ inline void ut_stage_alter_t::change_phase( const PSI_stage_info* new_stage) { if (m_progress == NULL) { return; } if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) { m_cur_phase = READ_PK; } else if (new_stage == &srv_stage_alter_table_merge_sort) { m_cur_phase = SORT; } else if (new_stage == &srv_stage_alter_table_insert) { m_cur_phase = INSERT; } else if (new_stage == &srv_stage_alter_table_flush) { m_cur_phase = FLUSH; /* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */ } else if (new_stage == &srv_stage_alter_table_log_index) { m_cur_phase = LOG_INNODB_INDEX; } else if (new_stage == &srv_stage_alter_table_log_table) { m_cur_phase = LOG_INNODB_TABLE; } else if (new_stage == &srv_stage_alter_table_end) { m_cur_phase = END; } else { ut_error; } /* TODO: MySQL 5.7 PSI const ulonglong c = mysql_stage_get_work_completed(m_progress); const ulonglong e = mysql_stage_get_work_estimated(m_progress); m_progress = mysql_set_stage(new_stage->m_key); mysql_stage_set_work_completed(m_progress, c); mysql_stage_set_work_estimated(m_progress, e); */ } #else /* HAVE_PSI_STAGE_INTERFACE */ class ut_stage_alter_t { public: explicit ut_stage_alter_t(const dict_index_t*) {} void begin_phase_read_pk(ulint) {} void n_pk_recs_inc() {} void inc() {} void inc(ulint) {} void end_phase_read_pk() {} void begin_phase_sort(double) {} void begin_phase_insert() {} void begin_phase_flush(ulint) {} void begin_phase_log_index() {} void begin_phase_log_table() {} void begin_phase_end() {} }; #endif /* HAVE_PSI_STAGE_INTERFACE */ #endif /* ut0stage_h */