summaryrefslogtreecommitdiff
path: root/storage/innobase/include/ut0stage.h
blob: 1eb7810a1bbb97085f1329335b7806e3a711a5f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
/*****************************************************************************

Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/ut0stage.h
Supplementary code to performance schema stage instrumentation.

Created Nov 12, 2014 Vasil Dimov
*******************************************************/

#ifndef ut0stage_h
#define ut0stage_h

#include <algorithm>
#include <math.h>

#include "my_global.h" /* needed for headers from mysql/psi/ */

#include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */
#include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */

#include "dict0mem.h" /* dict_index_t */
#include "row0log.h" /* row_log_estimate_work() */
#include "srv0srv.h" /* ut_stage_alter_t */

#ifdef HAVE_PSI_STAGE_INTERFACE

/* Local alias: makes PSI_stage_progress a synonym for void, so the
PSI_stage_progress* member of ut_stage_alter_t is effectively a void*.
NOTE(review): presumably a stop-gap until the PSI headers used here
declare the real type (see the "TODO: MySQL 5.7 PSI" markers in this
file) - confirm before removing. */
typedef void PSI_stage_progress;

/** Class used to report ALTER TABLE progress via performance_schema.
The only user of this class is the ALTER TABLE code and it calls the methods
in the following order
constructor
begin_phase_read_pk()
  multiple times:
    n_pk_recs_inc() // once per record read
    inc() // once per page read
end_phase_read_pk()
if any new indexes are being added, for each one:
  begin_phase_sort()
    multiple times:
      inc() // once per record sorted
  begin_phase_insert()
    multiple times:
      inc() // once per record inserted
  begin_phase_log_index()
    multiple times:
      inc() // once per log-block applied
begin_phase_flush()
    multiple times:
      inc() // once per page flushed
begin_phase_log_table()
    multiple times:
      inc() // once per log-block applied
begin_phase_end()
destructor

This class knows the specifics of each phase and tries to increment the
progress in an even manner across the entire ALTER TABLE lifetime. */
class ut_stage_alter_t {
public:
	/** Constructor.
	@param[in]	pk	primary key of the old table */
	explicit
	ut_stage_alter_t(
		const dict_index_t*	pk)
		:
		m_progress(NULL),
		m_pk(pk),
		m_n_pk_recs(0),
		m_n_pk_pages(0),
		m_n_recs_processed(0),
		m_n_flush_pages(0),
		m_cur_phase(NOT_STARTED)
	{
	}

	/** Destructor. */
	~ut_stage_alter_t();

	/** Flag an ALTER TABLE start (read primary key phase).
	@param[in]	n_sort_indexes	number of indexes that will be sorted
	during ALTER TABLE, used for estimating the total work to be done */
	void
	begin_phase_read_pk(
		ulint	n_sort_indexes);

	/** Increment the number of records in PK (table) with 1.
	This is used to get more accurate estimate about the number of
	records per page which is needed because some phases work on
	per-page basis while some work on per-record basis and we want
	to get the progress as even as possible. */
	void
	n_pk_recs_inc();

	/** Flag either one record or one page processed, depending on the
	current phase.
	@param[in]	inc_val	flag this many units processed at once */
	void
	inc(
		ulint	inc_val = 1);

	/** Flag the end of reading of the primary key.
	Here we know the exact number of pages and records and calculate
	the number of records per page and refresh the estimate. */
	void
	end_phase_read_pk();

	/** Flag the beginning of the sort phase.
	@param[in]	sort_multi_factor	since merge sort processes
	one page more than once we only update the estimate once per this
	many pages processed. */
	void
	begin_phase_sort(
		double	sort_multi_factor);

	/** Flag the beginning of the insert phase. */
	void
	begin_phase_insert();

	/** Flag the beginning of the flush phase.
	@param[in]	n_flush_pages	this many pages are going to be
	flushed */
	void
	begin_phase_flush(
		ulint	n_flush_pages);

	/** Flag the beginning of the log index phase. */
	void
	begin_phase_log_index();

	/** Flag the beginning of the log table phase. */
	void
	begin_phase_log_table();

	/** Flag the beginning of the end phase. */
	void
	begin_phase_end();

private:

	/** Update the estimate of total work to be done. */
	void
	reestimate();

	/** Change the current phase.
	@param[in]	new_stage	pointer to the new stage to change to */
	void
	change_phase(
		const PSI_stage_info*	new_stage);

	/** Performance schema accounting object. */
	/* TODO: MySQL 5.7 PSI */
	PSI_stage_progress*	m_progress;

	/** Old table PK. Used for calculating the estimate. */
	const dict_index_t*	m_pk;

	/** Number of records in the primary key (table), including delete
	marked records. */
	ulint			m_n_pk_recs;

	/** Number of leaf pages in the primary key. */
	ulint			m_n_pk_pages;

	/** Estimated number of records per page in the primary key. */
	double			m_n_recs_per_page;

	/** Number of indexes that are being added. */
	ulint			m_n_sort_indexes;

	/** During the sort phase, increment the counter once per this
	many pages processed. This is because sort processes one page more
	than once. */
	ulint			m_sort_multi_factor;

	/** Number of records processed during sort & insert phases. We
	need to increment the counter only once page, or once per
	recs-per-page records. */
	ulint			m_n_recs_processed;

	/** Number of pages to flush. */
	ulint			m_n_flush_pages;

	/** Current phase. */
	enum {
		NOT_STARTED = 0,
		READ_PK = 1,
		SORT = 2,
		INSERT = 3,
		FLUSH = 4,
		/* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h
		LOG_INDEX = 5,
		LOG_TABLE = 6, */
		LOG_INNODB_INDEX = 5,
		LOG_INNODB_TABLE = 6,
		END = 7,
	}			m_cur_phase;
};

/** Destructor.
Performs no work at present: the PSI calls that would close the stage
are still commented out. */
inline
ut_stage_alter_t::~ut_stage_alter_t()
{
	if (m_progress != NULL) {
		/* TODO: MySQL 5.7 PSI: Set completed = estimated before
		we quit.
		mysql_stage_set_work_completed(
			m_progress,
			mysql_stage_get_work_estimated(m_progress));

		mysql_end_stage();
		*/
	}
}

/** Mark the start of ALTER TABLE, i.e. of the "read primary key" phase.
Records the number of indexes to sort, switches the current phase to
READ_PK and refreshes the work estimate.
@param[in]	n_sort_indexes	number of indexes that will be sorted
during ALTER TABLE; an input of the total work estimate */
inline
void
ut_stage_alter_t::begin_phase_read_pk(
	ulint	n_sort_indexes)
{
	/* Both members must be in place before reestimate() reads them. */
	m_cur_phase = READ_PK;
	m_n_sort_indexes = n_sort_indexes;

	/* TODO: MySQL 5.7 PSI
	m_progress = mysql_set_stage(
		srv_stage_alter_table_read_pk_internal_sort.m_key);

	mysql_stage_set_work_completed(m_progress, 0);
	*/

	reestimate();
}

/** Count one more record of the PK (table).
The record count, together with the page count, yields the
records-per-page ratio computed in end_phase_read_pk(). That ratio lets
per-record phases and per-page phases advance the progress at a
comparable pace. */
inline
void
ut_stage_alter_t::n_pk_recs_inc()
{
	++m_n_pk_recs;
}

/** Account for one processed unit - a page or a record, depending on
the current phase - and refresh the estimate when a reporting boundary
is reached.
The argument is currently ignored, because the code that would consume
it is disabled (see the TODO markers below). */
inline
void
ut_stage_alter_t::inc(ulint)
{
	/* Nothing to account for when no stage is being tracked. */
	if (m_progress == NULL) {
		return;
	}

	/* Cleared only by the per-record phases (SORT, INSERT) on calls
	that do not land on a reporting boundary. */
	bool	report = true;

	/* Number of pages' worth of records between two reports. Only
	the SORT phase changes it. */
	ulint	pages_per_report = 1;

	switch (m_cur_phase) {
	case NOT_STARTED:
		ut_error;
	case READ_PK:
		/* Called once per page read, so this counts PK pages. */
		++m_n_pk_pages;
#if 0 /* TODO: MySQL 5.7 PSI */
		ut_ad(inc_val == 1);
		/* Overall the read pk phase will read all the pages from the
		PK and will do work, proportional to the number of added
		indexes, thus when this is called once per read page we
		increment with 1 + m_n_sort_indexes */
		inc_val = 1 + m_n_sort_indexes;
#endif
		break;
	case SORT:
		pages_per_report = m_sort_multi_factor;
		/* fall through */
	case INSERT: {
		/* Called once per record here, while progress is reported
		per page. With N = (double) records per report, the calls
		that report are those numbered round(k*N), k=1,2,3...
		Fractional arithmetic is used to avoid rounding skew. */
		const ulint	seen = m_n_recs_processed++;

		const double	step = m_n_recs_per_page * pages_per_report;

		/* Multiple of "step" nearest to the records seen so far. */
		const ulint	multiple = static_cast<ulint>(
			round(seen / step));

		/* Record number at which that multiple is reached. */
		const ulint	boundary = static_cast<ulint>(
			round(multiple * step));

		report = (seen == boundary);

		break;
	}
	case FLUSH:
		break;
	/* JAN: TODO: MySQL 5.7
	case LOG_INDEX:
		break;
	case LOG_TABLE:
	break; */
	case LOG_INNODB_INDEX:
	case LOG_INNODB_TABLE:
		break;
	case END:
		break;
	}

	if (!report) {
		return;
	}

	/* TODO: MySQL 5.7 PSI
	mysql_stage_inc_work_completed(m_progress, inc_val);
	*/
	reestimate();
}

/** Flag the end of reading of the primary key.
Here we know the exact number of pages and records and calculate
the number of records per page and refresh the estimate. */
inline
void
ut_stage_alter_t::end_phase_read_pk()
{
	reestimate();

	if (m_n_pk_pages == 0) {
		/* The number of pages in the PK could be 0 if the tree is
		empty. In this case we set m_n_recs_per_page to 1 to avoid
		division by zero later. */
		m_n_recs_per_page = 1.0;
	} else {
		m_n_recs_per_page = std::max(
			static_cast<double>(m_n_pk_recs) / m_n_pk_pages,
			1.0);
	}
}

/** Mark the start of the sort phase.
@param[in]	sort_multi_factor	merge sort touches each page more
than once, so progress is advanced only once per this many pages
processed. Values up to and including 1.0 are stored as 1; larger values
are rounded to the nearest integer. */
inline
void
ut_stage_alter_t::begin_phase_sort(
	double	sort_multi_factor)
{
	m_sort_multi_factor = (sort_multi_factor <= 1.0)
		? 1
		: static_cast<ulint>(round(sort_multi_factor));

	change_phase(&srv_stage_alter_table_merge_sort);
}

/** Flag the beginning of the insert phase. */
inline
void
ut_stage_alter_t::begin_phase_insert()
{
	change_phase(&srv_stage_alter_table_insert);
}

/** Mark the start of the flush phase.
Stores the page count, refreshes the estimate and only then switches to
the srv_stage_alter_table_flush stage.
@param[in]	n_flush_pages	number of pages that are going to be
flushed */
inline
void
ut_stage_alter_t::begin_phase_flush(
	ulint	n_flush_pages)
{
	const PSI_stage_info* const	next_stage
		= &srv_stage_alter_table_flush;

	/* The estimate is refreshed while the previous phase is still
	current, using the page count just stored. */
	m_n_flush_pages = n_flush_pages;
	reestimate();

	change_phase(next_stage);
}

/** Flag the beginning of the log index phase. */
inline
void
ut_stage_alter_t::begin_phase_log_index()
{
	change_phase(&srv_stage_alter_table_log_index);
}

/** Flag the beginning of the log table phase. */
inline
void
ut_stage_alter_t::begin_phase_log_table()
{
	change_phase(&srv_stage_alter_table_log_table);
}

/** Flag the beginning of the end phase. */
inline
void
ut_stage_alter_t::begin_phase_end()
{
	change_phase(&srv_stage_alter_table_end);
}

/** Update the estimate of total work to be done. */
inline
void
ut_stage_alter_t::reestimate()
{
	if (m_progress == NULL) {
		return;
	}

	/* During the log table phase we calculate the estimate as
	work done so far + log size remaining. */
	if (m_cur_phase == LOG_INNODB_TABLE) {
		/* TODO: MySQL 5.7 PSI
		mysql_stage_set_work_estimated(
			m_progress,
			mysql_stage_get_work_completed(m_progress)
			+ row_log_estimate_work(m_pk));
		*/
		return;
	}

	/* During the other phases we use a formula, regardless of
	how much work has been done so far. */

	/* For number of pages in the PK - if the PK has not been
	read yet, use stat_n_leaf_pages (approximate), otherwise
	use the exact number we gathered. */
	const ulint	n_pk_pages
		= m_cur_phase != READ_PK
		? m_n_pk_pages
		: m_pk->stat_n_leaf_pages;

	/* If flush phase has not started yet and we do not know how
	many pages are to be flushed, then use a wild guess - the
	number of pages in the PK / 2. */
	if (m_n_flush_pages == 0) {
		m_n_flush_pages = n_pk_pages / 2;
	}

	ulonglong	estimate __attribute__((unused))
		= n_pk_pages
		* (1 /* read PK */
		   + m_n_sort_indexes /* row_merge_buf_sort() inside the
				      read PK per created index */
		   + m_n_sort_indexes * 2 /* sort & insert per created index */)
		+ m_n_flush_pages
		+ row_log_estimate_work(m_pk);

	/* Prevent estimate < completed */
	/* TODO: MySQL 5.7 PSI
	estimate = std::max(estimate,
			    mysql_stage_get_work_completed(m_progress));

	mysql_stage_set_work_estimated(m_progress, estimate);
	*/
}

/** Switch the current phase to the one that matches a stage descriptor.
Does nothing when no stage is being tracked. An unrecognized descriptor
triggers ut_error.
@param[in]	new_stage	address of one of the
srv_stage_alter_table_* descriptors; it is matched by address */
inline
void
ut_stage_alter_t::change_phase(
	const PSI_stage_info*	new_stage)
{
	if (m_progress == NULL) {
		return;
	}

	/* Map the descriptor to the matching phase constant. */
	if (&srv_stage_alter_table_end == new_stage) {
		m_cur_phase = END;
	/* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */
	} else if (&srv_stage_alter_table_log_table == new_stage) {
		m_cur_phase = LOG_INNODB_TABLE;
	} else if (&srv_stage_alter_table_log_index == new_stage) {
		m_cur_phase = LOG_INNODB_INDEX;
	} else if (&srv_stage_alter_table_flush == new_stage) {
		m_cur_phase = FLUSH;
	} else if (&srv_stage_alter_table_insert == new_stage) {
		m_cur_phase = INSERT;
	} else if (&srv_stage_alter_table_merge_sort == new_stage) {
		m_cur_phase = SORT;
	} else if (&srv_stage_alter_table_read_pk_internal_sort
		   == new_stage) {
		m_cur_phase = READ_PK;
	} else {
		ut_error;
	}

	/* The counters are meant to be carried over into the new
	stage once the PSI calls are available. */
	/* TODO: MySQL 5.7 PSI
	const ulonglong	c = mysql_stage_get_work_completed(m_progress);
	const ulonglong	e = mysql_stage_get_work_estimated(m_progress);

	m_progress = mysql_set_stage(new_stage->m_key);

	mysql_stage_set_work_completed(m_progress, c);
	mysql_stage_set_work_estimated(m_progress, e);
	*/
}
#else /* HAVE_PSI_STAGE_INTERFACE */

/** No-op variant of ut_stage_alter_t, compiled when
HAVE_PSI_STAGE_INTERFACE is not defined. Every member has an empty body
and all arguments are ignored. */
class ut_stage_alter_t {
public:
	/** Constructor; the primary key argument is ignored. */
	explicit
	ut_stage_alter_t(
		const dict_index_t*)
	{
	}

	/** No-op. */
	void
	begin_phase_read_pk(
		ulint)
	{
	}

	/** No-op. */
	void
	n_pk_recs_inc()
	{
	}

	/** No-op, for calls without an argument. */
	void
	inc()
	{
	}

	/** No-op, for calls with an explicit count. */
	void
	inc(
		ulint)
	{
	}

	/** No-op. */
	void
	end_phase_read_pk()
	{
	}

	/** No-op. */
	void
	begin_phase_sort(
		double)
	{
	}

	/** No-op. */
	void
	begin_phase_insert()
	{
	}

	/** No-op. */
	void
	begin_phase_flush(
		ulint)
	{
	}

	/** No-op. */
	void
	begin_phase_log_index()
	{
	}

	/** No-op. */
	void
	begin_phase_log_table()
	{
	}

	/** No-op. */
	void
	begin_phase_end()
	{
	}
};

#endif /* HAVE_PSI_STAGE_INTERFACE */

#endif /* ut0stage_h */