From 7a9405e3dc8741d50658e976b7e16c1807c2b9a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 09:40:12 +0200
Subject: MDEV-24270 Misuse of io_getevents() causes wake-ups at least twice
 per second

In the asynchronous I/O interface, InnoDB is invoking io_getevents()
with a timeout value of half a second, and requesting exactly 1 event
at a time.

The reason to have such a short timeout is to facilitate shutdown.

We can do better: Use an infinite timeout, wait for a larger maximum
number of events. On shutdown, we will invoke io_destroy(), which
should lead to the io_getevents system call reporting EINVAL.

my_getevents(): Reimplement the libaio io_getevents() by only invoking
the system call. The library implementation would try to elide the
system call and return 0 immediately if aio_ring_is_empty() holds.
Here, we do want a blocking system call, not 100% CPU usage. Neither
do we want the aio_ring_is_empty() trigger SIGSEGV because it is
dereferencing some memory that was freed by io_destroy().
---
 tpool/aio_linux.cc | 161 ++++++++++++++++++++++++++---------------------------
 1 file changed, 79 insertions(+), 82 deletions(-)
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index 24bc04c75ba..51b656a604b 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -1,4 +1,4 @@
-/* Copyright(C) 2019 MariaDB Corporation.
+/* Copyright (C) 2019, 2020, MariaDB Corporation.
 
 This program is free software; you can redistribute itand /or modify
 it under the terms of the GNU General Public License as published by
@@ -14,133 +14,133 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
 
 #include "tpool_structs.h"
-
-#include <stdlib.h>
-#include <signal.h>
-#include <assert.h>
 #include "tpool.h"
-#include <thread>
+
 #ifdef LINUX_NATIVE_AIO
-#include <libaio.h>
+# include <thread>
+# include <atomic>
+# include <libaio.h>
+# include <sys/syscall.h>
+
+/** A simpler alternative to io_getevents(), without
+aio_ring_is_empty() that may trigger SIGSEGV */
+static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
+{
+  int saved_errno= errno;
+  int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
+                   min_nr, nr, ev, 0);
+  if (ret < 0)
+  {
+    ret= -errno;
+    errno= saved_errno;
+  }
+  return ret;
+}
 #endif
 /*
   Linux AIO implementation, based on native AIO.
   Needs libaio.h and -laio at the compile time.
 
-  submit_io() is used to submit async IO.
+  io_submit() is used to submit async IO.
 
-  There is a single thread, that collects the completion notification
-  with  io_getevent(), and forwards io completion callback
+  A single thread will collect the completion notification
+  with io_getevents() and forward io completion callback to
   the worker threadpool.
 */
 namespace tpool
 {
 #ifdef LINUX_NATIVE_AIO
 
-class aio_linux : public aio
+class aio_linux final : public aio
 {
-  thread_pool* m_pool;
+  thread_pool *m_pool;
   io_context_t m_io_ctx;
-  bool m_in_shutdown;
   std::thread m_getevent_thread;
+  static std::atomic<bool> shutdown_in_progress;
 
-  static void getevent_thread_routine(aio_linux* aio)
+  static void getevent_thread_routine(aio_linux *aio)
   {
+    io_event events[1];
     for (;;)
     {
-      io_event event;
-      struct timespec ts{0, 500000000};
-      int ret = io_getevents(aio->m_io_ctx, 1, 1, &event, &ts);
-
-      if (aio->m_in_shutdown)
-        break;
-
-      if (ret > 0)
-      {
-        aiocb* iocb = (aiocb*)event.obj;
-        long long res = event.res;
-        if (res < 0)
+      switch (int ret= my_getevents(aio->m_io_ctx, 1, 1, events)) {
+      case -EINTR:
+      case 0:
+        continue;
+      case -EINVAL:
+        if (shutdown_in_progress)
+          return;
+        /* fall through */
+      default:
+        if (ret != 1)
         {
-          iocb->m_err = static_cast<int>(-res);
-          iocb->m_ret_len = 0;
+          fprintf(stderr, "io_getevents returned %d\n", ret);
+          abort();
+          return;
         }
         else
         {
-          iocb->m_ret_len = ret;
-          iocb->m_err = 0;
+          const io_event &event= events[0];
+          aiocb *iocb= static_cast<aiocb*>(event.obj);
+          if (static_cast<int>(event.res) < 0)
+          {
+            iocb->m_err= -event.res;
+            iocb->m_ret_len= 0;
+          }
+          else
+          {
+            iocb->m_ret_len= event.res;
+            iocb->m_err= 0;
+          }
+          iocb->m_internal_task.m_func= iocb->m_callback;
+          iocb->m_internal_task.m_arg= iocb;
+          iocb->m_internal_task.m_group= iocb->m_group;
+          aio->m_pool->submit_task(&iocb->m_internal_task);
         }
-
-        iocb->m_internal_task.m_func = iocb->m_callback;
-        iocb->m_internal_task.m_arg = iocb;
-        iocb->m_internal_task.m_group = iocb->m_group;
-        aio->m_pool->submit_task(&iocb->m_internal_task);
-        continue;
-      }
-      switch (ret)
-      {
-      case -EAGAIN:
-        usleep(1000);
-        continue;
-      case -EINTR:
-      case 0:
-        continue;
-      default:
-        fprintf(stderr, "io_getevents returned %d\n", ret);
-        abort();
       }
     }
   }
 
 public:
-  aio_linux(io_context_t ctx, thread_pool* pool)
+  aio_linux(io_context_t ctx, thread_pool *pool)
     : m_pool(pool), m_io_ctx(ctx),
-    m_in_shutdown(), m_getevent_thread(getevent_thread_routine, this)
+    m_getevent_thread(getevent_thread_routine, this)
   {
   }
 
   ~aio_linux()
   {
-    m_in_shutdown = true;
-    m_getevent_thread.join();
+    shutdown_in_progress= true;
     io_destroy(m_io_ctx);
+    m_getevent_thread.join();
+    shutdown_in_progress= false;
   }
 
-  // Inherited via aio
-  virtual int submit_io(aiocb* cb) override
+  int submit_io(aiocb *cb) override
   {
-
-    if (cb->m_opcode == aio_opcode::AIO_PREAD)
-      io_prep_pread((iocb *)cb, cb->m_fh, cb->m_buffer, cb->m_len,
-        cb->m_offset);
-    else
-      io_prep_pwrite((iocb *)cb, cb->m_fh, cb->m_buffer, cb->m_len,
-        cb->m_offset);
-
-    int ret;
-    ret = io_submit(m_io_ctx, 1, (iocb * *)& cb);
+    io_prep_pread(static_cast<iocb*>(cb), cb->m_fh, cb->m_buffer, cb->m_len,
+                  cb->m_offset);
+    if (cb->m_opcode != aio_opcode::AIO_PREAD)
+      cb->aio_lio_opcode= IO_CMD_PWRITE;
+    iocb *icb= static_cast<iocb*>(cb);
+    int ret= io_submit(m_io_ctx, 1, &icb);
     if (ret == 1)
       return 0;
-    errno = -ret;
+    errno= -ret;
     return -1;
   }
 
-  // Inherited via aio
-  virtual int bind(native_file_handle& fd) override
-  {
-    return 0;
-  }
-  virtual int unbind(const native_file_handle& fd) override
-  {
-    return 0;
-  }
+  int bind(native_file_handle&) override { return 0; }
+  int unbind(const native_file_handle&) override { return 0; }
 };
 
-aio* create_linux_aio(thread_pool* pool, int max_io)
+std::atomic<bool> aio_linux::shutdown_in_progress;
+
+aio *create_linux_aio(thread_pool *pool, int max_io)
 {
   io_context_t ctx;
-  memset(&ctx, 0, sizeof(ctx));
-  int ret = io_setup(max_io, &ctx);
-  if (ret)
+  memset(&ctx, 0, sizeof ctx);
+  if (int ret= io_setup(max_io, &ctx))
   {
     fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret);
     return nullptr;
@@ -148,9 +148,6 @@ aio* create_linux_aio(thread_pool* pool, int max_io)
   return new aio_linux(ctx, pool);
 }
 #else
-aio* create_linux_aio(thread_pool* pool, int max_aio)
-{
-  return nullptr;
-}
+aio *create_linux_aio(thread_pool*, int) { return nullptr; }
 #endif
 }
-- 
cgit v1.2.1


From 6479006e14691ff85072d06682f81b90875e9cb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 09:42:38 +0200
Subject: MDEV-24270: Collect multiple completed events at a time

tpool::aio::N_PENDING: Replaces OS_AIO_N_PENDING_IOS_PER_THREAD.
This limits two similar things: the number of outstanding requests
that a thread may io_submit(), and the number of completed requests
collected at a time by io_getevents().
---
 storage/innobase/buf/buf0buf.cc    |  2 +-
 storage/innobase/include/os0file.h |  3 ---
 storage/innobase/os/os0file.cc     |  4 ++--
 tpool/aio_linux.cc                 | 11 +++++------
 tpool/tpool.h                      |  2 ++
 5 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 2ddd9f278b1..f3ad84a4853 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1467,7 +1467,7 @@ bool buf_pool_t::create()
   ut_d(lru_scan_itr.m_mutex= &mutex);
 
   io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) *
-                OS_AIO_N_PENDING_IOS_PER_THREAD);
+                tpool::aio::N_PENDING);
 
   /* FIXME: remove some of these variables */
   srv_buf_pool_curr_size= curr_pool_size;
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 5e0039304b0..c82375e4f5f 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -265,9 +265,6 @@ struct os_file_size_t {
 	os_offset_t	m_alloc_size;
 };
 
-/** Win NT does not allow more than 64 */
-static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
-
 extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
 extern ulint	os_n_fsyncs;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index ee7f09ec17f..6e8aacbe799 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -4027,8 +4027,8 @@ static bool is_linux_native_aio_supported()
 
 bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
 {
-  int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
-  int max_read_events= int(n_reader_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
+  int max_write_events= int(n_writer_threads * tpool::aio::N_PENDING);
+  int max_read_events= int(n_reader_threads * tpool::aio::N_PENDING);
   int max_events = max_read_events + max_write_events;
 	int ret;
 
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index 51b656a604b..cb967c117e4 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -60,27 +60,26 @@ class aio_linux final : public aio
 
   static void getevent_thread_routine(aio_linux *aio)
   {
-    io_event events[1];
+    io_event events[N_PENDING];
     for (;;)
     {
-      switch (int ret= my_getevents(aio->m_io_ctx, 1, 1, events)) {
+      switch (int ret= my_getevents(aio->m_io_ctx, 1, N_PENDING, events)) {
       case -EINTR:
-      case 0:
         continue;
       case -EINVAL:
         if (shutdown_in_progress)
           return;
         /* fall through */
       default:
-        if (ret != 1)
+        if (ret < 0)
         {
           fprintf(stderr, "io_getevents returned %d\n", ret);
           abort();
           return;
         }
-        else
+        for (int i= 0; i < ret; i++)
         {
-          const io_event &event= events[0];
+          const io_event &event= events[i];
           aiocb *iocb= static_cast<aiocb*>(event.obj);
           if (static_cast<int>(event.res) < 0)
           {
diff --git a/tpool/tpool.h b/tpool/tpool.h
index 0d83af5bd74..a2f9a9133cb 100644
--- a/tpool/tpool.h
+++ b/tpool/tpool.h
@@ -155,6 +155,8 @@ struct aiocb
 class aio
 {
 public:
+  /** Maximum number of pending requests per thread */
+  static constexpr unsigned N_PENDING= 256;
   /**
     Submit asyncronous IO.
     On completion, cb->m_callback is executed.
-- 
cgit v1.2.1


From 4a22056c975762891db11eff1bcda157d65cb0b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 11:28:26 +0200
Subject: Cleanup: Remove redundant nonnull attributes

---
 storage/innobase/include/os0file.h  | 3 +--
 storage/innobase/include/trx0undo.h | 3 +--
 storage/innobase/srv/srv0start.cc   | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c82375e4f5f..d01a15c28ed 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -237,8 +237,7 @@ private:
   @param off   byte offset from the start (SEEK_SET)
   @param len   size of the hole in bytes
   @return DB_SUCCESS or error code */
-  dberr_t punch_hole(os_offset_t off, ulint len) const
-    MY_ATTRIBUTE((nonnull));
+  dberr_t punch_hole(os_offset_t off, ulint len) const;
 
 public:
   /** Page to be written on write operation */
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 9325f39a309..e032c53263a 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -160,8 +160,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
 NOTE: This corresponds to a redo log record and must not be changed!
 @see mtr_t::undo_create()
 @param[in,out]	block	undo log page */
-void trx_undo_page_init(const buf_block_t &block)
-	MY_ATTRIBUTE((nonnull));
+void trx_undo_page_init(const buf_block_t &block);
 
 /** Allocate an undo log page.
 @param[in,out]	undo	undo log
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index b53c7888509..6a5cc362791 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -341,7 +341,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
 @param[in,out]	logfile0	name of the first log file
 @return	error code
 @retval	DB_SUCCESS	on successful operation */
-MY_ATTRIBUTE((warn_unused_result, nonnull))
+MY_ATTRIBUTE((warn_unused_result))
 static dberr_t create_log_file_rename(lsn_t lsn, std::string &logfile0)
 {
   ut_ad(!srv_log_file_created);
-- 
cgit v1.2.1


From 3dfeae0e221c7ced19372383df5eef1bafc7fe6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 11:32:49 +0200
Subject: Cleanup: Fix Intel compiler warnings about sign conversions

---
 storage/innobase/btr/btr0cur.cc    | 4 ++--
 storage/innobase/include/rw_lock.h | 4 ++--
 storage/innobase/page/page0cur.cc  | 2 +-
 storage/innobase/row/row0merge.cc  | 4 ++--
 storage/innobase/trx/trx0rec.cc    | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 846d8ecfd7e..9efa2d8f8bd 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -4158,7 +4158,7 @@ void btr_cur_upd_rec_in_place(rec_t *rec, const dict_index_t *index,
 			}
 			ulint l = rec_get_1byte_offs_flag(rec)
 				? (n + 1) : (n + 1) * 2;
-			byte* b = &rec[-REC_N_OLD_EXTRA_BYTES - l];
+			byte* b = rec - REC_N_OLD_EXTRA_BYTES - l;
 			compile_time_assert(REC_1BYTE_SQL_NULL_MASK << 8
 					    == REC_2BYTE_SQL_NULL_MASK);
 			mtr->write<1>(*block, b,
@@ -4181,7 +4181,7 @@ void btr_cur_upd_rec_in_place(rec_t *rec, const dict_index_t *index,
 			ut_ad(len == rec_get_nth_field_size(rec, n));
 			ulint l = rec_get_1byte_offs_flag(rec)
 				? (n + 1) : (n + 1) * 2;
-			byte* b = &rec[-REC_N_OLD_EXTRA_BYTES - l];
+			byte* b = rec - REC_N_OLD_EXTRA_BYTES - l;
 			compile_time_assert(REC_1BYTE_SQL_NULL_MASK << 8
 					    == REC_2BYTE_SQL_NULL_MASK);
 			mtr->write<1>(*block, b,
diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h
index 9fcafacc426..b50a76fa779 100644
--- a/storage/innobase/include/rw_lock.h
+++ b/storage/innobase/include/rw_lock.h
@@ -30,9 +30,9 @@ protected:
   /** Available lock */
   static constexpr uint32_t UNLOCKED= 0;
   /** Flag to indicate that write_lock() is being held */
-  static constexpr uint32_t WRITER= 1 << 31;
+  static constexpr uint32_t WRITER= 1U << 31;
   /** Flag to indicate that write_lock_wait() is pending */
-  static constexpr uint32_t WRITER_WAITING= 1 << 30;
+  static constexpr uint32_t WRITER_WAITING= 1U << 30;
   /** Flag to indicate that write_lock() or write_lock_wait() is pending */
   static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
 
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index 5ca0d120a10..781884842ef 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -2683,7 +2683,7 @@ corrupted:
   data_len-= enc_hdr_l >> 3;
   data= &static_cast<const byte*>(data)[enc_hdr_l >> 3];
 
-  memcpy(buf, &prev_rec[-REC_N_NEW_EXTRA_BYTES - hdr_c], hdr_c);
+  memcpy(buf, prev_rec - REC_N_NEW_EXTRA_BYTES - hdr_c, hdr_c);
   buf+= hdr_c;
   *buf++= static_cast<byte>((enc_hdr_l & 3) << 4); /* info_bits; n_owned=0 */
   *buf++= static_cast<byte>(h >> 5); /* MSB of heap number */
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 558bf6d5a07..e306cb3429b 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1820,8 +1820,8 @@ row_merge_read_clustered_index(
 	based on that. */
 
 	clust_index = dict_table_get_first_index(old_table);
-	const ulint old_trx_id_col = DATA_TRX_ID - DATA_N_SYS_COLS
-		+ ulint(old_table->n_cols);
+	const ulint old_trx_id_col = ulint(old_table->n_cols)
+		- (DATA_N_SYS_COLS - DATA_TRX_ID);
 	ut_ad(old_table->cols[old_trx_id_col].mtype == DATA_SYS);
 	ut_ad(old_table->cols[old_trx_id_col].prtype
 	      == (DATA_TRX_ID | DATA_NOT_NULL));
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index 49b0a21330d..1df61ac5b56 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -2455,7 +2455,7 @@ trx_undo_prev_version_build(
 				      == rec_get_nth_field_size(rec, n));
 				ulint l = rec_get_1byte_offs_flag(*old_vers)
 					? (n + 1) : (n + 1) * 2;
-				(*old_vers)[-REC_N_OLD_EXTRA_BYTES - l]
+				*(*old_vers - REC_N_OLD_EXTRA_BYTES - l)
 					&= byte(~REC_1BYTE_SQL_NULL_MASK);
 			}
 		}
-- 
cgit v1.2.1


From 78df9e37a65bfdd32ec721a9152d5c94cbe89217 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub <wlad@mariadb.com>
Date: Wed, 25 Nov 2020 10:59:10 +0100
Subject: Partially Revert "MDEV-24270: Collect multiple completed events at a
 time"

This partially reverts commit 6479006e14691ff85072d06682f81b90875e9cb0.

Remove the constant tpool::aio::N_PENDING, which has no
intrinsic meaning for the tpool.
---
 storage/innobase/buf/buf0buf.cc    | 2 +-
 storage/innobase/include/os0file.h | 3 +++
 storage/innobase/os/os0file.cc     | 4 ++--
 tpool/aio_linux.cc                 | 6 ++++--
 tpool/tpool.h                      | 2 --
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index f3ad84a4853..2ddd9f278b1 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1467,7 +1467,7 @@ bool buf_pool_t::create()
   ut_d(lru_scan_itr.m_mutex= &mutex);
 
   io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) *
-                tpool::aio::N_PENDING);
+                OS_AIO_N_PENDING_IOS_PER_THREAD);
 
   /* FIXME: remove some of these variables */
   srv_buf_pool_curr_size= curr_pool_size;
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index d01a15c28ed..b723ba009a3 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -264,6 +264,9 @@ struct os_file_size_t {
 	os_offset_t	m_alloc_size;
 };
 
+/** Win NT does not allow more than 64 */
+static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
+
 extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
 extern ulint	os_n_fsyncs;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 6e8aacbe799..ee7f09ec17f 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -4027,8 +4027,8 @@ static bool is_linux_native_aio_supported()
 
 bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
 {
-  int max_write_events= int(n_writer_threads * tpool::aio::N_PENDING);
-  int max_read_events= int(n_reader_threads * tpool::aio::N_PENDING);
+  int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
+  int max_read_events= int(n_reader_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
   int max_events = max_read_events + max_write_events;
 	int ret;
 
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index cb967c117e4..03872df5ac7 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -51,6 +51,8 @@ namespace tpool
 {
 #ifdef LINUX_NATIVE_AIO
 
+#define MAX_EVENTS 256
+
 class aio_linux final : public aio
 {
   thread_pool *m_pool;
@@ -60,10 +62,10 @@ class aio_linux final : public aio
 
   static void getevent_thread_routine(aio_linux *aio)
   {
-    io_event events[N_PENDING];
+    io_event events[MAX_EVENTS];
     for (;;)
     {
-      switch (int ret= my_getevents(aio->m_io_ctx, 1, N_PENDING, events)) {
+      switch (int ret= my_getevents(aio->m_io_ctx, 1, MAX_EVENTS, events)) {
       case -EINTR:
         continue;
       case -EINVAL:
diff --git a/tpool/tpool.h b/tpool/tpool.h
index a2f9a9133cb..0d83af5bd74 100644
--- a/tpool/tpool.h
+++ b/tpool/tpool.h
@@ -155,8 +155,6 @@ struct aiocb
 class aio
 {
 public:
-  /** Maximum number of pending requests per thread */
-  static constexpr unsigned N_PENDING= 256;
   /**
     Submit asyncronous IO.
     On completion, cb->m_callback is executed.
-- 
cgit v1.2.1


From c130c60b2bb985bd15137ca471d15f690ef12dd7 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub <wlad@mariadb.com>
Date: Wed, 25 Nov 2020 11:12:09 +0100
Subject: Cleanup. Provide accurate comment on my_getevents().

---
 tpool/aio_linux.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index 03872df5ac7..c335c54964d 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -22,8 +22,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
 # include <libaio.h>
 # include <sys/syscall.h>
 
-/** A simpler alternative to io_getevents(), without
-aio_ring_is_empty() that may trigger SIGSEGV */
+/*
+  A hack, which so far seems to allow allow getevents thread to be interrupted
+  by io_destroy() from another thread
+
+  libaio's io_getevent() would sometimes crash when attempting this feat,
+  thus the raw syscall.
+
+*/
 static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
 {
   int saved_errno= errno;
@@ -37,6 +43,8 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
   return ret;
 }
 #endif
+
+
 /*
   Linux AIO implementation, based on native AIO.
   Needs libaio.h and -laio at the compile time.
-- 
cgit v1.2.1


From af98fddc2b5ce6bb79f0e0f57f0bb71d12a53fb9 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub <wlad@mariadb.com>
Date: Wed, 25 Nov 2020 11:12:53 +0100
Subject: Cleanup. Remove obsolete comment

---
 storage/innobase/include/os0file.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index b723ba009a3..760a68551f5 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -264,7 +264,6 @@ struct os_file_size_t {
 	os_offset_t	m_alloc_size;
 };
 
-/** Win NT does not allow more than 64 */
 static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
 
 extern ulint	os_n_file_reads;
-- 
cgit v1.2.1


From 2de95f7a1e741fd73aae33bd61c7aa16e4ad6698 Mon Sep 17 00:00:00 2001
From: Vladislav Vaintroub <wlad@mariadb.com>
Date: Wed, 25 Nov 2020 13:06:51 +0100
Subject: Fix misspelling.

Kudos to Marko for finding.
---
 tpool/tpool_generic.cc | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc
index 80fa87fb8c1..98237063ec2 100644
--- a/tpool/tpool_generic.cc
+++ b/tpool/tpool_generic.cc
@@ -230,19 +230,19 @@ class thread_pool_generic : public thread_pool
   /** Maximimum number of threads in this pool. */
   unsigned int m_max_threads;
 
-  /* Maintainence related statistics (see maintainence()) */
+  /* maintenance related statistics (see maintenance()) */
   size_t m_last_thread_count;
   unsigned long long m_last_activity;
-  std::unique_ptr<timer> m_maintaince_timer_task;
+  std::unique_ptr<timer> m_maintenance_timer_task;
 
   void worker_main(worker_data *thread_data);
   void worker_end(worker_data* thread_data);
 
   /* Checks threadpool responsiveness, adjusts thread_counts */
-  void maintainence();
-  static void maintainence_func(void* arg)
+  void maintenance();
+  static void maintenance_func(void* arg)
   {
-    ((thread_pool_generic *)arg)->maintainence();
+    ((thread_pool_generic *)arg)->maintenance();
   }
   bool add_thread();
   bool wake(worker_wake_reason reason, task *t = nullptr);
@@ -528,11 +528,11 @@ void thread_pool_generic::worker_main(worker_data *thread_var)
   Periodic job to fix thread count and concurrency,
   in case of long tasks, etc
 */
-void thread_pool_generic::maintainence()
+void thread_pool_generic::maintenance()
 {
   /*
     If pool is busy (i.e the its mutex is currently locked), we can
-    skip the maintainence task, some times, to lower mutex contention
+    skip the maintenance task, some times, to lower mutex contention
   */
   static int skip_counter;
   const int MAX_SKIPS = 10;
@@ -691,7 +691,7 @@ thread_pool_generic::thread_pool_generic(int min_threads, int max_threads) :
   m_max_threads(max_threads),
   m_last_thread_count(),
   m_last_activity(),
-  m_maintaince_timer_task()
+  m_maintenance_timer_task()
 {
 
   if (m_max_threads < m_concurrency)
@@ -703,8 +703,8 @@ thread_pool_generic::thread_pool_generic(int min_threads, int max_threads) :
 
   if (min_threads < max_threads)
   {
-    m_maintaince_timer_task.reset(new timer_generic(thread_pool_generic::maintainence_func, this, nullptr));
-    m_maintaince_timer_task->set_time((int)m_timer_interval.count(), (int)m_timer_interval.count());
+    m_maintenance_timer_task.reset(new timer_generic(thread_pool_generic::maintenance_func, this, nullptr));
+    m_maintenance_timer_task->set_time((int)m_timer_interval.count(), (int)m_timer_interval.count());
   }
 }
 
@@ -792,7 +792,7 @@ thread_pool_generic::~thread_pool_generic()
   m_aio.reset();
 
   /* Also stop the maintanence task early. */
-  m_maintaince_timer_task.reset();
+  m_maintenance_timer_task.reset();
 
   std::unique_lock<std::mutex> lk(m_mtx);
   m_in_shutdown= true;
-- 
cgit v1.2.1


From f693b72547e32e5b27e66146003d53fa3a4a67b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 16:08:26 +0200
Subject: MDEV-24270: Clarify some comments

---
 storage/innobase/include/os0file.h |  2 +-
 tpool/aio_linux.cc                 | 29 ++++++++++++++++++++---------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 760a68551f5..149da54b63c 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -264,7 +264,7 @@ struct os_file_size_t {
 	os_offset_t	m_alloc_size;
 };
 
-static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
+constexpr ulint OS_AIO_N_PENDING_IOS_PER_THREAD= 256;
 
 extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index c335c54964d..91d1d08c3ff 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -22,13 +22,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
 # include <libaio.h>
 # include <sys/syscall.h>
 
-/*
-  A hack, which so far seems to allow allow getevents thread to be interrupted
-  by io_destroy() from another thread
-
-  libaio's io_getevent() would sometimes crash when attempting this feat,
-  thus the raw syscall.
-
+/**
+  Invoke the io_getevents() system call.
+
+  @param ctx     context from io_setup()
+  @param min_nr  minimum number of completion events to wait for
+  @param nr      maximum number of completion events to collect
+  @param ev      the collected events
+
+  In https://pagure.io/libaio/c/7cede5af5adf01ad26155061cc476aad0804d3fc
+  the io_getevents() implementation in libaio was "optimized" so that it
+  would elide the system call when there are no outstanding requests
+  and a timeout was specified.
+
+  The libaio code for dereferencing ctx would occasionally trigger
+  SIGSEGV if io_destroy() was concurrently invoked from another thread.
+  Hence, we use the raw system call.
 */
 static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
 {
@@ -59,8 +68,6 @@ namespace tpool
 {
 #ifdef LINUX_NATIVE_AIO
 
-#define MAX_EVENTS 256
-
 class aio_linux final : public aio
 {
   thread_pool *m_pool;
@@ -70,6 +77,10 @@ class aio_linux final : public aio
 
   static void getevent_thread_routine(aio_linux *aio)
   {
+    /* We collect this many events at a time. os_aio_init() would
+    multiply OS_AIO_N_PENDING_THREADS by the number of read and write threads
+    and ultimately pass it to io_setup() via thread_pool::configure_aio(). */
+    constexpr unsigned MAX_EVENTS= 256;
     io_event events[MAX_EVENTS];
     for (;;)
     {
-- 
cgit v1.2.1


From 7b1252c03d7131754d9503560fe507b33ca1f8b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 16:09:47 +0200
Subject: MDEV-24278 InnoDB page cleaner keeps waking up on idle server

The purpose of the InnoDB page cleaner subsystem is to write out
modified pages from the buffer pool to data files. When the
innodb_max_dirty_pages_pct_lwm is not exceeded or
innodb_adaptive_flushing=ON decides not to write out anything,
the page cleaner should keep sleeping indefinitely until the state
of the system changes: a dirty page is added to the buffer pool such
that the page cleaner would no longer be idle.

buf_flush_page_cleaner(): Explicitly note when the page cleaner is idle.
When that happens, use mysql_cond_wait() instead of mysql_cond_timedwait().

buf_flush_insert_into_flush_list(): Wake up the page cleaner if needed.

innodb_max_dirty_pages_pct_update(),
innodb_max_dirty_pages_pct_lwm_update():
Wake up the page cleaner just in case.

Note: buf_flush_ahead(), buf_flush_wait_flushed() and shutdown are
already waking up the page cleaner thread.
---
 storage/innobase/buf/buf0flu.cc       | 33 ++++++++++++++++++++++++++++++---
 storage/innobase/handler/ha_innodb.cc |  2 ++
 storage/innobase/include/buf0buf.h    | 21 ++++++++++++++++++++-
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 316c1822f0b..dc7ae3af70e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -126,6 +126,20 @@ static void buf_flush_validate_skip()
 }
 #endif /* UNIV_DEBUG */
 
+/** Wake up the page cleaner if needed */
+inline void buf_pool_t::page_cleaner_wakeup()
+{
+  if (page_cleaner_idle() &&
+      (srv_max_dirty_pages_pct_lwm == 0.0 ||
+       srv_max_dirty_pages_pct_lwm <=
+       double(UT_LIST_GET_LEN(buf_pool.flush_list)) * 100.0 /
+       double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free))))
+  {
+    page_cleaner_is_idle= false;
+    mysql_cond_signal(&do_flush_list);
+  }
+}
+
 /** Insert a modified block into the flush list.
 @param[in,out]	block	modified block
 @param[in]	lsn	oldest modification */
@@ -145,6 +159,7 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
 
 	UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page);
 	ut_d(buf_flush_validate_skip());
+	buf_pool.page_cleaner_wakeup();
 	mysql_mutex_unlock(&buf_pool.flush_list_mutex);
 }
 
@@ -2067,8 +2082,12 @@ furious_flush:
     else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
       break;
 
-    mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
-                         &abstime);
+    if (buf_pool.page_cleaner_idle())
+      mysql_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex);
+    else
+      mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
+                           &abstime);
+
     set_timespec(abstime, 1);
 
     lsn_limit= buf_flush_sync_lsn;
@@ -2091,6 +2110,8 @@ furious_flush:
         /* wake up buf_flush_wait_flushed() */
         mysql_cond_broadcast(&buf_pool.done_flush_list);
       }
+unemployed:
+      buf_pool.page_cleaner_set_idle(true);
       continue;
     }
 
@@ -2101,13 +2122,14 @@ furious_flush:
       double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
 
     if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
-      continue;
+      goto unemployed;
 
     const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
 
     if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
       buf_flush_sync_lsn= 0;
 
+    buf_pool.page_cleaner_set_idle(false);
     mysql_mutex_unlock(&buf_pool.flush_list_mutex);
 
     ulint n_flushed;
@@ -2159,6 +2181,11 @@ do_checkpoint:
         goto do_checkpoint;
       }
     }
+    else
+    {
+      mysql_mutex_lock(&buf_pool.flush_list_mutex);
+      goto unemployed;
+    }
 
 #ifdef UNIV_DEBUG
     while (innodb_page_cleaner_disabled_debug && !buf_flush_sync_lsn &&
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index b12d6cfd695..9a47fb6b30f 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -17207,6 +17207,7 @@ innodb_max_dirty_pages_pct_update(
 				    in_val);
 
 		srv_max_dirty_pages_pct_lwm = in_val;
+		mysql_cond_signal(&buf_pool.do_flush_list);
 	}
 
 	srv_max_buf_pool_modified_pct = in_val;
@@ -17240,6 +17241,7 @@ innodb_max_dirty_pages_pct_lwm_update(
 	}
 
 	srv_max_dirty_pages_pct_lwm = in_val;
+	mysql_cond_signal(&buf_pool.do_flush_list);
 }
 
 /*************************************************************//**
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index df52d6b572c..e255f6db056 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1937,10 +1937,29 @@ public:
   FlushHp flush_hp;
   /** modified blocks (a subset of LRU) */
   UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
-
+private:
+  /** whether the page cleaner needs wakeup from indefinite sleep */
+  bool page_cleaner_is_idle;
+public:
   /** signalled to wake up the page_cleaner; protected by flush_list_mutex */
   mysql_cond_t do_flush_list;
 
+  /** @return whether the page cleaner must sleep due to being idle */
+  bool page_cleaner_idle() const
+  {
+    mysql_mutex_assert_owner(&flush_list_mutex);
+    return page_cleaner_is_idle;
+  }
+  /** Wake up the page cleaner if needed */
+  inline void page_cleaner_wakeup();
+
+  /** Register whether an explicit wakeup of the page cleaner is needed */
+  void page_cleaner_set_idle(bool deep_sleep)
+  {
+    mysql_mutex_assert_owner(&flush_list_mutex);
+    page_cleaner_is_idle= deep_sleep;
+  }
+
   // n_flush_LRU + n_flush_list is approximately COUNT(io_fix()==BUF_IO_WRITE)
   // in flush_list
 
-- 
cgit v1.2.1


From 657fcdf430f39a3103dff51a6a2b2bd3a090a498 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Wed, 25 Nov 2020 16:54:00 +0200
Subject: MDEV-24280 InnoDB triggers too many independent periodic tasks

A side effect of MDEV-16264 is that a large number of threads will
be created at server startup, to be destroyed after a minute or two.

One source of such thread creation is srv_start_periodic_timer().
InnoDB is creating 3 periodic tasks: srv_master_callback (1Hz)
srv_error_monitor_task (1Hz), and srv_monitor_task (0.2Hz).

It appears that we can merge srv_error_monitor_task and srv_monitor_task
and have them invoked 4 times per minute (every 15 seconds). This will
affect our ability to enforce innodb_fatal_semaphore_wait_threshold and
some computations around BUF_LRU_STAT_N_INTERVAL.

We could remove srv_master_callback along with the DROP TABLE queue
at some point of time in the future. We must keep it independent
of the innodb_fatal_semaphore_wait_threshold detection, because
the background DROP TABLE queue could get stuck due to dict_sys
being locked by another thread. For now, srv_master_callback
must be invoked once per second, so that
innodb_flush_log_at_timeout=1 can work.

BUF_LRU_STAT_N_INTERVAL: Reduce the precision and extend the time
from 50*1 second to 4*15 seconds.

srv_error_monitor_timer: Remove.

MAX_MUTEX_NOWAIT: Increase from 20*1 second to 2*15 seconds.

srv_refresh_innodb_monitor_stats(): Avoid a repeated call to time(NULL).
Change the interval to less than 60 seconds.

srv_monitor(): Renamed from srv_monitor_task.

srv_monitor_task(): Renamed from srv_error_monitor_task().
Invoked only once in 15 seconds. Invoke also srv_monitor().
Increase the fatal_cnt threshold from 10*1 second to 1*15 seconds.

sync_array_print_long_waits_low(): Invoke time(NULL) only once.
Remove a bogus message about printouts for 30 seconds. Those
printouts were effectively already disabled in MDEV-16264
(commit 5e62b6a5e06eb02cbde1e34e95e26f42d87fce02).
---
 storage/innobase/buf/buf0lru.cc    |  7 ++---
 storage/innobase/include/srv0srv.h | 13 +++------
 storage/innobase/log/log0log.cc    |  1 -
 storage/innobase/srv/srv0srv.cc    | 59 ++++++++++++++------------------------
 storage/innobase/srv/srv0start.cc  |  4 +--
 storage/innobase/sync/sync0arr.cc  | 16 ++---------
 6 files changed, 33 insertions(+), 67 deletions(-)

diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index f9ed938b20c..37a4ec9849c 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -76,13 +76,12 @@ uncompressed and compressed data), which must be clean. */
 /* @{ */
 
 /** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
+Updated at SRV_MONITOR_INTERVAL (the buf_LRU_stat_update() call rate). */
+static constexpr ulint BUF_LRU_STAT_N_INTERVAL= 4;
 
 /** Co-efficient with which we multiply I/O operations to equate them
 with page_zip_decompress() operations. */
-static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
+static constexpr ulint BUF_LRU_IO_TO_UNZIP_FACTOR= 50;
 
 /** Sampled values buf_LRU_stat_cur.
 Not protected by any mutex.  Updated by buf_LRU_stat_update(). */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 44712c5ae66..4fe90e8a0dc 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -702,13 +702,6 @@ Complete the shutdown tasks such as background DROP TABLE,
 and optionally change buffer merge (on innodb_fast_shutdown=0). */
 void srv_shutdown(bool ibuf_merge);
 
-
-/*************************************************************************
-A task which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-*/
-void srv_error_monitor_task(void*);
-
 } /* extern "C" */
 
 #ifdef UNIV_DEBUG
@@ -900,12 +893,14 @@ struct srv_slot_t{
 
 extern tpool::thread_pool *srv_thread_pool;
 extern std::unique_ptr<tpool::timer> srv_master_timer;
-extern std::unique_ptr<tpool::timer> srv_error_monitor_timer;
 extern std::unique_ptr<tpool::timer> srv_monitor_timer;
 
+/** The interval at which srv_monitor_task is invoked, in milliseconds */
+constexpr unsigned SRV_MONITOR_INTERVAL= 15000; /* 4 times per minute */
+
 static inline void srv_monitor_timer_schedule_now()
 {
-  srv_monitor_timer->set_time(0, 5000);
+  srv_monitor_timer->set_time(0, SRV_MONITOR_INTERVAL);
 }
 static inline void srv_start_periodic_timer(std::unique_ptr<tpool::timer>& t,
                                             void (*func)(void*), int period)
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index cd5ed353767..be1a341d9e5 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1011,7 +1011,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
 		!srv_read_only_mode && srv_fast_shutdown < 2) {
 		buf_dump_start();
 	}
-	srv_error_monitor_timer.reset();
 	srv_monitor_timer.reset();
 	lock_sys.timeout_timer.reset();
 	if (do_srv_shutdown) {
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 554e2043ad1..f62dfd87671 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -186,7 +186,7 @@ tpool::thread_pool* srv_thread_pool;
 
 /** Maximum number of times allowed to conditionally acquire
 mutex before switching to blocking wait on the mutex */
-#define MAX_MUTEX_NOWAIT	20
+#define MAX_MUTEX_NOWAIT	2
 
 /** Check whether the number of failed nonblocking mutex
 acquisition attempts exceeds maximum allowed value. If so,
@@ -555,8 +555,7 @@ struct purge_coordinator_state
 
 static purge_coordinator_state purge_state;
 
-/** threadpool timer for srv_error_monitor_task(). */
-std::unique_ptr<tpool::timer> srv_error_monitor_timer;
+/** threadpool timer for srv_monitor_task() */
 std::unique_ptr<tpool::timer> srv_monitor_timer;
 
 
@@ -769,16 +768,11 @@ srv_boot(void)
 
 /******************************************************************//**
 Refreshes the values used to calculate per-second averages. */
-static
-void
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
+static void srv_refresh_innodb_monitor_stats(time_t current_time)
 {
 	mutex_enter(&srv_innodb_monitor_mutex);
 
-	time_t current_time = time(NULL);
-
-	if (difftime(current_time, srv_last_monitor_time) <= 60) {
+	if (difftime(current_time, srv_last_monitor_time) < 60) {
 		/* We referesh InnoDB Monitor values so that averages are
 		printed from at most 60 last seconds */
 		mutex_exit(&srv_innodb_monitor_mutex);
@@ -1309,26 +1303,18 @@ struct srv_monitor_state_t
 static srv_monitor_state_t monitor_state;
 
 /** A task which prints the info output by various InnoDB monitors.*/
-void srv_monitor_task(void*)
+static void srv_monitor()
 {
-	double		time_elapsed;
-	time_t		current_time;
-
-	ut_ad(!srv_read_only_mode);
-
-	current_time = time(NULL);
-
-	time_elapsed = difftime(current_time, monitor_state.last_monitor_time);
+	time_t current_time = time(NULL);
 
-	if (time_elapsed > 15) {
+	if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
 		monitor_state.last_monitor_time = current_time;
 
 		if (srv_print_innodb_monitor) {
 			/* Reset mutex_skipped counter everytime
 			srv_print_innodb_monitor changes. This is to
 			ensure we will not be blocked by lock_sys.mutex
-			for short duration information printing,
-			such as requested by sync_array_print_long_waits() */
+			for short duration information printing */
 			if (!monitor_state.last_srv_print_monitor) {
 				monitor_state.mutex_skipped = 0;
 				monitor_state.last_srv_print_monitor = true;
@@ -1366,14 +1352,14 @@ void srv_monitor_task(void*)
 		}
 	}
 
-	srv_refresh_innodb_monitor_stats();
+	srv_refresh_innodb_monitor_stats(current_time);
 }
 
 /*********************************************************************//**
 A task which prints warnings about semaphore waits which have lasted
 too long. These can be used to track bugs which cause hangs.
 */
-void srv_error_monitor_task(void*)
+void srv_monitor_task(void*)
 {
 	/* number of successive fatal timeouts observed */
 	static ulint		fatal_cnt;
@@ -1408,20 +1394,17 @@ void srv_error_monitor_task(void*)
 	if (sync_array_print_long_waits(&waiter, &sema)
 	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-	  if (os_event_is_set(srv_allow_writes_event)) {
-#endif /* WITH_WSREP */
-		fatal_cnt++;
-#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-	  } else {
-		fprintf(stderr,
-			"WSREP: avoiding InnoDB self crash due to long "
-			"semaphore wait of  > %lu seconds\n"
-			"Server is processing SST donor operation, "
-			"fatal_cnt now: " ULINTPF,
-			srv_fatal_semaphore_wait_threshold, fatal_cnt);
-	  }
+		if (!os_event_is_set(srv_allow_writes_event)) {
+			fprintf(stderr,
+				"WSREP: avoiding InnoDB self crash due to "
+				"long semaphore wait of  > %lu seconds\n"
+				"Server is processing SST donor operation, "
+				"fatal_cnt now: " ULINTPF,
+				srv_fatal_semaphore_wait_threshold, fatal_cnt);
+			return;
+		}
 #endif /* WITH_WSREP */
-		if (fatal_cnt > 10) {
+		if (fatal_cnt++) {
 			ib::fatal() << "Semaphore wait has lasted > "
 				<< srv_fatal_semaphore_wait_threshold
 				<< " seconds. We intentionally crash the"
@@ -1432,6 +1415,8 @@ void srv_error_monitor_task(void*)
 		old_waiter = waiter;
 		old_sema = sema;
 	}
+
+	srv_monitor();
 }
 
 /******************************************************************//**
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 6a5cc362791..a25335eba84 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1826,8 +1826,8 @@ file_checked:
 
 		DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;);
 		/* Create the task which warns of long semaphore waits */
-		srv_start_periodic_timer(srv_error_monitor_timer, srv_error_monitor_task, 1000);
-		srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task, 5000);
+		srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task,
+					 SRV_MONITOR_INTERVAL);
 
 #ifndef DBUG_OFF
 skip_monitors:
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 4b6f818000c..4ae3b02c071 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -893,6 +893,7 @@ sync_array_print_long_waits_low(
 #else
 # define SYNC_ARRAY_TIMEOUT	240
 #endif
+	const time_t now = time(NULL);
 
 	for (ulint i = 0; i < arr->n_cells; i++) {
 
@@ -908,7 +909,7 @@ sync_array_print_long_waits_low(
 			continue;
 		}
 
-		double	diff = difftime(time(NULL), cell->reservation_time);
+		double	diff = difftime(now, cell->reservation_time);
 
 		if (diff > SYNC_ARRAY_TIMEOUT) {
 			ib::warn() << "A long semaphore wait:";
@@ -982,12 +983,6 @@ sync_array_print_long_waits(
 	}
 
 	if (noticed) {
-		fprintf(stderr,
-			"InnoDB: ###### Starts InnoDB Monitor"
-			" for 30 secs to print diagnostic info:\n");
-
-		my_bool old_val = srv_print_innodb_monitor;
-
 		/* If some crucial semaphore is reserved, then also the InnoDB
 		Monitor can hang, and we do not get diagnostics. Since in
 		many cases an InnoDB hang is caused by a pwrite() or a pread()
@@ -1000,14 +995,7 @@ sync_array_print_long_waits(
 			MONITOR_VALUE(MONITOR_OS_PENDING_READS),
 			MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
 
-		srv_print_innodb_monitor = TRUE;
-
 		lock_wait_timeout_task(nullptr);
-
-		srv_print_innodb_monitor = static_cast<my_bool>(old_val);
-		fprintf(stderr,
-			"InnoDB: ###### Diagnostic info printed"
-			" to the standard error stream\n");
 	}
 
 	return(fatal);
-- 
cgit v1.2.1