1 files changed, 330 insertions, 422 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 4ec6a61ccb9..4fb0ac37309 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -277,6 +277,7 @@ the read requests for the whole area.
 */
 
 #ifndef UNIV_INNOCHECKSUM
+# ifdef SUX_LOCK_GENERIC
 void page_hash_latch::read_lock_wait()
 {
   /* First, try busy spinning for a while. */
@@ -309,6 +310,7 @@ void page_hash_latch::write_lock_wait()
     std::this_thread::yield();
   while (!write_lock_poll());
 }
+# endif
 
 constexpr std::chrono::microseconds WAIT_FOR_READ(100);
 constexpr int WAIT_FOR_WRITE= 100;
@@ -1145,7 +1147,7 @@ void buf_pool_t::page_hash_table::create(ulint n)
   const size_t size= pad(n_cells) * sizeof *array;
   void* v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE);
   memset(v, 0, size);
-  array= static_cast<hash_cell_t*>(v);
+  array= static_cast<hash_chain*>(v);
 }
 
 /** Create the buffer pool.
@@ -1334,9 +1336,14 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 		return(false); /* free list was not enough */
 	}
 
-	const page_id_t id(block->page.id());
-	page_hash_latch* hash_lock = hash_lock_get(id);
-	hash_lock->write_lock();
+	const page_id_t id{block->page.id()};
+	hash_chain& chain = page_hash.cell_get(id.fold());
+	page_hash_latch& hash_lock = page_hash.lock_get(chain);
+	/* It does not make sense to use transactional_lock_guard
+	here, because copying innodb_page_size (4096 to 65536) bytes
+	as well as other changes would likely make the memory
+	transaction too large. */
+	hash_lock.lock();
 
 	if (block->page.can_relocate()) {
 		memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
@@ -1380,14 +1387,10 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 		}
 
 		/* relocate page_hash */
-		ut_ad(block->page.in_page_hash);
-		ut_ad(new_block->page.in_page_hash);
-		const ulint fold = id.fold();
-		ut_ad(&block->page == page_hash_get_low(id, fold));
-		ut_d(block->page.in_page_hash = false);
-		HASH_REPLACE(buf_page_t, hash, &page_hash, fold,
-			     &block->page, &new_block->page);
-
+		hash_chain& chain = page_hash.cell_get(id.fold());
+		ut_ad(&block->page == page_hash.get(id, chain));
+		buf_pool.page_hash.replace(chain, &block->page,
+					   &new_block->page);
 		buf_block_modify_clock_inc(block);
 		static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
 		memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
@@ -1422,7 +1425,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 		new_block = block;
 	}
 
-	hash_lock->write_unlock();
+	hash_lock.unlock();
 	buf_LRU_block_free_non_file_page(new_block);
 	return(true); /* free_list was enough */
 }
@@ -1505,7 +1508,7 @@ inline bool buf_pool_t::withdraw_blocks()
 				std::max<ulint>(withdraw_target
 						- UT_LIST_GET_LEN(withdraw),
 						srv_LRU_scan_depth));
-			buf_flush_wait_batch_end_acquiring_mutex(true);
+			buf_flush_wait_LRU_batch_end_acquiring_mutex();
 		}
 
 		/* relocate blocks/buddies in withdrawn area */
@@ -1597,7 +1600,7 @@ inline void buf_pool_t::page_hash_table::write_lock_all()
 {
   for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
   {
-    reinterpret_cast<page_hash_latch&>(array[n]).write_lock();
+    reinterpret_cast<page_hash_latch&>(array[n]).lock();
     if (!n)
       break;
   }
@@ -1608,7 +1611,7 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
 {
   for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1)
   {
-    reinterpret_cast<page_hash_latch&>(array[n]).write_unlock();
+    reinterpret_cast<page_hash_latch&>(array[n]).unlock();
     if (!n)
       break;
   }
@@ -1743,6 +1746,8 @@ withdraw_retry:
 			{found, withdraw_started, my_hrtime_coarse()};
 		withdraw_started = current_time;
 
+		/* This is going to exceed the maximum size of a
+		memory transaction. */
 		LockMutexGuard g{SRW_LOCK_CALL};
 		trx_sys.trx_list.for_each(f);
 	}
@@ -2047,13 +2052,14 @@ The caller must relocate bpage->list.
 @param dpage   destination control block */
 static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
 {
-  const ulint fold= bpage->id().fold();
+  const page_id_t id= bpage->id();
+  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold());
   ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
   mysql_mutex_assert_owner(&buf_pool.mutex);
-  ut_ad(buf_pool.hash_lock_get(bpage->id())->is_write_locked());
+  ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked());
   ut_a(bpage->io_fix() == BUF_IO_NONE);
   ut_a(!bpage->buf_fix_count());
-  ut_ad(bpage == buf_pool.page_hash_get_low(bpage->id(), fold));
+  ut_ad(bpage == buf_pool.page_hash.get(id, chain));
   ut_ad(!buf_pool.watch_is_sentinel(*bpage));
   ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
 
@@ -2088,29 +2094,24 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
 
   ut_d(CheckInLRUList::validate());
 
-  /* relocate buf_pool.page_hash */
-  ut_ad(bpage->in_page_hash);
-  ut_ad(dpage->in_page_hash);
-  ut_d(bpage->in_page_hash= false);
-  HASH_REPLACE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage, dpage);
+  buf_pool.page_hash.replace(chain, bpage, dpage);
 }
 
 /** Register a watch for a page identifier. The caller must hold an
 exclusive page hash latch. The *hash_lock may be released,
 relocated, and reacquired.
 @param id         page identifier
-@param hash_lock  exclusively held page_hash latch
+@param chain      hash table chain with exclusively held page_hash latch
 @return a buffer pool block corresponding to id
 @retval nullptr   if the block was not present, and a watch was installed */
 inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
-                                         page_hash_latch **hash_lock)
+                                         buf_pool_t::hash_chain &chain)
 {
-  const ulint fold= id.fold();
-  ut_ad(*hash_lock == page_hash.lock_get(fold));
-  ut_ad((*hash_lock)->is_write_locked());
+  ut_ad(&chain == &page_hash.cell_get(id.fold()));
+  ut_ad(page_hash.lock_get(chain).is_write_locked());
 
 retry:
-  if (buf_page_t *bpage= page_hash_get_low(id, fold))
+  if (buf_page_t *bpage= page_hash.get(id, chain))
   {
     if (!watch_is_sentinel(*bpage))
       /* The page was loaded meanwhile. */
@@ -2120,7 +2121,7 @@ retry:
     return nullptr;
   }
 
-  (*hash_lock)->write_unlock();
+  page_hash.lock_get(chain).unlock();
   /* Allocate a watch[] and then try to insert it into the page_hash. */
   mysql_mutex_lock(&mutex);
 
@@ -2140,28 +2141,23 @@ retry:
     ut_ad(!w->buf_fix_count());
     /* w is pointing to watch[], which is protected by mutex.
     Normally, buf_page_t::id for objects that are reachable by
-    page_hash_get_low(id, fold) are protected by hash_lock. */
+    page_hash.get(id, chain) are protected by hash_lock. */
     w->set_state(BUF_BLOCK_ZIP_PAGE);
     w->id_= id;
 
-    *hash_lock= page_hash.lock_get(fold);
-
-    buf_page_t *bpage= page_hash_get_low(id, fold);
+    buf_page_t *bpage= page_hash.get(id, chain);
     if (UNIV_LIKELY_NULL(bpage))
     {
       w->set_state(BUF_BLOCK_NOT_USED);
-      *hash_lock= page_hash.lock_get(fold);
-      (*hash_lock)->write_lock();
+      page_hash.lock_get(chain).lock();
       mysql_mutex_unlock(&mutex);
       goto retry;
     }
 
-    (*hash_lock)->write_lock();
+    page_hash.lock_get(chain).lock();
     ut_ad(!w->buf_fix_count_);
     w->buf_fix_count_= 1;
-    ut_ad(!w->in_page_hash);
-    ut_d(w->in_page_hash= true);
-    HASH_INSERT(buf_page_t, hash, &page_hash, fold, w);
+    buf_pool.page_hash.append(chain, w);
     mysql_mutex_unlock(&mutex);
     return nullptr;
   }
@@ -2173,50 +2169,57 @@ retry:
 
 /** Stop watching whether a page has been read in.
 watch_set(id) must have returned nullptr before.
-@param id   page identifier */
-void buf_pool_t::watch_unset(const page_id_t id)
+@param id         page identifier
+@param chain      unlocked hash table chain */
+TRANSACTIONAL_TARGET
+void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain)
 {
   mysql_mutex_assert_not_owner(&mutex);
-  const ulint fold= id.fold();
-  page_hash_latch *hash_lock= page_hash.lock<true>(fold);
-  /* The page must exist because watch_set() increments buf_fix_count. */
-  buf_page_t *w= page_hash_get_low(id, fold);
-  const auto buf_fix_count= w->buf_fix_count();
-  ut_ad(buf_fix_count);
-  const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w);
-  ut_ad(w->in_page_hash);
-  if (!must_remove)
-    w->unfix();
-  hash_lock->write_unlock();
-
-  if (must_remove)
+  buf_page_t *w;
   {
-    const auto old= w;
-    /* The following is based on buf_pool_t::watch_remove(). */
-    mysql_mutex_lock(&mutex);
-    w= page_hash_get_low(id, fold);
-    page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
-    hash_lock->write_lock();
+    transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)};
+    /* The page must exist because watch_set() increments buf_fix_count. */
+    w= page_hash.get(id, chain);
+    const auto buf_fix_count= w->buf_fix_count();
+    ut_ad(buf_fix_count);
+    ut_ad(w->in_page_hash);
+    if (buf_fix_count != 1 || !watch_is_sentinel(*w))
+    {
+      w->unfix();
+      w= nullptr;
+    }
+  }
+
+  if (!w)
+    return;
+
+  const auto old= w;
+  /* The following is based on buf_pool_t::watch_remove(). */
+  mysql_mutex_lock(&mutex);
+  w= page_hash.get(id, chain);
+
+  {
+    transactional_lock_guard<page_hash_latch> g
+      {buf_pool.page_hash.lock_get(chain)};
     if (w->unfix() == 0 && w == old)
     {
-      ut_ad(w->in_page_hash);
-      ut_d(w->in_page_hash= false);
-      HASH_DELETE(buf_page_t, hash, &page_hash, fold, w);
-      // Now that the watch is detached from page_hash, release it to watch[].
+      page_hash.remove(chain, w);
+      // Now that w is detached from page_hash, release it to watch[].
       ut_ad(w->id_ == id);
       ut_ad(!w->buf_fix_count());
       ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE);
       w->set_state(BUF_BLOCK_NOT_USED);
     }
-    hash_lock->write_unlock();
-    mysql_mutex_unlock(&mutex);
   }
+
+  mysql_mutex_unlock(&mutex);
 }
 
 /** Mark the page status as FREED for the given tablespace and page number.
 @param[in,out]	space	tablespace
 @param[in]	page	page number
 @param[in,out]	mtr	mini-transaction */
+TRANSACTIONAL_TARGET
 void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
 {
   ut_ad(mtr);
@@ -2231,28 +2234,23 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
 
   ++buf_pool.stat.n_page_gets;
   const page_id_t page_id(space->id, page);
-  const ulint fold= page_id.fold();
-  page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
-  if (buf_block_t *block= reinterpret_cast<buf_block_t*>
-      (buf_pool.page_hash_get_low(page_id, fold)))
+  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
+  buf_block_t *block;
   {
-    if (block->page.state() != BUF_BLOCK_FILE_PAGE)
-      /* FIXME: convert, but avoid buf_zip_decompress() */;
-    else
-    {
-      buf_block_buf_fix_inc(block);
-      ut_ad(block->page.buf_fix_count());
-      hash_lock->read_unlock();
-
-      mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
-      block->lock.x_lock();
-
-      block->page.status= buf_page_t::FREED;
+    transactional_shared_lock_guard<page_hash_latch> g
+      {buf_pool.page_hash.lock_get(chain)};
+    block= reinterpret_cast<buf_block_t*>
+      (buf_pool.page_hash.get(page_id, chain));
+    if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
+      /* FIXME: convert ROW_FORMAT=COMPRESSED, without buf_zip_decompress() */
       return;
-    }
+    block->fix();
   }
+  ut_ad(block->page.buf_fix_count());
 
-  hash_lock->read_unlock();
+  mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
+  block->lock.x_lock();
+  block->page.status= buf_page_t::FREED;
 }
 
 /** Get read access to a compressed page (usually of type
@@ -2265,80 +2263,48 @@ the same set of mutexes or latches.
 @param[in]	page_id		page id
 @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size
 @return pointer to the block */
+TRANSACTIONAL_TARGET
 buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
 {
   ut_ad(zip_size);
   ut_ad(ut_is_2pow(zip_size));
   ++buf_pool.stat.n_page_gets;
 
-  bool discard_attempted= false;
-  const ulint fold= page_id.fold();
+  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
+  page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
   buf_page_t *bpage;
-  page_hash_latch *hash_lock;
 
-  for (;;)
-  {
 lookup:
-    bpage= buf_pool.page_hash_get_locked<false>(page_id, fold, &hash_lock);
-    if (bpage)
-      break;
-
-    dberr_t err= buf_read_page(page_id, zip_size);
-
-    if (UNIV_UNLIKELY(err != DB_SUCCESS))
+  for (bool discard_attempted= false;;)
+  {
     {
-      ib::error() << "Reading compressed page " << page_id
-                  << " failed with error: " << err;
-      goto err_exit;
-    }
+      transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
+      bpage= buf_pool.page_hash.get(page_id, chain);
+      if (!bpage || buf_pool.watch_is_sentinel(*bpage))
+        goto must_read_page;
 
-#ifdef UNIV_DEBUG
-    if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
-#endif /* UNIV_DEBUG */
-  }
+      ut_ad(bpage->in_file());
+      ut_ad(page_id == bpage->id());
 
-  ut_ad(hash_lock->is_read_locked());
+      if (!bpage->zip.data)
+        /* There is no ROW_FORMAT=COMPRESSED page. */
+        return nullptr;
 
-  if (!bpage->zip.data)
-  {
-    /* There is no compressed page. */
-err_exit:
-    hash_lock->read_unlock();
-    return nullptr;
-  }
-
-  ut_ad(!buf_pool.watch_is_sentinel(*bpage));
-
-  switch (bpage->state()) {
-  case BUF_BLOCK_FILE_PAGE:
-    /* Discard the uncompressed page frame if possible. */
-    if (!discard_attempted)
-    {
-      discard_attempted= true;
-      hash_lock->read_unlock();
-      mysql_mutex_lock(&buf_pool.mutex);
-      if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold))
-        buf_LRU_free_page(bpage, false);
-      mysql_mutex_unlock(&buf_pool.mutex);
-      goto lookup;
+      if (discard_attempted || bpage->state() == BUF_BLOCK_ZIP_PAGE)
+      {
+        bpage->fix();
+        break;
+      }
     }
-    /* fall through */
-  case BUF_BLOCK_ZIP_PAGE:
-    bpage->fix();
-    goto got_block;
-  default:
-    break;
-  }
 
-  ut_error;
-  goto err_exit;
-
-got_block:
-  bool must_read= bpage->io_fix() == BUF_IO_READ;
-  hash_lock->read_unlock();
+    discard_attempted= true;
+    mysql_mutex_lock(&buf_pool.mutex);
+    if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
+      buf_LRU_free_page(bpage, false);
+    mysql_mutex_unlock(&buf_pool.mutex);
+  }
 
   DBUG_ASSERT(bpage->status != buf_page_t::FREED);
-
   bpage->set_accessed();
   buf_page_make_young_if_needed(bpage);
 
@@ -2348,12 +2314,19 @@ got_block:
   ut_ad(bpage->buf_fix_count());
   ut_ad(bpage->in_file());
 
-  if (must_read)
-    /* Let us wait until the read operation completes */
-    while (bpage->io_fix() == BUF_IO_READ)
-      std::this_thread::sleep_for(WAIT_FOR_READ);
-
+  /* Let us wait until the read operation completes */
+  while (bpage->io_fix() == BUF_IO_READ)
+    std::this_thread::sleep_for(WAIT_FOR_READ);
   return bpage;
+
+must_read_page:
+  if (dberr_t err= buf_read_page(page_id, zip_size))
+  {
+    ib::error() << "Reading compressed page " << page_id
+                << " failed with error: " << err;
+    return nullptr;
+  }
+  goto lookup;
 }
 
 /********************************************************************//**
@@ -2505,6 +2478,7 @@ while reading the page from file
 then it makes sure that it does merging of change buffer changes while
 reading the page from file.
 @return pointer to the block or NULL */
+TRANSACTIONAL_TARGET
 buf_block_t*
 buf_page_get_low(
 	const page_id_t		page_id,
@@ -2516,10 +2490,8 @@ buf_page_get_low(
 	dberr_t*		err,
 	bool			allow_ibuf_merge)
 {
-	buf_block_t*	block;
 	unsigned	access_time;
 	ulint		retries = 0;
-	const ulint	fold = page_id.fold();
 
 	ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
 	ut_ad(!mtr || mtr->is_active());
@@ -2570,156 +2542,141 @@ buf_page_get_low(
 	      || ibuf_page_low(page_id, zip_size, FALSE, NULL));
 
 	++buf_pool.stat.n_page_gets;
-loop:
-	buf_block_t* fix_block;
-	block = guess;
 
-	page_hash_latch* hash_lock = buf_pool.page_hash.lock<false>(fold);
+	auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
+	page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
+loop:
+	buf_block_t* block = guess;
 
 	if (block) {
-
-		/* If the guess is a compressed page descriptor that
-		has been allocated by buf_page_alloc_descriptor(),
-		it may have been freed by buf_relocate(). */
-
-		if (!buf_pool.is_uncompressed(block)
-		    || page_id != block->page.id()
-		    || block->page.state() != BUF_BLOCK_FILE_PAGE) {
-			/* Our guess was bogus or things have changed
-			since. */
-			guess = nullptr;
-			goto lookup;
-		} else {
+		transactional_shared_lock_guard<page_hash_latch> g{hash_lock};
+		if (buf_pool.is_uncompressed(block)
+		    && page_id == block->page.id()
+		    && block->page.state() == BUF_BLOCK_FILE_PAGE) {
 			ut_ad(!block->page.in_zip_hash);
+			block->fix();
+			goto got_block;
 		}
-	} else {
-lookup:
-		block = reinterpret_cast<buf_block_t*>(
-			buf_pool.page_hash_get_low(page_id, fold));
 	}
 
-	if (!block || buf_pool.watch_is_sentinel(block->page)) {
-		hash_lock->read_unlock();
-		block = nullptr;
+	guess = nullptr;
+
+	/* A memory transaction would frequently be aborted here. */
+	hash_lock.lock_shared();
+	block = reinterpret_cast<buf_block_t*>(
+		buf_pool.page_hash.get(page_id, chain));
+	if (UNIV_LIKELY(block
+			&& !buf_pool.watch_is_sentinel(block->page))) {
+		block->fix();
+		hash_lock.unlock_shared();
+		goto got_block;
 	}
+	hash_lock.unlock_shared();
 
-	if (UNIV_UNLIKELY(!block)) {
-		/* Page not in buf_pool: needs to be read from file */
-		if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
-			hash_lock = buf_pool.page_hash.lock<true>(fold);
-
-			if (buf_page_t *bpage= buf_pool.watch_set(
-				    page_id, &hash_lock)) {
-				/* We can release hash_lock after we
-				increment the fix count to make
-				sure that no state change takes place. */
-				bpage->fix();
-				hash_lock->write_unlock();
-				block = reinterpret_cast<buf_block_t*>(bpage);
-				fix_block = block;
-				goto got_block;
-			}
-
-			hash_lock->write_unlock();
+	/* Page not in buf_pool: needs to be read from file */
+	switch (mode) {
+	case BUF_GET_IF_IN_POOL:
+	case BUF_PEEK_IF_IN_POOL:
+	case BUF_EVICT_IF_IN_POOL:
+		return nullptr;
+	case BUF_GET_IF_IN_POOL_OR_WATCH:
+		/* We cannot easily use a memory transaction here. */
+		hash_lock.lock();
+		block = reinterpret_cast<buf_block_t*>
+			(buf_pool.watch_set(page_id, chain));
+		if (block) {
+			/* buffer-fixing prevents block->page.state()
+			changes */
+			block->fix();
 		}
+		hash_lock.unlock();
 
-		switch (mode) {
-		case BUF_GET_IF_IN_POOL:
-		case BUF_GET_IF_IN_POOL_OR_WATCH:
-		case BUF_PEEK_IF_IN_POOL:
-		case BUF_EVICT_IF_IN_POOL:
-			return(NULL);
+		if (block) {
+			goto got_block;
 		}
 
-		/* The call path is buf_read_page() ->
-		buf_read_page_low() (fil_space_t::io()) ->
-		buf_page_read_complete() ->
-		buf_decrypt_after_read(). Here fil_space_t* is used
-		and we decrypt -> buf_page_check_corrupt() where page
-		checksums are compared. Decryption, decompression as
-		well as error handling takes place at a lower level.
-		Here we only need to know whether the page really is
-		corrupted, or if an encrypted page with a valid
-		checksum cannot be decypted. */
-
-		dberr_t local_err = buf_read_page(page_id, zip_size);
-
-		if (local_err == DB_SUCCESS) {
-			buf_read_ahead_random(page_id, zip_size,
-					      ibuf_inside(mtr));
-
-			retries = 0;
-		} else if (mode == BUF_GET_POSSIBLY_FREED) {
+		return nullptr;
+	}
+
+	/* The call path is buf_read_page() ->
+	buf_read_page_low() (fil_space_t::io()) ->
+	buf_page_read_complete() ->
+	buf_decrypt_after_read(). Here fil_space_t* is used
+	and we decrypt -> buf_page_check_corrupt() where page
+	checksums are compared. Decryption, decompression as
+	well as error handling takes place at a lower level.
+	Here we only need to know whether the page really is
+	corrupted, or if an encrypted page with a valid
+	checksum cannot be decrypted. */
+
+	if (dberr_t local_err = buf_read_page(page_id, zip_size)) {
+		if (mode == BUF_GET_POSSIBLY_FREED) {
 			if (err) {
 				*err = local_err;
 			}
-			return NULL;
+			return nullptr;
 		} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
 			++retries;
-
-			DBUG_EXECUTE_IF(
-				"innodb_page_corruption_retries",
-				retries = BUF_PAGE_READ_MAX_RETRIES;
-			);
+			DBUG_EXECUTE_IF("innodb_page_corruption_retries",
+					retries = BUF_PAGE_READ_MAX_RETRIES;);
 		} else {
 			if (err) {
 				*err = local_err;
 			}
-
-			/* Pages whose encryption key is unavailable or used
-			key, encryption algorithm or encryption method is
-			incorrect are marked as encrypted in
+			/* Pages whose encryption key is unavailable, or whose
+			configured key, encryption algorithm or encryption
+			method is incorrect, are marked as encrypted in
 			buf_page_check_corrupt(). Unencrypted page could be
 			corrupted in a way where the key_id field is
 			nonzero. There is no checksum on field
 			FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */
-			if (local_err == DB_DECRYPTION_FAILED) {
-				return (NULL);
-			}
-
-			if (local_err == DB_PAGE_CORRUPTED
-			    && srv_force_recovery) {
-				return NULL;
+			switch (local_err) {
+			case DB_PAGE_CORRUPTED:
+				if (!srv_force_recovery) {
+					break;
+				}
+				/* fall through */
+			case DB_DECRYPTION_FAILED:
+				return nullptr;
+			default:
+				break;
 			}
 
 			/* Try to set table as corrupted instead of
 			asserting. */
 			if (page_id.space() == TRX_SYS_SPACE) {
 			} else if (page_id.space() == SRV_TMP_SPACE_ID) {
-			} else if (fil_space_t* space= fil_space_t::get(
-					   page_id.space())) {
+			} else if (fil_space_t* space
+				   = fil_space_t::get(page_id.space())) {
 				bool set = dict_set_corrupted_by_space(space);
 				space->release();
 				if (set) {
-					return NULL;
+					return nullptr;
 				}
 			}
 
 			if (local_err == DB_IO_ERROR) {
-				return NULL;
+				return nullptr;
 			}
 
 			ib::fatal() << "Unable to read page " << page_id
-				<< " into the buffer pool after "
-				<< BUF_PAGE_READ_MAX_RETRIES
-				<< ". The most probable cause"
+				    << " into the buffer pool after "
+				    << BUF_PAGE_READ_MAX_RETRIES
+				    << ". The most probable cause"
 				" of this error may be that the"
 				" table has been corrupted."
 				" See https://mariadb.com/kb/en/library/innodb-recovery-modes/";
 		}
-
-#ifdef UNIV_DEBUG
-		if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
-#endif /* UNIV_DEBUG */
-		goto loop;
 	} else {
-		fix_block = block;
+		buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
+		retries = 0;
 	}
 
-	fix_block->fix();
-	hash_lock->read_unlock();
+	ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
+	goto loop;
 
 got_block:
+	ut_ad(!block->page.in_zip_hash);
 	switch (mode) {
 	default:
 		ut_ad(block->zip_size() == zip_size);
@@ -2727,23 +2684,23 @@ got_block:
 	case BUF_GET_IF_IN_POOL:
 	case BUF_PEEK_IF_IN_POOL:
 	case BUF_EVICT_IF_IN_POOL:
-		if (fix_block->page.io_fix() == BUF_IO_READ) {
+		if (block->page.io_fix() == BUF_IO_READ) {
 			/* The page is being read to buffer pool,
 			but we cannot wait around for the read to
 			complete. */
-			fix_block->unfix();
+			block->unfix();
 			return(NULL);
 		}
 	}
 
-	switch (UNIV_EXPECT(fix_block->page.state(), BUF_BLOCK_FILE_PAGE)) {
+	switch (UNIV_EXPECT(block->page.state(), BUF_BLOCK_FILE_PAGE)) {
 	case BUF_BLOCK_FILE_PAGE:
 		if (fsp_is_system_temporary(page_id.space())
 		    && block->page.io_fix() != BUF_IO_NONE) {
 			/* This suggests that the page is being flushed.
 			Avoid returning reference to this page.
 			Instead wait for the flush action to complete. */
-			fix_block->unfix();
+			block->unfix();
 			std::this_thread::sleep_for(
 				std::chrono::microseconds(WAIT_FOR_WRITE));
 			goto loop;
@@ -2751,11 +2708,11 @@ got_block:
 
 		if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
 evict_from_pool:
-			ut_ad(!fix_block->page.oldest_modification());
+			ut_ad(!block->page.oldest_modification());
 			mysql_mutex_lock(&buf_pool.mutex);
-			fix_block->unfix();
+			block->unfix();
 
-			if (!buf_LRU_free_page(&fix_block->page, true)) {
+			if (!buf_LRU_free_page(&block->page, true)) {
 				ut_ad(0);
 			}
 
@@ -2778,7 +2735,7 @@ evict_from_pool:
 			adaptive hash index.  There cannot be an
 			adaptive hash index for a compressed-only
 			page, so do not bother decompressing the page. */
-			fix_block->unfix();
+			block->unfix();
 
 			return(NULL);
 		}
@@ -2792,7 +2749,7 @@ evict_from_pool:
 			/* This condition often occurs when the buffer
 			is not buffer-fixed, but I/O-fixed by
 			buf_page_init_for_read(). */
-			fix_block->unfix();
+			block->unfix();
 
 			/* The block is buffer-fixed or I/O-fixed.
 			Try again later. */
@@ -2805,18 +2762,21 @@ evict_from_pool:
 		or relocated while we are attempting to allocate an
 		uncompressed page. */
 
-		block = buf_LRU_get_free_block(false);
-		buf_block_init_low(block);
+		buf_block_t *new_block = buf_LRU_get_free_block(false);
+		buf_block_init_low(new_block);
 
 		mysql_mutex_lock(&buf_pool.mutex);
-		hash_lock = buf_pool.page_hash.lock_get(fold);
+		page_hash_latch& hash_lock=buf_pool.page_hash.lock_get(chain);
 
-		hash_lock->write_lock();
+		/* It does not make sense to use
+		transactional_lock_guard here, because buf_relocate()
+		would likely make a memory transaction too large. */
+		hash_lock.lock();
 
 		/* Buffer-fixing prevents the page_hash from changing. */
-		ut_ad(bpage == buf_pool.page_hash_get_low(page_id, fold));
+		ut_ad(bpage == buf_pool.page_hash.get(page_id, chain));
 
-		fix_block->unfix(); /* hash_lock protects us after this */
+		block->unfix(); /* hash_lock protects us after this */
 
 		if (bpage->buf_fix_count() || bpage->io_fix() != BUF_IO_NONE) {
 			/* The block was buffer-fixed or I/O-fixed while
@@ -2825,15 +2785,15 @@ evict_from_pool:
 			This should be extremely unlikely, for example,
 			if buf_page_get_zip() was invoked. */
 
-			hash_lock->write_unlock();
-			buf_LRU_block_free_non_file_page(block);
+			hash_lock.unlock();
+			buf_LRU_block_free_non_file_page(new_block);
 			mysql_mutex_unlock(&buf_pool.mutex);
 
 			/* Try again */
 			goto loop;
 		}
 
-		fix_block = block;
+		block = new_block;
 
 		/* Move the compressed page from bpage to block,
 		and uncompress it. */
@@ -2864,7 +2824,7 @@ evict_from_pool:
 		MEM_UNDEFINED(bpage, sizeof *bpage);
 
 		mysql_mutex_unlock(&buf_pool.mutex);
-		hash_lock->write_unlock();
+		hash_lock.unlock();
 		buf_pool.n_pend_unzip++;
 
 		access_time = block->page.is_accessed();
@@ -2880,9 +2840,9 @@ evict_from_pool:
 		buf_pool.mutex. */
 
 		if (!buf_zip_decompress(block, false)) {
-			fix_block->lock.x_unlock();
-			fix_block->page.io_unfix();
-			fix_block->unfix();
+			block->lock.x_unlock();
+			block->page.io_unfix();
+			block->unfix();
 			--buf_pool.n_pend_unzip;
 
 			if (err) {
@@ -2891,16 +2851,14 @@ evict_from_pool:
 			return NULL;
 		}
 
+		block->page.io_unfix();
 		block->lock.x_unlock();
-		fix_block->page.io_unfix();
 		--buf_pool.n_pend_unzip;
-		break;
 	}
 
-	ut_ad(block == fix_block);
-	ut_ad(fix_block->page.buf_fix_count());
+	ut_ad(block->page.buf_fix_count());
 
-	ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
 
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
 re_evict:
@@ -2913,49 +2871,35 @@ re_evict:
 
 		mysql_mutex_lock(&buf_pool.mutex);
 
-		fix_block->unfix();
+		block->unfix();
 
 		/* Blocks cannot be relocated or enter or exit the
 		buf_pool while we are holding the buf_pool.mutex. */
-		const bool evicted = buf_LRU_free_page(&fix_block->page, true);
+		const bool evicted = buf_LRU_free_page(&block->page, true);
 		space->release();
 
 		if (evicted) {
-			hash_lock = buf_pool.page_hash.lock_get(fold);
-			hash_lock->write_lock();
+			page_hash_latch& hash_lock
+				= buf_pool.page_hash.lock_get(chain);
+			hash_lock.lock();
 			mysql_mutex_unlock(&buf_pool.mutex);
 			/* We may set the watch, as it would have
 			been set if the page were not in the
 			buffer pool in the first place. */
 			block= reinterpret_cast<buf_block_t*>(
 				mode == BUF_GET_IF_IN_POOL_OR_WATCH
-				? buf_pool.watch_set(page_id, &hash_lock)
-				: buf_pool.page_hash_get_low(page_id, fold));
-			hash_lock->write_unlock();
-
-			if (block != NULL) {
-				/* Either the page has been read in or
-				a watch was set on that in the window
-				where we released the buf_pool.mutex
-				and before we acquire the hash_lock
-				above. Try again. */
-				guess = block;
-
-				goto loop;
-			}
-
+				? buf_pool.watch_set(page_id, chain)
+				: buf_pool.page_hash.get(page_id, chain));
+			hash_lock.unlock();
 			return(NULL);
 		}
 
-		fix_block->fix();
+		block->fix();
 		mysql_mutex_unlock(&buf_pool.mutex);
-		buf_flush_list();
-		buf_flush_wait_batch_end_acquiring_mutex(false);
-		while (buf_flush_list_space(space));
-		os_aio_wait_until_no_pending_writes();
+		buf_flush_sync();
 
-		if (fix_block->page.buf_fix_count() == 1
-		    && !fix_block->page.oldest_modification()) {
+		if (block->page.buf_fix_count() == 1
+		    && !block->page.oldest_modification()) {
 			goto re_evict;
 		}
 
@@ -2963,7 +2907,7 @@ re_evict:
 	}
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
-	ut_ad(fix_block->page.buf_fix_count());
+	ut_ad(block->page.buf_fix_count());
 
 	/* While tablespace is reinited the indexes are already freed but the
 	blocks related to it still resides in buffer pool. Trying to remove
@@ -2974,25 +2918,25 @@ re_evict:
 	"btr_search_drop_page_hash_when_freed". */
 	ut_ad(mode == BUF_GET_POSSIBLY_FREED
 	      || mode == BUF_PEEK_IF_IN_POOL
-	      || fix_block->page.status != buf_page_t::FREED);
+	      || block->page.status != buf_page_t::FREED);
 
-	const bool not_first_access = fix_block->page.set_accessed();
+	const bool not_first_access = block->page.set_accessed();
 
 	if (mode != BUF_PEEK_IF_IN_POOL) {
-		buf_page_make_young_if_needed(&fix_block->page);
+		buf_page_make_young_if_needed(&block->page);
 	}
 
 #ifdef UNIV_DEBUG
 	if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
 #endif /* UNIV_DEBUG */
-	ut_ad(fix_block->page.state() == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
 
 	/* We have to wait here because the IO_READ state was set
 	under the protection of the hash_lock and not block->lock. */
-	buf_wait_for_read(fix_block);
+	buf_wait_for_read(block);
 
-	if (fix_block->page.id() != page_id) {
-		buf_block_buf_fix_dec(fix_block);
+	if (block->page.id() != page_id) {
+		buf_block_buf_fix_dec(block);
 
 		if (err) {
 			*err = DB_PAGE_CORRUPTED;
@@ -3001,27 +2945,27 @@ re_evict:
 		return NULL;
 	}
 
-	if (fix_block->page.status != buf_page_t::FREED
+	if (block->page.status != buf_page_t::FREED
 	    && allow_ibuf_merge
-	    && fil_page_get_type(fix_block->frame) == FIL_PAGE_INDEX
-	    && page_is_leaf(fix_block->frame)) {
-		fix_block->lock.x_lock();
+	    && fil_page_get_type(block->frame) == FIL_PAGE_INDEX
+	    && page_is_leaf(block->frame)) {
+		block->lock.x_lock();
 
-		if (fix_block->page.ibuf_exist) {
-			fix_block->page.ibuf_exist = false;
-			ibuf_merge_or_delete_for_page(fix_block, page_id,
+		if (block->page.ibuf_exist) {
+			block->page.ibuf_exist = false;
+			ibuf_merge_or_delete_for_page(block, page_id,
 						      zip_size);
 		}
 
 		if (rw_latch == RW_X_LATCH) {
-			mtr->memo_push(fix_block, MTR_MEMO_PAGE_X_FIX);
+			mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
 		} else {
-			fix_block->lock.x_unlock();
+			block->lock.x_unlock();
 			goto get_latch;
 		}
 	} else {
 get_latch:
-		mtr->page_lock(fix_block, rw_latch);
+		mtr->page_lock(block, rw_latch);
 	}
 
 	if (!not_first_access && mode != BUF_PEEK_IF_IN_POOL) {
@@ -3031,7 +2975,7 @@ get_latch:
 		buf_read_ahead_linear(page_id, zip_size, ibuf_inside(mtr));
 	}
 
-	return(fix_block);
+	return block;
 }
 
 /** Get access to a database page. Buffered redo log may be applied.
@@ -3092,6 +3036,7 @@ buf_page_get_gen(
 This is the general function used to get optimistic access to a database
 page.
 @return TRUE if success */
+TRANSACTIONAL_TARGET
 ibool
 buf_page_optimistic_get(
 /*====================*/
@@ -3107,26 +3052,26 @@ buf_page_optimistic_get(
 	ut_ad(mtr->is_active());
 	ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
 
-	if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE
-			  || block->page.io_fix() != BUF_IO_NONE)) {
+	if (have_transactional_memory) {
+	} else if (UNIV_UNLIKELY(block->page.state() != BUF_BLOCK_FILE_PAGE
+				 || block->page.io_fix() != BUF_IO_NONE)) {
 		return FALSE;
 	}
 
-	const page_id_t id(block->page.id());
+	const page_id_t id{block->page.id()};
+	buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold());
 
-	page_hash_latch *hash_lock = buf_pool.hash_lock_get(id);
-	hash_lock->read_lock();
-
-	if (UNIV_UNLIKELY(id != block->page.id()
-			  || block->page.state() != BUF_BLOCK_FILE_PAGE
-			  || block->page.io_fix() != BUF_IO_NONE)) {
-		hash_lock->read_unlock();
-		return(FALSE);
+	{
+		transactional_shared_lock_guard<page_hash_latch> g{
+			buf_pool.page_hash.lock_get(chain)};
+		if (UNIV_UNLIKELY(id != block->page.id()
+				  || block->page.state() != BUF_BLOCK_FILE_PAGE
+				  || block->page.io_fix() != BUF_IO_NONE)) {
+			return FALSE;
+		}
+		block->fix();
 	}
 
-	buf_block_buf_fix_inc(block);
-	hash_lock->read_unlock();
-
 	block->page.set_accessed();
 
 	buf_page_make_young_if_needed(&block->page);
@@ -3187,30 +3132,27 @@ Suitable for using when holding the lock_sys latches (as it avoids deadlock).
 @param[in,out]	mtr	mini-transaction
 @return the block
 @retval nullptr if an S-latch cannot be granted immediately */
+TRANSACTIONAL_TARGET
 buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr)
 {
   ut_ad(mtr);
   ut_ad(mtr->is_active());
+  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
+  buf_block_t *block;
 
-  page_hash_latch *hash_lock;
-  buf_page_t *bpage= buf_pool.page_hash_get_locked<false>(page_id,
-                                                          page_id.fold(),
-                                                          &hash_lock);
-  if (!bpage)
-    return nullptr;
-  if (bpage->state() != BUF_BLOCK_FILE_PAGE)
   {
-    hash_lock->read_unlock();
-    return nullptr;
+    transactional_shared_lock_guard<page_hash_latch> g
+      {buf_pool.page_hash.lock_get(chain)};
+    block= reinterpret_cast<buf_block_t*>
+      (buf_pool.page_hash.get(page_id, chain));
+    if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
+      return nullptr;
+    block->fix();
   }
 
-  buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
-  buf_block_buf_fix_inc(block);
-  hash_lock->read_unlock();
-
   if (!block->lock.s_lock_try())
   {
-    buf_block_buf_fix_dec(block);
+    block->unfix();
     return nullptr;
   }
 
@@ -3219,9 +3161,9 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr)
 #ifdef UNIV_DEBUG
   if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
 #endif /* UNIV_DEBUG */
-  ut_ad(bpage->buf_fix_count());
-  ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE);
-  ut_ad(bpage->id() == page_id);
+  ut_ad(block->page.buf_fix_count());
+  ut_ad(block->page.state() == BUF_BLOCK_FILE_PAGE);
+  ut_ad(block->page.id() == page_id);
 
   ++buf_pool.stat.n_page_gets;
   return block;
@@ -3240,6 +3182,7 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
   page_zip_set_size(&page.zip, zip_size);
 }
 
+TRANSACTIONAL_TARGET
 static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
                                         mtr_t *mtr, buf_block_t *free_block)
 {
@@ -3248,12 +3191,12 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
 
   free_block->initialise(page_id, zip_size, 1);
 
-  const ulint fold= page_id.fold();
+  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
+loop:
   mysql_mutex_lock(&buf_pool.mutex);
 
-loop:
   buf_block_t *block= reinterpret_cast<buf_block_t*>
-    (buf_pool.page_hash_get_low(page_id, fold));
+    (buf_pool.page_hash.get(page_id, chain));
 
   if (block && block->page.in_file() &&
       !buf_pool.watch_is_sentinel(block->page))
@@ -3269,15 +3212,12 @@ loop:
       if (!mtr->have_x_latch(*block))
       {
         buf_block_buf_fix_inc(block);
-        while (!block->lock.x_lock_try())
+        if (!block->lock.x_lock_try())
         {
-          /* Wait for buf_page_write_complete() to release block->lock.
-          We must not hold buf_pool.mutex while waiting. */
-          timespec abstime;
-          set_timespec_nsec(abstime, 1000000);
-          my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex,
-                            &abstime);
-        }
+          mysql_mutex_unlock(&buf_pool.mutex);
+          block->lock.x_lock();
+          mysql_mutex_lock(&buf_pool.mutex);
+	}
         mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
       }
       else
@@ -3292,16 +3232,17 @@ loop:
 #endif
       break;
     case BUF_BLOCK_ZIP_PAGE:
-      page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
-      hash_lock->write_lock();
+      page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
+      /* It does not make sense to use transactional_lock_guard here,
+      because buf_relocate() would likely make the memory transaction
+      too large. */
+      hash_lock.lock();
       if (block->page.io_fix() != BUF_IO_NONE)
       {
-        hash_lock->write_unlock();
+        hash_lock.unlock();
         /* Wait for buf_page_write_complete() to release the I/O fix. */
-        timespec abstime;
-        set_timespec_nsec(abstime, 1000000);
-        my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex,
-                          &abstime);
+        mysql_mutex_unlock(&buf_pool.mutex);
+        os_aio_wait_until_no_pending_writes();
         goto loop;
       }
 
@@ -3313,7 +3254,7 @@ loop:
 
       free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
       buf_unzip_LRU_add_block(free_block, FALSE);
-      hash_lock->write_unlock();
+      hash_lock.unlock();
       buf_page_free_descriptor(&block->page);
       block= free_block;
       buf_block_buf_fix_inc(block);
@@ -3349,25 +3290,20 @@ loop:
 
   /* The block must be put to the LRU list */
   buf_LRU_add_block(&block->page, false);
-  page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
-  hash_lock->write_lock();
-  block->page.set_state(BUF_BLOCK_FILE_PAGE);
-  ut_d(block->page.in_page_hash= true);
-  HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, &block->page);
+  {
+    transactional_lock_guard<page_hash_latch> g
+      {buf_pool.page_hash.lock_get(chain)};
+    block->page.set_state(BUF_BLOCK_FILE_PAGE);
+    buf_pool.page_hash.append(chain, &block->page);
+    block->lock.x_lock();
+    if (UNIV_UNLIKELY(zip_size))
+      /* Prevent race conditions during buf_buddy_alloc(), which may release
+      and reacquire buf_pool.mutex, by IO-fixing and X-latching the block. */
+      block->page.set_io_fix(BUF_IO_READ);
+  }
 
-  block->lock.x_lock();
   if (UNIV_UNLIKELY(zip_size))
   {
-    /* Prevent race conditions during buf_buddy_alloc(), which may
-    release and reacquire buf_pool.mutex, by IO-fixing and X-latching
-    the block. */
-    block->page.set_io_fix(BUF_IO_READ);
-    hash_lock->write_unlock();
-
-    /* buf_pool.mutex may be released and reacquired by
-    buf_buddy_alloc(). We must defer this operation until
-    after the block descriptor has been added to
-    buf_pool.LRU and buf_pool.page_hash. */
     block->page.zip.data= buf_buddy_alloc(zip_size);
 
     /* To maintain the invariant block->in_unzip_LRU_list ==
@@ -3378,8 +3314,6 @@ loop:
 
     block->page.set_io_fix(BUF_IO_NONE);
   }
-  else
-    hash_lock->write_unlock();
 
   mysql_mutex_unlock(&buf_pool.mutex);
 
@@ -3562,32 +3496,6 @@ static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space)
 	}
 }
 
-/** Release and evict a corrupted page.
-@param bpage    page that was being read */
-ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage)
-{
-  const page_id_t id(bpage->id());
-  page_hash_latch *hash_lock= hash_lock_get(id);
-
-  mysql_mutex_lock(&mutex);
-  hash_lock->write_lock();
-
-  ut_ad(bpage->io_fix() == BUF_IO_READ);
-  ut_ad(!bpage->oldest_modification());
-  bpage->set_corrupt_id();
-
-  if (bpage->state() == BUF_BLOCK_FILE_PAGE)
-    reinterpret_cast<buf_block_t*>(bpage)->lock.x_unlock(true);
-  bpage->io_unfix();
-
-  /* remove from LRU and page_hash */
-  buf_LRU_free_one_page(bpage, id, hash_lock);
-  mysql_mutex_unlock(&mutex);
-
-  ut_d(auto n=) n_pend_reads--;
-  ut_ad(n > 0);
-}
-
 /** Mark a table corrupted.
 @param[in]	bpage	Corrupted page
 @param[in]	node	data file
@@ -3889,9 +3797,6 @@ void buf_pool_invalidate()
 {
 	mysql_mutex_lock(&buf_pool.mutex);
 
-	buf_flush_wait_batch_end(true);
-	buf_flush_wait_batch_end(false);
-
 	/* It is possible that a write batch that has been posted
 	earlier is still not complete. For buffer pool invalidation to
 	proceed we must ensure there is NO write activity happening. */
@@ -3953,7 +3858,8 @@ void buf_pool_t::validate()
 
 			case BUF_BLOCK_FILE_PAGE:
 				const page_id_t id = block->page.id();
-				ut_ad(page_hash_get_low(id, id.fold())
+				ut_ad(page_hash.get(id, page_hash.cell_get(
+							    id.fold()))
 				      == &block->page);
 				n_lru++;
 				break;
@@ -3986,7 +3892,7 @@ void buf_pool_t::validate()
 			break;
 		}
 		const page_id_t id = b->id();
-		ut_ad(page_hash_get_low(id, id.fold()) == b);
+		ut_ad(page_hash.get(id, page_hash.cell_get(id.fold())) == b);
 	}
 
 	ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);
@@ -4039,6 +3945,8 @@ void buf_pool_t::print()
 
 	counts = static_cast<ulint*>(ut_malloc_nokey(sizeof(ulint) * size));
 
+	size_t pending_writes = os_aio_pending_writes();
+
 	mysql_mutex_lock(&mutex);
 	mysql_mutex_lock(&flush_list_mutex);
 
@@ -4051,7 +3959,7 @@ void buf_pool_t::print()
 		<< ", n pending decompressions=" << n_pend_unzip
 		<< ", n pending reads=" << n_pend_reads
 		<< ", n pending flush LRU=" << n_flush_LRU_
-		<< " list=" << n_flush_list_
+		<< " list=" << pending_writes
 		<< ", pages made young=" << stat.n_pages_made_young
 		<< ", not young=" << stat.n_pages_not_made_young
 		<< ", pages read=" << stat.n_pages_read
@@ -4169,7 +4077,7 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
 
 	pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU_;
 
-	pool_info->n_pending_flush_list = buf_pool.n_flush_list_;
+	pool_info->n_pending_flush_list = os_aio_pending_writes();
 
 	current_time = time(NULL);
 	time_elapsed = 0.001 + difftime(current_time,