summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.h.cmake2
-rw-r--r--configure.cmake26
-rw-r--r--include/my_cpu.h54
-rw-r--r--mysys/CMakeLists.txt2
-rw-r--r--mysys/my_cpu.c87
-rw-r--r--sql/mysqld.cc1
-rw-r--r--storage/innobase/include/ib0mutex.h5
-rw-r--r--storage/innobase/include/ut0ut.h17
-rw-r--r--storage/innobase/ut/ut0ut.cc23
9 files changed, 134 insertions, 83 deletions
diff --git a/config.h.cmake b/config.h.cmake
index b8a77899c4d..765d75dfb23 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -187,8 +187,6 @@
#cmakedefine HAVE_LINUX_FALLOC_H 1
#cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1
#cmakedefine HAVE_PREAD 1
-#cmakedefine HAVE_PAUSE_INSTRUCTION 1
-#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
#cmakedefine HAVE_RDTSCLL 1
#cmakedefine HAVE_READ_REAL_TIME 1
#cmakedefine HAVE_PTHREAD_ATTR_CREATE 1
diff --git a/configure.cmake b/configure.cmake
index 67ed6503e3e..e75810f8150 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -758,32 +758,6 @@ IF(NOT C_HAS_inline)
ENDIF()
ENDIF()
-IF(NOT CMAKE_CROSSCOMPILING AND NOT MSVC)
- STRING(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} processor)
- IF(processor MATCHES "86" OR processor MATCHES "amd64" OR processor MATCHES "x64")
- #Check for x86 PAUSE instruction
- # We have to actually try running the test program, because of a bug
- # in Solaris on x86_64, where it wrongly reports that PAUSE is not
- # supported when trying to run an application. See
- # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
- CHECK_C_SOURCE_RUNS("
- int main()
- {
- __asm__ __volatile__ (\"pause\");
- return 0;
- }" HAVE_PAUSE_INSTRUCTION)
- ENDIF()
- IF (NOT HAVE_PAUSE_INSTRUCTION)
- CHECK_C_SOURCE_COMPILES("
- int main()
- {
- __asm__ __volatile__ (\"rep; nop\");
- return 0;
- }
- " HAVE_FAKE_PAUSE_INSTRUCTION)
- ENDIF()
-ENDIF()
-
CHECK_SYMBOL_EXISTS(tcgetattr "termios.h" HAVE_TCGETATTR 1)
#
diff --git a/include/my_cpu.h b/include/my_cpu.h
index b5665fc108c..0e37eafe60e 100644
--- a/include/my_cpu.h
+++ b/include/my_cpu.h
@@ -46,10 +46,20 @@
#define HMT_high()
#endif
+#if defined __i386__ || defined __x86_64__ || defined _WIN32
+# define HAVE_PAUSE_INSTRUCTION /* the PAUSE instruction was added in Intel Pentium 4 */
+#endif /* NOTE(review): _WIN32 alone does not imply an x86 CPU; confirm that non-x86 Windows targets are not built */
static inline void MY_RELAX_CPU(void)
{
-#ifdef HAVE_PAUSE_INSTRUCTION
+#ifdef _WIN32
+ /*
+ In the Win32 API, the x86 PAUSE instruction is executed by calling
+ the YieldProcessor macro defined in WinNT.h. Using YieldProcessor
+ keeps this code independent of the CPU architecture.
+ */
+ YieldProcessor();
+#elif defined HAVE_PAUSE_INSTRUCTION
/*
According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
@@ -61,16 +71,6 @@ static inline void MY_RELAX_CPU(void)
#else
__asm__ __volatile__ ("pause");
#endif
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
- __asm__ __volatile__ ("rep; nop");
-#elif defined _WIN32
- /*
- In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor.
- */
- YieldProcessor();
#elif defined(_ARCH_PWR8)
__ppc_get_timebase();
#else
@@ -81,6 +81,20 @@ static inline void MY_RELAX_CPU(void)
}
+#ifdef HAVE_PAUSE_INSTRUCTION /* the PAUSE loop count can be tuned at run time */
+# ifdef __cplusplus
+extern "C" {
+# endif
+extern unsigned my_cpu_relax_multiplier; /* MY_RELAX_CPU() calls per LF_BACKOFF(); defined in mysys/my_cpu.c */
+void my_cpu_init(void); /* times PAUSE and may lower my_cpu_relax_multiplier */
+# ifdef __cplusplus
+}
+# endif
+#else /* no PAUSE instruction: fixed loop count and nothing to initialize */
+# define my_cpu_relax_multiplier 200
+# define my_cpu_init() /* nothing */
+#endif
+
/*
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
recommends to use it in spin loops also on non-HT machines to reduce power
@@ -94,9 +108,23 @@ static inline void MY_RELAX_CPU(void)
static inline int LF_BACKOFF(void)
{
- int i;
- for (i= 0; i < 200; i++)
+ unsigned i= my_cpu_relax_multiplier; /* 200 unless my_cpu_init() lowered it to 20 */
+ while (i--) /* the body runs exactly the initial value of i times */
MY_RELAX_CPU();
return 1;
}
+
+/**
+ Run a delay loop while waiting for a shared resource to be released.
+
+ MY_RELAX_CPU() is invoked my_cpu_relax_multiplier / 4 * delay times:
+ 50 * delay with the default multiplier of 200 (the same count as the
+ former InnoDB ut_delay() loop), or 5 * delay once my_cpu_init() has
+ lowered the multiplier to 20.
+
+ @param delay originally, roughly microseconds on 100 MHz Intel Pentium
+*/
+static inline void ut_delay(unsigned delay)
+{
+ unsigned i= my_cpu_relax_multiplier / 4 * delay; /* the division is done first: 200 / 4 * delay == 50 * delay */
+ HMT_low(); /* defined earlier in this header; presumably a hardware thread priority hint - TODO confirm */
+ while (i--) /* the body runs exactly the initial value of i times */
+ MY_RELAX_CPU();
+ HMT_medium(); /* counterpart of the HMT_low() call above */
+}
+
#endif
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 6990d1350e3..438d6b428e0 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -44,7 +44,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c
my_getncpus.c my_safehash.c my_chmod.c my_rnd.c
my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c
my_rdtsc.c my_context.c psi_noop.c
- my_atomic_writes.c my_likely.c
+ my_atomic_writes.c my_cpu.c my_likely.c
file_logger.c my_dlerror.c)
IF (WIN32)
diff --git a/mysys/my_cpu.c b/mysys/my_cpu.c
new file mode 100644
index 00000000000..4a63bd01229
--- /dev/null
+++ b/mysys/my_cpu.c
@@ -0,0 +1,87 @@
+/* Copyright (c) 2019, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include <my_global.h>
+#include <my_cpu.h>
+
+#ifdef HAVE_PAUSE_INSTRUCTION
+/** How many times to invoke PAUSE in a loop: 200 by default,
+ lowered to 20 by my_cpu_init() when PAUSE is measured to be slow */
+unsigned my_cpu_relax_multiplier = 200;
+
+# include <stdint.h>
+
+# ifdef _MSC_VER
+# include <intrin.h>
+# else
+# include <x86intrin.h>
+# endif
+
+#define PAUSE4 MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU() /* 4 unrolled calls; the caller supplies the final semicolon */
+#define PAUSE16 PAUSE4; PAUSE4; PAUSE4; PAUSE4 /* 16 unrolled calls; expands to several statements, so use it only as a complete statement */
+
+/**
+ Initialize my_cpu_relax_multiplier.
+
+ Determine the duration of a PAUSE instruction by running an
+ unrolled loop of 16 PAUSE instructions twice, and taking the
+ faster of the two runs. In this way, even if the execution is
+ interrupted by the operating system, it should be extremely
+ unlikely that both loops get interrupted.
+
+ On the Intel Skylake microarchitecture, the PAUSE instruction takes
+ around 140 clock cycles, while on earlier microarchitectures it could
+ be 10 clock cycles or less. Scale the PAUSE loop counter accordingly.
+
+ On a pre-Skylake Intel Xeon CPU E5-2630 v4 @ 2.20GHz running an AMD64
+ executable, the numbers would be between 176 and 198 when all the code
+ is inlined as follows:
+
+ lfence,rdtsc,mov,shl,or, 16*pause,
+ lfence,rdtsc,mov,shl,or, 16*pause,
+ lfence,rdtsc.
+
+ That would yield 11 to 12 cycles per PAUSE instruction even if we
+ (wrongly) ignore the overhead of the other instructions.
+
+ On a Skylake mobile processor Intel Core i7-6500U CPU @ 2.50GHz, the
+ numbers would be somewhere around 6000 or 7000, yielding up to 430
+ cycles per instruction. This could be partly due to increased latency
+ for LFENCE and RDTSC, or simply dynamic clock scaling.
+
+ Let us define a threshold at roughly 30 cycles per PAUSE instruction,
+ and use a shorter delay if the PAUSE instruction takes longer than
+ that. In some AMD processors, the PAUSE instruction could take 40 or
+ 50 cycles. Let us use a shorter delay multiplier for them as well.
+
+ The multiplier is lowered only if both runs exceed the threshold,
+ that is, if even the faster of the two runs is above it.
+
+ The 1/10 scaling factor (reducing the multiplier from 200 to 20) was
+ derived experimentally by Mikhail Sinyavin from Intel.
+*/
+void my_cpu_init(void)
+{
+ uint64_t t0, t1, t2;
+ _mm_lfence(); /* keep RDTSC from executing ahead of earlier instructions */
+ t0= __rdtsc(); /* time stamp before the first run */
+ PAUSE16;
+ _mm_lfence();
+ t1= __rdtsc(); /* end of the first run, start of the second */
+ PAUSE16;
+ _mm_lfence();
+ t2= __rdtsc(); /* end of the second run */
+ t2-= t1; /* duration of the second run of 16 PAUSE */
+ t1-= t0; /* duration of the first run of 16 PAUSE */
+ if (t1 > 30 * 16 && t2 > 30 * 16) /* both runs took more than 30 ticks per PAUSE */
+ my_cpu_relax_multiplier= 20;
+}
+#endif
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 4b31b78fc21..f98f51c73d0 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -5113,6 +5113,7 @@ static int init_server_components()
We need to call each of these following functions to ensure that
all things are initialized so that unireg_abort() doesn't fail
*/
+ my_cpu_init();
mdl_init();
if (tdc_init() || hostname_cache_init())
unireg_abort(1);
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
index 1b8ec8d0fe4..3e99eb79416 100644
--- a/storage/innobase/include/ib0mutex.h
+++ b/storage/innobase/include/ib0mutex.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,7 @@ Created 2013-03-26 Sunny Bains.
#ifndef ib0mutex_h
#define ib0mutex_h
-#include "ut0ut.h"
-#include "ut0rnd.h"
+#include "my_cpu.h"
#include "os0event.h"
#include "sync0arr.h"
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index b54e41ea614..51e00c6f0fe 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -54,14 +54,6 @@ Created 1/20/1994 Heikki Tuuri
/** Time stamp */
typedef time_t ib_time_t;
-#if defined (__GNUC__)
-# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
-#elif defined (_MSC_VER)
-# define UT_COMPILER_BARRIER() _ReadWriteBarrier()
-#else
-# define UT_COMPILER_BARRIER()
-#endif
-
/*********************************************************************//**
Delays execution for at most max_wait_us microseconds or returns earlier
if cond becomes true.
@@ -270,14 +262,7 @@ void
ut_sprintf_timestamp(
/*=================*/
char* buf); /*!< in: buffer where to sprintf */
-/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-void
-ut_delay(
-/*=====*/
- ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */
+
/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
void
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 7ee015f8f38..42054f309c7 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -284,27 +284,6 @@ ut_sprintf_timestamp(
}
/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-void
-ut_delay(
-/*=====*/
- ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */
-{
- ulint i;
-
- HMT_low();
-
- for (i = 0; i < delay * 50; i++) {
- MY_RELAX_CPU();
- UT_COMPILER_BARRIER();
- }
-
- HMT_medium();
-}
-
-/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
void
ut_print_buf(