author     torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-01-16 19:50:43 +0000
committer  torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-01-16 19:50:43 +0000
commit     19a98dc427ef49300c6b39feddddf347d1985ff8 (patch)
tree       7e0f7a52bc8fea8db2cc39c259441e1c7ab73feb /libstdc++-v3/include
parent     cd93a9ab5a03a6e9e7e7c46eb8e341f58efbbf7d (diff)
download   gcc-19a98dc427ef49300c6b39feddddf347d1985ff8.tar.gz
libstdc++: Optimize synchronization in std::future if futexes are available.
	* src/c++11/futex.cc: New file.
	* include/bits/atomic_futex.h: New file.
	* include/std/future (__future_base::_State_baseV2): Use
	atomic_futex_unsigned instead of mutex+condvar.
	* src/c++11/futex.cc: Likewise.
	* include/Makefile.am: Add atomic_futex.h.
	* include/Makefile.in: Likewise.
	* src/c++11/Makefile.am: Add futex.cc.
	* src/c++11/Makefile.in: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@219770 138bc75d-0d04-0410-961f-82ee72b054a4
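The heart of the change: on Linux, a single futex-backed atomic word replaces the mutex/condition-variable pair that std::future previously used to signal readiness, so the fast path (future already ready) is one atomic load and the slow path is one syscall. Below is a minimal standalone sketch of that idea, not the patched libstdc++ internals; the state encoding, the wait_ready/make_ready names, and the raw syscall(2) usage are illustrative assumptions.

    // Hypothetical standalone sketch of futex-based readiness signaling
    // (Linux-only); the patch's real implementation is
    // __atomic_futex_unsigned in bits/atomic_futex.h below.
    #include <atomic>
    #include <climits>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static std::atomic<unsigned> state{0};   // 0 = not ready, 1 = ready

    static long
    futex(unsigned* addr, int op, unsigned val)
    { return syscall(SYS_futex, addr, op, val, nullptr, nullptr, 0); }

    void make_ready()                        // producer side
    {
      state.store(1, std::memory_order_release);
      // Wake every thread sleeping on the futex word.
      futex(reinterpret_cast<unsigned*>(&state), FUTEX_WAKE_PRIVATE, INT_MAX);
    }

    void wait_ready()                        // consumer side
    {
      unsigned v = state.load(std::memory_order_acquire);
      while (v != 1)
        {
          // Sleep only while the word still holds the value we observed;
          // a concurrent make_ready makes FUTEX_WAIT return immediately.
          futex(reinterpret_cast<unsigned*>(&state), FUTEX_WAIT_PRIVATE, v);
          v = state.load(std::memory_order_acquire);
        }
    }

The patch improves on this sketch in one important way: _M_store_notify_all issues the FUTEX_WAKE only when the exchanged value carries _Waiter_bit, i.e. only when at least one thread has announced itself as a waiter.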
Diffstat (limited to 'libstdc++-v3/include')
-rw-r--r--  libstdc++-v3/include/Makefile.am            1
-rw-r--r--  libstdc++-v3/include/Makefile.in            1
-rw-r--r--  libstdc++-v3/include/bits/atomic_futex.h  288
-rw-r--r--  libstdc++-v3/include/std/future            96
4 files changed, 339 insertions(+), 47 deletions(-)
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 08e5d5fc9c3..4772950124a 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -83,6 +83,7 @@ bits_headers = \
${bits_srcdir}/allocated_ptr.h \
${bits_srcdir}/allocator.h \
${bits_srcdir}/atomic_base.h \
+ ${bits_srcdir}/atomic_futex.h \
${bits_srcdir}/basic_ios.h \
${bits_srcdir}/basic_ios.tcc \
${bits_srcdir}/basic_string.h \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 3e5d82ec43b..ebcaa9663fe 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -351,6 +351,7 @@ bits_headers = \
${bits_srcdir}/allocated_ptr.h \
${bits_srcdir}/allocator.h \
${bits_srcdir}/atomic_base.h \
+ ${bits_srcdir}/atomic_futex.h \
${bits_srcdir}/basic_ios.h \
${bits_srcdir}/basic_ios.tcc \
${bits_srcdir}/basic_string.h \
diff --git a/libstdc++-v3/include/bits/atomic_futex.h b/libstdc++-v3/include/bits/atomic_futex.h
new file mode 100644
index 00000000000..9a418d8a00b
--- /dev/null
+++ b/libstdc++-v3/include/bits/atomic_futex.h
@@ -0,0 +1,288 @@
+// -*- C++ -*- header.
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file bits/atomic_futex.h
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly.
+ */
+
+#ifndef _GLIBCXX_ATOMIC_FUTEX_H
+#define _GLIBCXX_ATOMIC_FUTEX_H 1
+
+#pragma GCC system_header
+
+#include <bits/c++config.h>
+#include <atomic>
+#include <chrono>
+#if !defined(_GLIBCXX_HAVE_LINUX_FUTEX)
+#include <mutex>
+#include <condition_variable>
+#endif
+
+#ifndef _GLIBCXX_ALWAYS_INLINE
+#define _GLIBCXX_ALWAYS_INLINE inline __attribute__((always_inline))
+#endif
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+#if defined(_GLIBCXX_HAVE_LINUX_FUTEX)
+ struct __atomic_futex_unsigned_base
+ {
+ // Returns false iff a timeout occurred.
+ bool
+ _M_futex_wait_until(unsigned *__addr, unsigned __val, bool __has_timeout,
+ chrono::seconds __s, chrono::nanoseconds __ns);
+
+ // This can be executed after the object has been destroyed.
+ static void _M_futex_notify_all(unsigned* __addr);
+ };
+
+ template <unsigned _Waiter_bit = 0x80000000>
+ struct __atomic_futex_unsigned : __atomic_futex_unsigned_base
+ {
+ typedef chrono::system_clock __clock_t;
+
+ // XXX We expect this to be lock-free and to have the payload at offset 0.
+#if ATOMIC_INT_LOCK_FREE < 2
+# error We require lock-free atomic operations on int
+#endif
+ atomic<unsigned> _M_data;
+
+ __atomic_futex_unsigned(unsigned __data) : _M_data(__data)
+ { }
+
+ _GLIBCXX_ALWAYS_INLINE unsigned
+ _M_load(memory_order __mo)
+ {
+ return _M_data.load(__mo) & ~_Waiter_bit;
+ }
+
+ private:
+
+ // If a timeout occurs, returns a current value after the timeout;
+ // otherwise, returns the operand's value if equal is true or a different
+ // value if equal is false.
+ // The assumed value is the caller's assumption about the current value
+ // when making the call.
+ unsigned
+ _M_load_and_test_until(unsigned __assumed, unsigned __operand,
+ bool __equal, memory_order __mo, bool __has_timeout,
+ chrono::seconds __s, chrono::nanoseconds __ns)
+ {
+ for (;;)
+ {
+ // Don't bother checking the value again because we expect the caller to
+ // have done it recently.
+ // memory_order_relaxed is sufficient because we can rely on just the
+ // modification order (store_notify uses an atomic RMW operation too),
+ // and the futex syscalls synchronize between themselves.
+ _M_data.fetch_or(_Waiter_bit, memory_order_relaxed);
+ bool __ret;
+ __ret = _M_futex_wait_until((unsigned*)(void*)&_M_data,
+ __assumed | _Waiter_bit, __has_timeout, __s, __ns);
+ // Fetch the current value after waiting (clears _Waiter_bit).
+ __assumed = _M_load(__mo);
+ if (!__ret || ((__operand == __assumed) == __equal))
+ return __assumed;
+ // TODO adapt wait time
+ }
+ }
+
+ // Returns the operand's value if equal is true or a different value if
+ // equal is false.
+ // The assumed value is the caller's assumption about the current value
+ // when making the call.
+ unsigned
+ _M_load_and_test(unsigned __assumed, unsigned __operand,
+ bool __equal, memory_order __mo)
+ {
+ return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
+ false, chrono::seconds(0), chrono::nanoseconds(0));
+ }
+
+ // If a timeout occurs, returns a current value after the timeout;
+ // otherwise, returns the operand's value if equal is true or a different
+ // value if equal is false.
+ // The assumed value is the caller's assumption about the current value
+ // when making the call.
+ template<typename _Dur>
+ unsigned
+ _M_load_and_test_until_impl(unsigned __assumed, unsigned __operand,
+ bool __equal, memory_order __mo,
+ const chrono::time_point<__clock_t, _Dur>& __atime)
+ {
+ auto __s = chrono::time_point_cast<chrono::seconds>(__atime);
+ auto __ns = chrono::duration_cast<chrono::nanoseconds>(__atime - __s);
+ // XXX correct?
+ return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
+ true, __s.time_since_epoch(), __ns);
+ }
+
+ public:
+
+ _GLIBCXX_ALWAYS_INLINE unsigned
+ _M_load_when_not_equal(unsigned __val, memory_order __mo)
+ {
+ unsigned __i = _M_load(__mo);
+ if ((__i & ~_Waiter_bit) != __val) return __i;
+ // TODO Spin-wait first.
+ return _M_load_and_test(__i, __val, false, __mo);
+ }
+
+ _GLIBCXX_ALWAYS_INLINE void
+ _M_load_when_equal(unsigned __val, memory_order __mo)
+ {
+ unsigned __i = _M_load(__mo);
+ if ((__i & ~_Waiter_bit) == __val)
+ return;
+ // TODO Spin-wait first.
+ _M_load_and_test(__i, __val, true, __mo);
+ }
+
+ // Returns false iff a timeout occurred.
+ template<typename _Rep, typename _Period>
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_load_when_equal_for(unsigned __val, memory_order __mo,
+ const chrono::duration<_Rep, _Period>& __rtime)
+ {
+ return _M_load_when_equal_until(__val, __mo, __clock_t::now() + __rtime);
+ }
+
+ // Returns false iff a timeout occurred.
+ template<typename _Clock, typename _Duration>
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_load_when_equal_until(unsigned __val, memory_order __mo,
+ const chrono::time_point<_Clock, _Duration>& __atime)
+ {
+ // DR 887 - Sync unknown clock to known clock.
+ const typename _Clock::time_point __c_entry = _Clock::now();
+ const __clock_t::time_point __s_entry = __clock_t::now();
+ const auto __delta = __atime - __c_entry;
+ const auto __s_atime = __s_entry + __delta;
+ return _M_load_when_equal_until(__val, __mo, __s_atime);
+ }
+
+ // Returns false iff a timeout occurred.
+ template<typename _Duration>
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_load_when_equal_until(unsigned __val, memory_order __mo,
+ const chrono::time_point<__clock_t, _Duration>& __atime)
+ {
+ unsigned __i = _M_load(__mo);
+ if ((__i & ~_Waiter_bit) == __val)
+ return true;
+ // TODO Spin-wait first. Ignore effect on timeout.
+ __i = _M_load_and_test_until_impl(__i, __val, true, __mo, __atime);
+ return (__i & ~_Waiter_bit) == __val;
+ }
+
+ _GLIBCXX_ALWAYS_INLINE void
+ _M_store_notify_all(unsigned __val, memory_order __mo)
+ {
+ unsigned* __futex = (unsigned *)(void *)&_M_data;
+ if (_M_data.exchange(__val, __mo) & _Waiter_bit)
+ _M_futex_notify_all(__futex);
+ }
+
+ };
+
+#else
+
+ // If futexes are not available, use a mutex and a condvar to wait.
+ // Because we access the data only within critical sections, all accesses
+ // are sequentially consistent; thus, we satisfy any provided memory_order.
+ template <unsigned _Waiter_bit = 0x80000000>
+ struct __atomic_futex_unsigned
+ {
+ typedef chrono::system_clock __clock_t;
+
+ unsigned _M_data;
+ mutex _M_mutex;
+ condition_variable _M_condvar;
+
+ __atomic_futex_unsigned(unsigned __data) : _M_data(__data)
+ { }
+
+ _GLIBCXX_ALWAYS_INLINE unsigned
+ _M_load(memory_order __mo)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ return _M_data;
+ }
+
+ _GLIBCXX_ALWAYS_INLINE unsigned
+ _M_load_when_not_equal(unsigned __val, memory_order __mo)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ while (_M_data == __val)
+ _M_condvar.wait(__lock);
+ return _M_data;
+ }
+
+ _GLIBCXX_ALWAYS_INLINE void
+ _M_load_when_equal(unsigned __val, memory_order __mo)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ while (_M_data != __val)
+ _M_condvar.wait(__lock);
+ }
+
+ template<typename _Rep, typename _Period>
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_load_when_equal_for(unsigned __val, memory_order __mo,
+ const chrono::duration<_Rep, _Period>& __rtime)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ return _M_condvar.wait_for(__lock, __rtime,
+ [&] { return _M_data == __val;});
+ }
+
+ template<typename _Clock, typename _Duration>
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_load_when_equal_until(unsigned __val, memory_order __mo,
+ const chrono::time_point<_Clock, _Duration>& __atime)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ return _M_condvar.wait_until(__lock, __atime,
+ [&] { return _M_data == __val;});
+ }
+
+ _GLIBCXX_ALWAYS_INLINE void
+ _M_store_notify_all(unsigned __val, memory_order __mo)
+ {
+ unique_lock<mutex> __lock(_M_mutex);
+ _M_data = __val;
+ _M_condvar.notify_all();
+ }
+
+ };
+
+#endif
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std
+
+#endif
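
Taken together, the new class is driven like this (a hypothetical caller, shown for orientation only; the reserved __-prefixed names make clear that std::future is the sole intended user):

    #include <bits/atomic_futex.h>           // internal header, sketch only
    #include <atomic>

    // 0 = not ready, 1 = ready (std::future uses an equivalent enum).
    std::__atomic_futex_unsigned<> status{0};

    void consumer()
    {
      // Blocks until the word equals 1; acquire MO pairs with the
      // producer's release store, so everything written before
      // _M_store_notify_all is visible here afterwards.
      status._M_load_when_equal(1, std::memory_order_acquire);
    }

    void producer()
    {
      // A single atomic exchange publishes the value; the FUTEX_WAKE
      // syscall happens only if a waiter set _Waiter_bit meanwhile.
      status._M_store_notify_all(1, std::memory_order_release);
    }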
diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future
index 2ac6f157abe..cb0226dec4c 100644
--- a/libstdc++-v3/include/std/future
+++ b/libstdc++-v3/include/std/future
@@ -41,6 +41,7 @@
#include <condition_variable>
#include <system_error>
#include <atomic>
+#include <bits/atomic_futex.h>
#include <bits/functexcept.h>
#include <bits/unique_ptr.h>
#include <bits/shared_ptr.h>
@@ -294,15 +295,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
typedef _Ptr<_Result_base> _Ptr_type;
+ enum _Status : unsigned {
+ __not_ready,
+ __ready
+ };
+
_Ptr_type _M_result;
- mutex _M_mutex;
- condition_variable _M_cond;
- atomic_flag _M_retrieved = ATOMIC_FLAG_INIT;
- bool _M_ready = false;
+ __atomic_futex_unsigned<> _M_status;
+ atomic_flag _M_retrieved = ATOMIC_FLAG_INIT;
once_flag _M_once;
public:
- _State_baseV2() noexcept = default;
+ _State_baseV2() noexcept : _M_result(), _M_status(_Status::__not_ready)
+ { }
_State_baseV2(const _State_baseV2&) = delete;
_State_baseV2& operator=(const _State_baseV2&) = delete;
virtual ~_State_baseV2() = default;
@@ -312,9 +317,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
// Run any deferred function or join any asynchronous thread:
_M_complete_async();
-
- unique_lock<mutex> __lock(_M_mutex);
- _M_cond.wait(__lock, [&] { return _M_ready; });
+ // Acquire MO makes sure this synchronizes with the thread that made
+ // the future ready.
+ _M_status._M_load_when_equal(_Status::__ready, memory_order_acquire);
return *_M_result;
}
@@ -322,12 +327,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
future_status
wait_for(const chrono::duration<_Rep, _Period>& __rel)
{
- unique_lock<mutex> __lock(_M_mutex);
- if (_M_ready)
+ // First, check if the future has been made ready. Use acquire MO
+ // to synchronize with the thread that made it ready.
+ if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
return future_status::ready;
- if (_M_has_deferred())
+ if (_M_is_deferred_future())
return future_status::deferred;
- if (_M_cond.wait_for(__lock, __rel, [&] { return _M_ready; }))
+ if (_M_status._M_load_when_equal_for(_Status::__ready,
+ memory_order_acquire, __rel))
{
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 2100. timed waiting functions must also join
@@ -349,12 +356,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
future_status
wait_until(const chrono::time_point<_Clock, _Duration>& __abs)
{
- unique_lock<mutex> __lock(_M_mutex);
- if (_M_ready)
+ // First, check if the future has been made ready. Use acquire MO
+ // to synchronize with the thread that made it ready.
+ if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
return future_status::ready;
- if (_M_has_deferred())
+ if (_M_is_deferred_future())
return future_status::deferred;
- if (_M_cond.wait_until(__lock, __abs, [&] { return _M_ready; }))
+ if (_M_status._M_load_when_equal_until(_Status::__ready,
+ memory_order_acquire, __abs))
{
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 2100. timed waiting functions must also join
@@ -367,45 +376,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
// Provide a result to the shared state and make it ready.
- // Atomically performs:
- // if (!_M_ready) {
- // _M_result = __res();
- // _M_ready = true;
- // }
+ // Calls at most once: _M_result = __res();
void
_M_set_result(function<_Ptr_type()> __res, bool __ignore_failure = false)
{
- unique_lock<mutex> __lock(_M_mutex, defer_lock);
+ bool __did_set = false;
// all calls to this function are serialized,
// side-effects of invoking __res only happen once
call_once(_M_once, &_State_baseV2::_M_do_set, this,
- std::__addressof(__res), std::__addressof(__lock));
- if (__lock.owns_lock())
- {
- _M_ready = true;
- _M_cond.notify_all();
- }
+ std::__addressof(__res), std::__addressof(__did_set));
+ if (__did_set)
+ // Use release MO to synchronize with observers of the ready state.
+ _M_status._M_store_notify_all(_Status::__ready,
+ memory_order_release);
else if (!__ignore_failure)
__throw_future_error(int(future_errc::promise_already_satisfied));
}
// Provide a result to the shared state but delay making it ready
// until the calling thread exits.
- // Atomically performs:
- // if (!_M_ready) {
- // _M_result = __res();
- // }
+ // Calls at most once: _M_result = __res();
void
_M_set_delayed_result(function<_Ptr_type()> __res,
weak_ptr<_State_baseV2> __self)
{
+ bool __did_set = false;
unique_ptr<_Make_ready> __mr{new _Make_ready};
- unique_lock<mutex> __lock(_M_mutex, defer_lock);
// all calls to this function are serialized,
// side-effects of invoking __res only happen once
call_once(_M_once, &_State_baseV2::_M_do_set, this,
- std::__addressof(__res), std::__addressof(__lock));
- if (!__lock.owns_lock())
+ std::__addressof(__res), std::__addressof(__did_set));
+ if (!__did_set)
__throw_future_error(int(future_errc::promise_already_satisfied));
__mr->_M_shared_state = std::move(__self);
__mr->_M_set();
@@ -424,12 +425,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// provider is abandoning this shared state, so no one can be
// trying to make the shared state ready at the same time, and
// we can access _M_result directly instead of through call_once.
- {
- lock_guard<mutex> __lock(_M_mutex);
- _M_result.swap(__res);
- _M_ready = true;
- }
- _M_cond.notify_all();
+ _M_result.swap(__res);
+ // Use release MO to synchronize with observers of the ready state.
+ _M_status._M_store_notify_all(_Status::__ready,
+ memory_order_release);
}
}
@@ -523,18 +522,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
private:
// The function invoked with std::call_once(_M_once, ...).
void
- _M_do_set(function<_Ptr_type()>* __f, unique_lock<mutex>* __lock)
+ _M_do_set(function<_Ptr_type()>* __f, bool* __did_set)
{
- _Ptr_type __res = (*__f)(); // do not hold lock while running setter
- __lock->lock();
- _M_result.swap(__res);
+ _Ptr_type __res = (*__f)();
+ // Tell the caller that the setter ran to completion: (*__f)() did
+ // not throw, and the swap below cannot throw, so __did_set reliably
+ // reports a successful set (e.g., see _M_set_result).
+ *__did_set = true;
+ _M_result.swap(__res); // nothrow
}
// Wait for completion of async function.
virtual void _M_complete_async() { }
// Return true if state corresponds to a deferred function.
- virtual bool _M_has_deferred() const { return false; }
+ virtual bool _M_is_deferred_future() const { return false; }
struct _Make_ready final : __at_thread_exit_elt
{
@@ -1606,7 +1608,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Caller should check whether the state is ready first, because this
// function will return true even after the deferred function has run.
- virtual bool _M_has_deferred() const { return true; }
+ virtual bool _M_is_deferred_future() const { return true; }
};
// Common functionality hoisted out of the _Async_state_impl template.
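
The set-once handshake that replaces the old lock-based _M_set_result can also be read in isolation. A standalone sketch with hypothetical names (Result, set_result); only the call_once/__did_set interplay is taken from the patch:

    #include <mutex>
    #include <stdexcept>

    struct Result
    {
      std::once_flag _M_once;

      void set_result()
      {
        bool __did_set = false;
        std::call_once(_M_once, [&]
          {
            // ... run the setter; it may throw, in which case call_once
            // stays unsatisfied and __did_set stays false ...
            __did_set = true;  // reached only on a successful first set
          });
        if (!__did_set)        // some earlier call already set the result
          // stand-in for __throw_future_error(promise_already_satisfied)
          throw std::runtime_error("promise already satisfied");
        // On success the patch then publishes readiness with
        // _M_status._M_store_notify_all(__ready, memory_order_release).
      }
    };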