author     torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-01-16 19:50:43 +0000
committer  torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-01-16 19:50:43 +0000
commit     19a98dc427ef49300c6b39feddddf347d1985ff8
tree       7e0f7a52bc8fea8db2cc39c259441e1c7ab73feb  /libstdc++-v3/include
parent     cd93a9ab5a03a6e9e7e7c46eb8e341f58efbbf7d
download   gcc-19a98dc427ef49300c6b39feddddf347d1985ff8.tar.gz
libstdc++: Optimize synchronization in std::future if futexes are available.
* src/c++11/futex.cc: New file.
* include/bits/atomic_futex.h: New file.
* include/std/future (__future_base::_State_baseV2): Use
atomic_futex_unsigned instead of mutex+condvar.
* src/c++11/futex.cc: Likewise.
* include/Makefile.am: Add atomic_futex.h.
* include/Makefile.in: Likewise.
* src/c++11/Makefile.am: Add futex.cc.
* src/c++11/Makefile.in: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@219770 138bc75d-0d04-0410-961f-82ee72b054a4
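
The fast path rests on the two primitives declared in the new bits/atomic_futex.h below, _M_futex_wait_until and _M_futex_notify_all; their definitions live in the new src/c++11/futex.cc, which is outside this diffstat. As a rough sketch of what such wrappers reduce to on Linux (hypothetical helper names; error, EINTR, and timeout handling omitted; not the actual contents of futex.cc):

    // Illustrative helpers for the Linux futex calls that back
    // __atomic_futex_unsigned; the real code is in src/c++11/futex.cc.
    #include <climits>        // INT_MAX
    #include <unistd.h>       // syscall
    #include <sys/syscall.h>  // SYS_futex
    #include <linux/futex.h>  // FUTEX_WAIT, FUTEX_WAKE

    // Sleep until *addr changes: the kernel atomically re-checks
    // *addr == val before blocking, which closes the window in which a
    // wake-up could be lost between the caller's check and the sleep.
    static void futex_wait(unsigned* addr, unsigned val)
    { syscall(SYS_futex, addr, FUTEX_WAIT, val, nullptr); }

    // Wake every thread currently blocked in futex_wait on addr.
    static void futex_wake_all(unsigned* addr)
    { syscall(SYS_futex, addr, FUTEX_WAKE, INT_MAX); }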
Diffstat (limited to 'libstdc++-v3/include')
-rw-r--r--  libstdc++-v3/include/Makefile.am          |   1
-rw-r--r--  libstdc++-v3/include/Makefile.in          |   1
-rw-r--r--  libstdc++-v3/include/bits/atomic_futex.h  | 288
-rw-r--r--  libstdc++-v3/include/std/future           |  96
4 files changed, 339 insertions(+), 47 deletions(-)
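
The algorithmic core of the new header is a waiter-bit protocol: the high bit of a single 32-bit word flags "a waiter may be blocked in the kernel", so the notifying side pays for a wake syscall only when that bit is set, and the uncontended case is one atomic operation. A condensed sketch of the protocol (reusing the hypothetical futex_wait/futex_wake_all helpers above; no timeouts):

    #include <atomic>

    constexpr unsigned WAITER_BIT = 0x80000000; // payload uses the low 31 bits
    std::atomic<unsigned> state{0};

    // Block until the payload equals val (cf. _M_load_when_equal).
    void load_when_equal(unsigned val)
    {
      unsigned cur = state.load(std::memory_order_acquire) & ~WAITER_BIT;
      while (cur != val)
        {
          // Announce a sleeper; relaxed suffices, the syscall orders it.
          state.fetch_or(WAITER_BIT, std::memory_order_relaxed);
          // Sleep only if the word still holds the value we last saw.
          futex_wait(reinterpret_cast<unsigned*>(&state), cur | WAITER_BIT);
          cur = state.load(std::memory_order_acquire) & ~WAITER_BIT;
        }
    }

    // Publish val and wake sleepers (cf. _M_store_notify_all).
    void store_notify_all(unsigned val)
    {
      // The exchange clears WAITER_BIT; wake only if a waiter had set it.
      if (state.exchange(val, std::memory_order_release) & WAITER_BIT)
        futex_wake_all(reinterpret_cast<unsigned*>(&state));
    }

The cast mirrors the header's (unsigned*)(void*)&_M_data and assumes, as the XXX comment in the header notes, that the atomic is lock-free with its payload at offset 0.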
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 08e5d5fc9c3..4772950124a 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -83,6 +83,7 @@ bits_headers = \
 	${bits_srcdir}/allocated_ptr.h \
 	${bits_srcdir}/allocator.h \
 	${bits_srcdir}/atomic_base.h \
+	${bits_srcdir}/atomic_futex.h \
 	${bits_srcdir}/basic_ios.h \
 	${bits_srcdir}/basic_ios.tcc \
 	${bits_srcdir}/basic_string.h \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 3e5d82ec43b..ebcaa9663fe 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -351,6 +351,7 @@ bits_headers = \
 	${bits_srcdir}/allocated_ptr.h \
 	${bits_srcdir}/allocator.h \
 	${bits_srcdir}/atomic_base.h \
+	${bits_srcdir}/atomic_futex.h \
 	${bits_srcdir}/basic_ios.h \
 	${bits_srcdir}/basic_ios.tcc \
 	${bits_srcdir}/basic_string.h \
diff --git a/libstdc++-v3/include/bits/atomic_futex.h b/libstdc++-v3/include/bits/atomic_futex.h
new file mode 100644
index 00000000000..9a418d8a00b
--- /dev/null
+++ b/libstdc++-v3/include/bits/atomic_futex.h
@@ -0,0 +1,288 @@
+// -*- C++ -*- header.
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file bits/atomic_futex.h
+ *  This is an internal header file, included by other library headers.
+ *  Do not attempt to use it directly.
+ */
+
+#ifndef _GLIBCXX_ATOMIC_FUTEX_H
+#define _GLIBCXX_ATOMIC_FUTEX_H 1
+
+#pragma GCC system_header
+
+#include <bits/c++config.h>
+#include <atomic>
+#include <chrono>
+#if !defined(_GLIBCXX_HAVE_LINUX_FUTEX)
+#include <mutex>
+#include <condition_variable>
+#endif
+
+#ifndef _GLIBCXX_ALWAYS_INLINE
+#define _GLIBCXX_ALWAYS_INLINE inline __attribute__((always_inline))
+#endif
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+#if defined(_GLIBCXX_HAVE_LINUX_FUTEX)
+  struct __atomic_futex_unsigned_base
+  {
+    // Returns false iff a timeout occurred.
+    bool
+    _M_futex_wait_until(unsigned* __addr, unsigned __val, bool __has_timeout,
+	chrono::seconds __s, chrono::nanoseconds __ns);
+
+    // This can be executed after the object has been destroyed.
+    static void _M_futex_notify_all(unsigned* __addr);
+  };
+
+  template <unsigned _Waiter_bit = 0x80000000>
+  struct __atomic_futex_unsigned : __atomic_futex_unsigned_base
+  {
+    typedef chrono::system_clock __clock_t;
+
+    // XXX We expect this to be lock-free and to have the payload at
+    // offset 0.
+#if ATOMIC_INT_LOCK_FREE < 2
+# error We require lock-free atomic operations on int
+#endif
+    atomic<unsigned> _M_data;
+
+    __atomic_futex_unsigned(unsigned __data) : _M_data(__data)
+    { }
+
+    _GLIBCXX_ALWAYS_INLINE unsigned
+    _M_load(memory_order __mo)
+    {
+      return _M_data.load(__mo) & ~_Waiter_bit;
+    }
+
+  private:
+
+    // If a timeout occurs, returns a current value after the timeout;
+    // otherwise, returns the operand's value if equal is true or a
+    // different value if equal is false.
+    // The assumed value is the caller's assumption about the current value
+    // when making the call.
+    unsigned
+    _M_load_and_test_until(unsigned __assumed, unsigned __operand,
+	bool __equal, memory_order __mo, bool __has_timeout,
+	chrono::seconds __s, chrono::nanoseconds __ns)
+    {
+      for (;;)
+	{
+	  // Don't bother checking the value again because we expect the
+	  // caller to have done it recently.
+	  // memory_order_relaxed is sufficient because we can rely on just
+	  // the modification order (store_notify uses an atomic RMW
+	  // operation too), and the futex syscalls synchronize between
+	  // themselves.
+	  _M_data.fetch_or(_Waiter_bit, memory_order_relaxed);
+	  bool __ret = _M_futex_wait_until((unsigned*)(void*)&_M_data,
+	      __assumed | _Waiter_bit, __has_timeout, __s, __ns);
+	  // Fetch the current value after waiting (clears _Waiter_bit).
+	  __assumed = _M_load(__mo);
+	  if (!__ret || ((__operand == __assumed) == __equal))
+	    return __assumed;
+	  // TODO adapt wait time
+	}
+    }
+
+    // Returns the operand's value if equal is true or a different value if
+    // equal is false.
+    // The assumed value is the caller's assumption about the current value
+    // when making the call.
+    unsigned
+    _M_load_and_test(unsigned __assumed, unsigned __operand,
+	bool __equal, memory_order __mo)
+    {
+      return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
+	  false, chrono::seconds(0), chrono::nanoseconds(0));
+    }
+
+    // If a timeout occurs, returns a current value after the timeout;
+    // otherwise, returns the operand's value if equal is true or a
+    // different value if equal is false.
+    // The assumed value is the caller's assumption about the current value
+    // when making the call.
+    template<typename _Dur>
+    unsigned
+    _M_load_and_test_until_impl(unsigned __assumed, unsigned __operand,
+	bool __equal, memory_order __mo,
+	const chrono::time_point<__clock_t, _Dur>& __atime)
+    {
+      auto __s = chrono::time_point_cast<chrono::seconds>(__atime);
+      auto __ns = chrono::duration_cast<chrono::nanoseconds>(__atime - __s);
+      // XXX correct?
+      return _M_load_and_test_until(__assumed, __operand, __equal, __mo,
+	  true, __s.time_since_epoch(), __ns);
+    }
+
+  public:
+
+    _GLIBCXX_ALWAYS_INLINE unsigned
+    _M_load_when_not_equal(unsigned __val, memory_order __mo)
+    {
+      unsigned __i = _M_load(__mo);
+      // The current value already differs; return it.
+      if ((__i & ~_Waiter_bit) != __val)
+	return __i;
+      // TODO Spin-wait first.
+      return _M_load_and_test(__i, __val, false, __mo);
+    }
+
+    _GLIBCXX_ALWAYS_INLINE void
+    _M_load_when_equal(unsigned __val, memory_order __mo)
+    {
+      unsigned __i = _M_load(__mo);
+      if ((__i & ~_Waiter_bit) == __val)
+	return;
+      // TODO Spin-wait first.
+      _M_load_and_test(__i, __val, true, __mo);
+    }
+
+    // Returns false iff a timeout occurred.
+    template<typename _Rep, typename _Period>
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_load_when_equal_for(unsigned __val, memory_order __mo,
+	const chrono::duration<_Rep, _Period>& __rtime)
+    {
+      return _M_load_when_equal_until(__val, __mo,
+	  __clock_t::now() + __rtime);
+    }
+
+    // Returns false iff a timeout occurred.
+    template<typename _Clock, typename _Duration>
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_load_when_equal_until(unsigned __val, memory_order __mo,
+	const chrono::time_point<_Clock, _Duration>& __atime)
+    {
+      // DR 887 - Sync unknown clock to known clock.
+      const typename _Clock::time_point __c_entry = _Clock::now();
+      const __clock_t::time_point __s_entry = __clock_t::now();
+      const auto __delta = __atime - __c_entry;
+      const auto __s_atime = __s_entry + __delta;
+      return _M_load_when_equal_until(__val, __mo, __s_atime);
+    }
+
+    // Returns false iff a timeout occurred.
+    template<typename _Duration>
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_load_when_equal_until(unsigned __val, memory_order __mo,
+	const chrono::time_point<__clock_t, _Duration>& __atime)
+    {
+      unsigned __i = _M_load(__mo);
+      if ((__i & ~_Waiter_bit) == __val)
+	return true;
+      // TODO Spin-wait first.  Ignore effect on timeout.
+      __i = _M_load_and_test_until_impl(__i, __val, true, __mo, __atime);
+      return (__i & ~_Waiter_bit) == __val;
+    }
+
+    _GLIBCXX_ALWAYS_INLINE void
+    _M_store_notify_all(unsigned __val, memory_order __mo)
+    {
+      unsigned* __futex = (unsigned*)(void*)&_M_data;
+      if (_M_data.exchange(__val, __mo) & _Waiter_bit)
+	_M_futex_notify_all(__futex);
+    }
+  };
+
+#else
+
+  // If futexes are not available, use a mutex and a condvar to wait.
+  // Because we access the data only within critical sections, all accesses
+  // are sequentially consistent; thus, we satisfy any provided memory_order.
+  template <unsigned _Waiter_bit = 0x80000000>
+  struct __atomic_futex_unsigned
+  {
+    typedef chrono::system_clock __clock_t;
+
+    unsigned _M_data;
+    mutex _M_mutex;
+    condition_variable _M_condvar;
+
+    __atomic_futex_unsigned(unsigned __data) : _M_data(__data)
+    { }
+
+    _GLIBCXX_ALWAYS_INLINE unsigned
+    _M_load(memory_order __mo)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      return _M_data;
+    }
+
+    _GLIBCXX_ALWAYS_INLINE unsigned
+    _M_load_when_not_equal(unsigned __val, memory_order __mo)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      while (_M_data == __val)
+	_M_condvar.wait(__lock);
+      return _M_data;
+    }
+
+    _GLIBCXX_ALWAYS_INLINE void
+    _M_load_when_equal(unsigned __val, memory_order __mo)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      while (_M_data != __val)
+	_M_condvar.wait(__lock);
+    }
+
+    template<typename _Rep, typename _Period>
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_load_when_equal_for(unsigned __val, memory_order __mo,
+	const chrono::duration<_Rep, _Period>& __rtime)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      return _M_condvar.wait_for(__lock, __rtime,
+	  [&] { return _M_data == __val; });
+    }
+
+    template<typename _Clock, typename _Duration>
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_load_when_equal_until(unsigned __val, memory_order __mo,
+	const chrono::time_point<_Clock, _Duration>& __atime)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      return _M_condvar.wait_until(__lock, __atime,
+	  [&] { return _M_data == __val; });
+    }
+
+    _GLIBCXX_ALWAYS_INLINE void
+    _M_store_notify_all(unsigned __val, memory_order __mo)
+    {
+      unique_lock<mutex> __lock(_M_mutex);
+      _M_data = __val;
+      _M_condvar.notify_all();
+    }
+  };
+
+#endif
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std
+
+#endif
diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future
index 2ac6f157abe..cb0226dec4c 100644
--- a/libstdc++-v3/include/std/future
+++ b/libstdc++-v3/include/std/future
@@ -41,6 +41,7 @@
 #include <condition_variable>
 #include <system_error>
 #include <atomic>
+#include <bits/atomic_futex.h>
 #include <bits/functexcept.h>
 #include <bits/unique_ptr.h>
 #include <bits/shared_ptr.h>
@@ -294,15 +295,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       typedef _Ptr<_Result_base> _Ptr_type;
 
+      enum _Status : unsigned {
+	__not_ready,
+	__ready
+      };
+
       _Ptr_type                 _M_result;
-      mutex                     _M_mutex;
-      condition_variable        _M_cond;
-      atomic_flag               _M_retrieved = ATOMIC_FLAG_INIT;
-      bool                      _M_ready = false;
+      __atomic_futex_unsigned<> _M_status;
+      atomic_flag               _M_retrieved = ATOMIC_FLAG_INIT;
       once_flag                 _M_once;
 
     public:
-      _State_baseV2() noexcept = default;
+      _State_baseV2() noexcept : _M_result(), _M_status(_Status::__not_ready)
+      { }
       _State_baseV2(const _State_baseV2&) = delete;
       _State_baseV2& operator=(const _State_baseV2&) = delete;
       virtual ~_State_baseV2() = default;
@@ -312,9 +317,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	// Run any deferred function or join any asynchronous thread:
 	_M_complete_async();
-
-	unique_lock<mutex> __lock(_M_mutex);
-	_M_cond.wait(__lock, [&] { return _M_ready; });
+	// Acquire MO makes sure this synchronizes with the thread that made
+	// the future ready.
+	_M_status._M_load_when_equal(_Status::__ready, memory_order_acquire);
 	return *_M_result;
       }
 
@@ -322,12 +327,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
	future_status
	wait_for(const chrono::duration<_Rep, _Period>& __rel)
	{
-	  unique_lock<mutex> __lock(_M_mutex);
-	  if (_M_ready)
+	  // First, check if the future has been made ready.  Use acquire MO
+	  // to synchronize with the thread that made it ready.
+	  if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
	    return future_status::ready;
-	  if (_M_has_deferred())
+	  if (_M_is_deferred_future())
	    return future_status::deferred;
-	  if (_M_cond.wait_for(__lock, __rel, [&] { return _M_ready; }))
+	  if (_M_status._M_load_when_equal_for(_Status::__ready,
+		memory_order_acquire, __rel))
	    {
	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
	      // 2100.  timed waiting functions must also join
@@ -349,12 +356,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
	future_status
	wait_until(const chrono::time_point<_Clock, _Duration>& __abs)
	{
-	  unique_lock<mutex> __lock(_M_mutex);
-	  if (_M_ready)
+	  // First, check if the future has been made ready.  Use acquire MO
+	  // to synchronize with the thread that made it ready.
+	  if (_M_status._M_load(memory_order_acquire) == _Status::__ready)
	    return future_status::ready;
-	  if (_M_has_deferred())
+	  if (_M_is_deferred_future())
	    return future_status::deferred;
-	  if (_M_cond.wait_until(__lock, __abs, [&] { return _M_ready; }))
+	  if (_M_status._M_load_when_equal_until(_Status::__ready,
+		memory_order_acquire, __abs))
	    {
	      // _GLIBCXX_RESOLVE_LIB_DEFECTS
	      // 2100.  timed waiting functions must also join
@@ -367,45 +376,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 
       // Provide a result to the shared state and make it ready.
-      // Atomically performs:
-      //   if (!_M_ready) {
-      //     _M_result = __res();
-      //     _M_ready = true;
-      //   }
+      // Calls at most once: _M_result = __res();
       void
       _M_set_result(function<_Ptr_type()> __res, bool __ignore_failure = false)
       {
-	unique_lock<mutex> __lock(_M_mutex, defer_lock);
+	bool __did_set = false;
	// all calls to this function are serialized,
	// side-effects of invoking __res only happen once
	call_once(_M_once, &_State_baseV2::_M_do_set, this,
-	    std::__addressof(__res), std::__addressof(__lock));
-	if (__lock.owns_lock())
-	  {
-	    _M_ready = true;
-	    _M_cond.notify_all();
-	  }
+	    std::__addressof(__res), std::__addressof(__did_set));
+	if (__did_set)
+	  // Use release MO to synchronize with observers of the ready state.
+	  _M_status._M_store_notify_all(_Status::__ready,
+	      memory_order_release);
	else if (!__ignore_failure)
	  __throw_future_error(int(future_errc::promise_already_satisfied));
       }
 
       // Provide a result to the shared state but delay making it ready
       // until the calling thread exits.
-      // Atomically performs:
-      //   if (!_M_ready) {
-      //     _M_result = __res();
-      //   }
+      // Calls at most once: _M_result = __res();
       void
       _M_set_delayed_result(function<_Ptr_type()> __res,
			    weak_ptr<_State_baseV2> __self)
       {
+	bool __did_set = false;
	unique_ptr<_Make_ready> __mr{new _Make_ready};
-	unique_lock<mutex> __lock(_M_mutex, defer_lock);
	// all calls to this function are serialized,
	// side-effects of invoking __res only happen once
	call_once(_M_once, &_State_baseV2::_M_do_set, this,
-	    std::__addressof(__res), std::__addressof(__lock));
-	if (!__lock.owns_lock())
+	    std::__addressof(__res), std::__addressof(__did_set));
+	if (!__did_set)
	  __throw_future_error(int(future_errc::promise_already_satisfied));
	__mr->_M_shared_state = std::move(__self);
	__mr->_M_set();
@@ -424,12 +425,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
	  // provider is abandoning this shared state, so noone can be
	  // trying to make the shared state ready at the same time, and
	  // we can access _M_result directly instead of through call_once.
-	  {
-	    lock_guard<mutex> __lock(_M_mutex);
-	    _M_result.swap(__res);
-	    _M_ready = true;
-	  }
-	  _M_cond.notify_all();
+	  _M_result.swap(__res);
+	  // Use release MO to synchronize with observers of the ready state.
+	  _M_status._M_store_notify_all(_Status::__ready,
+	      memory_order_release);
	}
       }
@@ -523,18 +522,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     private:
       // The function invoked with std::call_once(_M_once, ...).
       void
-      _M_do_set(function<_Ptr_type()>* __f, unique_lock<mutex>* __lock)
+      _M_do_set(function<_Ptr_type()>* __f, bool* __did_set)
       {
-	_Ptr_type __res = (*__f)(); // do not hold lock while running setter
-	__lock->lock();
-	_M_result.swap(__res);
+	_Ptr_type __res = (*__f)();
+	// Notify the caller that the result was set; if (*__f)() had thrown,
+	// call_once would rethrow and __did_set would remain false (see
+	// _M_set_result).
+	*__did_set = true;
+	_M_result.swap(__res); // nothrow
       }
 
       // Wait for completion of async function.
       virtual void _M_complete_async() { }
 
       // Return true if state corresponds to a deferred function.
-      virtual bool _M_has_deferred() const { return false; }
+      virtual bool _M_is_deferred_future() const { return false; }
 
       struct _Make_ready final : __at_thread_exit_elt
       {
@@ -1606,7 +1608,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       // Caller should check whether the state is ready first, because this
       // function will return true even after the deferred function has run.
-      virtual bool _M_has_deferred() const { return true; }
+      virtual bool _M_is_deferred_future() const { return true; }
     };
 
     // Common functionality hoisted out of the _Async_state_impl template.
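
From user code the change is invisible except in performance: std::promise::set_value ends in _M_set_result/_M_store_notify_all (release), and std::future::wait/get in _M_load_when_equal (acquire), so a result written before set_value is visible after get returns. A small example exercising the new path through the public API:

    #include <future>
    #include <thread>
    #include <iostream>

    int main()
    {
      std::promise<int> p;
      std::future<int> f = p.get_future();
      std::thread producer([&p] { p.set_value(42); });
      f.wait();                     // futex wait on Linux, condvar elsewhere
      std::cout << f.get() << '\n'; // prints 42
      producer.join();
    }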