diff options
author | bkoz <bkoz@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-02-18 00:00:00 +0000 |
---|---|---|
committer | bkoz <bkoz@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-02-18 00:00:00 +0000 |
commit | 27b93b7c174d35a693e9669730399a7afc5c5303 (patch) | |
tree | 1ae40954f4791f14a61bb95b87627d6a6817f38b | |
parent | 0d5d4dc785d774b5d6e6e36dfd357483db34a9b4 (diff) | |
download | gcc-27b93b7c174d35a693e9669730399a7afc5c5303.tar.gz |
2008-02-17 Benjamin Kosnik <bkoz@redhat.com>
PR libstdc++/34797
* include/parallel/settings.h (_Settings): Reconstruct Settings class
here, uglify, remove anonymous namespace and static
members. Convert to datum.
* include/parallel/types.h: Move Settings:: enumerations here, uglify.
* src/parallel_settings.cc: New, definition for _Settings member
functions.
* include/parallel/multiway_merge.h: Same.
* include/parallel/for_each.h: Same.
* include/parallel/workstealing.h: Same.
* include/parallel/base.h: Same.
* include/parallel/numeric: Same.
* include/parallel/features.h: Same.
* include/parallel/quicksort.h: Same.
* include/parallel/equally_split.h: Same.
* include/parallel/algorithmfwd.h: Same.
* include/parallel/omp_loop_static.h: Same.
* include/parallel/random_shuffle.h: Same.
* include/parallel/balanced_quicksort.h: Same.
* include/parallel/tags.h: Same.
* include/parallel/multiway_mergesort.h: Same.
* include/parallel/numericfwd.h: Same.
* include/parallel/partition.h: Same.
* include/parallel/partial_sum.h: Same.
* include/parallel/find.h: Same.
* include/parallel/algo.h: Same.
* include/parallel/omp_loop.h: Same.
* include/parallel/sort.h: Same.
* src/Makefile.am (parallel_sources): Add parallel_settings.cc.
* src/Makefile.in: Regenerate.
* config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@132383 138bc75d-0d04-0410-961f-82ee72b054a4
28 files changed, 575 insertions, 532 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index f5b9e1e134e..133e83bc0e6 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,39 @@ +2008-02-17 Benjamin Kosnik <bkoz@redhat.com> + + PR libstdc++/34797 + * include/parallel/settings.h (_Settings): Reconstruct Settings class + here, uglify, remove anonymous namespace and static + members. Convert to datum. + * include/parallel/types.h: Move Settings:: enumerations here, uglify. + * src/parallel_settings.cc: New, definition for _Settings member + functions. + * include/parallel/multiway_merge.h: Same. + * include/parallel/for_each.h: Same. + * include/parallel/workstealing.h: Same. + * include/parallel/base.h: Same. + * include/parallel/numeric + * include/parallel/features.h: Same. + * include/parallel/quicksort.h: Same. + * include/parallel/equally_split.h: Same. + * include/parallel/algorithmfwd.h: Same. + * include/parallel/omp_loop_static.h: Same. + * include/parallel/random_shuffle.h: Same. + * include/parallel/balanced_quicksort.h: Same. + * include/parallel/tags.h: Same. + * include/parallel/multiway_mergesort.h: Same. + * include/parallel/numericfwd.h: Same. + * include/parallel/partition.h: Same. + * include/parallel/partial_sum.h: Same. + * include/parallel/find.h: Same. + * include/parallel/algo.h: Same. + * include/parallel/omp_loop.h: Same. + * include/parallel/sort.h: Same. + + * src/Makefile.am (parallel_sources): Add parallel_settings.cc. + * src/Makefile.in: Regenerate. + + * config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set. 
+ 2008-02-17 Paolo Carlini <pcarlini@suse.de> PR libstdc++/35221 diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 0374d25aadf..0467a8032fa 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -779,7 +779,15 @@ GLIBCXX_3.4.10 { _ZNKSt4hashISt10error_codeEclES0_; _ZNKSt4hashI[eg]EclE[eg]; + _ZSt17__verify_grouping*; + + _ZNSt8__detail12__prime_listE; + _ZNSt3tr18__detail12__prime_listE; + # for parallel mode + _ZN14__gnu_parallel9_Settings3getEv; + _ZN14__gnu_parallel9_Settings3setERS0_; + _ZNSt9__cxx199815_List_node_base4hook*; _ZNSt9__cxx199815_List_node_base4swap*; _ZNSt9__cxx199815_List_node_base6unhookEv; @@ -788,10 +796,6 @@ GLIBCXX_3.4.10 { _ZNSt15basic_streambufI[cw]St11char_traitsI[cw]EE6stosscEv; - _ZSt17__verify_grouping*; - - _ZNSt8__detail12__prime_listE; - _ZNSt3tr18__detail12__prime_listE; _ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE4syncEv; _ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE[5-9CD]*; diff --git a/libstdc++-v3/include/parallel/algo.h b/libstdc++-v3/include/parallel/algo.h index f1c40323413..e836b284fd3 100644 --- a/libstdc++-v3/include/parallel/algo.h +++ b/libstdc++-v3/include/parallel/algo.h @@ -87,17 +87,16 @@ namespace __parallel Function for_each_switch(RandomAccessIterator begin, RandomAccessIterator end, Function f, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::for_each_minimal_n + >= __gnu_parallel::_Settings::get().for_each_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy; - __gnu_parallel::for_each_selector<RandomAccessIterator> - functionality; + __gnu_parallel::for_each_selector<RandomAccessIterator> functionality; return __gnu_parallel:: 
for_each_template_random_access(begin, end, f, functionality, @@ -112,7 +111,7 @@ namespace __parallel template<typename Iterator, typename Function> inline Function for_each(Iterator begin, Iterator end, Function f, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<Iterator> iterator_traits; typedef typename iterator_traits::iterator_category iterator_category; @@ -333,7 +332,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(last - begin) - > __gnu_parallel::Settings::unique_copy_minimal_n)) + > __gnu_parallel::_Settings::get().unique_copy_minimal_n)) return __gnu_parallel::parallel_unique_copy(begin, last, out, pred); else return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred); @@ -413,9 +412,9 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) - >= __gnu_parallel::Settings::set_union_minimal_n + >= __gnu_parallel::_Settings::get().set_union_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) - >= __gnu_parallel::Settings::set_union_minimal_n)) + >= __gnu_parallel::_Settings::get().set_union_minimal_n)) return __gnu_parallel::parallel_set_union(begin1, end1, begin2, end2, result, pred); else @@ -519,9 +518,9 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) - >= __gnu_parallel::Settings::set_union_minimal_n + >= __gnu_parallel::_Settings::get().set_union_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) - >= __gnu_parallel::Settings::set_union_minimal_n)) + >= __gnu_parallel::_Settings::get().set_union_minimal_n)) return __gnu_parallel::parallel_set_intersection(begin1, end1, begin2, end2, result, pred); else @@ -632,9 +631,9 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) - 
>= __gnu_parallel::Settings::set_symmetric_difference_minimal_n + >= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) - >= __gnu_parallel::Settings::set_symmetric_difference_minimal_n)) + >= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n)) return __gnu_parallel::parallel_set_symmetric_difference(begin1, end1, begin2, end2, result, pred); @@ -741,9 +740,9 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) - >= __gnu_parallel::Settings::set_difference_minimal_n + >= __gnu_parallel::_Settings::get().set_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) - >= __gnu_parallel::Settings::set_difference_minimal_n)) + >= __gnu_parallel::_Settings::get().set_difference_minimal_n)) return __gnu_parallel::parallel_set_difference(begin1, end1, begin2, end2, result, pred); @@ -903,7 +902,7 @@ namespace __parallel typename iterator_traits<RandomAccessIterator>::difference_type count_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& value, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) { typedef iterator_traits<RandomAccessIterator> traits_type; @@ -913,7 +912,7 @@ namespace __parallel if (_GLIBCXX_PARALLEL_CONDITION( static_cast<sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::count_minimal_n + >= __gnu_parallel::_Settings::get().count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { __gnu_parallel::count_selector<RandomAccessIterator, difference_type> @@ -941,7 +940,7 @@ namespace __parallel template<typename InputIterator, typename T> inline typename iterator_traits<InputIterator>::difference_type count(InputIterator begin, InputIterator end, const T& value, - __gnu_parallel::parallelism parallelism_tag) + 
__gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator> traits_type; typedef typename traits_type::iterator_category iterator_category; @@ -971,7 +970,7 @@ namespace __parallel typename iterator_traits<RandomAccessIterator>::difference_type count_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) { typedef iterator_traits<RandomAccessIterator> traits_type; @@ -981,7 +980,7 @@ namespace __parallel if (_GLIBCXX_PARALLEL_CONDITION( static_cast<sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::count_minimal_n + >= __gnu_parallel::_Settings::get().count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { difference_type res = 0; @@ -1010,7 +1009,7 @@ namespace __parallel template<typename InputIterator, typename Predicate> inline typename iterator_traits<InputIterator>::difference_type count_if(InputIterator begin, InputIterator end, Predicate pred, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator> traits_type; typedef typename traits_type::iterator_category iterator_category; @@ -1217,12 +1216,12 @@ namespace __parallel transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, RandomAccessIterator2 result, UnaryOperation unary_op, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::transform_minimal_n + >= __gnu_parallel::_Settings::get().transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy = true; @@ -1259,7 +1258,7 @@ namespace __parallel inline 
OutputIterator transform(InputIterator begin, InputIterator end, OutputIterator result, UnaryOperation unary_op, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<InputIterator> iteratori_traits; typedef std::iterator_traits<OutputIterator> iteratoro_traits; @@ -1306,11 +1305,11 @@ namespace __parallel RandomAccessIterator3 result, BinaryOperation binary_op, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( - (end1 - begin1) >= __gnu_parallel::Settings::transform_minimal_n + (end1 - begin1) >= __gnu_parallel::_Settings::get().transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy = true; @@ -1352,7 +1351,7 @@ namespace __parallel transform(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, OutputIterator result, BinaryOperation binary_op, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<InputIterator1> iteratori1_traits; typedef typename iteratori1_traits::iterator_category @@ -1410,7 +1409,7 @@ namespace __parallel replace_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& old_value, const T& new_value, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { // XXX parallel version is where? 
@@ -1422,7 +1421,7 @@ namespace __parallel template<typename ForwardIterator, typename T> inline void replace(ForwardIterator begin, ForwardIterator end, const T& old_value, - const T& new_value, __gnu_parallel::parallelism parallelism_tag) + const T& new_value, __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<ForwardIterator> traits_type; typedef typename traits_type::iterator_category iterator_category; @@ -1463,12 +1462,12 @@ namespace __parallel replace_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, const T& new_value, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::replace_minimal_n + >= __gnu_parallel::_Settings::get().replace_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy; @@ -1491,7 +1490,7 @@ namespace __parallel inline void replace_if(ForwardIterator begin, ForwardIterator end, Predicate pred, const T& new_value, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<ForwardIterator> iterator_traits; typedef typename iterator_traits::iterator_category iterator_category; @@ -1528,12 +1527,12 @@ namespace __parallel void generate_switch(RandomAccessIterator begin, RandomAccessIterator end, Generator gen, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::generate_minimal_n + >= __gnu_parallel::_Settings::get().generate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy; @@ -1552,7 +1551,7 @@ namespace __parallel 
template<typename ForwardIterator, typename Generator> inline void generate(ForwardIterator begin, ForwardIterator end, - Generator gen, __gnu_parallel::parallelism parallelism_tag) + Generator gen, __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<ForwardIterator> iterator_traits; typedef typename iterator_traits::iterator_category iterator_category; @@ -1588,7 +1587,7 @@ namespace __parallel inline RandomAccessIterator generate_n_switch(RandomAccessIterator begin, Size n, Generator gen, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { // XXX parallel version is where? @@ -1599,7 +1598,7 @@ namespace __parallel template<typename OutputIterator, typename Size, typename Generator> inline OutputIterator generate_n(OutputIterator begin, Size n, Generator gen, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<OutputIterator> iterator_traits; typedef typename iterator_traits::iterator_category iterator_category; @@ -1661,7 +1660,7 @@ namespace __parallel return; if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::random_shuffle_minimal_n)) + >= __gnu_parallel::_Settings::get().random_shuffle_minimal_n)) __gnu_parallel::parallel_random_shuffle(begin, end, rand); else __gnu_parallel::sequential_random_shuffle(begin, end, rand); @@ -1689,7 +1688,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::partition_minimal_n)) + >= __gnu_parallel::_Settings::get().partition_minimal_n)) { typedef typename std::iterator_traits<RandomAccessIterator>:: difference_type difference_type; @@ -1748,7 +1747,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( 
static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::sort_minimal_n)) + >= __gnu_parallel::_Settings::get().sort_minimal_n)) __gnu_parallel::parallel_sort(begin, end, comp, false); else sort(begin, end, comp, __gnu_parallel::sequential_tag()); @@ -1788,7 +1787,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::sort_minimal_n)) + >= __gnu_parallel::_Settings::get().sort_minimal_n)) __gnu_parallel::parallel_sort(begin, end, comp, true); else stable_sort(begin, end, comp, __gnu_parallel::sequential_tag()); @@ -1837,9 +1836,9 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( (static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) - >= __gnu_parallel::Settings::merge_minimal_n + >= __gnu_parallel::_Settings::get().merge_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) - >= __gnu_parallel::Settings::merge_minimal_n))) + >= __gnu_parallel::_Settings::get().merge_minimal_n))) return __gnu_parallel::parallel_merge_advance(begin1, end1, begin2, end2, result, (end1 - begin1) @@ -1913,7 +1912,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::nth_element_minimal_n)) + >= __gnu_parallel::_Settings::get().nth_element_minimal_n)) __gnu_parallel::parallel_nth_element(begin, nth, end, comp); else nth_element(begin, nth, end, comp, __gnu_parallel::sequential_tag()); @@ -1953,7 +1952,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::partial_sort_minimal_n)) + >= __gnu_parallel::_Settings::get().partial_sort_minimal_n)) __gnu_parallel::parallel_partial_sort(begin, middle, end, comp); else partial_sort(begin, middle, end, comp, @@ -1997,12 +1996,12 @@ namespace __parallel RandomAccessIterator 
max_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::max_element_minimal_n + >= __gnu_parallel::_Settings::get().max_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { RandomAccessIterator res(begin); @@ -2026,7 +2025,7 @@ namespace __parallel template<typename ForwardIterator> inline ForwardIterator max_element(ForwardIterator begin, ForwardIterator end, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef typename iterator_traits<ForwardIterator>::value_type value_type; return max_element(begin, end, std::less<value_type>(), parallelism_tag); @@ -2044,7 +2043,7 @@ namespace __parallel template<typename ForwardIterator, typename Comparator> inline ForwardIterator max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<ForwardIterator> traits_type; typedef typename traits_type::iterator_category iterator_category; @@ -2088,12 +2087,12 @@ namespace __parallel RandomAccessIterator min_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::min_element_minimal_n + >= __gnu_parallel::_Settings::get().min_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { RandomAccessIterator res(begin); @@ -2117,7 +2116,7 @@ namespace 
__parallel template<typename ForwardIterator> inline ForwardIterator min_element(ForwardIterator begin, ForwardIterator end, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef typename iterator_traits<ForwardIterator>::value_type value_type; return min_element(begin, end, std::less<value_type>(), parallelism_tag); @@ -2135,7 +2134,7 @@ namespace __parallel template<typename ForwardIterator, typename Comparator> inline ForwardIterator min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<ForwardIterator> traits_type; typedef typename traits_type::iterator_category iterator_category; diff --git a/libstdc++-v3/include/parallel/algorithmfwd.h b/libstdc++-v3/include/parallel/algorithmfwd.h index f1aa5d71908..ea6b245eb7e 100644 --- a/libstdc++-v3/include/parallel/algorithmfwd.h +++ b/libstdc++-v3/include/parallel/algorithmfwd.h @@ -90,7 +90,7 @@ namespace __parallel template<typename _IIter, typename _Tp> typename iterator_traits<_IIter>::difference_type - count(_IIter, _IIter, const _Tp&, __gnu_parallel::parallelism); + count(_IIter, _IIter, const _Tp&, __gnu_parallel::_Parallelism); template<typename _IIter, typename _Tp, typename _IterTag> typename iterator_traits<_IIter>::difference_type @@ -99,7 +99,7 @@ namespace __parallel template<typename _RAIter, typename _Tp> typename iterator_traits<_RAIter>::difference_type count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _Predicate> @@ -112,7 +112,7 @@ namespace __parallel template<typename _IIter, typename _Predicate> typename iterator_traits<_IIter>::difference_type - count_if(_IIter, _IIter, _Predicate, __gnu_parallel::parallelism); + count_if(_IIter, _IIter, _Predicate, __gnu_parallel::_Parallelism); 
template<typename _IIter, typename _Predicate, typename _IterTag> typename iterator_traits<_IIter>::difference_type @@ -121,7 +121,7 @@ namespace __parallel template<typename _RAIter, typename _Predicate> typename iterator_traits<_RAIter>::difference_type count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); // algobase.h template<typename _IIter1, typename _IIter2> @@ -219,7 +219,7 @@ namespace __parallel template<typename _Iterator, typename _Function> _Function - for_each(_Iterator, _Iterator, _Function, __gnu_parallel::parallelism); + for_each(_Iterator, _Iterator, _Function, __gnu_parallel::_Parallelism); template<typename _IIter, typename _Function, typename _IterTag> _Function @@ -228,7 +228,7 @@ namespace __parallel template<typename _RAIter, typename _Function> _Function for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _FIter, typename _Generator> @@ -241,7 +241,7 @@ namespace __parallel template<typename _FIter, typename _Generator> void - generate(_FIter, _FIter, _Generator, __gnu_parallel::parallelism); + generate(_FIter, _FIter, _Generator, __gnu_parallel::_Parallelism); template<typename _FIter, typename _Generator, typename _IterTag> void @@ -250,7 +250,7 @@ namespace __parallel template<typename _RAIter, typename _Generator> void generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _OIter, typename _Size, typename _Generator> _OIter @@ -262,7 +262,7 @@ namespace __parallel template<typename _OIter, typename _Size, typename _Generator> _OIter - generate_n(_OIter, _Size, _Generator, __gnu_parallel::parallelism); + generate_n(_OIter, _Size, _Generator, __gnu_parallel::_Parallelism); template<typename _OIter, typename _Size, typename _Generator, typename 
_IterTag> @@ -272,7 +272,7 @@ namespace __parallel template<typename _RAIter, typename _Size, typename _Generator> _RAIter generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2> bool @@ -416,7 +416,7 @@ namespace __parallel template<typename _IIter, typename _OIter, typename UnaryOperation> _OIter transform(_IIter, _IIter, _OIter, UnaryOperation, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _OIter, typename UnaryOperation, typename _IterTag1, typename _IterTag2> @@ -429,7 +429,7 @@ namespace __parallel _RAOIter transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _OIter, @@ -447,7 +447,7 @@ namespace __parallel typename _BiOperation> _OIter transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _RAIter1, typename _RAIter2, typename _RAIter3, typename _BiOperation> @@ -455,7 +455,7 @@ namespace __parallel transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _OIter, typename _BiOperation, typename _Tag1, @@ -477,7 +477,7 @@ namespace __parallel template<typename _FIter, typename _Tp> void replace(_FIter, _FIter, const _Tp&, const _Tp&, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _FIter, typename _Tp, typename _IterTag> void @@ -486,7 +486,7 @@ namespace __parallel template<typename _RAIter, typename _Tp> void replace_switch(_RAIter, _RAIter, const _Tp&, const 
_Tp&, - random_access_iterator_tag, __gnu_parallel::parallelism); + random_access_iterator_tag, __gnu_parallel::_Parallelism); template<typename _FIter, typename _Predicate, typename _Tp> @@ -501,7 +501,7 @@ namespace __parallel template<typename _FIter, typename _Predicate, typename _Tp> void replace_if(_FIter, _FIter, _Predicate, const _Tp&, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _FIter, typename _Predicate, typename _Tp, typename _IterTag> @@ -512,7 +512,7 @@ namespace __parallel void replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _FIter> @@ -525,7 +525,7 @@ namespace __parallel template<typename _FIter> _FIter - max_element(_FIter, _FIter, __gnu_parallel::parallelism); + max_element(_FIter, _FIter, __gnu_parallel::_Parallelism); template<typename _FIter, typename _Compare> _FIter @@ -537,7 +537,7 @@ namespace __parallel template<typename _FIter, typename _Compare> _FIter - max_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism); + max_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); template<typename _FIter, typename _Compare, typename _IterTag> _FIter @@ -546,7 +546,7 @@ namespace __parallel template<typename _RAIter, typename _Compare> _RAIter max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _OIter> @@ -594,7 +594,7 @@ namespace __parallel template<typename _FIter> _FIter - min_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag); + min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag); template<typename _FIter, typename _Compare> _FIter @@ -606,7 +606,7 @@ namespace __parallel template<typename _FIter, typename _Compare> _FIter - min_element(_FIter, _FIter, _Compare, 
__gnu_parallel::parallelism); + min_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); template<typename _FIter, typename _Compare, typename _IterTag> _FIter @@ -615,7 +615,7 @@ namespace __parallel template<typename _RAIter, typename _Compare> _RAIter min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _RAIter> void diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h index 182c82df602..f6b3297cb45 100644 --- a/libstdc++-v3/include/parallel/balanced_quicksort.h +++ b/libstdc++-v3/include/parallel/balanced_quicksort.h @@ -252,7 +252,7 @@ template<typename RandomAccessIterator, typename Comparator> QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam]; - difference_type base_case_n = Settings::sort_qsb_base_case_maximal_n; + difference_type base_case_n = _Settings::get().sort_qsb_base_case_maximal_n; if (base_case_n < 2) base_case_n = 2; thread_index_t num_threads = tl.num_threads; diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h index de231698cc6..2060d817e0c 100644 --- a/libstdc++-v3/include/parallel/base.h +++ b/libstdc++-v3/include/parallel/base.h @@ -38,11 +38,12 @@ #ifndef _GLIBCXX_PARALLEL_BASE_H #define _GLIBCXX_PARALLEL_BASE_H 1 -#include <parallel/features.h> +#include <cstdio> #include <functional> +#include <omp.h> +#include <parallel/features.h> #include <parallel/basic_iterator.h> #include <parallel/parallel.h> -#include <cstdio> // Parallel mode namespaces. @@ -67,6 +68,7 @@ namespace __gnu_parallel */ namespace __gnu_sequential { + // Import whatever is the serial version. 
#ifdef _GLIBCXX_PARALLEL using namespace std::__norm; #else @@ -77,6 +79,22 @@ namespace __gnu_sequential namespace __gnu_parallel { + // NB: Including this file cannot produce (unresolved) symbols from + // the OpenMP runtime unless the parallel mode is actually invoked + // and active, which imples that the OpenMP runtime is actually + // going to be linked in. + inline int + get_max_threads() + { + int __i = omp_get_max_threads(); + return __i > 1 ? __i : 1; + } + + + inline bool + is_parallel(const _Parallelism __p) { return __p != sequential; } + + // XXX remove std::duplicates from here if possible, // XXX but keep minimal dependencies. @@ -175,11 +193,8 @@ template<typename _Predicate, typename argument_type> /** @brief Similar to std::binder1st, * but giving the argument types explicitly. */ -template< - typename _Operation, - typename first_argument_type, - typename second_argument_type, - typename result_type> +template<typename _Operation, typename first_argument_type, + typename second_argument_type, typename result_type> class binder1st : public std::unary_function<second_argument_type, result_type> { @@ -207,11 +222,8 @@ template< * @brief Similar to std::binder2nd, but giving the argument types * explicitly. */ -template< - typename _Operation, - typename first_argument_type, - typename second_argument_type, - typename result_type> +template<typename _Operation, typename first_argument_type, + typename second_argument_type, typename result_type> class binder2nd : public std::unary_function<first_argument_type, result_type> { diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h index 4c4167e3d25..37e45816727 100644 --- a/libstdc++-v3/include/parallel/equally_split.h +++ b/libstdc++-v3/include/parallel/equally_split.h @@ -51,13 +51,11 @@ namespace __gnu_parallel * @returns End of splitter sequence, i. e. 
@c s+num_threads+1 */ template<typename difference_type, typename OutputIterator> OutputIterator - equally_split(difference_type n, - thread_index_t num_threads, - OutputIterator s) + equally_split(difference_type n, thread_index_t num_threads, OutputIterator s) { - difference_type chunk_length = n / num_threads, - num_longer_chunks = n % num_threads, - pos = 0; + difference_type chunk_length = n / num_threads; + difference_type num_longer_chunks = n % num_threads; + difference_type pos = 0; for (thread_index_t i = 0; i < num_threads; ++i) { *s++ = pos; @@ -75,17 +73,16 @@ template<typename difference_type, typename OutputIterator> * thread number thread_no+1 (excluded). * @param n Number of elements * @param num_threads Number of parts - * @returns Splitting point */ + * @returns Splitting point */ template<typename difference_type> difference_type equally_split_point(difference_type n, thread_index_t num_threads, thread_index_t thread_no) { - difference_type chunk_length = n / num_threads, - num_longer_chunks = n % num_threads; - - if(thread_no < num_longer_chunks) + difference_type chunk_length = n / num_threads; + difference_type num_longer_chunks = n % num_threads; + if (thread_no < num_longer_chunks) return thread_no * (chunk_length + 1); else return num_longer_chunks * (chunk_length + 1) diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h index a78c16b8886..2e09980405e 100644 --- a/libstdc++-v3/include/parallel/features.h +++ b/libstdc++-v3/include/parallel/features.h @@ -43,21 +43,21 @@ #ifndef _GLIBCXX_MERGESORT /** @def _GLIBCXX_MERGESORT * @brief Include parallel multi-way mergesort. - * @see __gnu_parallel::Settings::sort_algorithm */ + * @see __gnu_parallel::_Settings::sort_algorithm */ #define _GLIBCXX_MERGESORT 1 #endif #ifndef _GLIBCXX_QUICKSORT /** @def _GLIBCXX_QUICKSORT * @brief Include parallel unbalanced quicksort. 
- * @see __gnu_parallel::Settings::sort_algorithm */ + * @see __gnu_parallel::_Settings::sort_algorithm */ #define _GLIBCXX_QUICKSORT 1 #endif #ifndef _GLIBCXX_BAL_QUICKSORT /** @def _GLIBCXX_BAL_QUICKSORT * @brief Include parallel dynamically load-balanced quicksort. - * @see __gnu_parallel::Settings::sort_algorithm */ + * @see __gnu_parallel::_Settings::sort_algorithm */ #define _GLIBCXX_BAL_QUICKSORT 1 #endif @@ -65,7 +65,7 @@ /** @def _GLIBCXX_LOSER_TREE * @brief Include guarded (sequences may run empty) loser tree, * moving objects. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE 1 #endif @@ -73,21 +73,21 @@ /** @def _GLIBCXX_LOSER_TREE_EXPLICIT * @brief Include standard loser tree, storing two flags for infimum * and supremum. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_EXPLICIT 0 #endif #ifndef _GLIBCXX_LOSER_TREE_REFERENCE /** @def _GLIBCXX_LOSER_TREE_REFERENCE * @brief Include some loser tree variant. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_REFERENCE 0 #endif #ifndef _GLIBCXX_LOSER_TREE_POINTER /** @def _GLIBCXX_LOSER_TREE_POINTER * @brief Include some loser tree variant. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_POINTER 1 #endif @@ -95,48 +95,48 @@ /** @def _GLIBCXX_LOSER_TREE_UNGUARDED * @brief Include unguarded (sequences must not run empty) loser * tree, moving objects. 
- * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_UNGUARDED 0 #endif #ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED /** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED * @brief Include some loser tree variant. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1 #endif #ifndef _GLIBCXX_LOSER_TREE_COMBINED /** @def _GLIBCXX_LOSER_TREE_COMBINED * @brief Include some loser tree variant. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_COMBINED 0 #endif #ifndef _GLIBCXX_LOSER_TREE_SENTINEL /** @def _GLIBCXX_LOSER_TREE_SENTINEL * @brief Include some loser tree variant. - * @see __gnu_parallel::Settings multiway_merge_algorithm */ + * @see __gnu_parallel::_Settings multiway_merge_algorithm */ #define _GLIBCXX_LOSER_TREE_SENTINEL 0 #endif #ifndef _GLIBCXX_FIND_GROWING_BLOCKS /** @brief Include the growing blocks variant for std::find. - * @see __gnu_parallel::Settings::find_distribution */ + * @see __gnu_parallel::_Settings::find_algorithm */ #define _GLIBCXX_FIND_GROWING_BLOCKS 1 #endif #ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS /** @brief Include the equal-sized blocks variant for std::find. - * @see __gnu_parallel::Settings::find_distribution */ + * @see __gnu_parallel::_Settings::find_algorithm */ #define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1 #endif #ifndef _GLIBCXX_FIND_EQUAL_SPLIT /** @def _GLIBCXX_FIND_EQUAL_SPLIT * @brief Include the equal splitting variant for std::find. 
- * @see __gnu_parallel::Settings::find_distribution */ + * @see __gnu_parallel::_Settings::find_algorithm */ #define _GLIBCXX_FIND_EQUAL_SPLIT 1 #endif diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h index e31638a0ee6..3e0084f68ab 100644 --- a/libstdc++-v3/include/parallel/find.h +++ b/libstdc++-v3/include/parallel/find.h @@ -66,15 +66,15 @@ template<typename RandomAccessIterator1, find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred, Selector selector) { - switch (Settings::find_distribution) + switch (_Settings::get().find_algorithm) { - case Settings::GROWING_BLOCKS: + case GROWING_BLOCKS: return find_template(begin1, end1, begin2, pred, selector, growing_blocks_tag()); - case Settings::CONSTANT_SIZE_BLOCKS: + case CONSTANT_SIZE_BLOCKS: return find_template(begin1, end1, begin2, pred, selector, constant_size_blocks_tag()); - case Settings::EQUAL_SPLIT: + case EQUAL_SPLIT: return find_template(begin1, end1, begin2, pred, selector, equal_split_tag()); default: @@ -176,10 +176,10 @@ template<typename RandomAccessIterator1, * @param pred Find predicate. * @param selector Functionality (e. g. std::find_if (), std::equal(),...) * @return Place of finding in both sequences. - * @see __gnu_parallel::Settings::find_sequential_search_size - * @see __gnu_parallel::Settings::find_initial_block_size - * @see __gnu_parallel::Settings::find_maximum_block_size - * @see __gnu_parallel::Settings::find_increasing_factor + * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_initial_block_size + * @see __gnu_parallel::_Settings::find_maximum_block_size + * @see __gnu_parallel::_Settings::find_increasing_factor * * There are two main differences between the growing blocks and * the constant-size blocks variants. 
@@ -204,10 +204,12 @@ template<typename RandomAccessIterator1, typedef typename traits_type::difference_type difference_type; typedef typename traits_type::value_type value_type; + const _Settings& __s = _Settings::get(); + difference_type length = end1 - begin1; difference_type sequential_search_size = - std::min<difference_type>(length, Settings::find_sequential_search_size); + std::min<difference_type>(length, __s.find_sequential_search_size); // Try it sequentially first. std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = @@ -233,7 +235,7 @@ template<typename RandomAccessIterator1, // Not within first k elements -> start parallel. thread_index_t iam = omp_get_thread_num(); - difference_type block_size = Settings::find_initial_block_size; + difference_type block_size = __s.find_initial_block_size; difference_type start = fetch_and_add<difference_type>(&next_block_start, block_size); @@ -269,9 +271,8 @@ template<typename RandomAccessIterator1, } block_size = - std::min<difference_type>(block_size - * Settings::find_increasing_factor, - Settings::find_maximum_block_size); + std::min<difference_type>(block_size * __s.find_increasing_factor, + __s.find_maximum_block_size); // Get new block, update pointer to next block. start = @@ -302,8 +303,8 @@ template<typename RandomAccessIterator1, * @param pred Find predicate. * @param selector Functionality (e. g. std::find_if (), std::equal(),...) * @return Place of finding in both sequences. - * @see __gnu_parallel::Settings::find_sequential_search_size - * @see __gnu_parallel::Settings::find_block_size + * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_block_size * There are two main differences between the growing blocks and the * constant-size blocks variants. * 1. For GB, the block size grows; for CSB, the block size is fixed. 
@@ -325,10 +326,12 @@ template<typename RandomAccessIterator1, typedef typename traits_type::difference_type difference_type; typedef typename traits_type::value_type value_type; + const _Settings& __s = _Settings::get(); + difference_type length = end1 - begin1; difference_type sequential_search_size = std::min<difference_type>( - length, Settings::find_sequential_search_size); + length, __s.find_sequential_search_size); // Try it sequentially first. std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = @@ -351,7 +354,7 @@ template<typename RandomAccessIterator1, num_threads = omp_get_num_threads(); thread_index_t iam = omp_get_thread_num(); - difference_type block_size = Settings::find_initial_block_size; + difference_type block_size = __s.find_initial_block_size; // First element of thread's current iteration. difference_type iteration_start = sequential_search_size; diff --git a/libstdc++-v3/include/parallel/for_each.h b/libstdc++-v3/include/parallel/for_each.h index c5b15794823..8a4d702d2d5 100644 --- a/libstdc++-v3/include/parallel/for_each.h +++ b/libstdc++-v3/include/parallel/for_each.h @@ -71,7 +71,7 @@ namespace __gnu_parallel Result& output, typename std::iterator_traits<InputIterator>:: difference_type bound, - parallelism parallelism_tag) + _Parallelism parallelism_tag) { if (parallelism_tag == parallel_unbalanced) return for_each_template_random_access_ed(begin, end, user_op, diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index 419e96a3b5e..6cc724b6015 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -1359,11 +1359,10 @@ template<typename RandomAccessIteratorIterator, RandomAccessIterator3 return_target = target; int k = static_cast<int>(seqs_end - seqs_begin); - Settings::MultiwayMergeAlgorithm mwma = - Settings::multiway_merge_algorithm; + _MultiwayMergeAlgorithm mwma = 
_Settings::get().multiway_merge_algorithm; - if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL) - mwma = Settings::LOSER_TREE_COMBINED; + if (!sentinel && mwma == LOSER_TREE_SENTINEL) + mwma = LOSER_TREE_COMBINED; switch (k) { @@ -1385,14 +1384,14 @@ template<typename RandomAccessIteratorIterator, case 3: switch (mwma) { - case Settings::LOSER_TREE_COMBINED: + case LOSER_TREE_COMBINED: return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable); break; - case Settings::LOSER_TREE_SENTINEL: + case LOSER_TREE_SENTINEL: return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, @@ -1413,13 +1412,13 @@ template<typename RandomAccessIteratorIterator, case 4: switch (mwma) { - case Settings::LOSER_TREE_COMBINED: + case LOSER_TREE_COMBINED: return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable); break; - case Settings::LOSER_TREE_SENTINEL: + case LOSER_TREE_SENTINEL: return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, @@ -1440,14 +1439,14 @@ template<typename RandomAccessIteratorIterator, { switch (mwma) { - case Settings::BUBBLE: + case BUBBLE: return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable); break; #if _GLIBCXX_LOSER_TREE_EXPLICIT - case Settings::LOSER_TREE_EXPLICIT: + case LOSER_TREE_EXPLICIT: return_target = multiway_merge_loser_tree< LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, @@ -1457,7 +1456,7 @@ template<typename RandomAccessIteratorIterator, break; #endif #if _GLIBCXX_LOSER_TREE - case Settings::LOSER_TREE: + case LOSER_TREE: return_target = multiway_merge_loser_tree< LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, @@ -1467,7 +1466,7 @@ template<typename RandomAccessIteratorIterator, break; #endif #if _GLIBCXX_LOSER_TREE_COMBINED - case Settings::LOSER_TREE_COMBINED: + case LOSER_TREE_COMBINED: return_target = multiway_merge_loser_tree_combined(seqs_begin, 
seqs_end, target, @@ -1476,7 +1475,7 @@ template<typename RandomAccessIteratorIterator, break; #endif #if _GLIBCXX_LOSER_TREE_SENTINEL - case Settings::LOSER_TREE_SENTINEL: + case LOSER_TREE_SENTINEL: return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, @@ -1550,6 +1549,7 @@ template<typename RandomAccessIteratorIterator, thread_index_t num_threads = static_cast<thread_index_t>( std::min<difference_type>(get_max_threads(), total_length)); + const _Settings& __s = _Settings::get(); # pragma omp parallel num_threads (num_threads) { @@ -1562,10 +1562,10 @@ template<typename RandomAccessIteratorIterator, for (int s = 0; s < num_threads; ++s) pieces[s].resize(k); - difference_type num_samples = - Settings::merge_oversampling * num_threads; + difference_type num_samples = __s.merge_oversampling + * num_threads; - if (Settings::multiway_merge_splitting == Settings::SAMPLING) + if (__s.multiway_merge_splitting == SAMPLING) { value_type* samples = static_cast<value_type*>( ::operator new(sizeof(value_type) * k * num_samples)); @@ -1623,7 +1623,7 @@ template<typename RandomAccessIteratorIterator, } else { - // (Settings::multiway_merge_splitting == Settings::EXACT). + // (_Settings::multiway_merge_splitting == _Settings::EXACT). 
std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads]; std::vector< @@ -1768,10 +1768,12 @@ template<typename RandomAccessIteratorPairIterator, if (seqs_begin == seqs_end) return target; + const _Settings& __s = _Settings::get(); + RandomAccessIterator3 target_end; if (_GLIBCXX_PARALLEL_CONDITION( - ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) - && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) + ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k) + && ((sequence_index_t)length >= __s.multiway_merge_minimal_n))) target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, static_cast<difference_type>(length), @@ -1813,15 +1815,14 @@ template<typename RandomAccessIteratorPairIterator, _GLIBCXX_CALL(seqs_end - seqs_begin) - if (_GLIBCXX_PARALLEL_CONDITION( - ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) - && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) - return parallel_multiway_merge( - seqs_begin, seqs_end, - target, comp, static_cast<difference_type>(length), stable, true); + const _Settings& __s = _Settings::get(); + const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k; + const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n; + if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2)) + return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, + length, stable, true); else - return multiway_merge(seqs_begin, seqs_end, - target, comp, length, stable, + return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag()); } } diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index 923a2a89944..c8ceb2f40b7 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -130,8 +130,7 @@ template<typename RandomAccessIterator, typename 
_DifferenceTp> thread_index_t iam = omp_get_thread_num(); - num_samples = - Settings::sort_mwms_oversampling * sd->num_threads - 1; + num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; difference_type* es = new difference_type[num_samples + 2]; @@ -194,8 +193,8 @@ template<typename RandomAccessIterator, typename Comparator> // Invariant: locally sorted subsequence in sd->sorting_places[iam], // sd->sorting_places[iam] + length_local. - - if (Settings::sort_splitting == Settings::SAMPLING) + const _Settings& __s = _Settings::get(); + if (__s.sort_splitting == SAMPLING) { difference_type num_samples; determine_samples(sd, num_samples); @@ -237,7 +236,7 @@ template<typename RandomAccessIterator, typename Comparator> sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; } } - else if (Settings::sort_splitting == Settings::EXACT) + else if (__s.sort_splitting == EXACT) { # pragma omp barrier @@ -355,6 +354,7 @@ template<typename RandomAccessIterator, typename Comparator> // shared variables PMWMSSortingData<RandomAccessIterator> sd; difference_type* starts; + const _Settings& __s = _Settings::get(); # pragma omp parallel num_threads(num_threads) { @@ -374,10 +374,10 @@ template<typename RandomAccessIterator, typename Comparator> sd.merging_places = new RandomAccessIterator[num_threads]; #endif - if (Settings::sort_splitting == Settings::SAMPLING) + if (__s.sort_splitting == SAMPLING) { unsigned int size = - (Settings::sort_mwms_oversampling * num_threads - 1) + (__s.sort_mwms_oversampling * num_threads - 1) * num_threads; sd.samples = static_cast<value_type*>( ::operator new(size * sizeof(value_type))); @@ -412,7 +412,7 @@ template<typename RandomAccessIterator, typename Comparator> delete[] sd.sorting_places; delete[] sd.merging_places; - if (Settings::sort_splitting == Settings::SAMPLING) + if (__s.sort_splitting == SAMPLING) ::operator delete(sd.samples); delete[] sd.offsets; diff --git a/libstdc++-v3/include/parallel/numeric 
b/libstdc++-v3/include/parallel/numeric index 27bad63d3e5..1269c5251e4 100644 --- a/libstdc++-v3/include/parallel/numeric +++ b/libstdc++-v3/include/parallel/numeric @@ -91,12 +91,12 @@ namespace __parallel accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end, T init, BinaryOperation binary_op, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::accumulate_minimal_n + >= __gnu_parallel::_Settings::get().accumulate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { T res = init; @@ -121,7 +121,7 @@ namespace __parallel template<typename InputIterator, typename T> inline T accumulate(InputIterator begin, InputIterator end, T init, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef std::iterator_traits<InputIterator> iterator_traits; typedef typename iterator_traits::value_type value_type; @@ -149,7 +149,7 @@ namespace __parallel inline T accumulate(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator> iterator_traits; typedef typename iterator_traits::iterator_category iterator_category; @@ -197,11 +197,11 @@ namespace __parallel BinaryFunction2 binary_op2, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) { if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1) - >= __gnu_parallel::Settings:: + >= __gnu_parallel::_Settings::get(). 
accumulate_minimal_n && __gnu_parallel:: is_parallel(parallelism_tag))) @@ -241,7 +241,7 @@ namespace __parallel inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator1> traits1_type; typedef typename traits1_type::iterator_category iterator1_category; @@ -276,7 +276,7 @@ namespace __parallel inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator1> traits_type1; typedef typename traits_type1::value_type value_type1; @@ -347,7 +347,7 @@ namespace __parallel { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::partial_sum_minimal_n)) + >= __gnu_parallel::_Settings::get().partial_sum_minimal_n)) return __gnu_parallel::parallel_partial_sum(begin, end, result, bin_op); else @@ -416,12 +416,12 @@ namespace __parallel OutputIterator result, BinaryOperation bin_op, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism parallelism_tag + __gnu_parallel::_Parallelism parallelism_tag = __gnu_parallel::parallel_balanced) { if (_GLIBCXX_PARALLEL_CONDITION( static_cast<__gnu_parallel::sequence_index_t>(end - begin) - >= __gnu_parallel::Settings::adjacent_difference_minimal_n + >= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) { bool dummy = true; @@ -448,7 +448,7 @@ namespace __parallel inline OutputIterator adjacent_difference(InputIterator begin, InputIterator end, OutputIterator result, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef 
iterator_traits<InputIterator> traits_type; typedef typename traits_type::value_type value_type; @@ -471,7 +471,7 @@ namespace __parallel inline OutputIterator adjacent_difference(InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation binary_op, - __gnu_parallel::parallelism parallelism_tag) + __gnu_parallel::_Parallelism parallelism_tag) { typedef iterator_traits<InputIterator> traitsi_type; typedef typename traitsi_type::iterator_category iteratori_category; diff --git a/libstdc++-v3/include/parallel/numericfwd.h b/libstdc++-v3/include/parallel/numericfwd.h index f693a60a3bd..581ceaad6f4 100644 --- a/libstdc++-v3/include/parallel/numericfwd.h +++ b/libstdc++-v3/include/parallel/numericfwd.h @@ -54,7 +54,7 @@ namespace __parallel template<typename _IIter, typename _Tp> _Tp - accumulate(_IIter, _IIter, _Tp, __gnu_parallel::parallelism); + accumulate(_IIter, _IIter, _Tp, __gnu_parallel::_Parallelism); template<typename _IIter, typename _Tp, typename _Tag> _Tp @@ -72,7 +72,7 @@ namespace __parallel template<typename _IIter, typename _Tp, typename _BinaryOper> _Tp accumulate(_IIter, _IIter, _Tp, _BinaryOper, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _Tp, typename _BinaryOper, typename _Tag> @@ -83,7 +83,7 @@ namespace __parallel _Tp accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _OIter> _OIter @@ -106,12 +106,12 @@ namespace __parallel template<typename _IIter, typename _OIter> _OIter adjacent_difference(_IIter, _IIter, _OIter, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _OIter, typename _BinaryOper> _OIter adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter, typename _OIter, typename _BinaryOper, typename 
_Tag1, typename _Tag2> @@ -124,7 +124,7 @@ namespace __parallel adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _Tp> _Tp @@ -138,7 +138,7 @@ namespace __parallel template<typename _IIter1, typename _IIter2, typename _Tp> _Tp inner_product(_IIter1, _IIter1, _IIter2, _Tp, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _Tp, typename _BinaryFunction1, typename _BinaryFunction2> @@ -156,7 +156,7 @@ namespace __parallel typename BinaryFunction1, typename BinaryFunction2> _Tp inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1, - BinaryFunction2, __gnu_parallel::parallelism); + BinaryFunction2, __gnu_parallel::_Parallelism); template<typename _RAIter1, typename _RAIter2, typename _Tp, typename BinaryFunction1, typename BinaryFunction2> @@ -164,7 +164,7 @@ namespace __parallel inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, BinaryFunction2, random_access_iterator_tag, random_access_iterator_tag, - __gnu_parallel::parallelism); + __gnu_parallel::_Parallelism); template<typename _IIter1, typename _IIter2, typename _Tp, typename _BinaryFunction1, typename _BinaryFunction2, diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h index 97660c16bbc..bc34aed2857 100644 --- a/libstdc++-v3/include/parallel/omp_loop.h +++ b/libstdc++-v3/include/parallel/omp_loop.h @@ -101,7 +101,7 @@ template<typename RandomAccessIterator, thread_index_t iam = omp_get_thread_num(); -# pragma omp for schedule(dynamic, Settings::workstealing_chunk_size) +# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) for (difference_type pos = 0; pos < length; ++pos) thread_results[iam] = r(thread_results[iam], f(o, begin+pos)); diff --git 
a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h index 2546bb20165..96692e8645a 100644 --- a/libstdc++-v3/include/parallel/omp_loop_static.h +++ b/libstdc++-v3/include/parallel/omp_loop_static.h @@ -101,7 +101,7 @@ template<typename RandomAccessIterator, thread_index_t iam = omp_get_thread_num(); -# pragma omp for schedule(static, Settings::workstealing_chunk_size) +# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) for (difference_type pos = 0; pos < length; ++pos) thread_results[iam] = r(thread_results[iam], f(o, begin+pos)); } //parallel diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h index f7ca754720d..fd4954d456d 100644 --- a/libstdc++-v3/include/parallel/partial_sum.h +++ b/libstdc++-v3/include/parallel/partial_sum.h @@ -118,6 +118,8 @@ template<typename InputIterator, difference_type* borders; value_type* sums; + const _Settings& __s = _Settings::get(); + # pragma omp parallel num_threads(num_threads) { # pragma omp single @@ -126,14 +128,13 @@ template<typename InputIterator, borders = new difference_type[num_threads + 2]; - if (Settings::partial_sum_dilatation == 1.0f) + if (__s.partial_sum_dilation == 1.0f) equally_split(n, num_threads + 1, borders); else { difference_type chunk_length = ((double)n - / ((double)num_threads - + Settings::partial_sum_dilatation)), + / ((double)num_threads + __s.partial_sum_dilation)), borderstart = n - num_threads * chunk_length; borders[0] = 0; for (int i = 1; i < (num_threads + 1); ++i) @@ -209,9 +210,9 @@ template<typename InputIterator, difference_type n = end - begin; - switch (Settings::partial_sum_algorithm) + switch (_Settings::get().partial_sum_algorithm) { - case Settings::LINEAR: + case LINEAR: // Need an initial offset. 
return parallel_partial_sum_linear(begin, end, result, bin_op, n); default: diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h index 0a49d8f6082..7747b7e9980 100644 --- a/libstdc++-v3/include/parallel/partition.h +++ b/libstdc++-v3/include/parallel/partition.h @@ -69,6 +69,8 @@ template<typename RandomAccessIterator, typename Predicate> _GLIBCXX_CALL(n) + const _Settings& __s = _Settings::get(); + // Shared. _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1; _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right; @@ -91,14 +93,12 @@ template<typename RandomAccessIterator, typename Predicate> reserved_left = new bool[num_threads]; reserved_right = new bool[num_threads]; - if (Settings::partition_chunk_share > 0.0) - chunk_size = std::max<difference_type>(Settings:: - partition_chunk_size, - (double)n * Settings:: - partition_chunk_share + if (__s.partition_chunk_share > 0.0) + chunk_size = std::max<difference_type>(__s.partition_chunk_size, + (double)n * __s.partition_chunk_share / (double)num_threads); else - chunk_size = Settings::partition_chunk_size; + chunk_size = __s.partition_chunk_size; } while (right - left + 1 >= 2 * num_threads * chunk_size) @@ -346,7 +346,7 @@ template<typename RandomAccessIterator, typename Comparator> random_number rng; difference_type minimum_length = - std::max<difference_type>(2, Settings::partition_minimal_n); + std::max<difference_type>(2, _Settings::get().partition_minimal_n); // Break if input range to small. while (static_cast<sequence_index_t>(end - begin) >= minimum_length) @@ -409,7 +409,7 @@ template<typename RandomAccessIterator, typename Comparator> break; } - // Only at most Settings::partition_minimal_n elements left. + // Only at most _Settings::partition_minimal_n elements left. 
__gnu_sequential::sort(begin, end, comp); } diff --git a/libstdc++-v3/include/parallel/quicksort.h b/libstdc++-v3/include/parallel/quicksort.h index 5b5a62f30c5..de95549772a 100644 --- a/libstdc++-v3/include/parallel/quicksort.h +++ b/libstdc++-v3/include/parallel/quicksort.h @@ -134,7 +134,7 @@ namespace __gnu_parallel difference_type split = parallel_sort_qs_divide(begin, end, comp, pivot_rank, - Settings::sort_qs_num_samples_preset, + _Settings::get().sort_qs_num_samples_preset, num_threads); #pragma omp parallel sections @@ -179,8 +179,6 @@ namespace __gnu_parallel if (num_threads > n) num_threads = static_cast<thread_index_t>(n); - Settings::sort_qs_num_samples_preset = 100; - // Hard to avoid. omp_set_num_threads(num_threads); diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h index 348a3a34b37..75d9e18d23a 100644 --- a/libstdc++-v3/include/parallel/random_shuffle.h +++ b/libstdc++-v3/include/parallel/random_shuffle.h @@ -274,6 +274,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> _GLIBCXX_CALL(n) + const _Settings& __s = _Settings::get(); + if (num_threads > n) num_threads = static_cast<thread_index_t>(n); @@ -284,7 +286,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> // Must fit into L1. num_bins_cache = std::max<difference_type>( - 1, n / (Settings::L1_cache_size_lb / sizeof(value_type))); + 1, n / (__s.L1_cache_size_lb / sizeof(value_type))); num_bins_cache = round_up_to_pow2(num_bins_cache); // No more buckets than TLB entries, power of 2 @@ -293,7 +295,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB // 2 TLB entries needed per bin. 
- num_bins = std::min<difference_type>(Settings::TLB_size / 2, num_bins); + num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins); #endif num_bins = round_up_to_pow2(num_bins); @@ -303,7 +305,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> // Now try the L2 cache // Must fit into L2 num_bins_cache = static_cast<bin_index>(std::max<difference_type>( - 1, n / (Settings::L2_cache_size / sizeof(value_type)))); + 1, n / (__s.L2_cache_size / sizeof(value_type)))); num_bins_cache = round_up_to_pow2(num_bins_cache); // No more buckets than TLB entries, power of 2. @@ -313,7 +315,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB // 2 TLB entries needed per bin. num_bins = std::min( - static_cast<difference_type>(Settings::TLB_size / 2), num_bins); + static_cast<difference_type>(__s.TLB_size / 2), num_bins); #endif num_bins = round_up_to_pow2(num_bins); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 @@ -403,6 +405,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> typedef typename traits_type::difference_type difference_type; difference_type n = end - begin; + const _Settings& __s = _Settings::get(); bin_index num_bins, num_bins_cache; @@ -410,7 +413,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> // Try the L1 cache first, must fit into L1. 
num_bins_cache = std::max<difference_type> - (1, n / (Settings::L1_cache_size_lb / sizeof(value_type))); + (1, n / (__s.L1_cache_size_lb / sizeof(value_type))); num_bins_cache = round_up_to_pow2(num_bins_cache); // No more buckets than TLB entries, power of 2 @@ -418,7 +421,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> num_bins = std::min(n, (difference_type)num_bins_cache); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB // 2 TLB entries needed per bin - num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); + num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins); #endif num_bins = round_up_to_pow2(num_bins); @@ -428,7 +431,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> // Now try the L2 cache, must fit into L2. num_bins_cache = static_cast<bin_index>(std::max<difference_type>( - 1, n / (Settings::L2_cache_size / sizeof(value_type)))); + 1, n / (__s.L2_cache_size / sizeof(value_type)))); num_bins_cache = round_up_to_pow2(num_bins_cache); // No more buckets than TLB entries, power of 2 @@ -439,7 +442,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB // 2 TLB entries needed per bin num_bins = - std::min<difference_type>(Settings::TLB_size / 2, num_bins); + std::min<difference_type>(__s.TLB_size / 2, num_bins); #endif num_bins = round_up_to_pow2(num_bins); #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 diff --git a/libstdc++-v3/include/parallel/settings.h b/libstdc++-v3/include/parallel/settings.h index 920e27a69ce..ae1ae5ca063 100644 --- a/libstdc++-v3/include/parallel/settings.h +++ b/libstdc++-v3/include/parallel/settings.h @@ -29,65 +29,59 @@ // Public License. /** @file parallel/settings.h - * @brief Settings and tuning parameters, heuristics to decide + * @brief Runtime settings and tuning parameters, heuristics to decide * whether to use parallelized algorithms. 
* This file is a GNU parallel extension to the Standard C++ Library. * - * @section parallelization_decision The decision whether to run - * an algorithm in parallel. + * @section parallelization_decision + * The decision whether to run an algorithm in parallel. * - * There are several ways the user can switch on and off the - * parallel execution of an algorithm, both at compile- and - * run-time. + * There are several ways the user can switch on and off the parallel + * execution of an algorithm, both at compile- and run-time. * - * Only sequential execution can be forced at compile-time. - * This reduces code size and protects code parts that have + * Only sequential execution can be forced at compile-time. This + * reduces code size and protects code parts that have * non-thread-safe side effects. * - * Ultimately forcing parallel execution at compile-time does - * make much sense. - * Often, the sequential algorithm implementation is used as - * a subroutine, so no reduction in code size can be achieved. - * Also, the machine the program is run on might have only one - * processor core, so to avoid overhead, the algorithm is - * executed sequentially. + * Ultimately, forcing parallel execution at compile-time does not + * make much sense. Often, the sequential algorithm implementation is used as + * a subroutine, so no reduction in code size can be achieved. Also, + * the machine the program is run on might have only one processor + * core, so to avoid overhead, the algorithm is executed + * sequentially. * - * To force sequential execution of an algorithm ultimately - * at compile-time, the user must add the tag - * __gnu_parallel::sequential_tag() to the end of the - * parameter list, e. g. + * To force sequential execution of an algorithm ultimately at + * compile-time, the user must add the tag + * __gnu_parallel::sequential_tag() to the end of the parameter list, + * e. g.
* * \code * std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag()); * \endcode * - * This is compatible with all overloaded algorithm variants. - * No additional code will be instantiated, at all. - * The same holds for most algorithm calls with iterators - * not providing random access. + * This is compatible with all overloaded algorithm variants. No + * additional code will be instantiated, at all. The same holds for + * most algorithm calls with iterators not providing random access. * * If the algorithm call is not forced to be executed sequentially - * at compile-time, the decision is made at run-time, for each call. - * First, the two (conceptually) global variables - * __gnu_parallel::Settings::force_sequential and - * __gnu_parallel::Settings::force_parallel are executed. - * If the former one is true, the sequential algorithm is executed. - * If the latter one is true and the former one is false, - * the algorithm is executed in parallel. + * at compile-time, the decision is made at run-time. + * The global variable __gnu_parallel::_Settings::algorithm_strategy + * is checked. It is a tristate variable corresponding to: * - * If none of these conditions has fired so far, a heuristic is used. - * The parallel algorithm implementation is called only if the - * input size is sufficiently large. - * For most algorithms, the input size is the (combined) length of - * the input sequence(s). - * The threshold can be set by the user, individually for each - * algorithm. - * The according variables are called - * __gnu_parallel::Settings::[algorithm]_minimal_n . + * a. force_sequential, meaning the sequential algorithm is executed. + * b. force_parallel, meaning the parallel algorithm is executed. + * c. heuristic + * + * For heuristic, the parallel algorithm implementation is called + * only if the input size is sufficiently large. For most + * algorithms, the input size is the (combined) length of the input + * sequence(s). 
The threshold can be set by the user, individually + * for each algorithm. The corresponding variables are called + * __gnu_parallel::_Settings::[algorithm]_minimal_n . * * For some of the algorithms, there are even more tuning options, - * e. g. the ability to choose from multiple algorithm variants. - * See the __gnu_parallel::Settings class for details. + * e. g. the ability to choose from multiple algorithm variants. See + * below for details. */ // Written by Johannes Singler and Felix Putze. @@ -95,306 +89,199 @@ #ifndef _GLIBCXX_PARALLEL_SETTINGS_H #define _GLIBCXX_PARALLEL_SETTINGS_H 1 -#include <omp.h> #include <parallel/types.h> /** - * @brief The extensible condition on whether the parallel variant of - * an algorithm should be called. - * @param c A condition that is overruled by - * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on - * the input size. + * @brief Determine at run-time if the parallel variant of an + * algorithm should be called. + * @param c A condition that is convertible to bool that is overruled by + * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision + * based on the input size. */ -#define _GLIBCXX_PARALLEL_CONDITION(c) \ -(!(__gnu_parallel::Settings::force_sequential) \ - && ((__gnu_parallel::get_max_threads() > 1 \ - && (c)) || __gnu_parallel::Settings::force_parallel)) +#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel)) -namespace __gnu_parallel +/* +inline bool +parallel_condition(bool c) { - // NB: Including this file cannot produce (unresolved) symbols from - // the OpenMP runtime unless the parallel mode is actually invoked - // and active, which implies that the OpenMP runtime is actually - // going to be linked in.
- inline int - get_max_threads() - { return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; } - -namespace + bool ret = false; + const _Settings& s = _Settings::get(); + if (s.algorithm_strategy != force_sequential) + { + if (s.algorithm_strategy == force_parallel) + ret = true; + else + ret = get_max_threads() > 1 && c; + } + return ret; +} +*/ + +namespace __gnu_parallel { - // XXX look at _Tune in mt_allocator.h - /** @brief Run-time settings for the parallel mode. */ - struct Settings + /// class _Settings + /// Run-time settings for the parallel mode, including all tunable parameters. + struct _Settings { - /** @brief Different parallel sorting algorithms to choose - from: multi-way mergesort, quicksort, load-balanced - quicksort. */ - enum SortAlgorithm - { MWMS, QS, QS_BALANCED }; - - /** @brief Different merging algorithms: bubblesort-alike, - loser-tree variants, enum sentinel */ - enum MultiwayMergeAlgorithm - { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, - LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST }; - - /** @brief Different splitting strategies for sorting/merging: - by sampling, exact */ - enum Splitting - { SAMPLING, EXACT }; + _AlgorithmStrategy algorithm_strategy; + + _SortAlgorithm sort_algorithm; + _PartialSumAlgorithm partial_sum_algorithm; + _MultiwayMergeAlgorithm multiway_merge_algorithm; + _FindAlgorithm find_algorithm; - /** @brief Different partial sum algorithms: recursive, linear */ - enum PartialSumAlgorithm - { RECURSIVE, LINEAR }; + _SplittingAlgorithm sort_splitting; + _SplittingAlgorithm merge_splitting; + _SplittingAlgorithm multiway_merge_splitting; - /** @brief Different find distribution strategies: growing - blocks, equal-sized blocks, equal splitting. */ - enum FindDistribution - { GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT }; + // Per-algorithm settings. - /** @brief Force all algorithms to be executed sequentially. - * This setting cannot be overwritten.
*/ - static volatile bool force_sequential; + /// Minimal input size for accumulate. + sequence_index_t accumulate_minimal_n; - /** @brief Force all algorithms to be executed in parallel. - * This setting can be overridden by __gnu_parallel::sequential_tag - * (compile-time), and force_sequential (run-time). */ - static volatile bool force_parallel; + /// Minimal input size for adjacent_difference. + unsigned int adjacent_difference_minimal_n; - /** @brief Algorithm to use for sorting. */ - static volatile SortAlgorithm sort_algorithm; + /// Minimal input size for count and count_if. + sequence_index_t count_minimal_n; - /** @brief Strategy to use for splitting the input when - sorting (MWMS). */ - static volatile Splitting sort_splitting; + /// Minimal input size for fill. + sequence_index_t fill_minimal_n; - /** @brief Minimal input size for parallel sorting. */ - static volatile sequence_index_t sort_minimal_n; + /// Block size increase factor for find. + double find_increasing_factor; - /** @brief Oversampling factor for parallel std::sort (MWMS). */ - static volatile unsigned int sort_mwms_oversampling; + /// Initial block size for find. + sequence_index_t find_initial_block_size; - /** @brief Such many samples to take to find a good pivot - (quicksort). */ - static volatile unsigned int sort_qs_num_samples_preset; + /// Maximal block size for find. + sequence_index_t find_maximum_block_size; - /** @brief Maximal subsequence length to switch to unbalanced - * base case. Applies to std::sort with dynamically - * load-balanced quicksort. */ - static volatile sequence_index_t sort_qsb_base_case_maximal_n; + /// Start with looking for this many elements sequentially, for find. + sequence_index_t find_sequential_search_size; - /** @brief Minimal input size for parallel std::partition. */ - static volatile sequence_index_t partition_minimal_n; + /// Minimal input size for for_each. 
+ sequence_index_t for_each_minimal_n; - /** @brief Chunk size for parallel std::partition. */ - static volatile sequence_index_t partition_chunk_size; + /// Minimal input size for generate. + sequence_index_t generate_minimal_n; - /** @brief Chunk size for parallel std::partition, relative to - * input size. If >0.0, this value overrides - * partition_chunk_size. */ - static volatile double partition_chunk_share; + /// Minimal input size for max_element. + sequence_index_t max_element_minimal_n; - /** @brief Minimal input size for parallel std::nth_element. */ - static volatile sequence_index_t nth_element_minimal_n; + /// Minimal input size for merge. + sequence_index_t merge_minimal_n; - /** @brief Minimal input size for parallel std::partial_sort. */ - static volatile sequence_index_t partial_sort_minimal_n; + /// Oversampling factor for merge. + unsigned int merge_oversampling; - /** @brief Minimal input size for parallel std::adjacent_difference. */ - static volatile unsigned int adjacent_difference_minimal_n; + /// Minimal input size for min_element. + sequence_index_t min_element_minimal_n; - /** @brief Minimal input size for parallel std::partial_sum. */ - static volatile unsigned int partial_sum_minimal_n; + /// Minimal input size for multiway_merge. + sequence_index_t multiway_merge_minimal_n; - /** @brief Algorithm to use for std::partial_sum. */ - static volatile PartialSumAlgorithm partial_sum_algorithm; + /// Oversampling factor for multiway_merge. + int multiway_merge_minimal_k; - /** @brief Assume "sum and write result" to be that factor - * slower than just "sum". This value is used for - * std::partial_sum. */ - static volatile float partial_sum_dilatation; + /// Oversampling factor for multiway_merge. + unsigned int multiway_merge_oversampling; - /** @brief Minimal input size for parallel std::random_shuffle. */ - static volatile unsigned int random_shuffle_minimal_n; + /// Minimal input size for nth_element. 
+ sequence_index_t nth_element_minimal_n; - /** @brief Minimal input size for parallel std::merge. */ - static volatile sequence_index_t merge_minimal_n; + /// Chunk size for partition. + sequence_index_t partition_chunk_size; - /** @brief Splitting strategy for parallel std::merge. */ - static volatile Splitting merge_splitting; + /// Chunk size for partition, relative to input size. If > 0.0, + /// this value overrides partition_chunk_size. + double partition_chunk_share; - /** @brief Oversampling factor for parallel std::merge. - * Such many samples per thread are collected. */ - static volatile unsigned int merge_oversampling; + /// Minimal input size for partition. + sequence_index_t partition_minimal_n; - /** @brief Algorithm to use for parallel - __gnu_parallel::multiway_merge. */ - static volatile MultiwayMergeAlgorithm multiway_merge_algorithm; + /// Minimal input size for partial_sort. + sequence_index_t partial_sort_minimal_n; - /** @brief Splitting strategy to use for parallel - __gnu_parallel::multiway_merge. */ - static volatile Splitting multiway_merge_splitting; + /// Ratio for partial_sum. Assume "sum and write result" to be + /// this factor slower than just "sum". + float partial_sum_dilation; - //// Oversampling factor for parallel __gnu_parallel::multiway_merge. - static volatile unsigned int multiway_merge_oversampling; + /// Minimal input size for partial_sum. + unsigned int partial_sum_minimal_n; - /// Minimal input size for parallel __gnu_parallel::multiway_merge. - static volatile sequence_index_t multiway_merge_minimal_n; + /// Minimal input size for random_shuffle. + unsigned int random_shuffle_minimal_n; - /// Oversampling factor for parallel __gnu_parallel::multiway_merge. - static volatile int multiway_merge_minimal_k; + /// Minimal input size for replace and replace_if. + sequence_index_t replace_minimal_n; - /** @brief Minimal input size for parallel std::unique_copy. 
*/ - static volatile sequence_index_t unique_copy_minimal_n; + /// Minimal input size for set_difference. + sequence_index_t set_difference_minimal_n; - static volatile sequence_index_t workstealing_chunk_size; + /// Minimal input size for set_intersection. + sequence_index_t set_intersection_minimal_n; - /** @brief Minimal input size for parallel std::for_each. */ - static volatile sequence_index_t for_each_minimal_n; + /// Minimal input size for set_symmetric_difference. + sequence_index_t set_symmetric_difference_minimal_n; - /** @brief Minimal input size for parallel std::count and - std::count_if. */ - static volatile sequence_index_t count_minimal_n; + /// Minimal input size for set_union. + sequence_index_t set_union_minimal_n; - /** @brief Minimal input size for parallel std::transform. */ - static volatile sequence_index_t transform_minimal_n; + /// Minimal input size for parallel sorting. + sequence_index_t sort_minimal_n; - /** @brief Minimal input size for parallel std::replace and - std::replace_if. */ - static volatile sequence_index_t replace_minimal_n; + /// Oversampling factor for parallel std::sort (MWMS). + unsigned int sort_mwms_oversampling; - /** @brief Minimal input size for parallel std::generate. */ - static volatile sequence_index_t generate_minimal_n; + /// Such many samples to take to find a good pivot (quicksort). + unsigned int sort_qs_num_samples_preset; - /** @brief Minimal input size for parallel std::fill. */ - static volatile sequence_index_t fill_minimal_n; + /// Maximal subsequence length to switch to unbalanced base case. + /// Applies to std::sort with dynamically load-balanced quicksort. + sequence_index_t sort_qsb_base_case_maximal_n; - /** @brief Minimal input size for parallel std::min_element. */ - static volatile sequence_index_t min_element_minimal_n; + /// Minimal input size for parallel std::transform. + sequence_index_t transform_minimal_n; - /** @brief Minimal input size for parallel std::max_element. 
*/ - static volatile sequence_index_t max_element_minimal_n; + /// Minimal input size for unique_copy. + sequence_index_t unique_copy_minimal_n; - /** @brief Minimal input size for parallel std::accumulate. */ - static volatile sequence_index_t accumulate_minimal_n; + sequence_index_t workstealing_chunk_size; - /** @brief Distribution strategy for parallel std::find. */ - static volatile FindDistribution find_distribution; + // Hardware dependent tuning parameters. - /** @brief Start with looking for that many elements - sequentially, for std::find. */ - static volatile sequence_index_t find_sequential_search_size; + /// Size of the L1 cache in bytes (underestimation). + unsigned long long L1_cache_size; - /** @brief Initial block size for parallel std::find. */ - static volatile sequence_index_t find_initial_block_size; + /// Size of the L2 cache in bytes (underestimation). + unsigned long long L2_cache_size; - /** @brief Maximal block size for parallel std::find. */ - static volatile sequence_index_t find_maximum_block_size; + /// Size of the Translation Lookaside Buffer (underestimation). + unsigned int TLB_size; - /** @brief Block size increase factor for parallel std::find. */ - static volatile double find_increasing_factor; + /// Overestimation of cache line size. Used to avoid false + /// sharing, i. e. elements of different threads are at least this + /// amount apart. + unsigned int cache_line_size; - //set operations - /** @brief Minimal input size for parallel std::set_union. */ - static volatile sequence_index_t set_union_minimal_n; + // Statistics. - /** @brief Minimal input size for parallel - std::set_symmetric_difference. */ - static volatile sequence_index_t set_symmetric_difference_minimal_n; + /// The number of stolen ranges in load-balanced quicksort. + sequence_index_t qsb_steals; - /** @brief Minimal input size for parallel std::set_difference. */ - static volatile sequence_index_t set_difference_minimal_n; + /// Get the global settings. 
+ static const _Settings& + get() throw(); - /** @brief Minimal input size for parallel std::set_intersection. */ - static volatile sequence_index_t set_intersection_minimal_n; + /// Set the global settings. + static void + set(_Settings&) throw(); - //hardware dependent tuning parameters - /** @brief Size of the L1 cache in bytes (underestimation). */ - static volatile unsigned long long L1_cache_size; - - /** @brief Size of the L2 cache in bytes (underestimation). */ - static volatile unsigned long long L2_cache_size; - - /** @brief Size of the Translation Lookaside Buffer - (underestimation). */ - static volatile unsigned int TLB_size; - - /** @brief Overestimation of cache line size. Used to avoid - * false sharing, i. e. elements of different threads are at - * least this amount apart. */ - static unsigned int cache_line_size; - - //statistics - /** @brief Statistic on the number of stolen ranges in - load-balanced quicksort.*/ - static volatile sequence_index_t qsb_steals; + explicit + _Settings() : algorithm_strategy(heuristic), sort_algorithm(MWMS), partial_sum_algorithm(LINEAR), multiway_merge_algorithm(LOSER_TREE), find_algorithm(CONSTANT_SIZE_BLOCKS), sort_splitting(EXACT), merge_splitting(EXACT), multiway_merge_splitting(EXACT), accumulate_minimal_n(1000), adjacent_difference_minimal_n(1000), count_minimal_n(1000), fill_minimal_n(1000), find_increasing_factor(2.0), find_initial_block_size(256), find_maximum_block_size(8192), find_sequential_search_size(256), for_each_minimal_n(1000), generate_minimal_n(1000), max_element_minimal_n(1000), merge_minimal_n(1000), merge_oversampling(10), min_element_minimal_n(1000), multiway_merge_minimal_n(1000), multiway_merge_minimal_k(2), multiway_merge_oversampling(10), nth_element_minimal_n(1000), partition_chunk_size(1000), partition_chunk_share(0.0), partition_minimal_n(1000), partial_sort_minimal_n(1000), partial_sum_dilation(1.0f), partial_sum_minimal_n(1000), random_shuffle_minimal_n(1000), 
replace_minimal_n(1000), set_difference_minimal_n(1000), set_intersection_minimal_n(1000), set_symmetric_difference_minimal_n(1000), set_union_minimal_n(1000), sort_minimal_n(1000), sort_mwms_oversampling(10), sort_qs_num_samples_preset(100), sort_qsb_base_case_maximal_n(100), transform_minimal_n(1000), unique_copy_minimal_n(10000), workstealing_chunk_size(100), L1_cache_size(16 << 10), L2_cache_size(256 << 10), TLB_size(128), cache_line_size(64), qsb_steals(0) + { } }; - - volatile bool Settings::force_parallel = false; - volatile bool Settings::force_sequential = false; - volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS; - volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT; - volatile sequence_index_t Settings::sort_minimal_n = 1000; - - volatile unsigned int Settings::sort_mwms_oversampling = 10; - volatile unsigned int Settings::sort_qs_num_samples_preset = 100; - volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100; - volatile sequence_index_t Settings::partition_minimal_n = 1000; - volatile sequence_index_t Settings::nth_element_minimal_n = 1000; - volatile sequence_index_t Settings::partial_sort_minimal_n = 1000; - volatile sequence_index_t Settings::partition_chunk_size = 1000; - volatile double Settings::partition_chunk_share = 0.0; - volatile unsigned int Settings::adjacent_difference_minimal_n = 1000; - volatile Settings::PartialSumAlgorithm Settings:: - partial_sum_algorithm = Settings::LINEAR; - volatile unsigned int Settings::partial_sum_minimal_n = 1000; - volatile float Settings::partial_sum_dilatation = 1.0f; - volatile unsigned int Settings::random_shuffle_minimal_n = 1000; - volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT; - volatile sequence_index_t Settings::merge_minimal_n = 1000; - volatile unsigned int Settings::merge_oversampling = 10; - volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000; - volatile int 
Settings::multiway_merge_minimal_k = 2; - - // unique copy - volatile sequence_index_t Settings::unique_copy_minimal_n = 10000; - volatile Settings::MultiwayMergeAlgorithm Settings:: - multiway_merge_algorithm = Settings::LOSER_TREE; - volatile Settings::Splitting Settings::multiway_merge_splitting = - Settings::EXACT; - volatile unsigned int Settings::multiway_merge_oversampling = 10; - volatile Settings::FindDistribution Settings::find_distribution = - Settings::CONSTANT_SIZE_BLOCKS; - volatile sequence_index_t Settings::find_sequential_search_size = 256; - volatile sequence_index_t Settings::find_initial_block_size = 256; - volatile sequence_index_t Settings::find_maximum_block_size = 8192; - volatile double Settings::find_increasing_factor = 2.0; - volatile sequence_index_t Settings::workstealing_chunk_size = 100; - volatile sequence_index_t Settings::for_each_minimal_n = 1000; - volatile sequence_index_t Settings::count_minimal_n = 1000; - volatile sequence_index_t Settings::transform_minimal_n = 1000; - volatile sequence_index_t Settings::replace_minimal_n = 1000; - volatile sequence_index_t Settings::generate_minimal_n = 1000; - volatile sequence_index_t Settings::fill_minimal_n = 1000; - volatile sequence_index_t Settings::min_element_minimal_n = 1000; - volatile sequence_index_t Settings::max_element_minimal_n = 1000; - volatile sequence_index_t Settings::accumulate_minimal_n = 1000; - - //set operations - volatile sequence_index_t Settings::set_union_minimal_n = 1000; - volatile sequence_index_t Settings::set_intersection_minimal_n = 1000; - volatile sequence_index_t Settings::set_difference_minimal_n = 1000; - volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = - 1000; - volatile unsigned long long Settings::L1_cache_size = 16 << 10; - volatile unsigned long long Settings::L2_cache_size = 256 << 10; - volatile unsigned int Settings::TLB_size = 128; - unsigned int Settings::cache_line_size = 64; - - //statistics - volatile 
sequence_index_t Settings::qsb_steals = 0; -} // end anonymous namespace - } #endif /* _GLIBCXX_SETTINGS_H */ diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h index 5dc3470f410..edf4eea02d8 100644 --- a/libstdc++-v3/include/parallel/sort.h +++ b/libstdc++-v3/include/parallel/sort.h @@ -84,16 +84,15 @@ namespace __gnu_parallel if (false) ; #if _GLIBCXX_MERGESORT - else if (Settings::sort_algorithm == Settings::MWMS || stable) + else if (stable || _Settings::get().sort_algorithm == MWMS) parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); #endif #if _GLIBCXX_QUICKSORT - else if (Settings::sort_algorithm == Settings::QS && !stable) + else if (!stable && _Settings::get().sort_algorithm == QS) parallel_sort_qs(begin, end, comp, n, get_max_threads()); #endif #if _GLIBCXX_BAL_QUICKSORT - else if (Settings::sort_algorithm == Settings::QS_BALANCED - && !stable) + else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED) parallel_sort_qsb(begin, end, comp, n, get_max_threads()); #endif else diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h index 1389aed4b06..b3f2ec86912 100644 --- a/libstdc++-v3/include/parallel/tags.h +++ b/libstdc++-v3/include/parallel/tags.h @@ -64,18 +64,19 @@ namespace __gnu_parallel struct omp_loop_static_tag : public parallel_tag { }; - // XXX settings.h Settings::FindDistribution + struct find_tag { }; + /** @brief Selects the growing block size variant for std::find(). @see _GLIBCXX_FIND_GROWING_BLOCKS */ - struct growing_blocks_tag { }; + struct growing_blocks_tag : public find_tag { }; /** @brief Selects the constant block size variant for std::find(). @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */ - struct constant_size_blocks_tag { }; + struct constant_size_blocks_tag : public find_tag { }; /** @brief Selects the equal splitting variant for std::find(). 
@see _GLIBCXX_FIND_EQUAL_SPLIT */ - struct equal_split_tag { }; + struct equal_split_tag : public find_tag { }; } #endif /* _GLIBCXX_PARALLEL_TAGS_H */ diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h index f71db81fee1..ded617edb6d 100644 --- a/libstdc++-v3/include/parallel/types.h +++ b/libstdc++-v3/include/parallel/types.h @@ -44,8 +44,8 @@ namespace __gnu_parallel { // Enumerated types. - /// @brief Run-time equivalents for the compile-time tags. - enum parallelism + /// Run-time equivalents for the compile-time tags. + enum _Parallelism { /// Not parallel. sequential, @@ -66,9 +66,60 @@ namespace __gnu_parallel parallel_taskqueue }; - inline bool - is_parallel(const parallelism __p) { return __p != sequential; } + /// Strategies for run-time algorithm selection: + // force_sequential, force_parallel, heuristic. + enum _AlgorithmStrategy + { + heuristic, + force_sequential, + force_parallel + }; + + /// Sorting algorithms: + // multi-way mergesort, quicksort, load-balanced quicksort. + enum _SortAlgorithm + { + MWMS, + QS, + QS_BALANCED + }; + + /// Merging algorithms: + // bubblesort-alike, loser-tree variants, enum sentinel. + enum _MultiwayMergeAlgorithm + { + BUBBLE, + LOSER_TREE_EXPLICIT, + LOSER_TREE, + LOSER_TREE_COMBINED, + LOSER_TREE_SENTINEL, + ENUM_SENTINEL + }; + + /// Partial sum algorithms: recursive, linear. + enum _PartialSumAlgorithm + { + RECURSIVE, + LINEAR + }; + + /// Sorting/merging algorithms: sampling, exact. + enum _SplittingAlgorithm + { + SAMPLING, + EXACT + }; + /// Find algorithms: + // growing blocks, equal-sized blocks, equal splitting. + enum _FindAlgorithm + { + GROWING_BLOCKS, + CONSTANT_SIZE_BLOCKS, + EQUAL_SPLIT + }; + + /// Integer Types. // XXX need to use <cstdint> /** @brief 16-bit signed integer. */ typedef short int16; @@ -101,20 +152,14 @@ namespace __gnu_parallel typedef uint16 thread_index_t; // XXX atomics interface? 
- /** - * @brief Longest compare-and-swappable integer type on this platform. - */ + /// Longest compare-and-swappable integer type on this platform. typedef int64 lcas_t; // XXX numeric_limits::digits? - /** - * @brief Number of bits of ::lcas_t. - */ + /// Number of bits of ::lcas_t. static const int lcas_t_bits = sizeof(lcas_t) * 8; - /** - * @brief ::lcas_t with the right half of bits set to 1. - */ + /// ::lcas_t with the right half of bits set to 1. static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1); } diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h index 4f0c7482c2f..628f12cca2c 100644 --- a/libstdc++-v3/include/parallel/workstealing.h +++ b/libstdc++-v3/include/parallel/workstealing.h @@ -114,17 +114,16 @@ template<typename RandomAccessIterator, typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::difference_type difference_type; + + const _Settings& __s = _Settings::get(); - - difference_type chunk_size = - static_cast<difference_type>(Settings::workstealing_chunk_size); + difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size); // How many jobs? difference_type length = (bound < 0) ? (end - begin) : bound; // To avoid false sharing in a cache line. - const int stride = - Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1; + const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1; // Total number of threads currently working. 
thread_index_t busy = 0; diff --git a/libstdc++-v3/src/Makefile.am b/libstdc++-v3/src/Makefile.am index 9bc4b557214..65486190212 100644 --- a/libstdc++-v3/src/Makefile.am +++ b/libstdc++-v3/src/Makefile.am @@ -121,7 +121,7 @@ basic_file.cc: ${glibcxx_srcdir}/$(BASIC_FILE_CC) $(LN_S) ${glibcxx_srcdir}/$(BASIC_FILE_CC) ./$@ || true if ENABLE_PARALLEL -parallel_sources = parallel_list.cc +parallel_sources = parallel_list.cc parallel_settings.cc else parallel_sources = endif @@ -221,6 +221,11 @@ parallel_list.lo: parallel_list.cc parallel_list.o: parallel_list.cc $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< +parallel_settings.lo: parallel_settings.cc + $(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $< +parallel_settings.o: parallel_settings.cc + $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< + # Use special rules for the C++0x sources so that the proper flags are passed. system_error.lo: system_error.cc $(LTCXXCOMPILE) -std=gnu++0x -c $< diff --git a/libstdc++-v3/src/Makefile.in b/libstdc++-v3/src/Makefile.in index 71ddb748452..8c55b6f20d3 100644 --- a/libstdc++-v3/src/Makefile.in +++ b/libstdc++-v3/src/Makefile.in @@ -84,12 +84,13 @@ am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \ codecvt_members.cc collate_members.cc ctype_members.cc \ messages_members.cc monetary_members.cc numeric_members.cc \ time_members.cc basic_file.cc c++locale.cc \ - compatibility-ldbl.cc parallel_list.cc + compatibility-ldbl.cc parallel_list.cc parallel_settings.cc am__objects_1 = atomicity.lo codecvt_members.lo collate_members.lo \ ctype_members.lo messages_members.lo monetary_members.lo \ numeric_members.lo time_members.lo @GLIBCXX_LDBL_COMPAT_TRUE@am__objects_2 = compatibility-ldbl.lo -@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo +@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo \ +@ENABLE_PARALLEL_TRUE@ parallel_settings.lo am__objects_4 = basic_file.lo c++locale.lo $(am__objects_2) \ $(am__objects_3) am__objects_5 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \ 
@@ -359,7 +360,7 @@ host_sources_extra = \ basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources} @ENABLE_PARALLEL_FALSE@parallel_sources = -@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc +@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc parallel_settings.cc @GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources = @GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc @@ -810,6 +811,11 @@ parallel_list.lo: parallel_list.cc parallel_list.o: parallel_list.cc $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< +parallel_settings.lo: parallel_settings.cc + $(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $< +parallel_settings.o: parallel_settings.cc + $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< + # Use special rules for the C++0x sources so that the proper flags are passed. system_error.lo: system_error.cc $(LTCXXCOMPILE) -std=gnu++0x -c $< diff --git a/libstdc++-v3/src/parallel_settings.cc b/libstdc++-v3/src/parallel_settings.cc new file mode 100644 index 00000000000..c47e1f2816d --- /dev/null +++ b/libstdc++-v3/src/parallel_settings.cc @@ -0,0 +1,47 @@ +// Default settings for parallel mode -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. 
+ +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +#include <parallel/settings.h> + +namespace +{ + __gnu_parallel::_Settings s; +} + +namespace __gnu_parallel +{ + const _Settings& + _Settings::get() throw() + { return s; } + + // XXX MT + void + _Settings::set(_Settings& obj) throw() + { s = obj; } +} |