diff options
Diffstat (limited to 'libgomp')
121 files changed, 14890 insertions, 251 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 81bfc77a0e0..b9d1f96b36a 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,342 @@ +2018-11-08 Jakub Jelinek <jakub@redhat.com> + + * affinity.c (gomp_display_affinity_place): New function. + * affinity-fmt.c: New file. + * alloc.c (gomp_aligned_alloc, gomp_aligned_free): New functions. + * config/linux/affinity.c (gomp_display_affinity_place): New function. + * config/nvptx/icv-device.c (omp_get_num_teams, omp_get_team_num): + Move these functions to ... + * config/nvptx/teams.c: ... here. New file. + * config/nvptx/target.c (omp_pause_resource, omp_pause_resource_all): + New functions. + * config/nvptx/team.c (gomp_team_start, gomp_pause_host): New + functions. + * configure.ac: Check for aligned_alloc, posix_memalign, memalign + and _aligned_malloc. + (HAVE_UNAME, HAVE_GETHOSTNAME, HAVE_GETPID): Add new tests. + * configure.tgt: Add -DUSING_INITIAL_EXEC_TLS to XCFLAGS for Linux. + * env.c (gomp_display_affinity_var, gomp_affinity_format_var, + gomp_affinity_format_len): New variables. + (parse_schedule): Parse monotonic and nonmonotonic modifiers in + OMP_SCHEDULE variable. Set GFS_MONOTONIC for monotonic schedules. + (handle_omp_display_env): Display monotonic/nonmonotonic schedule + modifiers. Display (non-default) chunk sizes. Print + OMP_DISPLAY_AFFINITY and OMP_AFFINITY_FORMAT. + (initialize_env): Don't call pthread_attr_setdetachstate. Handle + OMP_DISPLAY_AFFINITY and OMP_AFFINITY_FORMAT env vars. + * fortran.c: Include stdio.h and string.h. + (omp_pause_resource, omp_pause_resource_all): Add ialias_redirect. + (omp_get_schedule_, omp_get_schedule_8_): Mask off GFS_MONOTONIC bit. + (omp_set_affinity_format_, omp_get_affinity_format_, + omp_display_affinity_, omp_capture_affinity_, omp_pause_resource_, + omp_pause_resource_all_): New functions. + * icv.c (omp_set_schedule): Mask off omp_sched_monotonic bit in + switch. 
+ * icv-device.c (omp_get_num_teams, omp_get_team_num): Move these + functions to ... + * teams.c: ... here. New file. + * libgomp_g.h: Include gstdint.h. + (GOMP_loop_nonmonotonic_runtime_start, + GOMP_loop_maybe_nonmonotonic_runtime_start, GOMP_loop_start, + GOMP_loop_ordered_start, GOMP_loop_nonmonotonic_runtime_next, + GOMP_loop_maybe_nonmonotonic_runtime_next, GOMP_loop_doacross_start, + GOMP_parallel_loop_nonmonotonic_runtime, + GOMP_parallel_loop_maybe_nonmonotonic_runtime, + GOMP_loop_ull_nonmonotonic_runtime_start, + GOMP_loop_ull_maybe_nonmonotonic_runtime_start, GOMP_loop_ull_start, + GOMP_loop_ull_ordered_start, GOMP_loop_ull_nonmonotonic_runtime_next, + GOMP_loop_ull_maybe_nonmonotonic_runtime_next, + GOMP_loop_ull_doacross_start, GOMP_parallel_reductions, + GOMP_taskwait_depend, GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap, + GOMP_workshare_task_reduction_unregister, GOMP_sections2_start, + GOMP_teams_reg): Declare. + * libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless + gomp_aligned_alloc uses fallback implementation. + (gomp_aligned_alloc, gomp_aligned_free): Declare. + (enum gomp_schedule_type): Add GFS_MONOTONIC. + (struct gomp_doacross_work_share): Add extra field. + (struct gomp_work_share): Add task_reductions field. + (struct gomp_taskgroup): Add workshare and reductions fields. + (GOMP_NEEDS_THREAD_HANDLE): Define if needed. + (gomp_thread_handle): New typedef. + (gomp_display_affinity_place, gomp_set_affinity_format, + gomp_display_string, gomp_display_affinity, + gomp_display_affinity_thread): Declare. + (gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument. + (gomp_parallel_reduction_register, gomp_workshare_taskgroup_start, + gomp_workshare_task_reduction_register): Declare. + (gomp_team_start): Add taskgroup argument. + (gomp_pause_host): Declare. + (gomp_init_work_share, gomp_work_share_start): Change bool argument + to size_t. 
+ (gomp_thread_self, gomp_thread_to_pthread_t): New inline functions. + * libgomp.map (GOMP_5.0): Export GOMP_loop_start, + GOMP_loop_ordered_start, GOMP_loop_doacross_start, + GOMP_loop_ull_start, GOMP_loop_ull_ordered_start, + GOMP_loop_ull_doacross_start, + GOMP_workshare_task_reduction_unregister, GOMP_sections2_start, + GOMP_loop_maybe_nonmonotonic_runtime_next, + GOMP_loop_maybe_nonmonotonic_runtime_start, + GOMP_loop_nonmonotonic_runtime_next, + GOMP_loop_nonmonotonic_runtime_start, + GOMP_loop_ull_maybe_nonmonotonic_runtime_next, + GOMP_loop_ull_maybe_nonmonotonic_runtime_start, + GOMP_loop_ull_nonmonotonic_runtime_next, + GOMP_loop_ull_nonmonotonic_runtime_start, + GOMP_parallel_loop_maybe_nonmonotonic_runtime, + GOMP_parallel_loop_nonmonotonic_runtime, GOMP_parallel_reductions, + GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap, + GOMP_teams_reg and GOMP_taskwait_depend. + (OMP_5.0): Export omp_pause_resource{,_all}{,_}, + omp_{capture,display}_affinity{,_}, and + omp_[gs]et_affinity_format{,_}. + * loop.c: Include string.h. + (GOMP_loop_runtime_next): Add ialias. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (gomp_loop_static_start, gomp_loop_dynamic_start, + gomp_loop_guided_start, gomp_loop_ordered_static_start, + gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start, + gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start, + gomp_loop_doacross_guided_start): Adjust gomp_work_share_start + or gomp_doacross_init callers. + (gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start, + GOMP_loop_doacross_start): New functions. + (GOMP_loop_runtime_start, GOMP_loop_ordered_runtime_start, + GOMP_loop_doacross_runtime_start, GOMP_parallel_loop_runtime_start): + Mask off GFS_MONOTONIC bit. 
+ (GOMP_loop_maybe_nonmonotonic_runtime_next, + GOMP_loop_maybe_nonmonotonic_runtime_start, + GOMP_loop_nonmonotonic_runtime_next, + GOMP_loop_nonmonotonic_runtime_start, + GOMP_parallel_loop_maybe_nonmonotonic_runtime, + GOMP_parallel_loop_nonmonotonic_runtime): New aliases or wrapper + functions. + (gomp_parallel_loop_start): Pass NULL as taskgroup to + gomp_team_start. + * loop_ull.c: Include string.h. + (GOMP_loop_ull_runtime_next): Add ialias. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start, + gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start, + gomp_loop_ull_ordered_dynamic_start, + gomp_loop_ull_ordered_guided_start, + gomp_loop_ull_doacross_static_start, + gomp_loop_ull_doacross_dynamic_start, + gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start + and gomp_doacross_ull_init callers. + (gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start, + GOMP_loop_ull_doacross_start): New functions. + (GOMP_loop_ull_runtime_start, + GOMP_loop_ull_ordered_runtime_start, + GOMP_loop_ull_doacross_runtime_start): Mask off GFS_MONOTONIC bit. + (GOMP_loop_ull_maybe_nonmonotonic_runtime_next, + GOMP_loop_ull_maybe_nonmonotonic_runtime_start, + GOMP_loop_ull_nonmonotonic_runtime_next, + GOMP_loop_ull_nonmonotonic_runtime_start): Likewise. + * Makefile.am (libgomp_la_SOURCES): Add teams.c and affinity-fmt.c. + * omp.h.in (enum omp_sched_t): Add omp_sched_monotonic. + (omp_pause_resource_t, omp_depend_t): New typedefs. + (enum omp_lock_hint_t): Renamed to ... + (enum omp_sync_hint_t): ... this. Define omp_sync_hint_* + enumerators using numbers and omp_lock_hint_* as their aliases. + (omp_lock_hint_t): New typedef. Rename to ... + (omp_sync_hint_t): ... this. + (omp_init_lock_with_hint, omp_init_nest_lock_with_hint): Use + omp_sync_hint_t instead of omp_lock_hint_t. 
+ (omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format, + omp_get_affinity_format, omp_display_affinity, omp_capture_affinity): + Declare. + (omp_target_is_present, omp_target_disassociate_ptr): + Change first argument from void * to const void *. + (omp_target_memcpy, omp_target_memcpy_rect): Change second argument + from void * to const void *. + (omp_target_associate_ptr): Change first and second arguments from + void * to const void *. + * omp_lib.f90.in (omp_pause_resource_kind, omp_pause_soft, + omp_pause_hard): New parameters. + (omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format, + omp_get_affinity_format, omp_display_affinity, omp_capture_affinity): + New interfaces. + * omp_lib.h.in (omp_pause_resource_kind, omp_pause_soft, + omp_pause_hard): New parameters. + (omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format, + omp_get_affinity_format, omp_display_affinity, omp_capture_affinity): + New externals. + * ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add + EXTRA argument. If not needed to prepare array, if extra is 0, + clear ws->doacross, otherwise allocate just doacross structure and + extra payload. If array is needed, allocate also extra payload. + (GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post, + GOMP_doacross_ull_wait): Handle doacross->array == NULL like + doacross == NULL. + * parallel.c (GOMP_parallel_start): Pass NULL as taskgroup to + gomp_team_start. + (GOMP_parallel): Likewise. Formatting fix. + (GOMP_parallel_reductions): New function. + (GOMP_cancellation_point): If taskgroup has workshare + flag set, check cancelled of prev taskgroup if any. + (GOMP_cancel): If taskgroup has workshare flag set, set cancelled + on prev taskgroup if any. + * sections.c: Include string.h. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (GOMP_sections_start): Adjust gomp_work_share_start caller. + (GOMP_sections2_start): New function. 
+ (GOMP_parallel_sections_start, GOMP_parallel_sections): + Pass NULL as taskgroup to gomp_team_start. + * single.c (GOMP_single_start, GOMP_single_copy_start): Adjust + gomp_work_share_start callers. + * target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data): + If taskgroup has workshare flag set, check cancelled on prev + taskgroup if any. Guard all cancellation tests with + gomp_cancel_var test. + (omp_target_is_present, omp_target_disassociate_ptr): + Change ptr argument from void * to const void *. + (omp_target_memcpy): Change src argument from void * to const void *. + (omp_target_memcpy_rect): Likewise. + (omp_target_memcpy_rect_worker): Likewise. Use const char * casts + instead of char * where needed. + (omp_target_associate_ptr): Change host_ptr and device_ptr arguments + from void * to const void *. + (omp_pause_resource, omp_pause_resource_all): New functions. + * task.c (gomp_task_handle_depend): Handle new depend array format + in addition to the old. Handle mutexinoutset kinds the same as + inout for now, handle unspecified kinds. + (gomp_create_target_task): If taskgroup has workshare flag set, check + cancelled on prev taskgroup if any. Guard all cancellation tests with + gomp_cancel_var test. Handle new depend array format count in + addition to the old. + (GOMP_task): Likewise. Adjust function comment. + (gomp_task_run_pre): If taskgroup has workshare flag set, check + cancelled on prev taskgroup if any. Guard all cancellation tests with + gomp_cancel_var test. + (GOMP_taskwait_depend): New function. + (gomp_task_maybe_wait_for_dependencies): Handle new depend array + format in addition to the old. Handle mutexinoutset kinds the same as + inout for now, handle unspecified kinds. Fix a function comment typo. + (gomp_taskgroup_init): New function. + (GOMP_taskgroup_start): Use it. 
+ (gomp_reduction_register, gomp_create_artificial_team, + GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap, + gomp_parallel_reduction_register, + gomp_workshare_task_reduction_register, + gomp_workshare_taskgroup_start, + GOMP_workshare_task_reduction_unregister): New functions. + * taskloop.c (GOMP_taskloop): If taskgroup has workshare flag set, + check cancelled on prev taskgroup if any. Guard all cancellation + tests with gomp_cancel_var test. Handle GOMP_TASK_FLAG_REDUCTION flag + by calling GOMP_taskgroup_reduction_register. + * team.c (gomp_thread_attr): Remove comment. + (struct gomp_thread_start_data): Add handle field. + (gomp_thread_start): Call pthread_detach. + (gomp_new_team): Adjust gomp_init_work_share caller. + (gomp_free_pool_helper): Call pthread_detach. + (gomp_team_start): Add taskgroup argument, initialize implicit + tasks' taskgroup field to that. Don't call + pthread_attr_setdetachstate. Handle OMP_DISPLAY_AFFINITY env var. + (gomp_team_end): Determine nesting by thr->ts.level != 0 + rather than thr->ts.team != NULL. + (gomp_pause_pool_helper, gomp_pause_host): New functions. + * work.c (alloc_work_share): Use gomp_aligned_alloc instead of + gomp_malloc if GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined. + (gomp_init_work_share): Change ORDERED argument from bool to size_t, + if more than 1 allocate also extra payload at the end of array. Never + keep ordered_team_ids NULL, set it to inline_ordered_team_ids instead. + (gomp_work_share_start): Change ORDERED argument from bool to size_t, + return true instead of ws. + * Makefile.in: Regenerated. + * configure: Regenerated. + * config.h.in: Regenerated. + * testsuite/libgomp.c/cancel-for-2.c (foo): Use cancel modifier + in some cases. + * testsuite/libgomp.c-c++-common/cancel-parallel-1.c: New test. + * testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c: New test. + * testsuite/libgomp.c-c++-common/depend-iterator-1.c: New test. 
+ * testsuite/libgomp.c-c++-common/depend-iterator-2.c: New test. + * testsuite/libgomp.c-c++-common/depend-mutexinout-1.c: New test. + * testsuite/libgomp.c-c++-common/depend-mutexinout-2.c: New test. + * testsuite/libgomp.c-c++-common/depobj-1.c: New test. + * testsuite/libgomp.c-c++-common/display-affinity-1.c: New test. + * testsuite/libgomp.c-c++-common/for-10.c: New test. + * testsuite/libgomp.c-c++-common/for-11.c: New test. + * testsuite/libgomp.c-c++-common/for-12.c: New test. + * testsuite/libgomp.c-c++-common/for-13.c: New test. + * testsuite/libgomp.c-c++-common/for-14.c: New test. + * testsuite/libgomp.c-c++-common/for-15.c: New test. + * testsuite/libgomp.c-c++-common/for-2.h: If CONDNE macro is defined, + define a different N(test), don't define N(f0) to N(f14), but instead + define N(f20) to N(f34) using != comparisons. + * testsuite/libgomp.c-c++-common/for-7.c: New test. + * testsuite/libgomp.c-c++-common/for-8.c: New test. + * testsuite/libgomp.c-c++-common/for-9.c: New test. + * testsuite/libgomp.c-c++-common/master-combined-1.c: New test. + * testsuite/libgomp.c-c++-common/pause-1.c: New test. + * testsuite/libgomp.c-c++-common/pause-2.c: New test. + * testsuite/libgomp.c-c++-common/pr66199-10.c: New test. + * testsuite/libgomp.c-c++-common/pr66199-11.c: New test. + * testsuite/libgomp.c-c++-common/pr66199-12.c: New test. + * testsuite/libgomp.c-c++-common/pr66199-13.c: New test. + * testsuite/libgomp.c-c++-common/pr66199-14.c: New test. + * testsuite/libgomp.c-c++-common/simd-1.c: New test. + * testsuite/libgomp.c-c++-common/taskloop-reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/taskloop-reduction-2.c: New test. + * testsuite/libgomp.c-c++-common/taskloop-reduction-3.c: New test. + * testsuite/libgomp.c-c++-common/taskloop-reduction-4.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-11.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-12.c: New test. 
+ * testsuite/libgomp.c-c++-common/task-reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-2.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-3.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-4.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-5.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-6.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-7.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-8.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-9.c: New test. + * testsuite/libgomp.c-c++-common/taskwait-depend-1.c: New test. + * testsuite/libgomp.c++/depend-1.C: New test. + * testsuite/libgomp.c++/depend-iterator-1.C: New test. + * testsuite/libgomp.c++/depobj-1.C: New test. + * testsuite/libgomp.c++/for-16.C: New test. + * testsuite/libgomp.c++/for-21.C: New test. + * testsuite/libgomp.c++/for-22.C: New test. + * testsuite/libgomp.c++/for-23.C: New test. + * testsuite/libgomp.c++/for-24.C: New test. + * testsuite/libgomp.c++/for-25.C: New test. + * testsuite/libgomp.c++/for-26.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-1.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-2.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-3.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-4.C: New test. + * testsuite/libgomp.c++/task-reduction-10.C: New test. + * testsuite/libgomp.c++/task-reduction-11.C: New test. + * testsuite/libgomp.c++/task-reduction-12.C: New test. + * testsuite/libgomp.c++/task-reduction-13.C: New test. + * testsuite/libgomp.c++/task-reduction-14.C: New test. + * testsuite/libgomp.c++/task-reduction-15.C: New test. + * testsuite/libgomp.c++/task-reduction-16.C: New test. + * testsuite/libgomp.c++/task-reduction-17.C: New test. + * testsuite/libgomp.c++/task-reduction-18.C: New test. + * testsuite/libgomp.c++/task-reduction-19.C: New test. + * testsuite/libgomp.c/task-reduction-1.c: New test. 
+ * testsuite/libgomp.c++/task-reduction-1.C: New test. + * testsuite/libgomp.c/task-reduction-2.c: New test. + * testsuite/libgomp.c++/task-reduction-2.C: New test. + * testsuite/libgomp.c++/task-reduction-3.C: New test. + * testsuite/libgomp.c++/task-reduction-4.C: New test. + * testsuite/libgomp.c++/task-reduction-5.C: New test. + * testsuite/libgomp.c++/task-reduction-6.C: New test. + * testsuite/libgomp.c++/task-reduction-7.C: New test. + * testsuite/libgomp.c++/task-reduction-8.C: New test. + * testsuite/libgomp.c++/task-reduction-9.C: New test. + * testsuite/libgomp.c/teams-1.c: New test. + * testsuite/libgomp.c/teams-2.c: New test. + * testsuite/libgomp.c/thread-limit-4.c: New test. + * testsuite/libgomp.c/thread-limit-5.c: New test. + * testsuite/libgomp.fortran/display-affinity-1.f90: New test. + 2018-11-06 Chung-Lin Tang <cltang@codesourcery.com> * oacc-mem.c (memcpy_tofrom_device): New function, combined from diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 604085c3d72..062fded0805 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -64,7 +64,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ - oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c + oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ + affinity-fmt.c teams.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index dfd08792715..98499b8dc8f 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -216,7 +216,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ - 
oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1) + oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \ + teams.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -549,7 +550,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - $(am__append_3) + affinity-fmt.c teams.c $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -724,6 +725,7 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@ @@ -762,6 +764,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@ diff --git a/libgomp/affinity-fmt.c b/libgomp/affinity-fmt.c new file mode 100644 index 00000000000..08937b69d4e --- /dev/null +++ b/libgomp/affinity-fmt.c @@ -0,0 +1,481 @@ +/* Copyright (C) 2018 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libgomp.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_UNAME
+#include <sys/utsname.h>
+#endif
+
+/* Store the first LEN bytes of FORMAT, NUL terminated, in
+   gomp_affinity_format_var.  The existing storage is reused when LEN is
+   smaller than gomp_affinity_format_len; otherwise LEN + 1 bytes are
+   obtained (gomp_realloc when gomp_affinity_format_len is nonzero,
+   gomp_malloc when it is zero) and gomp_affinity_format_len is updated.
+   NOTE(review): a zero gomp_affinity_format_len presumably marks storage
+   that did not come from the heap -- confirm against env.c.  */
+void
+gomp_set_affinity_format (const char *format, size_t len)
+{
+  if (len < gomp_affinity_format_len)
+    memcpy (gomp_affinity_format_var, format, len);
+  else
+    {
+      char *p;
+      if (gomp_affinity_format_len)
+        p = gomp_realloc (gomp_affinity_format_var, len + 1);
+      else
+        p = gomp_malloc (len + 1);
+      memcpy (p, format, len);
+      gomp_affinity_format_var = p;
+      gomp_affinity_format_len = len + 1;
+    }
+  gomp_affinity_format_var[len] = '\0';
+}
+
+/* Install the NUL terminated string FORMAT as the affinity format.  */
+void
+omp_set_affinity_format (const char *format)
+{
+  gomp_set_affinity_format (format, strlen (format));
+}
+
+/* Copy the current affinity format into BUFFER, which holds SIZE bytes.
+   When SIZE is nonzero the copy is truncated to SIZE - 1 bytes if needed
+   and is always NUL terminated; when SIZE is zero BUFFER is not written.
+   Returns the length of the complete format, not counting the NUL.  */
+size_t
+omp_get_affinity_format (char *buffer, size_t size)
+{
+  size_t len = strlen (gomp_affinity_format_var);
+  if (size)
+    {
+      if (len < size)
+        memcpy (buffer, gomp_affinity_format_var, len + 1);
+      else
+        {
+          memcpy (buffer, gomp_affinity_format_var, size - 1);
+          buffer[size - 1] = '\0';
+        }
+    }
+  return len;
+}
+
+/* Append the LEN bytes at STR to the output being built in BUFFER.
+   *RET is the number of bytes produced so far and SIZE the capacity of
+   BUFFER; only the part that fits below SIZE is copied, but *RET always
+   advances by LEN so the caller learns how much room the whole output
+   needs.  Calls gomp_fatal if *RET wraps around.  */
+void
+gomp_display_string (char *buffer, size_t size, size_t *ret,
+                     const char *str, size_t len)
+{
+  size_t r = *ret;
+  if (size && r < size)
+    {
+      size_t l = len;
+      if (size - r < len)
+        l = size - r;
+      memcpy (buffer + r, str, l);
+    }
+  *ret += len;
+  if (__builtin_expect (r > *ret, 0))
+    gomp_fatal ("overflow in omp_capture_affinity");
+}
+
+/* Like gomp_display_string, but append LEN copies of the character C.  */
+static void
+gomp_display_repeat (char *buffer, size_t size, size_t *ret,
+                     char c, size_t len)
+{
+  size_t r = *ret;
+  if (size && r < size)
+    {
+      size_t l = len;
+      if (size - r < len)
+        l = size - r;
+      memset (buffer + r, c, l);
+    }
+  *ret += len;
+  if (__builtin_expect (r > *ret, 0))
+    gomp_fatal ("overflow in omp_capture_affinity");
+}
+
+/* Append the NUL terminated number text BUF, padded to a field of SZ
+   characters.  SZ == (size_t) -1 means no field width, and text that is
+   already at least SZ long is emitted unpadded.  With ZERO the '0'
+   padding is inserted after a leading '-' or "0x"; otherwise RIGHT puts
+   the blanks before the text and !RIGHT puts them after it.  */
+static void
+gomp_display_num (char *buffer, size_t size, size_t *ret,
+                  bool zero, bool right, size_t sz, char *buf)
+{
+  size_t l = strlen (buf);
+  if (sz == (size_t) -1 || l >= sz)
+    {
+      gomp_display_string (buffer, size, ret, buf, l);
+      return;
+    }
+  if (zero)
+    {
+      /* Emit the sign or "0x" prefix first so the zeros land between the
+         prefix and the digits.  */
+      if (buf[0] == '-')
+        gomp_display_string (buffer, size, ret, buf, 1);
+      else if (buf[0] == '0' && buf[1] == 'x')
+        gomp_display_string (buffer, size, ret, buf, 2);
+      gomp_display_repeat (buffer, size, ret, '0', sz - l);
+      if (buf[0] == '-')
+        gomp_display_string (buffer, size, ret, buf + 1, l - 1);
+      else if (buf[0] == '0' && buf[1] == 'x')
+        gomp_display_string (buffer, size, ret, buf + 2, l - 2);
+      else
+        gomp_display_string (buffer, size, ret, buf, l);
+    }
+  else if (right)
+    {
+      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
+      gomp_display_string (buffer, size, ret, buf, l);
+    }
+  else
+    {
+      gomp_display_string (buffer, size, ret, buf, l);
+      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
+    }
+}
+
+/* Format NUM in decimal and append it through gomp_display_num.  */
+static void
+gomp_display_int (char *buffer, size_t size, size_t *ret,
+                  bool zero, bool right, size_t sz, int num)
+{
+  /* Three characters per byte bound the decimal digits; the extra two
+     are for the sign and the terminating NUL.  */
+  char buf[3 * sizeof (int) + 2];
+  sprintf (buf, "%d", num);
+  gomp_display_num (buffer, size, ret, zero, right, sz, buf);
+}
+
+/* Append the LEN bytes at STR, padded with blanks to SZ characters.  The
+   blanks come first if RIGHT and last otherwise.  Nothing is padded when
+   SZ is (size_t) -1 or LEN is at least SZ.  */
+static void
+gomp_display_string_len (char *buffer, size_t size, size_t *ret,
+                         bool right, size_t sz, char *str, size_t len)
+{
+  if (sz == (size_t) -1 || len >= sz)
+    {
+      gomp_display_string (buffer, size, ret, str, len);
+      return;
+    }
+
+  if (right)
+    {
+      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
+      gomp_display_string (buffer, size, ret, str, len);
+    }
+  else
+    {
+      gomp_display_string (buffer, size, ret, str, len);
+      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
+    }
+}
+
+/* Append the host name, padded as in gomp_display_string_len.  Sources
+   are tried in order: gethostname (if HAVE_GETHOSTNAME), the nodename
+   reported by uname (if HAVE_UNAME), and finally the literal "node".  */
+static void
+gomp_display_hostname (char *buffer, size_t size, size_t *ret,
+                       bool right, size_t sz)
+{
+#ifdef HAVE_GETHOSTNAME
+  {
+    /* Start with a stack buffer and keep doubling a heap buffer, up to
+       1048576 bytes, until the name fits with room to spare.  */
+    char buf[256];
+    char *b = buf;
+    size_t len = 256;
+    do
+      {
+        b[len - 1] = '\0';
+        if (gethostname (b, len - 1) == 0)
+          {
+            size_t l = strlen (b);
+            /* A name that fills the whole buffer may have been cut
+               short, so only accept strictly shorter results.  */
+            if (l < len - 1)
+              {
+                gomp_display_string_len (buffer, size, ret,
+                                         right, sz, b, l);
+                if (b != buf)
+                  free (b);
+                return;
+              }
+          }
+        if (len == 1048576)
+          break;
+        len = len * 2;
+        /* The first growth step moves from the stack buffer to the heap;
+           later steps resize the heap buffer.  */
+        if (len == 512)
+          b = gomp_malloc (len);
+        else
+          b = gomp_realloc (b, len);
+      }
+    while (1);
+    if (b != buf)
+      free (b);
+  }
+#endif
+#ifdef HAVE_UNAME
+  {
+    struct utsname buf;
+    if (uname (&buf) == 0)
+      {
+        gomp_display_string_len (buffer, size, ret, right, sz,
+                                 buf.nodename, strlen (buf.nodename));
+        return;
+      }
+  }
+#endif
+  gomp_display_string_len (buffer, size, ret, right, sz, "node", 4);
+}
+
+/* One entry per affinity format field type: the long name accepted inside
+   braces, the length of that name, and the equivalent one-letter type.  */
+struct affinity_types_struct {
+  char long_str[18];
+  char long_len;
+  char short_c; };
+
+/* Long-name table searched by gomp_display_affinity.  */
+static struct affinity_types_struct affinity_types[] =
+{
+#define AFFINITY_TYPE(l, s) \
+  { #l, sizeof (#l) - 1, s }
+  AFFINITY_TYPE (team_num, 't'),
+  AFFINITY_TYPE (num_teams, 'T'),
+  AFFINITY_TYPE (nesting_level, 'L'),
+  AFFINITY_TYPE (thread_num, 'n'),
+  AFFINITY_TYPE (num_threads, 'N'),
+  AFFINITY_TYPE (ancestor_tnum, 'a'),
+  AFFINITY_TYPE (host, 'H'),
+  AFFINITY_TYPE (process_id, 'P'),
+  AFFINITY_TYPE (native_thread_id, 'i'),
+  AFFINITY_TYPE (thread_affinity, 'A')
+#undef AFFINITY_TYPE
+};
+
+/* Expand FORMAT into BUFFER (capacity SIZE) using HANDLE, TS and PLACE as
+   the data of the thread being described.  Returns the number of bytes
+   the complete expansion needs, which may exceed SIZE; the output is not
+   NUL terminated here.
+
+   Text outside '%' fields is copied verbatim and "%%" yields '%'.  A
+   field is '%', an optional '0' (zero padding, must be followed by '.'),
+   an optional '.' (right justification), a decimal width starting with
+   1-9 (required after '0' or '.'), and then either a one-letter type or
+   a long name from affinity_types in braces.  Malformed or unknown
+   fields end in gomp_fatal.
+   NOTE(review): PLACE is passed on as PLACE - 1, so it is presumably
+   1-based with 0 meaning "no place" -- confirm against the callers.  */
+size_t
+gomp_display_affinity (char *buffer, size_t size,
+                       const char *format, gomp_thread_handle handle,
+                       struct gomp_team_state *ts, unsigned int place)
+{
+  size_t ret = 0;
+  do
+    {
+      const char *p = strchr (format, '%');
+      bool zero = false;
+      bool right = false;
+      size_t sz = -1;
+      char c;
+      int val;
+      /* Copy the literal text up to the next '%' or the end of FORMAT.  */
+      if (p == NULL)
+        p = strchr (format, '\0');
+      if (p != format)
+        gomp_display_string (buffer, size, &ret,
+                             format, p - format);
+      if (*p == '\0')
+        break;
+      p++;
+      if (*p == '%')
+        {
+          gomp_display_string (buffer, size, &ret, "%", 1);
+          format = p + 1;
+          continue;
+        }
+      if (*p == '0')
+        {
+          zero = true;
+          p++;
+          if (*p != '.')
+            gomp_fatal ("leading zero not followed by dot in affinity format");
+        }
+      if (*p == '.')
+        {
+          right = true;
+          p++;
+        }
+      if (*p >= '1' && *p <= '9')
+        {
+          char *end;
+          sz = strtoul (p, &end, 10);
+          p = end;
+        }
+      else if (zero || right)
+        gomp_fatal ("leading zero or right justification in affinity format "
+                    "requires size");
+      c = *p;
+      if (c == '{')
+        {
+          /* Translate a {long_name} into its one-letter type and leave P
+             on the closing brace.  */
+          int i;
+          for (i = 0;
+               i < sizeof (affinity_types) / sizeof (affinity_types[0]); ++i)
+            if (strncmp (p + 1, affinity_types[i].long_str,
+                         affinity_types[i].long_len) == 0
+                && p[affinity_types[i].long_len + 1] == '}')
+              {
+                c = affinity_types[i].short_c;
+                p += affinity_types[i].long_len + 1;
+                break;
+              }
+          /* C is still '{' when no table entry matched.  */
+          if (c == '{')
+            {
+              char *q = strchr (p + 1, '}');
+              if (q)
+                gomp_fatal ("unsupported long type name '%.*s' in affinity "
+                            "format", (int) (q - (p + 1)), p + 1);
+              else
+                gomp_fatal ("unterminated long type name '%s' in affinity "
+                            "format", p + 1);
+            }
+        }
+      switch (c)
+        {
+        case 't':
+          val = omp_get_team_num ();
+          goto do_int;
+        case 'T':
+          val = omp_get_num_teams ();
+          goto do_int;
+        case 'L':
+          val = ts->level;
+          goto do_int;
+        case 'n':
+          val = ts->team_id;
+          goto do_int;
+        case 'N':
+          val = ts->team ? ts->team->nthreads : 1;
+          goto do_int;
+        case 'a':
+          val = ts->team ? ts->team->prev_ts.team_id : -1;
+          goto do_int;
+        case 'H':
+          gomp_display_hostname (buffer, size, &ret, right, sz);
+          break;
+        case 'P':
+#ifdef HAVE_GETPID
+          val = getpid ();
+#else
+          val = 0;
+#endif
+          goto do_int;
+        case 'i':
+#if defined(LIBGOMP_USE_PTHREADS) && defined(__GNUC__)
+          /* Handle integral pthread_t.  */
+          if (__builtin_classify_type (handle) == 1)
+            {
+              char buf[3 * (sizeof (handle) + sizeof (int)) + 4];
+
+              if (sizeof (handle) == sizeof (long))
+                sprintf (buf, "0x%lx", (long) handle);
+              else if (sizeof (handle) == sizeof (long long))
+                sprintf (buf, "0x%llx", (long long) handle);
+              else
+                sprintf (buf, "0x%x", (int) handle);
+              gomp_display_num (buffer, size, &ret, zero, right, sz, buf);
+              break;
+            }
+          /* And pointer pthread_t.  */
+          else if (__builtin_classify_type (handle) == 5)
+            {
+              char buf[3 * (sizeof (uintptr_t) + sizeof (int)) + 4];
+
+              if (sizeof (uintptr_t) == sizeof (long))
+                sprintf (buf, "0x%lx", (long) (uintptr_t) handle);
+              else if (sizeof (uintptr_t) == sizeof (long long))
+                sprintf (buf, "0x%llx", (long long) (uintptr_t) handle);
+              else
+                sprintf (buf, "0x%x", (int) (uintptr_t) handle);
+              gomp_display_num (buffer, size, &ret, zero, right, sz, buf);
+              break;
+            }
+#endif
+          /* Any other kind of handle is printed as 0.  */
+          val = 0;
+          goto do_int;
+        case 'A':
+          if (sz == (size_t) -1)
+            gomp_display_affinity_place (buffer, size, &ret,
+                                         place - 1);
+          else if (right)
+            {
+              /* Measure the place text with a dry run (NULL buffer) so
+                 the blanks can be emitted ahead of it.  */
+              size_t len = 0;
+              gomp_display_affinity_place (NULL, 0, &len, place - 1);
+              if (len < sz)
+                gomp_display_repeat (buffer, size, &ret, ' ', sz - len);
+              gomp_display_affinity_place (buffer, size, &ret, place - 1);
+            }
+          else
+            {
+              size_t start = ret;
+              gomp_display_affinity_place (buffer, size, &ret, place - 1);
+              if (ret - start < sz)
+                gomp_display_repeat (buffer, size, &ret, ' ', sz - (ret - start));
+            }
+          break;
+        do_int:
+          gomp_display_int (buffer, size, &ret, zero, right, sz, val);
+          break;
+        default:
+          gomp_fatal ("unsupported type %c in affinity format", c);
+        }
+      format = p + 1;
+    }
+  while (1);
+  return ret;
+}
+
+/* Expand FORMAT for the calling thread into BUFFER, which holds SIZE
+   bytes; gomp_affinity_format_var is used when FORMAT is NULL or empty.
+   When SIZE is nonzero the result is NUL terminated, truncated to
+   SIZE - 1 bytes if needed.  Returns the length the full expansion
+   needs, not counting the NUL.  */
+size_t
+omp_capture_affinity (char *buffer, size_t size, const char *format)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ret
+    = gomp_display_affinity (buffer, size,
+                             format && *format
+                             ? format : gomp_affinity_format_var,
+                             gomp_thread_self (), &thr->ts, thr->place);
+  if (size)
+    {
+      if (ret >= size)
+        buffer[size - 1] = '\0';
+      else
+        buffer[ret] = '\0';
+    }
+  return ret;
+}
+ialias (omp_capture_affinity)
+
+/* Write the expansion of FORMAT for the calling thread, followed by a
+   newline, to stderr.  A 512 byte stack buffer is tried first; a longer
+   result is regenerated into a heap buffer of exactly the needed size.  */
+void
+omp_display_affinity (const char *format)
+{
+  char buf[512];
+  char *b;
+  size_t ret = ialias_call (omp_capture_affinity) (buf, sizeof buf, format);
+  if (ret < sizeof buf)
+    {
+      buf[ret] = '\n';
+      fwrite (buf, 1, ret + 1, stderr);
+      return;
+    }
+  b = gomp_malloc (ret + 1);
+  ialias_call (omp_capture_affinity) (b, ret + 1, format);
+  b[ret] = '\n';
+  fwrite (b, 1, ret + 1, stderr);
+  free (b);
+}
+
+/* Like omp_display_affinity, but always uses gomp_affinity_format_var and
+   describes the thread given by HANDLE, TS and PLACE instead of the
+   calling thread.  */
+void
+gomp_display_affinity_thread (gomp_thread_handle handle,
+                              struct gomp_team_state *ts, unsigned int place)
+{
+  char buf[512];
+  char *b;
+  size_t ret = gomp_display_affinity (buf, sizeof buf, gomp_affinity_format_var,
+                                      handle, ts, place);
+  if (ret < sizeof buf)
+    {
+      buf[ret] = '\n';
+      fwrite (buf, 1, ret + 1, stderr);
+      return;
+    }
+  b = gomp_malloc (ret + 1);
+  gomp_display_affinity (b, ret + 1, gomp_affinity_format_var,
+                         handle, ts, place);
+  b[ret] = '\n';
+  fwrite (b, 1, ret + 1, stderr);
+  free (b);
+}
diff --git a/libgomp/affinity.c b/libgomp/affinity.c
index 3f98e56c002..99a45dcd3a0 100644
--- a/libgomp/affinity.c
+++ b/libgomp/affinity.c
@@ -138,5 +138,26 @@ gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
   (void) ids;
 }
 
+/* Append to BUFFER the CPU list printed for the 'A' affinity format field.
+   This version does not consult PLACE: the text is always "0-N" with
+   N = gomp_available_cpus - 1, or "0" when gomp_available_cpus is not
+   greater than 1.  BUFFER, SIZE and RET are passed on to
+   gomp_display_string.
+   NOTE(review): sprintf/strcpy/strlen need <stdio.h> and <string.h>; the
+   includes of this file are not visible in this hunk -- confirm.  */
+void
+gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
+                             int place)
+{
+  char buf[sizeof (long) * 3 + 4];
+
+  (void) place;
+  if (gomp_available_cpus > 1)
+    sprintf (buf, "0-%lu", gomp_available_cpus - 1);
+  else
+    strcpy (buf, "0");
+  gomp_display_string (buffer, size, ret, buf, strlen (buf));
+}
+
 ialias(omp_get_place_num_procs)
 ialias(omp_get_place_proc_ids)
diff --git
a/libgomp/alloc.c b/libgomp/alloc.c index 1bf40423579..296f1af1eaa 100644 --- a/libgomp/alloc.c +++ b/libgomp/alloc.c @@ -57,3 +57,50 @@ gomp_realloc (void *old, size_t size) gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); return ret; } + +void * +gomp_aligned_alloc (size_t al, size_t size) +{ + void *ret; + if (al < sizeof (void *)) + al = sizeof (void *); +#ifdef HAVE_ALIGNED_ALLOC + ret = aligned_alloc (al, size); +#elif defined(HAVE__ALIGNED_MALLOC) + ret = _aligned_malloc (size, al); +#elif defined(HAVE_POSIX_MEMALIGN) + if (posix_memalign (&ret, al, size) != 0) + ret = NULL; +#elif defined(HAVE_MEMALIGN) + { + extern void *memalign (size_t, size_t); + ret = memalign (al, size); + } +#else + ret = NULL; + if ((al & (al - 1)) == 0 && size) + { + void *p = malloc (size + al); + if (p) + { + void *ap = (void *) (((uintptr_t) p + al) & -al); + ((void **) ap)[-1] = p; + ret = ap; + } + } +#endif + if (ret == NULL) + gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); + return ret; +} + +void +gomp_aligned_free (void *ptr) +{ +#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC + free (ptr); +#else + if (ptr) + free (((void **) ptr)[-1]); +#endif +} diff --git a/libgomp/config.h.in b/libgomp/config.h.in index e7bc4d97374..52f4ed44412 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -1,5 +1,8 @@ /* config.h.in. Generated from configure.ac by autoheader. */ +/* Define to 1 if you have the `aligned_alloc' function. */ +#undef HAVE_ALIGNED_ALLOC + /* Define to 1 if the target assembler supports .symver directive. */ #undef HAVE_AS_SYMVER_DIRECTIVE @@ -33,9 +36,15 @@ /* Define to 1 if you have the `getgid' function. */ #undef HAVE_GETGID +/* Define if gethostname is supported. */ +#undef HAVE_GETHOSTNAME + /* Define to 1 if you have the `getloadavg' function. */ #undef HAVE_GETLOADAVG +/* Define if getpid is supported. */ +#undef HAVE_GETPID + /* Define to 1 if you have the `getuid' function. 
*/ #undef HAVE_GETUID @@ -45,9 +54,15 @@ /* Define to 1 if you have the `dl' library (-ldl). */ #undef HAVE_LIBDL +/* Define to 1 if you have the `memalign' function. */ +#undef HAVE_MEMALIGN + /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */ #undef HAVE_PTHREAD_AFFINITY_NP @@ -103,9 +118,15 @@ /* Define to 1 if the target supports thread-local storage. */ #undef HAVE_TLS +/* Define if uname is supported and struct utsname has nodename field. */ +#undef HAVE_UNAME + /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H +/* Define to 1 if you have the `_aligned_malloc' function. */ +#undef HAVE__ALIGNED_MALLOC + /* Define to 1 if you have the `__secure_getenv' function. */ #undef HAVE___SECURE_GETENV diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c index a2aefb8977c..d2dcc1ea852 100644 --- a/libgomp/config/linux/affinity.c +++ b/libgomp/config/linux/affinity.c @@ -396,6 +396,56 @@ gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) *ids++ = i; } +void +gomp_display_affinity_place (char *buffer, size_t size, size_t *ret, + int place) +{ + cpu_set_t *cpusetp; + char buf[sizeof (long) * 3 + 4]; + if (place >= 0 && place < gomp_places_list_len) + cpusetp = (cpu_set_t *) gomp_places_list[place]; + else if (gomp_cpusetp) + cpusetp = gomp_cpusetp; + else + { + if (gomp_available_cpus > 1) + sprintf (buf, "0-%lu", gomp_available_cpus - 1); + else + strcpy (buf, "0"); + gomp_display_string (buffer, size, ret, buf, strlen (buf)); + return; + } + + unsigned long i, max = 8 * gomp_cpuset_size, start; + bool prev_set = false; + start = max; + for (i = 0; i <= max; i++) + { + bool this_set; + if (i == max) + this_set = false; + else + this_set = CPU_ISSET_S (i, gomp_cpuset_size, cpusetp); + if (this_set != prev_set) + { + prev_set = 
this_set; + if (this_set) + { + char *p = buf; + if (start != max) + *p++ = ','; + sprintf (p, "%lu", i); + start = i; + } + else if (i == start + 1) + continue; + else + sprintf (buf, "-%lu", i - 1); + gomp_display_string (buffer, size, ret, buf, strlen (buf)); + } + } +} + ialias(omp_get_place_num_procs) ialias(omp_get_place_proc_ids) diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 562db95f328..8cb464bb61c 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -46,20 +46,6 @@ omp_get_num_devices (void) } int -omp_get_num_teams (void) -{ - return gomp_num_teams_var + 1; -} - -int -omp_get_team_num (void) -{ - int ctaid; - asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid)); - return ctaid; -} - -int omp_is_initial_device (void) { /* NVPTX is an accelerator-only target. */ @@ -69,6 +55,4 @@ omp_is_initial_device (void) ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_num_devices) -ialias (omp_get_num_teams) -ialias (omp_get_team_num) ialias (omp_is_initial_device) diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c index 64004acb422..3878227a27f 100644 --- a/libgomp/config/nvptx/target.c +++ b/libgomp/config/nvptx/target.c @@ -47,3 +47,21 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit) } gomp_num_teams_var = num_teams - 1; } + +int +omp_pause_resource (omp_pause_resource_t kind, int device_num) +{ + (void) kind; + (void) device_num; + return -1; +} + +int +omp_pause_resource_all (omp_pause_resource_t kind) +{ + (void) kind; + return -1; +} + +ialias (omp_pause_resource) +ialias (omp_pause_resource_all) diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index 34059d389f2..8b0a0903641 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -116,7 +116,8 @@ gomp_thread_start (struct gomp_thread_pool *pool) void gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, - 
unsigned flags, struct gomp_team *team) + unsigned flags, struct gomp_team *team, + struct gomp_taskgroup *taskgroup) { struct gomp_thread *thr, *nthr; struct gomp_task *task; @@ -147,6 +148,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, nthreads_var = icv->nthreads_var; gomp_init_task (thr->task, task, icv); team->implicit_task[0].icv.nthreads_var = nthreads_var; + team->implicit_task[0].taskgroup = taskgroup; if (nthreads == 1) return; @@ -166,6 +168,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, nthr->task = &team->implicit_task[i]; gomp_init_task (nthr->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; + team->implicit_task[i].taskgroup = taskgroup; nthr->fn = fn; nthr->data = data; team->ordered_release[i] = &nthr->release; @@ -174,5 +177,11 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, gomp_simple_barrier_wait (&pool->threads_dock); } +int +gomp_pause_host (void) +{ + return -1; +} + #include "../../team.c" #endif diff --git a/libgomp/config/nvptx/teams.c b/libgomp/config/nvptx/teams.c new file mode 100644 index 00000000000..9bed0320fb8 --- /dev/null +++ b/libgomp/config/nvptx/teams.c @@ -0,0 +1,57 @@ +/* Copyright (C) 2015-2018 Free Software Foundation, Inc. + Contributed by Alexander Monakov <amonakov@ispras.ru> + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file defines OpenMP API entry points that accelerator targets are + expected to replace. */ + +#include "libgomp.h" + +void +GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, + unsigned int thread_limit, unsigned int flags) +{ + (void) fn; + (void) data; + (void) flags; + (void) num_teams; + (void) thread_limit; +} + +int +omp_get_num_teams (void) +{ + return gomp_num_teams_var + 1; +} + +int +omp_get_team_num (void) +{ + int ctaid; + asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid)); + return ctaid; +} + +ialias (omp_get_num_teams) +ialias (omp_get_team_num) diff --git a/libgomp/configure b/libgomp/configure index e56fa21cd94..013e37c0ba5 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -15812,6 +15812,19 @@ _ACEOF fi done +for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +eval as_val=\$$as_ac_var + if test "x$as_val" = x""yes; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + # Check for broken semaphore implementation on darwin. # sem_init returns: sem_init error: Function not implemented. @@ -16026,6 +16039,72 @@ fi fi +# Check for uname. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <string.h> + #include <stdlib.h> + #include <sys/utsname.h> +int +main () +{ +struct utsname buf; + volatile size_t len = 0; + if (!uname (&buf)) + len = strlen (buf.nodename); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_UNAME 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +# Check for gethostname. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <unistd.h> +int +main () +{ + + char buf[256]; + if (gethostname (buf, sizeof (buf) - 1) == 0) + buf[255] = '\0'; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_GETHOSTNAME 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +# Check for getpid. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <unistd.h> +int +main () +{ +int pid = getpid (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_GETPID 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + # See if we support thread-local storage. diff --git a/libgomp/configure.ac b/libgomp/configure.ac index e94edc76c29..f75c6226566 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -218,6 +218,7 @@ m4_include([plugin/configfrag.ac]) # Check for functions needed. AC_CHECK_FUNCS(getloadavg clock_gettime strtoull) +AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc) # Check for broken semaphore implementation on darwin. # sem_init returns: sem_init error: Function not implemented. @@ -265,6 +266,41 @@ if test $ac_cv_func_clock_gettime = no; then [Define to 1 if you have the `clock_gettime' function.])]) fi +# Check for uname. 
+AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [#include <string.h> + #include <stdlib.h> + #include <sys/utsname.h>], + [struct utsname buf; + volatile size_t len = 0; + if (!uname (&buf)) + len = strlen (buf.nodename);])], + AC_DEFINE(HAVE_UNAME, 1, +[ Define if uname is supported and struct utsname has nodename field.])) + +# Check for gethostname. +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [#include <unistd.h>], + [ +changequote(,)dnl + char buf[256]; + if (gethostname (buf, sizeof (buf) - 1) == 0) + buf[255] = '\0'; +changequote([,])dnl + ])], + AC_DEFINE(HAVE_GETHOSTNAME, 1, +[ Define if gethostname is supported.])) + +# Check for getpid. +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [#include <unistd.h>], + [int pid = getpid ();])], + AC_DEFINE(HAVE_GETPID, 1, +[ Define if getpid is supported.])) + # See if we support thread-local storage. GCC_CHECK_TLS diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index 74d95a570c7..b88bf72fe3d 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -18,7 +18,7 @@ if test $gcc_cv_have_tls = yes ; then ;; *-*-linux* | *-*-gnu*) - XCFLAGS="${XCFLAGS} -ftls-model=initial-exec" + XCFLAGS="${XCFLAGS} -ftls-model=initial-exec -DUSING_INITIAL_EXEC_TLS" ;; *-*-rtems*) diff --git a/libgomp/env.c b/libgomp/env.c index 18c90bb09d0..2c9a609d607 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -88,6 +88,9 @@ void **gomp_places_list; unsigned long gomp_places_list_len; int gomp_debug_var; unsigned int gomp_num_teams_var; +bool gomp_display_affinity_var; +char *gomp_affinity_format_var = "level %L thread %i affinity %A"; +size_t gomp_affinity_format_len; char *goacc_device_type; int goacc_device_num; int goacc_default_dims[GOMP_DIM_MAX]; @@ -101,6 +104,7 @@ parse_schedule (void) { char *env, *end; unsigned long value; + int monotonic = 0; env = getenv ("OMP_SCHEDULE"); if (env == NULL) @@ -108,6 +112,26 @@ parse_schedule (void) while (isspace ((unsigned char) *env)) ++env; + if (strncasecmp (env, "monotonic", 9) == 0) + { + 
monotonic = 1; + env += 9; + } + else if (strncasecmp (env, "nonmonotonic", 12) == 0) + { + monotonic = -1; + env += 12; + } + if (monotonic) + { + while (isspace ((unsigned char) *env)) + ++env; + if (*env != ':') + goto unknown; + ++env; + while (isspace ((unsigned char) *env)) + ++env; + } if (strncasecmp (env, "static", 6) == 0) { gomp_global_icv.run_sched_var = GFS_STATIC; @@ -131,12 +155,16 @@ parse_schedule (void) else goto unknown; + if (monotonic == 1 + || (monotonic == 0 && gomp_global_icv.run_sched_var == GFS_STATIC)) + gomp_global_icv.run_sched_var |= GFS_MONOTONIC; + while (isspace ((unsigned char) *env)) ++env; if (*env == '\0') { gomp_global_icv.run_sched_chunk_size - = gomp_global_icv.run_sched_var != GFS_STATIC; + = (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC; return; } if (*env++ != ',') @@ -159,7 +187,8 @@ parse_schedule (void) if ((int)value != value) goto invalid; - if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) + if (value == 0 + && (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC) value = 1; gomp_global_icv.run_sched_chunk_size = value; return; @@ -1150,19 +1179,34 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fputs ("'\n", stderr); fprintf (stderr, " OMP_SCHEDULE = '"); - switch (gomp_global_icv.run_sched_var) + if ((gomp_global_icv.run_sched_var & GFS_MONOTONIC)) + { + if (gomp_global_icv.run_sched_var != (GFS_MONOTONIC | GFS_STATIC)) + fputs ("MONOTONIC:", stderr); + } + else if (gomp_global_icv.run_sched_var == GFS_STATIC) + fputs ("NONMONOTONIC:", stderr); + switch (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) { case GFS_RUNTIME: fputs ("RUNTIME", stderr); + if (gomp_global_icv.run_sched_chunk_size != 1) + fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size); break; case GFS_STATIC: fputs ("STATIC", stderr); + if (gomp_global_icv.run_sched_chunk_size != 0) + fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size); break; case GFS_DYNAMIC: fputs 
("DYNAMIC", stderr); + if (gomp_global_icv.run_sched_chunk_size != 1) + fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size); break; case GFS_GUIDED: fputs ("GUIDED", stderr); + if (gomp_global_icv.run_sched_chunk_size != 1) + fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size); break; case GFS_AUTO: fputs ("AUTO", stderr); @@ -1228,6 +1272,10 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) gomp_global_icv.default_device_var); fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", gomp_max_task_priority_var); + fprintf (stderr, " OMP_DISPLAY_AFFINITY = '%s'\n", + gomp_display_affinity_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_AFFINITY_FORMAT = '%s'\n", + gomp_affinity_format_var); if (verbose) { @@ -1259,6 +1307,7 @@ initialize_env (void) parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); + parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, @@ -1308,6 +1357,13 @@ initialize_env (void) } if (gomp_global_icv.bind_var != omp_proc_bind_false) gomp_init_affinity (); + + { + const char *env = getenv ("OMP_AFFINITY_FORMAT"); + if (env != NULL) + gomp_set_affinity_format (env, strlen (env)); + } + wait_policy = parse_wait_policy (); if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var)) { @@ -1333,7 +1389,6 @@ initialize_env (void) /* Not strictly environment related, but ordering constructors is tricky. 
*/ pthread_attr_init (&gomp_thread_attr); - pthread_attr_setdetachstate (&gomp_thread_attr, PTHREAD_CREATE_DETACHED); if (parse_stacksize ("OMP_STACKSIZE", &stacksize) || parse_stacksize ("GOMP_STACKSIZE", &stacksize) diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 24172a0f01a..0157baec648 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -28,6 +28,8 @@ #include "libgomp.h" #include "libgomp_f.h" #include <stdlib.h> +#include <stdio.h> +#include <string.h> #include <limits.h> #ifdef HAVE_ATTRIBUTE_ALIAS @@ -82,6 +84,8 @@ ialias_redirect (omp_get_team_num) ialias_redirect (omp_is_initial_device) ialias_redirect (omp_get_initial_device) ialias_redirect (omp_get_max_task_priority) +ialias_redirect (omp_pause_resource) +ialias_redirect (omp_pause_resource_all) #endif #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING @@ -368,7 +372,9 @@ omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) omp_sched_t k; int cs; omp_get_schedule (&k, &cs); - *kind = k; + /* For now mask off GFS_MONOTONIC, because OpenMP 4.5 code will not + expect to see it. */ + *kind = k & ~GFS_MONOTONIC; *chunk_size = cs; } @@ -378,7 +384,8 @@ omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) omp_sched_t k; int cs; omp_get_schedule (&k, &cs); - *kind = k; + /* See above. 
*/ + *kind = k & ~GFS_MONOTONIC; *chunk_size = cs; } @@ -576,3 +583,97 @@ omp_get_max_task_priority_ (void) { return omp_get_max_task_priority (); } + +void +omp_set_affinity_format_ (const char *format, size_t format_len) +{ + gomp_set_affinity_format (format, format_len); +} + +int32_t +omp_get_affinity_format_ (char *buffer, size_t buffer_len) +{ + size_t len = strlen (gomp_affinity_format_var); + if (buffer_len) + { + if (len < buffer_len) + { + memcpy (buffer, gomp_affinity_format_var, len); + memset (buffer + len, ' ', buffer_len - len); + } + else + memcpy (buffer, gomp_affinity_format_var, buffer_len); + } + return len; +} + +void +omp_display_affinity_ (const char *format, size_t format_len) +{ + char *fmt = NULL, fmt_buf[256]; + char buf[512]; + if (format_len) + { + fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1); + memcpy (fmt, format, format_len); + fmt[format_len] = '\0'; + } + struct gomp_thread *thr = gomp_thread (); + size_t ret + = gomp_display_affinity (buf, sizeof buf, + format_len ? fmt : gomp_affinity_format_var, + gomp_thread_self (), &thr->ts, thr->place); + if (ret < sizeof buf) + { + buf[ret] = '\n'; + fwrite (buf, 1, ret + 1, stderr); + } + else + { + char *b = gomp_malloc (ret + 1); + /* Output did not fit in BUF; redo it into the heap buffer B. */ + gomp_display_affinity (b, ret + 1, + format_len ? fmt : gomp_affinity_format_var, + gomp_thread_self (), &thr->ts, thr->place); + b[ret] = '\n'; + fwrite (b, 1, ret + 1, stderr); + free (b); + } + if (fmt && fmt != fmt_buf) + free (fmt); +} + +int32_t +omp_capture_affinity_ (char *buffer, const char *format, + size_t buffer_len, size_t format_len) +{ + char *fmt = NULL, fmt_buf[256]; + if (format_len) + { + fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1); + memcpy (fmt, format, format_len); + fmt[format_len] = '\0'; + } + struct gomp_thread *thr = gomp_thread (); + size_t ret + = gomp_display_affinity (buffer, buffer_len, + format_len ? 
fmt : gomp_affinity_format_var, + gomp_thread_self (), &thr->ts, thr->place); + if (fmt && fmt != fmt_buf) + free (fmt); + if (ret < buffer_len) + memset (buffer + ret, ' ', buffer_len - ret); + return ret; +} + +int32_t +omp_pause_resource_ (const int32_t *kind, const int32_t *device_num) +{ + return omp_pause_resource (*kind, *device_num); +} + +int32_t +omp_pause_resource_all_ (const int32_t *kind) +{ + return omp_pause_resource_all (*kind); +} diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index b643cb29ee7..9cb394e74dc 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -49,20 +49,6 @@ omp_get_num_devices (void) } int -omp_get_num_teams (void) -{ - /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */ - return 1; -} - -int -omp_get_team_num (void) -{ - /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */ - return 0; -} - -int omp_is_initial_device (void) { /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */ @@ -72,6 +58,4 @@ omp_is_initial_device (void) ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_num_devices) -ialias (omp_get_num_teams) -ialias (omp_get_team_num) ialias (omp_is_initial_device) diff --git a/libgomp/icv.c b/libgomp/icv.c index 42db4fe6c88..68c400aec81 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -69,7 +69,7 @@ void omp_set_schedule (omp_sched_t kind, int chunk_size) { struct gomp_task_icv *icv = gomp_icv (true); - switch (kind) + switch (kind & ~omp_sched_monotonic) { case omp_sched_static: if (chunk_size < 1) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 3a8cc2bd7d6..828e9b0095b 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -86,9 +86,21 @@ enum memmodel /* alloc.c */ +#if defined(HAVE_ALIGNED_ALLOC) \ + || defined(HAVE__ALIGNED_MALLOC) \ + || defined(HAVE_POSIX_MEMALIGN) \ + || defined(HAVE_MEMALIGN) +/* Defined if gomp_aligned_alloc doesn't use fallback version + and free can be used instead of gomp_aligned_free. 
*/ +#define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1 +#endif + extern void *gomp_malloc (size_t) __attribute__((malloc)); extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); extern void *gomp_realloc (void *, size_t); +extern void *gomp_aligned_alloc (size_t, size_t) + __attribute__((malloc, alloc_size (2))); +extern void gomp_aligned_free (void *); /* Avoid conflicting prototypes of alloca() in system headers by using GCC's builtin alloca(). */ @@ -138,7 +150,8 @@ enum gomp_schedule_type GFS_STATIC, GFS_DYNAMIC, GFS_GUIDED, - GFS_AUTO + GFS_AUTO, + GFS_MONOTONIC = 0x80000000U }; struct gomp_doacross_work_share @@ -175,6 +188,8 @@ struct gomp_doacross_work_share /* Likewise, but for the ull implementation. */ unsigned long long boundary_ull; }; + /* Pointer to extra memory if needed for lastprivate(conditional). */ + void *extra; /* Array of shift counts for each dimension if they can be flattened. */ unsigned int shift_counts[]; }; @@ -276,6 +291,9 @@ struct gomp_work_share struct gomp_work_share *next_free; }; + /* Task reductions for this work-sharing construct. */ + uintptr_t *task_reductions; + /* If only few threads are in the team, ordered_team_ids can point to this array which fills the padding at the end of this struct. */ unsigned inline_ordered_team_ids[0]; @@ -366,6 +384,9 @@ extern void **gomp_places_list; extern unsigned long gomp_places_list_len; extern unsigned int gomp_num_teams_var; extern int gomp_debug_var; +extern bool gomp_display_affinity_var; +extern char *gomp_affinity_format_var; +extern size_t gomp_affinity_format_len; extern int goacc_device_num; extern char *goacc_device_type; extern int goacc_default_dims[GOMP_DIM_MAX]; @@ -471,8 +492,10 @@ struct gomp_taskgroup struct gomp_taskgroup *prev; /* Queue of tasks that belong in this taskgroup. 
*/ struct priority_queue taskgroup_queue; + uintptr_t *reductions; bool in_taskgroup_wait; bool cancelled; + bool workshare; gomp_sem_t taskgroup_sem; size_t num_children; }; @@ -615,6 +638,19 @@ struct gomp_thread /* User pthread thread pool */ struct gomp_thread_pool *thread_pool; + +#if defined(LIBGOMP_USE_PTHREADS) \ + && (!defined(HAVE_TLS) \ + || !defined(__GLIBC__) \ + || !defined(USING_INITIAL_EXEC_TLS)) + /* pthread_t of the thread containing this gomp_thread. + On Linux when using initial-exec TLS, + (typeof (pthread_t)) gomp_thread () - pthread_self () + is constant in all threads, so we can optimize and not + store it. */ +#define GOMP_NEEDS_THREAD_HANDLE 1 + pthread_t handle; +#endif }; @@ -711,6 +747,24 @@ extern bool gomp_affinity_finalize_place_list (bool); extern bool gomp_affinity_init_level (int, unsigned long, bool); extern void gomp_affinity_print_place (void *); extern void gomp_get_place_proc_ids_8 (int, int64_t *); +extern void gomp_display_affinity_place (char *, size_t, size_t *, int); + +/* affinity-fmt.c */ + +extern void gomp_set_affinity_format (const char *, size_t); +extern void gomp_display_string (char *, size_t, size_t *, const char *, + size_t); +#ifdef LIBGOMP_USE_PTHREADS +typedef pthread_t gomp_thread_handle; +#else +typedef struct {} gomp_thread_handle; +#endif +extern size_t gomp_display_affinity (char *, size_t, const char *, + gomp_thread_handle, + struct gomp_team_state *, unsigned int); +extern void gomp_display_affinity_thread (gomp_thread_handle, + struct gomp_team_state *, + unsigned int) __attribute__((cold)); /* iter.c */ @@ -747,9 +801,9 @@ extern void gomp_ordered_next (void); extern void gomp_ordered_static_init (void); extern void gomp_ordered_static_next (void); extern void gomp_ordered_sync (void); -extern void gomp_doacross_init (unsigned, long *, long); +extern void gomp_doacross_init (unsigned, long *, long, size_t); extern void gomp_doacross_ull_init (unsigned, unsigned long long *, - unsigned long long); 
+ unsigned long long, size_t); /* parallel.c */ @@ -772,6 +826,10 @@ extern bool gomp_create_target_task (struct gomp_device_descr *, size_t *, unsigned short *, unsigned int, void **, void **, enum gomp_target_task_state); +extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *, + unsigned); +extern void gomp_workshare_taskgroup_start (void); +extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *); static void inline gomp_finish_task (struct gomp_task *task) @@ -784,9 +842,11 @@ gomp_finish_task (struct gomp_task *task) extern struct gomp_team *gomp_new_team (unsigned); extern void gomp_team_start (void (*) (void *), void *, unsigned, - unsigned, struct gomp_team *); + unsigned, struct gomp_team *, + struct gomp_taskgroup *); extern void gomp_team_end (void); extern void gomp_free_thread (void *); +extern int gomp_pause_host (void); /* target.c */ @@ -1009,9 +1069,9 @@ extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); /* work.c */ -extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); +extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned); extern void gomp_fini_work_share (struct gomp_work_share *); -extern bool gomp_work_share_start (bool); +extern bool gomp_work_share_start (size_t); extern void gomp_work_share_end (void); extern bool gomp_work_share_end_cancel (void); extern void gomp_work_share_end_nowait (void); @@ -1138,4 +1198,42 @@ task_to_priority_node (enum priority_queue_type type, return (struct priority_node *) ((char *) task + priority_queue_offset (type)); } + +#ifdef LIBGOMP_USE_PTHREADS +static inline gomp_thread_handle +gomp_thread_self (void) +{ + return pthread_self (); +} + +static inline gomp_thread_handle +gomp_thread_to_pthread_t (struct gomp_thread *thr) +{ + struct gomp_thread *this_thr = gomp_thread (); + if (thr == this_thr) + return pthread_self (); +#ifdef GOMP_NEEDS_THREAD_HANDLE + return thr->handle; +#else + /* On 
Linux with initial-exec TLS, the pthread_t of the thread containing + thr can be computed from thr, this_thr and pthread_self (), + as the distance between this_thr and pthread_self () is constant. */ + return pthread_self () + ((uintptr_t) thr - (uintptr_t) this_thr); +#endif +} +#else +static inline gomp_thread_handle +gomp_thread_self (void) +{ + return (gomp_thread_handle) {}; +} + +static inline gomp_thread_handle +gomp_thread_to_pthread_t (struct gomp_thread *thr) +{ + (void) thr; + return gomp_thread_self (); +} +#endif + #endif /* LIBGOMP_H */ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index e3f0c648e45..d8e2fd1818b 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -164,6 +164,22 @@ OMP_4.5 { omp_target_disassociate_ptr; } OMP_4.0; +OMP_5.0 { + global: + omp_capture_affinity; + omp_capture_affinity_; + omp_display_affinity; + omp_display_affinity_; + omp_get_affinity_format; + omp_get_affinity_format_; + omp_set_affinity_format; + omp_set_affinity_format_; + omp_pause_resource; + omp_pause_resource_; + omp_pause_resource_all; + omp_pause_resource_all_; +} OMP_4.5; + GOMP_1.0 { global: GOMP_atomic_end; @@ -298,6 +314,34 @@ GOMP_4.5 { GOMP_parallel_loop_nonmonotonic_guided; } GOMP_4.0.1; +GOMP_5.0 { + global: + GOMP_loop_doacross_start; + GOMP_loop_maybe_nonmonotonic_runtime_next; + GOMP_loop_maybe_nonmonotonic_runtime_start; + GOMP_loop_nonmonotonic_runtime_next; + GOMP_loop_nonmonotonic_runtime_start; + GOMP_loop_ordered_start; + GOMP_loop_start; + GOMP_loop_ull_doacross_start; + GOMP_loop_ull_maybe_nonmonotonic_runtime_next; + GOMP_loop_ull_maybe_nonmonotonic_runtime_start; + GOMP_loop_ull_nonmonotonic_runtime_next; + GOMP_loop_ull_nonmonotonic_runtime_start; + GOMP_loop_ull_ordered_start; + GOMP_loop_ull_start; + GOMP_parallel_loop_maybe_nonmonotonic_runtime; + GOMP_parallel_loop_nonmonotonic_runtime; + GOMP_parallel_reductions; + GOMP_sections2_start; + GOMP_taskgroup_reduction_register; + GOMP_taskgroup_reduction_unregister; + 
GOMP_task_reduction_remap; + GOMP_taskwait_depend; + GOMP_teams_reg; + GOMP_workshare_task_reduction_unregister; +} GOMP_4.5; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index c99b5129f70..5b54839b29e 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -31,6 +31,7 @@ #include <stdbool.h> #include <stddef.h> +#include "gstdint.h" /* barrier.c */ @@ -56,6 +57,12 @@ extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long, long *, long *); extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long, long *, long *); +extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long, + long *, long *); +extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long, + long *, long *); +extern bool GOMP_loop_start (long, long, long, long, long, long *, long *, + uintptr_t *, void **); extern bool GOMP_loop_ordered_static_start (long, long, long, long, long *, long *); @@ -64,6 +71,8 @@ extern bool GOMP_loop_ordered_dynamic_start (long, long, long, long, extern bool GOMP_loop_ordered_guided_start (long, long, long, long, long *, long *); extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *); +extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *, + long *, uintptr_t *, void **); extern bool GOMP_loop_static_next (long *, long *); extern bool GOMP_loop_dynamic_next (long *, long *); @@ -71,6 +80,8 @@ extern bool GOMP_loop_guided_next (long *, long *); extern bool GOMP_loop_runtime_next (long *, long *); extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *); extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *); +extern bool GOMP_loop_nonmonotonic_runtime_next (long *, long *); +extern bool GOMP_loop_maybe_nonmonotonic_runtime_next (long *, long *); extern bool GOMP_loop_ordered_static_next (long *, long *); extern bool GOMP_loop_ordered_dynamic_next (long *, long *); @@ -85,6 +96,8 @@ extern bool 
GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, long *); extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, long *); +extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *, + long *, uintptr_t *, void **); extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, unsigned, long, long, long, long); @@ -112,6 +125,13 @@ extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *, extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *, unsigned, long, long, long, long, unsigned); +extern void GOMP_parallel_loop_nonmonotonic_runtime (void (*)(void *), void *, + unsigned, long, long, + long, unsigned); +extern void GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*)(void *), + void *, unsigned, + long, long, + long, unsigned); extern void GOMP_loop_end (void); extern void GOMP_loop_end_nowait (void); @@ -154,6 +174,21 @@ extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long, unsigned long long, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_nonmonotonic_runtime_start (bool, unsigned long long, + unsigned long long, + unsigned long long, + unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool, + unsigned long long, + unsigned long long, + unsigned long long, + unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long, + unsigned long long, long, unsigned long long, + unsigned long long *, unsigned long long *, + uintptr_t *, void **); extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, unsigned long long, @@ -178,6 +213,13 @@ extern bool GOMP_loop_ull_ordered_runtime_start (bool, unsigned long long, unsigned long long, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long, + unsigned long long, + unsigned 
long long, long, + unsigned long long, + unsigned long long *, + unsigned long long *, + uintptr_t *, void **); extern bool GOMP_loop_ull_static_next (unsigned long long *, unsigned long long *); @@ -191,6 +233,10 @@ extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned long long *, unsigned long long *); extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_nonmonotonic_runtime_next (unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_next (unsigned long long *, + unsigned long long *); extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *, unsigned long long *); @@ -220,6 +266,11 @@ extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, unsigned long long *, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *, + long, unsigned long long, + unsigned long long *, + unsigned long long *, + uintptr_t *, void **); /* ordered.c */ @@ -235,6 +286,8 @@ extern void GOMP_doacross_ull_wait (unsigned long long, ...); extern void GOMP_parallel_start (void (*) (void *), void *, unsigned); extern void GOMP_parallel_end (void); extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned); +extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned, + unsigned); extern bool GOMP_cancel (int, bool); extern bool GOMP_cancellation_point (int); @@ -251,13 +304,19 @@ extern void GOMP_taskloop_ull (void (*) (void *), void *, unsigned long long, unsigned long long, unsigned long long); extern void GOMP_taskwait (void); +extern void GOMP_taskwait_depend (void **); extern void GOMP_taskyield (void); extern void GOMP_taskgroup_start (void); extern void GOMP_taskgroup_end (void); +extern void GOMP_taskgroup_reduction_register (uintptr_t *); +extern void GOMP_taskgroup_reduction_unregister (uintptr_t *); +extern void GOMP_task_reduction_remap (size_t, 
size_t, void **); +extern void GOMP_workshare_task_reduction_unregister (bool); /* sections.c */ extern unsigned GOMP_sections_start (unsigned); +extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **); extern unsigned GOMP_sections_next (void); extern void GOMP_parallel_sections_start (void (*) (void *), void *, unsigned, unsigned); @@ -293,6 +352,11 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, void **); extern void GOMP_teams (unsigned int, unsigned int); +/* teams.c */ + +extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, + unsigned); + /* oacc-parallel.c */ extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, diff --git a/libgomp/loop.c b/libgomp/loop.c index a8c7e246333..4e0683ba675 100644 --- a/libgomp/loop.c +++ b/libgomp/loop.c @@ -27,9 +27,13 @@ #include <limits.h> #include <stdlib.h> +#include <string.h> #include "libgomp.h" +ialias (GOMP_loop_runtime_next) +ialias_redirect (GOMP_taskgroup_reduction_register) + /* Initialize the given work share construct from the given arguments. */ static inline void @@ -79,12 +83,12 @@ gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr, } /* The *_start routines are called when first encountering a loop construct - that is not bound directly to a parallel construct. The first thread + that is not bound directly to a parallel construct. The first thread that arrives will create the work-share construct; subsequent threads will see the construct exists and allocate work from it. START, END, INCR are the bounds of the loop; due to the restrictions of - OpenMP, these values must be the same in every thread. This is not + OpenMP, these values must be the same in every thread. This is not verified (nor is it entirely verifiable, since START is not necessarily retained intact in the work-share data structure). CHUNK_SIZE is the scheduling parameter; again this must be identical in all threads. 
@@ -101,7 +105,7 @@ gomp_loop_static_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_STATIC, chunk_size); @@ -123,7 +127,7 @@ gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -151,7 +155,7 @@ gomp_loop_guided_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_GUIDED, chunk_size); @@ -174,7 +178,7 @@ GOMP_loop_runtime_start (long start, long end, long incr, long *istart, long *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch (icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_static_start (start, end, incr, @@ -197,6 +201,100 @@ GOMP_loop_runtime_start (long start, long end, long incr, } } +static long +gomp_adjust_sched (long sched, long *chunk_size) +{ + sched &= ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + return sched; + /* GFS_RUNTIME is used for runtime schedule without monotonic + or nonmonotonic modifiers on the clause. + GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic + modifier. */ + case GFS_RUNTIME: + /* GFS_AUTO is used for runtime schedule with nonmonotonic + modifier. 
*/ + case GFS_AUTO: + { + struct gomp_task_icv *icv = gomp_icv (false); + sched = icv->run_sched_var & ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + *chunk_size = icv->run_sched_chunk_size; + break; + case GFS_AUTO: + sched = GFS_STATIC; + *chunk_size = 0; + break; + default: + abort (); + } + return sched; + } + default: + abort (); + } +} + +bool +GOMP_loop_start (long start, long end, long incr, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset (thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + + if (!istart) + return true; + return ialias_call (GOMP_loop_runtime_next) (istart, iend); +} + /* The *_ordered_*_start routines are similar. The only difference is that this work-share construct is initialized to expect an ORDERED section. 
*/ @@ -207,7 +305,7 @@ gomp_loop_ordered_static_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_STATIC, chunk_size); @@ -225,7 +323,7 @@ gomp_loop_ordered_dynamic_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -250,7 +348,7 @@ gomp_loop_ordered_guided_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_GUIDED, chunk_size); @@ -273,7 +371,7 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr, long *istart, long *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch (icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_ordered_static_start (start, end, incr, @@ -297,6 +395,81 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr, } } +bool +GOMP_loop_ordered_start (long start, long end, long incr, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + size_t ordered = 1; + bool ret; + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (mem) + ordered += (uintptr_t) *mem; + if (gomp_work_share_start (ordered)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if 
(sched == GFS_STATIC) + gomp_ordered_static_init (); + else + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + if (sched != GFS_STATIC) + gomp_mutex_lock (&thr->ts.work_share->lock); + } + + if (mem) + { + uintptr_t p + = (uintptr_t) (thr->ts.work_share->ordered_team_ids + + (thr->ts.team ? thr->ts.team->nthreads : 1)); + p += __alignof__ (long long) - 1; + p &= ~(__alignof__ (long long) - 1); + *mem = (void *) p; + } + + switch (sched) + { + case GFS_STATIC: + case GFS_AUTO: + return !gomp_iter_static_next (istart, iend); + case GFS_DYNAMIC: + ret = gomp_iter_dynamic_next_locked (istart, iend); + break; + case GFS_GUIDED: + ret = gomp_iter_guided_next_locked (istart, iend); + break; + default: + abort (); + } + + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + return ret; +} + /* The *_doacross_*_start routines are similar. 
The only difference is that this work-share construct is initialized to expect an ORDERED(N) - DOACROSS section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 @@ -310,11 +483,11 @@ gomp_loop_doacross_static_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_STATIC, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -328,11 +501,11 @@ gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_DYNAMIC, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -354,11 +527,11 @@ gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_GUIDED, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -378,7 +551,7 @@ GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, long *istart, long *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch (icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_doacross_static_start (ncounts, counts, @@ -402,8 +575,52 @@ GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, } } -/* The *_next routines are called when the thread completes processing of - the iteration block currently assigned 
to it. If the work-share +bool +GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + size_t extra = 0; + if (mem) + extra = (uintptr_t) *mem; + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, + sched, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, extra); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + } + + if (mem) + *mem = thr->ts.work_share->doacross->extra; + + return ialias_call (GOMP_loop_runtime_next) (istart, iend); +} + +/* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. If the work-share construct is bound directly to a parallel construct, then the iteration bounds may have been set up before the parallel. In which case, this may be the first iteration for the thread. 
@@ -456,7 +673,7 @@ bool GOMP_loop_runtime_next (long *istart, long *iend) { struct gomp_thread *thr = gomp_thread (); - + switch (thr->ts.work_share->sched) { case GFS_STATIC: @@ -534,7 +751,7 @@ bool GOMP_loop_ordered_runtime_next (long *istart, long *iend) { struct gomp_thread *thr = gomp_thread (); - + switch (thr->ts.work_share->sched) { case GFS_STATIC: @@ -563,7 +780,7 @@ gomp_parallel_loop_start (void (*fn) (void *), void *data, num_threads = gomp_resolve_num_threads (num_threads, 0); team = gomp_new_team (num_threads); gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); - gomp_team_start (fn, data, num_threads, flags, team); + gomp_team_start (fn, data, num_threads, flags, team, NULL); } void @@ -600,7 +817,8 @@ GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data, { struct gomp_task_icv *icv = gomp_icv (false); gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - icv->run_sched_var, icv->run_sched_chunk_size, 0); + icv->run_sched_var & ~GFS_MONOTONIC, + icv->run_sched_chunk_size, 0); } ialias_redirect (GOMP_parallel_end) @@ -638,11 +856,28 @@ GOMP_parallel_loop_guided (void (*fn) (void *), void *data, GOMP_parallel_end (); } +void +GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, + long incr, unsigned flags) +{ + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + icv->run_sched_var & ~GFS_MONOTONIC, + icv->run_sched_chunk_size, flags); + fn (data); + GOMP_parallel_end (); +} + #ifdef HAVE_ATTRIBUTE_ALIAS extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic __attribute__((alias ("GOMP_parallel_loop_dynamic"))); extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided __attribute__((alias ("GOMP_parallel_loop_guided"))); +extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime + __attribute__((alias 
("GOMP_parallel_loop_runtime"))); +extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime + __attribute__((alias ("GOMP_parallel_loop_runtime"))); #else void GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, @@ -667,21 +902,35 @@ GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, fn (data); GOMP_parallel_end (); } -#endif void -GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, - unsigned num_threads, long start, long end, - long incr, unsigned flags) +GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, + long end, long incr, unsigned flags) { struct gomp_task_icv *icv = gomp_icv (false); gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - icv->run_sched_var, icv->run_sched_chunk_size, - flags); + icv->run_sched_var & ~GFS_MONOTONIC, + icv->run_sched_chunk_size, flags); fn (data); GOMP_parallel_end (); } +void +GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, + long end, long incr, + unsigned flags) +{ + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + icv->run_sched_var & ~GFS_MONOTONIC, + icv->run_sched_chunk_size, flags); + fn (data); + GOMP_parallel_end (); +} +#endif + /* The GOMP_loop_end* routines are called after the thread is told that all loop iterations are complete. The first two versions synchronize all threads; the nowait version does not. 
*/ @@ -721,6 +970,10 @@ extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start __attribute__((alias ("gomp_loop_dynamic_start"))); extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start __attribute__((alias ("gomp_loop_guided_start"))); +extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start + __attribute__((alias ("GOMP_loop_runtime_start"))); +extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start + __attribute__((alias ("GOMP_loop_runtime_start"))); extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start __attribute__((alias ("gomp_loop_ordered_static_start"))); @@ -746,6 +999,10 @@ extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next __attribute__((alias ("gomp_loop_dynamic_next"))); extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next __attribute__((alias ("gomp_loop_guided_next"))); +extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next + __attribute__((alias ("GOMP_loop_runtime_next"))); +extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next + __attribute__((alias ("GOMP_loop_runtime_next"))); extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next __attribute__((alias ("gomp_loop_ordered_static_next"))); @@ -791,6 +1048,20 @@ GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, } bool +GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr, + long *istart, long *iend) +{ + return GOMP_loop_runtime_start (start, end, incr, istart, iend); +} + +bool +GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr, + long *istart, long *iend) +{ + return GOMP_loop_runtime_start (start, end, incr, istart, iend); +} + +bool GOMP_loop_ordered_static_start (long start, long end, long incr, long chunk_size, long *istart, long *iend) { @@ -869,6 +1140,18 @@ GOMP_loop_nonmonotonic_guided_next 
(long *istart, long *iend) } bool +GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend) +{ + return GOMP_loop_runtime_next (istart, iend); +} + +bool +GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend) +{ + return GOMP_loop_runtime_next (istart, iend); +} + +bool GOMP_loop_ordered_static_next (long *istart, long *iend) { return gomp_loop_ordered_static_next (istart, iend); diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c index 3d4ac994f0a..ac658023e13 100644 --- a/libgomp/loop_ull.c +++ b/libgomp/loop_ull.c @@ -27,8 +27,12 @@ #include <limits.h> #include <stdlib.h> +#include <string.h> #include "libgomp.h" +ialias (GOMP_loop_ull_runtime_next) +ialias_redirect (GOMP_taskgroup_reduction_register) + typedef unsigned long long gomp_ull; /* Initialize the given work share construct from the given arguments. */ @@ -104,7 +108,7 @@ gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_STATIC, chunk_size); @@ -122,7 +126,7 @@ gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -148,7 +152,7 @@ gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_GUIDED, chunk_size); @@ -171,7 +175,7 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, gomp_ull incr, gomp_ull *istart, gomp_ull *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch 
(icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_ull_static_start (up, start, end, incr, @@ -195,6 +199,99 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, } } +static long +gomp_adjust_sched (long sched, gomp_ull *chunk_size) +{ + sched &= ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + return sched; + /* GFS_RUNTIME is used for runtime schedule without monotonic + or nonmonotonic modifiers on the clause. + GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic + modifier. */ + case GFS_RUNTIME: + /* GFS_AUTO is used for runtime schedule with nonmonotonic + modifier. */ + case GFS_AUTO: + { + struct gomp_task_icv *icv = gomp_icv (false); + sched = icv->run_sched_var & ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + *chunk_size = icv->run_sched_chunk_size; + break; + case GFS_AUTO: + sched = GFS_STATIC; + *chunk_size = 0; + break; + default: + abort (); + } + return sched; + } + default: + abort (); + } +} + +bool +GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset 
(thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + + return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); +} + /* The *_ordered_*_start routines are similar. The only difference is that this work-share construct is initialized to expect an ORDERED section. */ @@ -206,7 +303,7 @@ gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_STATIC, chunk_size); @@ -225,7 +322,7 @@ gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -251,7 +348,7 @@ gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_GUIDED, chunk_size); @@ -275,7 +372,7 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, gomp_ull *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch (icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_ull_ordered_static_start (up, start, end, incr, @@ -299,6 +396,82 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, } } +bool 
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + size_t ordered = 1; + bool ret; + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (mem) + ordered += (uintptr_t) *mem; + if (gomp_work_share_start (ordered)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (sched == GFS_STATIC) + gomp_ordered_static_init (); + else + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + if (sched != GFS_STATIC) + gomp_mutex_lock (&thr->ts.work_share->lock); + } + + if (mem) + { + uintptr_t p + = (uintptr_t) (thr->ts.work_share->ordered_team_ids + + (thr->ts.team ? thr->ts.team->nthreads : 1)); + p += __alignof__ (long long) - 1; + p &= ~(__alignof__ (long long) - 1); + *mem = (void *) p; + } + + switch (sched) + { + case GFS_STATIC: + case GFS_AUTO: + return !gomp_iter_ull_static_next (istart, iend); + case GFS_DYNAMIC: + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + break; + case GFS_GUIDED: + ret = gomp_iter_ull_guided_next_locked (istart, iend); + break; + default: + abort (); + } + + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + return ret; +} + /* The *_doacross_*_start routines are similar. 
The only difference is that this work-share construct is initialized to expect an ORDERED(N) - DOACROSS section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 @@ -313,11 +486,11 @@ gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_STATIC, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -332,11 +505,11 @@ gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_DYNAMIC, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -359,11 +532,11 @@ gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_GUIDED, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -383,7 +556,7 @@ GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, gomp_ull *istart, gomp_ull *iend) { struct gomp_task_icv *icv = gomp_icv (false); - switch (icv->run_sched_var) + switch (icv->run_sched_var & ~GFS_MONOTONIC) { case GFS_STATIC: return gomp_loop_ull_doacross_static_start (ncounts, counts, @@ -407,6 +580,51 @@ GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, } } +bool 
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts, + long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + size_t extra = 0; + if (mem) + extra = (uintptr_t) *mem; + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, + sched, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, extra); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + } + + if (mem) + *mem = thr->ts.work_share->doacross->extra; + + return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); +} + /* The *_next routines are called when the thread completes processing of the iteration block currently assigned to it. 
If the work-share construct is bound directly to a parallel construct, then the iteration @@ -570,6 +788,10 @@ extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_ __attribute__((alias ("gomp_loop_ull_dynamic_start"))); extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start __attribute__((alias ("gomp_loop_ull_guided_start"))); +extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start + __attribute__((alias ("GOMP_loop_ull_runtime_start"))); +extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start + __attribute__((alias ("GOMP_loop_ull_runtime_start"))); extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); @@ -595,6 +817,10 @@ extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_n __attribute__((alias ("gomp_loop_ull_dynamic_next"))); extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next __attribute__((alias ("gomp_loop_ull_guided_next"))); +extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next + __attribute__((alias ("GOMP_loop_ull_runtime_next"))); +extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next + __attribute__((alias ("GOMP_loop_ull_runtime_next"))); extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); @@ -650,6 +876,23 @@ GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, } bool +GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start, + gomp_ull end, gomp_ull incr, + gomp_ull *istart, gomp_ull *iend) +{ + return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend); +} + +bool +GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start, + gomp_ull end, gomp_ull incr, 
+ gomp_ull *istart, + gomp_ull *iend) +{ + return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend); +} + +bool GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, gomp_ull incr, gomp_ull chunk_size, gomp_ull *istart, gomp_ull *iend) @@ -734,6 +977,19 @@ GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) } bool +GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend) +{ + return GOMP_loop_ull_runtime_next (istart, iend); +} + +bool +GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart, + gomp_ull *iend) +{ + return GOMP_loop_ull_runtime_next (istart, iend); +} + +bool GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) { return gomp_loop_ull_ordered_static_next (istart, iend); diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 654a4d5f42b..a18303470c0 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -50,7 +50,8 @@ typedef enum omp_sched_t omp_sched_static = 1, omp_sched_dynamic = 2, omp_sched_guided = 3, - omp_sched_auto = 4 + omp_sched_auto = 4, + omp_sched_monotonic = 0x80000000U } omp_sched_t; typedef enum omp_proc_bind_t @@ -62,14 +63,32 @@ typedef enum omp_proc_bind_t omp_proc_bind_spread = 4 } omp_proc_bind_t; -typedef enum omp_lock_hint_t +typedef enum omp_sync_hint_t { - omp_lock_hint_none = 0, - omp_lock_hint_uncontended = 1, - omp_lock_hint_contended = 2, - omp_lock_hint_nonspeculative = 4, - omp_lock_hint_speculative = 8, -} omp_lock_hint_t; + omp_sync_hint_none = 0, + omp_lock_hint_none = omp_sync_hint_none, + omp_sync_hint_uncontended = 1, + omp_lock_hint_uncontended = omp_sync_hint_uncontended, + omp_sync_hint_contended = 2, + omp_lock_hint_contended = omp_sync_hint_contended, + omp_sync_hint_nonspeculative = 4, + omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative, + omp_sync_hint_speculative = 8, + omp_lock_hint_speculative = omp_sync_hint_speculative +} omp_sync_hint_t; + +typedef omp_sync_hint_t omp_lock_hint_t; + +typedef 
struct __attribute__((__aligned__ (sizeof (void *)))) omp_depend_t +{ + char __omp_depend_t__[2 * sizeof (void *)]; +} omp_depend_t; + +typedef enum omp_pause_resource_t +{ + omp_pause_soft = 1, + omp_pause_hard = 2 +} omp_pause_resource_t; #ifdef __cplusplus extern "C" { @@ -93,7 +112,7 @@ extern void omp_set_nested (int) __GOMP_NOTHROW; extern int omp_get_nested (void) __GOMP_NOTHROW; extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW; -extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t) +extern void omp_init_lock_with_hint (omp_lock_t *, omp_sync_hint_t) __GOMP_NOTHROW; extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW; @@ -101,7 +120,7 @@ extern void omp_unset_lock (omp_lock_t *) __GOMP_NOTHROW; extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_init_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; -extern void omp_init_nest_lock_with_hint (omp_nest_lock_t *, omp_lock_hint_t) +extern void omp_init_nest_lock_with_hint (omp_nest_lock_t *, omp_sync_hint_t) __GOMP_NOTHROW; extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; @@ -144,19 +163,30 @@ extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; extern void omp_target_free (void *, int) __GOMP_NOTHROW; -extern int omp_target_is_present (void *, int) __GOMP_NOTHROW; -extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, - __SIZE_TYPE__, int, int) __GOMP_NOTHROW; -extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, +extern int omp_target_is_present (const void *, int) __GOMP_NOTHROW; +extern int omp_target_memcpy (void *, const void *, __SIZE_TYPE__, + __SIZE_TYPE__, __SIZE_TYPE__, int, int) + __GOMP_NOTHROW; +extern int omp_target_memcpy_rect (void *, const void *, __SIZE_TYPE__, int, const __SIZE_TYPE__ *, const 
__SIZE_TYPE__ *, const __SIZE_TYPE__ *, const __SIZE_TYPE__ *, const __SIZE_TYPE__ *, int, int) __GOMP_NOTHROW; -extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, +extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__, __SIZE_TYPE__, int) __GOMP_NOTHROW; -extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; +extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW; + +extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW; +extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__) + __GOMP_NOTHROW; +extern void omp_display_affinity (const char *) __GOMP_NOTHROW; +extern __SIZE_TYPE__ omp_capture_affinity (char *, __SIZE_TYPE__, const char *) + __GOMP_NOTHROW; + +extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW; +extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW; #ifdef __cplusplus } diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index eea33b893d6..caecfba749d 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -30,6 +30,7 @@ integer, parameter :: omp_sched_kind = 4 integer, parameter :: omp_proc_bind_kind = 4 integer, parameter :: omp_lock_hint_kind = 4 + integer, parameter :: omp_pause_resource_kind = 4 integer (omp_sched_kind), parameter :: omp_sched_static = 1 integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2 integer (omp_sched_kind), parameter :: omp_sched_guided = 3 @@ -54,6 +55,10 @@ parameter :: omp_lock_hint_nonspeculative = 4 integer (omp_lock_hint_kind), & parameter :: omp_lock_hint_speculative = 8 + integer (kind=omp_pause_resource_kind), & + parameter :: omp_pause_soft = 1 + integer (kind=omp_pause_resource_kind), & + parameter :: omp_pause_hard = 2 end module module omp_lib @@ -433,4 +438,50 @@ end function omp_get_max_task_priority end interface + interface + subroutine omp_set_affinity_format (format) + character(len=*), intent(in) :: format + end subroutine 
omp_set_affinity_format + end interface + + interface + function omp_get_affinity_format (buffer) + integer (4) :: omp_get_affinity_format + character(len=*), intent(out) :: buffer + end function omp_get_affinity_format + end interface + + interface + subroutine omp_display_affinity (format) + character(len=*), intent(in) :: format + end subroutine omp_display_affinity + end interface + + interface + function omp_capture_affinity (buffer, format) + integer (4) :: omp_capture_affinity + character(len=*), intent(out) :: buffer + character(len=*), intent(in) :: format + end function omp_capture_affinity + end interface + + interface + function omp_pause_resource (kind, device_num) + use omp_lib_kinds + integer (4) :: omp_pause_resource + integer (kind=omp_pause_resource_kind), & + intent(in) :: kind + integer (4) :: device_num + end function + end interface + + interface + function omp_pause_resource_all (kind) + use omp_lib_kinds + integer (4) :: omp_pause_resource_all + integer (kind=omp_pause_resource_kind), & + intent(in) :: kind + end function + end interface + end module omp_lib diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index e57da94a2d5..2a553435a45 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -59,6 +59,12 @@ parameter (omp_lock_hint_nonspeculative = 4) parameter (omp_lock_hint_speculative = 8) parameter (openmp_version = 201511) + integer omp_pause_resource_kind + parameter (omp_pause_resource_kind = 4) + integer (omp_pause_resource_kind) omp_pause_soft + integer (omp_pause_resource_kind) omp_pause_hard + parameter (omp_pause_soft = 1) + parameter (omp_pause_hard = 2) external omp_init_lock, omp_init_nest_lock external omp_init_lock_with_hint @@ -126,3 +132,12 @@ external omp_get_max_task_priority integer(4) omp_get_max_task_priority + + external omp_set_affinity_format, omp_get_affinity_format + external omp_display_affinity, omp_capture_affinity + integer(4) omp_get_affinity_format + integer(4) omp_capture_affinity + + 
external omp_pause_resource, omp_pause_resource_all + integer(4) omp_pause_resource + integer(4) omp_pause_resource_all diff --git a/libgomp/ordered.c b/libgomp/ordered.c index 1bdd5b2f25b..521e9122d90 100644 --- a/libgomp/ordered.c +++ b/libgomp/ordered.c @@ -259,7 +259,8 @@ GOMP_ordered_end (void) #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) void -gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) +gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size, + size_t extra) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -269,13 +270,24 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) struct gomp_doacross_work_share *doacross; if (team == NULL || team->nthreads == 1) - return; + { + empty: + if (!extra) + ws->doacross = NULL; + else + { + doacross = gomp_malloc_cleared (sizeof (*doacross) + extra); + doacross->extra = (void *) (doacross + 1); + ws->doacross = doacross; + } + return; + } for (i = 0; i < ncounts; i++) { /* If any count is 0, GOMP_doacross_{post,wait} can't be called. 
*/ if (counts[i] == 0) - return; + goto empty; if (num_bits <= MAX_COLLAPSED_BITS) { @@ -314,7 +326,7 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) elt_sz = (elt_sz + 63) & ~63UL; doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz - + shift_sz); + + shift_sz + extra); doacross->chunk_size = chunk_size; doacross->elt_sz = elt_sz; doacross->ncounts = ncounts; @@ -322,6 +334,13 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) doacross->array = (unsigned char *) ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) & ~(uintptr_t) 63); + if (extra) + { + doacross->extra = doacross->array + num_ents * elt_sz; + memset (doacross->extra, '\0', extra); + } + else + doacross->extra = NULL; if (num_bits <= MAX_COLLAPSED_BITS) { unsigned int shift_count = 0; @@ -360,7 +379,8 @@ GOMP_doacross_post (long *counts) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -411,7 +431,8 @@ GOMP_doacross_wait (long first, ...) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -488,7 +509,8 @@ GOMP_doacross_wait (long first, ...) 
typedef unsigned long long gomp_ull; void -gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) +gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, size_t extra) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -498,13 +520,24 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) struct gomp_doacross_work_share *doacross; if (team == NULL || team->nthreads == 1) - return; + { + empty: + if (!extra) + ws->doacross = NULL; + else + { + doacross = gomp_malloc_cleared (sizeof (*doacross) + extra); + doacross->extra = (void *) (doacross + 1); + ws->doacross = doacross; + } + return; + } for (i = 0; i < ncounts; i++) { /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ if (counts[i] == 0) - return; + goto empty; if (num_bits <= MAX_COLLAPSED_BITS) { @@ -557,6 +590,13 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) doacross->array = (unsigned char *) ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) & ~(uintptr_t) 63); + if (extra) + { + doacross->extra = doacross->array + num_ents * elt_sz; + memset (doacross->extra, '\0', extra); + } + else + doacross->extra = NULL; if (num_bits <= MAX_COLLAPSED_BITS) { unsigned int shift_count = 0; @@ -595,7 +635,8 @@ GOMP_doacross_ull_post (gomp_ull *counts) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -667,7 +708,8 @@ GOMP_doacross_ull_wait (gomp_ull first, ...) 
unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 803e8b770c8..c7a8c788a3b 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -123,7 +123,8 @@ void GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) { num_threads = gomp_resolve_num_threads (num_threads, 0); - gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads)); + gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads), + NULL); } void @@ -161,14 +162,33 @@ GOMP_parallel_end (void) ialias (GOMP_parallel_end) void -GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags) +GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, + unsigned int flags) { num_threads = gomp_resolve_num_threads (num_threads, 0); - gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads)); + gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads), + NULL); fn (data); ialias_call (GOMP_parallel_end) (); } +unsigned +GOMP_parallel_reductions (void (*fn) (void *), void *data, + unsigned num_threads, unsigned int flags) +{ + struct gomp_taskgroup *taskgroup; + num_threads = gomp_resolve_num_threads (num_threads, 0); + uintptr_t *rdata = *(uintptr_t **)data; + taskgroup = gomp_parallel_reduction_register (rdata, num_threads); + gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads), + taskgroup); + fn (data); + ialias_call (GOMP_parallel_end) (); + gomp_sem_destroy (&taskgroup->taskgroup_sem); + free (taskgroup); + return num_threads; +} + bool GOMP_cancellation_point (int which) { @@ -185,8 +205,15 @@ GOMP_cancellation_point (int which) } else if (which & GOMP_CANCEL_TASKGROUP) { - if (thr->task->taskgroup && thr->task->taskgroup->cancelled) - return true; + 
if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return true; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return true; + } /* FALLTHRU into the GOMP_CANCEL_PARALLEL case, as #pragma omp cancel parallel also cancels all explicit tasks. */ @@ -218,11 +245,17 @@ GOMP_cancel (int which, bool do_cancel) } else if (which & GOMP_CANCEL_TASKGROUP) { - if (thr->task->taskgroup && !thr->task->taskgroup->cancelled) + if (thr->task->taskgroup) { - gomp_mutex_lock (&team->task_lock); - thr->task->taskgroup->cancelled = true; - gomp_mutex_unlock (&team->task_lock); + struct gomp_taskgroup *taskgroup = thr->task->taskgroup; + if (taskgroup->workshare && taskgroup->prev) + taskgroup = taskgroup->prev; + if (!taskgroup->cancelled) + { + gomp_mutex_lock (&team->task_lock); + taskgroup->cancelled = true; + gomp_mutex_unlock (&team->task_lock); + } } return true; } diff --git a/libgomp/sections.c b/libgomp/sections.c index 65b53b45f73..3449e0067dd 100644 --- a/libgomp/sections.c +++ b/libgomp/sections.c @@ -26,8 +26,11 @@ /* This file handles the SECTIONS construct. */ #include "libgomp.h" +#include <string.h> +ialias_redirect (GOMP_taskgroup_reduction_register) + /* Initialize the given work share construct from the given arguments. 
*/ static inline void @@ -72,7 +75,7 @@ GOMP_sections_start (unsigned count) struct gomp_thread *thr = gomp_thread (); long s, e, ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_sections_init (thr->ts.work_share, count); gomp_work_share_init_done (); @@ -95,6 +98,66 @@ GOMP_sections_start (unsigned count) return ret; } +unsigned +GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + long s, e, ret; + + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + gomp_sections_init (thr->ts.work_share, count); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset (thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + +#ifdef HAVE_SYNC_BUILTINS + if (gomp_iter_dynamic_next (&s, &e)) + ret = s; + else + ret = 0; +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + if (gomp_iter_dynamic_next_locked (&s, &e)) + ret = s; + else + ret = 0; + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + /* This routine is called when the thread completes processing of the section currently assigned to it. 
If the work-share construct is bound directly to a parallel construct, then the construct may have @@ -140,7 +203,7 @@ GOMP_parallel_sections_start (void (*fn) (void *), void *data, num_threads = gomp_resolve_num_threads (num_threads, count); team = gomp_new_team (num_threads); gomp_sections_init (&team->work_shares[0], count); - gomp_team_start (fn, data, num_threads, 0, team); + gomp_team_start (fn, data, num_threads, 0, team, NULL); } ialias_redirect (GOMP_parallel_end) @@ -154,7 +217,7 @@ GOMP_parallel_sections (void (*fn) (void *), void *data, num_threads = gomp_resolve_num_threads (num_threads, count); team = gomp_new_team (num_threads); gomp_sections_init (&team->work_shares[0], count); - gomp_team_start (fn, data, num_threads, flags, team); + gomp_team_start (fn, data, num_threads, flags, team, NULL); fn (data); GOMP_parallel_end (); } diff --git a/libgomp/single.c b/libgomp/single.c index 24a7780ad93..d5093c6730c 100644 --- a/libgomp/single.c +++ b/libgomp/single.c @@ -47,7 +47,7 @@ GOMP_single_start (void) return __sync_bool_compare_and_swap (&team->single_count, single_count, single_count + 1L); #else - bool ret = gomp_work_share_start (false); + bool ret = gomp_work_share_start (0); if (ret) gomp_work_share_init_done (); gomp_work_share_end_nowait (); @@ -68,7 +68,7 @@ GOMP_single_copy_start (void) bool first; void *ret; - first = gomp_work_share_start (false); + first = gomp_work_share_start (0); if (first) { diff --git a/libgomp/target.c b/libgomp/target.c index dda041cdbef..8ebc2a370a1 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1854,11 +1854,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup - && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_task_maybe_wait_for_dependencies (depend); } @@ -1873,10 +1882,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); } @@ -1985,11 +2004,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup - && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_task_maybe_wait_for_dependencies (depend); } @@ -2004,10 +2032,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } size_t i; if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) @@ -2164,7 +2202,7 @@ omp_target_free (void *device_ptr, int device_num) } int -omp_target_is_present (void *ptr, int device_num) +omp_target_is_present (const void *ptr, int device_num) { if (ptr == NULL) return 1; @@ -2196,8 +2234,9 @@ omp_target_is_present (void *ptr, int device_num) } int -omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, - size_t src_offset, int dst_device_num, int src_device_num) +omp_target_memcpy (void *dst, const void *src, size_t length, + size_t dst_offset, size_t src_offset, int dst_device_num, + int src_device_num) { struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; bool ret; @@ -2264,7 +2303,7 @@ omp_target_memcpy 
(void *dst, void *src, size_t length, size_t dst_offset, } static int -omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, +omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, int num_dims, const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets, @@ -2286,21 +2325,25 @@ omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, return EINVAL; if (dst_devicep == NULL && src_devicep == NULL) { - memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + memcpy ((char *) dst + dst_off, (const char *) src + src_off, + length); ret = 1; } else if (src_devicep == NULL) ret = dst_devicep->host2dev_func (dst_devicep->target_id, (char *) dst + dst_off, - (char *) src + src_off, length); + (const char *) src + src_off, + length); else if (dst_devicep == NULL) ret = src_devicep->dev2host_func (src_devicep->target_id, (char *) dst + dst_off, - (char *) src + src_off, length); + (const char *) src + src_off, + length); else if (src_devicep == dst_devicep) ret = src_devicep->dev2dev_func (src_devicep->target_id, (char *) dst + dst_off, - (char *) src + src_off, length); + (const char *) src + src_off, + length); else ret = 0; return ret ? 
0 : EINVAL; @@ -2321,7 +2364,7 @@ omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, for (j = 0; j < volume[0]; j++) { ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, - (char *) src + src_off, + (const char *) src + src_off, element_size, num_dims - 1, volume + 1, dst_offsets + 1, src_offsets + 1, dst_dimensions + 1, @@ -2336,7 +2379,7 @@ omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, } int -omp_target_memcpy_rect (void *dst, void *src, size_t element_size, +omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, int num_dims, const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets, @@ -2395,8 +2438,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, } int -omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, - size_t device_offset, int device_num) +omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, + size_t size, size_t device_offset, int device_num) { if (device_num == GOMP_DEVICE_HOST_FALLBACK) return EINVAL; @@ -2457,7 +2500,7 @@ omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, } int -omp_target_disassociate_ptr (void *ptr, int device_num) +omp_target_disassociate_ptr (const void *ptr, int device_num) { if (device_num == GOMP_DEVICE_HOST_FALLBACK) return EINVAL; @@ -2498,6 +2541,31 @@ omp_target_disassociate_ptr (void *ptr, int device_num) return ret; } +int +omp_pause_resource (omp_pause_resource_t kind, int device_num) +{ + (void) kind; + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return gomp_pause_host (); + if (device_num < 0 || device_num >= gomp_get_num_devices ()) + return -1; + /* Do nothing for target devices for now. */ + return 0; +} + +int +omp_pause_resource_all (omp_pause_resource_t kind) +{ + (void) kind; + if (gomp_pause_host ()) + return -1; + /* Do nothing for target devices for now. 
*/ + return 0; +} + +ialias (omp_pause_resource) +ialias (omp_pause_resource_all) + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed diff --git a/libgomp/task.c b/libgomp/task.c index 80dcd902ab3..0c78b3c939c 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -166,21 +166,72 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, void **depend) { size_t ndepend = (uintptr_t) depend[0]; - size_t nout = (uintptr_t) depend[1]; size_t i; hash_entry_type ent; + if (ndepend) + { + /* depend[0] is total # */ + size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */ + /* ndepend - nout is # of in: */ + for (i = 0; i < ndepend; i++) + { + task->depend[i].addr = depend[2 + i]; + task->depend[i].is_in = i >= nout; + } + } + else + { + ndepend = (uintptr_t) depend[1]; /* total # */ + size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */ + size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */ + /* For now we treat mutexinoutset like out, which is compliant, but + inefficient. 
*/ + size_t nin = (uintptr_t) depend[4]; /* # of in: */ + /* ndepend - nout - nmutexinoutset - nin is # of depobjs */ + size_t normal = nout + nmutexinoutset + nin; + size_t n = 0; + for (i = normal; i < ndepend; i++) + { + void **d = (void **) (uintptr_t) depend[5 + i]; + switch ((uintptr_t) d[1]) + { + case GOMP_DEPEND_OUT: + case GOMP_DEPEND_INOUT: + case GOMP_DEPEND_MUTEXINOUTSET: + break; + case GOMP_DEPEND_IN: + continue; + default: + gomp_fatal ("unknown omp_depend_t dependence type %d", + (int) (uintptr_t) d[1]); + } + task->depend[n].addr = d[0]; + task->depend[n++].is_in = 0; + } + for (i = 0; i < normal; i++) + { + task->depend[n].addr = depend[5 + i]; + task->depend[n++].is_in = i >= nout + nmutexinoutset; + } + for (i = normal; i < ndepend; i++) + { + void **d = (void **) (uintptr_t) depend[5 + i]; + if ((uintptr_t) d[1] != GOMP_DEPEND_IN) + continue; + task->depend[n].addr = d[0]; + task->depend[n++].is_in = 1; + } + } task->depend_count = ndepend; task->num_dependees = 0; if (parent->depend_hash == NULL) parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); for (i = 0; i < ndepend; i++) { - task->depend[i].addr = depend[2 + i]; task->depend[i].next = NULL; task->depend[i].prev = NULL; task->depend[i].task = task; - task->depend[i].is_in = i >= nout; task->depend[i].redundant = false; task->depend[i].redundant_out = false; @@ -205,7 +256,7 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, last = ent; /* depend(in:...) doesn't depend on earlier depend(in:...). */ - if (i >= nout && ent->is_in) + if (task->depend[i].is_in && ent->is_in) continue; if (!ent->is_in) @@ -280,9 +331,18 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, then the task may be executed by any member of the team. DEPEND is an array containing: + if depend[0] is non-zero, then: depend[0]: number of depend elements. - depend[1]: number of depend elements of type "out". 
- depend[2..N+1]: address of [1..N]th depend element. */ + depend[1]: number of depend elements of type "out/inout". + depend[2..N+1]: address of [1..N]th depend element. + otherwise, when depend[0] is zero, then: + depend[1]: number of depend elements. + depend[2]: number of depend elements of type "out/inout". + depend[3]: number of depend elements of type "mutexinoutset". + depend[4]: number of depend elements of type "in". + depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements + depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of + omp_depend_t objects. */ void GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), @@ -303,10 +363,20 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), #endif /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) priority = 0; @@ -377,7 +447,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), size_t depend_size = 0; if (flags & GOMP_TASK_FLAG_DEPEND) - depend_size = ((uintptr_t) depend[0] + depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1]) * sizeof (struct gomp_task_depend_entry)); task = gomp_malloc (sizeof (*task) + depend_size + arg_size + arg_align - 1); @@ -404,14 +474,26 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) - && !task->copy_ctors_done, 0)) + if (__builtin_expect (gomp_cancel_var, 0) + && !task->copy_ctors_done) { - gomp_mutex_unlock (&team->task_lock); - gomp_finish_task (task); - free (task); - return; + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return; + } + if (taskgroup) + { + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; + } } if (taskgroup) taskgroup->num_children++; @@ -463,6 +545,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), ialias (GOMP_taskgroup_start) ialias (GOMP_taskgroup_end) +ialias (GOMP_taskgroup_reduction_register) #define TYPE long #define UTYPE unsigned long @@ -601,10 +684,20 @@ gomp_create_target_task (struct gomp_device_descr *devicep, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return true; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return true; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return true; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return true; + } + } struct gomp_target_task *ttask; struct gomp_task *task; @@ -617,7 +710,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, if (depend != NULL) { - depend_cnt = (uintptr_t) depend[0]; + depend_cnt = (uintptr_t) (depend[0] ? 
depend[0] : depend[1]); depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); } if (fn) @@ -687,13 +780,25 @@ gomp_create_target_task (struct gomp_device_descr *devicep, task->final_task = 0; gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled), 0)) + if (__builtin_expect (gomp_cancel_var, 0)) { - gomp_mutex_unlock (&team->task_lock); - gomp_finish_task (task); - free (task); - return true; + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return true; + } + if (taskgroup) + { + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; + } } if (depend_size) { @@ -986,10 +1091,21 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, if (--team->task_queued_count == 0) gomp_team_barrier_clear_task_pending (&team->barrier); - if ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) + if (__builtin_expect (gomp_cancel_var, 0) && !child_task->copy_ctors_done) - return true; + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return true; + if (taskgroup) + { + if (taskgroup->cancelled) + return true; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + return true; + } + } return false; } @@ -1456,6 +1572,35 @@ GOMP_taskwait (void) } } +/* Called when encountering a taskwait directive with depend clause(s). + Wait as if it was an mergeable included task construct with empty body. */ + +void +GOMP_taskwait_depend (void **depend) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + /* If parallel or taskgroup has been cancelled, return early. 
*/ + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } + + if (thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); +} + /* An undeferred task is about to run. Wait for all tasks that this undeferred task depends on. @@ -1464,7 +1609,7 @@ GOMP_taskwait (void) the scheduling queues. Then we iterate through these imminently ready tasks (and possibly other high priority tasks), and run them. If we run out of ready dependencies to execute, we either wait for - the reamining dependencies to finish, or wait for them to get + the remaining dependencies to finish, or wait for them to get scheduled so we can run them. DEPEND is as in GOMP_task. */ @@ -1477,21 +1622,50 @@ gomp_task_maybe_wait_for_dependencies (void **depend) struct gomp_team *team = thr->ts.team; struct gomp_task_depend_entry elem, *ent = NULL; struct gomp_taskwait taskwait; - size_t ndepend = (uintptr_t) depend[0]; + size_t orig_ndepend = (uintptr_t) depend[0]; size_t nout = (uintptr_t) depend[1]; + size_t ndepend = orig_ndepend; + size_t normal = ndepend; + size_t n = 2; size_t i; size_t num_awaited = 0; struct gomp_task *child_task = NULL; struct gomp_task *to_free = NULL; int do_wake = 0; + if (ndepend == 0) + { + ndepend = nout; + nout = (uintptr_t) depend[2] + (uintptr_t) depend[3]; + normal = nout + (uintptr_t) depend[4]; + n = 5; + } gomp_mutex_lock (&team->task_lock); for (i = 0; i < ndepend; i++) { - elem.addr = depend[i + 2]; + elem.addr = depend[i + n]; + elem.is_in = i >= nout; + if (__builtin_expect (i >= normal, 0)) + { + void **d = (void **) elem.addr; + switch ((uintptr_t) d[1]) + { + case GOMP_DEPEND_IN: + break; + case GOMP_DEPEND_OUT: + case GOMP_DEPEND_INOUT: + case 
GOMP_DEPEND_MUTEXINOUTSET: + elem.is_in = 0; + break; + default: + gomp_fatal ("unknown omp_depend_t dependence type %d", + (int) (uintptr_t) d[1]); + } + elem.addr = d[0]; + } ent = htab_find (task->depend_hash, &elem); for (; ent; ent = ent->next) - if (i >= nout && ent->is_in) + if (elem.is_in && ent->is_in) continue; else { @@ -1654,13 +1828,28 @@ GOMP_taskyield (void) /* Nothing at the moment. */ } +static inline struct gomp_taskgroup * +gomp_taskgroup_init (struct gomp_taskgroup *prev) +{ + struct gomp_taskgroup *taskgroup + = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = prev; + priority_queue_init (&taskgroup->taskgroup_queue); + taskgroup->reductions = prev ? prev->reductions : NULL; + taskgroup->in_taskgroup_wait = false; + taskgroup->cancelled = false; + taskgroup->workshare = false; + taskgroup->num_children = 0; + gomp_sem_init (&taskgroup->taskgroup_sem, 0); + return taskgroup; +} + void GOMP_taskgroup_start (void) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; struct gomp_task *task = thr->task; - struct gomp_taskgroup *taskgroup; /* If team is NULL, all tasks are executed as GOMP_TASK_UNDEFERRED tasks and thus all children tasks of @@ -1668,14 +1857,7 @@ GOMP_taskgroup_start (void) by the time GOMP_taskgroup_end is called. 
*/ if (team == NULL) return; - taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); - taskgroup->prev = task->taskgroup; - priority_queue_init (&taskgroup->taskgroup_queue); - taskgroup->in_taskgroup_wait = false; - taskgroup->cancelled = false; - taskgroup->num_children = 0; - gomp_sem_init (&taskgroup->taskgroup_sem, 0); - task->taskgroup = taskgroup; + task->taskgroup = gomp_taskgroup_init (task->taskgroup); } void @@ -1840,6 +2022,302 @@ GOMP_taskgroup_end (void) free (taskgroup); } +static inline __attribute__((always_inline)) void +gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig, + unsigned nthreads) +{ + size_t total_cnt = 0; + uintptr_t *d = data; + struct htab *old_htab = NULL, *new_htab; + do + { + if (__builtin_expect (orig != NULL, 0)) + { + /* For worksharing task reductions, memory has been allocated + already by some other thread that encountered the construct + earlier. */ + d[2] = orig[2]; + d[6] = orig[6]; + orig = (uintptr_t *) orig[4]; + } + else + { + size_t sz = d[1] * nthreads; + /* Should use omp_alloc if d[3] is not -1. */ + void *ptr = gomp_aligned_alloc (d[2], sz); + memset (ptr, '\0', sz); + d[2] = (uintptr_t) ptr; + d[6] = d[2] + sz; + } + d[5] = 0; + total_cnt += d[0]; + if (d[4] == 0) + { + d[4] = (uintptr_t) old; + break; + } + else + d = (uintptr_t *) d[4]; + } + while (1); + if (old && old[5]) + { + old_htab = (struct htab *) old[5]; + total_cnt += htab_elements (old_htab); + } + new_htab = htab_create (total_cnt); + if (old_htab) + { + /* Copy old hash table, like in htab_expand. 
*/ + hash_entry_type *p, *olimit; + new_htab->n_elements = htab_elements (old_htab); + olimit = old_htab->entries + old_htab->size; + p = old_htab->entries; + do + { + hash_entry_type x = *p; + if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) + *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x; + p++; + } + while (p < olimit); + } + d = data; + do + { + size_t j; + for (j = 0; j < d[0]; ++j) + { + uintptr_t *p = d + 7 + j * 3; + p[2] = (uintptr_t) d; + /* Ugly hack, hash_entry_type is defined for the task dependencies, + which hash on the first element which is a pointer. We need + to hash also on the first sizeof (uintptr_t) bytes which contain + a pointer. Hide the cast from the compiler. */ + hash_entry_type n; + __asm ("" : "=g" (n) : "0" (p)); + *htab_find_slot (&new_htab, n, INSERT) = n; + } + if (d[4] == (uintptr_t) old) + break; + else + d = (uintptr_t *) d[4]; + } + while (1); + d[5] = (uintptr_t) new_htab; +} + +static void +gomp_create_artificial_team (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task_icv *icv; + struct gomp_team *team = gomp_new_team (1); + struct gomp_task *task = thr->task; + icv = task ? 
&task->icv : &gomp_global_icv; + team->prev_ts = thr->ts; + thr->ts.team = team; + thr->ts.team_id = 0; + thr->ts.work_share = &team->work_shares[0]; + thr->ts.last_work_share = NULL; +#ifdef HAVE_SYNC_BUILTINS + thr->ts.single_count = 0; +#endif + thr->ts.static_trip = 0; + thr->task = &team->implicit_task[0]; + gomp_init_task (thr->task, NULL, icv); + if (task) + { + thr->task = task; + gomp_end_task (); + free (task); + thr->task = &team->implicit_task[0]; + } +#ifdef LIBGOMP_USE_PTHREADS + else + pthread_setspecific (gomp_thread_destructor, thr); +#endif +} + +/* The format of data is: + data[0] cnt + data[1] size + data[2] alignment (on output array pointer) + data[3] allocator (-1 if malloc allocator) + data[4] next pointer + data[5] used internally (htab pointer) + data[6] used internally (end of array) + cnt times + ent[0] address + ent[1] offset + ent[2] used internally (pointer to data[0]) + The entries are sorted by increasing offset, so that a binary + search can be performed. Normally, data[8] is 0, exception is + for worksharing construct task reductions in cancellable parallel, + where at offset 0 there should be space for a pointer and an integer + which are used internally. */ + +void +GOMP_taskgroup_reduction_register (uintptr_t *data) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task; + unsigned nthreads; + if (__builtin_expect (team == NULL, 0)) + { + /* The task reduction code needs a team and task, so for + orphaned taskgroups just create the implicit team. 
*/ + gomp_create_artificial_team (); + ialias_call (GOMP_taskgroup_start) (); + team = thr->ts.team; + } + nthreads = team->nthreads; + task = thr->task; + gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); + task->taskgroup->reductions = data; +} + +void +GOMP_taskgroup_reduction_unregister (uintptr_t *data) +{ + uintptr_t *d = data; + htab_free ((struct htab *) data[5]); + do + { + gomp_aligned_free ((void *) d[2]); + d = (uintptr_t *) d[4]; + } + while (d && !d[5]); +} +ialias (GOMP_taskgroup_reduction_unregister) + +/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the + original list item or address of previously remapped original list + item to address of the private copy, store that to ptrs[i]. + For i < cntorig, additionally set ptrs[cnt+i] to the address of + the original list item. */ + +void +GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task *task = thr->task; + unsigned id = thr->ts.team_id; + uintptr_t *data = task->taskgroup->reductions; + uintptr_t *d; + struct htab *reduction_htab = (struct htab *) data[5]; + size_t i; + for (i = 0; i < cnt; ++i) + { + hash_entry_type ent, n; + __asm ("" : "=g" (ent) : "0" (ptrs + i)); + n = htab_find (reduction_htab, ent); + if (n) + { + uintptr_t *p; + __asm ("" : "=g" (p) : "0" (n)); + /* At this point, p[0] should be equal to (uintptr_t) ptrs[i], + p[1] is the offset within the allocated chunk for each + thread, p[2] is the array registered with + GOMP_taskgroup_reduction_register, d[2] is the base of the + allocated memory and d[1] is the size of the allocated chunk + for one thread. 
*/ + d = (uintptr_t *) p[2]; + ptrs[i] = (void *) (d[2] + id * d[1] + p[1]); + if (__builtin_expect (i < cntorig, 0)) + ptrs[cnt + i] = (void *) p[0]; + continue; + } + d = data; + while (d != NULL) + { + if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6]) + break; + d = (uintptr_t *) d[4]; + } + if (d == NULL) + gomp_fatal ("couldn't find matching task_reduction or reduction with " + "task modifier for %p", ptrs[i]); + uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1]; + ptrs[i] = (void *) (d[2] + id * d[1] + off); + if (__builtin_expect (i < cntorig, 0)) + { + size_t lo = 0, hi = d[0] - 1; + while (lo <= hi) + { + size_t m = (lo + hi) / 2; + if (d[7 + 3 * m + 1] < off) + lo = m + 1; + else if (d[7 + 3 * m + 1] == off) + { + ptrs[cnt + i] = (void *) d[7 + 3 * m]; + break; + } + else + hi = m - 1; + } + if (lo > hi) + gomp_fatal ("couldn't find matching task_reduction or reduction " + "with task modifier for %p", ptrs[i]); + } + } +} + +struct gomp_taskgroup * +gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) +{ + struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); + gomp_reduction_register (data, NULL, NULL, nthreads); + taskgroup->reductions = data; + return taskgroup; +} + +void +gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task = thr->task; + unsigned nthreads = team->nthreads; + gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); + task->taskgroup->reductions = data; +} + +void +gomp_workshare_taskgroup_start (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task; + + if (team == NULL) + { + gomp_create_artificial_team (); + team = thr->ts.team; + } + task = thr->task; + task->taskgroup = gomp_taskgroup_init (task->taskgroup); + task->taskgroup->workshare = true; +} + +void 
+GOMP_workshare_task_reduction_unregister (bool cancelled) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task *task = thr->task; + struct gomp_team *team = thr->ts.team; + uintptr_t *data = task->taskgroup->reductions; + ialias_call (GOMP_taskgroup_end) (); + if (thr->ts.team_id == 0) + ialias_call (GOMP_taskgroup_reduction_unregister) (data); + else + htab_free ((struct htab *) data[5]); + + if (!cancelled) + gomp_team_barrier_wait (&team->barrier); +} + int omp_in_final (void) { diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index 5515b355f00..4621405aa58 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -149,11 +149,28 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (flags & GOMP_TASK_FLAG_NOGROUP) { - if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) - return; + if (__builtin_expect (gomp_cancel_var, 0) + && thr->task + && thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } } else - ialias_call (GOMP_taskgroup_start) (); + { + ialias_call (GOMP_taskgroup_start) (); + if (flags & GOMP_TASK_FLAG_REDUCTION) + { + struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; + uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; + ialias_call (GOMP_taskgroup_reduction_register) (ptr); + } + } if (priority > gomp_max_task_priority_var) priority = gomp_max_task_priority_var; @@ -284,19 +301,31 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) - && cpyfn == NULL, 0)) + if (__builtin_expect (gomp_cancel_var, 0) + && cpyfn == NULL) { - gomp_mutex_unlock (&team->task_lock); - for (i = 0; i < num_tasks; i++) + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + for (i = 0; i < num_tasks; i++) + { + gomp_finish_task (tasks[i]); + free (tasks[i]); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call (GOMP_taskgroup_end) (); + return; + } + if (taskgroup) { - gomp_finish_task (tasks[i]); - free (tasks[i]); + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; } - if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) - ialias_call (GOMP_taskgroup_end) (); - return; } if (taskgroup) taskgroup->num_children += num_tasks; diff --git a/libgomp/team.c b/libgomp/team.c index 87cdcfd52a9..e3e4c4d1ef2 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -32,7 +32,6 @@ #include <string.h> #ifdef LIBGOMP_USE_PTHREADS -/* This attribute contains PTHREAD_CREATE_DETACHED. */ pthread_attr_t gomp_thread_attr; /* This key is for the thread destructor. 
*/ @@ -58,6 +57,7 @@ struct gomp_thread_start_data struct gomp_thread_pool *thread_pool; unsigned int place; bool nested; + pthread_t handle; }; @@ -89,6 +89,9 @@ gomp_thread_start (void *xdata) thr->ts = data->ts; thr->task = data->task; thr->place = data->place; +#ifdef GOMP_NEEDS_THREAD_HANDLE + thr->handle = data->handle; +#endif thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; @@ -131,6 +134,7 @@ gomp_thread_start (void *xdata) } gomp_sem_destroy (&thr->release); + pthread_detach (pthread_self ()); thr->thread_pool = NULL; thr->task = NULL; return NULL; @@ -183,7 +187,7 @@ gomp_new_team (unsigned nthreads) team->single_count = 0; #endif team->work_shares_to_free = &team->work_shares[0]; - gomp_init_work_share (&team->work_shares[0], false, nthreads); + gomp_init_work_share (&team->work_shares[0], 0, nthreads); team->work_shares[0].next_alloc = NULL; team->work_share_list_free = NULL; team->work_share_list_alloc = &team->work_shares[1]; @@ -231,6 +235,7 @@ gomp_free_pool_helper (void *thread_pool) thr->thread_pool = NULL; thr->task = NULL; #ifdef LIBGOMP_USE_PTHREADS + pthread_detach (pthread_self ()); pthread_exit (NULL); #elif defined(__nvptx__) asm ("exit;"); @@ -297,7 +302,8 @@ gomp_free_thread (void *arg __attribute__((unused))) #ifdef LIBGOMP_USE_PTHREADS void gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, - unsigned flags, struct gomp_team *team) + unsigned flags, struct gomp_team *team, + struct gomp_taskgroup *taskgroup) { struct gomp_thread_start_data *start_data; struct gomp_thread *thr, *nthr; @@ -312,6 +318,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, unsigned int s = 0, rest = 0, p = 0, k = 0; unsigned int affinity_count = 0; struct gomp_thread **affinity_thr = NULL; + bool force_display = false; thr = gomp_thread (); nested = thr->ts.level; @@ -319,7 +326,12 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, task = thr->task; icv = task ? 
&task->icv : &gomp_global_icv; if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0) - gomp_init_affinity (); + { + gomp_init_affinity (); + if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1) + gomp_display_affinity_thread (gomp_thread_self (), &thr->ts, + thr->place); + } /* Always save the previous state, even if this isn't a nested team. In particular, we should save any work share state from an outer @@ -338,6 +350,9 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, #endif thr->ts.static_trip = 0; thr->task = &team->implicit_task[0]; +#ifdef GOMP_NEEDS_THREAD_HANDLE + thr->handle = pthread_self (); +#endif nthreads_var = icv->nthreads_var; if (__builtin_expect (gomp_nthreads_var_list != NULL, 0) && thr->ts.level < gomp_nthreads_var_list_len) @@ -350,6 +365,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, && thr->ts.level < gomp_bind_var_list_len) bind_var = gomp_bind_var_list[thr->ts.level]; gomp_init_task (thr->task, task, icv); + thr->task->taskgroup = taskgroup; team->implicit_task[0].icv.nthreads_var = nthreads_var; team->implicit_task[0].icv.bind_var = bind_var; @@ -465,7 +481,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, pool->threads = gomp_realloc (pool->threads, pool->threads_size - * sizeof (struct gomp_thread_data *)); + * sizeof (struct gomp_thread *)); } /* Release existing idle threads. 
*/ @@ -540,6 +556,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, + place_partition_len)) { unsigned int l; + force_display = true; if (affinity_thr == NULL) { unsigned int j; @@ -623,6 +640,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, gomp_init_task (nthr->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; team->implicit_task[i].icv.bind_var = bind_var; + nthr->task->taskgroup = taskgroup; nthr->fn = fn; nthr->data = data; team->ordered_release[i] = &nthr->release; @@ -712,19 +730,17 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, { size_t stacksize; pthread_attr_init (&thread_attr); - pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED); if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize)) pthread_attr_setstacksize (&thread_attr, stacksize); attr = &thread_attr; } start_data = gomp_alloca (sizeof (struct gomp_thread_start_data) - * (nthreads-i)); + * (nthreads - i)); /* Launch new threads. 
*/ for (; i < nthreads; ++i) { - pthread_t pt; int err; start_data->ts.place_partition_off = thr->ts.place_partition_off; @@ -810,11 +826,14 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, gomp_init_task (start_data->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; team->implicit_task[i].icv.bind_var = bind_var; + start_data->task->taskgroup = taskgroup; start_data->thread_pool = pool; start_data->nested = nested; attr = gomp_adjust_thread_attr (attr, &thread_attr); - err = pthread_create (&pt, attr, gomp_thread_start, start_data++); + err = pthread_create (&start_data->handle, attr, gomp_thread_start, + start_data); + start_data++; if (err != 0) gomp_fatal ("Thread creation failed: %s", strerror (err)); } @@ -854,6 +873,42 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, gomp_mutex_unlock (&gomp_managed_threads_lock); #endif } + if (__builtin_expect (gomp_display_affinity_var, 0)) + { + if (nested + || nthreads != old_threads_used + || force_display) + { + gomp_display_affinity_thread (gomp_thread_self (), &thr->ts, + thr->place); + if (nested) + { + start_data -= nthreads - 1; + for (i = 1; i < nthreads; ++i) + { + gomp_display_affinity_thread ( +#ifdef LIBGOMP_USE_PTHREADS + start_data->handle, +#else + gomp_thread_self (), +#endif + &start_data->ts, + start_data->place); + start_data++; + } + } + else + { + for (i = 1; i < nthreads; ++i) + { + gomp_thread_handle handle + = gomp_thread_to_pthread_t (pool->threads[i]); + gomp_display_affinity_thread (handle, &pool->threads[i]->ts, + pool->threads[i]->place); + } + } + } + } if (__builtin_expect (affinity_thr != NULL, 0) && team->prev_ts.place_partition_len > 64) free (affinity_thr); @@ -894,7 +949,7 @@ gomp_team_end (void) gomp_end_task (); thr->ts = team->prev_ts; - if (__builtin_expect (thr->ts.team != NULL, 0)) + if (__builtin_expect (thr->ts.level != 0, 0)) { #ifdef HAVE_SYNC_BUILTINS __sync_fetch_and_add (&gomp_managed_threads, 1L - 
team->nthreads); @@ -959,6 +1014,76 @@ team_destructor (void) crashes. */ pthread_key_delete (gomp_thread_destructor); } + +/* Similar to gomp_free_pool_helper, but don't detach itself, + gomp_pause_host will pthread_join those threads. */ + +static void +gomp_pause_pool_helper (void *thread_pool) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_thread_pool *pool + = (struct gomp_thread_pool *) thread_pool; + gomp_simple_barrier_wait_last (&pool->threads_dock); + gomp_sem_destroy (&thr->release); + thr->thread_pool = NULL; + thr->task = NULL; + pthread_exit (NULL); +} + +/* Free a thread pool and release its threads. Return non-zero on + failure. */ + +int +gomp_pause_host (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_thread_pool *pool = thr->thread_pool; + if (thr->ts.level) + return -1; + if (pool) + { + if (pool->threads_used > 0) + { + int i; + pthread_t *thrs + = gomp_alloca (sizeof (pthread_t) * pool->threads_used); + for (i = 1; i < pool->threads_used; i++) + { + struct gomp_thread *nthr = pool->threads[i]; + nthr->fn = gomp_pause_pool_helper; + nthr->data = pool; + thrs[i] = gomp_thread_to_pthread_t (nthr); + } + /* This barrier undocks threads docked on pool->threads_dock. */ + gomp_simple_barrier_wait (&pool->threads_dock); + /* And this waits till all threads have called gomp_barrier_wait_last + in gomp_pause_pool_helper. */ + gomp_simple_barrier_wait (&pool->threads_dock); + /* Now it is safe to destroy the barrier and free the pool. 
*/ + gomp_simple_barrier_destroy (&pool->threads_dock); + +#ifdef HAVE_SYNC_BUILTINS + __sync_fetch_and_add (&gomp_managed_threads, + 1L - pool->threads_used); +#else + gomp_mutex_lock (&gomp_managed_threads_lock); + gomp_managed_threads -= pool->threads_used - 1L; + gomp_mutex_unlock (&gomp_managed_threads_lock); +#endif + for (i = 1; i < pool->threads_used; i++) + pthread_join (thrs[i], NULL); + } + if (pool->last_team) + free_team (pool->last_team); +#ifndef __nvptx__ + free (pool->threads); + free (pool); +#endif + thr->thread_pool = NULL; + } + return 0; +} #endif struct gomp_task_icv * diff --git a/libgomp/teams.c b/libgomp/teams.c new file mode 100644 index 00000000000..5aa0eae68d3 --- /dev/null +++ b/libgomp/teams.c @@ -0,0 +1,73 @@ +/* Copyright (C) 2018 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the host TEAMS construct. 
*/ + +#include "libgomp.h" + +static unsigned gomp_num_teams = 1, gomp_team_num = 0; + +void +GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, + unsigned int thread_limit, unsigned int flags) +{ + (void) flags; + (void) num_teams; + unsigned old_thread_limit_var = 0; + if (thread_limit) + { + struct gomp_task_icv *icv = gomp_icv (true); + old_thread_limit_var = icv->thread_limit_var; + icv->thread_limit_var + = thread_limit > INT_MAX ? UINT_MAX : thread_limit; + } + if (num_teams == 0) + num_teams = 3; + gomp_num_teams = num_teams; + for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++) + fn (data); + gomp_num_teams = 1; + gomp_team_num = 0; + if (thread_limit) + { + struct gomp_task_icv *icv = gomp_icv (true); + icv->thread_limit_var = old_thread_limit_var; + } +} + +int +omp_get_num_teams (void) +{ + return gomp_num_teams; +} + +int +omp_get_team_num (void) +{ + return gomp_team_num; +} + +ialias (omp_get_num_teams) +ialias (omp_get_team_num) diff --git a/libgomp/testsuite/libgomp.c++/depend-1.C b/libgomp/testsuite/libgomp.c++/depend-1.C new file mode 100644 index 00000000000..71a24d19f2a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/depend-1.C @@ -0,0 +1,31 @@ +extern "C" void abort (); +int a, b, c, d, e; + +void +foo (int &x, bool y) +{ + #pragma omp task depend (out: x) + a = 1; + #pragma omp task depend (out: y ? b : c) + (y ? b : c) = 2; + #pragma omp task depend (inout: --d) + d += 4; + #pragma omp task depend (in : a, (y ? 
b : c), d) + e = a + b * 10 + c * 100 + d * 1000; +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + foo (a, true); + if (e != 1 + 20 + 0 + 3000) + abort (); + a = b = c = d = e = 0; + #pragma omp parallel + #pragma omp single + foo (a, false); + if (e != 1 + 0 + 200 + 3000) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/depend-iterator-1.C b/libgomp/testsuite/libgomp.c++/depend-iterator-1.C new file mode 100644 index 00000000000..2cff5e84334 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/depend-iterator-1.C @@ -0,0 +1,167 @@ +extern "C" void abort (); +int arr[64], arr2[64], arr3[64]; + +int * +foo (int x, int y, long z) +{ + int v; + switch (x) + { + case 1: + if (z != 0 || y < 0 || y >= 64) + abort (); + #pragma omp atomic capture + { + v = arr2[y]; + arr2[y]++; + } + if (v != 0) abort (); + return &arr[y]; + case 2: + if (y < 0 || y > 60 || (y & 3) || z < 0 || z >= 4) + abort (); + #pragma omp atomic + arr2[y + z] = arr2[y + z] + 4; + return &arr[y + z]; + case 3: + if (z < 0 || z > 60 || (z & 3) || y < 0 || y >= 4) + abort (); + #pragma omp atomic + arr2[y + z] = arr2[y + z] + 16; + return &arr[y + z]; + case 4: + if (y != 0 || z > 64 || z <= 0) + abort (); + #pragma omp atomic + arr2[z - 1] = arr2[z - 1] + 64; + return &arr[z - 1]; + case 5: + if ((y & 3) != 0 || y < 64 || y >= 96 + || (z & 127) != 0 || z < 512 || z >= 1024) + abort (); + y = (y - 64) + (z - 512) / 128; + #pragma omp atomic + arr2[y] = arr2[y] + 256; + return &arr[y]; + case 6: + if ((y & 3) != 0 || y <= 64 || y > 96 + || (z & 127) != 1 || z <= 513 || z > 1025) + abort (); + y = (y - 68) + (z - 641) / 128; + #pragma omp atomic + arr2[y] = arr2[y] + 1024; + return &arr[y]; + default: + abort (); + } +} + +volatile int beg, end, step, step2; +volatile unsigned int begu, endu; + +template <int N> +void +bar () +{ + #pragma omp parallel + #pragma omp master + { + int i; + for (i = 0; i < 64; i++) + #pragma omp task depend (iterator (j=i:i+1) , out : foo (1, j, 
0)[0]) + arr[i] = i; + #pragma omp task depend (iterator (int k=beg:end:step,long int l=0:4:1) , inout : \ + foo (2, k, l)[0], foo (3, l, k)[0]) private (i) + for (i = 0; i < 64; i++) + if (arr[i] != i) + abort (); + else + arr[i] = arr[i] + 1; + #pragma omp task depend (iterator (int *p=&arr3[64]:&arr3[0]:-1), inout : \ + foo (4, 0, p - &arr3[0])[0]) depend (in : beg) + for (i = 0; i < 64; i++) + if (arr[i] != i + 1) + abort (); + else + arr[i] = arr[i] + 2; + #pragma omp task depend (iterator (unsigned n=begu:endu:step2, unsigned int o = 512: 1024U: (unsigned char) 128), inout : \ + foo (5, n + 128, o)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 3) + abort (); + else + arr[i] = arr[i] + 4; + #pragma omp task depend (iterator (int unsigned p=endu:begu:step,unsigned q= 1025U:513U:(signed char) -128), in : \ + foo (6, p + 128, q)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 7) + abort (); + else + arr[i] = arr[i] + 8; + } +} + +template <typename A, typename B, typename C, typename D, typename E, typename F> +void +baz (A beg, A end, A step, D begu, D endu, A step2) +{ + #pragma omp parallel + #pragma omp master + { + int i; + for (i = 0; i < 64; i++) + #pragma omp task depend (iterator (A j=i:i+1),out : foo (1, j, 0)[0]) + arr[i] = i; + #pragma omp task depend (iterator (A k=beg:end:step,B l=0:4:1), inout : \ + foo (2, k, l)[0], foo (3, l, k)[0]) private (i) + for (i = 0; i < 64; i++) + if (arr[i] != i) + abort (); + else + arr[i] = arr[i] + 1; + #pragma omp task depend (iterator (C p=&arr3[64]:&arr3[0]:-1), in : \ + foo (4, 0, p - &arr3[0])[0]) depend (in : beg) + for (i = 0; i < 64; i++) + if (arr[i] != i + 1) + abort (); + else + arr[i] = arr[i] + 2; + #pragma omp task depend (iterator (D n=begu:endu:step2, D o = 512: 1024U:(E) 128), inout : \ + foo (5, n + 128, o)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 3) + abort (); + else + arr[i] = arr[i] + 4; + #pragma omp task depend (iterator (D p=endu:begu:step,D q= 1025U:513U:(F) -128), in : \ + 
foo (6, p + 128, q)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 7) + abort (); + else + arr[i] = arr[i] + 8; + } +} + +int +main () +{ + int m; + beg = 60; + end = -4; + step = -4; + step2 = 4; + begu = -64U; + endu = -32U; + bar<0> (); + for (m = 0; m < 64; m++) + if (arr[m] != m + 15 || arr2[m] != (m < 32 ? 1365 : 85)) + abort (); + else + arr[m] = arr2[m] = 0; + baz<int, long int, int *, unsigned int, unsigned char, signed char> (beg, end, step, begu, endu, step2); + for (m = 0; m < 64; m++) + if (arr[m] != m + 15 || arr2[m] != (m < 32 ? 1365 : 85)) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/depobj-1.C b/libgomp/testsuite/libgomp.c++/depobj-1.C new file mode 100644 index 00000000000..91edf8cc34b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/depobj-1.C @@ -0,0 +1,103 @@ +#include <stdlib.h> +#include <omp.h> + +void +dep (omp_depend_t &d1, omp_depend_t *d2) +{ + int x = 1; + #pragma omp depobj (d1) depend(in: x) + #pragma omp depobj (*d2) depend(in: x) + + #pragma omp depobj (d2[0]) update(out) + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared (x) depend(depobj:*d2) + x = 2; + #pragma omp task shared (x) depend(depobj : d1) + if (x != 2) + abort (); + } + #pragma omp depobj (d2[0]) destroy + #pragma omp depobj (d1) destroy +} + +template <typename T> +void +dep2 (T &d2) +{ + T d1; + #pragma omp parallel + #pragma omp single + { + int x = 1; + #pragma omp depobj (d1) depend(out: x) + #pragma omp depobj (*&d2) depend (in:x) + #pragma omp depobj(d2)update(in) + #pragma omp task shared (x) depend(depobj :d1) + x = 2; + #pragma omp task shared (x) depend(depobj: d2) + if (x != 2) + abort (); + #pragma omp taskwait + #pragma omp depobj(d1)destroy + #pragma omp depobj((&d2)[0]) destroy + } +} + +template <typename T> +void +dep3 (void) +{ + T d[2]; + #pragma omp parallel + { + int x = 1; + #pragma omp single + { + #pragma omp depobj(d[0]) depend(out:x) + #pragma omp depobj(d[1]) depend(in: x) + #pragma omp 
task shared (x) depend(depobj:*d) + x = 2; + #pragma omp task shared (x) depend(depobj:*(d + 1)) + if (x != 2) + abort (); + } + } + #pragma omp depobj(d[0]) destroy + #pragma omp depobj(d[1]) destroy +} + +int xx; +omp_depend_t dd1, dd2; + +template <int N> +void +antidep (void) +{ + xx = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared(xx) depend(depobj:dd2) + if (xx != 1) + abort (); + #pragma omp task shared(xx) depend(depobj:dd1) + xx = 2; + } +} + +int +main () +{ + omp_depend_t d1, d2, d3; + dep (d1, &d2); + dep2 <omp_depend_t> (d3); + dep3 <omp_depend_t> (); + #pragma omp depobj (dd1) depend (inout: xx) + #pragma omp depobj (dd2) depend (in : xx) + antidep <0> (); + #pragma omp depobj (dd2) destroy + #pragma omp depobj (dd1) destroy + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/for-16.C b/libgomp/testsuite/libgomp.c++/for-16.C new file mode 100644 index 00000000000..e7e5b857f23 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-16.C @@ -0,0 +1,218 @@ +// PR c++/86443 +// { dg-do run } +// { dg-additional-options "-std=c++17" } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend 
bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> 
bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +baz (int i) +{ + if (i < 0 || i >= 2000) + abort (); + results[i]++; +} + +void +f1 (J<int> j) +{ +#pragma omp distribute parallel for default(none) + for (I<int> i = j.begin (); i < j.end (); i += 3) + baz (*i); +} + +void +f2 (J<int> j) +{ + I<int> i; +#pragma omp distribute 
parallel for default(none) + for (i = j.begin (); i < j.end (); ++i) + baz (*i); +} + +template <int N> +void +f3 (J<int> j) +{ +#pragma omp distribute parallel for default(none) + for (I<int> i = j.begin (); i < j.end (); i += 6) + baz (*i); +} + +template <int N> +void +f4 (J<int> j) +{ + I<int> i; +#pragma omp distribute parallel for default(none) + for (i = j.begin (); i < j.end (); i += 9) + baz (*i); +} + +template <typename T> +void +f5 (J<T> j) +{ +#pragma omp distribute parallel for default(none) + for (I<T> i = j.begin (); i < j.end (); i += 4) + baz (*i); +} + +template <typename T> +void +f6 (J<T> j) +{ + I<T> i; +#pragma omp distribute parallel for default(none) + for (i = j.begin (); i < j.end (); i += 7) + baz (*i); +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + int a[2000]; + for (int i = 0; i < 2000; i++) + a[i] = i; + #pragma omp teams + { + J<int> j (&a[75], &a[1945]); + f1 (j); + } + check (i >= 75 && i < 1945 && (i - 75) % 3 == 0); + #pragma omp teams + { + J<int> j (&a[63], &a[1949]); + f2 (j); + } + check (i >= 63 && i < 1949); + #pragma omp teams + { + J<int> j (&a[58], &a[1979]); + f3 <2> (j); + } + check (i >= 58 && i < 1979 && (i - 58) % 6 == 0); + #pragma omp teams + { + J<int> j (&a[59], &a[1981]); + f4 <9> (j); + } + check (i >= 59 && i < 1981 && (i - 59) % 9 == 0); + #pragma omp teams + { + J<int> j (&a[52], &a[1972]); + f5 (j); + } + check (i >= 52 && i < 1972 && (i - 52) % 4 == 0); + #pragma omp teams + { + J<int> j (&a[31], &a[1827]); + f6 (j); + } + check (i >= 31 && i < 1827 && (i - 31) % 7 == 0); +} diff --git a/libgomp/testsuite/libgomp.c++/for-21.C b/libgomp/testsuite/libgomp.c++/for-21.C new file mode 100644 index 00000000000..fc0cb0ab672 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-21.C @@ -0,0 +1,291 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; 
+extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename 
T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> 
&y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +f1 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for + for (I<int> i = x; i != y; i++) + baz (i); +} + +void +f2 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel for private(i) + for (i = x; i != y - 1; i = 2 - 8 + 7 + i) + baz (i); +} + +template <typename T> +void +f3 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for + for (I<int> i = x; i != y; i = i + 9 - 8) + baz (i); +} + +template <typename T> +void +f4 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel for lastprivate(i) + for (i = x + 2000 - 64; i != y + 10; --i) + baz (i); +} + +void +f5 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for + for (I<int> i = x + 2000 - 64; i != y + 10; i--) + baz (i); +} + +template <int N> +void +f6 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for + for (I<int> i = x + 2000 - 64; i != y + 10; i = i - 12 + 11) + { + I<int> j = i + N; + baz (j); + } +} + +template <int N> +void +f7 (I<int> i, const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for + for (i = x - 10; i != y + 10; i += N) + baz (i); +} + +template <int N> +void +f8 (J<int> j) +{ + I<int> i; +#pragma omp parallel for + for (i = j.begin (); i != j.end () + N; i += 1) + baz (i); +} + +template <typename T, int N> +void +f9 (const I<T> &x, const I<T> &y) +{ +#pragma omp parallel for + for (I<T> i = x; i != y; i = i - N) + baz (i); +} + 
+template <typename T, int N> +void +f10 (const I<T> &x, const I<T> &y) +{ + I<T> i; +#pragma omp parallel for + for (i = x; i != y; i = i + N) + baz (i); +} + +template <typename T> +void +f11 (const T &x, const T &y) +{ +#pragma omp parallel + { +#pragma omp for nowait + for (T i = x; i != y; i++) + baz (i); +#pragma omp single + { + T j = y + 3; + baz (j); + } + } +} + +template <typename T> +void +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel for + for (i = x; i != y; --i) + baz (i); +} + +template <int N> +struct K +{ + template <typename T> + static void + f13 (const T &x, const T &y) + { +#pragma omp parallel for + for (T i = x; i != y + N; i += N) + baz (i); + } +}; + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + int a[2000]; + long b[2000]; + for (int i = 0; i < 2000; i++) + { + a[i] = i; + b[i] = i; + } + f1 (&a[10], &a[1990]); + check (i >= 10 && i < 1990); + f2 (&a[0], &a[1999]); + check (i < 1998); + f3<char> (&a[20], &a[1837]); + check (i >= 20 && i < 1837); + f4<int> (&a[0], &a[30]); + check (i > 40 && i <= 2000 - 64); + f5 (&a[0], &a[100]); + check (i > 110 && i <= 2000 - 64); + f6<-10> (&a[10], &a[110]); + check (i > 110 && i <= 2000 - 64); + f7<1> (I<int> (), &a[12], &a[1800]); + check (i >= 2 && i < 1810); + f8<121> (J<int> (&a[14], &a[1803])); + check (i >= 14 && i < 1924); + f9<int, -1> (&a[33], &a[1967]); + check (i >= 33 && i < 1967); + f10<int, -1> (&a[1939], &a[17]); + check (i > 17 && i <= 1939); + f11<I<int> > (&a[16], &a[1981]); + check ((i >= 16 && i < 1981) || i == 1984); + f12<I<int> > (&a[1761], &a[37]); + check (i > 37 && i <= 1761); + K<1>::f13<I<int> > (&a[1], &a[1935]); + check (i >= 1 && i < 1936); + f9<long, 1 - 2> (&b[33], &b[1967]); + check (i >= 33 && i < 1967); + f10<long, -1> (&b[1939], &b[17]); + check (i > 17 && i <= 1939); + f11<I<long> > (&b[16], 
&b[1981]); + check ((i >= 16 && i < 1981) || i == 1984); + f12<I<long> > (&b[1761], &b[37]); + check (i > 37 && i <= 1761); + K<1>::f13<I<long> > (&b[1], &b[1935]); + check (i >= 1 && i < 1936); +} diff --git a/libgomp/testsuite/libgomp.c++/for-22.C b/libgomp/testsuite/libgomp.c++/for-22.C new file mode 100644 index 00000000000..35fcf1f1dce --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-22.C @@ -0,0 +1,314 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename 
I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () { p = (T *) 0; } +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> 
&x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +I<int> +f1 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel shared(i) + { +#pragma omp for lastprivate (i) schedule(runtime) + for (i = x; i != y; i++) + baz (i); +#pragma omp single + i += 3; + } + return I<int> (i); +} + +I<int> +f2 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel for lastprivate(i) + for (i = x; i != y - 1; i = 2 - 8 + 7 + i) + baz (i); + return I<int> (i); +} + +template <typename T> +I<int> +f3 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel + #pragma omp for lastprivate (i) + for (i = x; i != y; i = i + 9 - 8) + baz (i); + return i; +} + +template <typename T> +I<int> +f4 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel for lastprivate(i) + for (i = x + 2000 - 64; i != y + 10; --i) + baz (i); + return I<int> (i); +} + +template <typename T> +I<int> +f5 (const I<int> &x, const I<int> &y) +{ + 
I<int> i; +#pragma omp parallel for lastprivate(i) + for (i = x + 2000 - 64; i != y + T (10); i--) + baz (i); + return i; +} + +template <typename T> +I<int> +f6 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel for lastprivate (i) + for (i = x + 2000 - 64; i != y + 10; i = i - T (12) + T (11)) + { + I<int> j = i + -10; + baz (j); + } + return I<int> (i); +} + +template <int N> +I<int> +f7 (I<int> i, const I<int> &x, const I<int> &y) +{ +#pragma omp parallel for lastprivate(i) + for (i = x - 10; i != y + 10; i += N) + baz (i); + return I<int> (i); +} + +template <int N> +I<int> +f8 (J<int> j) +{ + I<int> i; +#pragma omp parallel shared \ +(i) +#pragma omp for lastprivate (i) + for (i = j.begin (); i != j.end () + N; i += 1) + baz (i); + return i; +} + +I<int> i9; + +template <long N> +I<int> & +f9 (J<int> j) +{ +#pragma omp parallel for lastprivate(i9) + for (i9 = j.begin (); i9 != j.end () - N; i9 = i9 - N) + baz (i9); + return i9; +} + +template <typename T, int N> +I<T> +f10 (const I<T> &x, const I<T> &y) +{ + I<T> i; +#pragma omp parallel for lastprivate (i) + for (i = x; i != y; i = i + N) + baz (i); + return i; +} + +template <typename T, typename U> +T +f11 (T i, const T &x, const T &y) +{ +#pragma omp parallel + { +#pragma omp for lastprivate (i) + for (i = x + U (0); i != y + U (2 - 2); i = U(3) + U(-2) + i) + baz (i); +#pragma omp single + { + T j = y + 3; + baz (j); + } + } + return i; +} + +template <typename T> +T +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel for lastprivate (i) + for (i = x; i != y; --i) + baz (i); + return i; +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + int a[2000]; + long b[2000]; + for (int i = 0; i < 2000; i++) + { + a[i] = i; + b[i] = i; + } + if (*f1 (&a[10], &a[1990]) != 1993) + abort (); + check (i >= 10 && i < 1990); + if (*f2 
(&a[0], &a[1999]) != 1998) + abort (); + check (i < 1998); + if (*f3<char> (&a[20], &a[1837]) != 1837) + abort (); + check (i >= 20 && i < 1837); + if (*f4<int> (&a[0], &a[30]) != 40) + abort (); + check (i > 40 && i <= 2000 - 64); + if (*f5<int> (&a[0], &a[100]) != 110) + abort (); + check (i > 110 && i <= 2000 - 64); + if (*f6<int> (&a[10], &a[110]) != 120) + abort (); + check (i > 110 && i <= 2000 - 64); + if (*f7<1> (I<int> (), &a[12], &a[1800]) != 1810) + abort (); + check (i >= 2 && i < 1810); + if (*f8<121> (J<int> (&a[14], &a[1803])) != 1924) + abort (); + check (i >= 14 && i < 1924); + if (*f9<-1> (J<int> (&a[33], &a[1967])) != 1968) + abort (); + check (i >= 33 && i <= 1967); + if (*f10<int, -1> (&a[1939], &a[17]) != 17) + abort (); + check (i > 17 && i <= 1939); + if (*f11<I<int>, int> (I<int> (), &a[16], &a[1981]) != 1981) + abort (); + check ((i >= 16 && i < 1981) || i == 1984); + if (*f12<I<int> > (&a[1761], &a[37]) != 37) + abort (); + check (i > 37 && i <= 1761); + if (*f10<long, -1> (&b[1939], &b[17]) != 17) + abort (); + check (i > 17 && i <= 1939); + if (*f11<I<long>, long> (I<long> (), &b[16], &b[1981]) != 1981) + abort (); + check ((i >= 16 && i < 1981) || i == 1984); + if (*f12<I<long> > (&b[1761], &b[37]) != 37) + abort (); + check (i > 37 && i <= 1761); +} diff --git a/libgomp/testsuite/libgomp.c++/for-23.C b/libgomp/testsuite/libgomp.c++/for-23.C new file mode 100644 index 00000000000..e0d7b7afae4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-23.C @@ -0,0 +1,416 @@ +// { dg-do run } +// { dg-additional-options "-std=c++17" } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +namespace std { + template<typename T> struct tuple_size; + template<int, typename> struct tuple_element; +} + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = 
(const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () 
{ --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin 
(); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +template <typename T> +class K +{ +public: + K (); + ~K (); + template <int N> T &get () { if (N == 0) return c; else if (N == 1) return b; return a; } + T a, b, c; +}; + +template <typename T> K<T>::K () : a {}, b {}, c {} {} +template <typename T> K<T>::~K () {} +template <typename T> struct std::tuple_size<K<T>> { static constexpr int value = 3; }; +template <typename T, int N> struct std::tuple_element<N, K<T>> { using type = T; }; + +template <typename T> +class L +{ +public: + L (); + ~L (); + T a, b, c; +}; + +template <typename T> L<T>::L () : a {}, b {}, c {} {} +template <typename T> L<T>::~L () {} + +int a[2000]; +long b[40]; +short c[50]; +int d[1024]; +K<int> e[1089]; +L<int> f[1093]; +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +baz (int i) +{ + if (i < 0 || i >= 2000) + abort (); + results[i]++; +} + +void +f1 () +{ +#pragma omp parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +void +f2 () +{ +#pragma omp parallel for default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f3 () +{ +#pragma omp parallel for collapse(3) default(none) shared(b, c) + for (auto &i : b) + for (int j = 9; j < 10; j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +void +f4 (J<int> j) +{ +#pragma omp parallel for default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f5 () +{ +#pragma omp parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % 1024] += 2 * ((unsigned) i >> 10) + 1; +} + +void +f6 (J<K<int>> j) +{ +#pragma omp parallel for default(none) shared(j, e) + 
for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f7 (J<L<int>> j) +{ +#pragma omp parallel for default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +void +f8 (J<K<int>> j) +{ +#pragma omp parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f9 (J<L<int>> j) +{ +#pragma omp parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f10 () +{ +#pragma omp parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +template <int N> +void +f11 () +{ +#pragma omp parallel for default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f12 () +{ +#pragma omp parallel for collapse(3) default(none) shared(a, b, c) + for (auto &i : b) + for (I<int> j = I<int> (&a[9]); j < I<int> (&a[10]); j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || *j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +template <typename T> +void +f13 (J<T> j) +{ +#pragma omp parallel for default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f14 () +{ +#pragma omp parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % N] += 2 * ((unsigned) i >> 10) + 1; +} + +template <typename T> +void +f15 (J<K<T>> j) +{ +#pragma omp parallel for default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <typename T> +void +f16 (J<L<T>> j) +{ +#pragma omp parallel for default(none) 
shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f17 (J<K<int>> j) +{ +#pragma omp parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <int N> +void +f18 (J<L<int>> j) +{ +#pragma omp parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + for (int i = 0; i < 2000; i++) + a[i] = i; + for (int i = 0; i < 40; i++) + b[i] = i; + for (int i = 0; i < 50; i++) + c[i] = i; + for (int i = 0; i < 1024; i++) + d[i] = i; + for (int i = 0; i < 1089; i++) + { + e[i].a = i; + e[i].b = 2 * i; + e[i].c = 3 * i; + } + for (int i = 0; i < 1093; i++) + { + f[i].a = i; + f[i].b = 4 * i; + f[i].c = 5 * i; + } + f1 (); + check (1); + f2 (); + check (1); + f3 (); + check (1); + f4 (J<int> (&a[14], &a[1803])); + check (i >= 14 && i < 1803); + f5 (); + check (i >= 0 && i < 1024); + f6 (J<K<int>> (&e[19], &e[1029])); + check (i >= 19 && i < 1029); + f7 (J<L<int>> (&f[15], &f[1091])); + check (i >= 15 && i < 1091); + f8 (J<K<int>> (&e[27], &e[1037])); + check (i >= 27 && i < 1037); + f9 (J<L<int>> (&f[1], &f[1012])); + check (i >= 1 && i < 1012); + f10 <0> (); + check (1); + f11 <1> (); + check (1); + f12 <2> (); + check (1); + f13 (J<int> (&a[24], &a[1703])); + check (i >= 24 && i < 1703); + f14 <1024> (); + check (i >= 0 && i < 1024); + f15 (J<K<int>> (&e[39], &e[929])); + check (i >= 39 && i < 929); + f16 (J<L<int>> (&f[17], &f[1071])); + check (i >= 17 && i < 1071); + f17 <3> (J<K<int>> (&e[7], &e[1017])); + check (i >= 7 && i < 1017); + f18 <5> (J<L<int>> (&f[121], &f[1010])); + check (i >= 121 
&& i < 1010); +} diff --git a/libgomp/testsuite/libgomp.c++/for-24.C b/libgomp/testsuite/libgomp.c++/for-24.C new file mode 100644 index 00000000000..cb14addc14f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-24.C @@ -0,0 +1,427 @@ +// { dg-do run } +// { dg-additional-options "-std=c++17" } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +namespace std { + template<typename T> struct tuple_size; + template<int, typename> struct tuple_element; +} + +#pragma omp declare target +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename 
I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { 
return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +struct K +{ + template <int N> int &get () { if (N == 0) return c; else if (N == 1) return b; return a; } + int a, b, c; +}; + +template <> struct std::tuple_size<K> { static constexpr int value = 3; }; +template <int N> struct std::tuple_element<N, K> { using type = int; }; + +struct L +{ + int a, b, c; +}; + +int a[2000]; +long b[40]; +short c[50]; +int d[1024]; +K e[1089]; +L f[1093]; +#pragma omp end declare target + +int results[2000]; + +#pragma omp declare target +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +baz (int i) +{ + if (i < 0 || i >= 2000) + abort (); + results[i]++; +} + +void +f1 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +void +f2 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f3 () +{ +#pragma omp distribute parallel for collapse(3) default(none) shared(b, c) + for (auto &i : b) + for (int j = 9; j < 10; 
j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +void +f4 (J<int> j) +{ +#pragma omp distribute parallel for default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f5 () +{ +#pragma omp distribute parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % 1024] += 2 * ((unsigned) i >> 10) + 1; +} + +void +f6 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f7 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +void +f8 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f9 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f10 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +template <int N> +void +f11 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f12 () +{ +#pragma omp distribute parallel for collapse(3) default(none) shared(a, b, c) + for (auto &i : b) + for (I<int> j = I<int> (&a[9]); j < I<int> (&a[10]); j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || *j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +template <typename T> +void +f13 (J<T> j) +{ +#pragma omp distribute parallel for 
default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f14 () +{ +#pragma omp distribute parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % N] += 2 * ((unsigned) i >> 10) + 1; +} + +template <typename T> +void +f15 (J<T> j) +{ +#pragma omp distribute parallel for default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <typename T> +void +f16 (J<T> j) +{ +#pragma omp distribute parallel for default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f17 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <int N> +void +f18 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} +#pragma omp end declare target + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + for (int i = 0; i < 2000; i++) + a[i] = i; + for (int i = 0; i < 40; i++) + b[i] = i; + for (int i = 0; i < 50; i++) + c[i] = i; + for (int i = 0; i < 1024; i++) + d[i] = i; + for (int i = 0; i < 1089; i++) + { + e[i].a = i; + e[i].b = 2 * i; + e[i].c = 3 * i; + } + for (int i = 0; i < 1093; i++) + { + f[i].a = i; + f[i].b = 4 * i; + f[i].c = 5 * i; + } + #pragma omp target update to (a, b, c, d, e, f) + #pragma omp target teams map (tofrom: results) + f1 (); + check (1); + #pragma omp target teams map (tofrom: results) + f2 (); + check (1); + #pragma omp target teams 
map (tofrom: results) + f3 (); + check (1); + #pragma omp target teams map (tofrom: results) + f4 (J<int> (&a[14], &a[1803])); + check (i >= 14 && i < 1803); + #pragma omp target teams map (tofrom: results) + f5 (); + check (i >= 0 && i < 1024); + #pragma omp target teams map (tofrom: results) + f6 (J<K> (&e[19], &e[1029])); + check (i >= 19 && i < 1029); + #pragma omp target teams map (tofrom: results) + f7 (J<L> (&f[15], &f[1091])); + check (i >= 15 && i < 1091); + #pragma omp target teams map (tofrom: results) + f8 (J<K> (&e[27], &e[1037])); + check (i >= 27 && i < 1037); + #pragma omp target teams map (tofrom: results) + f9 (J<L> (&f[1], &f[1012])); + check (i >= 1 && i < 1012); + #pragma omp target teams map (tofrom: results) + f10 <0> (); + check (1); + #pragma omp target teams map (tofrom: results) + f11 <1> (); + check (1); + #pragma omp target teams map (tofrom: results) + f12 <2> (); + check (1); + #pragma omp target teams map (tofrom: results) + f13 (J<int> (&a[24], &a[1703])); + check (i >= 24 && i < 1703); + #pragma omp target teams map (tofrom: results) + f14 <1024> (); + check (i >= 0 && i < 1024); + #pragma omp target teams map (tofrom: results) + f15 (J<K> (&e[39], &e[929])); + check (i >= 39 && i < 929); + #pragma omp target teams map (tofrom: results) + f16 (J<L> (&f[17], &f[1071])); + check (i >= 17 && i < 1071); + #pragma omp target teams map (tofrom: results) + f17 <3> (J<K> (&e[7], &e[1017])); + check (i >= 7 && i < 1017); + #pragma omp target teams map (tofrom: results) + f18 <5> (J<L> (&f[121], &f[1010])); + check (i >= 121 && i < 1010); +} diff --git a/libgomp/testsuite/libgomp.c++/for-25.C b/libgomp/testsuite/libgomp.c++/for-25.C new file mode 100644 index 00000000000..3fecb488787 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-25.C @@ -0,0 +1,420 @@ +// { dg-do run } +// { dg-additional-options "-std=c++17" } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +namespace std { + template<typename T> struct 
tuple_size; + template<int, typename> struct tuple_element; +} + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const 
{ return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type 
operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +template <typename T> +class K +{ +public: + K (); + ~K (); + template <int N> T &get () { if (N == 0) return c; else if (N == 1) return b; return a; } + T a, b, c; +}; + +template <typename T> K<T>::K () : a {}, b {}, c {} {} +template <typename T> K<T>::~K () {} +template <typename T> struct std::tuple_size<K<T>> { static constexpr int value = 3; }; +template <typename T, int N> struct std::tuple_element<N, K<T>> { using type = T; }; + +template <typename T> +class L +{ +public: + L (); + ~L (); + T a, b, c; +}; + +template <typename T> L<T>::L () : a {}, b {}, c {} {} +template <typename T> L<T>::~L () {} + +int a[2000]; +long b[40]; +short c[50]; +int d[1024]; +K<int> e[1089]; +L<int> f[1093]; +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +baz (int i) +{ + if (i < 0 || i >= 2000) + abort (); + results[i]++; +} + +void +f1 () +{ +#pragma omp taskloop default(none) shared(a) + for (auto i : a) + baz (i); +} + +void +f2 () +{ +#pragma omp taskloop default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f3 () +{ +#pragma omp taskloop collapse(3) default(none) shared(b, c) + for (auto &i : b) + for (int j = 9; j < 10; j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +void +f4 (J<int> j) +{ +#pragma omp taskloop default(none) shared(j, a) + for (auto &i : j) + if (&i != 
&a[i]) + abort (); + else + baz (i); +} + +void +f5 () +{ +#pragma omp taskloop simd default(none) shared(d, results) + for (auto i : d) + results[i % 1024] += 2 * ((unsigned) i >> 10) + 1; +} + +void +f6 (J<K<int>> j) +{ +#pragma omp taskloop default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f7 (J<L<int>> j) +{ +#pragma omp taskloop default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +void +f8 (J<K<int>> j) +{ +#pragma omp taskloop default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f9 (J<L<int>> j) +{ +#pragma omp taskloop default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f10 () +{ +#pragma omp taskloop default(none) shared(a) + for (auto i : a) + baz (i); +} + +template <int N> +void +f11 () +{ +#pragma omp taskloop default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f12 () +{ +#pragma omp taskloop collapse(3) default(none) shared(a, b, c) + for (auto &i : b) + for (I<int> j = I<int> (&a[9]); j < I<int> (&a[10]); j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || *j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +template <typename T> +void +f13 (J<T> j) +{ +#pragma omp taskloop default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f14 () +{ +#pragma omp taskloop simd default(none) shared(d, results) + for (auto i : d) + results[i % N] += 2 * ((unsigned) i >> 10) + 1; +} + +template <typename T> +void +f15 (J<K<T>> j) +{ +#pragma omp taskloop default(none) shared(j, e) + for 
(auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <typename T> +void +f16 (J<L<T>> j) +{ +#pragma omp taskloop default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f17 (J<K<int>> j) +{ +#pragma omp taskloop default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +template <int N> +void +f18 (J<L<int>> j) +{ +#pragma omp taskloop default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + for (int i = 0; i < 2000; i++) + a[i] = i; + for (int i = 0; i < 40; i++) + b[i] = i; + for (int i = 0; i < 50; i++) + c[i] = i; + for (int i = 0; i < 1024; i++) + d[i] = i; + for (int i = 0; i < 1089; i++) + { + e[i].a = i; + e[i].b = 2 * i; + e[i].c = 3 * i; + } + for (int i = 0; i < 1093; i++) + { + f[i].a = i; + f[i].b = 4 * i; + f[i].c = 5 * i; + } + #pragma omp parallel + #pragma omp single + { + f1 (); + check (1); + f2 (); + check (1); + f3 (); + check (1); + f4 (J<int> (&a[14], &a[1803])); + check (i >= 14 && i < 1803); + f5 (); + check (i >= 0 && i < 1024); + f6 (J<K<int>> (&e[19], &e[1029])); + check (i >= 19 && i < 1029); + f7 (J<L<int>> (&f[15], &f[1091])); + check (i >= 15 && i < 1091); + f8 (J<K<int>> (&e[27], &e[1037])); + check (i >= 27 && i < 1037); + f9 (J<L<int>> (&f[1], &f[1012])); + check (i >= 1 && i < 1012); + f10 <0> (); + check (1); + f11 <1> (); + check (1); + f12 <2> (); + check (1); + f13 (J<int> (&a[24], &a[1703])); + check (i >= 24 && i < 1703); + f14 <1024> (); + check (i >= 0 && i < 1024); + 
f15 (J<K<int>> (&e[39], &e[929])); + check (i >= 39 && i < 929); + f16 (J<L<int>> (&f[17], &f[1071])); + check (i >= 17 && i < 1071); + f17 <3> (J<K<int>> (&e[7], &e[1017])); + check (i >= 7 && i < 1017); + f18 <5> (J<L<int>> (&f[121], &f[1010])); + check (i >= 121 && i < 1010); + } +} diff --git a/libgomp/testsuite/libgomp.c++/for-26.C b/libgomp/testsuite/libgomp.c++/for-26.C new file mode 100644 index 00000000000..bb7ae11d3cc --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-26.C @@ -0,0 +1,422 @@ +// { dg-do run } +// { dg-additional-options "-std=c++17" } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +namespace std { + template<typename T> struct tuple_size; + template<int, typename> struct tuple_element; +} + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template 
<typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < 
y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +struct K +{ + template <int N> int &get () { if (N == 0) return c; else if (N == 1) return b; return a; } + int a, b, c; +}; + +template <> struct std::tuple_size<K> { static constexpr int value = 3; }; +template <int N> struct std::tuple_element<N, K> { using type = int; }; + +struct L +{ + int a, b, c; +}; + +int a[2000]; +long b[40]; +short c[50]; +int d[1024]; +K e[1089]; +L f[1093]; + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +baz (int i) +{ + if (i < 0 || i >= 2000) + abort (); + results[i]++; +} + +void +f1 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +void +f2 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto &i : a) + if (&i != 
&a[i]) + abort (); + else + baz (i); +} + +void +f3 () +{ +#pragma omp distribute parallel for collapse(3) default(none) shared(b, c) + for (auto &i : b) + for (int j = 9; j < 10; j++) + for (auto k : c) + if (&i != &b[i] || i < 0 || i >= 40 || j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} + +void +f4 (J<int> j) +{ +#pragma omp distribute parallel for default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +void +f5 () +{ +#pragma omp distribute parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % 1024] += 2 * ((unsigned) i >> 10) + 1; +} + +void +f6 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f7 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +void +f8 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} + +void +f9 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} + +template <int N> +void +f10 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto i : a) + baz (i); +} + +template <int N> +void +f11 () +{ +#pragma omp distribute parallel for default(none) shared(a) + for (auto &i : a) + if (&i != &a[i]) + abort (); + else + baz (i); +} + +template <int N> +void +f12 () +{ +#pragma omp distribute parallel for collapse(3) default(none) shared(a, b, c) + for (auto &i : b) + for (I<int> j = I<int> (&a[9]); j < I<int> (&a[10]); j++) + for (auto k : c) + if (&i != &b[i] 
|| i < 0 || i >= 40 || *j != 9 || k < 0 || k >= 50) + abort (); + else + baz (i * 50 + k); +} +/* f13: by-reference range-for over a J<T> range; each element must alias a[i]. */ +template <typename T> +void +f13 (J<T> j) +{ +#pragma omp distribute parallel for default(none) shared(j, a) + for (auto &i : j) + if (&i != &a[i]) + abort (); + else + baz (i); +} +/* f14: simd variant; the modulus for the results index is the template argument N. */ +template <int N> +void +f14 () +{ +#pragma omp distribute parallel for simd default(none) shared(d, results) + for (auto i : d) + results[i % N] += 2 * ((unsigned) i >> 10) + 1; +} +/* f15: by-reference structured binding over J<T>; bindings must alias e[m].c, e[m].b, e[m].a with k == 3 * m and l == 2 * m. */ +template <typename T> +void +f15 (J<T> j) +{ +#pragma omp distribute parallel for default(none) shared(j, e) + for (auto & [k, l, m] : j) + if (&k != &e[m].c || &l != &e[m].b || &m != &e[m].a || k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} +/* f16: by-reference structured binding over J<T>; bindings must alias f[k].a, f[k].b, f[k].c with l == 4 * k and m == 5 * k. */ +template <typename T> +void +f16 (J<T> j) +{ +#pragma omp distribute parallel for default(none) shared(j, f) + for (auto & [k, l, m] : j) + if (&k != &f[k].a || &l != &f[k].b || &m != &f[k].c || l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} +/* f17: by-value structured binding over J<K> inside a template; checks k == 3 * m and l == 2 * m. */ +template <int N> +void +f17 (J<K> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (k != m * 3 || l != m * 2) + abort (); + else + baz (m); +} +/* f18: by-value structured binding over J<L> inside a template; checks l == 4 * k and m == 5 * k. */ +template <int N> +void +f18 (J<L> j) +{ +#pragma omp distribute parallel for default(none) shared(j) + for (auto [k, l, m] : j) + if (l != k * 4 || m != k * 5) + abort (); + else + baz (k); +} +/* check (expr): for every i in [0, 2000), results[i] must be 1 where expr holds (it is then reset to 0) and must be 0 where expr does not hold; aborts otherwise. */ +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () +/* main: fills a, b, c, d, e and f with index-derived values, then calls each fN inside a teams construct and validates results with check. */ +int +main () +{ + for (int i = 0; i < 2000; i++) + a[i] = i; + for (int i = 0; i < 40; i++) + b[i] = i; + for (int i = 0; i < 50; i++) + c[i] = i; + for (int i = 0; i < 1024; i++) + d[i] = i; + for (int i = 0; i < 1089; i++) + { + e[i].a = i; + e[i].b = 2 * i; + e[i].c = 3 * i; + } + for (int i = 0; i < 1093; i++) + { + f[i].a = i; + f[i].b = 4 * i; + f[i].c = 5 * i; + } + #pragma omp teams + f1 (); + check (1); + #pragma omp teams + f2 (); + 
check (1); + #pragma omp teams + f3 (); + check (1); + #pragma omp teams + f4 (J<int> (&a[14], &a[1803])); + check (i >= 14 && i < 1803); + #pragma omp teams + f5 (); + check (i >= 0 && i < 1024); + #pragma omp teams + f6 (J<K> (&e[19], &e[1029])); + check (i >= 19 && i < 1029); + #pragma omp teams + f7 (J<L> (&f[15], &f[1091])); + check (i >= 15 && i < 1091); + #pragma omp teams + f8 (J<K> (&e[27], &e[1037])); + check (i >= 27 && i < 1037); + #pragma omp teams + f9 (J<L> (&f[1], &f[1012])); + check (i >= 1 && i < 1012); + #pragma omp teams + f10 <0> (); + check (1); + #pragma omp teams + f11 <1> (); + check (1); + #pragma omp teams + f12 <2> (); + check (1); + #pragma omp teams + f13 (J<int> (&a[24], &a[1703])); + check (i >= 24 && i < 1703); + #pragma omp teams + f14 <1024> (); + check (i >= 0 && i < 1024); + #pragma omp teams + f15 (J<K> (&e[39], &e[929])); + check (i >= 39 && i < 929); + #pragma omp teams + f16 (J<L> (&f[17], &f[1071])); + check (i >= 17 && i < 1071); + #pragma omp teams + f17 <3> (J<K> (&e[7], &e[1017])); + check (i >= 7 && i < 1017); + #pragma omp teams + f18 <5> (J<L> (&f[121], &f[1010])); + check (i >= 121 && i < 1010); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-1.C b/libgomp/testsuite/libgomp.c++/task-reduction-1.C new file mode 100644 index 00000000000..b3e228e5310 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-1.C @@ -0,0 +1,63 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; +/* foo: creates two tasks that take part in the reductions on a, b, c and d; each task adds 7 to a, doubles b, adds 9 to c and triples d. */ +void +foo (int &c, long long int &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + } +} +/* main: inside parallel/single, a taskgroup with task_reduction clauses runs 4 outer tasks, each spawning 2 nested tasks that also call foo; the reduced values are verified afterwards. */ +int +main () +{ + int cs = 0; + int &c = cs; + long long int ds = 1; + #pragma omp parallel + #pragma omp single + { + long long int &d = ds; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + 
#pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + int j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (c, d); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3LL * 3LL * 3LL * 3LL) +/* d is tripled 28 times in total (4 outer + 8 nested + 16 foo tasks), i.e. THREEP4 to the 7th power. */ if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-10.C b/libgomp/testsuite/libgomp.c++/task-reduction-10.C new file mode 100644 index 00000000000..0eb94c121be --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-10.C @@ -0,0 +1,125 @@ +extern "C" void abort (); + +struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; +/* Default constructor: atomically counts the construction in cnt1; s and t are left uninitialized. */ +S::S () +{ + #pragma omp atomic + cnt1++; +} +/* Value constructor: stores x in s and y in t and atomically counts the construction in cnt2. */ +S::S (long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} +/* Destructor: atomically counts the destruction in cnt3 and aborts unless t is an odd value in [3, 9]. */ +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} +/* bar: initializer used by the * reduction declared below; sets p->s to 1 and p->t to 9 after checking that o->t is 5. */ +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} +/* baz: combiner used by the * reduction declared below; requires o->t == 5 and i->t == 9, then multiplies o->s by i->s. */ +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as = { 0LL, 7 }; +S &a = as; +S bs (1LL, 5); +S &b = bs; +/* foo: creates two tasks joining the reductions on a, b, c and d; each updates the s fields and aborts if a t field is neither the original tag (7 or 5) nor the initializer tag (3 or 9). */ +void +foo (S &c, S &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +} +/* test: runs a parallel region with reduction (task, ...) clauses on S objects, creating 4 outer tasks with 2 nested tasks each, then verifies the s totals and the t tags. */ +void +test () +{ + S cs = { 0LL, 7 }; + 
S &c = cs; + S ds (1LL, 5); + #pragma omp parallel if (0) + { + S &d = ds; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (c, d); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL) +/* d.s is tripled 28 times in total, i.e. THREEP7 to the 4th power, and d must keep its tag 5. */ if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); +} +/* main: runs test and aborts unless the number of S objects constructed during it (cnt1 + cnt2) equals the number destroyed (cnt3). */ +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-11.C b/libgomp/testsuite/libgomp.c++/task-reduction-11.C new file mode 100644 index 00000000000..542bdd64da1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-11.C @@ -0,0 +1,237 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; +/* foo: creates two tasks whose in_reduction clauses name array sections of the reference globals and of the reference parameters, and updates selected elements of each. */ +template <typename T> +void +foo (T &n, T *&c, long long int *&d, T 
(&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} +/* test: builds local arrays and array references, points the globals s and t at sb and tb, then runs a parallel region with reduction (task, ...) clauses over array sections and verifies every element afterwards. */ +template <typename T> +void +test (T &n) +{ + T cs[2] = { 0, 0 }; + T (&c)[2] = cs; + T ps[3] = { 0, 1, 4 }; + T (&p)[3] = ps; + T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + T (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + T ms[3] = { 5, 0, 5 }; + T os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + T (&m)[3] = ms; + T rs[5] = { 6, 7, 0, 0, 9 }; + T (&r)[5] = rs; + T (&o)[4] = os; + #pragma omp parallel reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \ + reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task,*: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; 
+ r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; +/* Each outer task spawns two nested tasks that join the same reductions. */ for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; +/* Pointer copies of the local arrays, passed to foo below. */ T *cp = c; + long long int *dp = d; + T *rp = r; + T *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } +/* Expected totals: the foo tasks run 16 times, the nested tasks 8 times and the outer tasks 4 times. */ if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || 
h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} +/* main: calls test with n == 1; the array-section bounds in the clauses are expressed in terms of n. */ +int +main () +{ + int n = 1; + test (n); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-12.C b/libgomp/testsuite/libgomp.c++/task-reduction-12.C new file mode 100644 index 00000000000..02c1a787cbf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-12.C @@ -0,0 +1,321 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; +/* Default constructor: atomically counts the construction in cnt1; s and t are left uninitialized. */ +S::S () +{ + #pragma omp atomic + cnt1++; +} +/* Value constructor: stores x in s and y in t and atomically counts the construction in cnt2. */ +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} +/* Destructor: atomically counts the destruction in cnt3 and aborts unless t is an odd value in [3, 9]. */ +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} +/* bar: initializer used by the * reduction declared below; sets p->s to 1 and p->t to 9 after checking that o->t is 5. */ +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} +/* baz: combiner used by the * reduction declared below; requires o->t == 5 and i->t == 9, then multiplies o->s by i->s. */ +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 
15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; +/* foo: creates two tasks whose in_reduction clauses name array sections of the globals and of the pointer/array parameters; each task updates the s fields and aborts if a t field is neither the original tag (7 or 5) nor the initializer tag (3 or 9). N is not used in the body. */ +template <int N> +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort 
(); + } +} +/* test: builds local S arrays, points the globals s and t at ss and tt, then runs a parallel region with reduction (task, ...) clauses over array sections of S and verifies the s totals and t tags. N is only forwarded to foo<N>. */ +template <int N> +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel num_threads (1) if (0) + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; +/* Inside a task every t field must be either the original tag (7 or 5) or the initializer tag (3 or 9). */ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 
3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); +/* Each outer task spawns two nested tasks that join the same reductions. */ for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo<N> (n, c, d, m, r, o, p, q); +/* Same tag check as in the outer task: original tag (7 or 5) or initializer tag (3 or 9). */ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + 
r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } +/* Expected totals: the foo tasks run 16 times, the nested tasks 8 times and the outer tasks 4 times. */ if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); +/* After the reduction region the t fields must be exactly 7 for the + operands and 5 for the * operands. */ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s 
!= 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} +/* main: runs test<0> (1) and aborts unless the number of S objects constructed during it (cnt1 + cnt2) equals the number destroyed (cnt3). */ +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test<0> (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-13.C b/libgomp/testsuite/libgomp.c++/task-reduction-13.C new file mode 100644 index 00000000000..3d0165dfa92 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-13.C @@ -0,0 +1,342 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; +/* Default constructor: atomically counts the construction in cnt1; s and t are left uninitialized. */ +S::S () +{ + #pragma omp atomic + cnt1++; +} +/* Value constructor: stores x in s and y in t and atomically counts the construction in cnt2. */ +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} +/* Destructor: atomically counts the destruction in cnt3 and aborts unless t is an odd value in [3, 9]. */ +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} +/* bar: initializer used by the * reduction declared below; sets p->s to 1 and p->t to 9 after checking that o->t is 5. */ +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} +/* baz: combiner used by the * reduction declared below; requires o->t == 5 and i->t == 9, then multiplies o->s by i->s. */ +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as[2] = { { 0, 7 }, { 0, 7 } }; +S (&a)[2] = as; +S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S (&b)[7] = bs; +S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; 
+S (&e)[3] = es; +S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S (&f)[5] = fs; +S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S (&g)[4] = gs; +S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S (&h)[3] = hs; +S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S (&k)[4][2] = ks; +S *ss; +S *&s = ss; +S (*ts)[2]; +S (*&t)[2] = ts; +/* foo: creates two tasks whose in_reduction clauses name array sections of the reference globals and of the reference parameters; each task updates the s fields and aborts if a t field is neither the original tag (7 or 5) nor the initializer tag (3 or 9). */ +template <typename S, typename T> +void +foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 
9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} +/* test: builds local S arrays and array references, points the globals s and t at sb and tb, then runs a parallel region with reduction (task, ...) clauses over array sections and verifies the s totals and t tags. */ +template <typename S, typename T> +void +test (T &n) +{ + S cs[2] = { { 0, 7 }, { 0, 7 } }; + S (&c)[2] = cs; + S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S (&p)[3] = ps; + S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S (&q)[4][2] = qs; + S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + S ds[] = { { 1, 5 }, { 1, 5 } }; + S (&d)[2] = ds; + S (&m)[3] = ms; + S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S (&r)[5] = rs; + S (&o)[4] = os; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) + { + #pragma omp for + for (T i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; +/* Inside a task every t field must be either the original tag (7 or 5) or the initializer tag (3 or 9). */ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t 
!= 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); +/* Each outer task spawns two nested tasks that join the same reductions. */ for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; +/* Pointer copies of the local arrays; foo takes these arguments as references to pointers. */ S *cp = c; + S *dp = d; + S *rp = r; + S *pp = p; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if 
(b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + foo (n, cp, dp, m, rp, o, pp, q); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } +/* Expected totals: the foo tasks run 16 times, the nested tasks 8 times and the outer tasks 4 times. */ if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); +/* After the reduction region the t fields must be exactly 7 for the + operands and 5 for the * operands. */ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (T z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 
|| h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || sb[0].s != 5 + || sb[1].s != 1LL << (16 + 4) + || sb[2].s != 1LL << 8 + || sb[3].s != 6 + || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12 + || tb[2][0].s != 1LL << (16 + 8) + || tb[2][1].s != 1LL << 4 + || tb[3][0].s != 1LL << 8 + || tb[3][1].s != 1LL << (16 + 4) + || tb[4][0].s != 13 || tb[4][1].s != 14) + abort (); +} +/* main: runs test<S, int> with n == 1 and aborts unless the number of S objects constructed during it (cnt1 + cnt2) equals the number destroyed (cnt3). */ +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + int n = 1; + test<S, int> (n); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-14.C b/libgomp/testsuite/libgomp.c++/task-reduction-14.C new file mode 100644 index 00000000000..3f4e79b16c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-14.C @@ -0,0 +1,72 @@ +#include <omp.h> +#include <stdlib.h> + +struct A { A (); ~A (); A (const A &); static int cnt1, cnt2, cnt3; int a; }; +int A::cnt1; +int A::cnt2; +int A::cnt3; +/* Default constructor: zero-initializes a and atomically counts the construction in cnt1. */A::A () : a (0) +{ + #pragma omp atomic + cnt1++; +} +/* Copy constructor: copies a and atomically counts the construction in cnt2. */A::A (const A &x) : a (x.a) +{ + #pragma omp atomic + cnt2++; +} +/* Destructor: atomically counts the destruction in cnt3. */A::~A () +{ + #pragma omp atomic + cnt3++; +} +#pragma omp declare reduction (+: A: omp_out.a += omp_in.a) +/* foo: runs a worksharing loop with reduction (task, ...) clauses on A objects and scalars; when x is nonzero, thread 0 first spins, sets c and requests cancellation of the parallel region, in which case the totals are not checked. */ +void +foo (int x) +{ + A a, b[2]; + int d = 1; + long int e[2] = { 1L, 1L }; + int c = 0; + #pragma omp parallel + { + if (x && omp_get_thread_num () == 0) + { + for (int i = 0; i < 10000000; ++i) + asm volatile (""); + c = 1; + #pragma omp cancel parallel + } + #pragma omp for reduction (task, +: a, b) reduction (task, *: d, e) + for 
(int i = 0; i < 64; i++) + #pragma omp task in_reduction (+: a, b) in_reduction (*: d, e) + { + a.a++; + b[0].a += 2; + b[1].a += 3; + d *= ((i & 7) == 0) + 1; + e[0] *= ((i & 7) == 3) + 1; + e[1] *= ((i & 3) == 2) + 1; + } + if (x && omp_get_cancellation ()) + abort (); + } + if (!c) + { + if (a.a != 64 || b[0].a != 128 || b[1].a != 192) + abort (); + if (d != 256 || e[0] != 256L || e[1] != 65536L) + abort (); + } +} + +int +main () +{ + int c1 = A::cnt1, c2 = A::cnt2, c3 = A::cnt3; + volatile int zero = 0; + foo (zero); + if (A::cnt1 + A::cnt2 - c1 - c2 != A::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-15.C b/libgomp/testsuite/libgomp.c++/task-reduction-15.C new file mode 100644 index 00000000000..8a01e6b240a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-15.C @@ -0,0 +1,75 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; + +template <typename T, typename U> +void +foo (T &c, U &d) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + } +} + +template <typename T, typename U> +void +bar () +{ + T cs = 0; + T &c = cs; + U ds = 1; + #pragma omp parallel if (0) + { + U &d = ds; + #pragma omp parallel + { + T i; + #pragma omp for reduction (task, +: a, c) reduction (task, *: b, d) + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + T j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (c, d); + } + c += 9; + d *= 3; + } +#define THREEP4 (3LL * 3LL * 3LL * 3LL) + if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort (); + } + } + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort 
(); + if (ds != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); +} + +int +main () +{ + bar<int, long long int> (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-16.C b/libgomp/testsuite/libgomp.c++/task-reduction-16.C new file mode 100644 index 00000000000..5835edcbd5b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-16.C @@ -0,0 +1,130 @@ +extern "C" void abort (); + +struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as = { 0LL, 7 }; +S &a = as; +S bs (1LL, 5); +S &b = bs; + +void +foo (S &c, S &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +} + +void +test () +{ + S cs = { 0LL, 7 }; + S &c = cs; + S ds (1LL, 5); + #pragma omp parallel if (0) + { + S &d = ds; + #pragma omp parallel shared (a, b, c, d) + { + #pragma omp for schedule (static, 1) reduction (task, +: a, c) reduction (task, *: b, d) + for (int i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 
2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (c, d); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL) + if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5) + abort (); + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); + } + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); + if (ds.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || ds.t != 5) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-17.C b/libgomp/testsuite/libgomp.c++/task-reduction-17.C new file mode 100644 index 00000000000..c00c8e46542 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-17.C @@ -0,0 +1,300 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; + +template <typename T> +void +foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task 
in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +template <typename T, typename I> +void +test (T &n, I x, I y) +{ + T cs[2] = { 0, 0 }; + T (&c)[2] = cs; + T ps[3] = { 0, 1, 4 }; + T (&p)[3] = ps; + T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + T (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + T ms[3] = { 5, 0, 5 }; + T os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + T (&m)[3] = ms; + T rs[5] = { 6, 7, 0, 0, 9 }; + T (&r)[5] = rs; + T (&o)[4] = os; + #pragma omp parallel + { + #pragma omp for reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \ + reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task,*: t[2:2][:], s[1:n + 1]) schedule (dynamic) + for (I i = x; i != y; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for 
(j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + T *cp = c; + long long int *dp = d; + T *rp = r; + T *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 
* 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 
31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main () +{ + int n = 1; + test (n, 0ULL, 4ULL); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-18.C b/libgomp/testsuite/libgomp.c++/task-reduction-18.C new file mode 100644 index 00000000000..99c0e3727d4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-18.C @@ -0,0 +1,325 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 
7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +template <int N> +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <int N> +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 
} }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel num_threads (1) if (0) + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + volatile unsigned long long x = 0; + volatile unsigned long long y = 4; + volatile unsigned long long z = 1; + #pragma omp parallel + { + #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) \ + schedule (nonmonotonic: guided, 1) + for (unsigned long long i = x; i < y; i += z) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 
1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo<N> (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + 
c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t 
!= 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test<0> (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-19.C b/libgomp/testsuite/libgomp.c++/task-reduction-19.C new file mode 100644 index 00000000000..15945c57cc2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-19.C @@ -0,0 +1,343 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as[2] = { { 0, 7 }, { 0, 7 } }; +S (&a)[2] = as; +S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S (&b)[7] = bs; +S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S (&e)[3] = es; +S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S (&f)[5] = fs; +S gs[4] = { { 1, 7 }, { 0, 7 }, 
{ 0, 7 }, { 2, 7 } }; +S (&g)[4] = gs; +S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S (&h)[3] = hs; +S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S (&k)[4][2] = ks; +S *ss; +S *&s = ss; +S (*ts)[2]; +S (*&t)[2] = ts; + +template <typename S, typename T> +void +foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <typename S, 
typename T> +void +test (T &n) +{ + S cs[2] = { { 0, 7 }, { 0, 7 } }; + S (&c)[2] = cs; + S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S (&p)[3] = ps; + S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S (&q)[4][2] = qs; + S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + s = sb; + t = tb; + #pragma omp parallel if (0) + { + S ds[] = { { 1, 5 }, { 1, 5 } }; + S (&d)[2] = ds; + S (&m)[3] = ms; + S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S (&r)[5] = rs; + S (&o)[4] = os; + #pragma omp parallel + { + #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) \ + schedule (monotonic: runtime) + for (T i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 
1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + S *cp = c; + S *dp = d; + S *rp = r; + S *pp = p; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (T z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + foo (n, cp, dp, m, rp, o, pp, q); + 
r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || sb[0].s != 5 + || sb[1].s != 1LL << (16 + 4) + || sb[2].s != 1LL << 8 + || sb[3].s != 6 + || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12 + || tb[2][0].s != 1LL << (16 + 8) + || tb[2][1].s != 1LL << 4 + || tb[3][0].s != 1LL << 8 + || tb[3][1].s != 1LL << (16 + 4) + || tb[4][0].s != 13 || tb[4][1].s != 14) + abort (); + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 
* 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (T z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (T z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + } +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + int n = 1; + test<S, int> (n); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-2.C b/libgomp/testsuite/libgomp.c++/task-reduction-2.C new file mode 100644 index 00000000000..75d2ee37e4f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-2.C @@ -0,0 +1,119 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a = { 0, 
7 }; +S b (1, 5); + +void +foo () +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +void +test () +{ + S c = { 0, 7 }; + #pragma omp parallel + #pragma omp single + { + S d (1, 5); + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-3.C b/libgomp/testsuite/libgomp.c++/task-reduction-3.C new file mode 100644 index 00000000000..a6eccf6ced6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-3.C @@ -0,0 +1,126 @@ +extern "C" void abort (); + +struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = 
cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as = { 0LL, 7 }; +S &a = as; +S bs (1LL, 5); +S &b = bs; + +void +foo (S &c, S &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +} + +void +test () +{ + S cs = { 0LL, 7 }; + S &c = cs; + S ds (1LL, 5); + #pragma omp parallel + #pragma omp single + { + S &d = ds; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (c, d); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL) + if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if 
(S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-4.C b/libgomp/testsuite/libgomp.c++/task-reduction-4.C new file mode 100644 index 00000000000..1d4da79b3c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-4.C @@ -0,0 +1,238 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; + +template <typename T> +void +foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2]) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +template <typename T> +void +test (T &n) +{ + T cs[2] = { 0, 0 }; + T (&c)[2] = cs; + T ps[3] = { 0, 1, 4 }; + T (&p)[3] = ps; + T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + T (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + T ms[3] = { 5, 0, 5 }; + T os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + #pragma omp 
parallel + #pragma omp single + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + T (&m)[3] = ms; + T rs[5] = { 6, 7, 0, 0, 9 }; + T (&r)[5] = rs; + T (&o)[4] = os; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + T i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + T j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + T *cp = c; + long long int *dp = d; + T *rp = r; + T *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + 
|| r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main () +{ + int n = 1; + test (n); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-5.C b/libgomp/testsuite/libgomp.c++/task-reduction-5.C new file mode 100644 index 00000000000..59583f15c82 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-5.C @@ -0,0 +1,320 @@ +extern "C" void abort (); + +struct S { S 
(); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + 
if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + 
in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && 
c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << 
(16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-6.C b/libgomp/testsuite/libgomp.c++/task-reduction-6.C new file mode 100644 index 00000000000..d7f69da219b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-6.C @@ -0,0 +1,341 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic 
+ cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as[2] = { { 0, 7 }, { 0, 7 } }; +S (&a)[2] = as; +S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S (&b)[7] = bs; +S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S (&e)[3] = es; +S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S (&f)[5] = fs; +S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S (&g)[4] = gs; +S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S (&h)[3] = hs; +S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S (&k)[4][2] = ks; +S *ss; +S *&s = ss; +S (*ts)[2]; +S (*&t)[2] = ts; + +void +foo (int &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + 
t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +void +test (int &n) +{ + S cs[2] = { { 0, 7 }, { 0, 7 } }; + S (&c)[2] = cs; + S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S (&p)[3] = ps; + S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S (&q)[4][2] = qs; + S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + s = sb; + t = tb; + #pragma omp parallel + #pragma omp single + { + S ds[] = { { 1, 5 }, { 1, 5 } }; + S (&d)[2] = ds; + S (&m)[3] = ms; + S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S (&r)[5] = rs; + S (&o)[4] = os; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 
4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + S *cp = c; + S *dp = d; + S *rp = r; + S *pp = p; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && 
h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + foo (n, cp, dp, m, rp, o, pp, q); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + 
abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || sb[0].s != 5 + || sb[1].s != 1LL << (16 + 4) + || sb[2].s != 1LL << 8 + || sb[3].s != 6 + || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12 + || tb[2][0].s != 1LL << (16 + 8) + || tb[2][1].s != 1LL << 4 + || tb[3][0].s != 1LL << 8 + || tb[3][1].s != 1LL << (16 + 4) + || tb[4][0].s != 13 || tb[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + int n = 1; + test (n); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-7.C b/libgomp/testsuite/libgomp.c++/task-reduction-7.C new file mode 100644 index 00000000000..2a4d82ef77b --- /dev/null +++ 
b/libgomp/testsuite/libgomp.c++/task-reduction-7.C @@ -0,0 +1,145 @@ +typedef __SIZE_TYPE__ size_t; +extern "C" void abort (); + +void +bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], int *f, int *g, size_t n) +{ + #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], d[0][:n], e[0][1:n], f[:n], g[1:n]) + { + a[0] *= 12; + a[1] *= 13; + b[3] *= 14; + b[4] *= 15; + c[n] *= 16; + c[n + 1] *= 17; + d[0][0] *= 18; + d[0][1] *= 19; + e[0][1] *= 20; + e[0][2] *= 21; + f[0] *= 22; + f[1] *= 23; + g[1] *= 24; + g[2] *= 25; + } +} + +void +foo (size_t n, void *x, void *y) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int fb[n], gb[n * 2]; + int (&f)[n] = fb; + int (&g)[n * 2] = gb; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + f[i] = 1; + g[i + 1] = 1; + } + #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n]) + { + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, &f[0], &g[0], n); + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0] *= 12; + f[1] *= 13; + g[1] *= 14; + g[2] *= 15; + } + n = 0; + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || f[0] != 264 || f[1] != 299) + abort (); + if (g[1] != 336 || g[2] != 375) + abort (); +} + +void +baz (size_t n, void *x, void *y) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int fb[n], gb[n * 2]; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + fb[i] = 1; + gb[i + 1] = 1; + } + #pragma 
omp parallel num_threads(2) + #pragma omp master + { + int (&f)[n] = fb; + int (&g)[n * 2] = gb; + #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n]) + { + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, &f[0], &g[0], n); + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f, g[1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0] *= 12; + f[1] *= 13; + g[1] *= 14; + g[2] *= 15; + } + n = 0; + } + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || fb[0] != 264 || fb[1] != 299) + abort (); + if (gb[1] != 336 || gb[2] != 375) + abort (); +} + +int +main () +{ + int d[2], e[4]; + volatile int two; + two = 2; + #pragma omp parallel num_threads (2) + #pragma omp master + foo (two, (void *) d, (void *) e); + baz (two, (void *) d, (void *) e); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-8.C b/libgomp/testsuite/libgomp.c++/task-reduction-8.C new file mode 100644 index 00000000000..26737a32acb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-8.C @@ -0,0 +1,70 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; + +template <typename T, typename U> +void +foo (T &c, U &d) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + } +} + +template <typename T, typename U> +void +bar () +{ + T cs = 0; + T &c = cs; + U ds = 1; + #pragma omp parallel if (0) + { + U &d = ds; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) + { + T i; + #pragma omp for + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: 
b, d) + { + T j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (c, d); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3LL * 3LL * 3LL * 3LL) + if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort (); +} + +int +main () +{ + bar<int, long long int> (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-9.C b/libgomp/testsuite/libgomp.c++/task-reduction-9.C new file mode 100644 index 00000000000..068a7bb7b5d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-9.C @@ -0,0 +1,128 @@ +#include <omp.h> +#include <stdlib.h> + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a = { 0, 7 }; +S b (1, 5); + +void +foo () +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +void +test () +{ + S c = { 0, 7 }; + int t; + #pragma omp parallel num_threads (1) + { + S d (1, 5); + int r = 0; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b, d) \ + 
reduction (+: r) + { + int i; + #pragma omp master + t = omp_get_num_threads (); + r++; + a.s += 3; + c.s += 4; + #pragma omp for + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5 || r != t) + abort (); + } + if (a.s != 28 * 7 + 3 * t || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 + 4 * t || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C new file mode 100644 index 00000000000..f7fb9cac9e5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C @@ -0,0 +1,153 @@ +extern "C" void abort (); + +struct S { S (); S (unsigned long long int, int); ~S (); static int cnt1, cnt2, cnt3; unsigned long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (unsigned long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +rbar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + 
abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) \ + initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +S gs = { 0, 7 }; +S &g = gs; +S hs (1, 5); +S &h = hs; + +int +foo (int *a, int &b) +{ + int xs = 0; + int &x = xs; + #pragma omp taskloop reduction (+:x) in_reduction (+:b) + for (int i = 0; i < 64; i++) + { + x += a[i]; + b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int &b) +{ + unsigned long long int xs = 1; + unsigned long long int &x = xs; + #pragma omp taskloop reduction (*:x) in_reduction (*:b) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task in_reduction (*:b) + b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +void +test () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + S ms (0, 7); + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel + #pragma omp master + { + S ns = { 1, 5 }; + S &m = ms; + S &n = ns; + #pragma omp taskgroup task_reduction (+:b) + j = foo (a, b); + #pragma omp taskgroup task_reduction (*:d) + e = bar (c, d); + #pragma omp taskloop reduction (+: g, m) reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + } + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort 
(); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (ms.s != 63 * 64 * 4 || ms.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C new file mode 100644 index 00000000000..f1de5dae2ea --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C @@ -0,0 +1,253 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; +struct U { U (); ~U () {}; U (const U &); int u[4]; }; + +U::U +() +{ + u[0] = 0; u[1] = 1; u[2] = 2; u[3] = 3; +} + +U::U +(const U &r) +{ + u[0] = r.u[0]; u[1] = r.u[1]; u[2] = r.u[2]; u[3] = r.u[3]; +} + +void +foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int *&p, int (&q)[4][2]) +{ + int i; + U u; + u.u[2] = 8; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) firstprivate (u) nogroup + for (i = 0; i < 2; i++) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; 
+ t[2][0] *= 2; + t[3][1] *= 2; + if (u.u[2] != 8) + abort (); + } +} + +void +test (int &n) +{ + int cs[2] = { 0, 0 }; + int (&c)[2] = cs; + int ps[3] = { 0, 1, 4 }; + int (&p)[3] = ps; + int qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + int (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + int ms[3] = { 5, 0, 5 }; + int os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + U u; + u.u[2] = 10; + #pragma omp parallel + #pragma omp single + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + int (&m)[3] = ms; + int rs[5] = { 6, 7, 0, 0, 9 }; + int (&r)[5] = rs; + int (&o)[4] = os; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) firstprivate (u) + for (i = 0; i < 4; i++) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + if (u.u[2] != 10) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) firstprivate (u) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + int *cp = c; + long long int *dp = d; + int *rp = r; + int *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + if (u.u[2] != 10) + abort (); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + 
f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main 
() +{ + int n = 1; + test (n); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C new file mode 100644 index 00000000000..0588e4744f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C @@ -0,0 +1,314 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) nogroup + for (i = 0; i < 2; i++) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; 
+ b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], 
d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t 
!= 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 
2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C new file mode 100644 index 00000000000..41c7040ff1a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C @@ -0,0 +1,315 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int 
cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +template <int N> +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + reduction (default, +: o[n:n*2], m[1], p[0]) in_reduction (+: k[1:2][:], f[2:2]) \ + reduction (+: q[1:2][:], r[2:2]) in_reduction (+: g[n:n*2], e[1], h[0]) \ + in_reduction (*: s[1:2], t[2:2][:]) + for (i = 0; i < 2; i++) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + 
t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <typename T> +void +test (int n) +{ + T c[2] = { { 0, 7 }, { 0, 7 } }; + T p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + T q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + T ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + T tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + T d[] = { { 1, 5 }, { 1, 5 } }; + T m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + T r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + T o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + 
k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo<0> (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t 
!= 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 
16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test<S> (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c new file mode 100644 index 00000000000..77395e2b0f3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + int a[64]; + #pragma omp parallel + { + #pragma omp barrier + if (omp_get_thread_num () == 0) + { + #pragma omp cancel parallel + } + #pragma omp for + for (int i = 0; i < 64; i++) + a[i] = i; + if (omp_get_cancellation ()) + abort (); + } + #pragma 
omp parallel + { + #pragma omp barrier + if (omp_get_thread_num () == 0) + { + #pragma omp cancel parallel + } + #pragma omp taskgroup + { + #pragma omp for + for (int i = 0; i < 64; i++) + #pragma omp task + a[i] += i; + if (omp_get_cancellation ()) + abort (); + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c new file mode 100644 index 00000000000..b9af83595b0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c @@ -0,0 +1,68 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + int a = 0, i; + #pragma omp parallel + #pragma omp taskgroup + { + #pragma omp task + { + #pragma omp cancel taskgroup + if (omp_get_cancellation ()) + abort (); + } + #pragma omp taskwait + #pragma omp for reduction (task, +: a) + for (i = 0; i < 64; ++i) + { + a++; + #pragma omp task in_reduction (+: a) + { + volatile int zero = 0; + a += zero; + if (omp_get_cancellation ()) + abort (); + } + } + if (a != 64) + abort (); + #pragma omp task + { + if (omp_get_cancellation ()) + abort (); + } + } + a = 0; + #pragma omp parallel + #pragma omp taskgroup + { + #pragma omp taskwait + #pragma omp for reduction (task, +: a) + for (i = 0; i < 64; ++i) + { + a++; + #pragma omp task in_reduction (+: a) + { + volatile int zero = 0; + a += zero; + #pragma omp cancel taskgroup + if (omp_get_cancellation ()) + abort (); + } + } + if (a != 64) + abort (); + #pragma omp task + { + if (omp_get_cancellation ()) + abort (); + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-1.c b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-1.c new file mode 100644 index 00000000000..03cded42cbd --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-1.c @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); +int 
arr[64], arr2[64], arr3[64]; + +int * +foo (int x, int y, long z) +{ + int v; + switch (x) + { + case 1: + if (z != 0 || y < 0 || y >= 64) + abort (); + #pragma omp atomic capture + { + v = arr2[y]; + arr2[y]++; + } + if (v != 0) abort (); + return &arr[y]; + case 2: + if (y < 0 || y > 60 || (y & 3) || z < 0 || z >= 4) + abort (); + #pragma omp atomic + arr2[y + z] = arr2[y + z] + 4; + return &arr[y + z]; + case 3: + if (z < 0 || z > 60 || (z & 3) || y < 0 || y >= 4) + abort (); + #pragma omp atomic + arr2[y + z] = arr2[y + z] + 16; + return &arr[y + z]; + case 4: + if (y != 0 || z > 64 || z <= 0) + abort (); + #pragma omp atomic + arr2[z - 1] = arr2[z - 1] + 64; + return &arr[z - 1]; + case 5: + if ((y & 3) != 0 || y < 64 || y >= 96 + || (z & 127) != 0 || z < 512 || z >= 1024) + abort (); + y = (y - 64) + (z - 512) / 128; + #pragma omp atomic + arr2[y] = arr2[y] + 256; + return &arr[y]; + case 6: + if ((y & 3) != 0 || y <= 64 || y > 96 + || (z & 127) != 1 || z <= 513 || z > 1025) + abort (); + y = (y - 68) + (z - 641) / 128; + #pragma omp atomic + arr2[y] = arr2[y] + 1024; + return &arr[y]; + default: + abort (); + } +} + +volatile int beg, end, step, step2; +volatile unsigned int begu, endu; + +int +main () +{ + int m; + beg = 60; + end = -4; + step = -4; + step2 = 4; + begu = -64U; + endu = -32U; + #pragma omp parallel + #pragma omp master + { + int i; + for (i = 0; i < 64; i++) + #pragma omp task depend (iterator (j=i:i+1) , out : foo (1, j, 0)[0]) + arr[i] = i; + #pragma omp task depend (iterator (int k=beg:end:step,long int l=0:4:1) , inout : \ + foo (2, k, l)[0], foo (3, l, k)[0]) private (i) + for (i = 0; i < 64; i++) + if (arr[i] != i) + abort (); + else + arr[i] = arr[i] + 1; + #pragma omp task depend (iterator (int *p=&arr3[64]:&arr3[0]:-1) , inout : \ + foo (4, 0, p - &arr3[0])[0]) depend (in : beg) + for (i = 0; i < 64; i++) + if (arr[i] != i + 1) + abort (); + else + arr[i] = arr[i] + 2; + #pragma omp task depend (iterator (unsigned n=begu:endu:step2, 
unsigned int o = 512: 1024U: (unsigned char) 128), inout : \ + foo (5, n + 128, o)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 3) + abort (); + else + arr[i] = arr[i] + 4; + #pragma omp task depend (iterator (int unsigned p=endu:begu:step,unsigned q= 1025U:513U:(signed char) -128), in : \ + foo (6, p + 128, q)[0]) + for (i = 0; i < 64; i++) + if (arr[i] != i + 7) + abort (); + else + arr[i] = arr[i] + 8; + } + for (m = 0; m < 64; m++) + if (arr[m] != m + 15 || arr2[m] != (m < 32 ? 1365 : 85)) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c new file mode 100644 index 00000000000..d9cbfdcbe79 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/depend-iterator-2.c @@ -0,0 +1,63 @@ +volatile int v; + +__attribute__((noipa)) void +foo (int *p, int i) +{ + #pragma omp task depend (out: p[0]) + v++; + #pragma omp task depend (in: p[0]) + v++; + #pragma omp task depend (inout: p[0]) + v++; + #pragma omp task depend (mutexinoutset: p[0]) + v++; + #pragma omp task depend (out: p[0]) depend (in: p[1]) + v++; + #pragma omp task depend (in: p[0]) depend (inout: p[1]) + v++; + #pragma omp task depend (inout: p[0]) depend (mutexinoutset: p[1]) + v++; + #pragma omp task depend (mutexinoutset: p[0]) depend (out: p[1]) + v++; + #pragma omp task depend (iterator (j=0:2) , out : p[j]) + v++; + #pragma omp task depend (iterator (j=0:2) , in : p[j]) + v++; + #pragma omp task depend (iterator (j=0:2) , inout : p[j]) + v++; + #pragma omp task depend (iterator (j=0:2) , mutexinoutset : p[j]) + v++; + #pragma omp task depend (iterator (j=0:2) , out : p[j]) depend (iterator (j=0:2) , in : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:2) , in : p[j]) depend (iterator (j=0:2) , inout : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:2) , inout : p[j]) depend (iterator (j=0:2) , mutexinoutset : p[j + 2]) + v++; + #pragma omp task depend (iterator 
(j=0:2) , mutexinoutset : p[j]) depend (iterator (j=0:2) , out : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:i) , out : p[j]) + v++; + #pragma omp task depend (iterator (j=0:i) , in : p[j]) + v++; + #pragma omp task depend (iterator (j=0:i) , inout : p[j]) + v++; + #pragma omp task depend (iterator (j=0:i) , mutexinoutset : p[j]) + v++; + #pragma omp task depend (iterator (j=0:i) , out : p[j]) depend (iterator (j=0:i) , in : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:i) , in : p[j]) depend (iterator (j=0:i) , inout : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:i) , inout : p[j]) depend (iterator (j=0:i) , mutexinoutset : p[j + 2]) + v++; + #pragma omp task depend (iterator (j=0:i) , mutexinoutset : p[j]) depend (iterator (j=0:i) , out : p[j + 2]) + v++; +} + +int +main () +{ + int p[4]; + foo (p, 2); + foo (p, -1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-1.c b/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-1.c new file mode 100644 index 00000000000..86322eddbcc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-1.c @@ -0,0 +1,28 @@ +int +main () +{ + int a, b, c, d; + #pragma omp parallel num_threads (6) + #pragma omp single + { + #pragma omp task depend(out: c) + c = 1; + #pragma omp task depend(out: a) + a = 2; + #pragma omp task depend(out: b) + b = 3; + /* The above 3 tasks can be scheduled in any order. */ + #pragma omp task depend(in: a) depend(mutexinoutset: c) + c += a; + #pragma omp task depend(in: b) depend(mutexinoutset: c) + c += b; + /* The above 2 tasks are mutually exclusive and need to wait + for the first and second or first and third tasks respectively. */ + #pragma omp task depend(in: c) + d = c; + /* The above task needs to wait for the mutexinoutset tasks. 
*/ + } + if (d != 6) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-2.c b/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-2.c new file mode 100644 index 00000000000..ed92063e2c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/depend-mutexinout-2.c @@ -0,0 +1,59 @@ +int +main () +{ + int a, b, c = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp barrier + #pragma omp single + { + #pragma omp task depend(out: a) + { + int i; + a = 0; + for (i = 0; i < 524288; ++i) + { + asm volatile ("" : "+g" (a)); + a++; + } + } + #pragma omp task depend(out: b) + { + int i; + b = 0; + for (i = 0; i < 64; ++i) + { + asm volatile ("" : "+g" (b)); + b++; + } + } + #pragma omp task depend(in: a) depend(mutexinoutset: c) + { + int i; + int d = c; + for (i = 0; i < 524288 + 64 - a; ++i) + { + asm volatile ("" : "+g" (d) : "g" (&a) : "memory"); + d++; + } + asm volatile ("" : "+g" (d), "+g" (c)); + c = d; + } + #pragma omp task depend(in: b) depend(mutexinoutset: c) + { + int i; + int d = c; + for (i = 0; i < 524288 + 64 - b; ++i) + { + asm volatile ("" : "+g" (d) : "g" (&b) : "memory"); + d++; + } + asm volatile ("" : "+g" (d), "+g" (c)); + c = d; + } + } + } + if (c != 524288 + 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/depobj-1.c b/libgomp/testsuite/libgomp.c-c++-common/depobj-1.c new file mode 100644 index 00000000000..a07a0c889c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/depobj-1.c @@ -0,0 +1,99 @@ +#include <stdlib.h> +#include <omp.h> + +void +dep (void) +{ + int x = 1; + omp_depend_t d1, d2; + #pragma omp depobj (d1) depend(in: x) + #pragma omp depobj (d2) depend(in: x) + #pragma omp depobj (d2) update(out) + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared (x) depend(depobj: d2) + x = 2; + #pragma omp task shared (x) depend(depobj: d1) + if (x != 2) + abort (); + } + #pragma omp depobj (d2) destroy + 
#pragma omp depobj (d1) destroy +} + +void +dep2 (void) +{ + #pragma omp parallel + #pragma omp single + { + int x = 1; + omp_depend_t d1, d2; + #pragma omp depobj (d1) depend(out: x) + #pragma omp depobj (*&d2) depend (in:x) + #pragma omp depobj(d2)update(in) + #pragma omp task shared (x) depend(depobj:d1) + x = 2; + #pragma omp task shared (x) depend(depobj : d2) + if (x != 2) + abort (); + #pragma omp taskwait + #pragma omp depobj(d1)destroy + #pragma omp depobj((&d2)[0]) destroy + } +} + +void +dep3 (void) +{ + omp_depend_t d[2]; + #pragma omp parallel + { + int x = 1; + #pragma omp single + { + #pragma omp depobj(d[0]) depend(out:x) + #pragma omp depobj(d[1]) depend(in: x) + #pragma omp task shared (x) depend(depobj: *d) + x = 2; + #pragma omp task shared (x) depend(depobj: *(d + 1)) + if (x != 2) + abort (); + } + } + #pragma omp depobj(d[0]) destroy + #pragma omp depobj(d[1]) destroy +} + +int xx; +omp_depend_t dd1, dd2; + +void +antidep (void) +{ + xx = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared(xx) depend(depobj:dd2) + if (xx != 1) + abort (); + #pragma omp task shared(xx) depend(depobj:dd1) + xx = 2; + } +} + +int +main () +{ + dep (); + dep2 (); + dep3 (); + #pragma omp depobj (dd1) depend (inout: xx) + #pragma omp depobj (dd2) depend (in : xx) + antidep (); + #pragma omp depobj (dd2) destroy + #pragma omp depobj (dd1) destroy + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/display-affinity-1.c b/libgomp/testsuite/libgomp.c-c++-common/display-affinity-1.c new file mode 100644 index 00000000000..a67bb37f1f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/display-affinity-1.c @@ -0,0 +1,91 @@ +/* { dg-set-target-env-var OMP_PROC_BIND "spread,close" } */ +/* { dg-set-target-env-var OMP_PLACES "cores" } */ +/* { dg-set-target-env-var OMP_NUM_THREADS "4" } */ +/* { dg-set-target-env-var OMP_AFFINITY_FORMAT "hello" } */ + +#include <omp.h> +#include <string.h> +#include <stdlib.h> + +int +main () 
+{ +#define FMT "L:%0.5L%%%n>%32H<!%.33{host}!%.6P_%i_%0.18i_%0.7{ancestor_tnum} %18A" + char buf[] = FMT, hostname[256], buf2[512 + 32], *q; + size_t l, l2, l3; + char *r = getenv ("OMP_AFFINITY_FORMAT"); + if (r && strcmp (r, "hello") == 0) + { + if (omp_get_affinity_format (NULL, 0) != 5) + abort (); + if (omp_get_affinity_format (buf2, 3) != 5 + || strcmp (buf2, "he") != 0) + abort (); + if (omp_get_affinity_format (buf2, 6) != 5 + || strcmp (buf2, "hello") != 0) + abort (); + } + omp_set_affinity_format (buf); + memset (buf, '^', sizeof (buf)); + if (omp_get_affinity_format (NULL, 0) != sizeof (buf) - 1) + abort (); + if (omp_get_affinity_format (buf, 3) != sizeof (buf) - 1 + || buf[0] != FMT[0] || buf[1] != FMT[1] || buf[2] != '\0') + abort (); + memset (buf, ' ', sizeof (buf)); + if (omp_get_affinity_format (buf, sizeof (buf) - 1) != sizeof (buf) - 1 + || strncmp (buf, FMT, sizeof (buf) - 2) != 0 + || buf[sizeof (buf) - 2] != '\0') + abort (); + memset (buf, '-', sizeof (buf)); + if (omp_get_affinity_format (buf, sizeof (buf)) != sizeof (buf) - 1 + || strcmp (buf, FMT) != 0) + abort (); + memset (buf, '0', sizeof (buf)); + omp_display_affinity (NULL); + omp_display_affinity (""); + omp_display_affinity ("%%%0.9N"); + omp_set_affinity_format ("%{host}"); + l = omp_capture_affinity (hostname, sizeof hostname, NULL); + if (l < sizeof (hostname)) + { + if (strlen (hostname) != l) + abort (); + l2 = omp_capture_affinity (NULL, 0, + "%0.5{nesting_level}%%%32{host}|||%.33H" + "%0.7a%3N!%N!"); + if (l2 != (5 + 1 + (l > 32 ? l : 32) + 3 + (l > 33 ? 
l : 33) + + 7 + 3 + 1 + 1 + 1)) + abort (); + omp_set_affinity_format ("%.5L%%%32H|||%.33{host}%0.7{ancestor_tnum}" + "%3{num_threads}!%{num_threads}!"); + l3 = omp_capture_affinity (buf2, sizeof buf2, ""); + if (l3 != l2) + abort (); + if (memcmp (buf2, " 0%", 5 + 1) != 0) + abort (); + q = buf2 + 6; + if (memcmp (q, hostname, l) != 0) + abort (); + q += l; + if (l < 32) + for (l3 = 32 - l; l3; l3--) + if (*q++ != ' ') + abort (); + if (memcmp (q, "|||", 3) != 0) + abort (); + q += 3; + if (l < 33) + for (l3 = 33 - l; l3; l3--) + if (*q++ != ' ') + abort (); + if (memcmp (q, hostname, l) != 0) + abort (); + q += l; + if (strcmp (q, "-0000011 !1!") != 0) + abort (); + } + #pragma omp parallel num_threads (4) proc_bind(spread) + omp_display_affinity ("%0.2a!%n!%.4L!%N;%.2t;%0.2T;%{team_num};%{num_teams};%A"); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-10.c b/libgomp/testsuite/libgomp.c-c++-common/for-10.c new file mode 100644 index 00000000000..2b339a599ce --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-10.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-4.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-11.c b/libgomp/testsuite/libgomp.c-c++-common/for-11.c new file mode 100644 index 00000000000..e46b4dd04c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-11.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-5.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-12.c b/libgomp/testsuite/libgomp.c-c++-common/for-12.c new file mode 100644 index 00000000000..e28ba93f8fe --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-12.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-6.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-13.c b/libgomp/testsuite/libgomp.c-c++-common/for-13.c new file mode 100644 
index 00000000000..a4767278fbb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-13.c @@ -0,0 +1,99 @@ +unsigned short a[256]; + +__attribute__((noinline, noclone)) void +bar (void *x, unsigned short z) +{ + unsigned short *y = (unsigned short *) x; + if (y < &a[5] || y > &a[222] || y == &a[124]) + __builtin_abort (); + *y += z; +} + +__attribute__((noinline, noclone)) void +foo (void *qx, void *rx, void *sx, int n) +{ + unsigned short (*q)[n], (*r)[n], (*s)[n], (*p)[n]; + q = (typeof (q)) qx; + r = (typeof (r)) rx; + s = (typeof (s)) sx; + #pragma omp for + for (p = q; p != r; p++) + bar (p, 1); + #pragma omp for + for (p = s; p != r; p--) + bar (p, 2); + #pragma omp for + for (p = q; p != r; p = p + 1) + bar (p, 4); + #pragma omp for + for (p = s; p != r; p = p - 1) + bar (p, 8); + #pragma omp for + for (p = q; p != r; p = 1 + p) + bar (p, 16); + #pragma omp for + for (p = s; p != r; p = -1 + p) + bar (p, 32); + #pragma omp for + for (p = q; p != r; p += 1) + bar (p, 64); + #pragma omp for + for (p = s; p != r; p -= 1) + bar (p, 128); +} + +__attribute__((noinline, noclone)) void +baz (void *qx, void *rx, void *sx, int n) +{ + unsigned short (*q)[n], (*r)[n], (*s)[n], (*p)[n]; + q = (typeof (q)) qx; + r = (typeof (r)) rx; + s = (typeof (s)) sx; + #pragma omp for + for (p = q; p < r; p++) + bar (p, 256); + #pragma omp for + for (p = s; p > r; p--) + bar (p, 512); + #pragma omp for + for (p = q; p < r; p = p + 1) + bar (p, 1024); + #pragma omp for + for (p = s; p > r; p = p - 1) + bar (p, 2048); + #pragma omp for + for (p = q; p < r; p = 1 + p) + bar (p, 4096); + #pragma omp for + for (p = s; p > r; p = -1 + p) + bar (p, 8192); + #pragma omp for + for (p = q; p < r; p += 1) + bar (p, 16384); + #pragma omp for + for (p = s; p > r; p -= 1) + bar (p, 32768U); +} + +int +main () +{ + int i; + volatile int j = 7; +#pragma omp parallel + { + foo (&a[5 + (j - 7)], &a[124 + (j - 7)], &a[222 + (j - 7)], j); + baz (&a[5 + (j - 7)], &a[124 + (j - 7)], &a[222 + (j 
- 7)], j); + } + for (i = 0; i < 256; i++) + if (i < 5 || i > 222 || i == 124 || ((i - 5) % 7) != 0) + { + if (a[i]) + __builtin_abort (); + } + else if (i < 124 && a[i] != 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384) + __builtin_abort (); + else if (i > 124 && a[i] != 2 + 8 + 32 + 128 + 512 + 2048 + 8192 + 32768U) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-14.c b/libgomp/testsuite/libgomp.c-c++-common/for-14.c new file mode 100644 index 00000000000..56440ab740f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-14.c @@ -0,0 +1,110 @@ +/* { dg-additional-options "-std=gnu99" { target c } } */ + +extern +#ifdef __cplusplus +"C" +#endif +void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F distribute +#define G d +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute +#define G d_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute parallel for +#define G dpf +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for dist_schedule(static, 128) +#define G dpf_ds128 +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd +#define G dpfs +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd dist_schedule(static, 128) +#define G dpfs_ds128 +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + int err = 0; + #pragma omp teams reduction(|:err) + { + err |= test_d_normal (); + err |= 
test_d_ds128_normal (); + err |= test_ds_normal (); + err |= test_ds_ds128_normal (); + err |= test_dpf_static (); + err |= test_dpf_static32 (); + err |= test_dpf_auto (); + err |= test_dpf_guided32 (); + err |= test_dpf_runtime (); + err |= test_dpf_ds128_static (); + err |= test_dpf_ds128_static32 (); + err |= test_dpf_ds128_auto (); + err |= test_dpf_ds128_guided32 (); + err |= test_dpf_ds128_runtime (); + err |= test_dpfs_static (); + err |= test_dpfs_static32 (); + err |= test_dpfs_auto (); + err |= test_dpfs_guided32 (); + err |= test_dpfs_runtime (); + err |= test_dpfs_ds128_static (); + err |= test_dpfs_ds128_static32 (); + err |= test_dpfs_ds128_auto (); + err |= test_dpfs_ds128_guided32 (); + err |= test_dpfs_ds128_runtime (); + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-15.c b/libgomp/testsuite/libgomp.c-c++-common/for-15.c new file mode 100644 index 00000000000..512b9725603 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-15.c @@ -0,0 +1,115 @@ +/* { dg-additional-options "-std=gnu99" { target c } } */ + +extern +#ifdef __cplusplus +"C" +#endif +void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F for +#define G f +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute +#define G td +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute +#define G td_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef 
N +#undef F +#undef G + +#define F teams distribute parallel for +#define G tdpf +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for dist_schedule(static, 128) +#define G tdpf_ds128 +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd +#define G tdpfs +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd dist_schedule(static, 128) +#define G tdpfs_ds128 +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_td_normal () + || test_td_ds128_normal () + || test_tds_normal () + || test_tds_ds128_normal () + || test_tdpf_static () + || test_tdpf_static32 () + || test_tdpf_auto () + || test_tdpf_guided32 () + || test_tdpf_runtime () + || test_tdpf_ds128_static () + || test_tdpf_ds128_static32 () + || test_tdpf_ds128_auto () + || test_tdpf_ds128_guided32 () + || test_tdpf_ds128_runtime () + || test_tdpfs_static () + || test_tdpfs_static32 () + || test_tdpfs_auto () + || test_tdpfs_guided32 () + || test_tdpfs_runtime () + || test_tdpfs_ds128_static () + || test_tdpfs_ds128_static32 () + || test_tdpfs_ds128_auto () + || test_tdpfs_ds128_guided32 () + || test_tdpfs_ds128_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-2.h b/libgomp/testsuite/libgomp.c-c++-common/for-2.h index 0bd116c5aec..91a604a6c25 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/for-2.h +++ b/libgomp/testsuite/libgomp.c-c++-common/for-2.h @@ -21,6 +21,7 @@ noreturn (void) #define OMPFROM(v) do {} while (0) #endif +#ifndef CONDNE __attribute__((noinline, noclone)) void N(f0) (void) { @@ -311,3 +312,292 @@ N(test) (void) return 1; return 0; } + +#else + +__attribute__((noinline, noclone)) void +N(f20) (void) +{ + int i; + OMPTGT +#pragma omp F S + for (i = 0; i != 1500; i++) + a[i] += 2; +} + +__attribute__((noinline, noclone)) void +N(f21) (void) +{ + OMPTGT +#pragma omp F S + for (unsigned int i = __INT_MAX__; i < 1500U + __INT_MAX__; i += 2 - 
1) + a[(i - __INT_MAX__)] -= 2; +} + +__attribute__((noinline, noclone)) void +N(f22) (void) +{ + unsigned long long i; + OMPTGT +#pragma omp F S + for (i = __LONG_LONG_MAX__ + 1500ULL - 27; + i != __LONG_LONG_MAX__ - 27ULL; i -= 3 - 2) + a[i + 26LL - __LONG_LONG_MAX__] -= 4; +} + +__attribute__((noinline, noclone)) void +N(f23) (long long n1, long long n2) +{ + OMPTGT +#pragma omp F S + for (long long i = n1 + 23; i != n2 - 25; --i) + a[i + 48] += 7; +} + +__attribute__((noinline, noclone)) void +N(f24) (void) +{ + unsigned int i; + OMPTGT +#pragma omp F S + for (i = 30; i != 30; i += 1) + a[i] += 10; +} + +__attribute__((noinline, noclone)) void +N(f25) (int n11, int n12, int n21, int n22, int n31, int n32, + int s2) +{ + SC int v1, v2, v3; + OMPTGT +#pragma omp F S collapse(3) + for (v1 = n11; v1 != n12; v1 += 17 - 19 + 3) + for (v2 = n21; v2 < n22; v2 += s2) + for (v3 = n31; v3 != n32; ++v3) + b[v1][v2][v3] += 2.5; +} + +__attribute__((noinline, noclone)) void +N(f26) (int n11, int n12, int n21, int n22, long long n31, long long n32, + int s2) +{ + SC int v1, v2; + SC long long v3; + OMPTGT +#pragma omp F S collapse(3) + for (v1 = n11; v1 != n12; v1 += -1) + for (v2 = n21; v2 > n22; v2 += s2) + for (v3 = n31; v3 != n32; v3 --) + b[v1][v2 / 2][v3] -= 4.5; +} + +__attribute__((noinline, noclone)) void +N(f27) (void) +{ + SC unsigned int v1, v3; + SC unsigned long long v2; + OMPTGT +#pragma omp F S collapse(3) + for (v1 = 0; v1 < 20; v1 += 2) + for (v2 = __LONG_LONG_MAX__ + 11ULL; + v2 != __LONG_LONG_MAX__ - 4ULL; -- v2) + for (v3 = 10; v3 != 0; v3--) + b[v1 >> 1][v2 - __LONG_LONG_MAX__ + 3][v3 - 1] += 5.5; +} + +__attribute__((noinline, noclone)) void +N(f28) (void) +{ + SC long long v1, v2, v3; + OMPTGT +#pragma omp F S collapse(3) + for (v1 = 0; v1 != 20; v1 -= 17 - 18) + for (v2 = 30; v2 < 20; v2++) + for (v3 = 10; v3 < 0; v3--) + b[v1][v2][v3] += 5.5; +} + +__attribute__((noinline, noclone)) void +N(f29) (void) +{ + int i; + OMPTGT +#pragma omp F S + for (i = 
20; i != 20; i++) + { + a[i] += 2; + noreturn (); + a[i] -= 4; + } +} + +__attribute__((noinline, noclone)) void +N(f30) (void) +{ + SC int i; + OMPTGT +#pragma omp F S collapse(3) + for (i = 0; i != 10; i++) + for (int j = 10; j < 8; j++) + for (long k = -10; k != 10; k++) + { + b[i][j][k] += 4; + noreturn (); + b[i][j][k] -= 8; + } +} + +__attribute__((noinline, noclone)) void +N(f31) (int n) +{ + int i; + OMPTGT +#pragma omp F S + for (i = 20; i != n; i++) + { + a[i] += 8; + noreturn (); + a[i] -= 16; + } +} + +__attribute__((noinline, noclone)) void +N(f32) (int n) +{ + SC int i; + OMPTGT +#pragma omp F S collapse(3) + for (i = 0; i != 10; i++) + for (int j = n; j != 12; j++) + for (long k = -10; k != 10; k++) + { + b[i][j][k] += 16; + noreturn (); + b[i][j][k] -= 32; + } +} + +__attribute__((noinline, noclone)) void +N(f33) (void) +{ + int *i; + OMPTGT +#pragma omp F S + for (i = a; i != &a[1500]; i++) + i[0] += 2; +} + +__attribute__((noinline, noclone)) void +N(f34) (void) +{ + SC float *i; + OMPTGT +#pragma omp F S collapse(3) + for (i = &b[0][0][0]; i != &b[0][0][10]; i++) + for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10) + for (float *k = &b[0][0][10]; k != &b[0][0][0]; --k) + b[i - &b[0][0][0]][(j - &b[0][0][0]) / 10 - 1][(k - &b[0][0][0]) - 1] + -= 3.5; +} + +__attribute__((noinline, noclone)) int +N(test) (void) +{ + int i, j, k; + for (i = 0; i < 1500; i++) + a[i] = i - 25; + OMPTO (a); + N(f20) (); + OMPFROM (a); + for (i = 0; i < 1500; i++) + if (a[i] != i - 23) + return 1; + N(f21) (); + OMPFROM (a); + for (i = 0; i < 1500; i++) + if (a[i] != i - 25) + return 1; + N(f22) (); + OMPFROM (a); + for (i = 0; i < 1500; i++) + if (a[i] != i - 29) + return 1; + N(f23) (1500LL - 1 - 23 - 48, -1LL + 25 - 48); + OMPFROM (a); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + N(f24) (); + OMPFROM (a); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; 
k++) + b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k; + OMPTO (b); + N(f25) (0, 10, 0, 15, 0, 10, 1); + OMPFROM (b); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.5 * j - 1.5 * k) + return 1; + N(f25) (0, 10, 30, 15, 0, 10, 5); + OMPFROM (b); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.5 * j - 1.5 * k) + return 1; + N(f26) (9, -1, 29, 0, 9, -1, -2); + OMPFROM (b); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k) + return 1; + N(f27) (); + OMPFROM (b); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f28) (); + OMPFROM (b); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f29) (); + N(f30) (); + N(f31) (20); + N(f32) (12); + OMPFROM (a); + OMPFROM (b); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f33) (); + N(f34) (); + OMPFROM (a); + OMPFROM (b); + for (i = 0; i < 1500; i++) + if (a[i] != i - 20) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i - 2.5 + 1.5 * j - 1.5 * k) + return 1; + return 0; +} +#endif diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-7.c b/libgomp/testsuite/libgomp.c-c++-common/for-7.c new file mode 100644 index 00000000000..43461bb5534 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-7.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-8.c b/libgomp/testsuite/libgomp.c-c++-common/for-8.c new 
file mode 100644 index 00000000000..ed6bf7111b5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-8.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-2.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/for-9.c b/libgomp/testsuite/libgomp.c-c++-common/for-9.c new file mode 100644 index 00000000000..dfd674fee56 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/for-9.c @@ -0,0 +1,4 @@ +/* { dg-additional-options "-std=gnu99" {target c } } */ + +#define CONDNE +#include "for-3.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/master-combined-1.c b/libgomp/testsuite/libgomp.c-c++-common/master-combined-1.c new file mode 100644 index 00000000000..3e6da095122 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/master-combined-1.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-std=c99" { target c } } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include <omp.h> +#include <stdlib.h> + +#define N 64 + +int +main () +{ + int p, *q, i, l; + int a[N]; + q = a; + #pragma omp parallel master num_threads(4) private (p) shared(a) + { + int i; + p = omp_get_thread_num (); + if (p != 0) + abort (); + #pragma omp taskloop nogroup + for (i = 0; i < N; ++i) + { + if (omp_get_thread_num () >= 4) + abort (); + a[i] = i; + } + } + #pragma omp parallel num_threads(4) + { + #pragma omp master taskloop lastprivate (i, l) firstprivate (q) + for (i = 0; i != N; i = i + 1) + l = q[i]; + } + if (i != N || l != N - 1) + abort (); + #pragma omp parallel master taskloop num_threads(4) \ + lastprivate (i, l) firstprivate (q) + for (i = 0; i < N - 5; i += 2) + if (q[i] != i) + abort (); + else + l = q[i]; + if (i != N - 4 || l != N - 6) + abort (); + #pragma omp parallel master taskloop simd num_threads(4) + for (i = 0; i < N; i++) + a[i] = 2 * a[i]; + if (i != N) + 
abort (); + #pragma omp parallel num_threads(4) + { + int j; + #pragma omp master taskloop simd collapse(2) + for (i = 0; i < 2; i += 2) + for (j = 0; j < N; j++) + a[j] = a[j] + 1; + } + for (i = 0; i < N; i++) + if (a[i] != 2 * i + 1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pause-1.c b/libgomp/testsuite/libgomp.c-c++-common/pause-1.c new file mode 100644 index 00000000000..5367a72832e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pause-1.c @@ -0,0 +1,23 @@ +#include <omp.h> +#include <stdlib.h> + +int a[64]; + +int +main () +{ + int i; + #pragma omp parallel for + for (i = 0; i < 64; i++) + a[i] = i; + omp_pause_resource (omp_pause_soft, omp_get_initial_device ()); + #pragma omp parallel for + for (i = 0; i < 64; i++) + a[i] += i; + omp_pause_resource_all (omp_pause_hard); + #pragma omp parallel for + for (i = 0; i < 64; i++) + if (a[i] != 2 * i) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pause-2.c b/libgomp/testsuite/libgomp.c-c++-common/pause-2.c new file mode 100644 index 00000000000..e4781cdc707 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pause-2.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-require-effective-target tls_runtime } */ + +#include <omp.h> +#include <stdlib.h> + +int t = 128; +#pragma omp threadprivate (t) + +int +main () +{ + #pragma omp parallel + t = omp_get_thread_num () + 256; + #pragma omp parallel + if (t != omp_get_thread_num () + 256) + abort (); + omp_pause_resource (omp_pause_soft, omp_get_initial_device ()); + /* This goes beyond what is required by the standard, we actually + check if the threads other than the initial one have been destroyed. 
*/ + #pragma omp parallel + { + if (omp_get_thread_num () != 0 && t != 128) + abort (); + t = omp_get_thread_num () + 384; + } + #pragma omp parallel + if (t != omp_get_thread_num () + 384) + abort (); + omp_pause_resource_all (omp_pause_hard); + #pragma omp parallel + { + if (omp_get_thread_num () != 0 && t != 128) + abort (); + t = omp_get_thread_num () + 512; + } + #pragma omp parallel + if (t != omp_get_thread_num () + 512) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr66199-10.c b/libgomp/testsuite/libgomp.c-c++-common/pr66199-10.c new file mode 100644 index 00000000000..301fa6c2551 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr66199-10.c @@ -0,0 +1,60 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f1 (long a, long b) +{ + long d; + #pragma omp teams distribute parallel for simd default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; + return d; +} + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp teams distribute parallel for simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } + return d + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams distribute parallel for simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +__attribute__((noinline, noclone)) long +f4 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams distribute parallel for simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; 
d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f1 (0, 1024) != 1024 + || f2 (0, 1024, 17) != 1024 + (17 + 5 * 1023) + || f3 (0, 32, 0, 32) != 64 + || f4 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr66199-11.c b/libgomp/testsuite/libgomp.c-c++-common/pr66199-11.c new file mode 100644 index 00000000000..bcb596eef5c --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr66199-11.c @@ -0,0 +1,38 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ +/* { dg-options "-O2" { target c } } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp teams distribute parallel for default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } + return d + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f2 (0, 1024, 17) != 1024 + (17 + 5 * 1023) + || f3 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr66199-12.c b/libgomp/testsuite/libgomp.c-c++-common/pr66199-12.c new file mode 100644 index 00000000000..78eb12ac7aa --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr66199-12.c @@ -0,0 +1,60 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f1 (long a, long b) +{ + long d; + #pragma omp teams distribute simd default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; 
d++) + u[d] = v[d] + w[d]; + return d; +} + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp teams distribute simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } + return d + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +__attribute__((noinline, noclone)) long +f4 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams distribute simd default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f1 (0, 1024) != 1024 + || f2 (0, 1024, 17) != 1024 + (17 + 5 * 1023) + || f3 (0, 32, 0, 32) != 64 + || f4 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr66199-13.c b/libgomp/testsuite/libgomp.c-c++-common/pr66199-13.c new file mode 100644 index 00000000000..2f41a3860f4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr66199-13.c @@ -0,0 +1,64 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f1 (long a, long b) +{ + long d; + #pragma omp teams default(none) shared(a, b, d, u, v, w) + #pragma omp distribute simd firstprivate (a, b) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; + return d; +} + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w) 
+ #pragma omp distribute simd linear(d) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } + return d + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w) + #pragma omp distribute simd firstprivate (a1, b1, a2, b2) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +__attribute__((noinline, noclone)) long +f4 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams default(none) firstprivate (a1, b1, a2, b2) shared(d1, d2, u, v, w) + #pragma omp distribute simd collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f1 (0, 1024) != 1024 + || f2 (0, 1024, 17) != 1024 + (17 + 5 * 1023) + || f3 (0, 32, 0, 32) != 64 + || f4 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr66199-14.c b/libgomp/testsuite/libgomp.c-c++-common/pr66199-14.c new file mode 100644 index 00000000000..21936bfafaf --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/pr66199-14.c @@ -0,0 +1,39 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp teams default(none) firstprivate (a, b, c) shared(d, e, u, v, w) + #pragma omp distribute lastprivate(d, e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } + return d + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp teams default(none) shared(a1, b1, a2, b2, d1, d2, u, v, w) + #pragma omp distribute firstprivate (a1, b1, a2, b2) 
lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f2 (0, 1024, 17) != 1024 + (17 + 5 * 1023) + || f3 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/simd-1.c b/libgomp/testsuite/libgomp.c-c++-common/simd-1.c new file mode 100644 index 00000000000..cce234d8171 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/simd-1.c @@ -0,0 +1,71 @@ +// { dg-do run } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +#define N 1024 +int a[N], b[N]; + +int +f1 (void) +{ + int i; + #pragma omp simd private (i) + for (i = 0; i < N; i++) + a[i] = b[i] * 2; + #pragma omp simd lastprivate (i) + for (i = 0; i < N; i++) + a[i] += b[i] * 2; + return i; +} + +int +f2 (void) +{ + int i, j; + #pragma omp simd private (i), collapse (2), lastprivate (j) + for (i = 0; i < 32; i++) + for (j = 0; j < 32; ++j) + a[i * 32 + j] += b[i * 32 + j] * 2; + return j; +} + +int +f3 (void) +{ + static int i; + #pragma omp for simd private (i) + for (i = 0; i < N; ++i) + a[i] = b[i] * 2; + #pragma omp for simd lastprivate (i) + for (i = 0; i < N; ++i) + a[i] += b[i] * 2; + return i; +} + +int +f4 (void) +{ + static int i, j; + #pragma omp for simd private (i)collapse (2)lastprivate (j) + for (i = 0; i < 32; ++i) + for (j = 0; j < 32; j++) + a[i * 32 + j] += b[i * 32 + j] * 2; + return j; +} + +int +main () +{ + int i; + for (i = 0; i < N; ++i) + a[i] = b[i] = i; + if (f1 () != 1024 || f2 () != 32) + __builtin_abort (); + #pragma omp parallel num_threads(4) + if (f3 () != 1024 || f4 () != 32) + __builtin_abort (); + for (i = 0; i < N; ++i) + if (a[i] != 6 * i || b[i] != i) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c 
b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c new file mode 100644 index 00000000000..6c6191d96d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c @@ -0,0 +1,58 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a; +long int b = 1; + +void +foo (void) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + } +} + +int +main () +{ + int c = 0; + #pragma omp parallel + #pragma omp single + { + long int d = 1; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + int j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d != (THREEP4 * THREEP4 * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 12 * 9) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c new file mode 100644 index 00000000000..038b0e269e7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c @@ -0,0 +1,56 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); +int a, b[3] = { 1, 1, 1 }; +unsigned long int c[2] = { ~0UL, ~0UL }; + +void +bar (int i) +{ + #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \ + in_reduction (+: a) + { + a += 4; + b[1] *= 4; + c[1] &= ~(1UL << (i + 16)); + } +} + +void +foo (unsigned long long int x, unsigned long long int y, unsigned long long int z) +{ + unsigned long long int i; + #pragma omp for schedule(runtime) reduction (task, +: a) \ + reduction (task, *: b) reduction (task, &: c[1:1]) + for (i = x; i < y; i += z) + { + a++; + b[0] *= 2; + bar (i); + 
b[2] *= 3; + c[1] &= ~(1UL << i); + } +} + +int +main () +{ + volatile int two = 2; + foo (two, 7 * two, two); + if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || c[1] != ~0x15541554UL) + abort (); + a = 0; + b[0] = 1; + b[1] = 1; + b[2] = 1; + c[1] = ~0UL; + #pragma omp parallel + foo (two, 8 * two, two); + if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || c[1] != ~0x55545554UL) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c new file mode 100644 index 00000000000..0ad92735ca7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c @@ -0,0 +1,67 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); +int a, b[3] = { 1, 1, 1 }; +unsigned long int c[2] = { ~0UL, ~0UL }; + +void +bar (int i) +{ + #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \ + in_reduction (+: a) + { + a += 4; + b[1] *= 4; + c[1] &= ~(1UL << (i + 16)); + } +} + +void +foo (int x) +{ + #pragma omp sections reduction (task, +: a) reduction (task, *: b) \ + reduction (task, &: c[1:1]) + { + { + a++; b[0] *= 2; bar (2); b[2] *= 3; c[1] &= ~(1UL << 2); + } + #pragma omp section + { b[0] *= 2; bar (4); b[2] *= 3; c[1] &= ~(1UL << 4); a++; } + #pragma omp section + { bar (6); b[2] *= 3; c[1] &= ~(1UL << 6); a++; b[0] *= 2; } + #pragma omp section + { b[2] *= 3; c[1] &= ~(1UL << 8); a++; b[0] *= 2; bar (8); } + #pragma omp section + { c[1] &= ~(1UL << 10); a++; b[0] *= 2; bar (10); b[2] *= 3; } + #pragma omp section + { a++; b[0] *= 2; b[2] *= 3; c[1] &= ~(1UL << 12); bar (12); } + #pragma omp section + if (x) + { + a++; b[0] *= 2; b[2] *= 3; bar (14); c[1] &= ~(1UL << 14); + } + } +} + +int +main () +{ + volatile int one = 1; + foo (!one); + if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || 
c[1] != ~0x15541554UL) + abort (); + a = 0; + b[0] = 1; + b[1] = 1; + b[2] = 1; + c[1] = ~0UL; + #pragma omp parallel + foo (one); + if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3 + || c[0] != ~0UL || c[1] != ~0x55545554UL) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c new file mode 100644 index 00000000000..aad725c29e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c @@ -0,0 +1,90 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +struct S { long int s, t; }; + +void +bar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +struct S a = { 0, 7 }; +struct S b = { 1, 5 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +int +main () +{ + struct S c = { 0, 7 }; + #pragma omp parallel + #pragma omp single + { + struct S d = { 1, 5 }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + 
abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 || c.t != 7) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c new file mode 100644 index 00000000000..8a90e86e847 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c @@ -0,0 +1,218 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, 
{ 13, 14 } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 
* 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c new file mode 100644 index 00000000000..b0e5197623b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c @@ -0,0 +1,70 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +void +bar 
(long long int *p) +{ + p[0] *= 2; + #pragma omp task in_reduction (*: p[0]) + p[0] *= 3; +} + +void +foo (long long int *p, long long int *q) +{ + #pragma omp taskgroup task_reduction (*: p[0]) + { + #pragma omp task in_reduction (*: p[0]) + bar (p); + #pragma omp task in_reduction (*: p[0]) + bar (p); + bar (p); + #pragma omp taskgroup task_reduction (*: q[0]) + { + #pragma omp task in_reduction (*: q[0]) + bar (q); + #pragma omp task in_reduction (*: q[0]) + bar (q); + #pragma omp task in_reduction (*: q[0]) + bar (q); + bar (q); + #pragma omp task in_reduction (*: p[0]) + { + #pragma omp taskgroup task_reduction (*: p[0]) + { + #pragma omp task in_reduction (*: p[0]) + bar (p); + p[0] *= 2; + #pragma omp task in_reduction (*: p[0]) + bar (p); + } + } + } + } +} + +int +main () +{ + long long int p = 1LL, q = 1LL; + foo (&p, &q); + if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL) + abort (); + p = 1LL; + q = 1LL; + #pragma omp taskgroup + foo (&p, &q); + if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL) + abort (); + p = 1LL; + q = 1LL; + #pragma omp parallel + #pragma omp single + foo (&p, &q); + if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-5.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-5.c new file mode 100644 index 00000000000..018dc4fb28e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-5.c @@ -0,0 +1,65 @@ +typedef __SIZE_TYPE__ size_t; +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +int *q; + +void +bar (int *p, int *r, int *t, int s, size_t u) +{ + #pragma omp task in_reduction (*: p[0], q[0], r[s - 1], t[0:u + 1]) + { + *p *= 4; + *q *= 5; + r[s - 1] *= 6; + t[0] *= 8; + t[1] *= 9; + } +} + +void +foo (int *p, int *r, int *t, int s, size_t u) +{ + int *p2 = p; + #pragma omp taskgroup task_reduction (*: p[0], q[0], r[s], t[0:u + 1]) 
+ { + p = (int *) 0; + s++; + bar (p2, r, t, s, u); + r++; + #pragma omp taskwait + #pragma omp task in_reduction (*: p2[0], q[0], r[s - 2], t[0:u + 1]) + { + *p2 *= 2; + *q *= 3; + r[s - 2] *= 7; + t[0] *= 10; + t[1] *= 11; + } + u = (~(size_t) 0) / 4; + s++; + p2 = (int *) 0; + q = (int *) 0; + r = (int *) 0; + t = (int *) 0; + } +} + +int +main () +{ + int a = 1, b = 1, c[2] = { 1, 0 }, d[3] = { 1, 1, -1 }; + volatile int zero; + zero = 0; + q = &b; + #pragma omp parallel num_threads (2) + #pragma omp master + foo (&a, &c[0], &d[0], zero, zero + 1); + if (a != 8 || b != 15 || c[0] != 42 || c[1] != 0 + || d[0] != 80 || d[1] != 99 || d[2] != -1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c new file mode 100644 index 00000000000..09cbea66542 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c @@ -0,0 +1,125 @@ +#include <omp.h> +#include <stdlib.h> + +struct S { unsigned long long int s, t; }; + +void +rbar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \ + initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +struct S g = { 0, 7 }; +struct S h = { 1, 5 }; + +int +foo (int *a, int *b) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) in_reduction (+:b[0]) + for (int i = 0; i < 64; i++) + { + x += a[i]; + *b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int *b) +{ + unsigned long long int x = 1; + #pragma omp taskloop reduction (*:x) in_reduction (*:b[0]) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp 
task in_reduction (*:b[0]) + *b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +int +main () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + int r = 0, t; + struct S m = { 0, 7 }; + struct S n = { 1, 5 }; + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel reduction (task, +:b) reduction(+:r) \ + reduction(task,*:d) reduction (task, +: g, m) \ + reduction (task, *: h, n) shared(t) + { + #pragma omp master + { + j = foo (a, &b); + t = omp_get_num_threads (); + } + r++; + #pragma omp single nowait + e = bar (c, &d); + #pragma omp master + #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (m.s != 63 * 64 * 4 || m.t != 7) + abort (); + if (r != t) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c new file mode 100644 index 00000000000..c656f5ff00b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-7.c @@ -0,0 +1,216 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 
1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + s = ss; + t = tt; + #pragma omp parallel reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) num_threads(4) + { + int i; + #pragma omp for + for (i = 0; i < 4; i++) + { + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 
8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2 + || p[0] != 29 * 16 + 
19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c new file mode 100644 index 00000000000..7b0859db6f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c @@ -0,0 +1,141 @@ +#include <omp.h> +#include <stdlib.h> + +struct S { unsigned long long int s, t; }; + +void +rbar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \ + initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +struct S g = { 0, 7 }; +struct S h = { 1, 5 }; + +int +foo (int z, int *a, int *b) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) in_reduction (+:b[0]) + for (int i = z; i < z + 8; i++) + { + x += a[i]; + *b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int z, int *a, unsigned long long int *b, int *s) +{ + unsigned long long int x = 1; + #pragma omp taskloop reduction (*:x) in_reduction (*:b[0]) + for (int i = z; i < z + 8; i++) + { + 
#pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task in_reduction (*:b[0]) + *b *= (3 - a[i]); + s[0]++; + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +int +main () +{ + int i, j = 0, a[64], b = 0, c[64], f = 0; + unsigned long long int d = 1, e = 1; + volatile int one = 1; + int r = 0, s = 0, t; + struct S m = { 0, 7 }; + struct S n = { 1, 5 }; + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel reduction (task, +:b) shared(t) reduction(+:r, s) + { + int z, q1, q2, q3; + #pragma omp master + t = omp_get_num_threads (); + #pragma omp for schedule(static) reduction (task, +: f) reduction (+: j) + for (z = 0; z < 64; z += 8) + { + f++; + j += foo (z, a, &b); + j += foo (z, a, &f); + } + if (j != 63 * 64 * 2 || f != 63 * 64 * 2 + 8) + abort (); + r++; + #pragma omp taskgroup task_reduction (+: s) + { + #pragma omp for schedule(static, 1) reduction(task, *: d) reduction (*: e) + for (z = 0; z < 64; z += 8) + e *= bar (z, c, &d, &s); + } + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + #pragma omp for schedule(monotonic: dynamic, 1) reduction (task, +: g, m) \ + reduction (task, *: h, n) collapse(3) + for (q1 = 0; q1 < one; q1++) + for (q2 = 0; q2 < 64; q2 += 8) + for (q3 = 0; q3 < one; ++q3) + #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) \ + nogroup + for (i = q2; i < q2 + 8; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (m.s != 63 * 64 * 4 || m.t != 7) 
+ abort (); + } + if (b != 63 * 64 * 2) + abort (); + if (r != t || s != 64) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c new file mode 100644 index 00000000000..3d71fef8670 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c @@ -0,0 +1,217 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + s = ss; + t = tt; + #pragma omp parallel num_threads(4) + { + int i; + #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \ + 
reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (task, *: t[2:2][:], s[1:n + 1]) \ + schedule(nonmonotonic: runtime) + for (i = 0; i < 4; i++) + { + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || 
f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c new file mode 100644 index 00000000000..d44b471a372 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c @@ -0,0 +1,119 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +struct S { unsigned long long int s, t; }; + +void +rbar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma 
omp declare reduction (+: struct S : omp_out.s += omp_in.s) \ + initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +struct S g = { 0, 7 }; +struct S h = { 1, 5 }; + +int +foo (int *a, int *b) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) in_reduction (+:b[0]) + for (int i = 0; i < 64; i++) + { + x += a[i]; + *b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int *b) +{ + unsigned long long int x = 1; + #pragma omp taskloop reduction (*:x) in_reduction (*:b[0]) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task in_reduction (*:b[0]) + *b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +int +main () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + struct S m = { 0, 7 }; + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel + #pragma omp master + { + struct S n = { 1, 5 }; + #pragma omp taskgroup task_reduction (+:b) + j = foo (a, &b); + #pragma omp taskgroup task_reduction (*:d) + e = bar (c, &d); + #pragma omp taskloop reduction (+: g, m) reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + } + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (m.s != 63 * 64 * 4 
|| m.t != 7) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c new file mode 100644 index 00000000000..8fc05dc668e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c @@ -0,0 +1,212 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) nogroup + for (i = 0; i < 2; i++) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], 
h[0], k[1:2][0:2]) \ + reduction (default, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || 
f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-3.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-3.c new file mode 100644 index 00000000000..30a83249046 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-3.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-std=c99" { target c } } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define N 1024 +long int u[N], m, n; + +__attribute__((noipa)) void +foo (void) +{ + int i; + #pragma omp taskloop simd reduction (+:m) grainsize (64) + for (i = 0; i < N; ++i) + m += u[i]; +} + +__attribute__((noipa)) void +bar (int x) +{ + #pragma omp taskloop simd in_reduction (+:n) grainsize (64) nogroup + for (int i = (x & 1) * (N / 2); i < (x & 1) * (N / 2) + (N / 2); i++) + n += 2 * u[i]; +} + +int 
+main () +{ + int i; + for (i = 0; i < N; ++i) + u[i] = i; + #pragma omp parallel master + { + foo (); + #pragma omp taskgroup task_reduction (+:n) + { + bar (0); + bar (1); + } + } + if (m != (long)(N - 1) * (N / 2) || n != (long)(N - 1) * N) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-4.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-4.c new file mode 100644 index 00000000000..c1c29b37414 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-4.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-std=c99" { target c } } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include <omp.h> +#include <stdlib.h> + +#define N 1024 +long int u[N], m, n, o; + +__attribute__((noipa)) void +foo (void) +{ + int i = -1; + #pragma omp master taskloop simd reduction (+:m) grainsize (64) + for (i = 0; i < N; ++i) + m += u[i]; + if (i != (omp_get_thread_num () ? -1 : N)) + abort (); +} + +__attribute__((noipa)) void +bar (int x) +{ + int i = -1; + #pragma omp master taskloop simd in_reduction (+:n) grainsize (64) + for (i = (x & 1) * (N / 2); i < (x & 1) * (N / 2) + (N / 2); i++) + n += 2 * u[i]; + if (i != (omp_get_thread_num () ? 
-1 : (x & 1) * (N / 2) + (N / 2))) + abort (); +} + +__attribute__((noipa)) void +baz (void) +{ + int i; + #pragma omp parallel master taskloop simd reduction (+:o) grainsize (64) + for (i = 0; i < N; ++i) + o += u[i]; + if (i != N) + abort (); +} + +int +main () +{ + int i; + for (i = 0; i < N; ++i) + u[i] = i; + #pragma omp parallel + { + foo (); + #pragma omp taskgroup task_reduction (+:n) + { + bar (0); + bar (1); + } + } + baz (); + if (m != (long)(N - 1) * (N / 2) || n != (long)(N - 1) * N || o != m) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskwait-depend-1.c b/libgomp/testsuite/libgomp.c-c++-common/taskwait-depend-1.c new file mode 100644 index 00000000000..094b4b3586a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskwait-depend-1.c @@ -0,0 +1,29 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int +main () +{ + int a, b, c, d; + #pragma omp parallel num_threads (4) + #pragma omp single + { + #pragma omp task depend(out : a) + a = 6; + #pragma omp task depend(out : b) + b = 7; + #pragma omp task depend(out : c) + c = 8; + #pragma omp taskwait depend(in : a, c) + d = a + c; + #pragma omp task depend(out : a) + a = 9; + #pragma omp task depend(out : c) + c = 10; + } + if (a != 9 || b != 7 || c != 10 || d != 6 + 8) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-for-2.c b/libgomp/testsuite/libgomp.c/cancel-for-2.c index 30cfbb1c502..d1d6f86733d 100644 --- a/libgomp/testsuite/libgomp.c/cancel-for-2.c +++ b/libgomp/testsuite/libgomp.c/cancel-for-2.c @@ -20,7 +20,7 @@ foo (int *x) #pragma omp for for (i = 0; i < 1000; ++i) { - #pragma omp cancel for if (x[1]) + #pragma omp cancel for if (cancel: x[1]) #pragma omp atomic v++; } @@ -34,7 +34,7 @@ foo (int *x) #pragma omp for for (i = 0; i < 1000; ++i) { - #pragma omp cancel for if (x[3]) + #pragma omp cancel for if ( cancel : x[3]) #pragma omp atomic v += 2; } @@ -54,7 +54,7 @@ foo (int *x) #pragma omp cancel for if 
(x[0]) abort (); } - #pragma omp cancel parallel if (omp_get_thread_num () == 2 && x[4]) + #pragma omp cancel parallel if (cancel:omp_get_thread_num () == 2 && x[4]) #pragma omp for for (i = 0; i < 1000; ++i) { diff --git a/libgomp/testsuite/libgomp.c/task-reduction-1.c b/libgomp/testsuite/libgomp.c/task-reduction-1.c new file mode 100644 index 00000000000..224d995bd13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-reduction-1.c @@ -0,0 +1,137 @@ +typedef __SIZE_TYPE__ size_t; +extern void abort (void); + +void +bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], size_t n, int f[1][n], int g[1][n * 2]) +{ + #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 12; + a[1] *= 13; + b[3] *= 14; + b[4] *= 15; + c[n] *= 16; + c[n + 1] *= 17; + d[0][0] *= 18; + d[0][1] *= 19; + e[0][1] *= 20; + e[0][2] *= 21; + f[0][0] *= 22; + f[0][1] *= 23; + g[0][1] *= 24; + g[0][2] *= 25; + } +} + +void +foo (size_t n, void *x, void *y, int f[1][n], int g[1][n * 2]) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + f[0][i] = 1; + g[0][i + 1] = 1; + } + #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, n, f, g); + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0][0] *= 12; + f[0][1] *= 13; + g[0][1] *= 14; + g[0][2] *= 15; + } + n = 0; + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || f[0][0] != 264 
|| f[0][1] != 299) + abort (); + if (g[0][1] != 336 || g[0][2] != 375) + abort (); +} + +void +baz (size_t n, void *x, void *y, int f[1][n], int g[1][n * 2]) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + f[0][i] = 1; + g[0][i + 1] = 1; + } + #pragma omp parallel num_threads(2) + #pragma omp master + #pragma omp taskgroup task_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, n, f, g); + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0][0] *= 12; + f[0][1] *= 13; + g[0][1] *= 14; + g[0][2] *= 15; + } + n = 0; + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || f[0][0] != 264 || f[0][1] != 299) + abort (); + if (g[0][1] != 336 || g[0][2] != 375) + abort (); +} + +int +main () +{ + int d[1][2], e[1][4], f[1][2], g[1][4]; + volatile int two; + two = 2; + #pragma omp parallel num_threads (2) + #pragma omp master + foo (two, (void *) d, (void *) e, f, g); + baz (two, (void *) d, (void *) e, f, g); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/task-reduction-2.c b/libgomp/testsuite/libgomp.c/task-reduction-2.c new file mode 100644 index 00000000000..f5d48c36873 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-reduction-2.c @@ -0,0 +1,86 @@ +typedef __SIZE_TYPE__ size_t; +extern void abort (void); + +void +bar (int *a, int *b, int *c, int (*d)[2], int (*e)[4], size_t n, int f[1][n], int g[1][n * 2]) +{ + #pragma omp task in_reduction (*: a[:n], b[3:n], c[n:n], 
d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 12; + a[1] *= 13; + b[3] *= 14; + b[4] *= 15; + c[n] *= 16; + c[n + 1] *= 17; + d[0][0] *= 18; + d[0][1] *= 19; + e[0][1] *= 20; + e[0][2] *= 21; + f[0][0] *= 22; + f[0][1] *= 23; + g[0][1] *= 24; + g[0][2] *= 25; + } +} + +void +baz (size_t n, void *x, void *y, int f[1][n], int g[1][n * 2]) +{ + int a[n], b[n + 3], c[2 * n]; + int (*d)[n] = (int (*)[n]) x; + int (*e)[n * 2] = (int (*)[n * 2]) y; + int i; + for (i = 0; i < n; i++) + { + a[i] = 1; + b[i + 3] = 1; + c[i + n] = 1; + d[0][i] = 1; + e[0][i + 1] = 1; + f[0][i] = 1; + g[0][i + 1] = 1; + } + #pragma omp parallel num_threads(2) firstprivate (n) \ + reduction (task, *: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + #pragma omp master + bar (a, b, c, (int (*)[2]) d, (int (*)[4]) e, n, f, g); + #pragma omp master + #pragma omp task in_reduction (*: a, b[3:n], c[n:n], d[0][:n], e[0][1:n], f[0][:], g[0][1:n]) + { + a[0] *= 2; + a[1] *= 3; + b[3] *= 4; + b[4] *= 5; + c[n] *= 6; + c[n + 1] *= 7; + d[0][0] *= 8; + d[0][1] *= 9; + e[0][1] *= 10; + e[0][2] *= 11; + f[0][0] *= 12; + f[0][1] *= 13; + g[0][1] *= 14; + g[0][2] *= 15; + } + n = 0; + } + if (a[0] != 24 || a[1] != 39 || b[3] != 56 || b[4] != 75) + abort (); + if (c[2] != 96 || c[3] != 119 || d[0][0] != 144 || d[0][1] != 171) + abort (); + if (e[0][1] != 200 || e[0][2] != 231 || f[0][0] != 264 || f[0][1] != 299) + abort (); + if (g[0][1] != 336 || g[0][2] != 375) + abort (); +} + +int +main () +{ + int d[1][2], e[1][4], f[1][2], g[1][4]; + volatile int two; + two = 2; + baz (two, (void *) d, (void *) e, f, g); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/teams-1.c b/libgomp/testsuite/libgomp.c/teams-1.c new file mode 100644 index 00000000000..c5df8371b9e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/teams-1.c @@ -0,0 +1,27 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + #pragma omp teams thread_limit (2) + { + if (omp_in_parallel () + || omp_get_level () != 
0 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) != -1) + abort (); + omp_set_dynamic (0); + omp_set_nested (1); + #pragma omp parallel num_threads (2) + { + if (!omp_in_parallel () + || omp_get_level () != 1 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) != omp_get_thread_num () + || omp_get_ancestor_thread_num (2) != -1) + abort (); + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/teams-2.c b/libgomp/testsuite/libgomp.c/teams-2.c new file mode 100644 index 00000000000..2ddf50875c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/teams-2.c @@ -0,0 +1,123 @@ +#include <omp.h> +#include <stdlib.h> + +__attribute__((noinline)) +void +foo (int x, int y, int z, int *a, int *b) +{ + if (x == 0) + { + int i, j; + for (i = 0; i < 64; i++) + #pragma omp parallel for shared (a, b) + for (j = 0; j < 32; j++) + foo (3, i, j, a, b); + } + else if (x == 1) + { + int i, j; + #pragma omp distribute dist_schedule (static, 1) + for (i = 0; i < 64; i++) + #pragma omp parallel for shared (a, b) + for (j = 0; j < 32; j++) + foo (3, i, j, a, b); + } + else if (x == 2) + { + int j; + #pragma omp parallel for shared (a, b) + for (j = 0; j < 32; j++) + foo (3, y, j, a, b); + } + else + { + #pragma omp atomic + b[y] += z; + #pragma omp atomic + *a += 1; + } +} + +__attribute__((noinline)) +int +bar (int x, int y, int z) +{ + int a, b[64], i; + a = 8; + for (i = 0; i < 64; i++) + b[i] = i; + foo (x, y, z, &a, b); + if (x == 0) + { + if (a != 8 + 64 * 32) + return 1; + for (i = 0; i < 64; i++) + if (b[i] != i + 31 * 32 / 2) + return 1; + } + else if (x == 1) + { + int c = omp_get_num_teams (); + int d = omp_get_team_num (); + int e = d; + int f = 0; + for (i = 0; i < 64; i++) + if (i == e) + { + if (b[i] != i + 31 * 32 / 2) + return 1; + f++; + e = e + c; + } + else if (b[i] != i) + return 1; + if (a < 8 || a > 8 + f * 32) + return 1; + } + else if (x == 2) + { + if (a != 8 + 32) + return 1; + for (i = 0; i < 64; 
i++) + if (b[i] != i + (i == y ? 31 * 32 / 2 : 0)) + return 1; + } + else if (x == 3) + { + if (a != 8 + 1) + return 1; + for (i = 0; i < 64; i++) + if (b[i] != i + (i == y ? z : 0)) + return 1; + } + return 0; +} + +int +main () +{ + int i, j, err = 0; + #pragma omp teams reduction(+:err) + err += bar (0, 0, 0); + if (err) + abort (); + #pragma omp teams reduction(+:err) + err += bar (1, 0, 0); + if (err) + abort (); + #pragma omp teams reduction(+:err) + #pragma omp distribute + for (i = 0; i < 64; i++) + err += bar (2, i, 0); + if (err) + abort (); + #pragma omp teams reduction(+:err) + #pragma omp distribute + for (i = 0; i < 64; i++) + #pragma omp parallel for reduction(+:err) + for (j = 0; j < 32; j++) + err += bar (3, i, j); + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/thread-limit-4.c b/libgomp/testsuite/libgomp.c/thread-limit-4.c new file mode 100644 index 00000000000..5642e6a87ba --- /dev/null +++ b/libgomp/testsuite/libgomp.c/thread-limit-4.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_THREAD_LIMIT "9" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <omp.h> + +int +main () +{ + if (omp_get_thread_limit () != 9) + return 0; + omp_set_dynamic (0); + #pragma omp parallel num_threads (8) + if (omp_get_num_threads () != 8) + abort (); + #pragma omp parallel num_threads (16) + if (omp_get_num_threads () > 9) + abort (); + #pragma omp teams thread_limit (6) + { + if (omp_get_thread_limit () > 6) + abort (); + if (omp_get_thread_limit () == 6) + { + omp_set_dynamic (0); + omp_set_nested (1); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (8) + if (omp_get_num_threads () > 6) + abort (); + #pragma omp parallel num_threads (6) + if (omp_get_num_threads () != 6) + abort (); + int cnt = 0; + #pragma omp parallel num_threads (5) + #pragma 
omp parallel num_threads (5) + #pragma omp parallel num_threads (2) + { + int v; + #pragma omp atomic capture + v = ++cnt; + if (v > 6) + abort (); + usleep (10000); + #pragma omp atomic + --cnt; + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/thread-limit-5.c b/libgomp/testsuite/libgomp.c/thread-limit-5.c new file mode 100644 index 00000000000..d3d22b1e1a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/thread-limit-5.c @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + #pragma omp teams thread_limit (1) + if (omp_get_thread_limit () != 1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/display-affinity-1.f90 b/libgomp/testsuite/libgomp.fortran/display-affinity-1.f90 new file mode 100644 index 00000000000..4811b6f801b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/display-affinity-1.f90 @@ -0,0 +1,34 @@ +! { dg-set-target-env-var OMP_PROC_BIND "spread,close" } +! { dg-set-target-env-var OMP_PLACES "cores" } +! { dg-set-target-env-var OMP_NUM_THREADS "4" } +! { dg-set-target-env-var OMP_AFFINITY_FORMAT "hello" } + + use omp_lib + character(len=68) :: buf, buf2 + character(len=8) :: buf3 + character(len=1) :: buf4 + integer :: l1, l2 + + buf = 'L:%0.5L%%%n>%32H<!%.33{host}!%.6P_%i_%0.18i_%0.7{ancestor_tnum} %18A' + call omp_set_affinity_format (format = buf) + if (omp_get_affinity_format (buf4) /= 68) stop 1 + if (buf4 /= 'L') stop 2 + if (omp_get_affinity_format (buf2) /= 68) stop 3 + if (buf2 /= buf) stop 4 + if (omp_get_affinity_format (buf3) /= 68) stop 5 + if (buf3 /= 'L:%0.5L%') stop 6 + call omp_display_affinity ('') + call omp_display_affinity ('%%%0.9N') + l1 = omp_capture_affinity (buf4, '%0.5{nesting_level}%%|||%0.7a%3N!%N!') + buf = '%.5L%%|||%0.7{ancestor_tnum}%3{num_threads}!%{num_threads}!' 
+ call omp_set_affinity_format (trim (buf)) + l2 = omp_capture_affinity (buf2, '') + if (l1 /= l2) stop 7 + if (l1 /= 22) stop 8 + if (buf2 /= ' 0%|||-0000011 !1!') stop 9 + if (buf4 /= '0') stop 10 +!$omp parallel num_threads (4) proc_bind(spread) + call omp_display_affinity ('%0.2a!%n!%.4L!%N;%.2t;%0.2T;& + &%{team_num};%{num_teams};%A') +!$omp end parallel +end diff --git a/libgomp/work.c b/libgomp/work.c index b2b34145289..16fc7076edd 100644 --- a/libgomp/work.c +++ b/libgomp/work.c @@ -76,7 +76,15 @@ alloc_work_share (struct gomp_team *team) #endif team->work_share_chunk *= 2; + /* Allocating gomp_work_share structures aligned is just an + optimization, don't do it when using the fallback method. */ +#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC + ws = gomp_aligned_alloc (__alignof (struct gomp_work_share), + team->work_share_chunk + * sizeof (struct gomp_work_share)); +#else ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share)); +#endif ws->next_alloc = team->work_shares[0].next_alloc; team->work_shares[0].next_alloc = ws; team->work_share_list_alloc = &ws[1]; @@ -90,30 +98,35 @@ alloc_work_share (struct gomp_team *team) This shouldn't touch the next_alloc field. 
*/ void -gomp_init_work_share (struct gomp_work_share *ws, bool ordered, +gomp_init_work_share (struct gomp_work_share *ws, size_t ordered, unsigned nthreads) { gomp_mutex_init (&ws->lock); if (__builtin_expect (ordered, 0)) { -#define INLINE_ORDERED_TEAM_IDS_CNT \ - ((sizeof (struct gomp_work_share) \ - - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \ - / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0])) - - if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT) - ws->ordered_team_ids - = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids)); +#define INLINE_ORDERED_TEAM_IDS_SIZE \ + (sizeof (struct gomp_work_share) \ + - offsetof (struct gomp_work_share, inline_ordered_team_ids)) + + if (__builtin_expect (ordered != 1, 0)) + { + ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1; + ordered = ordered + __alignof__ (long long) - 1; + ordered &= ~(__alignof__ (long long) - 1); + } + else + ordered = nthreads * sizeof (*ws->ordered_team_ids); + if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE) + ws->ordered_team_ids = gomp_malloc (ordered); else ws->ordered_team_ids = ws->inline_ordered_team_ids; - memset (ws->ordered_team_ids, '\0', - nthreads * sizeof (*ws->ordered_team_ids)); + memset (ws->ordered_team_ids, '\0', ordered); ws->ordered_num_used = 0; ws->ordered_owner = -1; ws->ordered_cur = 0; } else - ws->ordered_team_ids = NULL; + ws->ordered_team_ids = ws->inline_ordered_team_ids; gomp_ptrlock_init (&ws->next_ws, NULL); ws->threads_completed = 0; } @@ -166,7 +179,7 @@ free_work_share (struct gomp_team *team, struct gomp_work_share *ws) if this was the first thread to reach this point. 
*/ bool -gomp_work_share_start (bool ordered) +gomp_work_share_start (size_t ordered) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -178,7 +191,7 @@ gomp_work_share_start (bool ordered) ws = gomp_malloc (sizeof (*ws)); gomp_init_work_share (ws, ordered, 1); thr->ts.work_share = ws; - return ws; + return true; } ws = thr->ts.work_share; |