diff options
Diffstat (limited to 'libgomp')
113 files changed, 12498 insertions, 429 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 74a135cd55b..df4e9370f81 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,317 @@ +2015-10-13 Jakub Jelinek <jakub@redhat.com> + Aldy Hernandez <aldyh@redhat.com> + Ilya Verbin <ilya.verbin@intel.com> + + * config/linux/affinity.c (omp_get_place_num_procs, + omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions. + * config/linux/doacross.h: New file. + * config/posix/affinity.c (omp_get_place_num_procs, + omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions. + * config/posix/doacross.h: New file. + * env.c: Include gomp-constants.h. + (struct gomp_task_icv): Rename run_sched_modifier to + run_sched_chunk_size. + (gomp_max_task_priority_var): New variable. + (parse_schedule): Rename run_sched_modifier to run_sched_chunk_size. + (handle_omp_display_env): Change _OPENMP value from 201307 to + 201511. Print OMP_MAX_TASK_PRIORITY. + (initialize_env): Parse OMP_MAX_TASK_PRIORITY. + (omp_set_schedule, omp_get_schedule): Rename modifier argument to + chunk_size and run_sched_modifier to run_sched_chunk_size. + (omp_get_max_task_priority, omp_get_initial_device, + omp_get_num_places, omp_get_place_num, omp_get_partition_num_places, + omp_get_partition_place_nums): New functions. + * fortran.c (omp_set_schedule_, omp_set_schedule_8_, + omp_get_schedule_, omp_get_schedule_8_): Rename modifier argument + to chunk_size. + (omp_get_num_places_, omp_get_place_num_procs_, + omp_get_place_num_procs_8_, omp_get_place_proc_ids_, + omp_get_place_proc_ids_8_, omp_get_place_num_, + omp_get_partition_num_places_, omp_get_partition_place_nums_, + omp_get_partition_place_nums_8_, omp_get_initial_device_, + omp_get_max_task_priority_): New functions. 
+ * libgomp_g.h (GOMP_loop_doacross_static_start, + GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start, + GOMP_loop_doacross_runtime_start, GOMP_loop_ull_doacross_static_start, + GOMP_loop_ull_doacross_dynamic_start, + GOMP_loop_ull_doacross_guided_start, + GOMP_loop_ull_doacross_runtime_start, GOMP_doacross_post, + GOMP_doacross_wait, GOMP_doacross_ull_post, GOMP_doacross_ull_wait, + GOMP_taskloop, GOMP_taskloop_ull, GOMP_target_41, + GOMP_target_data_41, GOMP_target_update_41, + GOMP_target_enter_exit_data): New prototypes. + (GOMP_task): Add priority argument. + * libgomp.h (_LIBGOMP_CHECKING_): Define to 0 if not yet defined. + (struct gomp_doacross_work_share): New type. + (struct gomp_work_share): Add doacross field. + (struct gomp_task_icv): Rename run_sched_modifier to + run_sched_chunk_size. + (enum gomp_task_kind): Rename GOMP_TASK_IFFALSE to + GOMP_TASK_UNDEFERRED. Add comments. + (struct gomp_task_depend_entry): Add comments. + (struct gomp_task): Likewise. + (struct gomp_taskgroup): Likewise. + (struct gomp_target_task): New type. + (struct gomp_team): Add comment. + (gomp_get_place_proc_ids_8, gomp_doacross_init, + gomp_doacross_ull_init, gomp_task_maybe_wait_for_dependencies, + gomp_create_target_task, gomp_target_task_fn): New prototypes. + (struct target_var_desc): New type. + (struct target_mem_desc): Adjust comment. Use struct + target_var_desc instead of splay_tree_key for list. + (REFCOUNT_INFINITY): Define. + (struct splay_tree_key_s): Remove copy_from field. + (struct gomp_device_descr): Add dev2dev_func field. + (enum gomp_map_vars_kind): New enum. + (gomp_map_vars): Add one argument. 
+ * libgomp.map (OMP_4.5): Export omp_get_max_task_priority, + omp_get_max_task_priority_, omp_get_num_places, omp_get_num_places_, + omp_get_place_num_procs, omp_get_place_num_procs_, + omp_get_place_num_procs_8_, omp_get_place_proc_ids, + omp_get_place_proc_ids_, omp_get_place_proc_ids_8_, omp_get_place_num, + omp_get_place_num_, omp_get_partition_num_places, + omp_get_partition_num_places_, omp_get_partition_place_nums, + omp_get_partition_place_nums_, omp_get_partition_place_nums_8_, + omp_get_initial_device, omp_get_initial_device_, omp_target_alloc, + omp_target_free, omp_target_is_present, omp_target_memcpy, + omp_target_memcpy_rect, omp_target_associate_ptr and + omp_target_disassociate_ptr. + (GOMP_4.0.2): Renamed to ... + (GOMP_4.5): ... this. Export GOMP_target_41, GOMP_target_data_41, + GOMP_target_update_41, GOMP_target_enter_exit_data, GOMP_taskloop, + GOMP_taskloop_ull, GOMP_loop_doacross_dynamic_start, + GOMP_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start, + GOMP_loop_doacross_static_start, GOMP_doacross_post, + GOMP_doacross_wait, GOMP_loop_ull_doacross_dynamic_start, + GOMP_loop_ull_doacross_guided_start, + GOMP_loop_ull_doacross_runtime_start, + GOMP_loop_ull_doacross_static_start, GOMP_doacross_ull_post and + GOMP_doacross_ull_wait. + * libgomp.texi: Document omp_get_max_task_priority. + Rename modifier argument to chunk_size for omp_set_schedule and + omp_get_schedule. Document OMP_MAX_TASK_PRIORITY env var. + * loop.c (GOMP_loop_runtime_start): Adjust for run_sched_modifier + to run_sched_chunk_size renaming. + (GOMP_loop_ordered_runtime_start): Likewise. + (gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start, + gomp_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start, + GOMP_parallel_loop_runtime_start): New functions. + (GOMP_parallel_loop_runtime): Adjust for run_sched_modifier + to run_sched_chunk_size renaming. 
+ (GOMP_loop_doacross_static_start, GOMP_loop_doacross_dynamic_start, + GOMP_loop_doacross_guided_start): New functions or aliases. + * loop_ull.c (GOMP_loop_ull_runtime_start): Adjust for + run_sched_modifier to run_sched_chunk_size renaming. + (GOMP_loop_ull_ordered_runtime_start): Likewise. + (gomp_loop_ull_doacross_static_start, + gomp_loop_ull_doacross_dynamic_start, + gomp_loop_ull_doacross_guided_start, + GOMP_loop_ull_doacross_runtime_start): New functions. + (GOMP_loop_ull_doacross_static_start, + GOMP_loop_ull_doacross_dynamic_start, + GOMP_loop_ull_doacross_guided_start): New functions or aliases. + * oacc-mem.c (acc_map_data, present_create_copy, + gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false + to gomp_map_vars. + (gomp_acc_remove_pointer): Use copy_from from target_var_desc. + * oacc-parallel.c (GOACC_data_start): Pass GOMP_MAP_VARS_OPENACC + instead of false to gomp_map_vars. + (GOACC_parallel_keyed): Likewise. Use copy_from from target_var_desc. + * omp.h.in (omp_lock_hint_t): New type. + (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, + omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, + omp_get_place_num, omp_get_partition_num_places, + omp_get_partition_place_nums, omp_get_initial_device, + omp_get_max_task_priority, omp_target_alloc, omp_target_free, + omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect, + omp_target_associate_ptr, omp_target_disassociate_ptr): New + prototypes. + * omp_lib.f90.in (omp_lock_hint_kind): New parameter. + (omp_lock_hint_none, omp_lock_hint_uncontended, + omp_lock_hint_contended, omp_lock_hint_nonspeculative, + omp_lock_hint_speculative): New parameters. + (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, + omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, + omp_get_place_num, omp_get_partition_num_places, + omp_get_partition_place_nums, omp_get_initial_device, + omp_get_max_task_priority): New interfaces. 
+ (omp_set_schedule, omp_get_schedule): Rename modifier argument + to chunk_size. + * omp_lib.h.in (omp_lock_hint_kind): New parameter. + (omp_lock_hint_none, omp_lock_hint_uncontended, + omp_lock_hint_contended, omp_lock_hint_nonspeculative, + omp_lock_hint_speculative): New parameters. + (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, + omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, + omp_get_place_num, omp_get_partition_num_places, + omp_get_partition_place_nums, omp_get_initial_device, + omp_get_max_task_priority): New functions and subroutines. + * ordered.c: Include stdarg.h and string.h. + (MAX_COLLAPSED_BITS): Define. + (gomp_doacross_init, GOMP_doacross_post, GOMP_doacross_wait, + gomp_doacross_ull_init, GOMP_doacross_ull_post, + GOMP_doacross_ull_wait): New functions. + * target.c: Include errno.h. + (resolve_device): If device is not initialized, call + gomp_init_device on it. + (gomp_map_lookup): New function. + (gomp_map_vars_existing): Add tgt_var argument, fill it in. + Don't bump refcount if REFCOUNT_INFINITY. Handle + GOMP_MAP_ALWAYS_TO_P. + (get_kind): Rename is_openacc argument to short_mapkind. + (gomp_map_pointer): Use gomp_map_lookup. + (gomp_map_fields_existing): New function. + (gomp_map_vars): Rename is_openacc argument to short_mapkind + and is_target to pragma_kind. Handle GOMP_MAP_VARS_ENTER_DATA, + handle GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_STRUCT, + GOMP_MAP_USE_DEVICE_PTR, GOMP_MAP_ZERO_LEN_ARRAY_SECTION. + Adjust for tgt->list changed type and copy_from living in there. + (gomp_copy_from_async): Adjust for tgt->list changed type and + copy_from living in there. + (gomp_unmap_vars): Likewise. + (gomp_update): Likewise. Rename is_openacc argument to + short_mapkind. Don't fail if object is not mapped. + (gomp_load_image_to_device): Initialize refcount to + REFCOUNT_INFINITY. + (gomp_target_fallback): New function. + (gomp_get_target_fn_addr): Likewise. 
+ (GOMP_target): Adjust gomp_map_vars caller, use + gomp_get_target_fn_addr and gomp_target_fallback. + (GOMP_target_41): New function. + (gomp_target_data_fallback): New function. + (GOMP_target_data): Use it, adjust gomp_map_vars caller. + (GOMP_target_data_41): New function. + (GOMP_target_update): Adjust gomp_update caller. + (GOMP_target_update_41): New function. + (gomp_exit_data, GOMP_target_enter_exit_data, + gomp_target_task_fn, omp_target_alloc, omp_target_free, + omp_target_is_present, omp_target_memcpy, + omp_target_memcpy_rect_worker, omp_target_memcpy_rect, + omp_target_associate_ptr, omp_target_disassociate_ptr, + gomp_load_plugin_for_device): New functions. + * task.c: Include gomp-constants.h. Include taskloop.c + twice to get GOMP_taskloop and GOMP_taskloop_ull definitions. + (gomp_task_handle_depend): New function. + (GOMP_task): Use it. Add priority argument. Use + gomp-constants.h constants instead of hardcoded numbers. + Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED. + (gomp_create_target_task): New function. + (verify_children_queue, verify_taskgroup_queue, + verify_task_queue): New functions. + (gomp_task_run_pre): Call verify_*_queue functions. + If an upcoming tied task is about to leave the sibling or + taskgroup queues in an invalid state, adjust appropriately. + Remove taskgroup argument. Add comments. + (gomp_task_run_post_handle_dependers): Add comments. + (gomp_task_run_post_remove_parent): Likewise. + (gomp_barrier_handle_tasks): Adjust gomp_task_run_pre caller. + (GOMP_taskwait): Likewise. Add comments. + (gomp_task_maybe_wait_for_dependencies): Fix scheduling + problem such that the first non parent_depends_on task does not + end up at the end of the children queue. + (GOMP_taskgroup_start): Rename GOMP_TASK_IFFALSE to + GOMP_TASK_UNDEFERRED. + (GOMP_taskgroup_end): Adjust gomp_task_run_pre caller. + * taskloop.c: New file. + * testsuite/lib/libgomp.exp + (check_effective_target_offload_device_nonshared_as): New proc. 
+ * testsuite/libgomp.c/affinity-2.c: New test. + * testsuite/libgomp.c/doacross-1.c: New test. + * testsuite/libgomp.c/doacross-2.c: New test. + * testsuite/libgomp.c/examples-4/declare_target-1.c (fib_wrapper): + Add map clause to target. + * testsuite/libgomp.c/examples-4/declare_target-4.c (accum): Likewise. + * testsuite/libgomp.c/examples-4/declare_target-5.c (accum): Likewise. + * testsuite/libgomp.c/examples-4/device-1.c (main): Likewise. + * testsuite/libgomp.c/examples-4/device-3.c (main): Likewise. + * testsuite/libgomp.c/examples-4/target_data-3.c (gramSchmidt): + Likewise. + * testsuite/libgomp.c/examples-4/teams-2.c (dotprod): Likewise. + * testsuite/libgomp.c/examples-4/teams-3.c (dotprod): Likewise. + * testsuite/libgomp.c/examples-4/teams-4.c (dotprod): Likewise. + * testsuite/libgomp.c/for-2.h (OMPTGT, OMPTO, OMPFROM): Define if + not defined. Use those where needed. + * testsuite/libgomp.c/for-4.c: New test. + * testsuite/libgomp.c/for-5.c: New test. + * testsuite/libgomp.c/for-6.c: New test. + * testsuite/libgomp.c/linear-1.c: New test. + * testsuite/libgomp.c/ordered-4.c: New test. + * testsuite/libgomp.c/pr66199-2.c (f2): Adjust for linear clause + only allowed on the loop iterator. + * testsuite/libgomp.c/pr66199-3.c: New test. + * testsuite/libgomp.c/pr66199-4.c: New test. + * testsuite/libgomp.c/reduction-7.c: New test. + * testsuite/libgomp.c/reduction-8.c: New test. + * testsuite/libgomp.c/reduction-9.c: New test. + * testsuite/libgomp.c/reduction-10.c: New test. + * testsuite/libgomp.c/target-1.c (fn2, fn3, fn4): Add + map(tofrom:s). + * testsuite/libgomp.c/target-2.c (fn2, fn3, fn4): Likewise. + * testsuite/libgomp.c/target-7.c (foo): Add map(h) where needed. + * testsuite/libgomp.c/target-11.c: New test. + * testsuite/libgomp.c/target-12.c: New test. + * testsuite/libgomp.c/target-13.c: New test. + * testsuite/libgomp.c/target-14.c: New test. + * testsuite/libgomp.c/target-15.c: New test. + * testsuite/libgomp.c/target-16.c: New test. 
+ * testsuite/libgomp.c/target-17.c: New test. + * testsuite/libgomp.c/target-18.c: New test. + * testsuite/libgomp.c/target-19.c: New test. + * testsuite/libgomp.c/target-20.c: New test. + * testsuite/libgomp.c/target-21.c: New test. + * testsuite/libgomp.c/target-22.c: New test. + * testsuite/libgomp.c/target-23.c: New test. + * testsuite/libgomp.c/target-24.c: New test. + * testsuite/libgomp.c/target-25.c: New test. + * testsuite/libgomp.c/target-26.c: New test. + * testsuite/libgomp.c/target-27.c: New test. + * testsuite/libgomp.c/taskloop-1.c: New test. + * testsuite/libgomp.c/taskloop-2.c: New test. + * testsuite/libgomp.c/taskloop-3.c: New test. + * testsuite/libgomp.c/taskloop-4.c: New test. + * testsuite/libgomp.c++/ctor-13.C: New test. + * testsuite/libgomp.c++/doacross-1.C: New test. + * testsuite/libgomp.c++/examples-4/declare_target-2.C: + Replace offload_device with offload_device_nonshared_as. + * testsuite/libgomp.c++/for-12.C: New test. + * testsuite/libgomp.c++/for-13.C: New test. + * testsuite/libgomp.c++/for-14.C: New test. + * testsuite/libgomp.c++/linear-1.C: New test. + * testsuite/libgomp.c++/member-1.C: New test. + * testsuite/libgomp.c++/member-2.C: New test. + * testsuite/libgomp.c++/member-3.C: New test. + * testsuite/libgomp.c++/member-4.C: New test. + * testsuite/libgomp.c++/member-5.C: New test. + * testsuite/libgomp.c++/ordered-1.C: New test. + * testsuite/libgomp.c++/reduction-5.C: New test. + * testsuite/libgomp.c++/reduction-6.C: New test. + * testsuite/libgomp.c++/reduction-7.C: New test. + * testsuite/libgomp.c++/reduction-8.C: New test. + * testsuite/libgomp.c++/reduction-9.C: New test. + * testsuite/libgomp.c++/reduction-10.C: New test. + * testsuite/libgomp.c++/reference-1.C: New test. + * testsuite/libgomp.c++/simd14.C: New test. + * testsuite/libgomp.c++/target-2.C (fn2): Add map(tofrom: s) clause. + * testsuite/libgomp.c++/target-5.C: New test. + * testsuite/libgomp.c++/target-6.C: New test. 
+ * testsuite/libgomp.c++/target-7.C: New test. + * testsuite/libgomp.c++/target-8.C: New test. + * testsuite/libgomp.c++/target-9.C: New test. + * testsuite/libgomp.c++/target-10.C: New test. + * testsuite/libgomp.c++/target-11.C: New test. + * testsuite/libgomp.c++/target-12.C: New test. + * testsuite/libgomp.c++/taskloop-1.C: New test. + * testsuite/libgomp.c++/taskloop-2.C: New test. + * testsuite/libgomp.c++/taskloop-3.C: New test. + * testsuite/libgomp.c++/taskloop-4.C: New test. + * testsuite/libgomp.c++/taskloop-5.C: New test. + * testsuite/libgomp.c++/taskloop-6.C: New test. + * testsuite/libgomp.c++/taskloop-7.C: New test. + * testsuite/libgomp.c++/taskloop-8.C: New test. + * testsuite/libgomp.c++/taskloop-9.C: New test. + * testsuite/libgomp.fortran/affinity1.f90: New test. + * testsuite/libgomp.fortran/affinity2.f90: New test. + 2015-10-13 Tom de Vries <tom@codesourcery.com> PR tree-optimization/67476 diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c index 17b65afb49a..775ee0a7fdf 100644 --- a/libgomp/config/linux/affinity.c +++ b/libgomp/config/linux/affinity.c @@ -353,6 +353,45 @@ gomp_affinity_print_place (void *p) fprintf (stderr, ":%lu", len); } +int +omp_get_place_num_procs (int place_num) +{ + if (place_num < 0 || place_num >= gomp_places_list_len) + return 0; + + cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; + return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp); +} + +void +omp_get_place_proc_ids (int place_num, int *ids) +{ + if (place_num < 0 || place_num >= gomp_places_list_len) + return; + + cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; + unsigned long i, max = 8 * gomp_cpuset_size; + for (i = 0; i < max; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) + *ids++ = i; +} + +void +gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) +{ + if (place_num < 0 || place_num >= gomp_places_list_len) + return; + + cpu_set_t *cpusetp = (cpu_set_t *) 
gomp_places_list[place_num]; + unsigned long i, max = 8 * gomp_cpuset_size; + for (i = 0; i < max; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) + *ids++ = i; +} + +ialias(omp_get_place_num_procs) +ialias(omp_get_place_proc_ids) + #else #include "../posix/affinity.c" diff --git a/libgomp/config/linux/doacross.h b/libgomp/config/linux/doacross.h new file mode 100644 index 00000000000..7a5a645f3cf --- /dev/null +++ b/libgomp/config/linux/doacross.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is a Linux specific implementation of doacross spinning. 
*/ + +#ifndef GOMP_DOACROSS_H +#define GOMP_DOACROSS_H 1 + +#include "libgomp.h" +#include <errno.h> +#include "wait.h" + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility push(hidden) +#endif + +static inline void doacross_spin (unsigned long *addr, unsigned long expected, + unsigned long cur) +{ + /* FIXME: back off depending on how large expected - cur is. */ + do + { + cpu_relax (); + cur = __atomic_load_n (addr, MEMMODEL_RELAXED); + if (expected < cur) + return; + } + while (1); +} + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility pop +#endif + +#endif /* GOMP_DOACROSS_H */ diff --git a/libgomp/config/posix/affinity.c b/libgomp/config/posix/affinity.c index 6840d3a727d..9008853c953 100644 --- a/libgomp/config/posix/affinity.c +++ b/libgomp/config/posix/affinity.c @@ -114,3 +114,27 @@ gomp_affinity_print_place (void *p) { (void) p; } + +int +omp_get_place_num_procs (int place_num) +{ + (void) place_num; + return 0; +} + +void +omp_get_place_proc_ids (int place_num, int *ids) +{ + (void) place_num; + (void) ids; +} + +void +gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) +{ + (void) place_num; + (void) ids; +} + +ialias(omp_get_place_num_procs) +ialias(omp_get_place_proc_ids) diff --git a/libgomp/config/posix/doacross.h b/libgomp/config/posix/doacross.h new file mode 100644 index 00000000000..537bcbba51c --- /dev/null +++ b/libgomp/config/posix/doacross.h @@ -0,0 +1,62 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is a generic implementation of doacross spinning. */ + +#ifndef GOMP_DOACROSS_H +#define GOMP_DOACROSS_H 1 + +#include "libgomp.h" +#include <errno.h> + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility push(hidden) +#endif + +static inline void +cpu_relax (void) +{ + __asm volatile ("" : : : "memory"); +} + +static inline void doacross_spin (unsigned long *addr, unsigned long expected, + unsigned long cur) +{ + /* FIXME: back off depending on how large expected - cur is. 
*/ + do + { + cpu_relax (); + cur = __atomic_load_n (addr, MEMMODEL_RELAXED); + if (expected < cur) + return; + } + while (1); +} + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility pop +#endif + +#endif /* GOMP_DOACROSS_H */ diff --git a/libgomp/env.c b/libgomp/env.c index 6b5e963c4ea..5d6cdcf0184 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -29,6 +29,7 @@ #include "libgomp.h" #include "libgomp_f.h" #include "oacc-int.h" +#include "gomp-constants.h" #include <ctype.h> #include <stdlib.h> #include <stdio.h> @@ -58,7 +59,7 @@ struct gomp_task_icv gomp_global_icv = { .nthreads_var = 1, .thread_limit_var = UINT_MAX, .run_sched_var = GFS_DYNAMIC, - .run_sched_modifier = 1, + .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, .nest_var = false, @@ -68,6 +69,7 @@ struct gomp_task_icv gomp_global_icv = { unsigned long gomp_max_active_levels_var = INT_MAX; bool gomp_cancel_var = false; +int gomp_max_task_priority_var = 0; #ifndef HAVE_SYNC_BUILTINS gomp_mutex_t gomp_managed_threads_lock; #endif @@ -123,7 +125,7 @@ parse_schedule (void) ++env; if (*env == '\0') { - gomp_global_icv.run_sched_modifier + gomp_global_icv.run_sched_chunk_size = gomp_global_icv.run_sched_var != GFS_STATIC; return; } @@ -149,7 +151,7 @@ parse_schedule (void) if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) value = 1; - gomp_global_icv.run_sched_modifier = value; + gomp_global_icv.run_sched_chunk_size = value; return; unknown: @@ -1069,7 +1071,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); - fputs (" _OPENMP = '201307'\n", stderr); + fputs (" _OPENMP = '201511'\n", stderr); fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", @@ -1157,6 +1159,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) gomp_cancel_var ? 
"TRUE" : "FALSE"); fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", gomp_global_icv.default_device_var); + fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", + gomp_max_task_priority_var); if (verbose) { @@ -1189,6 +1193,7 @@ initialize_env (void) parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); + parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, true); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) @@ -1337,21 +1342,21 @@ omp_get_nested (void) } void -omp_set_schedule (omp_sched_t kind, int modifier) +omp_set_schedule (omp_sched_t kind, int chunk_size) { struct gomp_task_icv *icv = gomp_icv (true); switch (kind) { case omp_sched_static: - if (modifier < 1) - modifier = 0; - icv->run_sched_modifier = modifier; + if (chunk_size < 1) + chunk_size = 0; + icv->run_sched_chunk_size = chunk_size; break; case omp_sched_dynamic: case omp_sched_guided: - if (modifier < 1) - modifier = 1; - icv->run_sched_modifier = modifier; + if (chunk_size < 1) + chunk_size = 1; + icv->run_sched_chunk_size = chunk_size; break; case omp_sched_auto: break; @@ -1362,11 +1367,11 @@ omp_set_schedule (omp_sched_t kind, int modifier) } void -omp_get_schedule (omp_sched_t *kind, int *modifier) +omp_get_schedule (omp_sched_t *kind, int *chunk_size) { struct gomp_task_icv *icv = gomp_icv (false); *kind = icv->run_sched_var; - *modifier = icv->run_sched_modifier; + *chunk_size = icv->run_sched_chunk_size; } int @@ -1402,6 +1407,12 @@ omp_get_cancellation (void) return gomp_cancel_var; } +int +omp_get_max_task_priority (void) +{ + return gomp_max_task_priority_var; +} + omp_proc_bind_t omp_get_proc_bind (void) { @@ -1450,6 +1461,59 @@ omp_is_initial_device (void) return 1; } +int +omp_get_initial_device (void) +{ + return 
GOMP_DEVICE_HOST_FALLBACK; +} + +int +omp_get_num_places (void) +{ + return gomp_places_list_len; +} + +int +omp_get_place_num (void) +{ + if (gomp_places_list == NULL) + return -1; + + struct gomp_thread *thr = gomp_thread (); + if (thr->place == 0) + gomp_init_affinity (); + + return (int) thr->place - 1; +} + +int +omp_get_partition_num_places (void) +{ + if (gomp_places_list == NULL) + return 0; + + struct gomp_thread *thr = gomp_thread (); + if (thr->place == 0) + gomp_init_affinity (); + + return thr->ts.place_partition_len; +} + +void +omp_get_partition_place_nums (int *place_nums) +{ + if (gomp_places_list == NULL) + return; + + struct gomp_thread *thr = gomp_thread (); + if (thr->place == 0) + gomp_init_affinity (); + + unsigned int i; + for (i = 0; i < thr->ts.place_partition_len; i++) + *place_nums++ = thr->ts.place_partition_off + i; +} + ialias (omp_set_dynamic) ialias (omp_set_nested) ialias (omp_set_num_threads) @@ -1469,3 +1533,9 @@ ialias (omp_get_num_devices) ialias (omp_get_num_teams) ialias (omp_get_team_num) ialias (omp_is_initial_device) +ialias (omp_get_initial_device) +ialias (omp_get_max_task_priority) +ialias (omp_get_num_places) +ialias (omp_get_place_num) +ialias (omp_get_partition_num_places) +ialias (omp_get_partition_place_nums) diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 993145f8890..ceff9ac48e6 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -68,12 +68,20 @@ ialias_redirect (omp_get_active_level) ialias_redirect (omp_in_final) ialias_redirect (omp_get_cancellation) ialias_redirect (omp_get_proc_bind) +ialias_redirect (omp_get_num_places) +ialias_redirect (omp_get_place_num_procs) +ialias_redirect (omp_get_place_proc_ids) +ialias_redirect (omp_get_place_num) +ialias_redirect (omp_get_partition_num_places) +ialias_redirect (omp_get_partition_place_nums) ialias_redirect (omp_set_default_device) ialias_redirect (omp_get_default_device) ialias_redirect (omp_get_num_devices) ialias_redirect (omp_get_num_teams) 
ialias_redirect (omp_get_team_num) ialias_redirect (omp_is_initial_device) +ialias_redirect (omp_get_initial_device) +ialias_redirect (omp_get_max_task_priority) #endif #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING @@ -343,35 +351,35 @@ omp_get_wtime_ (void) } void -omp_set_schedule_ (const int32_t *kind, const int32_t *modifier) +omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size) { - omp_set_schedule (*kind, *modifier); + omp_set_schedule (*kind, *chunk_size); } void -omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) +omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size) { - omp_set_schedule (*kind, TO_INT (*modifier)); + omp_set_schedule (*kind, TO_INT (*chunk_size)); } void -omp_get_schedule_ (int32_t *kind, int32_t *modifier) +omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) { omp_sched_t k; - int m; - omp_get_schedule (&k, &m); + int cs; + omp_get_schedule (&k, &cs); *kind = k; - *modifier = m; + *chunk_size = cs; } void -omp_get_schedule_8_ (int32_t *kind, int64_t *modifier) +omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) { omp_sched_t k; - int m; - omp_get_schedule (&k, &m); + int cs; + omp_get_schedule (&k, &cs); *kind = k; - *modifier = m; + *chunk_size = cs; } int32_t @@ -452,6 +460,69 @@ omp_get_proc_bind_ (void) return omp_get_proc_bind (); } +int32_t +omp_get_num_places_ (void) +{ + return omp_get_num_places (); +} + +int32_t +omp_get_place_num_procs_ (const int32_t *place_num) +{ + return omp_get_place_num_procs (*place_num); +} + +int32_t +omp_get_place_num_procs_8_ (const int64_t *place_num) +{ + return omp_get_place_num_procs (TO_INT (*place_num)); +} + +void +omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids) +{ + omp_get_place_proc_ids (*place_num, ids); +} + +void +omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids) +{ + gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids); +} + +int32_t +omp_get_place_num_ (void) +{ + return omp_get_place_num (); +} + +int32_t 
+omp_get_partition_num_places_ (void) +{ + return omp_get_partition_num_places (); +} + +void +omp_get_partition_place_nums_ (int32_t *place_nums) +{ + omp_get_partition_place_nums (place_nums); +} + +void +omp_get_partition_place_nums_8_ (int64_t *place_nums) +{ + if (gomp_places_list == NULL) + return; + + struct gomp_thread *thr = gomp_thread (); + if (thr->place == 0) + gomp_init_affinity (); + + unsigned int i; + for (i = 0; i < thr->ts.place_partition_len; i++) + *place_nums++ = (int64_t) thr->ts.place_partition_off + i; +} + void omp_set_default_device_ (const int32_t *device_num) { @@ -493,3 +564,15 @@ omp_is_initial_device_ (void) { return omp_is_initial_device (); } + +int32_t +omp_get_initial_device_ (void) +{ + return omp_get_initial_device (); +} + +int32_t +omp_get_max_task_priority_ (void) +{ + return omp_get_max_task_priority (); +} diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 04262c4ab28..9c8b1fb8744 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -36,6 +36,11 @@ #ifndef LIBGOMP_H #define LIBGOMP_H 1 +#ifndef _LIBGOMP_CHECKING_ +/* Define to 1 to perform internal sanity checks. */ +#define _LIBGOMP_CHECKING_ 0 +#endif + #include "config.h" #include "gstdint.h" #include "libgomp-plugin.h" @@ -78,6 +83,44 @@ enum gomp_schedule_type GFS_AUTO }; +struct gomp_doacross_work_share +{ + union { + /* chunk_size copy, as ws->chunk_size is multiplied by incr for + GFS_DYNAMIC. */ + long chunk_size; + /* Likewise, but for ull implementation. */ + unsigned long long chunk_size_ull; + /* For schedule(static,0) this is the number + of iterations assigned to the last thread, i.e. number of + iterations / number of threads. */ + long q; + /* Likewise, but for ull implementation. */ + unsigned long long q_ull; + }; + /* Size of each array entry (padded to cache line size). */ + unsigned long elt_sz; + /* Number of dimensions in sink vectors. */ + unsigned int ncounts; + /* True if the iterations can be flattened. 
*/ + bool flattened; + /* Actual array (of elt_sz sized units), aligned to cache line size. + This is indexed by team_id for GFS_STATIC and outermost iteration + / chunk_size for other schedules. */ + unsigned char *array; + /* These two are only used for schedule(static,0). */ + /* This one is number of iterations % number of threads. */ + long t; + union { + /* And this one is cached t * (q + 1). */ + long boundary; + /* Likewise, but for the ull implementation. */ + unsigned long long boundary_ull; + }; + /* Array of shift counts for each dimension if they can be flattened. */ + unsigned int shift_counts[]; +}; + struct gomp_work_share { /* This member records the SCHEDULE clause to be used for this construct. @@ -109,13 +152,18 @@ struct gomp_work_share }; }; - /* This is a circular queue that details which threads will be allowed - into the ordered region and in which order. When a thread allocates - iterations on which it is going to work, it also registers itself at - the end of the array. When a thread reaches the ordered region, it - checks to see if it is the one at the head of the queue. If not, it - blocks on its RELEASE semaphore. */ - unsigned *ordered_team_ids; + union { + /* This is a circular queue that details which threads will be allowed + into the ordered region and in which order. When a thread allocates + iterations on which it is going to work, it also registers itself at + the end of the array. When a thread reaches the ordered region, it + checks to see if it is the one at the head of the queue. If not, it + blocks on its RELEASE semaphore. */ + unsigned *ordered_team_ids; + + /* This is a pointer to DOACROSS work share data. */ + struct gomp_doacross_work_share *doacross; + }; /* This is the number of threads that have registered themselves in the circular queue ordered_team_ids. 
*/ @@ -234,7 +282,7 @@ struct gomp_task_icv { unsigned long nthreads_var; enum gomp_schedule_type run_sched_var; - int run_sched_modifier; + int run_sched_chunk_size; int default_device_var; unsigned int thread_limit_var; bool dyn_var; @@ -263,9 +311,13 @@ extern char *goacc_device_type; enum gomp_task_kind { + /* Implicit task. */ GOMP_TASK_IMPLICIT, - GOMP_TASK_IFFALSE, + /* Undeferred task. */ + GOMP_TASK_UNDEFERRED, + /* Task created by GOMP_task and waiting to be run. */ GOMP_TASK_WAITING, + /* Task currently executing or scheduled and about to execute. */ GOMP_TASK_TIED }; @@ -275,10 +327,13 @@ struct htab; struct gomp_task_depend_entry { + /* Address of dependency. */ void *addr; struct gomp_task_depend_entry *next; struct gomp_task_depend_entry *prev; + /* Task that provides the dependency in ADDR. */ struct gomp_task *task; + /* Depend entry is of type "IN". */ bool is_in; bool redundant; bool redundant_out; @@ -306,19 +361,35 @@ struct gomp_taskwait struct gomp_task { + /* Parent circular list. See children description below. */ struct gomp_task *parent; + /* Circular list representing the children of this task. + + In this list we first have parent_depends_on ready to run tasks, + then !parent_depends_on ready to run tasks, and finally already + running tasks. */ struct gomp_task *children; struct gomp_task *next_child; struct gomp_task *prev_child; + /* Circular task_queue in `struct gomp_team'. + + GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */ struct gomp_task *next_queue; struct gomp_task *prev_queue; + /* Circular queue in gomp_taskgroup->children. + + GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */ struct gomp_task *next_taskgroup; struct gomp_task *prev_taskgroup; + /* Taskgroup this task belongs in. */ struct gomp_taskgroup *taskgroup; + /* Tasks that depend on this task. */ struct gomp_dependers_vec *dependers; struct htab *depend_hash; struct gomp_taskwait *taskwait; + /* Number of items in DEPEND. 
*/ size_t depend_count; + /* Number of tasks in the DEPENDERS field above. */ size_t num_dependees; struct gomp_task_icv icv; void (*fn) (void *); @@ -327,13 +398,23 @@ struct gomp_task bool in_tied_task; bool final_task; bool copy_ctors_done; + /* Set for undeferred tasks with unsatisfied dependencies which + block further execution of their parent until the dependencies + are satisfied. */ bool parent_depends_on; + /* Dependencies provided and/or needed for this task. DEPEND_COUNT + is the number of items available. */ struct gomp_task_depend_entry depend[]; }; struct gomp_taskgroup { struct gomp_taskgroup *prev; + /* Circular list of tasks that belong in this taskgroup. + + Tasks are chained by next/prev_taskgroup within gomp_task, and + are sorted by GOMP_TASK_WAITING tasks, and then GOMP_TASK_TIED + tasks. */ struct gomp_task *children; bool in_taskgroup_wait; bool cancelled; @@ -341,6 +422,17 @@ struct gomp_taskgroup size_t num_children; }; +struct gomp_target_task +{ + struct gomp_device_descr *devicep; + void (*fn) (void *); + size_t mapnum; + size_t *sizes; + unsigned short *kinds; + unsigned int flags; + void *hostaddrs[]; +}; + /* This structure describes a "team" of threads. These are the threads that are spawned by a PARALLEL constructs, as well as the work sharing constructs that the team encounters. */ @@ -403,6 +495,8 @@ struct gomp_team struct gomp_work_share work_shares[8]; gomp_mutex_t task_lock; + /* Scheduled tasks. Chain fields are next/prev_queue within a + gomp_task. */ struct gomp_task *task_queue; /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. 
*/ unsigned int task_count; @@ -531,6 +625,7 @@ extern bool gomp_affinity_same_place (void *, void *); extern bool gomp_affinity_finalize_place_list (bool); extern bool gomp_affinity_init_level (int, unsigned long, bool); extern void gomp_affinity_print_place (void *); +extern void gomp_get_place_proc_ids_8 (int, int64_t *); /* alloc.c */ @@ -600,6 +695,9 @@ extern void gomp_ordered_next (void); extern void gomp_ordered_static_init (void); extern void gomp_ordered_static_next (void); extern void gomp_ordered_sync (void); +extern void gomp_doacross_init (unsigned, long *, long); +extern void gomp_doacross_ull_init (unsigned, unsigned long long *, + unsigned long long); /* parallel.c */ @@ -616,6 +714,11 @@ extern void gomp_init_task (struct gomp_task *, struct gomp_task *, struct gomp_task_icv *); extern void gomp_end_task (void); extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); +extern void gomp_task_maybe_wait_for_dependencies (void **); +extern void gomp_create_target_task (struct gomp_device_descr *, + void (*) (void *), size_t, void **, + size_t *, unsigned short *, unsigned int, + void **); static void inline gomp_finish_task (struct gomp_task *task) @@ -636,11 +739,25 @@ extern void gomp_free_thread (void *); extern void gomp_init_targets_once (void); extern int gomp_get_num_devices (void); +extern void gomp_target_task_fn (void *); typedef struct splay_tree_node_s *splay_tree_node; typedef struct splay_tree_s *splay_tree; typedef struct splay_tree_key_s *splay_tree_key; +struct target_var_desc { + /* Splay key. */ + splay_tree_key key; + /* True if data should be copied from device to host at the end. */ + bool copy_from; + /* True if data always should be copied from device to host at the end. */ + bool always_copy_from; + /* Relative offset against key host_start. */ + uintptr_t offset; + /* Actual length. */ + uintptr_t length; +}; + struct target_mem_desc { /* Reference count. 
*/ uintptr_t refcount; @@ -660,11 +777,14 @@ struct target_mem_desc { /* Corresponding target device descriptor. */ struct gomp_device_descr *device_descr; - /* List of splay keys to remove (or decrease refcount) + /* List of target items to remove (or decrease refcount) at the end of region. */ - splay_tree_key list[]; + struct target_var_desc list[]; }; +/* Special value for refcount - infinity. */ +#define REFCOUNT_INFINITY (~(uintptr_t) 0) + struct splay_tree_key_s { /* Address of the host object. */ uintptr_t host_start; @@ -678,8 +798,6 @@ struct splay_tree_key_s { uintptr_t refcount; /* Asynchronous reference count. */ uintptr_t async_refcount; - /* True if data should be copied from device to host at the end. */ - bool copy_from; }; #include "splay-tree.h" @@ -757,6 +875,7 @@ struct gomp_device_descr void (*free_func) (int, void *); void *(*dev2host_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t); + void *(*dev2dev_func) (int, void *, const void *, size_t); void (*run_func) (int, void *, void *); /* Splay tree containing information about mapped memory regions. */ @@ -774,12 +893,22 @@ struct gomp_device_descr acc_dispatch_t openacc; }; +/* Kind of the pragma, for which gomp_map_vars () is called. 
*/ +enum gomp_map_vars_kind +{ + GOMP_MAP_VARS_OPENACC, + GOMP_MAP_VARS_TARGET, + GOMP_MAP_VARS_DATA, + GOMP_MAP_VARS_ENTER_DATA +}; + extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); extern void gomp_acc_remove_pointer (void *, bool, int, int); extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, - size_t *, void *, bool, bool); + size_t *, void *, bool, + enum gomp_map_vars_kind); extern void gomp_copy_from_async (struct target_mem_desc *); extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_init_device (struct gomp_device_descr *); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 3b3e0c2ac73..2153661ed5a 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -134,6 +134,36 @@ OMP_4.0 { omp_is_initial_device_; } OMP_3.1; +OMP_4.5 { + global: + omp_get_max_task_priority; + omp_get_max_task_priority_; + omp_get_num_places; + omp_get_num_places_; + omp_get_place_num_procs; + omp_get_place_num_procs_; + omp_get_place_num_procs_8_; + omp_get_place_proc_ids; + omp_get_place_proc_ids_; + omp_get_place_proc_ids_8_; + omp_get_place_num; + omp_get_place_num_; + omp_get_partition_num_places; + omp_get_partition_num_places_; + omp_get_partition_place_nums; + omp_get_partition_place_nums_; + omp_get_partition_place_nums_8_; + omp_get_initial_device; + omp_get_initial_device_; + omp_target_alloc; + omp_target_free; + omp_target_is_present; + omp_target_memcpy; + omp_target_memcpy_rect; + omp_target_associate_ptr; + omp_target_disassociate_ptr; +} OMP_4.0; + GOMP_1.0 { global: GOMP_atomic_end; @@ -234,10 +264,28 @@ GOMP_4.0.1 { GOMP_offload_unregister; } GOMP_4.0; -GOMP_4.0.2 { +GOMP_4.5 { global: + GOMP_target_41; + GOMP_target_data_41; + GOMP_target_update_41; + GOMP_target_enter_exit_data; + GOMP_taskloop; + GOMP_taskloop_ull; GOMP_offload_register_ver; GOMP_offload_unregister_ver; + GOMP_loop_doacross_dynamic_start; + GOMP_loop_doacross_guided_start; + 
GOMP_loop_doacross_runtime_start; + GOMP_loop_doacross_static_start; + GOMP_doacross_post; + GOMP_doacross_wait; + GOMP_loop_ull_doacross_dynamic_start; + GOMP_loop_ull_doacross_guided_start; + GOMP_loop_ull_doacross_runtime_start; + GOMP_loop_ull_doacross_static_start; + GOMP_doacross_ull_post; + GOMP_doacross_ull_wait; } GOMP_4.0.1; OACC_2.0 { diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 06b1c67fc02..67e6d199066 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -155,6 +155,7 @@ linkage, and do not throw exceptions. * omp_get_dynamic:: Dynamic teams setting * omp_get_level:: Number of parallel regions * omp_get_max_active_levels:: Maximum number of active regions +* omp_get_max_task_priority:: Maximum task priority value that can be set * omp_get_max_threads:: Maximum number of threads of parallel region * omp_get_nested:: Nested parallel regions * omp_get_num_devices:: Number of target devices @@ -388,6 +389,27 @@ This function obtains the maximum allowed number of nested, active parallel regi @end table +@node omp_get_max_task_priority +@section @code{omp_get_max_task_priority} -- Maximum priority value +that can be set for tasks. +@table @asis +@item @emph{Description}: +This function obtains the maximum allowed priority number for tasks. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{int omp_get_max_task_priority(void);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{integer function omp_get_max_task_priority()} +@end multitable + +@item @emph{Reference}: +@uref{http://www.openmp.org/, OpenMP specification v4.5}, Section 3.2.29. +@end table + @node omp_get_max_threads @section @code{omp_get_max_threads} -- Maximum number of threads of parallel region @@ -581,18 +603,18 @@ set via @env{OMP_PROC_BIND}. Possible values are @code{omp_proc_bind_false}, Obtain the runtime scheduling method. 
The @var{kind} argument will be set to the value @code{omp_sched_static}, @code{omp_sched_dynamic}, @code{omp_sched_guided} or @code{omp_sched_auto}. The second argument, -@var{modifier}, is set to the chunk size. +@var{chunk_size}, is set to the chunk size. @item @emph{C/C++} @multitable @columnfractions .20 .80 -@item @emph{Prototype}: @tab @code{void omp_get_schedule(omp_sched_t *kind, int *modifier);} +@item @emph{Prototype}: @tab @code{void omp_get_schedule(omp_sched_t *kind, int *chunk_size);} @end multitable @item @emph{Fortran}: @multitable @columnfractions .20 .80 -@item @emph{Interface}: @tab @code{subroutine omp_get_schedule(kind, modifier)} +@item @emph{Interface}: @tab @code{subroutine omp_get_schedule(kind, chunk_size)} @item @tab @code{integer(kind=omp_sched_kind) kind} -@item @tab @code{integer modifier} +@item @tab @code{integer chunk_size} @end multitable @item @emph{See also}: @@ -929,19 +951,19 @@ Sets the runtime scheduling method. The @var{kind} argument can have the value @code{omp_sched_static}, @code{omp_sched_dynamic}, @code{omp_sched_guided} or @code{omp_sched_auto}. Except for @code{omp_sched_auto}, the chunk size is set to the value of -@var{modifier} if positive, or to the default value if zero or negative. -For @code{omp_sched_auto} the @var{modifier} argument is ignored. +@var{chunk_size} if positive, or to the default value if zero or negative. +For @code{omp_sched_auto} the @var{chunk_size} argument is ignored. 
@item @emph{C/C++} @multitable @columnfractions .20 .80 -@item @emph{Prototype}: @tab @code{void omp_set_schedule(omp_sched_t kind, int modifier);} +@item @emph{Prototype}: @tab @code{void omp_set_schedule(omp_sched_t kind, int chunk_size);} @end multitable @item @emph{Fortran}: @multitable @columnfractions .20 .80 -@item @emph{Interface}: @tab @code{subroutine omp_set_schedule(kind, modifier)} +@item @emph{Interface}: @tab @code{subroutine omp_set_schedule(kind, chunk_size)} @item @tab @code{integer(kind=omp_sched_kind) kind} -@item @tab @code{integer modifier} +@item @tab @code{integer chunk_size} @end multitable @item @emph{See also}: @@ -1311,6 +1333,7 @@ beginning with @env{GOMP_} are GNU extensions. * OMP_DEFAULT_DEVICE:: Set the device used in target regions * OMP_DYNAMIC:: Dynamic adjustment of threads * OMP_MAX_ACTIVE_LEVELS:: Set the maximum number of nested parallel regions +* OMP_MAX_TASK_PRIORITY:: Set the maximum task priority value * OMP_NESTED:: Nested parallel regions * OMP_NUM_THREADS:: Specifies the number of threads to use * OMP_PROC_BIND:: Whether threads may be moved between CPUs @@ -1420,6 +1443,26 @@ If undefined, the number of active levels is unlimited. +@node OMP_MAX_TASK_PRIORITY +@section @env{OMP_MAX_TASK_PRIORITY} -- Set the maximum priority +number that can be set for a task. +@cindex Environment Variable +@table @asis +@item @emph{Description}: +Specifies the initial value for the maximum priority value that can be +set for a task. The value of this variable shall be a non-negative +integer, and zero is allowed. If undefined, the default priority is +0. 
+ +@item @emph{See also}: +@ref{omp_get_max_task_priority} + +@item @emph{Reference}: +@uref{http://www.openmp.org/, OpenMP specification v4.5}, Section 4.14 +@end table + + + @node OMP_NESTED @section @env{OMP_NESTED} -- Nested parallel regions @cindex Environment Variable diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index e7f4effaf48..c28ad2116dc 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -71,6 +71,15 @@ extern bool GOMP_loop_ordered_dynamic_next (long *, long *); extern bool GOMP_loop_ordered_guided_next (long *, long *); extern bool GOMP_loop_ordered_runtime_next (long *, long *); +extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *, + long *); +extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *, + long *); +extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, + long *); +extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, + long *); + extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, unsigned, long, long, long, long); extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *, @@ -164,10 +173,34 @@ extern bool GOMP_loop_ull_ordered_guided_next (unsigned long long *, extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_doacross_static_start (unsigned, + unsigned long long *, + unsigned long long, + unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned, + unsigned long long *, + unsigned long long, + unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_doacross_guided_start (unsigned, + unsigned long long *, + unsigned long long, + unsigned long long *, + unsigned long long *); +extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, + unsigned long long *, + unsigned long long *, + unsigned long long *); + /* ordered.c */ extern void GOMP_ordered_start (void); 
extern void GOMP_ordered_end (void); +extern void GOMP_doacross_post (long *); +extern void GOMP_doacross_wait (long, ...); +extern void GOMP_doacross_ull_post (unsigned long long *); +extern void GOMP_doacross_ull_wait (unsigned long long, ...); /* parallel.c */ @@ -180,7 +213,15 @@ extern bool GOMP_cancellation_point (int); /* task.c */ extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), - long, long, bool, unsigned, void **); + long, long, bool, unsigned, void **, int); +extern void GOMP_taskloop (void (*) (void *), void *, + void (*) (void *, void *), long, long, unsigned, + unsigned long, int, long, long, long); +extern void GOMP_taskloop_ull (void (*) (void *), void *, + void (*) (void *, void *), long, long, + unsigned, unsigned long, int, + unsigned long long, unsigned long long, + unsigned long long); extern void GOMP_taskwait (void); extern void GOMP_taskyield (void); extern void GOMP_taskgroup_start (void); @@ -208,11 +249,20 @@ extern void GOMP_single_copy_end (void *); extern void GOMP_target (int, void (*) (void *), const void *, size_t, void **, size_t *, unsigned char *); +extern void GOMP_target_41 (int, void (*) (void *), size_t, void **, size_t *, + unsigned short *, unsigned int, void **); extern void GOMP_target_data (int, const void *, size_t, void **, size_t *, unsigned char *); +extern void GOMP_target_data_41 (int, size_t, void **, size_t *, + unsigned short *); extern void GOMP_target_end_data (void); extern void GOMP_target_update (int, const void *, size_t, void **, size_t *, unsigned char *); +extern void GOMP_target_update_41 (int, size_t, void **, size_t *, + unsigned short *, unsigned int, void **); +extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, + unsigned short *, unsigned int, + void **); extern void GOMP_teams (unsigned int, unsigned int); /* oacc-parallel.c */ diff --git a/libgomp/loop.c b/libgomp/loop.c index 27d78db7a56..812f66cd725 100644 --- a/libgomp/loop.c +++ 
b/libgomp/loop.c @@ -169,13 +169,16 @@ GOMP_loop_runtime_start (long start, long end, long incr, switch (icv->run_sched_var) { case GFS_STATIC: - return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier, + return gomp_loop_static_start (start, end, incr, + icv->run_sched_chunk_size, istart, iend); case GFS_DYNAMIC: - return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier, + return gomp_loop_dynamic_start (start, end, incr, + icv->run_sched_chunk_size, istart, iend); case GFS_GUIDED: - return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier, + return gomp_loop_guided_start (start, end, incr, + icv->run_sched_chunk_size, istart, iend); case GFS_AUTO: /* For now map to schedule(static), later on we could play with feedback @@ -266,15 +269,15 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr, { case GFS_STATIC: return gomp_loop_ordered_static_start (start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_DYNAMIC: return gomp_loop_ordered_dynamic_start (start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_GUIDED: return gomp_loop_ordered_guided_start (start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_AUTO: /* For now map to schedule(static), later on we could play with feedback @@ -286,6 +289,111 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr, } } +/* The *_doacross_*_start routines are similar. The only difference is that + this work-share construct is initialized to expect an ORDERED(N) - DOACROSS + section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 + and other COUNTS array elements tell the library number of iterations + in the ordered inner loops. 
*/ + +static bool +gomp_loop_doacross_static_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (gomp_work_share_start (false)) + { + gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, + GFS_STATIC, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + + return !gomp_iter_static_next (istart, iend); +} + +static bool +gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, + GFS_DYNAMIC, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + +#ifdef HAVE_SYNC_BUILTINS + ret = gomp_iter_dynamic_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_dynamic_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +static bool +gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, + GFS_GUIDED, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + +#ifdef HAVE_SYNC_BUILTINS + ret = gomp_iter_guided_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_guided_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +bool +GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, + long *istart, long *iend) +{ + struct gomp_task_icv *icv = gomp_icv (false); + switch (icv->run_sched_var) + { + case GFS_STATIC: + 
return gomp_loop_doacross_static_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_doacross_dynamic_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_doacross_guided_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback + driven choice. */ + return gomp_loop_doacross_static_start (ncounts, counts, + 0, istart, iend); + default: + abort (); + } +} + /* The *_next routines are called when the thread completes processing of the iteration block currently assigned to it. If the work-share construct is bound directly to a parallel construct, then the iteration @@ -484,7 +592,7 @@ GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data, { struct gomp_task_icv *icv = gomp_icv (false); gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - icv->run_sched_var, icv->run_sched_modifier, 0); + icv->run_sched_var, icv->run_sched_chunk_size, 0); } ialias_redirect (GOMP_parallel_end) @@ -529,7 +637,7 @@ GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, { struct gomp_task_icv *icv = gomp_icv (false); gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - icv->run_sched_var, icv->run_sched_modifier, + icv->run_sched_var, icv->run_sched_chunk_size, flags); fn (data); GOMP_parallel_end (); @@ -578,6 +686,13 @@ extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start __attribute__((alias ("gomp_loop_ordered_guided_start"))); +extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start + __attribute__((alias ("gomp_loop_doacross_static_start"))); +extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start + __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); 
+extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start + __attribute__((alias ("gomp_loop_doacross_guided_start"))); + extern __typeof(gomp_loop_static_next) GOMP_loop_static_next __attribute__((alias ("gomp_loop_static_next"))); extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next @@ -638,6 +753,30 @@ GOMP_loop_ordered_guided_start (long start, long end, long incr, } bool +GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool +GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool +GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, + long chunk_size, long *istart, long *iend) +{ + return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool GOMP_loop_static_next (long *istart, long *iend) { return gomp_loop_static_next (istart, iend); diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c index de56ae0b7ce..1f2ed546024 100644 --- a/libgomp/loop_ull.c +++ b/libgomp/loop_ull.c @@ -175,15 +175,15 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, { case GFS_STATIC: return gomp_loop_ull_static_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_DYNAMIC: return gomp_loop_ull_dynamic_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_GUIDED: return gomp_loop_ull_guided_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_AUTO: /* For now map to schedule(static), later on we could play with feedback @@ -279,15 +279,15 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, 
gomp_ull end, { case GFS_STATIC: return gomp_loop_ull_ordered_static_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_DYNAMIC: return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_GUIDED: return gomp_loop_ull_ordered_guided_start (up, start, end, incr, - icv->run_sched_modifier, + icv->run_sched_chunk_size, istart, iend); case GFS_AUTO: /* For now map to schedule(static), later on we could play with feedback @@ -299,6 +299,114 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, } } +/* The *_doacross_*_start routines are similar. The only difference is that + this work-share construct is initialized to expect an ORDERED(N) - DOACROSS + section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 + and other COUNTS array elements tell the library number of iterations + in the ordered inner loops. */ + +static bool +gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, + GFS_STATIC, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + + return !gomp_iter_ull_static_next (istart, iend); +} + +static bool +gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, + GFS_DYNAMIC, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 
+ ret = gomp_iter_ull_dynamic_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +static bool +gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, + GFS_GUIDED, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_work_share_init_done (); + } + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + ret = gomp_iter_ull_guided_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_guided_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +bool +GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_task_icv *icv = gomp_icv (false); + switch (icv->run_sched_var) + { + case GFS_STATIC: + return gomp_loop_ull_doacross_static_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_doacross_guided_start (ncounts, counts, + icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback + driven choice. */ + return gomp_loop_ull_doacross_static_start (ncounts, counts, + 0, istart, iend); + default: + abort (); + } +} + /* The *_next routines are called when the thread completes processing of the iteration block currently assigned to it. 
If the work-share construct is bound directly to a parallel construct, then the iteration @@ -466,6 +574,13 @@ extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynam extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); +extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start + __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); +extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start + __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); +extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start + __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); + extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next __attribute__((alias ("gomp_loop_ull_static_next"))); extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next @@ -535,6 +650,33 @@ GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, } bool +GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool +GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool +GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, gomp_ull *istart, + gomp_ull *iend) +{ + return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, + istart, iend); +} + +bool GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) { return gomp_loop_ull_static_next (istart, iend); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c 
index 90d43eb2b8a..af067d6e73c 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -289,7 +289,8 @@ acc_map_data (void *h, void *d, size_t s) if (d != h) gomp_fatal ("cannot map data on shared-memory system"); - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); } else { @@ -318,7 +319,7 @@ acc_map_data (void *h, void *d, size_t s) gomp_mutex_unlock (&acc_dev->lock); tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, false); + &kinds, true, GOMP_MAP_VARS_OPENACC); } gomp_mutex_lock (&acc_dev->lock); @@ -447,7 +448,7 @@ present_create_copy (unsigned f, void *h, size_t s) gomp_mutex_unlock (&acc_dev->lock); tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, - false); + GOMP_MAP_VARS_OPENACC); gomp_mutex_lock (&acc_dev->lock); @@ -594,7 +595,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, - NULL, sizes, kinds, true, false); + NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); gomp_mutex_lock (&acc_dev->lock); @@ -651,7 +652,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) } if (force_copyfrom) - t->list[0]->copy_from = 1; + t->list[0].copy_from = 1; gomp_mutex_unlock (&acc_dev->lock); diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index e31bc0a7bc6..b150106981e 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -168,12 +168,12 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), tgt_fn = (void (*)) fn; tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - false); + GOMP_MAP_VARS_OPENACC); devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) 
(tgt->list[i]->tgt->tgt_start - + tgt->list[i]->tgt_offset); + devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset); acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, async, dims, tgt); @@ -228,7 +228,8 @@ GOACC_data_start (int device, size_t mapnum, if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || host_fallback) { - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; @@ -237,7 +238,7 @@ GOACC_data_start (int device, size_t mapnum, gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - false); + GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index dac3e8ad6ef..090498ad784 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -62,6 +62,15 @@ typedef enum omp_proc_bind_t omp_proc_bind_spread = 4 } omp_proc_bind_t; +typedef enum omp_lock_hint_t +{ + omp_lock_hint_none = 0, + omp_lock_hint_uncontended = 1, + omp_lock_hint_contended = 2, + omp_lock_hint_nonspeculative = 4, + omp_lock_hint_speculative = 8, +} omp_lock_hint_t; + #ifdef __cplusplus extern "C" { # define __GOMP_NOTHROW throw () @@ -84,12 +93,16 @@ extern void omp_set_nested (int) __GOMP_NOTHROW; extern int omp_get_nested (void) __GOMP_NOTHROW; extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW; +extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t) + __GOMP_NOTHROW; extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_unset_lock (omp_lock_t *) __GOMP_NOTHROW; extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_init_nest_lock (omp_nest_lock_t *) 
__GOMP_NOTHROW; +extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t) + __GOMP_NOTHROW; extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; extern void omp_unset_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; @@ -112,6 +125,12 @@ extern int omp_in_final (void) __GOMP_NOTHROW; extern int omp_get_cancellation (void) __GOMP_NOTHROW; extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; +extern int omp_get_num_places (void) __GOMP_NOTHROW; +extern int omp_get_place_num_procs (int) __GOMP_NOTHROW; +extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW; +extern int omp_get_place_num (void) __GOMP_NOTHROW; +extern int omp_get_partition_num_places (void) __GOMP_NOTHROW; +extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; extern void omp_set_default_device (int) __GOMP_NOTHROW; extern int omp_get_default_device (void) __GOMP_NOTHROW; @@ -120,6 +139,24 @@ extern int omp_get_num_teams (void) __GOMP_NOTHROW; extern int omp_get_team_num (void) __GOMP_NOTHROW; extern int omp_is_initial_device (void) __GOMP_NOTHROW; +extern int omp_get_initial_device (void) __GOMP_NOTHROW; +extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; + +extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void omp_target_free (void *, int) __GOMP_NOTHROW; +extern int omp_target_is_present (void *, int) __GOMP_NOTHROW; +extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, + __SIZE_TYPE__, int, int) __GOMP_NOTHROW; +extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, + const __SIZE_TYPE__ *, int, int) + __GOMP_NOTHROW; +extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, + __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; #ifdef 
__cplusplus } diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 122563e625a..28df9c1664e 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -29,15 +29,31 @@ integer, parameter :: omp_nest_lock_kind = @OMP_NEST_LOCK_KIND@ integer, parameter :: omp_sched_kind = 4 integer, parameter :: omp_proc_bind_kind = 4 + integer, parameter :: omp_lock_hint_kind = 4 integer (omp_sched_kind), parameter :: omp_sched_static = 1 integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2 integer (omp_sched_kind), parameter :: omp_sched_guided = 3 integer (omp_sched_kind), parameter :: omp_sched_auto = 4 - integer (omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 - integer (omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 - integer (omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 - integer (omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 - integer (omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_false = 0 + integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_true = 1 + integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_master = 2 + integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_close = 3 + integer (omp_proc_bind_kind), & + parameter :: omp_proc_bind_spread = 4 + integer (omp_lock_hint_kind), & + parameter :: omp_lock_hint_none = 0 + integer (omp_lock_hint_kind), & + parameter :: omp_lock_hint_uncontended = 1 + integer (omp_lock_hint_kind), & + parameter :: omp_lock_hint_contended = 2 + integer (omp_lock_hint_kind), & + parameter :: omp_lock_hint_nonspeculative = 4 + integer (omp_lock_hint_kind), & + parameter :: omp_lock_hint_speculative = 8 end module module omp_lib @@ -53,6 +69,14 @@ end interface interface + subroutine omp_init_lock_with_hint (svar, hint) + use omp_lib_kinds + integer (omp_lock_kind), intent (out) :: svar + integer (omp_lock_hint_kind), intent (in) :: hint + end subroutine 
omp_init_lock_with_hint + end interface + + interface subroutine omp_init_nest_lock (nvar) use omp_lib_kinds integer (omp_nest_lock_kind), intent (out) :: nvar @@ -60,6 +84,14 @@ end interface interface + subroutine omp_init_nest_lock_with_hint (nvar, hint) + use omp_lib_kinds + integer (omp_nest_lock_kind), intent (out) :: nvar + integer (omp_lock_hint_kind), intent (in) :: hint + end subroutine omp_init_nest_lock_with_hint + end interface + + interface subroutine omp_destroy_lock (svar) use omp_lib_kinds integer (omp_lock_kind), intent (inout) :: svar @@ -199,28 +231,28 @@ end interface interface omp_set_schedule - subroutine omp_set_schedule (kind, modifier) + subroutine omp_set_schedule (kind, chunk_size) use omp_lib_kinds integer (omp_sched_kind), intent (in) :: kind - integer (4), intent (in) :: modifier + integer (4), intent (in) :: chunk_size end subroutine omp_set_schedule - subroutine omp_set_schedule_8 (kind, modifier) + subroutine omp_set_schedule_8 (kind, chunk_size) use omp_lib_kinds integer (omp_sched_kind), intent (in) :: kind - integer (8), intent (in) :: modifier + integer (8), intent (in) :: chunk_size end subroutine omp_set_schedule_8 end interface interface omp_get_schedule - subroutine omp_get_schedule (kind, modifier) + subroutine omp_get_schedule (kind, chunk_size) use omp_lib_kinds integer (omp_sched_kind), intent (out) :: kind - integer (4), intent (out) :: modifier + integer (4), intent (out) :: chunk_size end subroutine omp_get_schedule - subroutine omp_get_schedule_8 (kind, modifier) + subroutine omp_get_schedule_8 (kind, chunk_size) use omp_lib_kinds integer (omp_sched_kind), intent (out) :: kind - integer (8), intent (out) :: modifier + integer (8), intent (out) :: chunk_size end subroutine omp_get_schedule_8 end interface @@ -298,6 +330,58 @@ end function omp_get_proc_bind end interface + interface + function omp_get_num_places () + integer (4) :: omp_get_num_places + end function omp_get_num_places + end interface + + interface 
omp_get_place_num_procs + function omp_get_place_num_procs (place_num) + integer (4), intent(in) :: place_num + integer (4) :: omp_get_place_num_procs + end function omp_get_place_num_procs + + function omp_get_place_num_procs_8 (place_num) + integer (8), intent(in) :: place_num + integer (4) :: omp_get_place_num_procs_8 + end function omp_get_place_num_procs_8 + end interface + + interface omp_get_place_proc_ids + subroutine omp_get_place_proc_ids (place_num, ids) + integer (4), intent(in) :: place_num + integer (4), intent(out) :: ids(*) + end subroutine omp_get_place_proc_ids + + subroutine omp_get_place_proc_ids_8 (place_num, ids) + integer (8), intent(in) :: place_num + integer (8), intent(out) :: ids(*) + end subroutine omp_get_place_proc_ids_8 + end interface + + interface + function omp_get_place_num () + integer (4) :: omp_get_place_num + end function omp_get_place_num + end interface + + interface + function omp_get_partition_num_places () + integer (4) :: omp_get_partition_num_places + end function omp_get_partition_num_places + end interface + + interface omp_get_partition_place_nums + subroutine omp_get_partition_place_nums (place_nums) + integer (4), intent(out) :: place_nums(*) + end subroutine omp_get_partition_place_nums + + subroutine omp_get_partition_place_nums_8 (place_nums) + integer (8), intent(out) :: place_nums(*) + end subroutine omp_get_partition_place_nums_8 + end interface + interface omp_set_default_device subroutine omp_set_default_device (device_num) integer (4), intent (in) :: device_num @@ -337,4 +421,16 @@ end function omp_is_initial_device end interface + interface + function omp_get_initial_device () + integer (4) :: omp_get_initial_device + end function omp_get_initial_device + end interface + + interface + function omp_get_max_task_priority () + integer (4) :: omp_get_max_task_priority + end function omp_get_max_task_priority + end interface + end module omp_lib diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index 
d590bc15135..81662424500 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -46,9 +46,23 @@ parameter (omp_proc_bind_master = 2) parameter (omp_proc_bind_close = 3) parameter (omp_proc_bind_spread = 4) + integer omp_lock_hint_kind + parameter (omp_lock_hint_kind = 4) + integer (omp_lock_hint_kind) omp_lock_hint_none + integer (omp_lock_hint_kind) omp_lock_hint_uncontended + integer (omp_lock_hint_kind) omp_lock_hint_contended + integer (omp_lock_hint_kind) omp_lock_hint_nonspeculative + integer (omp_lock_hint_kind) omp_lock_hint_speculative + parameter (omp_lock_hint_none = 0) + parameter (omp_lock_hint_uncontended = 1) + parameter (omp_lock_hint_contended = 2) + parameter (omp_lock_hint_nonspeculative = 4) + parameter (omp_lock_hint_speculative = 8) parameter (openmp_version = 201307) external omp_init_lock, omp_init_nest_lock + external omp_init_lock_with_hint + external omp_init_nest_lock_with_hint external omp_destroy_lock, omp_destroy_nest_lock external omp_set_lock, omp_set_nest_lock external omp_unset_lock, omp_unset_nest_lock @@ -88,6 +102,17 @@ external omp_get_proc_bind integer(omp_proc_bind_kind) omp_get_proc_bind + integer(4) omp_get_num_places + external omp_get_num_places + integer(4) omp_get_place_num_procs + external omp_get_place_num_procs + external omp_get_place_proc_ids + integer(4) omp_get_place_num + external omp_get_place_num + integer(4) omp_get_partition_num_places + external omp_get_partition_num_places + external omp_get_partition_place_nums + external omp_set_default_device, omp_get_default_device external omp_get_num_devices, omp_get_num_teams external omp_get_team_num @@ -96,3 +121,8 @@ external omp_is_initial_device logical(4) omp_is_initial_device + external omp_get_initial_device + integer(4) omp_get_initial_device + + external omp_get_max_task_priority + integer(4) omp_get_max_task_priority diff --git a/libgomp/ordered.c b/libgomp/ordered.c index 69ca217b4d5..fdac3ee8f58 100644 --- a/libgomp/ordered.c +++ 
b/libgomp/ordered.c @@ -26,6 +26,9 @@ /* This file handles the ORDERED construct. */ #include "libgomp.h" +#include <stdarg.h> +#include <string.h> +#include "doacross.h" /* This function is called when first allocating an iteration block. That @@ -250,3 +253,521 @@ void GOMP_ordered_end (void) { } + +/* DOACROSS initialization. */ + +#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) + +void +gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_work_share *ws = thr->ts.work_share; + unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; + unsigned long ent, num_ents, elt_sz, shift_sz; + struct gomp_doacross_work_share *doacross; + + if (team == NULL || team->nthreads == 1) + return; + + for (i = 0; i < ncounts; i++) + { + /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ + if (counts[i] == 0) + return; + + if (num_bits <= MAX_COLLAPSED_BITS) + { + unsigned int this_bits; + if (counts[i] == 1) + this_bits = 1; + else + this_bits = __SIZEOF_LONG__ * __CHAR_BIT__ + - __builtin_clzl (counts[i] - 1); + if (num_bits + this_bits <= MAX_COLLAPSED_BITS) + { + bits[i] = this_bits; + num_bits += this_bits; + } + else + num_bits = MAX_COLLAPSED_BITS + 1; + } + } + + if (ws->sched == GFS_STATIC) + num_ents = team->nthreads; + else + num_ents = (counts[0] - 1) / chunk_size + 1; + if (num_bits <= MAX_COLLAPSED_BITS) + { + elt_sz = sizeof (unsigned long); + shift_sz = ncounts * sizeof (unsigned int); + } + else + { + elt_sz = sizeof (unsigned long) * ncounts; + shift_sz = 0; + } + elt_sz = (elt_sz + 63) & ~63UL; + + doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz + + shift_sz); + doacross->chunk_size = chunk_size; + doacross->elt_sz = elt_sz; + doacross->ncounts = ncounts; + doacross->flattened = false; + doacross->array = (unsigned char *) + ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) + & ~(uintptr_t) 63); + if 
(num_bits <= MAX_COLLAPSED_BITS) + { + unsigned int shift_count = 0; + doacross->flattened = true; + for (i = ncounts; i > 0; i--) + { + doacross->shift_counts[i - 1] = shift_count; + shift_count += bits[i - 1]; + } + for (ent = 0; ent < num_ents; ent++) + *(unsigned long *) (doacross->array + ent * elt_sz) = 0; + } + else + for (ent = 0; ent < num_ents; ent++) + memset (doacross->array + ent * elt_sz, '\0', + sizeof (unsigned long) * ncounts); + if (ws->sched == GFS_STATIC && chunk_size == 0) + { + unsigned long q = counts[0] / num_ents; + unsigned long t = counts[0] % num_ents; + doacross->boundary = t * (q + 1); + doacross->q = q; + doacross->t = t; + } + ws->doacross = doacross; +} + +/* DOACROSS POST operation. */ + +void +GOMP_doacross_post (long *counts) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_work_share *ws = thr->ts.work_share; + struct gomp_doacross_work_share *doacross = ws->doacross; + unsigned long ent; + unsigned int i; + + if (__builtin_expect (doacross == NULL, 0)) + { + __sync_synchronize (); + return; + } + + if (__builtin_expect (ws->sched == GFS_STATIC, 1)) + ent = thr->ts.team_id; + else + ent = counts[0] / doacross->chunk_size; + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + + if (__builtin_expect (doacross->flattened, 1)) + { + unsigned long flattened + = (unsigned long) counts[0] << doacross->shift_counts[0]; + + for (i = 1; i < doacross->ncounts; i++) + flattened |= (unsigned long) counts[i] + << doacross->shift_counts[i]; + flattened++; + if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) + __atomic_thread_fence (MEMMODEL_RELEASE); + else + __atomic_store_n (array, flattened, MEMMODEL_RELEASE); + return; + } + + __atomic_thread_fence (MEMMODEL_ACQUIRE); + for (i = doacross->ncounts; i-- > 0; ) + { + if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) + __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); + } +} + +/* DOACROSS WAIT 
operation. */ + +void +GOMP_doacross_wait (long first, ...) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_work_share *ws = thr->ts.work_share; + struct gomp_doacross_work_share *doacross = ws->doacross; + va_list ap; + unsigned long ent; + unsigned int i; + + if (__builtin_expect (doacross == NULL, 0)) + { + __sync_synchronize (); + return; + } + + if (__builtin_expect (ws->sched == GFS_STATIC, 1)) + { + if (ws->chunk_size == 0) + { + if (first < doacross->boundary) + ent = first / (doacross->q + 1); + else + ent = (first - doacross->boundary) / doacross->q + + doacross->t; + } + else + ent = first / ws->chunk_size % thr->ts.team->nthreads; + } + else + ent = first / doacross->chunk_size; + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + + if (__builtin_expect (doacross->flattened, 1)) + { + unsigned long flattened + = (unsigned long) first << doacross->shift_counts[0]; + unsigned long cur; + + va_start (ap, first); + for (i = 1; i < doacross->ncounts; i++) + flattened |= (unsigned long) va_arg (ap, long) + << doacross->shift_counts[i]; + cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); + if (flattened < cur) + { + __atomic_thread_fence (MEMMODEL_RELEASE); + va_end (ap); + return; + } + doacross_spin (array, flattened, cur); + __atomic_thread_fence (MEMMODEL_RELEASE); + va_end (ap); + return; + } + + do + { + va_start (ap, first); + for (i = 0; i < doacross->ncounts; i++) + { + unsigned long thisv + = (unsigned long) (i ? 
va_arg (ap, long) : first) + 1; + unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); + if (thisv < cur) + { + i = doacross->ncounts; + break; + } + if (thisv > cur) + break; + } + va_end (ap); + if (i == doacross->ncounts) + break; + cpu_relax (); + } + while (1); + __sync_synchronize (); +} + +typedef unsigned long long gomp_ull; + +void +gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_work_share *ws = thr->ts.work_share; + unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; + unsigned long ent, num_ents, elt_sz, shift_sz; + struct gomp_doacross_work_share *doacross; + + if (team == NULL || team->nthreads == 1) + return; + + for (i = 0; i < ncounts; i++) + { + /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ + if (counts[i] == 0) + return; + + if (num_bits <= MAX_COLLAPSED_BITS) + { + unsigned int this_bits; + if (counts[i] == 1) + this_bits = 1; + else + this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__ + - __builtin_clzll (counts[i] - 1); + if (num_bits + this_bits <= MAX_COLLAPSED_BITS) + { + bits[i] = this_bits; + num_bits += this_bits; + } + else + num_bits = MAX_COLLAPSED_BITS + 1; + } + } + + if (ws->sched == GFS_STATIC) + num_ents = team->nthreads; + else + num_ents = (counts[0] - 1) / chunk_size + 1; + if (num_bits <= MAX_COLLAPSED_BITS) + { + elt_sz = sizeof (unsigned long); + shift_sz = ncounts * sizeof (unsigned int); + } + else + { + if (sizeof (gomp_ull) == sizeof (unsigned long)) + elt_sz = sizeof (gomp_ull) * ncounts; + else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long)) + elt_sz = sizeof (unsigned long) * 2 * ncounts; + else + abort (); + shift_sz = 0; + } + elt_sz = (elt_sz + 63) & ~63UL; + + doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz + + shift_sz); + doacross->chunk_size_ull = chunk_size; + doacross->elt_sz = elt_sz; + doacross->ncounts = 
ncounts; + doacross->flattened = false; + doacross->boundary = 0; + doacross->array = (unsigned char *) + ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) + & ~(uintptr_t) 63); + if (num_bits <= MAX_COLLAPSED_BITS) + { + unsigned int shift_count = 0; + doacross->flattened = true; + for (i = ncounts; i > 0; i--) + { + doacross->shift_counts[i - 1] = shift_count; + shift_count += bits[i - 1]; + } + for (ent = 0; ent < num_ents; ent++) + *(unsigned long *) (doacross->array + ent * elt_sz) = 0; + } + else + for (ent = 0; ent < num_ents; ent++) + memset (doacross->array + ent * elt_sz, '\0', + sizeof (unsigned long) * ncounts); + if (ws->sched == GFS_STATIC && chunk_size == 0) + { + gomp_ull q = counts[0] / num_ents; + gomp_ull t = counts[0] % num_ents; + doacross->boundary_ull = t * (q + 1); + doacross->q_ull = q; + doacross->t = t; + } + ws->doacross = doacross; +} + +/* DOACROSS POST operation. */ + +void +GOMP_doacross_ull_post (gomp_ull *counts) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_work_share *ws = thr->ts.work_share; + struct gomp_doacross_work_share *doacross = ws->doacross; + unsigned long ent; + unsigned int i; + + if (__builtin_expect (doacross == NULL, 0)) + { + __sync_synchronize (); + return; + } + + if (__builtin_expect (ws->sched == GFS_STATIC, 1)) + ent = thr->ts.team_id; + else + ent = counts[0] / doacross->chunk_size_ull; + + if (__builtin_expect (doacross->flattened, 1)) + { + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + gomp_ull flattened + = counts[0] << doacross->shift_counts[0]; + + for (i = 1; i < doacross->ncounts; i++) + flattened |= counts[i] << doacross->shift_counts[i]; + flattened++; + if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) + __atomic_thread_fence (MEMMODEL_RELEASE); + else + __atomic_store_n (array, flattened, MEMMODEL_RELEASE); + return; + } + + __atomic_thread_fence (MEMMODEL_ACQUIRE); + if (sizeof (gomp_ull) == sizeof (unsigned long)) + { + gomp_ull 
*array = (gomp_ull *) (doacross->array + + ent * doacross->elt_sz); + + for (i = doacross->ncounts; i-- > 0; ) + { + if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) + __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); + } + } + else + { + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + + for (i = doacross->ncounts; i-- > 0; ) + { + gomp_ull cull = counts[i] + 1UL; + unsigned long c = (unsigned long) cull; + if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED)) + __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE); + c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); + if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED)) + __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE); + } + } +} + +/* DOACROSS WAIT operation. */ + +void +GOMP_doacross_ull_wait (gomp_ull first, ...) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_work_share *ws = thr->ts.work_share; + struct gomp_doacross_work_share *doacross = ws->doacross; + va_list ap; + unsigned long ent; + unsigned int i; + + if (__builtin_expect (doacross == NULL, 0)) + { + __sync_synchronize (); + return; + } + + if (__builtin_expect (ws->sched == GFS_STATIC, 1)) + { + if (ws->chunk_size_ull == 0) + { + if (first < doacross->boundary_ull) + ent = first / (doacross->q_ull + 1); + else + ent = (first - doacross->boundary_ull) / doacross->q_ull + + doacross->t; + } + else + ent = first / ws->chunk_size_ull % thr->ts.team->nthreads; + } + else + ent = first / doacross->chunk_size_ull; + + if (__builtin_expect (doacross->flattened, 1)) + { + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + gomp_ull flattened = first << doacross->shift_counts[0]; + unsigned long cur; + + va_start (ap, first); + for (i = 1; i < doacross->ncounts; i++) + flattened |= va_arg (ap, gomp_ull) + << doacross->shift_counts[i]; + cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); + if (flattened < 
cur) + { + __atomic_thread_fence (MEMMODEL_RELEASE); + va_end (ap); + return; + } + doacross_spin (array, flattened, cur); + __atomic_thread_fence (MEMMODEL_RELEASE); + va_end (ap); + return; + } + + if (sizeof (gomp_ull) == sizeof (unsigned long)) + { + gomp_ull *array = (gomp_ull *) (doacross->array + + ent * doacross->elt_sz); + do + { + va_start (ap, first); + for (i = 0; i < doacross->ncounts; i++) + { + gomp_ull thisv + = (i ? va_arg (ap, gomp_ull) : first) + 1; + gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); + if (thisv < cur) + { + i = doacross->ncounts; + break; + } + if (thisv > cur) + break; + } + va_end (ap); + if (i == doacross->ncounts) + break; + cpu_relax (); + } + while (1); + } + else + { + unsigned long *array = (unsigned long *) (doacross->array + + ent * doacross->elt_sz); + do + { + va_start (ap, first); + for (i = 0; i < doacross->ncounts; i++) + { + gomp_ull thisv + = (i ? va_arg (ap, gomp_ull) : first) + 1; + unsigned long t + = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); + unsigned long cur + = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED); + if (t < cur) + { + i = doacross->ncounts; + break; + } + if (t > cur) + break; + t = thisv; + cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED); + if (t < cur) + { + i = doacross->ncounts; + break; + } + if (t > cur) + break; + } + va_end (ap); + if (i == doacross->ncounts) + break; + cpu_relax (); + } + while (1); + } + __sync_synchronize (); +} diff --git a/libgomp/target.c b/libgomp/target.c index 758ece5d78c..de6a2c9c9c5 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -38,6 +38,7 @@ #endif #include <string.h> #include <assert.h> +#include <errno.h> #ifdef PLUGIN_SUPPORT #include <dlfcn.h> @@ -133,17 +134,48 @@ resolve_device (int device_id) if (device_id < 0 || device_id >= gomp_get_num_devices ()) return NULL; + gomp_mutex_lock (&devices[device_id].lock); + if (!devices[device_id].is_initialized) + gomp_init_device (&devices[device_id]); + 
gomp_mutex_unlock (&devices[device_id].lock); + return &devices[device_id]; } -/* Handle the case where splay_tree_lookup found oldn for newn. +static inline splay_tree_key +gomp_map_lookup (splay_tree mem_map, splay_tree_key key) +{ + if (key->host_start != key->host_end) + return splay_tree_lookup (mem_map, key); + + key->host_end++; + splay_tree_key n = splay_tree_lookup (mem_map, key); + key->host_end--; + if (n) + return n; + key->host_start--; + n = splay_tree_lookup (mem_map, key); + key->host_start++; + if (n) + return n; + return splay_tree_lookup (mem_map, key); +} + +/* Handle the case where gomp_map_lookup found oldn for newn. Helper function of gomp_map_vars. */ static inline void gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, - splay_tree_key newn, unsigned char kind) + splay_tree_key newn, struct target_var_desc *tgt_var, + unsigned char kind) { + tgt_var->key = oldn; + tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); + tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); + tgt_var->offset = newn->host_start - oldn->host_start; + tgt_var->length = newn->host_end - newn->host_start; + if ((kind & GOMP_MAP_FLAG_FORCE) || oldn->host_start > newn->host_start || oldn->host_end < newn->host_end) @@ -154,14 +186,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, (void *) newn->host_start, (void *) newn->host_end, (void *) oldn->host_start, (void *) oldn->host_end); } - oldn->refcount++; + + if (GOMP_MAP_ALWAYS_TO_P (kind)) + devicep->host2dev_func (devicep->target_id, + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), + (void *) newn->host_start, + newn->host_end - newn->host_start); + if (oldn->refcount != REFCOUNT_INFINITY) + oldn->refcount++; } static int -get_kind (bool is_openacc, void *kinds, int idx) +get_kind (bool short_mapkind, void *kinds, int idx) { - return is_openacc ? 
((unsigned short *) kinds)[idx] - : ((unsigned char *) kinds)[idx]; + return short_mapkind ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; } static void @@ -185,20 +225,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, } /* Add bias to the pointer value. */ cur_node.host_start += bias; - cur_node.host_end = cur_node.host_start + 1; - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - /* Could be possibly zero size array section. */ - cur_node.host_end--; - n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - cur_node.host_start--; - n = splay_tree_lookup (mem_map, &cur_node); - cur_node.host_start++; - } - } + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); if (n == NULL) { gomp_mutex_unlock (&devicep->lock); @@ -218,20 +246,81 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, sizeof (void *)); } +static void +gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, + size_t first, size_t i, void **hostaddrs, + size_t *sizes, void *kinds) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int kind; + const bool short_mapkind = true; + const int typemask = short_mapkind ? 
0xff : 0x7; + + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); + kind = get_kind (short_mapkind, kinds, i); + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, + &tgt->list[i], kind & typemask); + return; + } + if (sizes[i] == 0) + { + if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) + { + cur_node.host_start--; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_start++; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start + == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + cur_node.host_end++; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_end--; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to map into device [%p..%p) structure element when " + "other mapped elements from the same structure weren't mapped " + "together with it", (void *) cur_node.host_start, + (void *) cur_node.host_end); +} + attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool is_openacc, bool is_target) + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; - const int rshift = is_openacc ? 8 : 3; - const int typemask = is_openacc ? 0xff : 0x7; + bool has_firstprivate = false; + const int rshift = short_mapkind ? 8 : 3; + const int typemask = short_mapkind ? 
0xff : 0x7; struct splay_tree_s *mem_map = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; if (mapnum == 0) @@ -239,7 +328,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_align = sizeof (void *); tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -250,10 +339,61 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, for (i = 0; i < mapnum; i++) { - int kind = get_kind (is_openacc, kinds, i); - if (hostaddrs[i] == NULL) + int kind = get_kind (short_mapkind, kinds, i); + if (hostaddrs[i] == NULL + || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("use_device_ptr pointer wasn't mapped"); + } + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_STRUCT) + { + size_t first = i + 1; + size_t last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 2; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + if 
(tgt_align < align) + tgt_align = align; + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i]; + not_found_cnt += last - i; + for (i = first; i <= last; i++) + tgt->list[i].key = NULL; + i--; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; @@ -261,15 +401,37 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n) + if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) + { + tgt->list[i].key = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - cur_node.host_start; + has_firstprivate = true; + continue; + } + splay_tree_key n; + if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask); + n = gomp_map_lookup (mem_map, &cur_node); + if (!n) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 1; + continue; + } } else + n = splay_tree_lookup (mem_map, &cur_node); + if (n) + gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], + kind & typemask); + else { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; size_t align = (size_t) 1 << (kind >> rshift); not_found_cnt++; @@ -281,7 +443,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { size_t j; for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] 
< cur_node.host_start @@ -290,7 +452,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = NULL; + tgt->list[j].key = NULL; i++; } } @@ -308,7 +470,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || is_target) + else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -327,22 +489,74 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; - if (not_found_cnt) + if (not_found_cnt || has_firstprivate) { - tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); + if (not_found_cnt) + tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); splay_tree_node array = tgt->array; - size_t j; + size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; + uintptr_t field_tgt_base = 0; for (i = 0; i < mapnum; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (hostaddrs[i] == NULL) continue; + switch (kind & typemask) + { + size_t align, len, first, last; + splay_tree_key n; + case GOMP_MAP_FIRSTPRIVATE: + align = (size_t) 1 << (kind >> rshift); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].offset = tgt_size; + len = sizes[i]; + devicep->host2dev_func (devicep->target_id, + (void *) (tgt->tgt_start + tgt_size), + (void *) hostaddrs[i], len); + tgt_size += len; + continue; + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + continue; + case GOMP_MAP_STRUCT: + first = i + 1; + last = i + sizes[i]; + cur_node.host_start 
= (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + if (tgt->list[first].key != NULL) + continue; + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + field_tgt_base = (uintptr_t) hostaddrs[first]; + field_tgt_offset = tgt_size; + field_tgt_clear = last; + tgt_size += cur_node.host_end + - (uintptr_t) hostaddrs[first]; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; + continue; + default: + break; + } splay_tree_key k = &array->key; k->host_start = (uintptr_t) hostaddrs[i]; if (!GOMP_MAP_POINTER_P (kind & typemask)) @@ -351,19 +565,31 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); if (n) - { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, k, kind & typemask); - } + gomp_map_vars_existing (devicep, n, k, &tgt->list[i], + kind & typemask); else { size_t align = (size_t) 1 << (kind >> rshift); - tgt->list[i] = k; - tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].key = k; k->tgt = tgt; - k->tgt_offset = tgt_size; - tgt_size += k->host_end - k->host_start; - k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + if (field_tgt_clear != ~(size_t) 0) + { + k->tgt_offset = k->host_start - field_tgt_base + + field_tgt_offset; + if (i == field_tgt_clear) + field_tgt_clear = ~(size_t) 0; + } + else + { + tgt_size = (tgt_size + align - 1) & ~(align - 1); + k->tgt_offset = tgt_size; + tgt_size += k->host_end - k->host_start; + } + tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + tgt->list[i].always_copy_from + = 
GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + tgt->list[i].offset = 0; + tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; k->async_refcount = 0; tgt->refcount++; @@ -376,11 +602,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, case GOMP_MAP_FROM: case GOMP_MAP_FORCE_ALLOC: case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: break; case GOMP_MAP_TO: case GOMP_MAP_TOFROM: case GOMP_MAP_FORCE_TO: case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_TOFROM: /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ @@ -403,7 +632,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end - k->host_start); for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, + j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] < k->host_start @@ -412,8 +642,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = k; - k->refcount++; + tgt->list[j].key = k; + tgt->list[j].copy_from = false; + tgt->list[j].always_copy_from = false; + if (k->refcount != REFCOUNT_INFINITY) + k->refcount++; gomp_map_pointer (tgt, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset @@ -460,15 +693,30 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { - if (tgt->list[i] == NULL) - cur_node.tgt_offset = (uintptr_t) NULL; + if (tgt->list[i].key == NULL) + { + if (tgt->list[i].offset == ~(uintptr_t) 0) + cur_node.tgt_offset = (uintptr_t) hostaddrs[i]; + else if (tgt->list[i].offset == ~(uintptr_t) 1) + cur_node.tgt_offset = 0; + else if (tgt->list[i].offset == ~(uintptr_t) 2) + cur_node.tgt_offset = tgt->list[i + 1].key->tgt->tgt_start + + tgt->list[i + 
1].key->tgt_offset + + tgt->list[i + 1].offset + + (uintptr_t) hostaddrs[i] + - (uintptr_t) hostaddrs[i + 1]; + else + cur_node.tgt_offset = tgt->tgt_start + + tgt->list[i].offset; + } else - cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start - + tgt->list[i]->tgt_offset; + cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset; /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start @@ -478,6 +726,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } + /* If the variable from "omp target enter data" map-list was already mapped, + tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or + gomp_exit_data. */ + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { + free (tgt); + tgt = NULL; + } + gomp_mutex_unlock (&devicep->lock); return tgt; } @@ -508,17 +765,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt) gomp_mutex_lock (&devicep->lock); for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) ; - else if (tgt->list[i]->refcount > 1) + else if (tgt->list[i].key->refcount > 1) { - tgt->list[i]->refcount--; - tgt->list[i]->async_refcount++; + tgt->list[i].key->refcount--; + tgt->list[i].key->async_refcount++; } else { - splay_tree_key k = tgt->list[i]; - if (k->copy_from) + splay_tree_key k = tgt->list[i].key; + if (tgt->list[i].copy_from) devicep->dev2host_func (devicep->target_id, (void *) k->host_start, (void *) (k->tgt->tgt_start + k->tgt_offset), k->host_end - k->host_start); @@ -546,25 +803,41 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) size_t i; for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) - ; - else if (tgt->list[i]->refcount > 1) - tgt->list[i]->refcount--; - else if (tgt->list[i]->async_refcount > 0) - tgt->list[i]->async_refcount--; - else - { - splay_tree_key k = tgt->list[i]; - if (k->copy_from && 
do_copyfrom) - devicep->dev2host_func (devicep->target_id, (void *) k->host_start, - (void *) (k->tgt->tgt_start + k->tgt_offset), - k->host_end - k->host_start); - splay_tree_remove (&devicep->mem_map, k); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + { + splay_tree_key k = tgt->list[i].key; + if (k == NULL) + continue; + + bool do_unmap = false; + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + else if (k->refcount == 1) + { + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } + } + + if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + || tgt->list[i].always_copy_from) + devicep->dev2host_func (devicep->target_id, + (void *) (k->host_start + tgt->list[i].offset), + (void *) (k->tgt->tgt_start + k->tgt_offset + + tgt->list[i].offset), + tgt->list[i].length); + if (do_unmap) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + } if (tgt->refcount > 1) tgt->refcount--; @@ -576,11 +849,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) static void gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - size_t *sizes, void *kinds, bool is_openacc) + size_t *sizes, void *kinds, bool short_mapkind) { size_t i; struct splay_tree_key_s cur_node; - const int typemask = is_openacc ? 0xff : 0x7; + const int typemask = short_mapkind ? 
0xff : 0x7; if (!devicep) return; @@ -597,7 +870,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); if (n) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (n->host_start > cur_node.host_start || n->host_end < cur_node.host_end) { @@ -626,13 +899,6 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - n->host_start), cur_node.host_end - cur_node.host_start); } - else - { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Trying to update [%p..%p) object that is not mapped", - (void *) cur_node.host_start, - (void *) cur_node.host_end); - } } gomp_mutex_unlock (&devicep->lock); } @@ -678,7 +944,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); - tgt->refcount = 1; + tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; @@ -694,9 +960,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + 1; k->tgt = tgt; k->tgt_offset = target_table[i].start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -720,9 +985,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; k->tgt = tgt; k->tgt_offset = target_var->start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -945,6 +1209,47 @@ 
gomp_fini_device (struct gomp_device_descr *devicep) devicep->is_initialized = false; } +/* Host fallback for GOMP_target{,_41} routines. */ + +static void +gomp_target_fallback (void (*fn) (void *), void **hostaddrs) +{ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) + { + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; + } + fn (hostaddrs); + gomp_free_thread (thr); + *thr = old_thr; +} + +/* Helper function of GOMP_target{,_41} routines. */ + +static void * +gomp_get_target_fn_addr (struct gomp_device_descr *devicep, + void (*host_fn) (void *)) +{ + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + return (void *) host_fn; + else + { + gomp_mutex_lock (&devicep->lock); + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); + gomp_mutex_unlock (&devicep->lock); + if (tgt_fn == NULL) + gomp_fatal ("Target function wasn't mapped"); + + return (void *) tgt_fn->tgt_offset; + } +} + /* Called when encountering a target directive. If DEVICE is GOMP_DEVICE_ICV, it means use device-var ICV. If it is GOMP_DEVICE_HOST_FALLBACK (or any value @@ -964,51 +1269,85 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_fallback (fn, hostaddrs); + + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + + struct target_mem_desc *tgt_vars + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + GOMP_MAP_VARS_TARGET); + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) { - /* Host fallback. 
*/ - struct gomp_thread old_thr, *thr = gomp_thread (); - old_thr = *thr; - memset (thr, '\0', sizeof (*thr)); - if (gomp_places_list) - { - thr->place = old_thr.place; - thr->ts.place_partition_len = gomp_places_list_len; - } - fn (hostaddrs); - gomp_free_thread (thr); - *thr = old_thr; - return; + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; } + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); + gomp_free_thread (thr); + *thr = old_thr; + gomp_unmap_vars (tgt_vars, true); +} - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); +void +GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); - void *fn_addr; + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. 
*/ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + } - if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) - fn_addr = (void *) fn; - else + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { - gomp_mutex_lock (&devicep->lock); - struct splay_tree_key_s k; - k.host_start = (uintptr_t) fn; - k.host_end = k.host_start + 1; - splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); - if (tgt_fn == NULL) + size_t i, tgt_align = 0, tgt_size = 0; + char *tgt = NULL; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += sizes[i]; + } + if (tgt_align) { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Target function wasn't mapped"); + tgt = gomp_alloca (tgt_size + tgt_align - 1); + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; + } } - gomp_mutex_unlock (&devicep->lock); - - fn_addr = (void *) tgt_fn->tgt_offset; + gomp_target_fallback (fn, hostaddrs); + return; } + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - true); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_TARGET); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -1023,6 +1362,26 @@ GOMP_target 
(int device, void (*fn) (void *), const void *unused, gomp_unmap_vars (tgt_vars, true); } +/* Host fallback for GOMP_target_data{,_41} routines. */ + +static void +gomp_target_data_fallback (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + /* Even when doing a host fallback, if there are any active + #pragma omp target data constructs, need to remember the + new #pragma omp target data, otherwise GOMP_target_end_data + would get out of sync. */ + struct target_mem_desc *tgt + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, + GOMP_MAP_VARS_DATA); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } +} + void GOMP_target_data (int device, const void *unused, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) @@ -1031,31 +1390,29 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) - { - /* Host fallback. */ - struct gomp_task_icv *icv = gomp_icv (false); - if (icv->target_data) - { - /* Even when doing a host fallback, if there are any active - #pragma omp target data constructs, need to remember the - new #pragma omp target data, otherwise GOMP_target_end_data - would get out of sync. 
*/ - struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); - tgt->prev = icv->target_data; - icv->target_data = tgt; - } - return; - } - - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); + return gomp_target_data_fallback (); struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - false); + GOMP_MAP_VARS_DATA); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; +} + +void +GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_data_fallback (); + + struct target_mem_desc *tgt + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -1083,12 +1440,230 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) return; + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +} + +void +GOMP_target_update_41 (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. 
*/ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags | GOMP_TARGET_FLAG_UPDATE, + depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); +} + +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) + ? 
gomp_map_lookup (&devicep->mem_map, &cur_node) + : splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + if ((kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) + && k->refcount != REFCOUNT_INFINITY) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + gomp_mutex_unlock (&devicep->lock); +} - gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. 
*/ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags, depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + size_t i; + if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += sizes[i]; + } + else + gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); +} + +void +gomp_target_task_fn (void *data) +{ + struct gomp_target_task *ttask = (struct gomp_target_task *) data; + if (ttask->fn != NULL) + { + /* GOMP_target_41 */ + } + else if (ttask->devicep == NULL + || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + size_t i; + if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) + gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, true); + else if ((ttask->flags & 
GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1, + &ttask->hostaddrs[i], NULL, &ttask->sizes[i], + &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL, + &ttask->sizes[i], &ttask->kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs, + ttask->sizes, ttask->kinds); } void @@ -1103,6 +1678,384 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit) (void) num_teams; } +void * +omp_target_alloc (size_t size, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return malloc (size); + + if (device_num < 0) + return NULL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return NULL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return malloc (size); + + gomp_mutex_lock (&devicep->lock); + void *ret = devicep->alloc_func (devicep->target_id, size); + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +void +omp_target_free (void *device_ptr, int device_num) +{ + if (device_ptr == NULL) + return; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + { + free (device_ptr); + return; + } + + if (device_num < 0) + return; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + { + free (device_ptr); + return; + } + + gomp_mutex_lock (&devicep->lock); + devicep->free_func (devicep->target_id, device_ptr); + gomp_mutex_unlock (&devicep->lock); +} + +int +omp_target_is_present (void *ptr, int device_num) +{ + if (ptr == NULL) + return 1; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 1; + + if (device_num < 0) + return 0; + + struct gomp_device_descr *devicep = resolve_device 
(device_num); + if (devicep == NULL) + return 0; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return 1; + + gomp_mutex_lock (&devicep->lock); + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + int ret = n != NULL; + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, + size_t src_offset, int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + if (src_devicep == NULL && dst_devicep == NULL) + { + memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); + return 0; + } + if (src_devicep == NULL) + { + gomp_mutex_lock (&dst_devicep->lock); + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&dst_devicep->lock); + return 0; + } + if (dst_devicep == NULL) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + if (src_devicep == dst_devicep) + { + gomp_mutex_lock (&src_devicep->lock); + 
src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + return EINVAL; +} + +static int +omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + struct gomp_device_descr *dst_devicep, + struct gomp_device_descr *src_devicep) +{ + size_t dst_slice = element_size; + size_t src_slice = element_size; + size_t j, dst_off, src_off, length; + int i, ret; + + if (num_dims == 1) + { + if (__builtin_mul_overflow (element_size, volume[0], &length) + || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) + return EINVAL; + if (dst_devicep == NULL && src_devicep == NULL) + memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + else if (src_devicep == NULL) + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (dst_devicep == NULL) + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (src_devicep == dst_devicep) + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else + return EINVAL; + return 0; + } + + /* FIXME: it would be nice to have some plugin function to handle + num_dims == 2 and num_dims == 3 more efficiently. Larger ones can + be handled in the generic recursion below, and for host-host it + should be used even for any num_dims >= 2. 
*/ + + for (i = 1; i < num_dims; i++) + if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) + || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) + return EINVAL; + if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) + return EINVAL; + for (j = 0; j < volume[0]; j++) + { + ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, + (char *) src + src_off, + element_size, num_dims - 1, + volume + 1, dst_offsets + 1, + src_offsets + 1, dst_dimensions + 1, + src_dimensions + 1, dst_devicep, + src_devicep); + if (ret) + return ret; + dst_off += dst_slice; + src_off += src_slice; + } + return 0; +} + +int +omp_target_memcpy_rect (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (!dst && !src) + return INT_MAX; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + + if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) + return EINVAL; + + if (src_devicep) + gomp_mutex_lock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_lock (&dst_devicep->lock); + int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, + volume, 
dst_offsets, src_offsets, + dst_dimensions, src_dimensions, + dst_devicep, src_devicep); + if (src_devicep) + gomp_mutex_unlock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_unlock (&dst_devicep->lock); + return ret; +} + +int +omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, + size_t device_offset, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) host_ptr; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n) + { + if (n->tgt->tgt_start + n->tgt_offset + == (uintptr_t) device_ptr + device_offset + && n->host_start <= cur_node.host_start + && n->host_end >= cur_node.host_end) + ret = 0; + } + else + { + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->array = gomp_malloc (sizeof (*tgt->array)); + tgt->refcount = 1; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = NULL; + tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node array = tgt->array; + splay_tree_key k = &array->key; + k->host_start = cur_node.host_start; + k->host_end = cur_node.host_end; + k->tgt = tgt; + k->tgt_offset = (uintptr_t) device_ptr + device_offset; + k->refcount = REFCOUNT_INFINITY; + k->async_refcount = 0; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + ret = 0; + } + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_disassociate_ptr (void *ptr, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 
EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n + && n->host_start == cur_node.host_start + && n->refcount == REFCOUNT_INFINITY + && n->tgt->tgt_start == 0 + && n->tgt->to_free == NULL + && n->tgt->refcount == 1 + && n->tgt->list_count == 0) + { + splay_tree_remove (&devicep->mem_map, n); + gomp_unmap_tgt (n->tgt); + ret = 0; + } + + gomp_mutex_unlock (&devicep->lock); + return ret; +} + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed @@ -1153,7 +2106,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (host2dev); device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - DLSYM (run); + { + DLSYM (run); + DLSYM (dev2dev); + } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { if (!DLSYM_OPT (openacc.exec, openacc_parallel) diff --git a/libgomp/task.c b/libgomp/task.c index 74920d5ddb8..1246c6ae318 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -29,6 +29,7 @@ #include "libgomp.h" #include <stdlib.h> #include <string.h> +#include "gomp-constants.h" typedef struct gomp_task_depend_entry *hash_entry_type; @@ -91,6 +92,8 @@ gomp_end_task (void) thr->task = task->parent; } +/* Orphan the task in CHILDREN and all its siblings. 
*/ + static inline void gomp_clear_parent (struct gomp_task *children) { @@ -105,16 +108,136 @@ gomp_clear_parent (struct gomp_task *children) while (task != children); } -static void gomp_task_maybe_wait_for_dependencies (void **depend); +/* Helper function for GOMP_task and gomp_create_target_task. Depend clause + handling for undeferred task creation. */ + +static void +gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, + void **depend) +{ + size_t ndepend = (uintptr_t) depend[0]; + size_t nout = (uintptr_t) depend[1]; + size_t i; + hash_entry_type ent; + + task->depend_count = ndepend; + task->num_dependees = 0; + if (parent->depend_hash == NULL) + parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); + for (i = 0; i < ndepend; i++) + { + task->depend[i].addr = depend[2 + i]; + task->depend[i].next = NULL; + task->depend[i].prev = NULL; + task->depend[i].task = task; + task->depend[i].is_in = i >= nout; + task->depend[i].redundant = false; + task->depend[i].redundant_out = false; + + hash_entry_type *slot = htab_find_slot (&parent->depend_hash, + &task->depend[i], INSERT); + hash_entry_type out = NULL, last = NULL; + if (*slot) + { + /* If multiple depends on the same task are the same, all but the + first one are redundant. As inout/out come first, if any of them + is inout/out, it will win, which is the right semantics. */ + if ((*slot)->task == task) + { + task->depend[i].redundant = true; + continue; + } + for (ent = *slot; ent; ent = ent->next) + { + if (ent->redundant_out) + break; + + last = ent; + + /* depend(in:...) doesn't depend on earlier depend(in:...). 
*/ + if (i >= nout && ent->is_in) + continue; + + if (!ent->is_in) + out = ent; + + struct gomp_task *tsk = ent->task; + if (tsk->dependers == NULL) + { + tsk->dependers + = gomp_malloc (sizeof (struct gomp_dependers_vec) + + 6 * sizeof (struct gomp_task *)); + tsk->dependers->n_elem = 1; + tsk->dependers->allocated = 6; + tsk->dependers->elem[0] = task; + task->num_dependees++; + continue; + } + /* We already have some other dependency on tsk from earlier + depend clause. */ + else if (tsk->dependers->n_elem + && (tsk->dependers->elem[tsk->dependers->n_elem - 1] + == task)) + continue; + else if (tsk->dependers->n_elem == tsk->dependers->allocated) + { + tsk->dependers->allocated + = tsk->dependers->allocated * 2 + 2; + tsk->dependers + = gomp_realloc (tsk->dependers, + sizeof (struct gomp_dependers_vec) + + (tsk->dependers->allocated + * sizeof (struct gomp_task *))); + } + tsk->dependers->elem[tsk->dependers->n_elem++] = task; + task->num_dependees++; + } + task->depend[i].next = *slot; + (*slot)->prev = &task->depend[i]; + } + *slot = &task->depend[i]; + + /* There is no need to store more than one depend({,in}out:) task per + address in the hash table chain for the purpose of creation of + deferred tasks, because each out depends on all earlier outs, thus it + is enough to record just the last depend({,in}out:). For depend(in:), + we need to keep all of the previous ones not terminated yet, because + a later depend({,in}out:) might need to depend on all of them. So, if + the new task's clause is depend({,in}out:), we know there is at most + one other depend({,in}out:) clause in the list (out). For + non-deferred tasks we want to see all outs, so they are moved to the + end of the chain, after first redundant_out entry all following + entries should be redundant_out. 
*/ + if (!task->depend[i].is_in && out) + { + if (out != last) + { + out->next->prev = out->prev; + out->prev->next = out->next; + out->next = last->next; + out->prev = last; + last->next = out; + if (out->next) + out->next->prev = out; + } + out->redundant_out = true; + } + } +} /* Called when encountering an explicit task directive. If IF_CLAUSE is false, then we must not delay in executing the task. If UNTIED is true, - then the task may be executed by any member of the team. */ + then the task may be executed by any member of the team. + + DEPEND is an array containing: + depend[0]: number of depend elements. + depend[1]: number of depend elements of type "out". + depend[2..N+1]: address of [1..N]th depend element. */ void GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), long arg_size, long arg_align, bool if_clause, unsigned flags, - void **depend) + void **depend, int priority) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -126,8 +249,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), might be running on different thread than FN. */ if (cpyfn) if_clause = false; - if (flags & 1) - flags &= ~1; + flags &= ~GOMP_TASK_FLAG_UNTIED; #endif /* If parallel or taskgroup has been cancelled, don't start new tasks. */ @@ -136,6 +258,11 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) return; + if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) + priority = 0; + /* FIXME, use priority. */ + (void) priority; + if (!if_clause || team == NULL || (thr->task && thr->task->final_task) || team->task_count > 64 * team->nthreads) @@ -148,12 +275,14 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), depend clauses for non-deferred tasks other than this, because the parent task is suspended until the child task finishes and thus it can't start further child tasks. 
*/ - if ((flags & 8) && thr->task && thr->task->depend_hash) + if ((flags & GOMP_TASK_FLAG_DEPEND) + && thr->task && thr->task->depend_hash) gomp_task_maybe_wait_for_dependencies (depend); gomp_init_task (&task, thr->task, gomp_icv (false)); - task.kind = GOMP_TASK_IFFALSE; - task.final_task = (thr->task && thr->task->final_task) || (flags & 2); + task.kind = GOMP_TASK_UNDEFERRED; + task.final_task = (thr->task && thr->task->final_task) + || (flags & GOMP_TASK_FLAG_FINAL); if (thr->task) { task.in_tied_task = thr->task->in_tied_task; @@ -196,7 +325,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), bool do_wake; size_t depend_size = 0; - if (flags & 8) + if (flags & GOMP_TASK_FLAG_DEPEND) depend_size = ((uintptr_t) depend[0] * sizeof (struct gomp_task_depend_entry)); task = gomp_malloc (sizeof (*task) + depend_size @@ -204,7 +333,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) & ~(uintptr_t) (arg_align - 1)); gomp_init_task (task, parent, gomp_icv (false)); - task->kind = GOMP_TASK_IFFALSE; + task->kind = GOMP_TASK_UNDEFERRED; task->in_tied_task = parent->in_tied_task; task->taskgroup = taskgroup; thr->task = task; @@ -219,7 +348,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task->kind = GOMP_TASK_WAITING; task->fn = fn; task->fn_data = arg; - task->final_task = (flags & 2) >> 1; + task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ @@ -236,123 +365,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), taskgroup->num_children++; if (depend_size) { - size_t ndepend = (uintptr_t) depend[0]; - size_t nout = (uintptr_t) depend[1]; - size_t i; - hash_entry_type ent; - - task->depend_count = ndepend; - task->num_dependees = 0; - if (parent->depend_hash == NULL) - parent->depend_hash - = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); - for (i = 0; i < ndepend; i++) - { - task->depend[i].addr = depend[2 + i]; - task->depend[i].next = NULL; - task->depend[i].prev = NULL; - task->depend[i].task = task; - task->depend[i].is_in = i >= nout; - task->depend[i].redundant = false; - task->depend[i].redundant_out = false; - - hash_entry_type *slot - = htab_find_slot (&parent->depend_hash, &task->depend[i], - INSERT); - hash_entry_type out = NULL, last = NULL; - if (*slot) - { - /* If multiple depends on the same task are the - same, all but the first one are redundant. - As inout/out come first, if any of them is - inout/out, it will win, which is the right - semantics. */ - if ((*slot)->task == task) - { - task->depend[i].redundant = true; - continue; - } - for (ent = *slot; ent; ent = ent->next) - { - if (ent->redundant_out) - break; - - last = ent; - - /* depend(in:...) doesn't depend on earlier - depend(in:...). */ - if (i >= nout && ent->is_in) - continue; - - if (!ent->is_in) - out = ent; - - struct gomp_task *tsk = ent->task; - if (tsk->dependers == NULL) - { - tsk->dependers - = gomp_malloc (sizeof (struct gomp_dependers_vec) - + 6 * sizeof (struct gomp_task *)); - tsk->dependers->n_elem = 1; - tsk->dependers->allocated = 6; - tsk->dependers->elem[0] = task; - task->num_dependees++; - continue; - } - /* We already have some other dependency on tsk - from earlier depend clause. 
*/ - else if (tsk->dependers->n_elem - && (tsk->dependers->elem[tsk->dependers->n_elem - - 1] - == task)) - continue; - else if (tsk->dependers->n_elem - == tsk->dependers->allocated) - { - tsk->dependers->allocated - = tsk->dependers->allocated * 2 + 2; - tsk->dependers - = gomp_realloc (tsk->dependers, - sizeof (struct gomp_dependers_vec) - + (tsk->dependers->allocated - * sizeof (struct gomp_task *))); - } - tsk->dependers->elem[tsk->dependers->n_elem++] = task; - task->num_dependees++; - } - task->depend[i].next = *slot; - (*slot)->prev = &task->depend[i]; - } - *slot = &task->depend[i]; - - /* There is no need to store more than one depend({,in}out:) - task per address in the hash table chain for the purpose - of creation of deferred tasks, because each out - depends on all earlier outs, thus it is enough to record - just the last depend({,in}out:). For depend(in:), we need - to keep all of the previous ones not terminated yet, because - a later depend({,in}out:) might need to depend on all of - them. So, if the new task's clause is depend({,in}out:), - we know there is at most one other depend({,in}out:) clause - in the list (out). For non-deferred tasks we want to see - all outs, so they are moved to the end of the chain, - after first redundant_out entry all following entries - should be redundant_out. */ - if (!task->depend[i].is_in && out) - { - if (out != last) - { - out->next->prev = out->prev; - out->prev->next = out->next; - out->next = last->next; - out->prev = last; - last->next = out; - if (out->next) - out->next->prev = out; - } - out->redundant_out = true; - } - } + gomp_task_handle_depend (task, parent, depend); if (task->num_dependees) { gomp_mutex_unlock (&team->task_lock); @@ -374,6 +387,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), parent->children = task; if (taskgroup) { + /* If applicable, place task into its taskgroup. 
*/ if (taskgroup->children) { task->next_taskgroup = taskgroup->children; @@ -412,26 +426,340 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), } } +ialias (GOMP_taskgroup_start) +ialias (GOMP_taskgroup_end) + +#define TYPE long +#define UTYPE unsigned long +#define TYPE_is_long 1 +#include "taskloop.c" +#undef TYPE +#undef UTYPE +#undef TYPE_is_long + +#define TYPE unsigned long long +#define UTYPE TYPE +#define GOMP_taskloop GOMP_taskloop_ull +#include "taskloop.c" +#undef TYPE +#undef UTYPE +#undef GOMP_taskloop + +/* Called for nowait target tasks. */ + +void +gomp_create_target_task (struct gomp_device_descr *devicep, + void (*fn) (void *), size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + struct gomp_target_task *ttask; + struct gomp_task *task; + struct gomp_task *parent = thr->task; + struct gomp_taskgroup *taskgroup = parent->taskgroup; + bool do_wake; + size_t depend_size = 0; + + if (depend != NULL) + depend_size = ((uintptr_t) depend[0] + * sizeof (struct gomp_task_depend_entry)); + task = gomp_malloc (sizeof (*task) + depend_size + + sizeof (*ttask) + + mapnum * (sizeof (void *) + sizeof (size_t) + + sizeof (unsigned short))); + gomp_init_task (task, parent, gomp_icv (false)); + task->kind = GOMP_TASK_WAITING; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]]; + ttask->devicep = devicep; + ttask->fn = fn; + ttask->mapnum = mapnum; + memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); + ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; + memcpy 
(ttask->sizes, sizes, mapnum * sizeof (size_t)); + ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; + memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); + ttask->flags = flags; + task->fn = gomp_target_task_fn; + task->fn_data = ttask; + task->final_task = 0; + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) + || (taskgroup && taskgroup->cancelled), 0)) + { + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return; + } + if (taskgroup) + taskgroup->num_children++; + if (depend_size) + { + gomp_task_handle_depend (task, parent, depend); + if (task->num_dependees) + { + gomp_mutex_unlock (&team->task_lock); + return; + } + } + if (parent->children) + { + task->next_child = parent->children; + task->prev_child = parent->children->prev_child; + task->next_child->prev_child = task; + task->prev_child->next_child = task; + } + else + { + task->next_child = task; + task->prev_child = task; + } + parent->children = task; + if (taskgroup) + { + /* If applicable, place task into its taskgroup. 
*/ + if (taskgroup->children) + { + task->next_taskgroup = taskgroup->children; + task->prev_taskgroup = taskgroup->children->prev_taskgroup; + task->next_taskgroup->prev_taskgroup = task; + task->prev_taskgroup->next_taskgroup = task; + } + else + { + task->next_taskgroup = task; + task->prev_taskgroup = task; + } + taskgroup->children = task; + } + if (team->task_queue) + { + task->next_queue = team->task_queue; + task->prev_queue = team->task_queue->prev_queue; + task->next_queue->prev_queue = task; + task->prev_queue->next_queue = task; + } + else + { + task->next_queue = task; + task->prev_queue = task; + team->task_queue = task; + } + ++team->task_count; + ++team->task_queued_count; + gomp_team_barrier_set_task_pending (&team->barrier); + do_wake = team->task_running_count + !parent->in_tied_task + < team->nthreads; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) + gomp_team_barrier_wake (&team->barrier, 1); +} + +#if _LIBGOMP_CHECKING +/* Sanity check TASK to make sure it is in its parent's children + queue, and that the tasks therein are in the right order. + + The expected order is: + parent_depends_on WAITING tasks + !parent_depends_on WAITING tasks + TIED tasks + + PARENT is the alleged parent of TASK. */ + +static void +verify_children_queue (struct gomp_task *task, struct gomp_task *parent) +{ + if (task->parent != parent) + gomp_fatal ("verify_children_queue: incompatible parents"); + /* It's OK, Annie was an orphan and she turned out all right. 
*/ + if (!parent) + return; + + bool seen_tied = false; + bool seen_plain_waiting = false; + bool found = false; + struct gomp_task *t = parent->children; + while (1) + { + if (t == task) + found = true; + if (seen_tied && t->kind == GOMP_TASK_WAITING) + gomp_fatal ("verify_children_queue: WAITING task after TIED"); + if (t->kind == GOMP_TASK_TIED) + seen_tied = true; + else if (t->kind == GOMP_TASK_WAITING) + { + if (t->parent_depends_on) + { + if (seen_plain_waiting) + gomp_fatal ("verify_children_queue: parent_depends_on after " + "!parent_depends_on"); + } + else + seen_plain_waiting = true; + } + t = t->next_child; + if (t == parent->children) + break; + } + if (!found) + gomp_fatal ("verify_children_queue: child not found in parent queue"); +} + +/* Sanity check TASK to make sure it is in its taskgroup queue (if + applicable), and that the tasks therein are in the right order. + + The expected order is that GOMP_TASK_WAITING tasks must come before + GOMP_TASK_TIED tasks. + + TASK is the task. */ + +static void +verify_taskgroup_queue (struct gomp_task *task) +{ + struct gomp_taskgroup *taskgroup = task->taskgroup; + if (!taskgroup) + return; + + bool seen_tied = false; + bool found = false; + struct gomp_task *t = taskgroup->children; + while (1) + { + if (t == task) + found = true; + if (t->kind == GOMP_TASK_WAITING && seen_tied) + gomp_fatal ("verify_taskgroup_queue: WAITING task after TIED"); + if (t->kind == GOMP_TASK_TIED) + seen_tied = true; + t = t->next_taskgroup; + if (t == taskgroup->children) + break; + } + if (!found) + gomp_fatal ("verify_taskgroup_queue: child not found in parent queue"); +} + +/* Verify that TASK is in the team's task queue. 
*/ + +static void +verify_task_queue (struct gomp_task *task, struct gomp_team *team) +{ + struct gomp_task *t = team->task_queue; + if (team) + while (1) + { + if (t == task) + return; + t = t->next_queue; + if (t == team->task_queue) + break; + } + gomp_fatal ("verify_team_queue: child not in team"); +} +#endif + static inline bool gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, - struct gomp_taskgroup *taskgroup, struct gomp_team *team) + struct gomp_team *team) { +#if _LIBGOMP_CHECKING + verify_children_queue (child_task, parent); + verify_taskgroup_queue (child_task); + verify_task_queue (child_task, team); +#endif + if (parent) { + /* Adjust children such that it will point to a next child, + while the current one is scheduled to be executed. This way, + GOMP_taskwait (and others) can schedule a next task while + waiting. + + Do not remove it entirely from the circular list, as it is + still a child, though not one we should consider first (say + by GOMP_taskwait). */ if (parent->children == child_task) parent->children = child_task->next_child; + /* TIED tasks cannot come before WAITING tasks. If we're about + to make this task TIED, rewire things appropriately. + However, a TIED task at the end is perfectly fine. */ + else if (child_task->next_child->kind == GOMP_TASK_WAITING + && child_task->next_child != parent->children) + { + /* Remove from the list. */ + child_task->prev_child->next_child = child_task->next_child; + child_task->next_child->prev_child = child_task->prev_child; + /* Rewire at the end of its siblings. */ + child_task->next_child = parent->children; + child_task->prev_child = parent->children->prev_child; + parent->children->prev_child->next_child = child_task; + parent->children->prev_child = child_task; + } + + /* If the current task (child_task) is at the top of the + parent's last_parent_depends_on, it's about to be removed + from it. Adjust last_parent_depends_on appropriately. 
*/ if (__builtin_expect (child_task->parent_depends_on, 0) && parent->taskwait->last_parent_depends_on == child_task) { + /* The last_parent_depends_on list was built with all + parent_depends_on entries linked to the prev_child. Grab + the next last_parent_depends_on head from this prev_child if + available... */ if (child_task->prev_child->kind == GOMP_TASK_WAITING && child_task->prev_child->parent_depends_on) parent->taskwait->last_parent_depends_on = child_task->prev_child; else - parent->taskwait->last_parent_depends_on = NULL; + { + /* ...otherwise, there are no more parent_depends_on + entries waiting to run. In which case, clear the + list. */ + parent->taskwait->last_parent_depends_on = NULL; + } } } - if (taskgroup && taskgroup->children == child_task) - taskgroup->children = child_task->next_taskgroup; + + /* Adjust taskgroup to point to the next taskgroup. See note above + regarding adjustment of children as to why the child_task is not + removed entirely from the circular list. */ + struct gomp_taskgroup *taskgroup = child_task->taskgroup; + if (taskgroup) + { + if (taskgroup->children == child_task) + taskgroup->children = child_task->next_taskgroup; + /* TIED tasks cannot come before WAITING tasks. If we're about + to make this task TIED, rewire things appropriately. + However, a TIED task at the end is perfectly fine. */ + else if (child_task->next_taskgroup->kind == GOMP_TASK_WAITING + && child_task->next_taskgroup != taskgroup->children) + { + /* Remove from the list. */ + child_task->prev_taskgroup->next_taskgroup + = child_task->next_taskgroup; + child_task->next_taskgroup->prev_taskgroup + = child_task->prev_taskgroup; + /* Rewire at the end of its taskgroup. 
*/ + child_task->next_taskgroup = taskgroup->children; + child_task->prev_taskgroup = taskgroup->children->prev_taskgroup; + taskgroup->children->prev_taskgroup->next_taskgroup = child_task; + taskgroup->children->prev_taskgroup = child_task; + } + } + + /* Remove child_task from the task_queue. */ child_task->prev_queue->next_queue = child_task->next_queue; child_task->next_queue->prev_queue = child_task->prev_queue; if (team->task_queue == child_task) @@ -442,6 +770,7 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, team->task_queue = NULL; } child_task->kind = GOMP_TASK_TIED; + if (--team->task_queued_count == 0) gomp_team_barrier_clear_task_pending (&team->barrier); if ((gomp_team_barrier_cancelled (&team->barrier) @@ -479,6 +808,11 @@ gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) } } +/* After CHILD_TASK has been run, adjust the various task queues to + give higher priority to the tasks that depend on CHILD_TASK. + + TEAM is the team to which CHILD_TASK belongs to. */ + static size_t gomp_task_run_post_handle_dependers (struct gomp_task *child_task, struct gomp_team *team) @@ -502,6 +836,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, if (parent->taskwait && parent->taskwait->last_parent_depends_on && !task->parent_depends_on) { + /* Put depender in last_parent_depends_on. */ struct gomp_task *last_parent_depends_on = parent->taskwait->last_parent_depends_on; task->next_child = last_parent_depends_on->next_child; @@ -509,6 +844,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, } else { + /* Make depender a sibling of child_task, and place + it at the top of said sibling list. */ task->next_child = parent->children; task->prev_child = parent->children->prev_child; parent->children = task; @@ -518,6 +855,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, } else { + /* Make depender a sibling of child_task. 
*/ task->next_child = task; task->prev_child = task; parent->children = task; @@ -539,6 +877,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, parent->taskwait->last_parent_depends_on = task; } } + /* If depender is in a taskgroup, put it at the TOP of its + taskgroup. */ if (taskgroup) { if (taskgroup->children) @@ -560,6 +900,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, gomp_sem_post (&taskgroup->taskgroup_sem); } } + /* Put depender of child_task at the END of the team's + task_queue. */ if (team->task_queue) { task->next_queue = team->task_queue; @@ -602,12 +944,18 @@ gomp_task_run_post_handle_depend (struct gomp_task *child_task, return gomp_task_run_post_handle_dependers (child_task, team); } +/* Remove CHILD_TASK from its parent. */ + static inline void gomp_task_run_post_remove_parent (struct gomp_task *child_task) { struct gomp_task *parent = child_task->parent; if (parent == NULL) return; + + /* If this was the last task the parent was depending on, + synchronize with gomp_task_maybe_wait_for_dependencies so it can + clean up and return. */ if (__builtin_expect (child_task->parent_depends_on, 0) && --parent->taskwait->n_depend == 0 && parent->taskwait->in_depend_wait) @@ -615,6 +963,8 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task) parent->taskwait->in_depend_wait = false; gomp_sem_post (&parent->taskwait->taskwait_sem); } + + /* Remove CHILD_TASK from its sibling list. */ child_task->prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; if (parent->children != child_task) @@ -637,6 +987,8 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task) } } +/* Remove CHILD_TASK from its taskgroup. 
*/ + static inline void gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) { @@ -701,7 +1053,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) { child_task = team->task_queue; cancelled = gomp_task_run_pre (child_task, child_task->parent, - child_task->taskgroup, team); + team); if (__builtin_expect (cancelled, 0)) { if (to_free) @@ -766,7 +1118,9 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) } } -/* Called when encountering a taskwait directive. */ +/* Called when encountering a taskwait directive. + + Wait for all children of the current task. */ void GOMP_taskwait (void) @@ -812,8 +1166,7 @@ GOMP_taskwait (void) { child_task = task->children; cancelled - = gomp_task_run_pre (child_task, task, child_task->taskgroup, - team); + = gomp_task_run_pre (child_task, task, team); if (__builtin_expect (cancelled, 0)) { if (to_free) @@ -863,6 +1216,9 @@ GOMP_taskwait (void) finish_cancelled:; size_t new_tasks = gomp_task_run_post_handle_depend (child_task, team); + + /* Remove child_task from children list, and set up the next + sibling to be run. */ child_task->prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; if (task->children == child_task) @@ -872,8 +1228,12 @@ GOMP_taskwait (void) else task->children = NULL; } + /* Orphan all the children of CHILD_TASK. */ gomp_clear_parent (child_task->children); + + /* Remove CHILD_TASK from its taskgroup. */ gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; child_task = NULL; team->task_count--; @@ -889,9 +1249,11 @@ GOMP_taskwait (void) } /* This is like GOMP_taskwait, but we only wait for tasks that the - upcoming task depends on. */ + upcoming task depends on. -static void + DEPEND is as in GOMP_task. 
*/ + +void gomp_task_maybe_wait_for_dependencies (void **depend) { struct gomp_thread *thr = gomp_thread (); @@ -923,11 +1285,33 @@ gomp_task_maybe_wait_for_dependencies (void **depend) { tsk->parent_depends_on = true; ++num_awaited; + /* If a task we need to wait for is not already + running and is ready to be scheduled, move it to + front, so that we run it as soon as possible. + + We rearrange the children queue such that all + parent_depends_on tasks are first, and + last_parent_depends_on points to the last such task + we rearranged. For example, given the following + children where PD[123] are the parent_depends_on + tasks: + + task->children + | + V + C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 + + We rearrange such that: + + task->children + | +--- last_parent_depends_on + | | + V V + PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4 + */ + if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) { - /* If a task we need to wait for is not already - running and is ready to be scheduled, move it - to front, so that we run it as soon as possible. 
*/ if (last_parent_depends_on) { tsk->prev_child->next_child = tsk->next_child; @@ -941,8 +1325,8 @@ gomp_task_maybe_wait_for_dependencies (void **depend) { tsk->prev_child->next_child = tsk->next_child; tsk->next_child->prev_child = tsk->prev_child; - tsk->prev_child = task->children; - tsk->next_child = task->children->next_child; + tsk->prev_child = task->children->prev_child; + tsk->next_child = task->children; task->children = tsk; tsk->prev_child->next_child = tsk; tsk->next_child->prev_child = tsk; @@ -983,8 +1367,7 @@ gomp_task_maybe_wait_for_dependencies (void **depend) { child_task = task->children; cancelled - = gomp_task_run_pre (child_task, task, child_task->taskgroup, - team); + = gomp_task_run_pre (child_task, task, team); if (__builtin_expect (cancelled, 0)) { if (to_free) @@ -1028,6 +1411,8 @@ gomp_task_maybe_wait_for_dependencies (void **depend) = gomp_task_run_post_handle_depend (child_task, team); if (child_task->parent_depends_on) --taskwait.n_depend; + + /* Remove child_task from sibling list. */ child_task->prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; if (task->children == child_task) @@ -1037,6 +1422,7 @@ gomp_task_maybe_wait_for_dependencies (void **depend) else task->children = NULL; } + gomp_clear_parent (child_task->children); gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; @@ -1070,7 +1456,7 @@ GOMP_taskgroup_start (void) struct gomp_taskgroup *taskgroup; /* If team is NULL, all tasks are executed as - GOMP_TASK_IFFALSE tasks and thus all children tasks of + GOMP_TASK_UNDEFERRED tasks and thus all children tasks of taskgroup and their descendant tasks will be finished by the time GOMP_taskgroup_end is called. 
*/ if (team == NULL) @@ -1137,8 +1523,7 @@ GOMP_taskgroup_end (void) if (child_task->kind == GOMP_TASK_WAITING) { cancelled - = gomp_task_run_pre (child_task, child_task->parent, taskgroup, - team); + = gomp_task_run_pre (child_task, child_task->parent, team); if (__builtin_expect (cancelled, 0)) { if (to_free) diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c new file mode 100644 index 00000000000..f57a5a16ef2 --- /dev/null +++ b/libgomp/taskloop.c @@ -0,0 +1,363 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the taskloop construct. It is included twice, once + for the long and once for unsigned long long variant. */ + +/* Called when encountering an explicit task directive. If IF_CLAUSE is + false, then we must not delay in executing the task. If UNTIED is true, + then the task may be executed by any member of the team. 
*/ + +void +GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), + long arg_size, long arg_align, unsigned flags, + unsigned long num_tasks, int priority, + TYPE start, TYPE end, TYPE step) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + +#ifdef HAVE_BROKEN_POSIX_SEMAPHORES + /* If pthread_mutex_* is used for omp_*lock*, then each task must be + tied to one thread all the time. This means UNTIED tasks must be + tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN + might be running on different thread than FN. */ + if (cpyfn) + flags &= ~GOMP_TASK_FLAG_IF; + flags &= ~GOMP_TASK_FLAG_UNTIED; +#endif + + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team && gomp_team_barrier_cancelled (&team->barrier)) + return; + +#ifdef TYPE_is_long + TYPE s = step; + if (step > 0) + { + if (start >= end) + return; + s--; + } + else + { + if (start <= end) + return; + s++; + } + UTYPE n = (end - start + s) / step; +#else + UTYPE n; + if (flags & GOMP_TASK_FLAG_UP) + { + if (start >= end) + return; + n = (end - start + step - 1) / step; + } + else + { + if (start <= end) + return; + n = (start - end - step - 1) / -step; + } +#endif + + TYPE task_step = step; + unsigned long nfirst = n; + if (flags & GOMP_TASK_FLAG_GRAINSIZE) + { + unsigned long grainsize = num_tasks; +#ifdef TYPE_is_long + num_tasks = n / grainsize; +#else + UTYPE ndiv = n / grainsize; + num_tasks = ndiv; + if (num_tasks != ndiv) + num_tasks = ~0UL; +#endif + if (num_tasks <= 1) + { + num_tasks = 1; + task_step = end - start; + } + else if (num_tasks >= grainsize +#ifndef TYPE_is_long + && num_tasks != ~0UL +#endif + ) + { + UTYPE mul = num_tasks * grainsize; + task_step = (TYPE) grainsize * step; + if (mul != n) + { + task_step += step; + nfirst = n - mul - 1; + } + } + else + { + UTYPE div = n / num_tasks; + UTYPE mod = n % num_tasks; + task_step = (TYPE) div * step; + if (mod) + { + task_step += step; + 
nfirst = mod - 1; + } + } + } + else + { + if (num_tasks == 0) + num_tasks = team ? team->nthreads : 1; + if (num_tasks >= n) + num_tasks = n; + else + { + UTYPE div = n / num_tasks; + UTYPE mod = n % num_tasks; + task_step = (TYPE) div * step; + if (mod) + { + task_step += step; + nfirst = mod - 1; + } + } + } + + if (flags & GOMP_TASK_FLAG_NOGROUP) + { + if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) + return; + } + else + ialias_call (GOMP_taskgroup_start) (); + + /* FIXME, use priority. */ + (void) priority; + + if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL + || (thr->task && thr->task->final_task) + || team->task_count + num_tasks > 64 * team->nthreads) + { + unsigned long i; + if (__builtin_expect (cpyfn != NULL, 0)) + { + struct gomp_task task[num_tasks]; + struct gomp_task *parent = thr->task; + arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); + char buf[num_tasks * arg_size + arg_align - 1]; + char *arg = (char *) (((uintptr_t) buf + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + char *orig_arg = arg; + for (i = 0; i < num_tasks; i++) + { + gomp_init_task (&task[i], parent, gomp_icv (false)); + task[i].kind = GOMP_TASK_UNDEFERRED; + task[i].final_task = (thr->task && thr->task->final_task) + || (flags & GOMP_TASK_FLAG_FINAL); + if (thr->task) + { + task[i].in_tied_task = thr->task->in_tied_task; + task[i].taskgroup = thr->task->taskgroup; + } + thr->task = &task[i]; + cpyfn (arg, data); + arg += arg_size; + } + arg = orig_arg; + for (i = 0; i < num_tasks; i++) + { + thr->task = &task[i]; + ((TYPE *)arg)[0] = start; + start += task_step; + ((TYPE *)arg)[1] = start; + if (i == nfirst) + task_step -= step; + fn (arg); + arg += arg_size; + if (task[i].children != NULL) + { + gomp_mutex_lock (&team->task_lock); + gomp_clear_parent (task[i].children); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); + } + } + else + for (i = 0; i < num_tasks; i++) + { + struct gomp_task task; + + gomp_init_task 
(&task, thr->task, gomp_icv (false)); + task.kind = GOMP_TASK_UNDEFERRED; + task.final_task = (thr->task && thr->task->final_task) + || (flags & GOMP_TASK_FLAG_FINAL); + if (thr->task) + { + task.in_tied_task = thr->task->in_tied_task; + task.taskgroup = thr->task->taskgroup; + } + thr->task = &task; + ((TYPE *)data)[0] = start; + start += task_step; + ((TYPE *)data)[1] = start; + if (i == nfirst) + task_step -= step; + fn (data); + if (task.children != NULL) + { + gomp_mutex_lock (&team->task_lock); + gomp_clear_parent (task.children); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); + } + } + else + { + struct gomp_task *tasks[num_tasks]; + struct gomp_task *parent = thr->task; + struct gomp_taskgroup *taskgroup = parent->taskgroup; + char *arg; + int do_wake; + unsigned long i; + + for (i = 0; i < num_tasks; i++) + { + struct gomp_task *task + = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); + tasks[i] = task; + arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + gomp_init_task (task, parent, gomp_icv (false)); + task->kind = GOMP_TASK_UNDEFERRED; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + thr->task = task; + if (cpyfn) + { + cpyfn (arg, data); + task->copy_ctors_done = true; + } + else + memcpy (arg, data, arg_size); + ((TYPE *)arg)[0] = start; + start += task_step; + ((TYPE *)arg)[1] = start; + if (i == nfirst) + task_step -= step; + thr->task = parent; + task->kind = GOMP_TASK_WAITING; + task->fn = fn; + task->fn_data = arg; + task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; + } + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. 
*/ + if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) + || (taskgroup && taskgroup->cancelled)) + && cpyfn == NULL, 0)) + { + gomp_mutex_unlock (&team->task_lock); + for (i = 0; i < num_tasks; i++) + { + gomp_finish_task (tasks[i]); + free (tasks[i]); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call (GOMP_taskgroup_end) (); + return; + } + if (taskgroup) + taskgroup->num_children += num_tasks; + for (i = 0; i < num_tasks; i++) + { + struct gomp_task *task = tasks[i]; + if (parent->children) + { + task->next_child = parent->children; + task->prev_child = parent->children->prev_child; + task->next_child->prev_child = task; + task->prev_child->next_child = task; + } + else + { + task->next_child = task; + task->prev_child = task; + } + parent->children = task; + if (taskgroup) + { + if (taskgroup->children) + { + task->next_taskgroup = taskgroup->children; + task->prev_taskgroup = taskgroup->children->prev_taskgroup; + task->next_taskgroup->prev_taskgroup = task; + task->prev_taskgroup->next_taskgroup = task; + } + else + { + task->next_taskgroup = task; + task->prev_taskgroup = task; + } + taskgroup->children = task; + } + if (team->task_queue) + { + task->next_queue = team->task_queue; + task->prev_queue = team->task_queue->prev_queue; + task->next_queue->prev_queue = task; + task->prev_queue->next_queue = task; + } + else + { + task->next_queue = task; + task->prev_queue = task; + team->task_queue = task; + } + ++team->task_count; + ++team->task_queued_count; + } + gomp_team_barrier_set_task_pending (&team->barrier); + if (team->task_running_count + !parent->in_tied_task + < team->nthreads) + { + do_wake = team->nthreads - team->task_running_count + - !parent->in_tied_task; + if ((unsigned long) do_wake > num_tasks) + do_wake = num_tasks; + } + else + do_wake = 0; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) + gomp_team_barrier_wake (&team->barrier, do_wake); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call 
(GOMP_taskgroup_end) (); +} diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 1040c29e0eb..6dc1e8ef3ca 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -321,6 +321,19 @@ proc check_effective_target_offload_device { } { } ] } +# Return 1 if offload device is available and it has non-shared address space. +proc check_effective_target_offload_device_nonshared_as { } { + return [check_runtime_nocache offload_device_nonshared_as { + int main () + { + int a = 8; + #pragma omp target map(to: a) + a++; + return a != 8; + } + } ] +} + # Return 1 if at least one nvidia board is present. proc check_effective_target_openacc_nvidia_accel_present { } { diff --git a/libgomp/testsuite/libgomp.c++/ctor-13.C b/libgomp/testsuite/libgomp.c++/ctor-13.C new file mode 100644 index 00000000000..8c7a09f315d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/ctor-13.C @@ -0,0 +1,242 @@ +// { dg-do run } + +#include <omp.h> +#include <assert.h> + +struct B +{ + static int ic, dc, xc, ac, cc; + + B(); + B(const B &); + ~B(); + B& operator=(const B &); + void doit(); + static void clear(); +}; + +int B::ic; +int B::dc; +int B::xc; +int B::cc; +int B::ac; + +B::B() +{ + #pragma omp atomic + ic++; +} + +B::~B() +{ + #pragma omp atomic + dc++; +} + +B::B(const B &) +{ + #pragma omp atomic + cc++; +} + +B& B::operator=(const B &) +{ + #pragma omp atomic + ac++; + return *this; +} + +void B::doit() +{ + #pragma omp atomic + xc++; +} + +void B::clear() +{ + ic = 0; + dc = 0; + cc = 0; + ac = 0; + xc = 0; +} + +static int n; + +void f1(B &a) +{ + B b; + B &c = b; + #pragma omp parallel default(none) private(a, c) shared (n) + { + #pragma omp master + n = omp_get_num_threads (); + a.doit(); + c.doit(); + } +} + +void f2(B &a) +{ + B b; + B &c = b; + #pragma omp parallel default(none) firstprivate(a, c) shared(n) + { + #pragma omp master + n = omp_get_num_threads (); + a.doit(); + c.doit(); + } +} + +void f3(B &a) +{ + B b; 
+ B &c = b; + #pragma omp parallel default(none) shared(n, a, c) + { + #pragma omp master + n = omp_get_num_threads (); + #pragma omp for lastprivate (a, c) + for (int i = 0; i < omp_get_num_threads (); i++) + { + a.doit(); + c.doit(); + } + } +} + +void f4() +{ + B b; + B &c = b; + #pragma omp parallel default(none) private (c) shared (n) + { + B d; + B &e = d; + #pragma omp single copyprivate (c, e) + { + c.doit(); + e.doit(); + } + c.doit(); + e.doit(); + } +} + +void f5(B (&a)[2]) +{ + B b[2]; + B (&c)[2] = b; + #pragma omp parallel default(none) private(a, c) shared (n) + { + #pragma omp master + n = omp_get_num_threads (); + a[0].doit(); + a[1].doit(); + c[0].doit(); + c[1].doit(); + } +} + +void f6(B (&a)[2]) +{ + B b[2]; + B (&c)[2] = b; + #pragma omp parallel default(none) firstprivate(a, c) shared (n) + { + #pragma omp master + n = omp_get_num_threads (); + a[0].doit(); + a[1].doit(); + c[0].doit(); + c[1].doit(); + } +} + +void f7(B (&a)[2]) +{ + B b[2]; + B (&c)[2] = b; + #pragma omp parallel default(none) shared(n, a, c) + { + #pragma omp master + n = omp_get_num_threads (); + #pragma omp for lastprivate (a, c) + for (int i = 0; i < omp_get_num_threads (); i++) + { + a[0].doit(); + a[1].doit(); + c[0].doit(); + c[1].doit(); + } + } +} + +void f8() +{ + B b[2]; + B (&c)[2] = b; + #pragma omp parallel default(none) private (c) shared (n) + { + B d[2]; + B (&e)[2] = d; + #pragma omp single copyprivate (c, e) + { + c[0].doit(); + c[1].doit(); + e[0].doit(); + e[1].doit(); + } + c[0].doit(); + c[1].doit(); + e[0].doit(); + e[1].doit(); + } +} + +int main() +{ + { + B a; + f1(a); + } + assert (B::xc == 2*n && B::ic == 2*n+2 && B::dc == 2*n+2 && B::ac == 0 && B::cc == 0); + B::clear(); + { + B a; + f2(a); + } + assert (B::xc == 2*n && B::ic == 2 && B::dc == 2*n+2 && B::ac == 0 && B::cc == 2*n); + B::clear(); + { + B a; + f3(a); + } + assert (B::xc == 2*n && B::ic == 2*n+2 && B::dc == 2*n+2 && B::ac == 2 && B::cc == 0); + B::clear(); + f4(); + assert (B::xc == 
2*n+2 && B::ic == 2*n+1 && B::dc == 2*n+1 && B::ac == 2*n-2 && B::cc == 0); + B::clear(); + { + B a[2]; + f5(a); + } + assert (B::xc == 4*n && B::ic == 4*n+4 && B::dc == 4*n+4 && B::ac == 0 && B::cc == 0); + B::clear(); + { + B a[2]; + f6(a); + } + assert (B::xc == 4*n && B::ic == 4 && B::dc == 4*n+4 && B::ac == 0 && B::cc == 4*n); + B::clear(); + { + B a[2]; + f7(a); + } + assert (B::xc == 4*n && B::ic == 4*n+4 && B::dc == 4*n+4 && B::ac == 4 && B::cc == 0); + B::clear(); + f8(); + assert (B::xc == 4*n+4 && B::ic == 4*n+2 && B::dc == 4*n+2 && B::ac == 4*n-4 && B::cc == 0); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/doacross-1.C b/libgomp/testsuite/libgomp.c++/doacross-1.C new file mode 100644 index 00000000000..bc53ee6e8a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/doacross-1.C @@ -0,0 +1,294 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const 
I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } 
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +int results[2048]; + +template <typename T> +void +baz (I<T> &i, I<T> &j, I<T> &k, T &l) +{ + if (*i < 0 || *i >= 16) + abort (); + if (*j < 0 || *j >= 16) + abort (); + if (*k < 0 || *k >= 16) + abort (); + if (l < 0 || l >= 16) + abort (); + #pragma omp atomic + results[512 * *i + 64 * *j + 8 * *k + l]++; +} + +template <typename T> +void +baz (T &i, T &j, T &k, T &l) +{ + if (i < 0 || i >= 16) + abort (); + if (j < 0 || j >= 16) + abort (); + if (k < 0 || k >= 16) + abort (); + if (l < 0 || l >= 16) + abort (); + #pragma omp atomic + results[512 * i + 64 * j + 8 * k + l]++; +} + +void +f1 (const I<int> &a, const I<int> &b, const I<int> &c, const I<int> &d, + const I<int> &e, const I<int> &f, int g, int h, + I<int> &r1, I<int> &r2, I<int> &r3) +{ + I<int> i, j, k; + int l; +#pragma omp parallel for ordered(4) lastprivate (i, j, k) schedule(static, 1) + for (i = a; i <= b; i++) + for (j = c; j < d; j++) + for (k = e; k < f; k++) + for (l = g; l 
< h; l++) + { + #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2) + baz (i, j, k, l); + if (i > a && k < f - 1 && l > g + 1) + { + int m; + #pragma omp atomic read + m = results[512 * *(i - 1) + 64 * *j + 8 * *(k + 1) + l - 2]; + if (m == 0) + abort (); + } + #pragma omp ordered depend(source) + } + r1 = i; + r2 = j; + r3 = k; +} + +void +f2 (int a, int b, int c, int d, int e, int f, int g, int h, int &r1, int &r2, int &r3) +{ + int i, j, k, l; +#pragma omp parallel for collapse (1) ordered(4) lastprivate (i, j, k) schedule(static, 2) + for (i = a; i <= b; i++) + for (j = c; j < d; j++) + for (k = e; k < f; k++) + for (l = g; l < h; l++) + { + #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2) + baz (i, j, k, l); + if (i > a && k < f - 1 && l > g + 1) + { + int m; + #pragma omp atomic read + m = results[512 * (i - 1) + 64 * j + 8 * (k + 1) + l - 2]; + if (m == 0) + abort (); + } + #pragma omp ordered depend(source) + } + r1 = i; + r2 = j; + r3 = k; +} + +void +f3 (const I<int> &a, const I<int> &b, const I<int> &c, const I<int> &d, + const I<int> &e, const I<int> &f, int g, int h, + I<int> &r1, I<int> &r2, I<int> &r3) +{ + I<int> i, j, k; + int l; +#pragma omp parallel for collapse (2) ordered(4) lastprivate (i, j, k) schedule(static, 1) + for (i = a; i <= b; i++) + for (j = c; j < d; j++) + for (k = e; k < f; k++) + for (l = g; l < h; l++) + { + #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2) + baz (i, j, k, l); + if (i > a && k < f - 1 && l > g + 1) + { + int m; + #pragma omp atomic read + m = results[512 * *(i - 1) + 64 * *j + 8 * *(k + 1) + l - 2]; + if (m == 0) + abort (); + } + #pragma omp ordered depend(source) + } + r1 = i; + r2 = j; + r3 = k; +} + +void +f4 (int a, int b, int c, int d, int e, int f, int g, int h, int &r1, int &r2, int &r3) +{ + int i, j, k, l; +#pragma omp parallel for collapse (2) ordered(4) lastprivate (i, j, k) schedule(static, 2) + for (i = a; i <= b; i++) + for (j = c; j < d; j++) + for (k = e; k < f; k++) + for 
(l = g; l < h; l++) + { + #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2) + baz (i, j, k, l); + if (i > a && k < f - 1 && l > g + 1) + { + int m; + #pragma omp atomic read + m = results[512 * (i - 1) + 64 * j + 8 * (k + 1) + l - 2]; + if (m == 0) + abort (); + } + #pragma omp ordered depend(source) + } + r1 = i; + r2 = j; + r3 = k; +} + +#define check(expr) \ + for (int i = 0; i < 2048; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + int a[16], s1, s2, s3; + I<int> r1, r2, r3; + for (int i = 0; i < 16; i++) + a[i] = i; + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f1 (&a[1], &a[3], &a[2], &a[5], &a[1], &a[3], 0, 5, r1, r2, r3); + if (*r1 != 4 || *r2 != 5 || *r3 != 3) + abort (); + check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5); + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f1 (&a[1], &a[3], &a[1], &a[4], &a[1], &a[5], 1, 0, r1, r2, r3); + if (*r1 != 4 || *r2 != 4 || *r3 != 5) + abort (); + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f1 (&a[1], &a[3], &a[1], &a[9], &a[7], &a[2], 0, 7, r1, r2, r3); + if (*r1 != 4 || *r2 != 9 || *r3 != 7) + abort (); + s1 = 15; s2 = 15; s3 = 15; + f2 (1, 3, 2, 5, 1, 3, 0, 5, s1, s2, s3); + if (s1 != 4 || s2 != 5 || s3 != 3) + abort (); + check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5); + s1 = 15; s2 = 15; s3 = 15; + f2 (1, 3, 1, 4, 1, 5, 1, 0, s1, s2, s3); + if (s1 != 4 || s2 != 4 || s3 != 5) + abort (); + s1 = 15; s2 = 15; s3 = 15; + f2 (1, 3, 1, 9, 7, 2, 0, 7, s1, s2, s3); + if (s1 != 4 || s2 != 9 || s3 != 7) + abort (); + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f3 (&a[1], &a[3], &a[2], &a[5], &a[1], &a[3], 0, 5, r1, r2, r3); + if (*r1 != 4 || *r2 != 5 || *r3 != 3) + abort (); + check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5); + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f3 (&a[1], 
&a[3], &a[1], &a[4], &a[1], &a[5], 1, 0, r1, r2, r3); + if (*r1 != 4 || *r2 != 4 || *r3 != 5) + abort (); + r1 = &a[15]; r2 = &a[15]; r3 = &a[15]; + f3 (&a[1], &a[3], &a[1], &a[9], &a[7], &a[2], 0, 7, r1, r2, r3); + if (*r1 != 4 || *r2 != 9 || *r3 != 7) + abort (); + s1 = 15; s2 = 15; s3 = 15; + f4 (1, 3, 2, 5, 1, 3, 0, 5, s1, s2, s3); + if (s1 != 4 || s2 != 5 || s3 != 3) + abort (); + check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5); + s1 = 15; s2 = 15; s3 = 15; + f4 (1, 3, 1, 4, 1, 5, 1, 0, s1, s2, s3); + if (s1 != 4 || s2 != 4 || s3 != 5) + abort (); + s1 = 15; s2 = 15; s3 = 15; + f4 (1, 3, 1, 9, 7, 2, 0, 7, s1, s2, s3); + if (s1 != 4 || s2 != 9 || s3 != 7) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C b/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C index 75276e7c5c6..6d5b5e47990 100644 --- a/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C +++ b/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C @@ -1,5 +1,5 @@ // { dg-do run } -// { dg-require-effective-target offload_device } +// { dg-require-effective-target offload_device_nonshared_as } #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c++/for-12.C b/libgomp/testsuite/libgomp.c++/for-12.C new file mode 100644 index 00000000000..ea32192e45d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-12.C @@ -0,0 +1,42 @@ +/* { dg-options "-fopenmp" } */ + +extern "C" void abort (void); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F taskloop +#define G taskloop +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F taskloop simd +#define G taskloop_simd +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +int +main () +{ + int err = 0; + #pragma omp parallel reduction(|:err) + #pragma omp single + 
{ + if (test_taskloop_normal () + || test_taskloop_simd_normal ()) + err = 1; + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/for-13.C b/libgomp/testsuite/libgomp.c++/for-13.C new file mode 100644 index 00000000000..ac1601a766f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-13.C @@ -0,0 +1,151 @@ +extern "C" void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F for +#define G f +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#pragma omp end declare target + +#undef OMPFROM +#undef OMPTO +#define DO_PRAGMA(x) _Pragma (#x) +#define OMPFROM(v) DO_PRAGMA (omp target update from(v)) +#define OMPTO(v) DO_PRAGMA (omp target update to(v)) + +#define F target parallel for +#define G tpf +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F target simd +#define G t_simd +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target parallel for simd +#define G tpf_simd +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F target teams distribute +#define G ttd +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute +#define G ttd_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute simd +#define G ttds +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute simd +#define G ttds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute parallel for 
+#define G ttdpf +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for dist_schedule(static, 128) +#define G ttdpf_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for simd +#define G ttdpfs +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for simd dist_schedule(static, 128) +#define G ttdpfs_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_tpf_static () + || test_tpf_static32 () + || test_tpf_auto () + || test_tpf_guided32 () + || test_tpf_runtime () + || test_t_simd_normal () + || test_tpf_simd_static () + || test_tpf_simd_static32 () + || test_tpf_simd_auto () + || test_tpf_simd_guided32 () + || test_tpf_simd_runtime () + || test_ttd_normal () + || test_ttd_ds128_normal () + || test_ttds_normal () + || test_ttds_ds128_normal () + || test_ttdpf_static () + || test_ttdpf_static32 () + || test_ttdpf_auto () + || test_ttdpf_guided32 () + || test_ttdpf_runtime () + || test_ttdpf_ds128_static () + || test_ttdpf_ds128_static32 () + || test_ttdpf_ds128_auto () + || test_ttdpf_ds128_guided32 () + || test_ttdpf_ds128_runtime () + || test_ttdpfs_static () + || test_ttdpfs_static32 () + || test_ttdpfs_auto () + || test_ttdpfs_guided32 () + || test_ttdpfs_runtime () + || test_ttdpfs_ds128_static () + || test_ttdpfs_ds128_static32 () + || test_ttdpfs_ds128_auto () + || test_ttdpfs_ds128_guided32 () + || test_ttdpfs_ds128_runtime ()) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/for-14.C b/libgomp/testsuite/libgomp.c++/for-14.C new file mode 100644 index 00000000000..7738473b601 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-14.C @@ -0,0 +1,120 @@ +extern "C" void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F for +#define G f +#define S +#define N(x) M(x, G, normal) +#include 
"../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#pragma omp end declare target + +#undef OMPTGT +#undef OMPFROM +#undef OMPTO +#define DO_PRAGMA(x) _Pragma (#x) +#define OMPTGT DO_PRAGMA (omp target) +#define OMPFROM(v) DO_PRAGMA (omp target update from(v)) +#define OMPTO(v) DO_PRAGMA (omp target update to(v)) + +#define F teams distribute +#define G td +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute +#define G td_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute parallel for +#define G tdpf +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for dist_schedule(static, 128) +#define G tdpf_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd +#define G tdpfs +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd dist_schedule(static, 128) +#define G tdpfs_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_td_normal () + || test_td_ds128_normal () + || test_tds_normal () + || test_tds_ds128_normal () + || test_tdpf_static () + || test_tdpf_static32 () + || test_tdpf_auto () + || test_tdpf_guided32 () + || test_tdpf_runtime () + || test_tdpf_ds128_static () + || test_tdpf_ds128_static32 () + || test_tdpf_ds128_auto () + || test_tdpf_ds128_guided32 () + || test_tdpf_ds128_runtime () + || 
test_tdpfs_static () + || test_tdpfs_static32 () + || test_tdpfs_auto () + || test_tdpfs_guided32 () + || test_tdpfs_runtime () + || test_tdpfs_ds128_static () + || test_tdpfs_ds128_static32 () + || test_tdpfs_ds128_auto () + || test_tdpfs_ds128_guided32 () + || test_tdpfs_ds128_runtime ()) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/linear-1.C b/libgomp/testsuite/libgomp.c++/linear-1.C new file mode 100644 index 00000000000..1dd1ffc8939 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/linear-1.C @@ -0,0 +1,268 @@ +int a[256]; + +__attribute__((noinline, noclone)) int +f1 (int i) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int & +f2 (short int &i, char k) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T +f3 (T i, T k) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T & +f4 (T &i) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f5 (short int i, char &k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <int N> +__attribute__((noinline, noclone)) long long int +f6 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f7 (int &i) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } 
+ return i; +} + +__attribute__((noinline, noclone)) short int +f8 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f9 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T & +f10 (T &i, long &step) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f11 (short int i, char k, char step) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f12 (long long int i, long long int k, int step) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f13 (int &i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f14 (short int &i, char &k, int &step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +template <int N> +__attribute__((noinline, noclone)) long long int +f15 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f16 (int i, long long int 
step) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f17 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +template <typename T> +__attribute__((noinline, noclone)) T +f18 (T i, T k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +int +main () +{ +#define TEST(x) \ + if (x != 8 + 48 * 4) \ + __builtin_abort (); \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? ((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + TEST (f1 (8)); + short int vs = 8; + TEST (f2 (vs, 3)); + TEST (f3 (8LL, 4LL)); + int vi = 8; + TEST (f4 (vi)); + char vk = 3; + TEST (f5 (8, vk)); + TEST (f6<7> (8LL, 4LL)); + vi = 8; + TEST (f7 (vi)); + TEST (f8 (8, 3)); + TEST (f9 (8LL, 4LL)); + vi = 8; + long vl = 2; + TEST (f10 (vi, vl)); + TEST (f11 (8, 3, 2)); + TEST (f12 (8LL, 4LL, 2)); + vi = 8; + TEST (f13 (vi, 2)); + vs = 8; + vk = 3; + vi = 2; + TEST (f14 (vs, vk, vi)); + TEST (f15<9> (8LL, 4LL, 2)); + TEST (f16 (8, 2)); + TEST (f17 (8, 3, 2)); + long long int vll1 = 8LL; + long long int vll2 = 4LL; + TEST (f18<long long int &> (vll1, vll2, 2)); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/member-1.C b/libgomp/testsuite/libgomp.c++/member-1.C new file mode 100644 index 00000000000..d2d0c5b2667 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/member-1.C @@ -0,0 +1,206 @@ +// { dg-do run } + +#include <omp.h> + +struct R { R () {}; ~R () {}; int r; }; +struct T { T () {}; virtual ~T () {}; int t; }; +int c; +struct A : public R, virtual public T { A () : b(c) {} int 
a; int &b; void m1 (); }; + +void +take (int &a, int &b, int &c, int &d) +{ + asm volatile ("" : : "g" (&a), "g" (&b), "g" (&c), "g" (&d) : "memory"); +} + +void +A::m1 () +{ + #pragma omp parallel private (a, r, T::t, A::b) + { + int q = omp_get_thread_num (); + a = q; + r = 2 * q; + t = 3 * q; + b = 4 * q; + take (a, r, t, b); + #pragma omp barrier + if (A::a != q || R::r != 2 * q || T::t != 3 * q || A::b != 4 * q) + __builtin_abort (); + } + a = 7; + r = 8; + t = 9; + b = 10; + #pragma omp parallel firstprivate (A::a, R::r, t, b) + { + int q = omp_get_thread_num (); + take (A::a, R::r, T::t, A::b); + if (a != 7 || r != 8 || t != 9 || b != 10) + __builtin_abort (); + A::a = 5 * q; + R::r = 6 * q; + T::t = 7 * q; + A::b = 8 * q; + take (a, r, t, b); + #pragma omp barrier + if (a != 5 * q || r != 6 * q || t != 7 * q || b != 8 * q) + __builtin_abort (); + } + bool f = false; + a = -5; + b = -4; + r = -3; + t = -2; + int n; + #pragma omp parallel for firstprivate (a, T::t, b, f) lastprivate (A::a, r, t, n) + for (int i = 0; i < omp_get_num_threads (); i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (A::a != -5 || A::b != -4 || T::t != -2) + __builtin_abort (); + } + else if (a != q || b != 2 * q || r != 3 * q || t != 4 * q) + __builtin_abort (); + take (a, r, t, b); + A::a = q; + A::b = 2 * q; + R::r = 3 * q; + T::t = 4 * q; + n = q; + f = true; + } + if (a != n || r != 3 * n || T::t != 4 * n) + __builtin_abort (); + b = 8; + #pragma omp parallel + #pragma omp single + for (int i = 0; i < 5; i++) + #pragma omp task firstprivate (t, b, n) private (a, R::r) + { + if (t != 4 * n || b != 8) + __builtin_abort (); + a = 9; + r = 8; + t = 12; + b = 18; + take (a, r, t, b); + if (a != 9 || r != 8 || t != 12 || b != 18) + __builtin_abort (); + } + a = 1; + b = 2; + R::r = 3; + t = 4; + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop firstprivate (r, T::t, b, f) lastprivate (a, t, b, n) + for (int i = 0; i < 30; i++) + 
{ + int q = omp_get_thread_num (); + if (!f) + { + if (R::r != 3 || A::b != 2 || T::t != 4) + __builtin_abort (); + } + else if (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q) + __builtin_abort (); + take (a, r, t, b); + A::a = 7 * q; + A::b = 8 * q; + R::r = 9 * q; + T::t = 10 * q; + n = q; + f = true; + } + } + if (a != 7 * n || b != 8 * n || t != 10 * n) + __builtin_abort (); + a = 1; + b = 2; + R::r = 3; + t = 4; + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop firstprivate (r, T::t, b, A::a, f) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (A::a != 1 || R::r != 3 || A::b != 2 || T::t != 4) + __builtin_abort (); + } + else if (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q) + __builtin_abort (); + take (a, r, t, b); + A::a = 7 * q; + A::b = 8 * q; + R::r = 9 * q; + T::t = 10 * q; + f = true; + } + } + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop lastprivate (a, t, b, n) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (f && (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q)) + __builtin_abort (); + take (a, r, t, b); + A::a = 7 * q; + A::b = 8 * q; + R::r = 9 * q; + T::t = 10 * q; + n = q; + f = true; + } + } + if (a != 7 * n || b != 8 * n || t != 10 * n) + __builtin_abort (); + #pragma omp parallel private (a, T::t, A::b, r) + { + int q = omp_get_thread_num (); + a = q; + b = 2 * q; + r = 3 * q; + t = 4 * q; + take (a, b, r, t); + #pragma omp single copyprivate (A::a, t, b, R::r) + n = q; + if (a != n || b != 2 * n || r != 3 * n || t != 4 * n) + __builtin_abort (); + } + a = 0; + b = 0; + R::r = 0; + t = 0; + #pragma omp parallel for reduction (+: A::a, t, b, R::r) + for (int i = 0; i < 30; i++) + { + a += i; + A::b += 2 * i; + r += 3 * i; + T::t += 4 * i; + take (a, b, r, t); + } + if (A::a != 435 || b != 2 * 435 || R::r != 3 * 435 || t != 4 * 435) + __builtin_abort (); +} + +int 
+main () +{ + A a; + a.m1 (); +} diff --git a/libgomp/testsuite/libgomp.c++/member-2.C b/libgomp/testsuite/libgomp.c++/member-2.C new file mode 100644 index 00000000000..bb348d8a822 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/member-2.C @@ -0,0 +1,211 @@ +// { dg-do run } + +#include <omp.h> + +int c, d, e; +struct R { R () {}; ~R () {}; int r; }; +template <typename Q> +struct T { T () : t(d) {}; virtual ~T () {}; Q t; }; +template <typename Q> +struct A : public R, virtual public T<Q> { A () : b(c), a(e) {} Q a; int &b; void m1 (); }; + +void +take (int &a, int &b, int &c, int &d) +{ + asm volatile ("" : : "g" (&a), "g" (&b), "g" (&c), "g" (&d) : "memory"); +} + +template <typename Q> +void +A<Q>::m1 () +{ + #pragma omp parallel private (a, r, T<Q>::t, A::b) + { + int q = omp_get_thread_num (); + a = q; + r = 2 * q; + T<Q>::t = 3 * q; + b = 4 * q; + take (a, r, T<Q>::t, b); + #pragma omp barrier + if (A::a != q || R::r != 2 * q || T<Q>::t != 3 * q || A::b != 4 * q) + __builtin_abort (); + } + a = 7; + r = 8; + T<Q>::t = 9; + b = 10; + #pragma omp parallel firstprivate (A::a, R::r, T<Q>::t, b) + { + int q = omp_get_thread_num (); + take (A::a, R::r, T<Q>::t, A::b); + if (a != 7 || r != 8 || T<Q>::t != 9 || b != 10) + __builtin_abort (); + A::a = 5 * q; + R::r = 6 * q; + T<Q>::t = 7 * q; + A::b = 8 * q; + take (a, r, T<Q>::t, b); + #pragma omp barrier + if (a != 5 * q || r != 6 * q || T<Q>::t != 7 * q || b != 8 * q) + __builtin_abort (); + } + bool f = false; + a = -5; + b = -4; + r = -3; + T<Q>::t = -2; + int n; + #pragma omp parallel for firstprivate (a, T<Q>::t, b, f) lastprivate (A::a, r, T<Q>::t, n) + for (int i = 0; i < omp_get_num_threads (); i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (A::a != -5 || A::b != -4 || T<Q>::t != -2) + __builtin_abort (); + } + else if (a != q || b != 2 * q || r != 3 * q || T<Q>::t != 4 * q) + __builtin_abort (); + take (a, r, T<Q>::t, b); + A::a = q; + A::b = 2 * q; + R::r = 3 * q; + T<Q>::t = 4 * q; + n = 
q; + f = true; + } + if (a != n || r != 3 * n || T<Q>::t != 4 * n) + __builtin_abort (); + b = 8; + #pragma omp parallel + #pragma omp single + for (int i = 0; i < 5; i++) + #pragma omp task firstprivate (T<Q>::t, b, n) private (a, R::r) + { + if (T<Q>::t != 4 * n || b != 8) + __builtin_abort (); + a = 9; + r = 8; + T<Q>::t = 12; + b = 18; + take (a, r, T<Q>::t, b); + if (a != 9 || r != 8 || T<Q>::t != 12 || b != 18) + __builtin_abort (); + } + a = 1; + b = 2; + R::r = 3; + T<Q>::t = 4; + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop firstprivate (r, T<Q>::t, b, f) lastprivate (a, T<Q>::t, b, n) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (R::r != 3 || A::b != 2 || T<Q>::t != 4) + __builtin_abort (); + } + else if (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q) + __builtin_abort (); + take (a, r, T<Q>::t, b); + A::a = 7 * q; + A::b = 8 * q; + R::r = 9 * q; + T<Q>::t = 10 * q; + n = q; + f = true; + } + } + if (a != 7 * n || b != 8 * n || T<Q>::t != 10 * n) + __builtin_abort (); + a = 1; + b = 2; + R::r = 3; + T<Q>::t = 4; + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop firstprivate (r, T<Q>::t, b, A::a, f) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (A::a != 1 || R::r != 3 || A::b != 2 || T<Q>::t != 4) + __builtin_abort (); + } + else if (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q) + __builtin_abort (); + take (a, r, T<Q>::t, b); + A::a = 7 * q; + A::b = 8 * q; + R::r = 9 * q; + T<Q>::t = 10 * q; + f = true; + } + } + #pragma omp parallel private (f) + { + f = false; + #pragma omp single + #pragma omp taskloop lastprivate (a, T<Q>::t, b, n) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (f && (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q)) + __builtin_abort (); + take (a, r, T<Q>::t, b); + A::a = 7 * q; + A::b = 8 * 
q; + R::r = 9 * q; + T<Q>::t = 10 * q; + n = q; + f = true; + } + } + if (a != 7 * n || b != 8 * n || T<Q>::t != 10 * n) + __builtin_abort (); + #pragma omp parallel private (a, T<Q>::t, A::b, r) + { + int q = omp_get_thread_num (); + a = q; + b = 2 * q; + r = 3 * q; + T<Q>::t = 4 * q; + take (a, b, r, T<Q>::t); + #pragma omp single copyprivate (A::a, T<Q>::t, b, R::r) + n = q; + if (a != n || b != 2 * n || r != 3 * n || T<Q>::t != 4 * n) + __builtin_abort (); + } + a = 0; + b = 0; + R::r = 0; + T<Q>::t = 0; + #pragma omp parallel for reduction (+: A::a, T<Q>::t, b, R::r) + for (int i = 0; i < 30; i++) + { + a += i; + A::b += 2 * i; + r += 3 * i; + T<Q>::t += 4 * i; + take (a, b, r, T<Q>::t); + } + if (A::a != 435 || b != 2 * 435 || R::r != 3 * 435 || T<Q>::t != 4 * 435) + __builtin_abort (); +} + +int +main () +{ + A<int> a; + a.m1 (); + A<int &> b; + b.m1 (); +} diff --git a/libgomp/testsuite/libgomp.c++/member-3.C b/libgomp/testsuite/libgomp.c++/member-3.C new file mode 100644 index 00000000000..50bd587d86b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/member-3.C @@ -0,0 +1,105 @@ +// { dg-do run } + +struct R { R () {}; ~R () {}; int r; }; +struct T { T () {}; virtual ~T () {}; int t; }; +int c; +struct A : public R, virtual public T { A () : b(c) {} int a; int &b; void m1 (); }; +int d[64]; + +void +A::m1 () +{ + r = 0; + #pragma omp parallel for private (a) reduction(|:R::r) + for (a = 0; A::a < 31; a += 2) + r |= (1 << A::a); + if (r != 0x55555555) + __builtin_abort (); + #pragma omp parallel for simd linear (R::r) + for (R::r = 0; r < 32; R::r++) + d[r + 8] |= 1; + for (int i = 0; i < 64; i++) + if (d[i] != ((i >= 8 && i < 32 + 8) ? 1 : 0)) + __builtin_abort (); + #pragma omp parallel for lastprivate (t) + for (T::t = 0; t < 32; t += 3) + d[T::t + 2] |= 2; + if (T::t != 33) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 
2 : 0))) + __builtin_abort (); + #pragma omp simd linear (t) + for (t = 0; t < 32; t++) + d[T::t + 9] |= 4; + if (t != 32) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0))) + __builtin_abort (); + r = 0; + #pragma omp parallel for reduction(|:r) + for (a = 0; A::a < 31; a += 2) + r |= (1 << A::a); + if (r != 0x55555555) + __builtin_abort (); + #pragma omp parallel for simd + for (R::r = 0; r < 32; R::r += 2) + d[r + 8] |= 8; + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0))) + __builtin_abort (); + #pragma omp simd collapse(2) + for (T::t = 0; t < 7; t += 2) + for (a = 0; A::a < 8; a++) + d[((t << 2) | a) + 3] |= 16; + if (t != 8 || A::a != 8) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? 16 : 0))) + __builtin_abort (); + T::t = 32; + a = 16; + #pragma omp parallel + #pragma omp single + #pragma omp taskloop simd collapse(2) + for (t = 0; T::t < 7; T::t += 2) + for (A::a = 0; a < 8; A::a++) + d[((t << 2) | A::a) + 3] |= 32; + if (T::t != 8 || a != 8) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0))) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + #pragma omp taskloop simd + for (R::r = 0; r < 31; R::r += 2) + d[r + 8] |= 64; + if (r != 32) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 
1 : (64 | 8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0))) + __builtin_abort (); +} + +int +main () +{ + A a; + a.m1 (); +} diff --git a/libgomp/testsuite/libgomp.c++/member-4.C b/libgomp/testsuite/libgomp.c++/member-4.C new file mode 100644 index 00000000000..f76695de6fb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/member-4.C @@ -0,0 +1,108 @@ +// { dg-do run } + +int c, d, e; +struct R { R () {}; ~R () {}; int r; }; +template <typename Q> +struct T { T () : t(d) {}; virtual ~T () {}; Q t; }; +template <typename Q> +struct A : public R, virtual public T<Q> { A () : b(c), a(e) {} Q a; int &b; void m1 (); }; +int f[64]; + +template <typename Q> +void +A<Q>::m1 () +{ + r = 0; + #pragma omp parallel for private (a) reduction(|:R::r) + for (a = 0; A::a < 31; a += 2) + r |= (1 << A::a); + if (r != 0x55555555) + __builtin_abort (); + #pragma omp parallel for simd linear (R::r) + for (R::r = 0; r < 32; R::r++) + f[r + 8] |= 1; + for (int i = 0; i < 64; i++) + if (f[i] != ((i >= 8 && i < 32 + 8) ? 1 : 0)) + __builtin_abort (); + #pragma omp parallel for lastprivate (T<Q>::t) + for (T<Q>::t = 0; T<Q>::t < 32; T<Q>::t += 3) + f[T<Q>::t + 2] |= 2; + if (T<Q>::t != 33) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0))) + __builtin_abort (); + #pragma omp simd linear (T<Q>::t) + for (T<Q>::t = 0; T<Q>::t < 32; T<Q>::t++) + f[T<Q>::t + 9] |= 4; + if (T<Q>::t != 32) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 
4 : 0))) + __builtin_abort (); + r = 0; + #pragma omp parallel for reduction(|:r) + for (a = 0; A::a < 31; a += 2) + r |= (1 << A::a); + if (r != 0x55555555) + __builtin_abort (); + #pragma omp parallel for simd + for (R::r = 0; r < 32; R::r += 2) + f[r + 8] |= 8; + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0))) + __builtin_abort (); + #pragma omp simd collapse(2) + for (T<Q>::t = 0; T<Q>::t < 7; T<Q>::t += 2) + for (a = 0; A::a < 8; a++) + f[((T<Q>::t << 2) | a) + 3] |= 16; + if (T<Q>::t != 8 || A::a != 8) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? 16 : 0))) + __builtin_abort (); + T<Q>::t = 32; + a = 16; + #pragma omp parallel + #pragma omp single + #pragma omp taskloop simd collapse(2) + for (T<Q>::t = 0; T<Q>::t < 7; T<Q>::t += 2) + for (A::a = 0; a < 8; A::a++) + f[((T<Q>::t << 2) | A::a) + 3] |= 32; + if (T<Q>::t != 8 || a != 8) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0))) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + #pragma omp taskloop simd + for (R::r = 0; r < 31; R::r += 2) + f[r + 8] |= 64; + if (r != 32) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (64 | 8 | 1)) : 0) + | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0) + | ((i >= 9 && i < 32 + 9) ? 4 : 0) + | ((i >= 3 && i < 32 + 3) ? 
(16 | 32) : 0))) + __builtin_abort (); +} + +int +main () +{ + A<int> a; + a.m1 (); +} diff --git a/libgomp/testsuite/libgomp.c++/member-5.C b/libgomp/testsuite/libgomp.c++/member-5.C new file mode 100644 index 00000000000..d6fec7a841a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/member-5.C @@ -0,0 +1,183 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () 
{} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool 
operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +struct R { R () {}; ~R () {}; I<int> r; }; +struct T { T () {}; virtual ~T () {}; I<int> t; }; +struct A : public R, virtual public T { A () {} I<int> a; void m1 (const I<int> &, const I<int> &); }; +template <typename Q> +struct U { U () {}; virtual ~U () {}; Q t; }; +template <typename Q> +struct B : public R, virtual public U<Q> { B () {} Q a; void m2 (const Q &, const Q &, const I<int> &, const I<int> &); }; + +int d[64]; + +void +A::m1 (const I<int> &x, const I<int> &y) +{ + int w = 0; + #pragma omp parallel for private (a) reduction(|:w) + for (a = x; A::a < y - 33; a += 2) + w |= (1 << *A::a); + if (w != 0x55555555) + __builtin_abort (); + #pragma omp parallel for lastprivate (t) + for (T::t = x; t < y - 32; t += 3) + d[*T::t + 2] |= 1; + if (*T::t != 33) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)) + __builtin_abort (); + w = 0; + #pragma omp parallel for reduction(|:w) + for (a = x; A::a < y - 33; a += 2) + w |= (1 << *A::a); + if (w != 0x55555555) + __builtin_abort (); + #pragma omp taskloop + for (R::r = x; r < y - 32; R::r += 2) + d[*r + 8] |= 2; + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0) + | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 
2 : 0))) + __builtin_abort (); + #pragma omp taskloop collapse(2) + for (T::t = x; t < y - 57; t += 2) + for (a = x; A::a < y - 56; a++) + d[((*t << 2) | *a) + 3] |= 4; + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0) + | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0) + | ((i >= 3 && i < 32 + 3) ? 4 : 0))) + __builtin_abort (); +} + +template <typename Q> +void +B<Q>::m2 (const Q &u, const Q &v, const I<int> &x, const I<int> &y) +{ + int w = 0; + #pragma omp parallel for private (a) reduction(|:w) + for (a = u; B::a < v - 33; a += 2) + w |= (1 << *B::a); + if (w != 0x55555555) + __builtin_abort (); + #pragma omp parallel for lastprivate (U<Q>::t) + for (U<Q>::t = u; U<Q>::t < v - 32; U<Q>::t += 3) + d[*U<Q>::t + 2] |= 1; + if (*U<Q>::t != 33) + __builtin_abort (); + for (int i = 0; i < 64; i++) + if (d[i] != ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)) + __builtin_abort (); + w = 0; + #pragma omp parallel for reduction(|:w) + for (a = u; B::a < v - 33; a += 2) + w |= (1 << *B::a); + if (w != 0x55555555) + __builtin_abort (); + #pragma omp taskloop + for (R::r = x; r < y - 32; R::r += 2) + d[*r + 8] |= 2; + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0) + | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0))) + __builtin_abort (); + #pragma omp taskloop collapse(2) + for (U<Q>::t = u; U<Q>::t < v - 57; U<Q>::t += 2) + for (a = u; B::a < v - 56; a++) + d[((*U<Q>::t << 2) | *a) + 3] |= 4; + for (int i = 0; i < 64; i++) + if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0) + | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0) + | ((i >= 3 && i < 32 + 3) ? 
4 : 0))) + __builtin_abort (); +} + +int +main () +{ + A a; + int b[128]; + for (int i = 0; i < 128; i++) + b[i] = i - 32; + a.m1 (&b[32], &b[96]); + for (int i = 0; i < 64; i++) + d[i] = 0; + B<I<int> > c; + c.m2 (&b[32], &b[96], &b[32], &b[96]); + for (int i = 0; i < 64; i++) + d[i] = 0; + B<int *> d; + d.m2 (&b[32], &b[96], &b[32], &b[96]); +} diff --git a/libgomp/testsuite/libgomp.c++/ordered-1.C b/libgomp/testsuite/libgomp.c++/ordered-1.C new file mode 100644 index 00000000000..a1bedd808ac --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/ordered-1.C @@ -0,0 +1 @@ +#include "../libgomp.c/ordered-4.c" diff --git a/libgomp/testsuite/libgomp.c++/reduction-10.C b/libgomp/testsuite/libgomp.c++/reduction-10.C new file mode 100644 index 00000000000..2254430f168 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-10.C @@ -0,0 +1,201 @@ +template <typename T> +struct A +{ + A () { t = 0; } + A (T x) { t = x; } + A (const A &x) { t = x.t; } + ~A () {} + T t; +}; +template <typename T> +struct M +{ + M () { t = 1; } + M (T x) { t = x; } + M (const M &x) { t = x.t; } + ~M () {} + T t; +}; +template <typename T> +struct B +{ + B () { t = ~(T) 0; } + B (T x) { t = x; } + B (const B &x) { t = x.t; } + ~B () {} + T t; +}; +template <typename T> +void +add (T &x, T &y) +{ + x.t += y.t; +} +template <typename T> +void +zero (T &x) +{ + x.t = 0; +} +template <typename T> +void +orit (T *x, T *y) +{ + y->t |= x->t; +} +B<long> bb; +#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv)) +#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1) +#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig)) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +A<char> z[10]; + +template <int N> +__attribute__((noinline, noclone)) void +foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3, int p4, + int p5, long p6, short p7) +{ + A<unsigned long long> a[p7 + 4]; + short bb[p7]; + short (&b)[p7] = bb; + for (int i = 0; i < p7; i++) + bb[i] = -6; + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2 + N - 2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5 - N + 2]) \ + reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == N) + y[1].t *= 7; + if ((i & 63) == 3) + y[N].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i].t != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[N] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} + +A<int> a3[4][3][2]; +A<int> (*p3)[3][2] = &a3[1]; +M<int> y3[5] = { 0, 1, 1, 1, 0 }; +B<long> w3[1][2]; + +template <int N> +struct S +{ + A<int> (*&x)[3][N]; + M<int> *y; + B<long> (&w)[1][N]; + A<char> z[10]; + short b[5]; + A<unsigned long long> a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b() {} + __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short); +}; + +template <int N> +void +S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7) +{ + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:N], z[:p3 + N - 2]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == N) + y[1].t *= 7; + if ((i & 63) == 3) + y[N].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + A<int> a[4][3][2]; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + A<int> (*p)[3][2] = &a[1]; + M<int> y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + B<long> w[1][2]; + foo<2> (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y[i].t != 
y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + S<2> s; + s.foo (1, 3L, 4L, 3, 4, 2L, 5); + for (int i = 0; i < 9; i++) + if (s.a[i].t != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a3[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y3[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (s.z[i].t != z2[i]) + __builtin_abort (); + if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L) + __builtin_abort (); + if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22 + || s.b[3] != 84 || s.b[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reduction-5.C b/libgomp/testsuite/libgomp.c++/reduction-5.C new file mode 100644 index 00000000000..212fd69be58 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-5.C @@ -0,0 +1,127 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*&x)[3][2], int *y, long (&w)[1][2]) +{ + unsigned long long a[9] = {}; + short b[5] = {}; + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:2]) reduction(max:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int a3[4][3][2]; +int (*p3)[3][2] = &a3[1]; +int y3[5] = { 0, 1, 1, 1, 0 }; +long w3[1][2] = { ~0L, ~0L }; +short bb[5]; + +struct S +{ + int (*&x)[3][2]; + int *y; + long (&w)[1][2]; + char z[10]; + short (&b)[5]; + unsigned long long a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {} + __attribute__((noinline, noclone)) void foo (); +}; + +void +S::foo () +{ + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:2]) reduction(max:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int (*p)[3][2] = &a[1]; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (p, y + 1, w); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + S s; + s.foo (); + for (int i = 0; i < 9; i++) + if (s.a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (__builtin_memcmp (a3, a2, sizeof (a3)) + || __builtin_memcmp (y3, y2, sizeof (y3)) + || __builtin_memcmp (s.z, z2, sizeof (s.z)) + || w3[0][0] != ~0x249249L + || w3[0][1] != ~0x249249L) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reduction-6.C b/libgomp/testsuite/libgomp.c++/reduction-6.C new file mode 100644 index 00000000000..f180ca35edd --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-6.C @@ -0,0 +1,195 @@ +template <typename T> +struct A +{ + A () { t = 0; } + A (T x) { t = x; } + A (const A &x) { t = x.t; } + ~A () {} + T t; +}; +template <typename T> +struct M +{ + M () { t = 1; } + M (T x) { t = x; } + M (const M &x) { t = x.t; } + ~M () {} + T t; +}; +template <typename T> +struct B +{ + B () { t = ~(T) 0; } + B (T x) { t = x; } + B (const B &x) { t = x.t; } + ~B () {} + T t; +}; +template <typename T> +void +add (T &x, T &y) +{ + x.t += y.t; +} +template <typename T> +void +zero (T &x) +{ + x.t = 0; +} +template <typename T> +void +orit (T *x, T *y) +{ + y->t |= x->t; +} +B<long> bb; +#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv)) +#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1) +#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig)) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +A<char> z[10]; + +__attribute__((noinline, noclone)) void +foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2]) +{ + A<unsigned long long> a[9]; + short bb[5] = {}; + short (&b)[5] = bb; + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:2]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i].t != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} + +A<int> a3[4][3][2]; +A<int> (*p3)[3][2] = &a3[1]; +M<int> y3[5] = { 0, 1, 1, 1, 0 }; +B<long> w3[1][2]; + +struct S +{ + A<int> (*&x)[3][2]; + M<int> *y; + B<long> (&w)[1][2]; + A<char> z[10]; + short b[5]; + A<unsigned long long> a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b() {} + __attribute__((noinline, noclone)) void foo (); +}; + +void +S::foo () +{ + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:2]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > 
b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + A<int> a[4][3][2]; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + A<int> (*p)[3][2] = &a[1]; + M<int> y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + B<long> w[1][2]; + foo (p, y + 1, w); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + S s; + s.foo (); + for (int i = 0; i < 9; i++) + if (s.a[i].t != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a3[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y3[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (s.z[i].t != z2[i]) + __builtin_abort (); + if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L) + __builtin_abort (); + if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22 + || s.b[3] != 84 || s.b[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reduction-7.C b/libgomp/testsuite/libgomp.c++/reduction-7.C new file mode 100644 index 00000000000..75f9d08aac4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-7.C @@ -0,0 +1,134 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*&x)[3][2], int *y, long (&w)[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7) +{ + unsigned long long a[p7 + 4]; + short b[p7]; + for (int i = 0; i < p7 + 4; i++) + { + if (i < p7) + b[i] = -6; + a[i] = 0; + } + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(max:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int a3[4][3][2]; +int (*p3)[3][2] = &a3[1]; +int y3[5] = { 0, 1, 1, 1, 0 }; +long w3[1][2] = { ~0L, ~0L }; +short bb[5]; + +struct S +{ + int (*&x)[3][2]; + int *y; + long (&w)[1][2]; + char z[10]; + short (&b)[5]; + unsigned long long a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {} + __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short); +}; + +void +S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7) +{ + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(max:b[0:p7]) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int (*p)[3][2] = &a[1]; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + S s; + s.foo (1, 3L, 4L, 3, 4, 2L, 5); + for 
(int i = 0; i < 9; i++) + if (s.a[i] != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + if (__builtin_memcmp (a3, a2, sizeof (a3)) + || __builtin_memcmp (y3, y2, sizeof (y3)) + || __builtin_memcmp (s.z, z2, sizeof (s.z)) + || w3[0][0] != ~0x249249L + || w3[0][1] != ~0x249249L) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reduction-8.C b/libgomp/testsuite/libgomp.c++/reduction-8.C new file mode 100644 index 00000000000..cffd7cc2d4c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-8.C @@ -0,0 +1,198 @@ +template <typename T> +struct A +{ + A () { t = 0; } + A (T x) { t = x; } + A (const A &x) { t = x.t; } + ~A () {} + T t; +}; +template <typename T> +struct M +{ + M () { t = 1; } + M (T x) { t = x; } + M (const M &x) { t = x.t; } + ~M () {} + T t; +}; +template <typename T> +struct B +{ + B () { t = ~(T) 0; } + B (T x) { t = x; } + B (const B &x) { t = x.t; } + ~B () {} + T t; +}; +template <typename T> +void +add (T &x, T &y) +{ + x.t += y.t; +} +template <typename T> +void +zero (T &x) +{ + x.t = 0; +} +template <typename T> +void +orit (T *x, T *y) +{ + y->t |= x->t; +} +B<long> bb; +#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv)) +#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1) +#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig)) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +A<char> z[10]; + +__attribute__((noinline, noclone)) void +foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7) +{ + A<unsigned long long> a[p7 + 4]; + short bb[p7]; + short (&b)[p7] = bb; + for (int i = 0; i < p7; i++) + bb[i] = -6; + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i].t != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} + +A<int> a3[4][3][2]; +A<int> (*p3)[3][2] = &a3[1]; +M<int> y3[5] = { 0, 1, 1, 1, 0 }; +B<long> w3[1][2]; + +struct S +{ + A<int> (*&x)[3][2]; + M<int> *y; + B<long> (&w)[1][2]; + A<char> z[10]; + short b[5]; + A<unsigned long long> a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b() {} + __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short); +}; + +void +S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7) +{ + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + A<int> a[4][3][2]; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + A<int> (*p)[3][2] = &a[1]; + M<int> y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + B<long> w[1][2]; + foo (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 
10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + S s; + s.foo (1, 3L, 4L, 3, 4, 2L, 5); + for (int i = 0; i < 9; i++) + if (s.a[i].t != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a3[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y3[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (s.z[i].t != z2[i]) + __builtin_abort (); + if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L) + __builtin_abort (); + if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22 + || s.b[3] != 84 || s.b[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reduction-9.C b/libgomp/testsuite/libgomp.c++/reduction-9.C new file mode 100644 index 00000000000..117a8f66c52 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reduction-9.C @@ -0,0 +1,130 @@ +char z[10] = { 0 }; + +template <int N> +__attribute__((noinline, noclone)) void +foo (int (*&x)[3][N], int *y, long (&w)[1][N]) +{ + unsigned long long a[9] = {}; + short b[5] = {}; + #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:N]) reduction(max:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == N) + y[1] *= 7; + if ((i & 63) == 3) + y[N] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[N] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int a3[4][3][2]; +int (*p3)[3][2] = &a3[1]; +int y3[5] = { 0, 1, 1, 1, 0 }; +long w3[1][2] = { ~0L, ~0L }; +short bb[5]; + +template <int N> +struct S +{ + int (*&x)[3][N]; + int *y; + long (&w)[1][N]; + char z[10]; + short (&b)[5]; + unsigned long long a[9]; + S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {} + __attribute__((noinline, noclone)) void foo (); +}; + +template <int N> +void +S<N>::foo () +{ + #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:][:N]) reduction(max:b) + for (int i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == N) + y[1] *= 7; + if ((i & 63) == 3) + y[N] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int (*p)[3][2] = &a[1]; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo<2> (p, y + 1, w); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + S<2> s; + s.foo (); + for (int i = 0; i < 9; i++) + if (s.a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (__builtin_memcmp (a3, a2, sizeof (a3)) + || __builtin_memcmp (y3, y2, sizeof (y3)) + || __builtin_memcmp (s.z, z2, sizeof (s.z)) + || w3[0][0] != ~0x249249L + || w3[0][1] != ~0x249249L) + __builtin_abort (); + if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/reference-1.C b/libgomp/testsuite/libgomp.c++/reference-1.C new file mode 100644 index 00000000000..f2a78614a13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/reference-1.C @@ -0,0 +1,57 @@ +// { dg-do run } + +#include <omp.h> + +__attribute__((noinline, noclone)) void +foo (int &a, short &d, char &g) +{ + unsigned long b = 12; + unsigned long &c = b; + long long e = 21; + long long &f = e; + unsigned int h = 12; + unsigned int &k = h; + #pragma omp parallel default(none) private(a, c) firstprivate(d, f) shared(g, k) + { + int i = omp_get_thread_num (); + a = i; + c = 2 * i; + if (d != 27 || f != 21) + __builtin_abort (); + d = 3 * (i & 0xfff); + f = 4 * i; + #pragma omp barrier + if (a != i || c != 2 * i || d != 3 * (i & 0xfff) || f != 4 * i) + __builtin_abort (); + #pragma omp for lastprivate(g, k) + for (int j = 0; j < 32; j++) + { + g = j; + k = 3 * j; + } + } + if (g != 31 || k != 31 * 3) + __builtin_abort (); + #pragma omp parallel for firstprivate (g, k) lastprivate (g, k) + for (int j = 0; j < 32; j++) + { + if (g != 31 || k != 31 * 3) + __builtin_abort (); + if (j == 31) + { + g = 29; + k = 138; + } + } + if (g != 29 || k != 138) + __builtin_abort (); +} + +int +main () +{ + int a = 5; + short d = 27; + char g = ' '; + foo (a, d, g); +} diff --git a/libgomp/testsuite/libgomp.c++/simd14.C b/libgomp/testsuite/libgomp.c++/simd14.C new file mode 100644 index 00000000000..dc18cb619ac --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd14.C @@ -0,0 +1,43 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } 
} +// { dg-additional-options "-mavx" { target avx_runtime } } + +int a[1024]; +short b[2048]; + +static inline void +bar (int &x, unsigned long long &y, short *&z) +{ + a[x] = x + y + *z; + x++; + y += 17; + z += 2; +} + +__attribute__((noinline, noclone)) int +foo (unsigned long long &s, short *&t) +{ + int i, j = 0; + int &r = j; +#pragma omp parallel for simd linear(r) linear(s:17ULL) linear(t:2) + for (i = 0; i < 1024; i++) + bar (r, s, t); + return j; +} + +int +main () +{ + int i; + for (i = 0; i < 2048; i++) + b[i] = 3 * i; + unsigned long long s = 12; + short *t = b; + int j = foo (s, t); + for (i = 0; i < 1024; i++) + if (a[i] != 12 + 24 * i) + __builtin_abort (); + if (j != 1024 || s != 12 + 1024 * 17ULL || t != &b[2048]) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/target-10.C b/libgomp/testsuite/libgomp.c++/target-10.C new file mode 100644 index 00000000000..860773eed15 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-10.C @@ -0,0 +1,154 @@ +extern "C" void abort (void); +union U { int x; long long y; }; +struct T { int a; union U b; int c; }; +struct S { int s; int u; T v; int x[10]; union U w; int y[10]; int z[10]; }; +volatile int z; + +template <typename R> +void +foo () +{ + R s; + s.template s = 0; + s.u = 1; + s.v.a = 2; + s.v.b.y = 3LL; + s.v.c = 19; + s.w.x = 4; + s.template x[0] = 7; + s.x[1] = 8; + s.y[3] = 9; + s.y[4] = 10; + s.y[5] = 11; + int err = 0; + #pragma omp target map (to:s.template v.template b, s.u, s.x[0:z + 2]) \ + map (tofrom:s.y[3:3]) \ + map (from: s.w, s.template z[z + 1:z + 3], err) + { + err = 0; + if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8 + || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11) + err = 1; + s.w.x = 6; + s.y[3] = 12; + s.y[4] = 13; + s.y[5] = 14; + s.z[1] = 15; + s.z[2] = 16; + s.z[3] = 17; + } + if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14 + || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17) + abort (); + s.u++; + s.v.a++; + s.v.b.y++; + 
s.w.x++; + s.x[1] = 18; + s.z[0] = 19; + #pragma omp target data map (tofrom: s) + #pragma omp target map (always to: s.template w, s.x[1], err) map (alloc:s.u, s. template v.template b, s.z[z:z + 1]) + { + err = 0; + if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19) + err = 1; + s.w.x = 8; + s.x[1] = 20; + s.z[0] = 21; + } + if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21) + abort (); + s.u++; + s.v.a++; + s.v.b.y++; + s.w.x++; + s.x[0] = 22; + s.x[1] = 23; + #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u) + #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b) + { + err = 0; + if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23) + err = 1; + s.w.x = 11; + s.x[0] = 24; + s.x[1] = 25; + } + if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25) + abort (); +} + +int +main () +{ + S s; + s.s = 0; + s.u = 1; + s.v.a = 2; + s.v.b.y = 3LL; + s.v.c = 19; + s.w.x = 4; + s.x[0] = 7; + s.x[1] = 8; + s.y[3] = 9; + s.y[4] = 10; + s.y[5] = 11; + int err = 0; + #pragma omp target map (to:s.v.b, s.u, s.x[0:z + 2]) \ + map (tofrom:s.y[3:3]) \ + map (from: s.w, s.z[z + 1:z + 3], err) + { + err = 0; + if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8 + || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11) + err = 1; + s.w.x = 6; + s.y[3] = 12; + s.y[4] = 13; + s.y[5] = 14; + s.z[1] = 15; + s.z[2] = 16; + s.z[3] = 17; + } + if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14 + || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17) + abort (); + s.u++; + s.v.a++; + s.v.b.y++; + s.w.x++; + s.x[1] = 18; + s.z[0] = 19; + #pragma omp target data map (tofrom: s) + #pragma omp target map (always to: s.w, s.x[1], err) map (alloc:s.u, s.v.b, s.z[z:z + 1]) + { + err = 0; + if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19) + err = 1; + s.w.x = 8; + s.x[1] = 20; + s.z[0] = 21; + } + if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21) + abort 
(); + s.u++; + s.v.a++; + s.v.b.y++; + s.w.x++; + s.x[0] = 22; + s.x[1] = 23; + #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u) + #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b) + { + err = 0; + if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23) + err = 1; + s.w.x = 11; + s.x[0] = 24; + s.x[1] = 25; + } + if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25) + abort (); + foo <S> (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-11.C b/libgomp/testsuite/libgomp.c++/target-11.C new file mode 100644 index 00000000000..fe99603351d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-11.C @@ -0,0 +1,121 @@ +extern "C" void abort (); +struct T { int a; int *b; int c; char (&d)[10]; }; +struct S { int *s; char *u; T v; short *w; short *&x; }; +volatile int z; + +template <typename A, typename B, typename C, typename D> +void +foo () +{ + A d[10]; + B *e; + C a[32], i; + A b[32]; + B c[32]; + for (i = 0; i < 32; i++) + { + a[i] = i; + b[i] = 32 + i; + c[i] = 64 + i; + } + for (i = 0; i < 10; i++) + d[i] = 17 + i; + e = c + 18; + D s = { a, b + 2, { 0, a + 16, 0, d }, c + 3, e }; + int err = 0; + #pragma omp target map (to:s.v.b[0:z + 7], s.template u[z + 1:z + 4]) \ + map (tofrom:s.s[3:3], s. template v. template d[z + 1:z + 3]) \ + map (from: s.w[z:4], s.x[1:3], err) private (i) + { + err = 0; + for (i = 0; i < 7; i++) + if (s.v.b[i] != 16 + i) + err = 1; + for (i = 1; i < 5; i++) + if (s.u[i] != 34 + i) + err = 1; + for (i = 3; i < 6; i++) + if (s.s[i] != i) + err = 1; + else + s.s[i] = 128 + i; + for (i = 1; i < 4; i++) + if (s.v.d[i] != 17 + i) + err = 1; + else + s.v.d[i] = 23 + i; + for (i = 0; i < 4; i++) + s.w[i] = 96 + i; + for (i = 1; i < 4; i++) + s.x[i] = 173 + i; + } + if (err) + abort (); + for (i = 0; i < 32; i++) + if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i) + || b[i] != 32 + i + || c[i] != ((i >= 3 && i < 7) ? 93 + i : ((i >= 19 && i < 22) ? 
155 + i : 64 + i))) + abort (); + for (i = 0; i < 10; i++) + if (d[i] != ((i >= 1 && i < 4) ? 23 + i : 17 + i)) + abort (); +} + +int +main () +{ + char d[10]; + short *e; + int a[32], i; + char b[32]; + short c[32]; + for (i = 0; i < 32; i++) + { + a[i] = i; + b[i] = 32 + i; + c[i] = 64 + i; + } + for (i = 0; i < 10; i++) + d[i] = 17 + i; + e = c + 18; + S s = { a, b + 2, { 0, a + 16, 0, d }, c + 3, e }; + int err = 0; + #pragma omp target map (to:s.v.b[0:z + 7], s.u[z + 1:z + 4]) \ + map (tofrom:s.s[3:3], s.v.d[z + 1:z + 3]) \ + map (from: s.w[z:4], s.x[1:3], err) private (i) + { + err = 0; + for (i = 0; i < 7; i++) + if (s.v.b[i] != 16 + i) + err = 1; + for (i = 1; i < 5; i++) + if (s.u[i] != 34 + i) + err = 1; + for (i = 3; i < 6; i++) + if (s.s[i] != i) + err = 1; + else + s.s[i] = 128 + i; + for (i = 1; i < 4; i++) + if (s.v.d[i] != 17 + i) + err = 1; + else + s.v.d[i] = 23 + i; + for (i = 0; i < 4; i++) + s.w[i] = 96 + i; + for (i = 1; i < 4; i++) + s.x[i] = 173 + i; + } + if (err) + abort (); + for (i = 0; i < 32; i++) + if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i) + || b[i] != 32 + i + || c[i] != ((i >= 3 && i < 7) ? 93 + i : ((i >= 19 && i < 22) ? 155 + i : 64 + i))) + abort (); + for (i = 0; i < 10; i++) + if (d[i] != ((i >= 1 && i < 4) ? 
23 + i : 17 + i)) + abort (); + foo <char, short, int, S> (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-12.C b/libgomp/testsuite/libgomp.c++/target-12.C new file mode 100644 index 00000000000..3b4ed57df68 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-12.C @@ -0,0 +1,93 @@ +extern "C" void abort (void); +struct S { int s; int *u; int v[5]; }; +volatile int z; + +template <typename T> +void +foo () +{ + int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0; + T s = { 9, u + 3, { 10, 11, 12, 13, 14 } }; + int *v = u + 4; + #pragma omp target enter data map (to: s.s, s.template u[0:5]) map (alloc: s.template v[1:3]) + s.s++; + u[3]++; + s.v[1]++; + #pragma omp target update to (s.template s) to (s.u[0:2], s.v[1:3]) + #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err) + { + err = 0; + if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13) + err = 1; + if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7) + err = 1; + s.s++; + s.v[2] += 2; + v[-1] = 5; + v[3] = 9; + } + if (err) + abort (); + #pragma omp target map (alloc: s.u[0:5]) + { + err = 0; + if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9) + err = 1; + s.u[1] = 12; + } + #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3]) + if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5 + || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8 + || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14 + || s.v[3] != 13 || s.v[4] != 14) + abort (); + #pragma omp target exit data map (release: s.s) + #pragma omp target exit data map (release: s.u[0:5]) + #pragma omp target exit data map (delete: s.v[1:3]) + #pragma omp target exit data map (release: s.s) +} + +int +main () +{ + int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0; + S s = { 9, u + 3, { 10, 11, 12, 13, 14 } }; + int *v = u + 4; + #pragma omp target enter data map (to: s.s, s.u[0:5]) map (alloc: s.v[1:3]) + s.s++; + 
u[3]++; + s.v[1]++; + #pragma omp target update to (s.s) to (s.u[0:2], s.v[1:3]) + #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err) + { + err = 0; + if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13) + err = 1; + if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7) + err = 1; + s.s++; + s.v[2] += 2; + v[-1] = 5; + v[3] = 9; + } + if (err) + abort (); + #pragma omp target map (alloc: s.u[0:5]) + { + err = 0; + if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9) + err = 1; + s.u[1] = 12; + } + #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3]) + if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5 + || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8 + || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14 + || s.v[3] != 13 || s.v[4] != 14) + abort (); + #pragma omp target exit data map (release: s.s) + #pragma omp target exit data map (release: s.u[0:5]) + #pragma omp target exit data map (always, delete: s.v[1:3]) + #pragma omp target exit data map (release: s.s) + #pragma omp target exit data map (always delete : s.v[1:3]) +} diff --git a/libgomp/testsuite/libgomp.c++/target-2.C b/libgomp/testsuite/libgomp.c++/target-2.C index 35e910acc2e..1eab7f29b4a 100644 --- a/libgomp/testsuite/libgomp.c++/target-2.C +++ b/libgomp/testsuite/libgomp.c++/target-2.C @@ -33,7 +33,8 @@ fn2 (int x, double (&dr) [1024], double *&er) int j; fn1 (hr + 2 * x, ir + 2 * x, x); #pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \ - map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) + map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) \ + map(tofrom: s) #pragma omp parallel for reduction(+:s) for (j = 0; j < x; j++) s += br[j] * cr[j] + dr[x + j] + er[x + j] diff --git a/libgomp/testsuite/libgomp.c++/target-5.C b/libgomp/testsuite/libgomp.c++/target-5.C new file mode 100644 index 00000000000..6639be394c6 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.c++/target-5.C @@ -0,0 +1 @@ +#include "../libgomp.c/target-13.c" diff --git a/libgomp/testsuite/libgomp.c++/target-6.C b/libgomp/testsuite/libgomp.c++/target-6.C new file mode 100644 index 00000000000..8dbafb0437b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-6.C @@ -0,0 +1,64 @@ +extern "C" void abort (void); +struct S { int s, t; }; + +void +foo (int &x, int &y, S &u, S &v, double &s, double &t) +{ + int err = 0, i; + int a[y - 2], b[y - 2]; + int (&c)[y - 2] = a, (&d)[y - 2] = b; + for (i = 0; i < y - 2; i++) + { + c[i] = i; + d[i] = 3 + i; + } + #pragma omp target private (x, u, s, c, i) firstprivate (y, v, t, d) map(from:err) + { + x = y; + u = v; + s = t; + for (i = 0; i < y - 2; i++) + c[i] = d[i]; + err = (x != 6 || y != 6 + || u.s != 9 || u.t != 10 || v.s != 9 || v.t != 10 + || s != 12.5 || t != 12.5); + for (i = 0; i < y - 2; i++) + if (d[i] != 3 + i || c[i] != 3 + i) + err = 1; + else + { + c[i] += 2 * i; + d[i] += i; + } + x += 1; + y += 2; + u.s += 3; + v.t += 4; + s += 2.5; + t += 3.0; + if (x != 7 || y != 8 + || u.s != 12 || u.t != 10 || v.s != 9 || v.t != 14 + || s != 15.0 || t != 15.5) + err = 1; + for (i = 0; i < y - 4; i++) + if (d[i] != 3 + 2 * i || c[i] != 3 + 3 * i) + err = 1; + } + if (err || x != 5 || y != 6 + || u.s != 7 || u.t != 8 || v.s != 9 || v.t != 10 + || s != 11.5 || t != 12.5) + abort (); + for (i = 0; i < y - 2; i++) + if (d[i] != 3 + i || c[i] != i) + abort (); +} + +int +main () +{ + int x = 5, y = 6; + S u = { 7, 8 }, v = { 9, 10 }; + double s = 11.5, t = 12.5; + foo (x, y, u, v, s, t); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-7.C b/libgomp/testsuite/libgomp.c++/target-7.C new file mode 100644 index 00000000000..e13c50f26da --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-7.C @@ -0,0 +1,90 @@ +extern "C" void abort (); + +void +foo (int *x, int *&y, int (&z)[15]) +{ + int a[10], b[15], err, i; + for (i = 0; i < 10; i++) + a[i] = 7 * i; + for (i = 0; i < 15; 
i++) + b[i] = 8 * i; + #pragma omp target map(to:x[5:10], y[5:10], z[5:10], a[0:10], b[5:10]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if (x[5 + i] != 20 + 4 * i + || y[5 + i] != 25 + 5 * i + || z[5 + i] != 30 + 6 * i + || a[i] != 7 * i + || b[5 + i] != 40 + 8 * i) + err = 1; + } + if (err) + abort (); +} + +void +bar (int n, int v) +{ + int a[n], b[n], c[n], d[n], e[n], err, i; + int (*x)[n] = &c; + int (*y2)[n] = &d; + int (*&y)[n] = y2; + int (&z)[n] = e; + for (i = 0; i < n; i++) + { + (*x)[i] = 4 * i; + (*y)[i] = 5 * i; + z[i] = 6 * i; + a[i] = 7 * i; + b[i] = 8 * i; + } + #pragma omp target map(to:x[0][5:10], y[0][5:10], z[5:10], a[0:10], b[5:10]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if ((*x)[5 + i] != 20 + 4 * i + || (*y)[5 + i] != 25 + 5 * i + || z[5 + i] != 30 + 6 * i + || a[i] != 7 * i + || b[5 + i] != 40 + 8 * i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + { + (*x)[i] = 9 * i; + (*y)[i] = 10 * i; + z[i] = 11 * i; + a[i] = 12 * i; + b[i] = 13 * i; + } + #pragma omp target map(to:x[0][v:v+5], y[0][v:v+5], z[v:v+5], a[v-5:v+5], b[v:v+5]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if ((*x)[5 + i] != 45 + 9 * i + || (*y)[5 + i] != 50 + 10 * i + || z[5 + i] != 55 + 11 * i + || a[i] != 12 * i + || b[5 + i] != 65 + 13 * i) + err = 1; + } + if (err) + abort (); +} + +int +main () +{ + int x[15], y2[15], z[15], *y = y2, i; + for (i = 0; i < 15; i++) + { + x[i] = 4 * i; + y[i] = 5 * i; + z[i] = 6 * i; + } + foo (x, y, z); + bar (15, 5); +} diff --git a/libgomp/testsuite/libgomp.c++/target-8.C b/libgomp/testsuite/libgomp.c++/target-8.C new file mode 100644 index 00000000000..d886b476754 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-8.C @@ -0,0 +1,58 @@ +extern "C" void abort (); +struct S { int a; }; +#ifdef __SIZEOF_INT128__ +typedef __int128 T; +#else +typedef long long int T; +#endif + +void +foo (T a, int b, struct S c) +{ + int err; + #pragma omp target firstprivate (a, b, c) 
map(from:err) + { + err = 0; + if (a != 131 || b != 276 || c.a != 59) + err = 1; + a = 936; + b = 27; + c.a = 98; + if (a != 936 || b != 27 || c.a != 98) + err = 1; + } + if (err || a != 131 || b != 276 || c.a != 59) + abort (); +} + +void +bar (T &a, int &b, struct S &c) +{ + int err; + #pragma omp target firstprivate (a, b, c) map(from:err) + { + err = 0; + if (a != 131 || b != 276 || c.a != 59) + err = 1; + a = 936; + b = 27; + c.a = 98; + if (a != 936 || b != 27 || c.a != 98) + err = 1; + } + if (err || a != 131 || b != 276 || c.a != 59) + abort (); +} + +int +main () +{ + T a = 131; + int b = 276; + struct S c; + c.a = 59; + foo (a, b, c); + bar (a, b, c); + if (a != 131 || b != 276 || c.a != 59) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/target-9.C b/libgomp/testsuite/libgomp.c++/target-9.C new file mode 100644 index 00000000000..a5d171b0b3d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-9.C @@ -0,0 +1,73 @@ +extern "C" void abort (void); + +void +foo (int *&p, int (&s)[5], int n) +{ + int a[4] = { 7, 8, 9, 10 }, b[n], c[3] = { 20, 21, 22 }; + int *r = a + 1, *q = p - 1, i, err; + for (i = 0; i < n; i++) + b[i] = 9 + i; + #pragma omp target data map(to:a) + #pragma omp target data use_device_ptr(r) map(from:err) + #pragma omp target is_device_ptr(r) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 4; i++) + if (r[i - 1] != 7 + i) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:q[:4]) + #pragma omp target data use_device_ptr(p) map(from:err) + #pragma omp target is_device_ptr(p) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 4; i++) + if (p[i - 1] != i) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:b) + #pragma omp target data use_device_ptr(b) map(from:err) + #pragma omp target is_device_ptr(b) private(i) map(from:err) + { + err = 0; + for (i = 0; i < n; i++) + if (b[i] != 9 + i) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:c) + #pragma omp 
target data use_device_ptr(c) map(from:err) + #pragma omp target is_device_ptr(c) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 3; i++) + if (c[i] != 20 + i) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:s[:5]) + #pragma omp target data use_device_ptr(s) map(from:err) + #pragma omp target is_device_ptr(s) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 5; i++) + if (s[i] != 17 + i) + err = 1; + } + if (err) + abort (); +} + +int +main () +{ + int a[4] = { 0, 1, 2, 3 }, b[5] = { 17, 18, 19, 20, 21 }; + int *p = a + 1; + foo (p, b, 9); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-1.C b/libgomp/testsuite/libgomp.c++/taskloop-1.C new file mode 100644 index 00000000000..66f8e0b1d7c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-1.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-options "-O2 -fopenmp" } + +#include "../libgomp.c/taskloop-1.c" diff --git a/libgomp/testsuite/libgomp.c++/taskloop-2.C b/libgomp/testsuite/libgomp.c++/taskloop-2.C new file mode 100644 index 00000000000..67a0e92717e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-2.C @@ -0,0 +1,6 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +#include "../libgomp.c/taskloop-2.c" diff --git a/libgomp/testsuite/libgomp.c++/taskloop-3.C b/libgomp/testsuite/libgomp.c++/taskloop-3.C new file mode 100644 index 00000000000..bfd793c1c58 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-3.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-options "-O2 -fopenmp" } + +#include "../libgomp.c/taskloop-3.c" diff --git a/libgomp/testsuite/libgomp.c++/taskloop-4.C b/libgomp/testsuite/libgomp.c++/taskloop-4.C new file mode 100644 index 00000000000..937cfcc0029 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-4.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-options "-O2 -fopenmp" } + +#include 
"../libgomp.c/taskloop-4.c" diff --git a/libgomp/testsuite/libgomp.c++/taskloop-5.C b/libgomp/testsuite/libgomp.c++/taskloop-5.C new file mode 100644 index 00000000000..eb464467b66 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-5.C @@ -0,0 +1,73 @@ +#include <omp.h> + +__attribute__((noinline, noclone)) void +foo (int &b) +{ +#pragma omp parallel +#pragma omp single + { + bool f = false; + #pragma omp taskloop firstprivate (b, f) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (b != 2) + __builtin_abort (); + } + else if (b != 8 * q) + __builtin_abort (); + b = 8 * q; + f = true; + } + } + int n; +#pragma omp parallel +#pragma omp single + { + bool f = false; + #pragma omp taskloop firstprivate (f) lastprivate (b, n) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (f && b != 8 * q) + __builtin_abort (); + b = 8 * q; + n = q; + f = true; + } + } + if (b != 8 * n) + __builtin_abort (); + b = 9; +#pragma omp parallel +#pragma omp single + { + bool f = false; + #pragma omp taskloop firstprivate (b, f) lastprivate (b, n) + for (int i = 0; i < 30; i++) + { + int q = omp_get_thread_num (); + if (!f) + { + if (b != 9) + __builtin_abort (); + } + else if (b != 11 * q) + __builtin_abort (); + b = 11 * q; + n = q; + f = true; + } + } + if (b != 11 * n) + __builtin_abort (); +} + +int +main () +{ + int b = 2; + foo (b); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-6.C b/libgomp/testsuite/libgomp.c++/taskloop-6.C new file mode 100644 index 00000000000..edf7f7a371b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-6.C @@ -0,0 +1,442 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I 
operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T *p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () {} +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template 
<typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); 
+private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +void +f1 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop num_tasks(22) + for (I<int> i = x; i <= y; i += 6) + baz (i); +} + +void +f2 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop grainsize(384) private(i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); +} + +template <typename T> +void +f3 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop default(none) firstprivate (x, y) + for (I<int> i = x; i <= y; i = i + 9 - 8) + baz (i); +} + +template <typename T> +void +f4 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); +} + +void +f5 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (I<int> i = x + 2000 - 64; i > y + 10; i -= 10) + baz (i); +} + +template <int N> +void +f6 (const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (I<int> i = x + 2000 - 64; i > y + 10; i = i - 12 + 2) + { + I<int> j = i + N; + baz (j); + } +} + +template <int N> +void +f7 (I<int> i, const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop default(none) firstprivate (x, y) + for (i = x - 10; i <= y + 10; i += N) + baz (i); +} + +template <int N> +void +f8 (J<int> j) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop default(none) num_tasks(*I<int> (j.begin ())) firstprivate (j) + for (i = j.begin (); i <= j.end () + N; i 
+= 2) + baz (i); +} + +template <typename T, int N> +void +f9 (const I<T> &x, const I<T> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop grainsize(163) + for (I<T> i = x; i <= y; i = i + N) + baz (i); +} + +template <typename T, int N> +void +f10 (const I<T> &x, const I<T> &y) +{ + I<T> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x; i > y; i = i + N) + baz (i); +} + +template <typename T> +void +f11 (const T &x, const T &y) +{ +#pragma omp parallel + { +#pragma omp single nowait +#pragma omp taskloop nogroup + for (T i = x; i <= y; i += 3) + baz (i); +#pragma omp single nowait + { + T j = y + 3; + baz (j); + } + } +} + +template <typename T> +void +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x; i > y; --i) + baz (i); +} + +template <int N> +struct K +{ + template <typename T> + static void + f13 (const T &x, const T &y) + { +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (T i = x; i <= y + N; i += N) + baz (i); + } +}; + +I<int> +f14 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); + return i; +} + +template <typename T> +I<int> +f15 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); + return i; +} + +template <int N> +I<int> +f16 (I<int> i, const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x - 10; i <= y + 10; i += N) + baz (i); + return i; +} + +template <int N> +I<int> +f17 (J<int> j) +{ + static I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = j.begin (); i <= j.end () + N; i += 2) + baz (i); + return i; +} + +template 
<typename T, int N> +I<T> +f18 (const I<T> &x, const I<T> &y) +{ + static I<T> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i > y; i = i + N) + baz (i); + return i; +} + +template <typename T> +T +f19 (const T &x, const T &y) +{ + T i; +#pragma omp parallel + { +#pragma omp single nowait +#pragma omp taskloop nogroup lastprivate(i) + for (i = x; i <= y; i += 3) + baz (i); +#pragma omp single nowait + { + T j = y + 3; + baz (j); + } + } + return i; +} + +template <typename T> +T +f20 (const T &x, const T &y) +{ + T i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i > y; --i) + baz (i); + return i; +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + abort () + +int +main () +{ + int a[2000]; + long b[2000]; + for (int i = 0; i < 2000; i++) + { + a[i] = i; + b[i] = i; + } + f1 (&a[10], &a[1990]); + check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0); + f2 (&a[0], &a[1999]); + check (i < 1998 && (i & 1) == 0); + f3<char> (&a[20], &a[1837]); + check (i >= 20 && i <= 1837); + f4<int> (&a[0], &a[30]); + check (i > 40 && i <= 2000 - 64); + f5 (&a[0], &a[100]); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f6<-10> (&a[10], &a[110]); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f7<6> (I<int> (), &a[12], &a[1800]); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + f8<121> (J<int> (&a[14], &a[1803])); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + f9<int, 7> (&a[33], &a[1967]); + check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0); + f10<int, -7> (&a[1939], &a[17]); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + f11<I<int> > (&a[16], &a[1981]); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + f12<I<int> > (&a[1761], &a[37]); + check (i > 37 && i <= 1761); + K<5>::f13<I<int> > (&a[1], &a[1935]); + check (i >= 1 
&& i <= 1936 && (i - 1) % 5 == 0); + if (f14 (&a[0], &a[1999]) != I<int>(&a[1998])) + abort (); + check (i < 1998 && (i & 1) == 0); + if (f15<int> (&a[0], &a[30]) != I<int>(&a[40])) + abort (); + check (i > 40 && i <= 2000 - 64); + if (f16<6> (I<int> (), &a[12], &a[1800]) != I<int>(&a[1814])) + abort (); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + if (f17<121> (J<int> (&a[14], &a[1803])) != I<int>(&a[1926])) + abort (); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + if (f18<int, -7> (&a[1939], &a[17]) != I<int>(&a[14])) + abort (); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + if (f19<I<int> > (&a[16], &a[1981]) != I<int>(&a[1984])) + abort (); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + if (f20<I<int> > (&a[1761], &a[37]) != I<int>(&a[37])) + abort (); + check (i > 37 && i <= 1761); + f9<long, 7> (&b[33], &b[1967]); + check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0); + f10<long, -7> (&b[1939], &b[17]); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + f11<I<long> > (&b[16], &b[1981]); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + f12<I<long> > (&b[1761], &b[37]); + check (i > 37 && i <= 1761); + K<5>::f13<I<long> > (&b[1], &b[1935]); + check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0); + if (f18<long, -7> (&b[1939], &b[17]) != I<long>(&b[14])) + abort (); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + if (f19<I<long> > (&b[16], &b[1981]) != I<long>(&b[1984])) + abort (); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + if (f20<I<long> > (&b[1761], &b[37]) != I<long>(&b[37])) + abort (); + check (i > 37 && i <= 1761); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-7.C b/libgomp/testsuite/libgomp.c++/taskloop-7.C new file mode 100644 index 00000000000..b9a3c81e381 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-7.C @@ -0,0 +1,400 @@ +// { dg-do run } + +#include <vector> +#include <cstdlib> + +template <typename T> +class J +{ +public: + typedef typename std::vector<T>::const_iterator 
const_iterator; + J(const const_iterator &x, const const_iterator &y) : b (x), e (y) {} + const const_iterator &begin (); + const const_iterator &end (); +private: + const_iterator b, e; +}; + +template <typename T> +const typename std::vector<T>::const_iterator &J<T>::begin () { return b; } +template <typename T> +const typename std::vector<T>::const_iterator &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (T &i) +{ + if (*i < 0 || *i >= 2000) + std::abort (); + results[*i]++; +} + +void +f1 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::vector<int>::const_iterator i = x; i <= y; i += 6) + baz (i); +} + +void +f2 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ + std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop private(i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); +} + +template <typename T> +void +f3 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::vector<int>::const_iterator i = x; i <= y; i = i + 9 - 8) + baz (i); +} + +template <typename T> +void +f4 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ + std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); +} + +void +f5 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::vector<int>::const_iterator i = x + 2000 - 64; i > y + 10; i -= 10) + baz (i); +} + +template <int N> +void +f6 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator 
&y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::vector<int>::const_iterator i = x + 2000 - 64; + i > y + 10; i = i - 12 + 2) + { + std::vector<int>::const_iterator j = i + N; + baz (j); + } +} + +template <int N> +void +f7 (std::vector<int>::const_iterator i, + const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x - 10; i <= y + 10; i += N) + baz (i); +} + +template <int N> +void +f8 (J<int> j) +{ + std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = j.begin (); i <= j.end () + N; i += 2) + baz (i); +} + +template <typename T, int N> +void +f9 (const typename std::vector<T>::const_iterator &x, + const typename std::vector<T>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (typename std::vector<T>::const_iterator i = x; i <= y; i = i + N) + baz (i); +} + +template <typename T, int N> +void +f10 (const typename std::vector<T>::const_iterator &x, + const typename std::vector<T>::const_iterator &y) +{ + typename std::vector<T>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x; i > y; i = i + N) + baz (i); +} + +template <typename T> +void +f11 (const T &x, const T &y) +{ +#pragma omp parallel + { +#pragma omp single nowait +#pragma omp taskloop nogroup + for (T i = x; i <= y; i += 3) + baz (i); +#pragma omp single nowait + { + T j = y + 3; + baz (j); + } + } +} + +template <typename T> +void +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x; i > y; --i) + baz (i); +} + +template <int N> +struct K +{ + template <typename T> + static void + f13 (const T &x, const T &y) + { +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (T i = x; i <= y + N; i += N) + baz (i); + } +}; + 
+std::vector<int>::const_iterator +f14 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ + std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); + return i; +} + +template <typename T> +std::vector<int>::const_iterator +f15 (const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ + std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); + return i; +} + +template <int N> +std::vector<int>::const_iterator +f16 (std::vector<int>::const_iterator i, + const std::vector<int>::const_iterator &x, + const std::vector<int>::const_iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x - 10; i <= y + 10; i += N) + baz (i); + return i; +} + +template <int N> +std::vector<int>::const_iterator +f17 (J<int> j) +{ + static std::vector<int>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = j.begin (); i <= j.end () + N; i += 2) + baz (i); + return i; +} + +template <typename T, int N> +typename std::vector<T>::const_iterator +f18 (const typename std::vector<T>::const_iterator &x, + const typename std::vector<T>::const_iterator &y) +{ + static typename std::vector<T>::const_iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i > y; i = i + N) + baz (i); + return i; +} + +template <typename T> +T +f19 (const T &x, const T &y) +{ + T i; +#pragma omp parallel + { +#pragma omp single nowait +#pragma omp taskloop nogroup lastprivate(i) + for (i = x; i <= y; i += 3) + baz (i); +#pragma omp single nowait + { + T j = y + 3; + baz (j); + } + } + return i; +} + +template <typename T> +T +f20 (const T &x, const T &y) +{ + T i; 
+#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x; i > y; --i) + baz (i); + return i; +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + std::abort (); \ + results[i] = 0; \ + } \ + else if (results[i]) \ + std::abort () + +int +main () +{ + std::vector<int> a(2000); + std::vector<long> b(2000); + for (int i = 0; i < 2000; i++) + { + a[i] = i; + b[i] = i; + } + f1 (a.begin () + 10, a.begin () + 1990); + check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0); + f2 (a.begin () + 0, a.begin () + 1999); + check (i < 1998 && (i & 1) == 0); + f3<char> (a.begin () + 20, a.begin () + 1837); + check (i >= 20 && i <= 1837); + f4<int> (a.begin () + 0, a.begin () + 30); + check (i > 40 && i <= 2000 - 64); + f5 (a.begin () + 0, a.begin () + 100); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f6<-10> (a.begin () + 10, a.begin () + 110); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f7<6> (std::vector<int>::const_iterator (), a.begin () + 12, + a.begin () + 1800); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + f8<121> (J<int> (a.begin () + 14, a.begin () + 1803)); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + f9<int, 7> (a.begin () + 33, a.begin () + 1967); + check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0); + f10<int, -7> (a.begin () + 1939, a.begin () + 17); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + f11<std::vector<int>::const_iterator > (a.begin () + 16, a.begin () + 1981); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + f12<std::vector<int>::const_iterator > (a.begin () + 1761, a.begin () + 37); + check (i > 37 && i <= 1761); + K<5>::f13<std::vector<int>::const_iterator > (a.begin () + 1, + a.begin () + 1935); + check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0); + if (f14 (a.begin () + 0, a.begin () + 1999) != a.begin () + 1998) + std::abort (); + check (i < 1998 && (i & 1) == 0); + if (f15<int> (a.begin () + 0, a.begin 
() + 30) != a.begin () + 40) + std::abort (); + check (i > 40 && i <= 2000 - 64); + if (f16<6> (std::vector<int>::const_iterator (), a.begin () + 12, + a.begin () + 1800) != a.begin () + 1814) + std::abort (); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + if (f17<121> (J<int> (a.begin () + 14, a.begin () + 1803)) != a.begin () + 1926) + std::abort (); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + if (f18<int, -7> (a.begin () + 1939, a.begin () + 17) != a.begin () + 14) + std::abort (); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + if (f19<std::vector<int>::const_iterator > (a.begin () + 16, a.begin () + 1981) + != a.begin () + 1984) + std::abort (); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + if (f20<std::vector<int>::const_iterator > (a.begin () + 1761, a.begin () + 37) + != a.begin () + 37) + std::abort (); + check (i > 37 && i <= 1761); + f9<long, 7> (b.begin () + 33, b.begin () + 1967); + check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0); + f10<long, -7> (b.begin () + 1939, b.begin () + 17); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + f11<std::vector<long>::const_iterator > (b.begin () + 16, b.begin () + 1981); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + f12<std::vector<long>::const_iterator > (b.begin () + 1761, b.begin () + 37); + check (i > 37 && i <= 1761); + K<5>::f13<std::vector<long>::const_iterator > (b.begin () + 1, + b.begin () + 1935); + check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0); + if (f18<long, -7> (b.begin () + 1939, b.begin () + 17) != b.begin () + 14) + std::abort (); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + if (f19<std::vector<long>::const_iterator > (b.begin () + 16, b.begin () + 1981) + != b.begin () + 1984) + std::abort (); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + if (f20<std::vector<long>::const_iterator > (b.begin () + 1761, b.begin () + 37) + != b.begin () + 37) + std::abort (); + check (i > 37 && i <= 1761); +} diff --git 
a/libgomp/testsuite/libgomp.c++/taskloop-8.C b/libgomp/testsuite/libgomp.c++/taskloop-8.C new file mode 100644 index 00000000000..d164907d1d6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-8.C @@ -0,0 +1,250 @@ +// { dg-do run } + +#include <string> +#include <cstdlib> + +template <typename T> +class J +{ +public: + typedef typename std::basic_string<T>::iterator iterator; + J(const iterator &x, const iterator &y) : b (x), e (y) {} + const iterator &begin (); + const iterator &end (); +private: + iterator b, e; +}; + +template <typename T> +const typename std::basic_string<T>::iterator &J<T>::begin () { return b; } +template <typename T> +const typename std::basic_string<T>::iterator &J<T>::end () { return e; } + +template <typename T> +void +baz (T &i) +{ + if (*i < L'a' || *i >= L'a' + 2000) + std::abort (); + (*i)++; +} + +void +f1 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::basic_string<wchar_t>::iterator i = x; i <= y; i += 6) + baz (i); +} + +void +f2 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ + std::basic_string<wchar_t>::iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop private(i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); +} + +template <typename T> +void +f3 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::basic_string<wchar_t>::iterator i = x; i <= y; i = i + 9 - 8) + baz (i); +} + +template <typename T> +void +f4 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ + std::basic_string<wchar_t>::iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate(i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); +} + +void 
+f5 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::basic_string<wchar_t>::iterator i = x + 2000 - 64; + i > y + 10; i -= 10) + baz (i); +} + +template <int N> +void +f6 (const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (std::basic_string<wchar_t>::iterator i = x + 2000 - 64; + i > y + 10; i = i - 12 + 2) + { + std::basic_string<wchar_t>::iterator j = i + N; + baz (j); + } +} + +template <int N> +void +f7 (std::basic_string<wchar_t>::iterator i, + const std::basic_string<wchar_t>::iterator &x, + const std::basic_string<wchar_t>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x - 10; i <= y + 10; i += N) + baz (i); +} + +template <wchar_t N> +void +f8 (J<wchar_t> j) +{ + std::basic_string<wchar_t>::iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = j.begin (); i <= j.end () + N; i += 2) + baz (i); +} + +template <typename T, int N> +void +f9 (const typename std::basic_string<T>::iterator &x, + const typename std::basic_string<T>::iterator &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (typename std::basic_string<T>::iterator i = x; i <= y; i = i + N) + baz (i); +} + +template <typename T, int N> +void +f10 (const typename std::basic_string<T>::iterator &x, + const typename std::basic_string<T>::iterator &y) +{ + typename std::basic_string<T>::iterator i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (i = x; i > y; i = i + N) + baz (i); +} + +template <typename T> +void +f11 (const T &x, const T &y) +{ +#pragma omp parallel + { +#pragma omp single nowait +#pragma omp taskloop nogroup + for (T i = x; i <= y; i += 3) + baz (i); +#pragma omp single nowait + { + T j = y + 3; + baz (j); + } + } +} 
+ +template <typename T> +void +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop private(i) + for (i = x; i > y; --i) + baz (i); +} + +template <int N> +struct K +{ + template <typename T> + static void + f13 (const T &x, const T &y) + { +#pragma omp parallel +#pragma omp single +#pragma omp taskloop + for (T i = x; i <= y + N; i += N) + baz (i); + } +}; + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (a[i] != L'a' + i + 1) \ + std::abort (); \ + a[i] = L'a' + i; \ + } \ + else if (a[i] != L'a' + i) \ + std::abort () + +int +main () +{ + std::basic_string<wchar_t> a = L""; + for (int i = 0; i < 2000; i++) + a += L'a' + i; + f1 (a.begin () + 10, a.begin () + 1990); + check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0); + f2 (a.begin () + 0, a.begin () + 1999); + check (i < 1998 && (i & 1) == 0); + f3<char> (a.begin () + 20, a.begin () + 1837); + check (i >= 20 && i <= 1837); + f4<int> (a.begin () + 0, a.begin () + 30); + check (i > 40 && i <= 2000 - 64); + f5 (a.begin () + 0, a.begin () + 100); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f6<-10> (a.begin () + 10, a.begin () + 110); + check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0); + f7<6> (std::basic_string<wchar_t>::iterator (), a.begin () + 12, + a.begin () + 1800); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + f8<121> (J<wchar_t> (a.begin () + 14, a.begin () + 1803)); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + f9<wchar_t, 7> (a.begin () + 33, a.begin () + 1967); + check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0); + f10<wchar_t, -7> (a.begin () + 1939, a.begin () + 17); + check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0); + f11<std::basic_string<wchar_t>::iterator > (a.begin () + 16, + a.begin () + 1981); + check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0); + f12<std::basic_string<wchar_t>::iterator > (a.begin () + 1761, + a.begin () + 37); + check (i > 37 && i <= 1761); + 
K<5>::f13<std::basic_string<wchar_t>::iterator > (a.begin () + 1, + a.begin () + 1935); + check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-9.C b/libgomp/testsuite/libgomp.c++/taskloop-9.C new file mode 100644 index 00000000000..65abc31ff8d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-9.C @@ -0,0 +1,323 @@ +// { dg-do run } + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +extern "C" void abort (); + +template <typename T> +class I +{ +public: + typedef ptrdiff_t difference_type; + I (); + ~I (); + I (T *); + I (const I &); + T &operator * (); + T *operator -> (); + T &operator [] (const difference_type &) const; + I &operator = (const I &); + I &operator ++ (); + I operator ++ (int); + I &operator -- (); + I operator -- (int); + I &operator += (const difference_type &); + I &operator -= (const difference_type &); + I operator + (const difference_type &) const; + I operator - (const difference_type &) const; + template <typename S> friend bool operator == (I<S> &, I<S> &); + template <typename S> friend bool operator == (const I<S> &, const I<S> &); + template <typename S> friend bool operator < (I<S> &, I<S> &); + template <typename S> friend bool operator < (const I<S> &, const I<S> &); + template <typename S> friend bool operator <= (I<S> &, I<S> &); + template <typename S> friend bool operator <= (const I<S> &, const I<S> &); + template <typename S> friend bool operator > (I<S> &, I<S> &); + template <typename S> friend bool operator > (const I<S> &, const I<S> &); + template <typename S> friend bool operator >= (I<S> &, I<S> &); + template <typename S> friend bool operator >= (const I<S> &, const I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &); + template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &); + template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &); +private: + T 
*p; +}; +template <typename T> I<T>::I () : p (0) {} +template <typename T> I<T>::~I () { p = (T *) 0; } +template <typename T> I<T>::I (T *x) : p (x) {} +template <typename T> I<T>::I (const I &x) : p (x.p) {} +template <typename T> T &I<T>::operator * () { return *p; } +template <typename T> T *I<T>::operator -> () { return p; } +template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; } +template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; } +template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; } +template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); } +template <typename T> I<T> &I<T>::operator -- () { --p; return *this; } +template <typename T> I<T> I<T>::operator -- (int) { return I (p--); } +template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; } +template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; } +template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); } +template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); } +template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; } +template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; } +template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); } +template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); } +template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; } +template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; } +template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; } +template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; } +template 
<typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; } +template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; } +template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; } +template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; } +template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; } +template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); } + +template <typename T> +class J +{ +public: + J(const I<T> &x, const I<T> &y) : b (x), e (y) {} + const I<T> &begin (); + const I<T> &end (); +private: + I<T> b, e; +}; + +template <typename T> const I<T> &J<T>::begin () { return b; } +template <typename T> const I<T> &J<T>::end () { return e; } + +int results[2000]; + +template <typename T> +void +baz (I<T> &i) +{ + if (*i < 0 || *i >= 2000) + abort (); + results[*i]++; +} + +I<int> +f1 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel shared (i) + { + #pragma omp single + #pragma omp taskloop lastprivate (i) + for (i = x; i < y - 1; ++i) + baz (i); + #pragma omp single + i += 3; + } + return I<int> (i); +} + +I<int> +f2 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x; i < y - 1; i = 1 - 6 + 7 + i) + baz (i); + return I<int> (i); +} + +template <typename T> +I<int> +f3 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x + 1000 - 64; i <= y - 10; i++) + baz (i); + return i; +} + +template <typename T> +I<int> +f4 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x + 2000 - 64; i > y + 10; --i) + baz (i); + return I<int> (i); +} + 
+template <typename T> +I<int> +f5 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x; i > y + T (6); i--) + baz (i); + return i; +} + +template <typename T> +I<int> +f6 (const I<int> &x, const I<int> &y) +{ + I<int> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x - T (7); i > y; i -= T (2)) + baz (i); + return I<int> (i); +} + +template <int N> +I<int> +f7 (I<int> i, const I<int> &x, const I<int> &y) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x - 10; i <= y + 10; i += N) + baz (i); + return I<int> (i); +} + +template <int N> +I<int> +f8 (J<int> j) +{ + I<int> i; +#pragma omp parallel shared (i) + #pragma omp single + #pragma omp taskloop lastprivate (i) + for (i = j.begin (); i <= j.end () + N; i += 2) + baz (i); + return i; +} + +I<int> i9; + +template <long N> +I<int> & +f9 (J<int> j) +{ +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i9) + for (i9 = j.begin () + N; i9 <= j.end () - N; i9 = i9 - N) + baz (i9); + return i9; +} + +template <typename T, int N> +I<T> +f10 (const I<T> &x, const I<T> &y) +{ + I<T> i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x; i > y; i = i + N) + baz (i); + return i; +} + +template <typename T, typename U> +T +f11 (T i, const T &x, const T &y) +{ +#pragma omp parallel + #pragma omp single + #pragma omp taskloop lastprivate (i) + for (i = x + U (2); i <= y + U (1); i = U (2) + U (3) + i) + baz (i); + return T (i); +} + +template <typename T> +T +f12 (const T &x, const T &y) +{ + T i; +#pragma omp parallel +#pragma omp single +#pragma omp taskloop lastprivate (i) + for (i = x; i > y; --i) + baz (i); + return i; +} + +#define check(expr) \ + for (int i = 0; i < 2000; i++) \ + if (expr) \ + { \ + if (results[i] != 1) \ + abort (); \ + results[i] = 0; \ + } \ + else if 
(results[i]) \ + abort () + +int +main () +{ + int a[2000]; + long b[2000]; + for (int i = 0; i < 2000; i++) + { + a[i] = i; + b[i] = i; + } + if (*f1 (&a[10], &a[1873]) != 1875) + abort (); + check (i >= 10 && i < 1872); + if (*f2 (&a[0], &a[1998]) != 1998) + abort (); + check (i < 1997 && (i & 1) == 0); + if (*f3<int> (&a[10], &a[1971]) != 1962) + abort (); + check (i >= 946 && i <= 1961); + if (*f4<int> (&a[0], &a[30]) != 40) + abort (); + check (i > 40 && i <= 2000 - 64); + if (*f5<short> (&a[1931], &a[17]) != 23) + abort (); + check (i > 23 && i <= 1931); + if (*f6<long> (&a[1931], &a[17]) != 16) + abort (); + check (i > 17 && i <= 1924 && (i & 1) == 0); + if (*f7<6> (I<int> (), &a[12], &a[1800]) != 1814) + abort (); + check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0); + if (*f8<121> (J<int> (&a[14], &a[1803])) != 1926) + abort (); + check (i >= 14 && i <= 1924 && (i & 1) == 0); + if (*f9<-3L> (J<int> (&a[27], &a[1761])) != 1767) + abort (); + check (i >= 24 && i <= 1764 && (i % 3) == 0); + if (*f10<int, -7> (&a[1939], &a[17]) != 14) + abort (); + check (i >= 21 && i <= 1939 && i % 7 == 0); + if (*f11<I<int>, short> (I<int> (), &a[71], &a[1941]) != 1943) + abort (); + check (i >= 73 && i <= 1938 && (i - 73) % 5 == 0); + if (*f12<I<int> > (&a[1761], &a[37]) != 37) + abort (); + check (i > 37 && i <= 1761); + if (*f10<long, -7> (&b[1939], &b[17]) != 14) + abort (); + check (i >= 21 && i <= 1939 && i % 7 == 0); + if (*f11<I<long>, short> (I<long> (), &b[71], &b[1941]) != 1943) + abort (); + check (i >= 73 && i <= 1938 && (i - 73) % 5 == 0); + if (*f12<I<long> > (&b[1761], &b[37]) != 37) + abort (); + check (i > 37 && i <= 1761); +} diff --git a/libgomp/testsuite/libgomp.c/affinity-2.c b/libgomp/testsuite/libgomp.c/affinity-2.c new file mode 100644 index 00000000000..f8216574704 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/affinity-2.c @@ -0,0 +1,89 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_PROC_BIND "spread,close" } */ +/* { dg-set-target-env-var 
OMP_PLACES "{6,7}:4:-2,!{2,3}" } */ +/* { dg-set-target-env-var OMP_NUM_THREADS "2" } */ + +#include <omp.h> +#include <stdlib.h> +#include <stdio.h> + +int * +get_buf (int nump) +{ + static int *buf; + static size_t buf_size; + if ((size_t) nump > buf_size) + { + buf_size *= 2; + if (nump > buf_size) + buf_size = nump + 64; + int *bufn = realloc (buf, buf_size * sizeof (int)); + if (bufn == NULL) + { + fprintf (stderr, "memory allocation error\n"); + exit (1); + } + buf = bufn; + } + return buf; +} + +void +print_place (int count, int *ids) +{ + int i, j; + printf ("{"); + for (i = 0; i < count; i++) + { + for (j = i + 1; j < count; j++) + if (ids[j] != ids[i] + (j - i)) + break; + if (i) + printf (","); + if (j == i + 1) + printf ("%d", ids[i]); + else + { + printf ("%d:%d", ids[i], j - i); + i = j - 1; + } + } + printf ("}\n"); +} + +void +print_place_var (void) +{ + int place = omp_get_place_num (); + int num_places = omp_get_partition_num_places (); + int *ids = get_buf (num_places); + omp_get_partition_place_nums (ids); + printf ("place %d\n", place); + if (num_places) + printf ("partition %d-%d\n", ids[0], ids[num_places - 1]); +} + +int +main () +{ + int i, num = omp_get_num_places (), nump, *ids; + printf ("omp_get_num_places () == %d\n", num); + for (i = 0; i < num; i++) + { + printf ("place %d ", i); + nump = omp_get_place_num_procs (i); + ids = get_buf (nump); + omp_get_place_proc_ids (i, ids); + print_place (nump, ids); + } + print_place_var (); + omp_set_nested (1); + #pragma omp parallel + if (omp_get_thread_num () == omp_get_num_threads () - 1) + { + #pragma omp parallel + if (omp_get_thread_num () == omp_get_num_threads () - 1) + print_place_var (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/doacross-1.c b/libgomp/testsuite/libgomp.c/doacross-1.c new file mode 100644 index 00000000000..0794c80ec2e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/doacross-1.c @@ -0,0 +1,181 @@ +extern void abort (void); + +#define N 256 +int a[N], 
b[N / 16][8][4], c[N / 32][8][8]; +volatile int d, e; + +int +main () +{ + int i, j, k, l, m; + #pragma omp parallel private (l) + { + #pragma omp for schedule(static, 1) ordered (1) nowait + for (i = 0; i < N; i++) + { + #pragma omp atomic write + a[i] = 1; + #pragma omp ordered depend(sink: i - 1) + if (i) + { + #pragma omp atomic read + l = a[i - 1]; + if (l < 2) + abort (); + } + #pragma omp atomic write + a[i] = 2; + if (i < N - 1) + { + #pragma omp atomic read + l = a[i + 1]; + if (l == 3) + abort (); + } + #pragma omp ordered depend(source) + #pragma omp atomic write + a[i] = 3; + } + #pragma omp for schedule(static, 0) ordered (3) nowait + for (i = 2; i < N / 16 - 1; i++) + for (j = 0; j < 8; j += 2) + for (k = 1; k <= 3; k++) + { + #pragma omp atomic write + b[i][j][k] = 1; + #pragma omp ordered depend(sink: i, j - 2, k - 1) \ + depend(sink: i - 2, j - 2, k + 1) + #pragma omp ordered depend(sink: i - 3, j + 2, k - 2) + if (j >= 2 && k > 1) + { + #pragma omp atomic read + l = b[i][j - 2][k - 1]; + if (l < 2) + abort (); + } + #pragma omp atomic write + b[i][j][k] = 2; + if (i >= 4 && j >= 2 && k < 3) + { + #pragma omp atomic read + l = b[i - 2][j - 2][k + 1]; + if (l < 2) + abort (); + } + if (i >= 5 && j < N / 16 - 3 && k == 3) + { + #pragma omp atomic read + l = b[i - 3][j + 2][k - 2]; + if (l < 2) + abort (); + } + #pragma omp ordered depend(source) + #pragma omp atomic write + b[i][j][k] = 3; + } +#define A(n) int n; +#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) +#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) +#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) + D(m) +#undef A + #pragma omp for collapse (2) ordered(61) schedule(dynamic, 15) + for (i = 0; i < N / 32; i++) + for (j = 7; j > 1; j--) + for (k = 6; k >= 0; k -= 2) +#define A(n) for (n = 4; n < 5; n++) + D(m) +#undef A + { + #pragma omp atomic write + c[i][j][k] = 1; +#define A(n) ,n +#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321) + #pragma omp ordered depend (sink: i, j, 
k + 2 E(m)) \ + depend (sink:i - 2, j + 1, k - 4 E(m)) \ + depend(sink: i - 1, j - 2, k - 2 E(m)) + if (k <= 4) + { + l = c[i][j][k + 2]; + if (l < 2) + abort (); + } + #pragma omp atomic write + c[i][j][k] = 2; + if (i >= 2 && j < 7 && k >= 4) + { + l = c[i - 2][j + 1][k - 4]; + if (l < 2) + abort (); + } + if (i >= 1 && j >= 4 && k >= 2) + { + l = c[i - 1][j - 2][k - 2]; + if (l < 2) + abort (); + } + #pragma omp ordered depend (source) + #pragma omp atomic write + c[i][j][k] = 3; + } + + #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k) + for (i = 0; i < d + 1; i++) + for (j = d + 1; j >= 0; j--) + for (k = 0; k < d; k++) + for (l = 0; l < d + 2; l++) + { + #pragma omp ordered depend (source) + #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l) + if (!e) + abort (); + } + #pragma omp single + { + if (i != 1 || j != -1 || k != 0) + abort (); + i = 8; j = 9; k = 10; + } + #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m) + for (i = 0; i < d + 1; i++) + for (j = d + 1; j >= 0; j--) + for (k = 0; k < d + 2; k++) + for (m = 0; m < d; m++) + { + #pragma omp ordered depend (source) + #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m) + abort (); + } + #pragma omp single + if (i != 1 || j != -1 || k != 2 || m != 0) + abort (); + #pragma omp for collapse(2) ordered(4) nowait + for (i = 0; i < d + 1; i++) + for (j = d; j > 0; j--) + for (k = 0; k < d + 2; k++) + for (l = 0; l < d + 4; l++) + { + #pragma omp ordered depend (source) + #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l) + if (!e) + abort (); + } + #pragma omp for nowait + for (i = 0; i < N; i++) + if (a[i] != 3) + abort (); + #pragma omp for collapse(2) private(k) nowait + for (i = 0; i < N / 16; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 4; k++) + if (b[i][j][k] != 3 * (i >= 2 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1)) + abort (); + #pragma omp for collapse(3) nowait + for (i = 0; i < N / 32; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k++) + if 
(c[i][j][k] != 3 * (j >= 2 && (k & 1) == 0)) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/doacross-2.c b/libgomp/testsuite/libgomp.c/doacross-2.c new file mode 100644 index 00000000000..e491bb22965 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/doacross-2.c @@ -0,0 +1,225 @@ +extern void abort (void); + +#define N 256 +int a[N], b[N / 16][8][4], c[N / 32][8][8], g[N / 16][8][6]; +volatile int d, e; +volatile unsigned long long f; + +int +main () +{ + unsigned long long i; + int j, k, l, m; + #pragma omp parallel private (l) + { + #pragma omp for schedule(static, 1) ordered (1) nowait + for (i = 1; i < N + f; i++) + { + #pragma omp atomic write + a[i] = 1; + #pragma omp ordered depend(sink: i - 1) + if (i > 1) + { + #pragma omp atomic read + l = a[i - 1]; + if (l < 2) + abort (); + } + #pragma omp atomic write + a[i] = 2; + if (i < N - 1) + { + #pragma omp atomic read + l = a[i + 1]; + if (l == 3) + abort (); + } + #pragma omp ordered depend(source) + #pragma omp atomic write + a[i] = 3; + } + #pragma omp for schedule(static, 0) ordered (3) nowait + for (i = 3; i < N / 16 - 1 + f; i++) + for (j = 0; j < 8; j += 2) + for (k = 1; k <= 3; k++) + { + #pragma omp atomic write + b[i][j][k] = 1; + #pragma omp ordered depend(sink: i, j - 2, k - 1) \ + depend(sink: i - 2, j - 2, k + 1) + #pragma omp ordered depend(sink: i - 3, j + 2, k - 2) + if (j >= 2 && k > 1) + { + #pragma omp atomic read + l = b[i][j - 2][k - 1]; + if (l < 2) + abort (); + } + #pragma omp atomic write + b[i][j][k] = 2; + if (i >= 5 && j >= 2 && k < 3) + { + #pragma omp atomic read + l = b[i - 2][j - 2][k + 1]; + if (l < 2) + abort (); + } + if (i >= 6 && j < N / 16 - 3 && k == 3) + { + #pragma omp atomic read + l = b[i - 3][j + 2][k - 2]; + if (l < 2) + abort (); + } + #pragma omp ordered depend(source) + #pragma omp atomic write + b[i][j][k] = 3; + } +#define A(n) int n; +#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) +#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) +#define D(n) 
C(n##0) C(n##1) C(n##2) C(n##3) + D(m) +#undef A + #pragma omp for collapse (2) ordered(61) schedule(dynamic, 15) + for (i = 2; i < N / 32 + f; i++) + for (j = 7; j > 1; j--) + for (k = 6; k >= 0; k -= 2) +#define A(n) for (n = 4; n < 5; n++) + D(m) +#undef A + { + #pragma omp atomic write + c[i][j][k] = 1; +#define A(n) ,n +#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321) + #pragma omp ordered depend (sink: i, j, k + 2 E(m)) \ + depend (sink:i - 2, j + 1, k - 4 E(m)) \ + depend(sink: i - 1, j - 2, k - 2 E(m)) + if (k <= 4) + { + l = c[i][j][k + 2]; + if (l < 2) + abort (); + } + #pragma omp atomic write + c[i][j][k] = 2; + if (i >= 4 && j < 7 && k >= 4) + { + l = c[i - 2][j + 1][k - 4]; + if (l < 2) + abort (); + } + if (i >= 3 && j >= 4 && k >= 2) + { + l = c[i - 1][j - 2][k - 2]; + if (l < 2) + abort (); + } + #pragma omp ordered depend (source) + #pragma omp atomic write + c[i][j][k] = 3; + } + #pragma omp for schedule(static, 0) ordered (3) nowait + for (j = 0; j < N / 16 - 1; j++) + for (k = 0; k < 8; k += 2) + for (i = 3; i <= 5 + f; i++) + { + #pragma omp atomic write + g[j][k][i] = 1; + #pragma omp ordered depend(sink: j, k - 2, i - 1) \ + depend(sink: j - 2, k - 2, i + 1) + #pragma omp ordered depend(sink: j - 3, k + 2, i - 2) + if (k >= 2 && i > 3) + { + #pragma omp atomic read + l = g[j][k - 2][i - 1]; + if (l < 2) + abort (); + } + #pragma omp atomic write + g[j][k][i] = 2; + if (j >= 2 && k >= 2 && i < 5) + { + #pragma omp atomic read + l = g[j - 2][k - 2][i + 1]; + if (l < 2) + abort (); + } + if (j >= 3 && k < N / 16 - 3 && i == 5) + { + #pragma omp atomic read + l = g[j - 3][k + 2][i - 2]; + if (l < 2) + abort (); + } + #pragma omp ordered depend(source) + #pragma omp atomic write + g[j][k][i] = 3; + } + #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k) + for (i = 2; i < f + 3; i++) + for (j = d + 1; j >= 0; j--) + for (k = 0; k < d; k++) + for (l = 0; l < d + 2; l++) + { + #pragma omp ordered depend (source) + 
#pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l) + if (!e) + abort (); + } + #pragma omp single + { + if (i != 3 || j != -1 || k != 0) + abort (); + i = 8; j = 9; k = 10; + } + #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m) + for (i = 2; i < f + 3; i++) + for (j = d + 1; j >= 0; j--) + for (k = 0; k < d + 2; k++) + for (m = 0; m < d; m++) + { + #pragma omp ordered depend (source) + #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m) + abort (); + } + #pragma omp single + if (i != 3 || j != -1 || k != 2 || m != 0) + abort (); + #pragma omp for collapse(2) ordered(4) nowait + for (i = 2; i < f + 3; i++) + for (j = d; j > 0; j--) + for (k = 0; k < d + 2; k++) + for (l = 0; l < d + 4; l++) + { + #pragma omp ordered depend (source) + #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l) + if (!e) + abort (); + } + #pragma omp for nowait + for (i = 0; i < N; i++) + if (a[i] != 3 * (i >= 1)) + abort (); + #pragma omp for collapse(2) private(k) nowait + for (i = 0; i < N / 16; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 4; k++) + if (b[i][j][k] != 3 * (i >= 3 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1)) + abort (); + #pragma omp for collapse(3) nowait + for (i = 0; i < N / 32; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k++) + if (c[i][j][k] != 3 * (i >= 2 && j >= 2 && (k & 1) == 0)) + abort (); + #pragma omp for collapse(2) private(k) nowait + for (i = 0; i < N / 16; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 6; k++) + if (g[i][j][k] != 3 * (i < N / 16 - 1 && (j & 1) == 0 && k >= 3)) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c index beca8555780..6d4bc4fac12 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c +++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c @@ -20,7 +20,7 @@ int fib_wrapper (int n) { int x = 0; - #pragma omp target if(n > THRESHOLD) + #pragma omp target 
if(n > THRESHOLD) map(from:x) x = fib (n); return x; diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c index db70460b309..f2414366951 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c +++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c @@ -41,7 +41,7 @@ float accum (int k) int i; float tmp = 0.0; - #pragma omp target + #pragma omp target map(tofrom:tmp) #pragma omp parallel for reduction(+:tmp) for (i = 0; i < N; i++) tmp += Pfun (i, k); diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c index b550f1ff540..33d6137afd5 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c +++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c @@ -48,7 +48,7 @@ float accum () int i, k; float tmp = 0.0; - #pragma omp target + #pragma omp target map(tofrom:tmp) #pragma omp parallel for reduction(+:tmp) for (i = 0; i < N; i++) { diff --git a/libgomp/testsuite/libgomp.c/examples-4/device-1.c b/libgomp/testsuite/libgomp.c/examples-4/device-1.c index f7c84fb4c14..dad8572f8f0 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/device-1.c +++ b/libgomp/testsuite/libgomp.c/examples-4/device-1.c @@ -10,11 +10,11 @@ int main () int b = 0; int c, d; - #pragma omp target if(a > 200 && a < 400) + #pragma omp target if(a > 200 && a < 400) map(from: c) c = omp_is_initial_device (); #pragma omp target data map(to: b) if(a > 200 && a < 400) - #pragma omp target + #pragma omp target map(from: b, d) { b = 100; d = omp_is_initial_device (); @@ -26,11 +26,11 @@ int main () a += 200; b = 0; - #pragma omp target if(a > 200 && a < 400) + #pragma omp target if(a > 200 && a < 400) map(from: c) c = omp_is_initial_device (); #pragma omp target data map(to: b) if(a > 200 && a < 400) - #pragma omp target + #pragma omp target map(from: b, d) { b = 100; d = omp_is_initial_device (); @@ -42,11 +42,11 
@@ int main () a += 200; b = 0; - #pragma omp target if(a > 200 && a < 400) + #pragma omp target if(a > 200 && a < 400) map(from: c) c = omp_is_initial_device (); #pragma omp target data map(to: b) if(a > 200 && a < 400) - #pragma omp target + #pragma omp target map(from: b, d) { b = 100; d = omp_is_initial_device (); diff --git a/libgomp/testsuite/libgomp.c/examples-4/device-3.c b/libgomp/testsuite/libgomp.c/examples-4/device-3.c index 8a0cf7c200d..af086533278 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/device-3.c +++ b/libgomp/testsuite/libgomp.c/examples-4/device-3.c @@ -9,7 +9,7 @@ int main () int res; int default_device = omp_get_default_device (); - #pragma omp target + #pragma omp target map(from: res) res = omp_is_initial_device (); if (res) @@ -17,7 +17,7 @@ int main () omp_set_default_device (omp_get_num_devices ()); - #pragma omp target + #pragma omp target map(from: res) res = omp_is_initial_device (); if (!res) diff --git a/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c b/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c index abb283801f8..46b674013d0 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c +++ b/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c @@ -47,7 +47,7 @@ void gramSchmidt (int Q[][COLS], const int rows, const int cols) { int tmp = 0; - #pragma omp target + #pragma omp target map(tofrom:tmp) #pragma omp parallel for reduction(+:tmp) for (i = 0; i < rows; i++) tmp += (Q[i][k] * Q[i][k]); diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-2.c b/libgomp/testsuite/libgomp.c/examples-4/teams-2.c index 8bbbc355b17..7d0a60ebb51 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/teams-2.c +++ b/libgomp/testsuite/libgomp.c/examples-4/teams-2.c @@ -32,7 +32,7 @@ float dotprod (float B[], float C[], int n, int block_size, int i, i0; float sum = 0; - #pragma omp target map(to: B[0:n], C[0:n]) + #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum) #pragma omp teams num_teams(num_teams) 
thread_limit(block_threads) \ reduction(+:sum) #pragma omp distribute diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-3.c b/libgomp/testsuite/libgomp.c/examples-4/teams-3.c index b6708785884..5fe63a68a4b 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/teams-3.c +++ b/libgomp/testsuite/libgomp.c/examples-4/teams-3.c @@ -31,7 +31,7 @@ float dotprod (float B[], float C[], int n) int i; float sum = 0; - #pragma omp target teams map(to: B[0:n], C[0:n]) + #pragma omp target teams map(to: B[0:n], C[0:n]) map(tofrom: sum) #pragma omp distribute parallel for reduction(+:sum) for (i = 0; i < n; i++) sum += B[i] * C[i]; diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-4.c b/libgomp/testsuite/libgomp.c/examples-4/teams-4.c index 9aef78ecfba..6136eabef66 100644 --- a/libgomp/testsuite/libgomp.c/examples-4/teams-4.c +++ b/libgomp/testsuite/libgomp.c/examples-4/teams-4.c @@ -31,7 +31,7 @@ float dotprod (float B[], float C[], int n) int i; float sum = 0; - #pragma omp target map(to: B[0:n], C[0:n]) + #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom:sum) #pragma omp teams num_teams(8) thread_limit(16) #pragma omp distribute parallel for reduction(+:sum) \ dist_schedule(static, 1024) \ diff --git a/libgomp/testsuite/libgomp.c/for-2.h b/libgomp/testsuite/libgomp.c/for-2.h index 920d23b5202..0bd116c5aec 100644 --- a/libgomp/testsuite/libgomp.c/for-2.h +++ b/libgomp/testsuite/libgomp.c/for-2.h @@ -11,11 +11,21 @@ noreturn (void) #ifndef SC #define SC #endif +#ifndef OMPTGT +#define OMPTGT +#endif +#ifndef OMPTO +#define OMPTO(v) do {} while (0) +#endif +#ifndef OMPFROM +#define OMPFROM(v) do {} while (0) +#endif __attribute__((noinline, noclone)) void N(f0) (void) { int i; + OMPTGT #pragma omp F S for (i = 0; i < 1500; i++) a[i] += 2; @@ -24,6 +34,7 @@ N(f0) (void) __attribute__((noinline, noclone)) void N(f1) (void) { + OMPTGT #pragma omp F S for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2) a[(i - __INT_MAX__) >> 1] -= 2; @@ -33,6 +44,7 
@@ __attribute__((noinline, noclone)) void N(f2) (void) { unsigned long long i; + OMPTGT #pragma omp F S for (i = __LONG_LONG_MAX__ + 4500ULL - 27; i > __LONG_LONG_MAX__ - 27ULL; i -= 3) @@ -42,6 +54,7 @@ N(f2) (void) __attribute__((noinline, noclone)) void N(f3) (long long n1, long long n2, long long s3) { + OMPTGT #pragma omp F S for (long long i = n1 + 23; i > n2 - 25; i -= s3) a[i + 48] += 7; @@ -51,6 +64,7 @@ __attribute__((noinline, noclone)) void N(f4) (void) { unsigned int i; + OMPTGT #pragma omp F S for (i = 30; i < 20; i += 2) a[i] += 10; @@ -61,6 +75,7 @@ N(f5) (int n11, int n12, int n21, int n22, int n31, int n32, int s1, int s2, int s3) { SC int v1, v2, v3; + OMPTGT #pragma omp F S collapse(3) for (v1 = n11; v1 < n12; v1 += s1) for (v2 = n21; v2 < n22; v2 += s2) @@ -74,6 +89,7 @@ N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32, { SC int v1, v2; SC long long v3; + OMPTGT #pragma omp F S collapse(3) for (v1 = n11; v1 > n12; v1 += s1) for (v2 = n21; v2 > n22; v2 += s2) @@ -86,6 +102,7 @@ N(f7) (void) { SC unsigned int v1, v3; SC unsigned long long v2; + OMPTGT #pragma omp F S collapse(3) for (v1 = 0; v1 < 20; v1 += 2) for (v2 = __LONG_LONG_MAX__ + 16ULL; @@ -98,6 +115,7 @@ __attribute__((noinline, noclone)) void N(f8) (void) { SC long long v1, v2, v3; + OMPTGT #pragma omp F S collapse(3) for (v1 = 0; v1 < 20; v1 += 2) for (v2 = 30; v2 < 20; v2++) @@ -109,6 +127,7 @@ __attribute__((noinline, noclone)) void N(f9) (void) { int i; + OMPTGT #pragma omp F S for (i = 20; i < 10; i++) { @@ -122,6 +141,7 @@ __attribute__((noinline, noclone)) void N(f10) (void) { SC int i; + OMPTGT #pragma omp F S collapse(3) for (i = 0; i < 10; i++) for (int j = 10; j < 8; j++) @@ -137,6 +157,7 @@ __attribute__((noinline, noclone)) void N(f11) (int n) { int i; + OMPTGT #pragma omp F S for (i = 20; i < n; i++) { @@ -150,6 +171,7 @@ __attribute__((noinline, noclone)) void N(f12) (int n) { SC int i; + OMPTGT #pragma omp F S collapse(3) for (i = 0; i < 10; i++) 
for (int j = n; j < 8; j++) @@ -165,6 +187,7 @@ __attribute__((noinline, noclone)) void N(f13) (void) { int *i; + OMPTGT #pragma omp F S for (i = a; i < &a[1500]; i++) i[0] += 2; @@ -174,6 +197,7 @@ __attribute__((noinline, noclone)) void N(f14) (void) { SC float *i; + OMPTGT #pragma omp F S collapse(3) for (i = &b[0][0][0]; i < &b[0][0][10]; i++) for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10) @@ -188,27 +212,34 @@ N(test) (void) int i, j, k; for (i = 0; i < 1500; i++) a[i] = i - 25; + OMPTO (a); N(f0) (); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 23) return 1; N(f1) (); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 25) return 1; N(f2) (); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 29) return 1; N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 22) return 1; N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 22) return 1; N(f4) (); + OMPFROM (a); for (i = 0; i < 1500; i++) if (a[i] != i - 22) return 1; @@ -216,31 +247,37 @@ N(test) (void) for (j = 0; j < 15; j++) for (k = 0; k < 10; k++) b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k; + OMPTO (b); N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1); + OMPFROM (b); for (i = 0; i < 10; i++) for (j = 0; j < 15; j++) for (k = 0; k < 10; k++) if (b[i][j][k] != i + 1.5 * j - 1.5 * k) return 1; N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6); + OMPFROM (b); for (i = 0; i < 10; i++) for (j = 0; j < 15; j++) for (k = 0; k < 10; k++) if (b[i][j][k] != i + 1.5 * j - 1.5 * k) return 1; N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1); + OMPFROM (b); for (i = 0; i < 10; i++) for (j = 0; j < 15; j++) for (k = 0; k < 10; k++) if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k) return 1; N(f7) (); + OMPFROM (b); for (i = 0; i < 10; i++) for (j = 0; j < 15; j++) for (k = 0; k < 10; k++) if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) return 1; N(f8) (); + OMPFROM (b); for (i = 0; i < 10; i++) for (j = 0; j < 15; 
j++) for (k = 0; k < 10; k++) @@ -250,6 +287,8 @@ N(test) (void) N(f10) (); N(f11) (10); N(f12) (12); + OMPFROM (a); + OMPFROM (b); for (i = 0; i < 1500; i++) if (a[i] != i - 22) return 1; @@ -260,6 +299,8 @@ N(test) (void) return 1; N(f13) (); N(f14) (); + OMPFROM (a); + OMPFROM (b); for (i = 0; i < 1500; i++) if (a[i] != i - 20) return 1; diff --git a/libgomp/testsuite/libgomp.c/for-4.c b/libgomp/testsuite/libgomp.c/for-4.c new file mode 100644 index 00000000000..ef5465e1e76 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-4.c @@ -0,0 +1,42 @@ +/* { dg-options "-std=gnu99 -fopenmp" } */ + +extern void abort (void); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F taskloop +#define G taskloop +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F taskloop simd +#define G taskloop_simd +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +int +main () +{ + int err = 0; + #pragma omp parallel reduction(|:err) + #pragma omp single + { + if (test_taskloop_normal () + || test_taskloop_simd_normal ()) + err = 1; + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-5.c b/libgomp/testsuite/libgomp.c/for-5.c new file mode 100644 index 00000000000..84e636ab0f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-5.c @@ -0,0 +1,154 @@ +/* { dg-additional-options "-std=gnu99" } */ + +extern void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F for +#define G f +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#pragma omp end declare target + +#undef OMPFROM +#undef OMPTO +#define DO_PRAGMA(x) _Pragma (#x) +#define OMPFROM(v) DO_PRAGMA (omp target update from(v)) +#define OMPTO(v) DO_PRAGMA (omp target update to(v)) + +#define F target parallel for +#define G tpf 
+#include "for-1.h" +#undef F +#undef G + +#define F target simd +#define G t_simd +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target parallel for simd +#define G tpf_simd +#include "for-1.h" +#undef F +#undef G + +#define F target teams distribute +#define G ttd +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute +#define G ttd_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute simd +#define G ttds +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute simd +#define G ttds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F target teams distribute parallel for +#define G ttdpf +#include "for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for dist_schedule(static, 128) +#define G ttdpf_ds128 +#include "for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for simd +#define G ttdpfs +#include "for-1.h" +#undef F +#undef G + +#define F target teams distribute parallel for simd dist_schedule(static, 128) +#define G ttdpfs_ds128 +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_tpf_static () + || test_tpf_static32 () + || test_tpf_auto () + || test_tpf_guided32 () + || test_tpf_runtime () + || test_t_simd_normal () + || test_tpf_simd_static () + || test_tpf_simd_static32 () + || test_tpf_simd_auto () + || test_tpf_simd_guided32 () + || test_tpf_simd_runtime () + || test_ttd_normal () + || test_ttd_ds128_normal () + || test_ttds_normal () + || test_ttds_ds128_normal () + || test_ttdpf_static () + || test_ttdpf_static32 () + || test_ttdpf_auto () + || 
test_ttdpf_guided32 () + || test_ttdpf_runtime () + || test_ttdpf_ds128_static () + || test_ttdpf_ds128_static32 () + || test_ttdpf_ds128_auto () + || test_ttdpf_ds128_guided32 () + || test_ttdpf_ds128_runtime () + || test_ttdpfs_static () + || test_ttdpfs_static32 () + || test_ttdpfs_auto () + || test_ttdpfs_guided32 () + || test_ttdpfs_runtime () + || test_ttdpfs_ds128_static () + || test_ttdpfs_ds128_static32 () + || test_ttdpfs_ds128_auto () + || test_ttdpfs_ds128_guided32 () + || test_ttdpfs_ds128_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-6.c b/libgomp/testsuite/libgomp.c/for-6.c new file mode 100644 index 00000000000..7f3c65e82b1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-6.c @@ -0,0 +1,123 @@ +/* { dg-additional-options "-std=gnu99" } */ + +extern void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F for +#define G f +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#pragma omp end declare target + +#undef OMPTGT +#undef OMPFROM +#undef OMPTO +#define DO_PRAGMA(x) _Pragma (#x) +#define OMPTGT DO_PRAGMA (omp target) +#define OMPFROM(v) DO_PRAGMA (omp target update from(v)) +#define OMPTO(v) DO_PRAGMA (omp target update to(v)) + +#define F teams distribute +#define G td +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute +#define G td_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F teams distribute simd +#define G tds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + 
+#define F teams distribute parallel for +#define G tdpf +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for dist_schedule(static, 128) +#define G tdpf_ds128 +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd +#define G tdpfs +#include "for-1.h" +#undef F +#undef G + +#define F teams distribute parallel for simd dist_schedule(static, 128) +#define G tdpfs_ds128 +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_td_normal () + || test_td_ds128_normal () + || test_tds_normal () + || test_tds_ds128_normal () + || test_tdpf_static () + || test_tdpf_static32 () + || test_tdpf_auto () + || test_tdpf_guided32 () + || test_tdpf_runtime () + || test_tdpf_ds128_static () + || test_tdpf_ds128_static32 () + || test_tdpf_ds128_auto () + || test_tdpf_ds128_guided32 () + || test_tdpf_ds128_runtime () + || test_tdpfs_static () + || test_tdpfs_static32 () + || test_tdpfs_auto () + || test_tdpfs_guided32 () + || test_tdpfs_runtime () + || test_tdpfs_ds128_static () + || test_tdpfs_ds128_static32 () + || test_tdpfs_ds128_auto () + || test_tdpfs_ds128_guided32 () + || test_tdpfs_ds128_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/linear-1.c b/libgomp/testsuite/libgomp.c/linear-1.c new file mode 100644 index 00000000000..f86fb33c5da --- /dev/null +++ b/libgomp/testsuite/libgomp.c/linear-1.c @@ -0,0 +1,250 @@ +int a[256]; + +__attribute__((noinline, noclone)) int +f1 (int i) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f2 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f3 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 64; j++) + { + a[i] = 
j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f4 (int i) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f5 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f6 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f7 (int i) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f8 (short int i, char k) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f9 (long long int i, long long int k) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 64; j++) + { + a[i] = j; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f10 (int i, long step) +{ + #pragma omp parallel for linear (i: 4) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f11 (short int i, char k, char step) +{ + #pragma omp parallel for linear (i: k + 1) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f12 (long long int i, long long int k, int step) +{ + #pragma omp parallel for linear (i: k) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; 
+} + +__attribute__((noinline, noclone)) int +f13 (int i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(static, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f14 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(static, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f15 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(static, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) int +f16 (int i, long long int step) +{ + #pragma omp parallel for linear (i: 4) schedule(dynamic, 3) + for (int j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) short int +f17 (short int i, char k, int step) +{ + #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5) + for (long j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +__attribute__((noinline, noclone)) long long int +f18 (long long int i, long long int k, long int step) +{ + #pragma omp parallel for linear (i: k) schedule(dynamic, 7) + for (short j = 16; j < 112; j += step) + { + a[i] = j / 2 + 8; + i += 4; + } + return i; +} + +int +main () +{ +#define TEST(x) \ + if (x != 8 + 48 * 4) \ + __builtin_abort (); \ + for (int i = 0; i < 256; i++) \ + if (a[i] != (((i & 3) == 0 && i >= 8 \ + && i < 8 + 48 * 4) \ + ? 
((i - 8) / 4) + 16 : 0)) \ + __builtin_abort (); \ + __builtin_memset (a, 0, sizeof (a)) + TEST (f1 (8)); + TEST (f2 (8, 3)); + TEST (f3 (8LL, 4LL)); + TEST (f4 (8)); + TEST (f5 (8, 3)); + TEST (f6 (8LL, 4LL)); + TEST (f7 (8)); + TEST (f8 (8, 3)); + TEST (f9 (8LL, 4LL)); + TEST (f10 (8, 2)); + TEST (f11 (8, 3, 2)); + TEST (f12 (8LL, 4LL, 2)); + TEST (f13 (8, 2)); + TEST (f14 (8, 3, 2)); + TEST (f15 (8LL, 4LL, 2)); + TEST (f16 (8, 2)); + TEST (f17 (8, 3, 2)); + TEST (f18 (8LL, 4LL, 2)); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/ordered-4.c b/libgomp/testsuite/libgomp.c/ordered-4.c new file mode 100644 index 00000000000..8412d4715c3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/ordered-4.c @@ -0,0 +1,83 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +void +foo (int i, char *j) +{ + #pragma omp atomic + j[i]++; + #pragma omp ordered threads + { + int t; + #pragma omp atomic read + t = j[i]; + if (t != 3) + abort (); + if (i > 1) + { + #pragma omp atomic read + t = j[i - 1]; + if (t == 2) + abort (); + } + if (i < 127) + { + #pragma omp atomic read + t = j[i + 1]; + if (t == 4) + abort (); + } + } + #pragma omp atomic + j[i]++; +} + +int +main () +{ + int i; + char j[128]; + #pragma omp parallel + { + #pragma omp for + for (i = 0; i < 128; i++) + j[i] = 0; + #pragma omp for ordered schedule(dynamic, 1) + for (i = 0; i < 128; i++) + { + #pragma omp atomic + j[i]++; + #pragma omp ordered threads + { + int t; + #pragma omp atomic read + t = j[i]; + if (t != 1) + abort (); + if (i > 1) + { + #pragma omp atomic read + t = j[i - 1]; + if (t == 0) + abort (); + } + if (i < 127) + { + #pragma omp atomic read + t = j[i + 1]; + if (t == 2) + abort (); + } + } + #pragma omp atomic + j[i]++; + } + #pragma omp for ordered schedule(static, 1) + for (i = 0; i < 128; i++) + foo (i, j); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/pr66199-2.c b/libgomp/testsuite/libgomp.c/pr66199-2.c index ddb79de8943..2fc9eec529a 100644 --- 
a/libgomp/testsuite/libgomp.c/pr66199-2.c +++ b/libgomp/testsuite/libgomp.c/pr66199-2.c @@ -18,12 +18,11 @@ __attribute__((noinline, noclone)) void f2 (long a, long b, long c) { long d, e; - #pragma omp target teams distribute parallel for simd default(none) firstprivate (a, b) shared(u, v, w) linear(d) linear(c:5) lastprivate(e) + #pragma omp target teams distribute parallel for simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e) for (d = a; d < b; d++) { u[d] = v[d] + w[d]; - c += 5; - e = c; + e = c + d * 5; } } diff --git a/libgomp/testsuite/libgomp.c/pr66199-3.c b/libgomp/testsuite/libgomp.c/pr66199-3.c new file mode 100644 index 00000000000..fe0ccb47197 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr66199-3.c @@ -0,0 +1,50 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp" } */ + +int u[1024], v[1024], w[1024]; + +__attribute__((noinline, noclone)) long +f1 (long a, long b) +{ + long d; + #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; + return d; +} + +__attribute__((noinline, noclone)) long +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) linear(c:5) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + c += 5; + e = c; + } + return d + c + e; +} + +__attribute__((noinline, noclone)) long +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; + return d1 + d2; +} + +int +main () +{ + if (f1 (0, 1024) != 1024 + || f2 (0, 1024, 17) != 1024 + 2 * (17 + 5 * 1024) + || f3 (0, 32, 0, 32) != 64) + __builtin_abort (); + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.c/pr66199-4.c b/libgomp/testsuite/libgomp.c/pr66199-4.c new file mode 100644 index 00000000000..a9b1bb8a59e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr66199-4.c @@ -0,0 +1,58 @@ +/* PR middle-end/66199 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp" } */ + +#pragma omp declare target +int u[1024], v[1024], w[1024]; +#pragma omp end declare target + +__attribute__((noinline, noclone)) void +f1 (long a, long b) +{ + long d; + #pragma omp target teams distribute parallel for default(none) firstprivate (a, b) shared(u, v, w) + for (d = a; d < b; d++) + u[d] = v[d] + w[d]; +} + +__attribute__((noinline, noclone)) void +f2 (long a, long b, long c) +{ + long d, e; + #pragma omp target teams distribute parallel for default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + e = c + d * 5; + } +} + +__attribute__((noinline, noclone)) void +f3 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; +} + +__attribute__((noinline, noclone)) void +f4 (long a1, long b1, long a2, long b2) +{ + long d1, d2; + #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2) + for (d1 = a1; d1 < b1; d1++) + for (d2 = a2; d2 < b2; d2++) + u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2]; +} + +int +main () +{ + f1 (0, 1024); + f2 (0, 1024, 17); + f3 (0, 32, 0, 32); + f4 (0, 32, 0, 32); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/reduction-10.c b/libgomp/testsuite/libgomp.c/reduction-10.c new file mode 100644 index 00000000000..3c95ebd4a4b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/reduction-10.c @@ -0,0 +1,105 @@ +struct A { int t; }; +struct B { 
char t; }; +struct C { unsigned long long t; }; +struct D { long t; }; +void +add (struct B *x, struct B *y) +{ + x->t += y->t; +} +void +zero (struct B *x) +{ + x->t = 0; +} +void +orit (struct C *x, struct C *y) +{ + y->t |= x->t; +} +#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv)) +#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 }) +#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L }) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6) + +struct B z[10]; + +__attribute__((noinline, noclone)) void +foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7) +{ + struct C a[p7 + 4]; + short b[p7]; + int i; + for (i = 0; i < p7 + 4; i++) + { + if (i < p7) + b[i] = -6; + a[i].t = 0; + } + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b) + for (i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i].t != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + struct A a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + struct D w[1][2] = { { { ~0L }, { ~0L } } }; + foo (&a[1], y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5); + int i, j, k; + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/reduction-7.c b/libgomp/testsuite/libgomp.c/reduction-7.c new file mode 100644 index 00000000000..347c26f46d3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/reduction-7.c @@ -0,0 +1,64 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*x)[3][2], int *y, long w[1][2]) +{ + unsigned long long a[9] = {}; + short b[5] = {}; + int i; + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:1][:2]) reduction(max:b) + for (i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > 
b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i] != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (&a[1], y + 1, w); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/reduction-8.c b/libgomp/testsuite/libgomp.c/reduction-8.c new file mode 100644 index 00000000000..f4ec03aabea --- /dev/null +++ b/libgomp/testsuite/libgomp.c/reduction-8.c @@ -0,0 +1,98 @@ +struct A { int t; }; +struct B { char t; }; +struct C { unsigned long long t; }; +struct D { long t; }; +void +add (struct B *x, struct B *y) +{ + x->t += y->t; +} +void +zero (struct B *x) +{ + x->t = 0; +} +void +orit (struct C *x, struct C *y) +{ + y->t |= x->t; +} +#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv)) +#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 }) +#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L }) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +struct B z[10]; + +__attribute__((noinline, noclone)) void +foo (struct A (*x)[3][2], struct A *y, struct D w[1][2]) +{ + struct C a[9] = {}; + short b[5] = {}; + int i; + #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \ + reduction(*:y[:3]) reduction(|:a[:4]) \ + reduction(&:w[0:1][:2]) reduction(maxb:b) + for (i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[0].t *= 3; + if ((i & 31) == 2) + y[1].t *= 7; + if ((i & 63) == 3) + y[2].t *= 17; + z[i / 32].t += (i & 3); + if (i < 4) + z[i].t += i; + a[i / 32].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i].t != (i < 4 ? 0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + struct A a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + struct D w[1][2] = { { { ~0L }, { ~0L } } }; + foo (&a[1], y + 1, w); + int i, j, k; + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/reduction-9.c 
b/libgomp/testsuite/libgomp.c/reduction-9.c new file mode 100644 index 00000000000..13605c1ab88 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/reduction-9.c @@ -0,0 +1,71 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*x)[3][2], int *y, long w[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7) +{ + unsigned long long a[p7 + 4]; + short b[p7]; + int i; + for (i = 0; i < p7 + 4; i++) + { + if (i < p7) + b[i] = -6; + a[i] = 0; + } + #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \ + reduction(*:y[:p4]) reduction(|:a[:p5]) \ + reduction(&:w[0:p6 - 1][:p6]) reduction(max:b) + for (i = 0; i < 128; i++) + { + x[i / 64][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[0] *= 3; + if ((i & 31) == 2) + y[1] *= 7; + if ((i & 63) == 3) + y[2] *= 17; + z[i / 32] += (i & 3); + if (i < 4) + z[i] += i; + a[i / 32] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 79) > b[0]) + b[0] = i % 79; + if ((i % 13) > b[1]) + b[1] = i % 13; + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i] != (i < 4 ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (&a[1], y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-1.c b/libgomp/testsuite/libgomp.c/target-1.c index f734d3c279d..c7abb008182 100644 --- a/libgomp/testsuite/libgomp.c/target-1.c +++ b/libgomp/testsuite/libgomp.c/target-1.c @@ -34,7 +34,7 @@ fn2 (int x, int y, int z) fn1 (b, c, x); #pragma omp target data map(to: b) { - #pragma omp target map(tofrom: c) + #pragma omp target map(tofrom: c, s) #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) #pragma omp distribute dist_schedule(static, 4) collapse(1) for (j=0; j < x; j += y) @@ -52,7 +52,7 @@ fn3 (int x) double b[1024], c[1024], s = 0; int i; fn1 (b, c, x); - #pragma omp target map(to: b, c) + #pragma omp target map(to: b, c) map(tofrom:s) #pragma omp parallel for reduction(+:s) for (i = 0; i < x; i++) tgt (), s += b[i] * c[i]; @@ -66,7 +66,8 @@ fn4 (int x, double *p) int i; fn1 (b, c, x); fn1 (d + x, p + x, x); - #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \ + map(tofrom: s) #pragma omp parallel for reduction(+:s) for (i = 0; i < x; i++) s += b[i] * c[i] + d[x + i] + p[x + i]; diff --git 
a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c new file mode 100644 index 00000000000..625c2863f4b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-11.c @@ -0,0 +1,86 @@ +/* { dg-require-effective-target offload_device_nonshared_as } */ + +#include <stdlib.h> +#include <assert.h> + +#define N 32 + +void test_array_section (int *p) +{ + #pragma omp target data map(alloc: p[0:N]) + { + int ok = 1; + for (int i = 10; i < 10 + 4; i++) + p[i] = 997 * i; + + #pragma omp target map(always to:p[10:4]) map(tofrom: ok) + for (int i = 10; i < 10 + 4; i++) + if (p[i] != 997 * i) + ok = 0; + + assert (ok); + + #pragma omp target map(always from:p[7:9]) + for (int i = 0; i < N; i++) + p[i] = i; + } +} + +int main () +{ + int aa = 0, bb = 0, cc = 0, dd = 0; + + #pragma omp target data map(tofrom: aa) map(to: bb) map(from: cc, dd) + { + int ok; + aa = bb = cc = 1; + + /* Set dd on target to 0 for the further check. */ + #pragma omp target map(always to: dd) + ; + + dd = 1; + #pragma omp target map(tofrom: aa) map(always to: bb) \ + map(always from: cc) map(to: dd) map(from: ok) + { + /* bb is always to, aa and dd are not. */ + ok = (aa == 0) && (bb == 1) && (dd == 0); + aa = bb = cc = dd = 2; + } + + assert (ok); + assert (aa == 1); + assert (bb == 1); + assert (cc == 2); /* cc is always from. */ + assert (dd == 1); + + dd = 3; + #pragma omp target map(from: cc) map(always to: dd) map(from: ok) + { + ok = (dd == 3); /* dd is always to. 
*/ + cc = dd = 4; + } + + assert (ok); + assert (cc == 2); + assert (dd == 3); + } + + assert (aa == 2); + assert (bb == 1); + assert (cc == 4); + assert (dd == 4); + + int *array = calloc (N, sizeof (int)); + test_array_section (array); + + for (int i = 0; i < 7; i++) + assert (array[i] == 0); + for (int i = 7; i < 7 + 9; i++) + assert (array[i] == i); + for (int i = 7 + 9; i < N; i++) + assert (array[i] == 0); + + free (array); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c new file mode 100644 index 00000000000..e6b009463ad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-12.c @@ -0,0 +1,130 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int err; + int q[128], i; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + for (i = 0; i < 128; i++) + q[i] = i; + + p = omp_target_alloc (130 * sizeof (int), d); + if (p == NULL) + return 0; + + if (omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, + d, id) < 3 + || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, + NULL, id, d) < 3 + || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, + NULL, id, id) < 3) + abort (); + + if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) == 0) + { + size_t volume[3] = { 128, 0, 0 }; + size_t dst_offsets[3] = { 0, 0, 0 }; + size_t src_offsets[3] = { 1, 0, 0 }; + size_t dst_dimensions[3] = { 128, 0, 0 }; + size_t src_dimensions[3] = { 128, 0, 0 }; + + if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) != 0) + abort (); + + if (omp_target_is_present (q, d) != 1 + || omp_target_is_present (&q[32], d) != 1 + || omp_target_is_present (&q[128], d) != 1) + abort (); + + if (omp_target_memcpy (p, q, 128 * sizeof (int), sizeof (int), 0, + d, id) != 0) + abort (); + + #pragma omp target if (d >= 0) device (d >= 0 ? 
d : 0) map(alloc:q[0:32]) map(from:err) + { + int j; + err = 0; + for (j = 0; j < 128; j++) + if (q[j] != j) + err = 1; + else + q[j] += 4; + } + + if (err) + abort (); + + if (omp_target_memcpy_rect (q, p, sizeof (int), 1, volume, + dst_offsets, src_offsets, dst_dimensions, + src_dimensions, id, d) != 0) + abort (); + + for (i = 0; i < 128; i++) + if (q[i] != i + 4) + abort (); + + volume[2] = 2; + volume[1] = 3; + volume[0] = 6; + dst_offsets[2] = 1; + dst_offsets[1] = 0; + dst_offsets[0] = 0; + src_offsets[2] = 1; + src_offsets[1] = 0; + src_offsets[0] = 3; + dst_dimensions[2] = 2; + dst_dimensions[1] = 3; + dst_dimensions[0] = 6; + src_dimensions[2] = 3; + src_dimensions[1] = 4; + src_dimensions[0] = 6; + if (omp_target_memcpy_rect (p, q, sizeof (int), 3, volume, + dst_offsets, src_offsets, dst_dimensions, + src_dimensions, d, id) != 0) + abort (); + + #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) map(alloc:q[0:32]) map(from:err) + { + int j, k, l; + err = 0; + for (j = 0; j < 6; j++) + for (k = 0; k < 3; k++) + for (l = 0; l < 2; l++) + if (q[j * 6 + k * 2 + l] != 3 * 12 + 4 + 1 + l + k * 3 + j * 12) + err = 1; + } + + if (err) + abort (); + + if (omp_target_memcpy (p, p, 10 * sizeof (int), 51 * sizeof (int), + 111 * sizeof (int), d, d) != 0) + abort (); + + #pragma omp target if (d >= 0) device (d >= 0 ? 
d : 0) map(alloc:q[0:32]) map(from:err) + { + int j; + err = 0; + for (j = 0; j < 10; j++) + if (q[50 + j] != q[110 + j]) + err = 1; + } + + if (err) + abort (); + + if (omp_target_disassociate_ptr (q, d) != 0) + abort (); + } + + omp_target_free (p, d); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-13.c b/libgomp/testsuite/libgomp.c/target-13.c new file mode 100644 index 00000000000..168850b507c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-13.c @@ -0,0 +1,45 @@ +#ifdef __cplusplus +extern "C" +#else +extern +#endif +void abort (void); +struct S { int s, t; }; + +void +foo () +{ + int x = 5, y = 6, err = 0; + struct S u = { 7, 8 }, v = { 9, 10 }; + double s = 11.5, t = 12.5; + #pragma omp target private (x, u, s) firstprivate (y, v, t) map(from:err) + { + x = y; + u = v; + s = t; + err = (x != 6 || y != 6 + || u.s != 9 || u.t != 10 || v.s != 9 || v.t != 10 + || s != 12.5 || t != 12.5); + x += 1; + y += 2; + u.s += 3; + v.t += 4; + s += 2.5; + t += 3.0; + if (x != 7 || y != 8 + || u.s != 12 || u.t != 10 || v.s != 9 || v.t != 14 + || s != 15.0 || t != 15.5) + err = 1; + } + if (err || x != 5 || y != 6 + || u.s != 7 || u.t != 8 || v.s != 9 || v.t != 10 + || s != 11.5 || t != 12.5) + abort (); +} + +int +main () +{ + foo (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-14.c b/libgomp/testsuite/libgomp.c/target-14.c new file mode 100644 index 00000000000..17d383407a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-14.c @@ -0,0 +1,38 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int err; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + p = omp_target_alloc (128 * sizeof (int), d); + if (p == NULL) + return 0; + + #pragma omp target is_device_ptr (p) if (d >= 0) device (d >= 0 ? 
d : 0) + { + int i, *q = (int *) p; + for (i = 0; i < 128; i++) + q[i] = i + 7; + } + #pragma omp target is_device_ptr (p) if (d >= 0) device (d >= 0 ? d : 0) map(from:err) + { + int i; + err = 0; + for (i = 0; i < 128; i++) + if (((int *) p)[i] != i + 7) + err = 1; + } + if (err) + abort (); + + omp_target_free (p, d); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-15.c b/libgomp/testsuite/libgomp.c/target-15.c new file mode 100644 index 00000000000..fee9252ef3d --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-15.c @@ -0,0 +1,74 @@ +extern void abort (void); + +void +foo (int *x) +{ + int a[10], b[15], err, i; + for (i = 0; i < 10; i++) + a[i] = 7 * i; + for (i = 0; i < 15; i++) + b[i] = 8 * i; + #pragma omp target map(to:x[5:10], a[0:10], b[5:10]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if (x[5 + i] != 20 + 4 * i + || a[i] != 7 * i + || b[5 + i] != 40 + 8 * i) + err = 1; + } + if (err) + abort (); +} + +void +bar (int n, int v) +{ + int a[n], b[n], c[n], d[n], e[n], err, i; + int (*x)[n] = &c; + for (i = 0; i < n; i++) + { + (*x)[i] = 4 * i; + a[i] = 7 * i; + b[i] = 8 * i; + } + #pragma omp target map(to:x[0][5:10], a[0:10], b[5:10]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if ((*x)[5 + i] != 20 + 4 * i + || a[i] != 7 * i + || b[5 + i] != 40 + 8 * i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + { + (*x)[i] = 9 * i; + a[i] = 12 * i; + b[i] = 13 * i; + } + #pragma omp target map(to:x[0][v:v+5], a[v-5:v+5], b[v:v+5]) map(from:err) + { + err = 0; + for (i = 0; i < 10; i++) + if ((*x)[5 + i] != 45 + 9 * i + || a[i] != 12 * i + || b[5 + i] != 65 + 13 * i) + err = 1; + } + if (err) + abort (); +} + +int +main () +{ + int x[15], i; + for (i = 0; i < 15; i++) + x[i] = 4 * i; + foo (x); + bar (15, 5); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-16.c b/libgomp/testsuite/libgomp.c/target-16.c new file mode 100644 index 00000000000..7b0919b1b00 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.c/target-16.c @@ -0,0 +1,45 @@ +extern void abort (void); + +void +foo (int n) +{ + int a[n], i, err; + for (i = 0; i < n; i++) + a[i] = 7 * i; + #pragma omp target firstprivate (a) map(from:err) private (i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 7 * i) + err = 1; + } + if (err) + abort (); +} + +void +bar (int n) +{ + int a[n], i, err; + #pragma omp target private (a) map(from:err) + { + #pragma omp parallel for + for (i = 0; i < n; i++) + a[i] = 7 * i; + err = 0; + #pragma omp parallel for reduction(|:err) + for (i = 0; i < n; i++) + if (a[i] != 7 * i) + err |= 1; + } + if (err) + abort (); +} + +int +main () +{ + foo (7); + bar (7); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-17.c b/libgomp/testsuite/libgomp.c/target-17.c new file mode 100644 index 00000000000..4a762012eaf --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-17.c @@ -0,0 +1,99 @@ +extern void abort (void); + +void +foo (int n) +{ + int a[n], i, err; + for (i = 0; i < n; i++) + a[i] = 5 * i; + #pragma omp target map(to:a) map(from:err) private(i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 5 * i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + a[i] += i; + #pragma omp target map(from:err) private(i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 6 * i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + a[i] += i; + #pragma omp target firstprivate (a) map(from:err) private(i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 7 * i) + err = 1; + } + if (err) + abort (); + int on = n; + #pragma omp target firstprivate (n) map(tofrom: n) + { + n++; + } + if (on != n) + abort (); + #pragma omp target map(tofrom: n) private (n) + { + n = 25; + } + if (on != n) + abort (); + for (i = 0; i < n; i++) + a[i] += i; + #pragma omp target map(to:a) firstprivate (a) map(from:err) private(i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 8 * i) + err = 1; + } + if (err) + abort (); + 
for (i = 0; i < n; i++) + a[i] += i; + #pragma omp target firstprivate (a) map(to:a) map(from:err) private(i) + { + err = 0; + for (i = 0; i < n; i++) + if (a[i] != 9 * i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + a[i] += i; + #pragma omp target map(tofrom:a) map(from:err) private(a, i) + { + err = 0; + for (i = 0; i < n; i++) + a[i] = 7; + #pragma omp parallel for reduction(|:err) + for (i = 0; i < n; i++) + if (a[i] != 7) + err |= 1; + } + if (err) + abort (); + for (i = 0; i < n; i++) + if (a[i] != 10 * i) + abort (); +} + +int +main () +{ + foo (9); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-18.c b/libgomp/testsuite/libgomp.c/target-18.c new file mode 100644 index 00000000000..cbacaf6a77a --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-18.c @@ -0,0 +1,52 @@ +extern void abort (void); + +void +foo (int n) +{ + int a[4] = { 0, 1, 2, 3 }, b[n]; + int *p = a + 1, i, err; + for (i = 0; i < n; i++) + b[i] = 9 + i; + #pragma omp target data map(to:a) + #pragma omp target data use_device_ptr(p) map(from:err) + #pragma omp target is_device_ptr(p) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 4; i++) + if (p[i - 1] != i) + err = 1; + } + if (err) + abort (); + for (i = 0; i < 4; i++) + a[i] = 23 + i; + #pragma omp target data map(to:a) + #pragma omp target data use_device_ptr(a) map(from:err) + #pragma omp target is_device_ptr(a) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 4; i++) + if (a[i] != 23 + i) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:b) + #pragma omp target data use_device_ptr(b) map(from:err) + #pragma omp target is_device_ptr(b) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 4; i++) + if (b[i] != 9 + i) + err = 1; + } + if (err) + abort (); +} + +int +main () +{ + foo (9); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-19.c b/libgomp/testsuite/libgomp.c/target-19.c new file mode 100644 index 00000000000..710c5078ff6 --- 
/dev/null +++ b/libgomp/testsuite/libgomp.c/target-19.c @@ -0,0 +1,127 @@ +extern void abort (void); + +void +foo (int *p, int *q, int *r, int n, int m) +{ + int i, err, *s = r; + #pragma omp target data map(to:p[0:8]) + { + /* For zero length array sections, p points to the start of + already mapped range, q to the end of it, and r does not point + to an mapped range. */ + #pragma omp target map(alloc:p[:0]) map(to:q[:0]) map(from:r[:0]) private(i) map(from:err) firstprivate (s) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1 || q[i - 8] != i + 1) + err = 1; + if (p + 8 != q || (r != (int *) 0 && r != s)) + err = 1; + } + if (err) + abort (); + /* Implicit mapping of pointers behaves the same way. */ + #pragma omp target private(i) map(from:err) firstprivate (s) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1 || q[i - 8] != i + 1) + err = 1; + if (p + 8 != q || (r != (int *) 0 && r != s)) + err = 1; + } + if (err) + abort (); + /* And zero-length array sections, though not known at compile + time, behave the same. */ + #pragma omp target map(p[:n]) map(tofrom:q[:n]) map(alloc:r[:n]) private(i) map(from:err) firstprivate (s) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1 || q[i - 8] != i + 1) + err = 1; + if (p + 8 != q || (r != (int *) 0 && r != s)) + err = 1; + } + if (err) + abort (); + /* Non-zero length array sections, though not known at compile, + behave differently. */ + #pragma omp target map(p[:m]) map(tofrom:q[:m]) map(to:r[:m]) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1) + err = 1; + if (q[0] != 9 || r[0] != 10) + err = 1; + } + if (err) + abort (); + #pragma omp target data map(to:q[0:1]) + { + /* For zero length array sections, p points to the start of + already mapped range, q points to the start of another one, + and r to the end of the second one. 
*/ + #pragma omp target map(to:p[:0]) map(from:q[:0]) map(tofrom:r[:0]) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1) + err = 1; + if (q[0] != 9 || r != q + 1) + err = 1; + } + if (err) + abort (); + /* Implicit mapping of pointers behaves the same way. */ + #pragma omp target private(i) map(from:err) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1) + err = 1; + if (q[0] != 9 || r != q + 1) + err = 1; + } + if (err) + abort (); + /* And zero-length array sections, though not known at compile + time, behave the same. */ + #pragma omp target map(p[:n]) map(alloc:q[:n]) map(from:r[:n]) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1) + err = 1; + if (q[0] != 9 || r != q + 1) + err = 1; + } + if (err) + abort (); + /* Non-zero length array sections, though not known at compile, + behave differently. */ + #pragma omp target map(p[:m]) map(alloc:q[:m]) map(tofrom:r[:m]) private(i) map(from:err) + { + err = 0; + for (i = 0; i < 8; i++) + if (p[i] != i + 1) + err = 1; + if (q[0] != 9 || r[0] != 10) + err = 1; + } + if (err) + abort (); + } + } +} + +int +main () +{ + int a[32], i; + for (i = 0; i < 32; i++) + a[i] = i; + foo (a + 1, a + 9, a + 10, 0, 1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-2.c b/libgomp/testsuite/libgomp.c/target-2.c index ada8dad81ad..0ba766c0a82 100644 --- a/libgomp/testsuite/libgomp.c/target-2.c +++ b/libgomp/testsuite/libgomp.c/target-2.c @@ -23,7 +23,7 @@ fn2 (int x) int i; fn1 (b, c, x); fn1 (e, d + x, x); - #pragma omp target map(to: b, c[:x], d[x:x], e) + #pragma omp target map(to: b, c[:x], d[x:x], e) map(tofrom: s) #pragma omp parallel for reduction(+:s) for (i = 0; i < x; i++) s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c); @@ -38,7 +38,7 @@ fn3 (int x) int i; fn1 (b, c, x); fn1 (e, d, x); - #pragma omp target + #pragma omp target map(tofrom: s) #pragma omp parallel for reduction(+:s) for (i = 0; i < x; i++) s += b[i] * 
c[i] + d[i]; @@ -56,7 +56,7 @@ fn4 (int x) #pragma omp target data map(from: b, c[:x], d[x:x], e) { #pragma omp target update to(b, c[:x], d[x:x], e) - #pragma omp target map(c[:x], d[x:x]) + #pragma omp target map(c[:x], d[x:x], s) #pragma omp parallel for reduction(+:s) for (i = 0; i < x; i++) { diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c new file mode 100644 index 00000000000..3f4e798a755 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-20.c @@ -0,0 +1,120 @@ +/* { dg-require-effective-target offload_device_nonshared_as } */ + +#include <stdlib.h> +#include <assert.h> + +#define N 40 + +int sum; +int var1 = 1; +int var2 = 2; + +#pragma omp declare target +int D[N]; +#pragma omp end declare target + +void enter_data (int *X) +{ + #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum) +} + +void exit_data_0 (int *D) +{ + #pragma omp target exit data map(delete: D[:N]) +} + +void exit_data_1 () +{ + #pragma omp target exit data map(from: var1) +} + +void exit_data_2 (int *X) +{ + #pragma omp target exit data map(from: var2) map(release: X[:N], sum) +} + +void exit_data_3 (int *p) +{ + #pragma omp target exit data map(from: p[:0]) +} + +void test_nested () +{ + int X = 0, Y = 0, Z = 0; + + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target map(from: X, Y, Z) + X = Y = Z = 1337; + assert (X == 0); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target exit data map(from: X) map(release: Y) + assert (X == 0); + assert (Y == 0); + + #pragma omp target exit data map(release: Y) map(delete: Z) + assert (Y == 0); + assert (Z == 0); + } + assert (X == 1337); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target map(from: X) + X = 2448; + assert (X == 2448); + assert (Y == 0); + assert (Z == 0); + + X = 4896; + } + assert (X == 4896); + assert (Y == 0); + assert (Z == 0); +} + +int main () +{ + int *X = malloc (N * sizeof 
(int)); + int *Y = malloc (N * sizeof (int)); + X[10] = 10; + Y[20] = 20; + enter_data (X); + + exit_data_0 (D); /* This should have no effect on D. */ + + #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) map(always from: sum) + { + var1 += X[10]; + var2 += Y[20]; + sum = var1 + var2; + D[sum]++; + } + + assert (var1 == 1); + assert (var2 == 2); + assert (sum == 33); + + exit_data_1 (); + assert (var1 == 11); + assert (var2 == 2); + + /* Increase refcount of already mapped X[0:N]. */ + #pragma omp target enter data map(alloc: X[16:1]) + + exit_data_2 (X); + assert (var2 == 22); + + exit_data_3 (X + 5); /* Unmap X[0:N]. */ + + free (X); + free (Y); + + test_nested (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-21.c b/libgomp/testsuite/libgomp.c/target-21.c new file mode 100644 index 00000000000..41498cf2148 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-21.c @@ -0,0 +1,79 @@ +extern void abort (void); +union U { int x; long long y; }; +struct T { int a; union U b; int c; }; +struct S { int s; int u; struct T v; int x[10]; union U w; int y[10]; int z[10]; }; +volatile int z; + +int +main () +{ + struct S s; + s.s = 0; + s.u = 1; + s.v.a = 2; + s.v.b.y = 3LL; + s.v.c = 19; + s.w.x = 4; + s.x[0] = 7; + s.x[1] = 8; + s.y[3] = 9; + s.y[4] = 10; + s.y[5] = 11; + int err = 0; + #pragma omp target map (to:s.v.b, s.u, s.x[0:z + 2]) \ + map (tofrom:s.y[3:3]) \ + map (from: s.w, s.z[z + 1:z + 3], err) + { + err = 0; + if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8 + || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11) + err = 1; + s.w.x = 6; + s.y[3] = 12; + s.y[4] = 13; + s.y[5] = 14; + s.z[1] = 15; + s.z[2] = 16; + s.z[3] = 17; + } + if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14 + || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17) + abort (); + s.u++; + s.v.a++; + s.v.b.y++; + s.w.x++; + s.x[1] = 18; + s.z[0] = 19; + #pragma omp target data map (tofrom: s) + #pragma omp target map (always to: s.w, 
s.x[1], err) map (alloc:s.u, s.v.b, s.z[z:z + 1]) + { + err = 0; + if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19) + err = 1; + s.w.x = 8; + s.x[1] = 20; + s.z[0] = 21; + } + if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21) + abort (); + s.u++; + s.v.a++; + s.v.b.y++; + s.w.x++; + s.x[0] = 22; + s.x[1] = 23; + #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u) + #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b) + { + err = 0; + if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23) + err = 1; + s.w.x = 11; + s.x[0] = 24; + s.x[1] = 25; + } + if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-22.c b/libgomp/testsuite/libgomp.c/target-22.c new file mode 100644 index 00000000000..aad8a0a09df --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-22.c @@ -0,0 +1,51 @@ +extern void abort (void); +struct T { int a; int *b; int c; }; +struct S { int *s; char *u; struct T v; short *w; }; +volatile int z; + +int +main () +{ + struct S s; + int a[32], i; + char b[32]; + short c[32]; + for (i = 0; i < 32; i++) + { + a[i] = i; + b[i] = 32 + i; + c[i] = 64 + i; + } + s.s = a; + s.u = b + 2; + s.v.b = a + 16; + s.w = c + 3; + int err = 0; + #pragma omp target map (to:s.v.b[0:z + 7], s.u[z + 1:z + 4]) \ + map (tofrom:s.s[3:3]) \ + map (from: s.w[z:4], err) private (i) + { + err = 0; + for (i = 0; i < 7; i++) + if (s.v.b[i] != 16 + i) + err = 1; + for (i = 1; i < 5; i++) + if (s.u[i] != 34 + i) + err = 1; + for (i = 3; i < 6; i++) + if (s.s[i] != i) + err = 1; + else + s.s[i] = 128 + i; + for (i = 0; i < 4; i++) + s.w[i] = 96 + i; + } + if (err) + abort (); + for (i = 0; i < 32; i++) + if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i) + || b[i] != 32 + i + || c[i] != ((i >= 3 && i < 7) ? 
93 + i : 64 + i)) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-23.c b/libgomp/testsuite/libgomp.c/target-23.c new file mode 100644 index 00000000000..fb1532a07b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-23.c @@ -0,0 +1,48 @@ +extern void abort (void); +struct S { int s; int *u; int v[5]; }; +volatile int z; + +int +main () +{ + int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0; + struct S s = { 9, u + 3, { 10, 11, 12, 13, 14 } }; + int *v = u + 4; + #pragma omp target enter data map (to: s.s, s.u[0:5]) map (alloc: s.v[1:3]) + s.s++; + u[3]++; + s.v[1]++; + #pragma omp target update to (s.s) to (s.u[0:2], s.v[1:3]) + #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err) + { + err = 0; + if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13) + err = 1; + if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7) + err = 1; + s.s++; + s.v[2] += 2; + v[-1] = 5; + v[3] = 9; + } + if (err) + abort (); + #pragma omp target map (alloc: s.u[0:5]) + { + err = 0; + if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9) + err = 1; + s.u[1] = 12; + } + #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3]) + if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5 + || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8 + || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14 + || s.v[3] != 13 || s.v[4] != 14) + abort (); + #pragma omp target exit data map (release: s.s) + #pragma omp target exit data map (release: s.u[0:5]) + #pragma omp target exit data map (delete: s.v[1:3]) + #pragma omp target exit data map (release: s.s) + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-24.c b/libgomp/testsuite/libgomp.c/target-24.c new file mode 100644 index 00000000000..e0ff29aaee8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-24.c @@ -0,0 +1,43 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d = 
omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + int *b = a; + int shared_mem = 0; + #pragma omp target map (alloc: shared_mem) + shared_mem = 1; + if (omp_target_is_present (b, d) != shared_mem) + abort (); + #pragma omp target enter data map (to: a) + if (omp_target_is_present (b, d) == 0) + abort (); + #pragma omp target enter data map (alloc: b[:0]) + if (omp_target_is_present (b, d) == 0) + abort (); + #pragma omp target exit data map (release: b[:0]) + if (omp_target_is_present (b, d) == 0) + abort (); + #pragma omp target exit data map (release: b[:0]) + if (omp_target_is_present (b, d) != shared_mem) + abort (); + #pragma omp target enter data map (to: a) + if (omp_target_is_present (b, d) == 0) + abort (); + #pragma omp target enter data map (always, to: b[:0]) + if (omp_target_is_present (b, d) == 0) + abort (); + #pragma omp target exit data map (delete: b[:0]) + if (omp_target_is_present (b, d) != shared_mem) + abort (); + #pragma omp target exit data map (from: b[:0]) + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-25.c b/libgomp/testsuite/libgomp.c/target-25.c new file mode 100644 index 00000000000..aeb19aee510 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-25.c @@ -0,0 +1,84 @@ +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + int x = 0, y = 0, z = 0, s = 11, t = 12, u = 13, w = 7, err; + #pragma omp parallel + #pragma omp single + { + #pragma omp task depend(in: x) + { + usleep (5000); + x = 1; + } + #pragma omp task depend(in: x) + { + usleep (6000); + y = 2; + } + #pragma omp task depend(out: z) + { + usleep (7000); + z = 3; + } + #pragma omp target map(tofrom: x) map(from: err) firstprivate (y) depend(inout: x, z) + err = (x != 1 || y != 2 || z != 3); + if (err) + abort (); + #pragma omp task depend(in: x) + { + usleep (5000); + x = 4; + } + #pragma omp task depend(in: x) + { + usleep 
(4000); + y = 5; + } + #pragma omp task depend(in: z) + { + usleep (3000); + z = 6; + } + #pragma omp target enter data nowait map (to: w) + #pragma omp target enter data depend (inout: x, z) map (to: x, y, z) + #pragma omp target map (alloc: x, y, z) map(from: err) + { + err = (x != 4 || y != 5 || z != 6); + x = 7; + y = 8; + z = 9; + } + if (err) + abort (); + #pragma omp taskwait + #pragma omp target map (alloc: w) map(from: err) + { + err = w != 7; + w = 17; + } + if (err) + abort (); + #pragma omp task depend(in: x) + { + usleep (2000); + s = 14; + } + #pragma omp task depend(in: x) + { + usleep (3000); + t = 15; + } + #pragma omp task depend(in: z) + { + usleep (4000); + u = 16; + } + #pragma omp target exit data depend (inout: x, z) map (from: x, y, z, w) + if (x != 7 || y != 8 || z != 9 || s != 14 || t != 15 || u != 16 || w != 17) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-26.c b/libgomp/testsuite/libgomp.c/target-26.c new file mode 100644 index 00000000000..fa6b52598da --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-26.c @@ -0,0 +1,36 @@ +extern void abort (void); +#pragma omp declare target +int a[4] = { 2, 3, 4, 5 }, *b; +#pragma omp end declare target + +int +main () +{ + int err; + int c[3] = { 6, 7, 8 }; + b = c; + #pragma omp target map(to: a[0:2], b[0:2]) map(from: err) + err = a[0] != 2 || a[1] != 3 || a[2] != 4 || a[3] != 5 || b[0] != 6 || b[1] != 7; + if (err) + abort (); + a[1] = 9; + a[2] = 10; + #pragma omp target map(always,to:a[1:2]) map(from: err) + err = a[0] != 2 || a[1] != 9 || a[2] != 10 || a[3] != 5; + if (err) + abort (); + #pragma omp parallel firstprivate(a, b, c, err) num_threads (2) + #pragma omp single + { + b = c + 1; + a[0] = 11; + a[2] = 13; + c[1] = 14; + int d = 0; + #pragma omp target map(to: a[0:3], b[d:2]) map (from: err) + err = a[0] != 11 || a[1] != 9 || a[2] != 13 || b[0] != 14 || b[1] != 8; + if (err) + abort (); + } + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.c/target-27.c b/libgomp/testsuite/libgomp.c/target-27.c new file mode 100644 index 00000000000..c86651b02e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-27.c @@ -0,0 +1,67 @@ +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + int x = 0, y = 0, z = 0, err; + int shared_mem = 0; + #pragma omp target map(to: shared_mem) + shared_mem = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task depend(in: x) + { + usleep (5000); + x = 1; + } + #pragma omp task depend(in: x) + { + usleep (6000); + y = 2; + } + #pragma omp task depend(out: z) + { + usleep (7000); + z = 3; + } + #pragma omp target enter data map(to: x, y, z) depend(inout: x, z) nowait + #pragma omp task depend(inout: x, z) + { + x++; y++; z++; + } + #pragma omp target update to(x, y) depend(inout: x) nowait + #pragma omp target enter data map(always, to: z) depend(inout: z) nowait + #pragma omp target map (alloc: x, y, z) map (from: err) depend(inout: x, z) + { + err = x != 2 || y != 3 || z != 4; + x = 5; y = 6; z = 7; + } + #pragma omp task depend(in: x) + { + usleep (5000); + if (!shared_mem) + x = 1; + } + #pragma omp task depend(in: x) + { + usleep (6000); + if (!shared_mem) + y = 2; + } + #pragma omp task depend(out: z) + { + usleep (3000); + if (!shared_mem) + z = 3; + } + #pragma omp target exit data map(release: z) depend(inout: z) nowait + #pragma omp target exit data map(from: x, y) depend(inout: x) nowait + #pragma omp target exit data map(from: z) depend(inout: z) nowait + #pragma omp taskwait + if (err || x != 5 || y != 6 || z != 7) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-7.c b/libgomp/testsuite/libgomp.c/target-7.c index 0fe6150283d..41a1332bb0c 100644 --- a/libgomp/testsuite/libgomp.c/target-7.c +++ b/libgomp/testsuite/libgomp.c/target-7.c @@ -37,63 +37,63 @@ foo (int f) abort (); #pragma omp target data device (d) map (to: h) { - #pragma omp target device (d) + #pragma omp target device (d) 
map (h) if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 5) abort (); #pragma omp target update device (d) from (h) } #pragma omp target data if (v > 1) map (to: h) { - #pragma omp target if (v > 1) + #pragma omp target if (v > 1) map(h) if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 6) abort (); #pragma omp target update if (v > 1) from (h) } #pragma omp target data device (d) if (v > 1) map (to: h) { - #pragma omp target device (d) if (v > 1) + #pragma omp target device (d) if (v > 1) map(h) if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 7) abort (); #pragma omp target update device (d) if (v > 1) from (h) } #pragma omp target data if (v <= 1) map (to: h) { - #pragma omp target if (v <= 1) + #pragma omp target if (v <= 1) map (tofrom: h) if (omp_get_level () != 0 || h++ != 8) abort (); #pragma omp target update if (v <= 1) from (h) } #pragma omp target data device (d) if (v <= 1) map (to: h) { - #pragma omp target device (d) if (v <= 1) + #pragma omp target device (d) if (v <= 1) map (h) if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 9) abort (); #pragma omp target update device (d) if (v <= 1) from (h) } #pragma omp target data if (0) map (to: h) { - #pragma omp target if (0) + #pragma omp target if (0) map (h) if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 10) abort (); #pragma omp target update if (0) from (h) } #pragma omp target data device (d) if (0) map (to: h) { - #pragma omp target device (d) if (0) + #pragma omp target device (d) if (0) map (h) if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 11) abort (); #pragma omp target update device (d) if (0) from (h) } #pragma omp target data if (1) map (to: h) { - #pragma omp target if (1) + #pragma omp target if (1) map (tofrom: h) if (omp_get_level () != 0 || h++ != 12) abort (); #pragma omp target update if (1) from (h) } #pragma omp target data device (d) if (1) map (to: h) { - #pragma omp 
target device (d) if (1) + #pragma omp target device (d) if (1) map (tofrom: h) if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 13) abort (); #pragma omp target update device (d) if (1) from (h) diff --git a/libgomp/testsuite/libgomp.c/taskloop-1.c b/libgomp/testsuite/libgomp.c/taskloop-1.c new file mode 100644 index 00000000000..21551f2950c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/taskloop-1.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp -std=c99" } */ + +int q, r, e; + +__attribute__((noinline, noclone)) void +foo (long a, long b) +{ + #pragma omp taskloop lastprivate (q) nogroup + for (long d = a; d < b; d += 2) + { + q = d; + if (d < 2 || d > 6 || (d & 1)) + #pragma omp atomic + e |= 1; + } +} + +__attribute__((noinline, noclone)) int +bar (int a, int b) +{ + int q = 7; + #pragma omp taskloop lastprivate (q) + for (int d = a; d < b; d++) + { + if (d < 12 || d > 17) + #pragma omp atomic + e |= 1; + q = d; + } + return q; +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + foo (2, 7); + r = bar (12, 18); + } + if (q != 6 || r != 17 || e) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/taskloop-2.c b/libgomp/testsuite/libgomp.c/taskloop-2.c new file mode 100644 index 00000000000..be893ebf80a --- /dev/null +++ b/libgomp/testsuite/libgomp.c/taskloop-2.c @@ -0,0 +1,147 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -std=c99" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +int u[1024], v[1024], w[1024], m; + +__attribute__((noinline, noclone)) void +f1 (long a, long b) +{ + #pragma omp taskloop simd default(none) shared(u, v, w) nogroup + for (long d = a; d < b; d++) + u[d] = v[d] + w[d]; +} + +__attribute__((noinline, noclone)) int +f2 (long a, long b, long c) +{ + int d, e; + #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e) + for 
(d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + c = c + 5; + e = c + 9; + } + return d + c + e; +} + +__attribute__((noinline, noclone)) int +f3 (long a, long b) +{ + int d; + #pragma omp taskloop simd default(none) shared(u, v, w) + for (d = a; d < b; d++) + { + int *p = &d; + u[d] = v[d] + w[d]; + } + return d; +} + +__attribute__((noinline, noclone)) int +f4 (long a, long b, long c, long d) +{ + int e, f, g; + #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g) + for (e = a; e < b; e++) + for (f = c; f < d; f++) + { + int *p = &e; + int *q = &f; + int r = 32 * e + f; + u[r] = v[r] + w[r]; + g = r; + } + return e + f + g; +} + +__attribute__((noinline, noclone)) int +f5 (long a, long b, long c, long d) +{ + int e, f; + #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) + for (e = a; e < b; e++) + for (f = c; f < d; f++) + { + int r = 32 * e + f; + u[r] = v[r] + w[r]; + } + return e + f; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + { + v[i] = i; + w[i] = i + 1; + } + #pragma omp parallel + #pragma omp single + f1 (0, 1024); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + #pragma omp parallel + #pragma omp single + m = f2 (2, 1022, 17); + for (i = 0; i < 1024; i++) + if ((i < 2 || i >= 1022) ? 
u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i) + __builtin_abort (); + else + { + v[i] = i; + w[i] = i + 1; + } + if (m != 1022 + 2 * (1020 * 5 + 17) + 9) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f3 (0, 1024); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + if (m != 1024) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f4 (0, 32, 0, 32); + for (i = 0; i < 1024; i++) + if (u[i] != 1536 - 2 * i) + __builtin_abort (); + else + { + v[i] = i; + w[i] = i + 1; + } + if (m != 32 + 32 + 1023) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f5 (0, 32, 0, 32); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + if (m != 32 + 32) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/taskloop-3.c b/libgomp/testsuite/libgomp.c/taskloop-3.c new file mode 100644 index 00000000000..5356d7f0251 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/taskloop-3.c @@ -0,0 +1,84 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp -std=c99" } */ + +int g; +int a[1024]; + +__attribute__((noinline, noclone)) int +f1 (int x) +{ + #pragma omp taskloop firstprivate (x) lastprivate (x) + for (int i = 0; i < 64; i++) + { + if (x != 74) + __builtin_abort (); + if (i == 63) + x = i + 4; + } + return x; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup + for (int i = 0; i < 64; i++) + { + if (g != 77) + __builtin_abort (); + if (i == 63) + g = i + 9; + } +} + +__attribute__((noinline, noclone)) long long +f3 (long long a, long long b, long long c) +{ + long long i; + int l; + #pragma omp taskloop default (none) lastprivate (i, l) + for (i = a; i < b; i += c) + l = i; + return l * 7 + i; +} + +__attribute__((noinline, noclone)) long long +f4 (long long a, long long b, long 
long c, long long d, + long long e, long long f, int k) +{ + long long i, j; + int l; + #pragma omp taskloop default (none) collapse(2) \ + firstprivate (k) lastprivate (i, j, k, l) + for (i = a; i < b; i += e) + for (j = c; j < d; j += f) + { + if (k != 73) + __builtin_abort (); + if (i == 31 && j == 46) + k = i; + l = j; + } + return i + 5 * j + 11 * k + 17 * l; +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + if (f1 (74) != 63 + 4) + __builtin_abort (); + g = 77; + f2 (); + #pragma omp taskwait + if (g != 63 + 9) + __builtin_abort (); + if (f3 (7, 12, 2) != 11 * 7 + 13) + __builtin_abort (); + if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46) + __builtin_abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/taskloop-4.c b/libgomp/testsuite/libgomp.c/taskloop-4.c new file mode 100644 index 00000000000..a69be19c9c2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/taskloop-4.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp" } */ + +int u[64], v; + +__attribute__((noinline, noclone)) int +test (int a, int b, int c, int d, void (*fn) (int, int, int, int), + int *num_tasks, int *min_iters, int *max_iters) +{ + int i, t = 0; + __builtin_memset (u, 0, sizeof u); + v = 0; + fn (a, b, c, d); + *min_iters = 0; + *max_iters = 0; + *num_tasks = v; + if (v) + { + *min_iters = u[0]; + *max_iters = u[0]; + t = u[0]; + for (i = 1; i < v; i++) + { + if (*min_iters > u[i]) + *min_iters = u[i]; + if (*max_iters < u[i]) + *max_iters = u[i]; + t += u[i]; + } + } + return t; +} + +void +grainsize (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) grainsize(d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + } + u[k] = ++j; + } +} + +void +num_tasks (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) num_tasks(d) + for (i = a; i < b; i 
+= c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + } + u[k] = ++j; + } +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int min_iters, max_iters, ntasks; + /* If grainsize is present, # of task loop iters is >= grainsize && < 2 * grainsize, + unless # of loop iterations is smaller than grainsize. */ + if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters) != 79 + || min_iters < 17 || max_iters >= 17 * 2) + __builtin_abort (); + if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters) != 370 + || min_iters < 28 || max_iters >= 28 * 2) + __builtin_abort (); + if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7 + || ntasks != 1 || min_iters != 7 || max_iters != 7) + __builtin_abort (); + /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */ + if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54 + || ntasks != 9) + __builtin_abort (); + if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters) != 13 + || ntasks != 13) + __builtin_abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/affinity1.f90 b/libgomp/testsuite/libgomp.fortran/affinity1.f90 new file mode 100644 index 00000000000..26b5185ba3c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/affinity1.f90 @@ -0,0 +1,49 @@ +! { dg-do run } +! { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O2" } } +! { dg-set-target-env-var OMP_PROC_BIND "spread,close" } +! { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" } +! { dg-set-target-env-var OMP_NUM_THREADS "2" } + + use omp_lib + integer :: num, i, nump + num = omp_get_num_places () + print *, 'omp_get_num_places () == ', num + do i = 0, num - 1 + nump = omp_get_place_num_procs (place_num = i) + if (nump .eq. 
0) then + print *, 'place ', i, ' {}' + else + call print_place (i, nump) + end if + end do + call print_place_var + call omp_set_nested (nested = .true.) + !$omp parallel + if (omp_get_thread_num () == omp_get_num_threads () - 1) then + !$omp parallel + if (omp_get_thread_num () == omp_get_num_threads () - 1) & + call print_place_var + !$omp end parallel + end if + !$omp end parallel +contains + subroutine print_place (i, nump) + integer, intent (in) :: i, nump + integer :: ids(nump) + call omp_get_place_proc_ids (place_num = i, ids = ids) + print *, 'place ', i, ' {', ids, '}' + end subroutine + subroutine print_place_var + integer :: place, num_places + place = omp_get_place_num () + num_places = omp_get_partition_num_places () + print *, 'place ', place + if (num_places .gt. 0) call print_partition (num_places) + end subroutine + subroutine print_partition (num_places) + integer, intent (in) :: num_places + integer :: place_nums(num_places) + call omp_get_partition_place_nums (place_nums = place_nums) + print *, 'partition ', place_nums(1), '-', place_nums(num_places) + end subroutine +end diff --git a/libgomp/testsuite/libgomp.fortran/affinity2.f90 b/libgomp/testsuite/libgomp.fortran/affinity2.f90 new file mode 100644 index 00000000000..338f0e8bb93 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/affinity2.f90 @@ -0,0 +1,8 @@ +! { dg-do run } +! { dg-additional-options "-fdefault-integer-8" } +! { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O2" } } +! { dg-set-target-env-var OMP_PROC_BIND "spread,close" } +! { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" } +! { dg-set-target-env-var OMP_NUM_THREADS "2" } + +include 'affinity1.f90' |