summaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-13 19:06:23 +0000
committerjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-13 19:06:23 +0000
commit43895be5773360d358760e215f3d7b6a09267bfd (patch)
treee8508f7b6cd5600095f6c36ccd08e6440d82340c /libgomp
parente2c514f0507fb1864c4eed5d691e47156be57b5b (diff)
downloadgcc-43895be5773360d358760e215f3d7b6a09267bfd.tar.gz
gcc/
2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> Ilya Verbin <ilya.verbin@intel.com> * builtin-types.def (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR, BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR, BT_FN_BOOL_UINT_LONGPTR_LONG_LONGPTR_LONGPTR, BT_FN_BOOL_UINT_ULLPTR_ULL_ULLPTR_ULLPTR, BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_ULL_ULL_ULL, BT_FN_VOID_LONG_VAR, BT_FN_VOID_ULL_VAR): New. (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR, BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): Remove. * cgraph.h (enum cgraph_simd_clone_arg_type): Add SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP, SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP and SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP. (struct cgraph_simd_clone_arg): Adjust comment. * coretypes.h (struct gomp_ordered): New forward decl. * gimple.c (gimple_build_omp_critical): Add CLAUSES argument, set critical clauses to it. (gimple_build_omp_ordered): Return gomp_ordered * instead of gimple *. Add CLAUSES argument, set ordered clauses to it. (gimple_copy): Unshare clauses on GIMPLE_OMP_CRITICAL and GIMPLE_OMP_ORDERED. * gimple.def (GIMPLE_OMP_ORDERED): Change from GSS_OMP to GSS_OMP_SINGLE_LAYOUT, move it after GIMPLE_OMP_TEAMS. * gimple.h (enum gf_mask): Add GF_OMP_TASK_TASKLOOP. Add another bit to GF_OMP_FOR_KIND_MASK mask. Add GF_OMP_FOR_KIND_TASKLOOP, renumber GF_OMP_FOR_KIND_CILKFOR and GF_OMP_FOR_KIND_OACC_LOOP. Adjust GF_OMP_FOR_SIMD, GF_OMP_FOR_COMBINED and GF_OMP_FOR_COMBINED_INTO. Add another bit to GF_OMP_TARGET_KIND_MASK mask. Add GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA, renumber GF_OMP_TARGET_KIND_OACC_{PARALLEL,KERNELS,DATA,UPDATE,ENTER_EXIT_DATA}. (gomp_critical): Add clauses field. (gomp_ordered): New struct. (is_a_helper <gomp_ordered *>::test): New inline. (gimple_build_omp_critical): Add CLAUSES argument. (gimple_build_omp_ordered): Likewise. Return gomp_ordered * instead of gimple *. (gimple_omp_critical_clauses, gimple_omp_critical_clauses_ptr, gimple_omp_critical_set_clauses, gimple_omp_ordered_clauses, gimple_omp_ordered_clauses_ptr, gimple_omp_ordered_set_clauses, gimple_omp_task_taskloop_p, gimple_omp_task_set_taskloop_p): New inline functions. * gimple-pretty-print.c (dump_gimple_omp_for): Handle taskloop. (dump_gimple_omp_target): Handle enter data and exit data. (dump_gimple_omp_block): Don't handle GIMPLE_OMP_ORDERED here. (dump_gimple_omp_critical): Print clauses. (dump_gimple_omp_ordered): New function. (dump_gimple_omp_task): Handle taskloop. (pp_gimple_stmt_1): Use dump_gimple_omp_ordered for GIMPLE_OMP_ORDERED. * gimple-walk.c (walk_gimple_op): Walk clauses on GIMPLE_OMP_CRITICAL and GIMPLE_OMP_ORDERED. * gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_0LEN_ARRAY. (enum omp_region_type): Add ORT_COMBINED_TARGET and ORT_NONE. (struct gimplify_omp_ctx): Add loop_iter_var, target_map_scalars_firstprivate, target_map_pointers_as_0len_arrays and target_firstprivatize_array_bases fields. (delete_omp_context): Release loop_iter_var. (gimplify_bind_expr): Handle ORT_NONE. (maybe_fold_stmt): Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. (is_gimple_stmt): Return true for OMP_TASKLOOP, OMP_TEAMS and OMP_TARGET{,_DATA,_UPDATE,_ENTER_DATA,_EXIT_DATA}. (omp_firstprivatize_variable): Handle ORT_NONE. Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. Handle ctx->target_map_scalars_firstprivate. (omp_add_variable): Handle ORT_NONE. Allow map clause together with data sharing clauses. For data sharing clause with VLA decl on omp target/target data don't add firstprivate for the pointer. Call omp_notice_variable on TYPE_SIZE_UNIT only if it is a DECL_P. (omp_notice_threadprivate_variable): Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. (omp_notice_variable): Handle ORT_NONE. Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. Handle implicit mapping of pointers as zero length array sections and ctx->target_map_scalars_firstprivate mapping of scalars as firstprivate data sharing. (omp_check_private): Handle omp_member_access_dummy_var vars. (find_decl_expr): New function. (gimplify_scan_omp_clauses): Add CODE argument. For OMP_CLAUSE_IF complain if OMP_CLAUSE_IF_MODIFIER is present and does not match code. Handle OMP_CLAUSE_GANG separately. Handle OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD,SIMDLEN} clauses. Diagnose linear clause on combined distribute {, parallel for} simd construct, unless it is the loop iterator. Handle struct element GOMP_MAP_FIRSTPRIVATE_POINTER. Handle map clauses with COMPONENT_REF. Initialize ctx->target_map_scalars_firstprivate, ctx->target_firstprivatize_array_bases and ctx->target_map_pointers_as_0len_arrays. Add firstprivate for linear clause even to target region if combined. Remove map clauses with GOMP_MAP_FIRSTPRIVATE_POINTER kind from OMP_TARGET_{,ENTER_,EXIT_}DATA. For GOMP_MAP_FIRSTPRIVATE_POINTER map kind with non-INTEGER_CST OMP_CLAUSE_SIZE firstprivatize the bias. Handle OMP_CLAUSE_DEPEND_{SINK,SOURCE}. Handle OMP_CLAUSE_{{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}. For linear clause on worksharing loop combined with parallel add shared clause on the parallel. Handle OMP_CLAUSE_REDUCTION with MEM_REF OMP_CLAUSE_DECL. Set DECL_NAME on omp_member_access_dummy_var vars. Add lastprivate clause to outer taskloop if needed. (gimplify_adjust_omp_clauses_1): Handle GOVD_MAP_0LEN_ARRAY. If gimplify_omp_ctxp->target_firstprivatize_array_bases, use GOMP_MAP_FIRSTPRIVATE_POINTER map kind instead of GOMP_MAP_POINTER. (gimplify_adjust_omp_clauses): Add CODE argument. Handle removal of GOMP_MAP_FIRSTPRIVATE_POINTER struct elements for struct not seen in target body. Handle removal of struct mapping if struct is not seen in target body. Remove GOMP_MAP_STRUCT map clause on OMP_TARGET_EXIT_DATA. Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. Use GOMP_MAP_FIRSTPRIVATE_POINTER instead of GOMP_MAP_POINTER if ctx->target_firstprivatize_array_bases for VLAs. Set OMP_CLAUSE_MAP_PRIVATE if both data sharing and map clause appear together. Handle OMP_CLAUSE_{{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}. Don't remove map clause if it has map-type-modifier always. Handle OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD,SIMDLEN} clauses. (gimplify_oacc_cache, gimplify_omp_parallel, gimplify_omp_task): Adjust gimplify_scan_omp_clauses and gimplify_adjust_omp_clauses callers. (gimplify_omp_for): Likewise. Handle OMP_TASKLOOP. Initialize loop_iter_var. Use OMP_FOR_ORIG_DECLS. Fix handling of lastprivate iterators in doacross loops. (gimplify_omp_workshare): Adjust gimplify_scan_omp_clauses and gimplify_adjust_omp_clauses callers. Use ORT_COMBINED_TARGET for OMP_TARGET_COMBINED. Adjust check for ORT_TARGET for the addition of ORT_COMBINED_TARGET. (gimplify_omp_target_update): Adjust gimplify_scan_omp_clauses and gimplify_adjust_omp_clauses callers. Handle OMP_TARGET_ENTER_DATA and OMP_TARGET_EXIT_DATA. (gimplify_omp_ordered): New function. (gimplify_expr): Handle OMP_TASKLOOP, OMP_TARGET_ENTER_DATA and OMP_TARGET_EXIT_DATA. Use gimplify_omp_ordered for OMP_ORDERED. Gimplify clauses on OMP_CRITICAL. * internal-fn.c (expand_GOMP_SIMD_ORDERED_START, expand_GOMP_SIMD_ORDERED_END): New functions. * internal-fn.def (GOMP_SIMD_ORDERED_START, GOMP_SIMD_ORDERED_END): New internal functions. * omp-builtins.def (BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START, BUILT_IN_GOMP_LOOP_DOACROSS_DYNAMIC_START, BUILT_IN_GOMP_LOOP_DOACROSS_GUIDED_START, BUILT_IN_GOMP_LOOP_DOACROSS_RUNTIME_START, BUILT_IN_GOMP_LOOP_ULL_DOACROSS_STATIC_START, BUILT_IN_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, BUILT_IN_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, BUILT_IN_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, BUILT_IN_GOMP_DOACROSS_POST, BUILT_IN_GOMP_DOACROSS_WAIT, BUILT_IN_GOMP_DOACROSS_ULL_POST, BUILT_IN_GOMP_DOACROSS_ULL_WAIT, BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA, BUILT_IN_GOMP_TASKLOOP, BUILT_IN_GOMP_TASKLOOP_ULL): New built-ins. (BUILT_IN_GOMP_TASK): Add INT argument to the end. (BUILT_IN_GOMP_TARGET): Rename from GOMP_target to GOMP_target_41, adjust type. (BUILT_IN_GOMP_TARGET_DATA): Rename from GOMP_target_data to GOMP_target_data_41, adjust type. (BUILT_IN_GOMP_TARGET_UPDATE): Rename from GOMP_target_update to GOMP_target_update_41, adjust type. * omp-low.c (struct omp_region): Adjust comments, add ord_stmt field. (struct omp_for_data): Add ordered and simd_schedule fields. (omp_member_access_dummy_var, unshare_and_remap_1, unshare_and_remap, is_taskloop_ctx): New functions. (is_taskreg_ctx): Use is_parallel_ctx and is_task_ctx. (extract_omp_for_data): Handle taskloops and doacross loops and simd schedule modifier. (omp_adjust_chunk_size): New function. (get_ws_args_for): Use it. (lookup_sfield): Change first argument to splay_tree_key, add overload with first argument tree. (maybe_lookup_field): Likewise. (use_pointer_for_field): Handle omp_member_access_dummy_var. (omp_copy_decl_2): If var is TREE_ADDRESSABLE listed in task_shared_vars, clear TREE_ADDRESSABLE on the copy. (build_outer_var_ref): Add LASTPRIVATE argument, handle taskloops and omp_member_access_dummy_var vars. (build_sender_ref): Change first argument to splay_tree_key, add overload with first argument tree. (install_var_field): For mask & 8 use &DECL_UID as key instead of the tree itself. (fixup_child_record_type): Const qualify *.omp_data_i. (scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE, C/C++ array reductions, OMP_CLAUSE_{IS,USE}_DEVICE_PTR clauses, OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,SIMDLEN,THREADS,SIMD} and OMP_CLAUSE_{NOGROUP,DEFAULTMAP} clauses, OMP_CLAUSE__LOOPTEMP_ clause on taskloop, GOMP_MAP_FIRSTPRIVATE_POINTER, OMP_CLAUSE_MAP_PRIVATE. (create_omp_child_function): Set TREE_READONLY on .omp_data_i. (find_combined_for): Allow searching for different GIMPLE_OMP_FOR kinds. (add_taskreg_looptemp_clauses): New function. (scan_omp_parallel): Use it. (scan_omp_task): Likewise. (finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. For taskloop, move fields for the first two _LOOPTEMP_ clauses first. (check_omp_nesting_restrictions): Handle GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA. Formatting fixes. Allow the sandwiched taskloop constructs. Type check OMP_CLAUSE_DEPEND_{KIND,SOURCE}. Allow ordered simd inside of simd region. Diagnose depend(source) or depend(sink:...) on target constructs or task/taskloop. (handle_simd_reference): Use get_name. (lower_rec_input_clauses): Likewise. Ignore all OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct. Allow _LOOPTEMP_ clause on GOMP_TASK. Unshare new_var before passing it to omp_clause_{default,copy}_ctor. Handle OMP_CLAUSE_REDUCTION with MEM_REF OMP_CLAUSE_DECL. Set lastprivate_firstprivate flag for linear that needs copyin and copyout. Use BUILT_IN_ALLOCA_WITH_ALIGN instead of BUILT_IN_ALLOCA. (lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE on taskloop lookup decl in outer context. Pass true to build_outer_var_ref lastprivate argument. Handle OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV lastprivate if the decl is global outside of outer taskloop for. (lower_reduction_clauses): Handle OMP_CLAUSE_REDUCTION with MEM_REF OMP_CLAUSE_DECL. (lower_send_clauses): Ignore first two _LOOPTEMP_ clauses in taskloop GOMP_TASK. Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. Handle omp_member_access_dummy_var vars. Handle OMP_CLAUSE_REDUCTION with MEM_REF OMP_CLAUSE_DECL. Use new lookup_sfield overload. (lower_send_shared_vars): Ignore fields with NULL or FIELD_DECL abstract origin. Handle omp_member_access_dummy_var vars. (expand_parallel_call): Use expand_omp_build_assign. (expand_task_call): Handle taskloop construct expansion. Add REGION argument. Use GOMP_TASK_* defines instead of hardcoded integers. Add priority argument to GOMP_task* calls. Or in GOMP_TASK_FLAG_PRIORITY into flags if priority is present for GOMP_task call. (expand_omp_build_assign): Add prototype. Add AFTER argument, if true emit statements after *GSI_P and continue linking. (expand_omp_taskreg): Adjust expand_task_call caller. (expand_omp_for_init_counts): Rename zero_iter_bb argument to zero_iter1_bb and first_zero_iter to first_zero_iter1, add zero_iter2_bb and first_zero_iter2 arguments, handle computation of counts even for ordered loops. (expand_omp_for_init_vars): Handle GOMP_TASK inner_stmt. (expand_omp_ordered_source, expand_omp_ordered_sink, expand_omp_ordered_source_sink, expand_omp_for_ordered_loops): New functions. (expand_omp_for_generic): Use omp_adjust_chunk_size. Handle linear clauses on worksharing loop. Handle DOACROSS loop expansion. (expand_omp_for_static_nochunk): Handle linear clauses on worksharing loop. Adjust expand_omp_for_init_counts callers. (expand_omp_for_static_chunk): Likewise. Use omp_adjust_chunk_size. (expand_omp_simd): Handle addressable fd->loop.v. Adjust expand_omp_for_init_counts callers. (expand_omp_taskloop_for_outer, expand_omp_taskloop_for_inner): New functions. (expand_omp_for): Call expand_omp_taskloop_for_* for taskloop. Handle doacross loops. (expand_omp_target): Handle GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA. Pass flags and depend arguments to GOMP_target_{41,update_41,enter_exit_data} libcalls. (expand_omp): Don't expand ordered depend constructs here, record ord_stmt instead for later expand_omp_for_generic. (build_omp_regions_1): Handle GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA. Treat GIMPLE_OMP_ORDERED with depend clause as stand-alone directive. (lower_omp_ordered_clauses): New function. (lower_omp_ordered): Handle OMP_CLAUSE_SIMD, for OMP_CLAUSE_DEPEND don't lower anything. (lower_omp_for_lastprivate): Use last _looptemp_ clause on taskloop for comparison. (lower_omp_for): Handle taskloop constructs. Adjust OMP_CLAUSE_DECL and OMP_CLAUSE_LINEAR_STEP so that expand_omp_for_* can use it during expansion for linear adjustments. (create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. (lower_depend_clauses): Assert not seeing sink/source depend kinds. Set TREE_ADDRESSABLE on array. Change first argument from gimple * to tree * pointing to the stmt's clauses. (lower_omp_taskreg): Adjust lower_depend_clauses caller. (lower_omp_target): Handle GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA, depend clauses, GOMP_MAP_{RELEASE,ALWAYS_{TO,FROM,TOFROM},FIRSTPRIVATE_POINTER,STRUCT} map kinds, OMP_CLAUSE_{FIRSTPRIVATE,PRIVATE,{IS,USE}_DEVICE_PTR clauses. Always use short kind and 8-bit align shift. (lower_omp_regimplify_p): Use IS_TYPE_OR_DECL_P macro. (struct lower_omp_regimplify_operands_data): New type. (lower_omp_regimplify_operands_p, lower_omp_regimplify_operands): New functions. (lower_omp_1): Use lower_omp_regimplify_operands instead of gimple_regimplify_operands. (make_gimple_omp_edges): Handle GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA. Treat GIMPLE_OMP_ORDERED with depend clause as stand-alone directive. (simd_clone_clauses_extract): Honor OMP_CLAUSE_LINEAR_KIND. (simd_clone_mangle): Mangle the various linear kinds per the new ABI. (simd_clone_adjust_argument_types): Handle SIMD_CLONE_ARG_TYPE_LINEAR_*_CONSTANT_STEP. (simd_clone_init_simd_arrays): Don't do anything for uval. (simd_clone_adjust): Handle SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP like SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP. Handle SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP. * omp-low.h (omp_member_access_dummy_var): New prototype. * passes.def (pass_simduid_cleanup): Schedule another copy of the pass after all optimizations. * tree.c (omp_clause_code_name): Add entries for OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT} and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD}. (omp_clause_num_ops): Likewise. Bump number of OMP_CLAUSE_REDUCTION arguments to 5 and for OMP_CLAUSE_ORDERED to 1. (walk_tree_1): Adjust for OMP_CLAUSE_ORDERED having 1 argument and OMP_CLAUSE_REDUCTION 5 arguments. Handle OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT} and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD} clauses. * tree-core.h (enum omp_clause_linear_kind): New. (struct tree_omp_clause): Change type of map_kind from unsigned char to unsigned int. Add subcode.if_modifier and subcode.linear_kind fields. (enum omp_clause_code): Add OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT} and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD}. (OMP_CLAUSE_REDUCTION): Document OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER. (enum omp_clause_depend_kind): Add OMP_CLAUSE_DEPEND_{SOURCE,SINK}. * tree.def (OMP_FOR): Add OMP_FOR_ORIG_DECLS operand. (OMP_CRITICAL): Move before OMP_SINGLE. Add OMP_CRITICAL_CLAUSES operand. (OMP_ORDERED): Move before OMP_SINGLE. Add OMP_ORDERED_CLAUSES operand. (OMP_TASKLOOP, OMP_TARGET_ENTER_DATA, OMP_TARGET_EXIT_DATA): New tree codes. * tree.h (OMP_BODY): Replace OMP_CRITICAL with OMP_TASKGROUP. (OMP_CLAUSE_SET_MAP_KIND): Cast to unsigned int rather than unsigned char. (OMP_CRITICAL_NAME): Adjust to be 3rd operand instead of 2nd. (OMP_CLAUSE_NUM_TASKS_EXPR): Formatting fix. (OMP_STANDALONE_CLAUSES): Adjust to cover OMP_TARGET_{ENTER,EXIT}_DATA. (OMP_CLAUSE_DEPEND_SINK_NEGATIVE, OMP_TARGET_COMBINED, OMP_CLAUSE_MAP_PRIVATE, OMP_FOR_ORIG_DECLS, OMP_CLAUSE_IF_MODIFIER, OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION, OMP_CRITICAL_CLAUSES, OMP_CLAUSE_PRIVATE_TASKLOOP_IV, OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV, OMP_CLAUSE_HINT_EXPR, OMP_CLAUSE_SCHEDULE_SIMD, OMP_CLAUSE_LINEAR_KIND, OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER, OMP_CLAUSE_SHARED_FIRSTPRIVATE, OMP_ORDERED_CLAUSES, OMP_TARGET_ENTER_DATA_CLAUSES, OMP_TARGET_EXIT_DATA_CLAUSES, OMP_CLAUSE_NUM_TASKS_EXPR, OMP_CLAUSE_GRAINSIZE_EXPR, OMP_CLAUSE_PRIORITY_EXPR, OMP_CLAUSE_ORDERED_EXPR): Define. * tree-inline.c (remap_gimple_stmt): Handle clauses on GIMPLE_OMP_ORDERED and GIMPLE_OMP_CRITICAL. For IFN_GOMP_SIMD_ORDERED_{START,END} set has_simduid_loops. * tree-nested.c (convert_nonlocal_omp_clauses): Handle OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,SIMDLEN,PRIORITY,SIMD} and OMP_CLAUSE_{GRAINSIZE,NUM_TASKS,HINT,NOGROUP,THREADS,DEFAULTMAP} clauses. Handle OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER. (convert_local_omp_clauses): Likewise. * tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,SIMDLEN,PRIORITY,SIMD} and OMP_CLAUSE_{GRAINSIZE,NUM_TASKS,HINT,NOGROUP,THREADS,DEFAULTMAP} clauses. Handle OMP_CLAUSE_IF_MODIFIER, OMP_CLAUSE_ORDERED_EXPR, OMP_CLAUSE_SCHEDULE_SIMD, OMP_CLAUSE_LINEAR_KIND, OMP_CLAUSE_DEPEND_{SOURCE,SINK}. Use "delete" for GOMP_MAP_FORCE_DEALLOC. Handle GOMP_MAP_{ALWAYS_{TO,FROM,TOFROM},RELEASE,FIRSTPRIVATE_POINTER,STRUCT}. (dump_generic_node): Handle OMP_TASKLOOP, OMP_TARGET_{ENTER,EXIT}_DATA and clauses on OMP_ORDERED and OMP_CRITICAL. * tree-vectorizer.c (adjust_simduid_builtins): Adjust comment. Remove IFN_GOMP_SIMD_ORDERED_{START,END}. (vectorize_loops): Adjust comments. (pass_simduid_cleanup::execute): Likewise. * tree-vect-stmts.c (vectorizable_simd_clone_call): Handle SIMD_CLONE_ARG_TYPE_LINEAR_{REF,VAL,UVAL}_CONSTANT_STEP. * wide-int.h (wi::gcd): New. gcc/c-family/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> * c-common.c (enum c_builtin_type): Define DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10 and DEF_FUNCTION_TYPE_11. (c_define_builtins): Likewise. * c-common.h (enum c_omp_clause_split): Add C_OMP_CLAUSE_SPLIT_TASKLOOP. (c_finish_omp_critical, c_finish_omp_ordered): Add CLAUSES argument. (c_finish_omp_for): Add ORIG_DECLV argument. * c-cppbuiltin.c (c_cpp_builtins): Predefine _OPENMP as 201511 instead of 201307. * c-omp.c (c_finish_omp_critical): Add CLAUSES argument, set OMP_CRITICAL_CLAUSES to it. (c_finish_omp_ordered): Add CLAUSES argument, set OMP_ORDERED_CLAUSES to it. (c_finish_omp_for): Add ORIG_DECLV argument, set OMP_FOR_ORIG_DECLS to it if OMP_FOR. Clear DECL_INITIAL on the IVs. (c_omp_split_clauses): Handle OpenMP 4.5 combined/composite constructs and new OpenMP 4.5 clauses. Clear OMP_CLAUSE_SCHEDULE_SIMD if not combined with OMP_SIMD. Add verification code. * c-pragma.c (omp_pragmas_simd): Add taskloop. * c-pragma.h (enum pragma_kind): Add PRAGMA_OMP_TASKLOOP. (enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_{DEFAULTMAP,GRAINSIZE,HINT,{IS,USE}_DEVICE_PTR} and PRAGMA_OMP_CLAUSE_{LINK,NOGROUP,NUM_TASKS,PRIORITY,SIMD,THREADS}. gcc/c/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> * c-parser.c (c_parser_pragma): Handle PRAGMA_OMP_ORDERED here. (c_parser_omp_clause_name): Handle OpenMP 4.5 clauses. (c_parser_omp_variable_list): Handle structure elements for map, to and from clauses. Handle array sections in reduction clause. Formatting fixes. (c_parser_omp_clause_if): Add IS_OMP argument, handle parsing of if clause modifiers. (c_parser_omp_clause_num_tasks, c_parser_omp_clause_grainsize, c_parser_omp_clause_priority, c_parser_omp_clause_hint, c_parser_omp_clause_defaultmap, c_parser_omp_clause_use_device_ptr, c_parser_omp_clause_is_device_ptr): New functions. (c_parser_omp_clause_ordered): Parse optional parameter. (c_parser_omp_clause_reduction): Handle array reductions. (c_parser_omp_clause_schedule): Parse optional simd modifier. (c_parser_omp_clause_nogroup, c_parser_omp_clause_orderedkind): New functions. (c_parser_omp_clause_linear): Parse linear clause modifiers. (c_parser_omp_clause_depend_sink): New function. (c_parser_omp_clause_depend): Parse source/sink depend kinds. (c_parser_omp_clause_map): Parse release/delete map kinds and optional always modifier. (c_parser_oacc_all_clauses): Adjust c_parser_omp_clause_if and c_finish_omp_clauses callers. (c_parser_omp_all_clauses): Likewise. Parse OpenMP 4.5 clauses. Parse "to" as OMP_CLAUSE_TO_DECLARE if on declare target directive. (c_parser_oacc_cache): Adjust c_finish_omp_clauses caller. (OMP_CRITICAL_CLAUSE_MASK): Define. (c_parser_omp_critical): Parse critical clauses. (c_parser_omp_for_loop): Handle doacross loops, adjust c_finish_omp_for and c_finish_omp_clauses callers. (OMP_SIMD_CLAUSE_MASK): Add simdlen clause. (c_parser_omp_simd): Allow ordered clause if it has no parameter. (OMP_FOR_CLAUSE_MASK): Add linear clause. (c_parser_omp_for): Disallow ordered clause when combined with distribute. Disallow linear clause when combined with distribute and not combined with simd. (OMP_ORDERED_CLAUSE_MASK, OMP_ORDERED_DEPEND_CLAUSE_MASK): Define. (c_parser_omp_ordered): Add CONTEXT argument, remove LOC argument, parse clauses and if depend clause is found, don't parse a body. (c_parser_omp_parallel): Disallow copyin clause on target parallel. Allow target parallel without for after it. (OMP_TASK_CLAUSE_MASK): Add priority clause. (OMP_TARGET_DATA_CLAUSE_MASK): Add use_device_ptr clause. (c_parser_omp_target_data): Diagnose no map clauses or clauses with invalid kinds. (OMP_TARGET_UPDATE_CLAUSE_MASK): Add depend and nowait clauses. (OMP_TARGET_ENTER_DATA_CLAUSE_MASK, OMP_TARGET_EXIT_DATA_CLAUSE_MASK): Define. (c_parser_omp_target_enter_data, c_parser_omp_target_exit_data): New functions. (OMP_TARGET_CLAUSE_MASK): Add depend, nowait, private, firstprivate, defaultmap and is_device_ptr clauses. (c_parser_omp_target): Parse target parallel and target simd. Set OMP_TARGET_COMBINED on combined constructs. Parse target enter data and target exit data. Diagnose invalid map kinds. (OMP_DECLARE_TARGET_CLAUSE_MASK): Define. (c_parser_omp_declare_target): Parse OpenMP 4.5 forms of this construct. (c_parser_omp_declare_reduction): Use STRIP_NOPS when checking for &omp_priv. (OMP_TASKLOOP_CLAUSE_MASK): Define. (c_parser_omp_taskloop): New function. (c_parser_omp_construct): Don't handle PRAGMA_OMP_ORDERED here, handle PRAGMA_OMP_TASKLOOP. (c_parser_cilk_for): Adjust c_finish_omp_clauses callers. * c-tree.h (c_finish_omp_clauses): Add two new arguments. * c-typeck.c (handle_omp_array_sections_1): Fix comment typo. Add IS_OMP argument, handle structure element bases, diagnose bitfields, pass IS_OMP recursively, diagnose known zero length array sections in depend clauses, handle array sections in reduction clause, diagnose negative length even for pointers. (handle_omp_array_sections): Add IS_OMP argument, use auto_vec for types, pass IS_OMP down to handle_omp_array_sections_1, handle array sections in reduction clause, set OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION if map could be zero length array section, use GOMP_MAP_FIRSTPRIVATE_POINTER for IS_OMP. (c_finish_omp_clauses): Add IS_OMP and DECLARE_SIMD arguments. Handle new OpenMP 4.5 clauses and new restrictions for the old ones. gcc/cp/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> * class.c (finish_struct_1): Call finish_omp_declare_simd_methods. * cp-gimplify.c (cp_gimplify_expr): Handle OMP_TASKLOOP. (cp_genericize_r): Likewise. (cxx_omp_finish_clause): Don't diagnose references. (cxx_omp_disregard_value_expr): New function. * cp-objcp-common.h (LANG_HOOKS_OMP_DISREGARD_VALUE_EXPR): Redefine. * cp-tree.h (OMP_FOR_GIMPLIFYING_P): Document for OMP_TASKLOOP. (DECL_OMP_PRIVATIZED_MEMBER): Define. (finish_omp_declare_simd_methods, push_omp_privatization_clauses, pop_omp_privatization_clauses, save_omp_privatization_clauses, restore_omp_privatization_clauses, omp_privatize_field, cxx_omp_disregard_value_expr): New prototypes. (finish_omp_clauses): Add two new arguments. (finish_omp_for): Add ORIG_DECLV argument. * parser.c (cp_parser_lambda_body): Call save_omp_privatization_clauses and restore_omp_privatization_clauses. (cp_parser_omp_clause_name): Handle OpenMP 4.5 clauses. (cp_parser_omp_var_list_no_open): Handle structure elements for map, to and from clauses. Handle array sections in reduction clause. Parse this keyword. Formatting fixes. (cp_parser_omp_clause_if): Add IS_OMP argument, handle parsing of if clause modifiers. (cp_parser_omp_clause_num_tasks, cp_parser_omp_clause_grainsize, cp_parser_omp_clause_priority, cp_parser_omp_clause_hint, cp_parser_omp_clause_defaultmap): New functions. (cp_parser_omp_clause_ordered): Parse optional parameter. (cp_parser_omp_clause_reduction): Handle array reductions. (cp_parser_omp_clause_schedule): Parse optional simd modifier. (cp_parser_omp_clause_nogroup, cp_parser_omp_clause_orderedkind): New functions. (cp_parser_omp_clause_linear): Parse linear clause modifiers. (cp_parser_omp_clause_depend_sink): New function. (cp_parser_omp_clause_depend): Parse source/sink depend kinds. (cp_parser_omp_clause_map): Parse release/delete map kinds and optional always modifier. (cp_parser_oacc_all_clauses): Adjust cp_parser_omp_clause_if and finish_omp_clauses callers. (cp_parser_omp_all_clauses): Likewise. Parse OpenMP 4.5 clauses. Parse "to" as OMP_CLAUSE_TO_DECLARE if on declare target directive. (OMP_CRITICAL_CLAUSE_MASK): Define. (cp_parser_omp_critical): Parse critical clauses. (cp_parser_omp_for_incr): Use cp_tree_equal if processing_template_decl. (cp_parser_omp_for_loop_init): Return tree instead of bool. Handle non-static data member iterators. (cp_parser_omp_for_loop): Handle doacross loops, adjust finish_omp_for and finish_omp_clauses callers. (cp_omp_split_clauses): Adjust finish_omp_clauses caller. (OMP_SIMD_CLAUSE_MASK): Add simdlen clause. (cp_parser_omp_simd): Allow ordered clause if it has no parameter. (OMP_FOR_CLAUSE_MASK): Add linear clause. (cp_parser_omp_for): Disallow ordered clause when combined with distribute. Disallow linear clause when combined with distribute and not combined with simd. (OMP_ORDERED_CLAUSE_MASK, OMP_ORDERED_DEPEND_CLAUSE_MASK): Define. (cp_parser_omp_ordered): Add CONTEXT argument, return bool instead of tree, parse clauses and if depend clause is found, don't parse a body. (cp_parser_omp_parallel): Disallow copyin clause on target parallel. Allow target parallel without for after it. (OMP_TASK_CLAUSE_MASK): Add priority clause. (OMP_TARGET_DATA_CLAUSE_MASK): Add use_device_ptr clause. (cp_parser_omp_target_data): Diagnose no map clauses or clauses with invalid kinds. (OMP_TARGET_UPDATE_CLAUSE_MASK): Add depend and nowait clauses. (OMP_TARGET_ENTER_DATA_CLAUSE_MASK, OMP_TARGET_EXIT_DATA_CLAUSE_MASK): Define. (cp_parser_omp_target_enter_data, cp_parser_omp_target_exit_data): New functions. (OMP_TARGET_CLAUSE_MASK): Add depend, nowait, private, firstprivate, defaultmap and is_device_ptr clauses. (cp_parser_omp_target): Parse target parallel and target simd. Set OMP_TARGET_COMBINED on combined constructs. Parse target enter data and target exit data. Diagnose invalid map kinds. (cp_parser_oacc_cache): Adjust finish_omp_clauses caller. (OMP_DECLARE_TARGET_CLAUSE_MASK): Define. (cp_parser_omp_declare_target): Parse OpenMP 4.5 forms of this construct. (OMP_TASKLOOP_CLAUSE_MASK): Define. (cp_parser_omp_taskloop): New function. (cp_parser_omp_construct): Don't handle PRAGMA_OMP_ORDERED here, handle PRAGMA_OMP_TASKLOOP. (cp_parser_pragma): Handle PRAGMA_OMP_ORDERED here directly, handle PRAGMA_OMP_TASKLOOP, call push_omp_privatization_clauses and pop_omp_privatization_clauses around parsing calls. (cp_parser_cilk_for): Adjust finish_omp_clauses caller. * pt.c (apply_late_template_attributes): Adjust tsubst_omp_clauses and finish_omp_clauses callers. (tsubst_omp_clause_decl): Return NULL if decl is NULL. For TREE_LIST, copy over OMP_CLAUSE_DEPEND_SINK_NEGATIVE bit. Use tsubst_expr instead of tsubst_copy, undo convert_from_reference effects. (tsubst_omp_clauses): Add ALLOW_FIELDS argument. Handle new OpenMP 4.5 clauses. Use tsubst_omp_clause_decl for more clauses. If ALLOW_FIELDS, handle non-static data members in the clauses. Clear OMP_CLAUSE_LINEAR_STEP if it has been cleared before. (omp_parallel_combined_clauses): New variable. (tsubst_omp_for_iterator): Add ORIG_DECLV argument, recur on OMP_FOR_ORIG_DECLS, handle non-static data member iterators. Improve handling of clauses on combined constructs. (tsubst_expr): Call push_omp_privatization_clauses and pop_omp_privatization_clauses around instantiation of certain OpenMP constructs, improve handling of clauses on combined constructs, handle OMP_TASKLOOP, adjust tsubst_omp_for_iterator, tsubst_omp_clauses and finish_omp_for callers, handle clauses on critical and ordered, handle OMP_TARGET_{ENTER,EXIT}_DATA. (instantiate_decl): Call save_omp_privatization_clauses and restore_omp_privatization_clauses around instantiation. (dependent_omp_for_p): Fix up comment typo. Handle SCOPE_REF. * semantics.c (omp_private_member_map, omp_private_member_vec, omp_private_member_ignore_next): New variables. (finish_non_static_data_member): Return dummy decl for privatized non-static data members. (omp_clause_decl_field, omp_clause_printable_decl, omp_note_field_privatization, omp_privatize_field): New functions. (handle_omp_array_sections_1): Fix comment typo. Add IS_OMP argument, handle structure element bases, diagnose bitfields, pass IS_OMP recursively, diagnose known zero length array sections in depend clauses, handle array sections in reduction clause, diagnose negative length even for pointers. (handle_omp_array_sections): Add IS_OMP argument, use auto_vec for types, pass IS_OMP down to handle_omp_array_sections_1, handle array sections in reduction clause, set OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION if map could be zero length array section, use GOMP_MAP_FIRSTPRIVATE_POINTER for IS_OMP. (finish_omp_reduction_clause): Handle array sections and arrays. Use omp_clause_printable_decl. (finish_omp_declare_simd_methods, cp_finish_omp_clause_depend_sink): New functions. (finish_omp_clauses): Add ALLOW_FIELDS and DECLARE_SIMD arguments. Handle new OpenMP 4.5 clauses and new restrictions for the old ones, handle non-static data members, reject this keyword when not allowed. (push_omp_privatization_clauses, pop_omp_privatization_clauses, save_omp_privatization_clauses, restore_omp_privatization_clauses): New functions. (handle_omp_for_class_iterator): Handle OMP_TASKLOOP class iterators. Add collapse and ordered arguments. Fix handling of lastprivate iterators in doacross loops. (finish_omp_for): Add ORIG_DECLV argument, handle doacross loops, adjust c_finish_omp_for, handle_omp_for_class_iterator and finish_omp_clauses callers. Fill in OMP_CLAUSE_LINEAR_STEP on simd loops with non-static data member iterators. gcc/fortran/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Ilya Verbin <ilya.verbin@intel.com> * f95-lang.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10, DEF_FUNCTION_TYPE_11, DEF_FUNCTION_TYPE_VAR_1): Define. * trans-openmp.c (gfc_trans_omp_clauses): Set OMP_CLAUSE_IF_MODIFIER to ERROR_MARK, OMP_CLAUSE_ORDERED_EXPR to NULL. (gfc_trans_omp_critical): Adjust for addition of clauses. (gfc_trans_omp_ordered): Likewise. * types.def (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR, BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR, BT_FN_BOOL_UINT_LONGPTR_LONG_LONGPTR_LONGPTR, BT_FN_BOOL_UINT_ULLPTR_ULL_ULLPTR_ULLPTR, BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_ULL_ULL_ULL, BT_FN_VOID_LONG_VAR, BT_FN_VOID_ULL_VAR): New. (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR, BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR, BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): Remove. gcc/lto/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> * lto-lang.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10, DEF_FUNCTION_TYPE_11): Define. gcc/jit/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> * jit-builtins.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10, DEF_FUNCTION_TYPE_11): Define. * jit-builtins.h (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10, DEF_FUNCTION_TYPE_11): Define. gcc/ada/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> * gcc-interface/utils.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10, DEF_FUNCTION_TYPE_11): Define. gcc/testsuite/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> * c-c++-common/gomp/cancel-1.c (f2): Add map clause to target data. * c-c++-common/gomp/clauses-1.c: New test. * c-c++-common/gomp/clauses-2.c: New test. * c-c++-common/gomp/clauses-3.c: New test. * c-c++-common/gomp/clauses-4.c: New test. * c-c++-common/gomp/declare-target-1.c: New test. * c-c++-common/gomp/declare-target-2.c: New test. * c-c++-common/gomp/depend-3.c: New test. * c-c++-common/gomp/depend-4.c: New test. * c-c++-common/gomp/doacross-1.c: New test. * c-c++-common/gomp/if-1.c: New test. * c-c++-common/gomp/if-2.c: New test. * c-c++-common/gomp/linear-1.c: New test. * c-c++-common/gomp/map-2.c: New test. * c-c++-common/gomp/map-3.c: New test. * c-c++-common/gomp/nesting-1.c (f_omp_parallel, f_omp_target_data): Add map clause to target data. * c-c++-common/gomp/nesting-warn-1.c (f_omp_target): Likewise. * c-c++-common/gomp/ordered-1.c: New test. * c-c++-common/gomp/ordered-2.c: New test. * c-c++-common/gomp/ordered-3.c: New test. * c-c++-common/gomp/pr61486-1.c (foo): Remove linear clause on non-iterator. * c-c++-common/gomp/pr61486-2.c (test, test2): Remove ordered clause and ordered construct where no longer allowed. * c-c++-common/gomp/priority-1.c: New test. * c-c++-common/gomp/reduction-1.c: New test. * c-c++-common/gomp/schedule-simd-1.c: New test. * c-c++-common/gomp/sink-1.c: New test. * c-c++-common/gomp/sink-2.c: New test. * c-c++-common/gomp/sink-3.c: New test. * c-c++-common/gomp/sink-4.c: New test. * c-c++-common/gomp/udr-1.c: New test. * c-c++-common/taskloop-1.c: New test. * c-c++-common/cpp/openmp-define-3.c: Adjust for the new value of _OPENMP macro. * c-c++-common/cilk-plus/PS/body.c (foo): Adjust expected diagnostics. * c-c++-common/goacc-gomp/nesting-fail-1.c (f_acc_parallel, f_acc_kernels, f_acc_data, f_acc_loop): Add map clause to target data. * gcc.dg/gomp/clause-1.c: * gcc.dg/gomp/reduction-1.c: New test. * gcc.dg/gomp/sink-fold-1.c: New test. * gcc.dg/gomp/sink-fold-2.c: New test. * gcc.dg/gomp/sink-fold-3.c: New test. * gcc.dg/vect/vect-simd-clone-15.c: New test. * g++.dg/gomp/clause-1.C (T::test): Remove dg-error on privatization of non-static data members. * g++.dg/gomp/clause-3.C (foo): Remove one dg-error directive. Add some linear clause tests. * g++.dg/gomp/declare-simd-3.C: New test. * g++.dg/gomp/linear-1.C: New test. * g++.dg/gomp/member-1.C: New test. * g++.dg/gomp/member-2.C: New test. * g++.dg/gomp/pr66571-2.C: New test. * g++.dg/gomp/pr67504.C (foo): Add test for ordered clause with dependent argument. * g++.dg/gomp/pr67522.C (foo): Add test for invalid array section in reduction clause. * g++.dg/gomp/reference-1.C: New test. * g++.dg/gomp/sink-1.C: New test. * g++.dg/gomp/sink-2.C: New test. * g++.dg/gomp/sink-3.C: New test. * g++.dg/gomp/task-1.C: Remove both dg-error directives. * g++.dg/gomp/this-1.C: New test. * g++.dg/gomp/this-2.C: New test. * g++.dg/vect/simd-clone-2.cc: New test. * g++.dg/vect/simd-clone-2.h: New test. * g++.dg/vect/simd-clone-3.cc: New test. * g++.dg/vect/simd-clone-4.cc: New test. * g++.dg/vect/simd-clone-4.h: New test. * g++.dg/vect/simd-clone-5.cc: New test. include/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Ilya Verbin <ilya.verbin@intel.com> * gomp-constants.h (GOMP_MAP_FLAG_ALWAYS): Define. (enum gomp_map_kind): Add GOMP_MAP_FIRSTPRIVATE, GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_USE_DEVICE_PTR, GOMP_MAP_ZERO_LEN_ARRAY_SECTION, GOMP_MAP_ALWAYS_TO, GOMP_MAP_ALWAYS_FROM, GOMP_MAP_ALWAYS_TOFROM, GOMP_MAP_STRUCT, GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION, GOMP_MAP_DELETE, GOMP_MAP_RELEASE, GOMP_MAP_FIRSTPRIVATE_POINTER. (GOMP_MAP_ALWAYS_TO_P, GOMP_MAP_ALWAYS_FROM_P): Define. (GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL, GOMP_TASK_FLAG_MERGEABLE, GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_PRIORITY, GOMP_TASK_FLAG_UP, GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP, GOMP_TARGET_FLAG_NOWAIT, GOMP_TARGET_FLAG_EXIT_DATA, GOMP_TARGET_FLAG_UPDATE): Define. libgomp/ 2015-10-13 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> Ilya Verbin <ilya.verbin@intel.com> * config/linux/affinity.c (omp_get_place_num_procs, omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions. * config/linux/doacross.h: New file. * config/posix/affinity.c (omp_get_place_num_procs, omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions. * config/posix/doacross.h: New file. * env.c: Include gomp-constants.h. (struct gomp_task_icv): Rename run_sched_modifier to run_sched_chunk_size. (gomp_max_task_priority_var): New variable. (parse_schedule): Rename run_sched_modifier to run_sched_chunk_size. (handle_omp_display_env): Change _OPENMP value from 201307 to 201511. Print OMP_MAX_TASK_PRIORITY. (initialize_env): Parse OMP_MAX_TASK_PRIORITY. (omp_set_schedule, omp_get_schedule): Rename modifier argument to chunk_size and run_sched_modifier to run_sched_chunk_size. (omp_get_max_task_priority, omp_get_initial_device, omp_get_num_places, omp_get_place_num, omp_get_partition_num_places, omp_get_partition_place_nums): New functions. * fortran.c (omp_set_schedule_, omp_set_schedule_8_, omp_get_schedule_, omp_get_schedule_8_): Rename modifier argument to chunk_size. (omp_get_num_places_, omp_get_place_num_procs_, omp_get_place_num_procs_8_, omp_get_place_proc_ids_, omp_get_place_proc_ids_8_, omp_get_place_num_, omp_get_partition_num_places_, omp_get_partition_place_nums_, omp_get_partition_place_nums_8_, omp_get_initial_device_, omp_get_max_task_priority_): New functions. * libgomp_g.h (GOMP_loop_doacross_static_start, GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start, GOMP_loop_ull_doacross_static_start, GOMP_loop_ull_doacross_dynamic_start, GOMP_loop_ull_doacross_guided_start, GOMP_loop_ull_doacross_runtime_start, GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post, GOMP_doacross_wait, GOMP_taskloop, GOMP_taskloop_ull, GOMP_target_41, GOMP_target_data_41, GOMP_target_update_41, GOMP_target_enter_exit_data): New prototypes. (GOMP_task): Add prototype argument. * libgomp.h (_LIBGOMP_CHECKING_): Define to 0 if not yet defined. (struct gomp_doacross_work_share): New type. (struct gomp_work_share): Add doacross field. (struct gomp_task_icv): Rename run_sched_modifier to run_sched_chunk_size. (enum gomp_task_kind): Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED. Add comments. (struct gomp_task_depend_entry): Add comments. (struct gomp_task): Likewise. (struct gomp_taskgroup): Likewise. (struct gomp_target_task): New type. (struct gomp_team): Add comment. (gomp_get_place_proc_ids_8, gomp_doacross_init, gomp_doacross_ull_init, gomp_task_maybe_wait_for_dependencies, gomp_create_target_task, gomp_target_task_fn): New prototypes. (struct target_var_desc): New type. (struct target_mem_desc): Adjust comment. Use struct target_var_desc instead of splay_tree_key for list. (REFCOUNT_INFINITY): Define. (struct splay_tree_key_s): Remove copy_from field. (struct gomp_device_descr): Add dev2dev_func field. (enum gomp_map_vars_kind): New enum. (gomp_map_vars): Add one argument. * libgomp.map (OMP_4.5): Export omp_get_max_task_priority, omp_get_max_task_priority_, omp_get_num_places, omp_get_num_places_, omp_get_place_num_procs, omp_get_place_num_procs_, omp_get_place_num_procs_8_, omp_get_place_proc_ids, omp_get_place_proc_ids_, omp_get_place_proc_ids_8_, omp_get_place_num, omp_get_place_num_, omp_get_partition_num_places, omp_get_partition_num_places_, omp_get_partition_place_nums, omp_get_partition_place_nums_, omp_get_partition_place_nums_8_, omp_get_initial_device, omp_get_initial_device_, omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr and omp_target_disassociate_ptr. (GOMP_4.0.2): Renamed to ... (GOMP_4.5): ... this. Export GOMP_target_41, GOMP_target_data_41, GOMP_target_update_41, GOMP_target_enter_exit_data, GOMP_taskloop, GOMP_taskloop_ull, GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start, GOMP_loop_doacross_static_start, GOMP_doacross_post, GOMP_doacross_wait, GOMP_loop_ull_doacross_dynamic_start, GOMP_loop_ull_doacross_guided_start, GOMP_loop_ull_doacross_runtime_start, GOMP_loop_ull_doacross_static_start, GOMP_doacross_ull_post and GOMP_doacross_ull_wait. * libgomp.texi: Document omp_get_max_task_priority. Rename modifier argument to chunk_size for omp_set_schedule and omp_get_schedule. Document OMP_MAX_TASK_PRIORITY env var. * loop.c (GOMP_loop_runtime_start): Adjust for run_sched_modifier to run_sched_chunk_size renaming. (GOMP_loop_ordered_runtime_start): Likewise. (gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start, gomp_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start, GOMP_parallel_loop_runtime_start): New functions. (GOMP_parallel_loop_runtime): Adjust for run_sched_modifier to run_sched_chunk_size renaming. (GOMP_loop_doacross_static_start, GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start): New functions or aliases. * loop_ull.c (GOMP_loop_ull_runtime_start): Adjust for run_sched_modifier to run_sched_chunk_size renaming. (GOMP_loop_ull_ordered_runtime_start): Likewise. (gomp_loop_ull_doacross_static_start, gomp_loop_ull_doacross_dynamic_start, gomp_loop_ull_doacross_guided_start, GOMP_loop_ull_doacross_runtime_start): New functions. (GOMP_loop_ull_doacross_static_start, GOMP_loop_ull_doacross_dynamic_start, GOMP_loop_ull_doacross_guided_start): New functions or aliases. * oacc-mem.c (acc_map_data, present_create_copy, gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false to gomp_map_vars. (gomp_acc_remove_pointer): Use copy_from from target_var_desc. * oacc-parallel.c (GOACC_data_start): Pass GOMP_MAP_VARS_OPENACC instead of false to gomp_map_vars. (GOACC_parallel_keyed): Likewise. Use copy_from from target_var_desc. * omp.h.in (omp_lock_hint_t): New type. (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, omp_get_place_num, omp_get_partition_num_places, omp_get_partition_place_nums, omp_get_initial_device, omp_get_max_task_priority, omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr, omp_target_disassociate_ptr): New prototypes. * omp_lib.f90.in (omp_lock_hint_kind): New parameter. (omp_lock_hint_none, omp_lock_hint_uncontended, omp_lock_hint_contended, omp_lock_hint_nonspeculative, omp_lock_hint_speculative): New parameters. (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, omp_get_place_num, omp_get_partition_num_places, omp_get_partition_place_nums, omp_get_initial_device, omp_get_max_task_priority): New interfaces. (omp_set_schedule, omp_get_schedule): Rename modifier argument to chunk_size. * omp_lib.h.in (omp_lock_hint_kind): New parameter. (omp_lock_hint_none, omp_lock_hint_uncontended, omp_lock_hint_contended, omp_lock_hint_nonspeculative, omp_lock_hint_speculative): New parameters. (omp_init_lock_with_hint, omp_init_nest_lock_with_hint, omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids, omp_get_place_num, omp_get_partition_num_places, omp_get_partition_place_nums, omp_get_initial_device, omp_get_max_task_priority): New functions and subroutines. * ordered.c: Include stdarg.h and string.h. (MAX_COLLAPSED_BITS): Define. (gomp_doacross_init, GOMP_doacross_post, GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post, GOMP_doacross_ull_wait): New functions. * target.c: Include errno.h. (resolve_device): If device is not initialized, call gomp_init_device on it. (gomp_map_lookup): New function. (gomp_map_vars_existing): Add tgt_var argument, fill it in. Don't bump refcount if REFCOUNT_INFINITY. Handle GOMP_MAP_ALWAYS_TO_P. (get_kind): Rename is_openacc argument to short_mapkind. (gomp_map_pointer): Use gomp_map_lookup. (gomp_map_fields_existing): New function. (gomp_map_vars): Rename is_openacc argument to short_mapkind and is_target to pragma_kind. Handle GOMP_MAP_VARS_ENTER_DATA, handle GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_STRUCT, GOMP_MAP_USE_DEVICE_PTR, GOMP_MAP_ZERO_LEN_ARRAY_SECTION. Adjust for tgt->list changed type and copy_from living in there. (gomp_copy_from_async): Adjust for tgt->list changed type and copy_from living in there. (gomp_unmap_vars): Likewise. (gomp_update): Likewise. Rename is_openacc argument to short_mapkind. Don't fail if object is not mapped. (gomp_load_image_to_device): Initialize refcount to REFCOUNT_INFINITY. (gomp_target_fallback): New function. (gomp_get_target_fn_addr): Likewise. (GOMP_target): Adjust gomp_map_vars caller, use gomp_get_target_fn_addr and gomp_target_fallback. (GOMP_target_41): New function. (gomp_target_data_fallback): New function. (GOMP_target_data): Use it, adjust gomp_map_vars caller. (GOMP_target_data_41): New function. (GOMP_target_update): Adjust gomp_update caller. (GOMP_target_update_41): New function. (gomp_exit_data, GOMP_target_enter_exit_data, gomp_target_task_fn, omp_target_alloc, omp_target_free, omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect_worker, omp_target_memcpy_rect, omp_target_associate_ptr, omp_target_disassociate_ptr, gomp_load_plugin_for_device): New functions. * task.c: Include gomp-constants.h. Include taskloop.c twice to get GOMP_taskloop and GOMP_taskloop_ull definitions. (gomp_task_handle_depend): New function. (GOMP_task): Use it. Add priority argument. Use gomp-constant.h constants instead of hardcoded numbers. Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED. (gomp_create_target_task): New function. (verify_children_queue, verify_taskgroup_queue, verify_task_queue): New functions. (gomp_task_run_pre): Call verify_*_queue functions. If an upcoming tied task is about to leave the sibling or taskgroup queues in an invalid state, adjust appropriately. Remove taskgroup argument. Add comments. (gomp_task_run_post_handle_dependers): Add comments. (gomp_task_run_post_remove_parent): Likewise. (gomp_barrier_handle_tasks): Adjust gomp_task_run_pre caller. (GOMP_taskwait): Likewise. Add comments. (gomp_task_maybe_wait_for_dependencies): Fix scheduling problem such that the first non parent_depends_on task does not end up at the end of the children queue. (GOMP_taskgroup_start): Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED. (GOMP_taskgroup_end): Adjust gomp_task_run_pre caller. * taskloop.c: New file. * testsuite/lib/libgomp.exp (check_effective_target_offload_device_nonshared_as): New proc. * testsuite/libgomp.c/affinity-2.c: New test. * testsuite/libgomp.c/doacross-1.c: New test. * testsuite/libgomp.c/doacross-2.c: New test. * testsuite/libgomp.c/examples-4/declare_target-1.c (fib_wrapper): Add map clause to target. * testsuite/libgomp.c/examples-4/declare_target-4.c (accum): Likewise. * testsuite/libgomp.c/examples-4/declare_target-5.c (accum): Likewise. * testsuite/libgomp.c/examples-4/device-1.c (main): Likewise. * testsuite/libgomp.c/examples-4/device-3.c (main): Likewise. * testsuite/libgomp.c/examples-4/target_data-3.c (gramSchmidt): Likewise. * testsuite/libgomp.c/examples-4/teams-2.c (dotprod): Likewise. * testsuite/libgomp.c/examples-4/teams-3.c (dotprod): Likewise. * testsuite/libgomp.c/examples-4/teams-4.c (dotprod): Likewise. * testsuite/libgomp.c/for-2.h (OMPTGT, OMPTO, OMPFROM): Define if not defined. Use those where needed. * testsuite/libgomp.c/for-4.c: New test. * testsuite/libgomp.c/for-5.c: New test. * testsuite/libgomp.c/for-6.c: New test. * testsuite/libgomp.c/linear-1.c: New test. * testsuite/libgomp.c/ordered-4.c: New test. * testsuite/libgomp.c/pr66199-2.c (f2): Adjust for linear clause only allowed on the loop iterator. * testsuite/libgomp.c/pr66199-3.c: New test. * testsuite/libgomp.c/pr66199-4.c: New test. * testsuite/libgomp.c/reduction-7.c: New test. * testsuite/libgomp.c/reduction-8.c: New test. * testsuite/libgomp.c/reduction-9.c: New test. * testsuite/libgomp.c/reduction-10.c: New test. * testsuite/libgomp.c/target-1.c (fn2, fn3, fn4): Add map(tofrom:s). * testsuite/libgomp.c/target-2.c (fn2, fn3, fn4): Likewise. * testsuite/libgomp.c/target-7.c (foo): Add map(h) where needed. * testsuite/libgomp.c/target-11.c: New test. * testsuite/libgomp.c/target-12.c: New test. * testsuite/libgomp.c/target-13.c: New test. * testsuite/libgomp.c/target-14.c: New test. * testsuite/libgomp.c/target-15.c: New test. * testsuite/libgomp.c/target-16.c: New test. * testsuite/libgomp.c/target-17.c: New test. * testsuite/libgomp.c/target-18.c: New test. * testsuite/libgomp.c/target-19.c: New test. * testsuite/libgomp.c/target-20.c: New test. * testsuite/libgomp.c/target-21.c: New test. * testsuite/libgomp.c/target-22.c: New test. * testsuite/libgomp.c/target-23.c: New test. * testsuite/libgomp.c/target-24.c: New test. * testsuite/libgomp.c/target-25.c: New test. * testsuite/libgomp.c/target-26.c: New test. * testsuite/libgomp.c/target-27.c: New test. * testsuite/libgomp.c/taskloop-1.c: New test. * testsuite/libgomp.c/taskloop-2.c: New test. * testsuite/libgomp.c/taskloop-3.c: New test. * testsuite/libgomp.c/taskloop-4.c: New test. * testsuite/libgomp.c++/ctor-13.C: New test. * testsuite/libgomp.c++/doacross-1.C: New test. * testsuite/libgomp.c++/examples-4/declare_target-2.C: Replace offload_device with offload_device_nonshared_as. * testsuite/libgomp.c++/for-12.C: New test. * testsuite/libgomp.c++/for-13.C: New test. * testsuite/libgomp.c++/for-14.C: New test. * testsuite/libgomp.c++/linear-1.C: New test. * testsuite/libgomp.c++/member-1.C: New test. * testsuite/libgomp.c++/member-2.C: New test. * testsuite/libgomp.c++/member-3.C: New test. * testsuite/libgomp.c++/member-4.C: New test. * testsuite/libgomp.c++/member-5.C: New test. * testsuite/libgomp.c++/ordered-1.C: New test. * testsuite/libgomp.c++/reduction-5.C: New test. * testsuite/libgomp.c++/reduction-6.C: New test. * testsuite/libgomp.c++/reduction-7.C: New test. * testsuite/libgomp.c++/reduction-8.C: New test. * testsuite/libgomp.c++/reduction-9.C: New test. * testsuite/libgomp.c++/reduction-10.C: New test. * testsuite/libgomp.c++/reference-1.C: New test. * testsuite/libgomp.c++/simd14.C: New test. * testsuite/libgomp.c++/target-2.C (fn2): Add map(tofrom: s) clause. * testsuite/libgomp.c++/target-5.C: New test. * testsuite/libgomp.c++/target-6.C: New test. * testsuite/libgomp.c++/target-7.C: New test. * testsuite/libgomp.c++/target-8.C: New test. * testsuite/libgomp.c++/target-9.C: New test. * testsuite/libgomp.c++/target-10.C: New test. * testsuite/libgomp.c++/target-11.C: New test. * testsuite/libgomp.c++/target-12.C: New test. * testsuite/libgomp.c++/taskloop-1.C: New test. * testsuite/libgomp.c++/taskloop-2.C: New test. * testsuite/libgomp.c++/taskloop-3.C: New test. * testsuite/libgomp.c++/taskloop-4.C: New test. * testsuite/libgomp.c++/taskloop-5.C: New test. * testsuite/libgomp.c++/taskloop-6.C: New test. * testsuite/libgomp.c++/taskloop-7.C: New test. * testsuite/libgomp.c++/taskloop-8.C: New test. * testsuite/libgomp.c++/taskloop-9.C: New test. * testsuite/libgomp.fortran/affinity1.f90: New test. * testsuite/libgomp.fortran/affinity2.f90: New test. liboffloadmic/ 2015-10-13 Ilya Verbin <ilya.verbin@intel.com> * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_dev2dev): New function. * plugin/offload_target_main.cpp (__offload_target_tgt2tgt): New static function, register it in liboffloadmic. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228777 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog314
-rw-r--r--libgomp/config/linux/affinity.c39
-rw-r--r--libgomp/config/linux/doacross.h57
-rw-r--r--libgomp/config/posix/affinity.c24
-rw-r--r--libgomp/config/posix/doacross.h62
-rw-r--r--libgomp/env.c96
-rw-r--r--libgomp/fortran.c107
-rw-r--r--libgomp/libgomp.h157
-rw-r--r--libgomp/libgomp.map50
-rw-r--r--libgomp/libgomp.texi61
-rw-r--r--libgomp/libgomp_g.h52
-rw-r--r--libgomp/loop.c155
-rw-r--r--libgomp/loop_ull.c154
-rw-r--r--libgomp/oacc-mem.c11
-rw-r--r--libgomp/oacc-parallel.c11
-rw-r--r--libgomp/omp.h.in37
-rw-r--r--libgomp/omp_lib.f90.in122
-rw-r--r--libgomp/omp_lib.h.in30
-rw-r--r--libgomp/ordered.c521
-rw-r--r--libgomp/target.c1274
-rw-r--r--libgomp/task.c681
-rw-r--r--libgomp/taskloop.c363
-rw-r--r--libgomp/testsuite/lib/libgomp.exp13
-rw-r--r--libgomp/testsuite/libgomp.c++/ctor-13.C242
-rw-r--r--libgomp/testsuite/libgomp.c++/doacross-1.C294
-rw-r--r--libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C2
-rw-r--r--libgomp/testsuite/libgomp.c++/for-12.C42
-rw-r--r--libgomp/testsuite/libgomp.c++/for-13.C151
-rw-r--r--libgomp/testsuite/libgomp.c++/for-14.C120
-rw-r--r--libgomp/testsuite/libgomp.c++/linear-1.C268
-rw-r--r--libgomp/testsuite/libgomp.c++/member-1.C206
-rw-r--r--libgomp/testsuite/libgomp.c++/member-2.C211
-rw-r--r--libgomp/testsuite/libgomp.c++/member-3.C105
-rw-r--r--libgomp/testsuite/libgomp.c++/member-4.C108
-rw-r--r--libgomp/testsuite/libgomp.c++/member-5.C183
-rw-r--r--libgomp/testsuite/libgomp.c++/ordered-1.C1
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-10.C201
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-5.C127
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-6.C195
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-7.C134
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-8.C198
-rw-r--r--libgomp/testsuite/libgomp.c++/reduction-9.C130
-rw-r--r--libgomp/testsuite/libgomp.c++/reference-1.C57
-rw-r--r--libgomp/testsuite/libgomp.c++/simd14.C43
-rw-r--r--libgomp/testsuite/libgomp.c++/target-10.C154
-rw-r--r--libgomp/testsuite/libgomp.c++/target-11.C121
-rw-r--r--libgomp/testsuite/libgomp.c++/target-12.C93
-rw-r--r--libgomp/testsuite/libgomp.c++/target-2.C3
-rw-r--r--libgomp/testsuite/libgomp.c++/target-5.C1
-rw-r--r--libgomp/testsuite/libgomp.c++/target-6.C64
-rw-r--r--libgomp/testsuite/libgomp.c++/target-7.C90
-rw-r--r--libgomp/testsuite/libgomp.c++/target-8.C58
-rw-r--r--libgomp/testsuite/libgomp.c++/target-9.C73
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-1.C4
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-2.C6
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-3.C4
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-4.C4
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-5.C73
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-6.C442
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-7.C400
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-8.C250
-rw-r--r--libgomp/testsuite/libgomp.c++/taskloop-9.C323
-rw-r--r--libgomp/testsuite/libgomp.c/affinity-2.c89
-rw-r--r--libgomp/testsuite/libgomp.c/doacross-1.c181
-rw-r--r--libgomp/testsuite/libgomp.c/doacross-2.c225
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/device-1.c12
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/device-3.c4
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/target_data-3.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/teams-2.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/teams-3.c2
-rw-r--r--libgomp/testsuite/libgomp.c/examples-4/teams-4.c2
-rw-r--r--libgomp/testsuite/libgomp.c/for-2.h41
-rw-r--r--libgomp/testsuite/libgomp.c/for-4.c42
-rw-r--r--libgomp/testsuite/libgomp.c/for-5.c154
-rw-r--r--libgomp/testsuite/libgomp.c/for-6.c123
-rw-r--r--libgomp/testsuite/libgomp.c/linear-1.c250
-rw-r--r--libgomp/testsuite/libgomp.c/ordered-4.c83
-rw-r--r--libgomp/testsuite/libgomp.c/pr66199-2.c5
-rw-r--r--libgomp/testsuite/libgomp.c/pr66199-3.c50
-rw-r--r--libgomp/testsuite/libgomp.c/pr66199-4.c58
-rw-r--r--libgomp/testsuite/libgomp.c/reduction-10.c105
-rw-r--r--libgomp/testsuite/libgomp.c/reduction-7.c64
-rw-r--r--libgomp/testsuite/libgomp.c/reduction-8.c98
-rw-r--r--libgomp/testsuite/libgomp.c/reduction-9.c71
-rw-r--r--libgomp/testsuite/libgomp.c/target-1.c7
-rw-r--r--libgomp/testsuite/libgomp.c/target-11.c86
-rw-r--r--libgomp/testsuite/libgomp.c/target-12.c130
-rw-r--r--libgomp/testsuite/libgomp.c/target-13.c45
-rw-r--r--libgomp/testsuite/libgomp.c/target-14.c38
-rw-r--r--libgomp/testsuite/libgomp.c/target-15.c74
-rw-r--r--libgomp/testsuite/libgomp.c/target-16.c45
-rw-r--r--libgomp/testsuite/libgomp.c/target-17.c99
-rw-r--r--libgomp/testsuite/libgomp.c/target-18.c52
-rw-r--r--libgomp/testsuite/libgomp.c/target-19.c127
-rw-r--r--libgomp/testsuite/libgomp.c/target-2.c6
-rw-r--r--libgomp/testsuite/libgomp.c/target-20.c120
-rw-r--r--libgomp/testsuite/libgomp.c/target-21.c79
-rw-r--r--libgomp/testsuite/libgomp.c/target-22.c51
-rw-r--r--libgomp/testsuite/libgomp.c/target-23.c48
-rw-r--r--libgomp/testsuite/libgomp.c/target-24.c43
-rw-r--r--libgomp/testsuite/libgomp.c/target-25.c84
-rw-r--r--libgomp/testsuite/libgomp.c/target-26.c36
-rw-r--r--libgomp/testsuite/libgomp.c/target-27.c67
-rw-r--r--libgomp/testsuite/libgomp.c/target-7.c18
-rw-r--r--libgomp/testsuite/libgomp.c/taskloop-1.c46
-rw-r--r--libgomp/testsuite/libgomp.c/taskloop-2.c147
-rw-r--r--libgomp/testsuite/libgomp.c/taskloop-3.c84
-rw-r--r--libgomp/testsuite/libgomp.c/taskloop-4.c97
-rw-r--r--libgomp/testsuite/libgomp.fortran/affinity1.f9049
-rw-r--r--libgomp/testsuite/libgomp.fortran/affinity2.f908
113 files changed, 12498 insertions, 429 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 74a135cd55b..df4e9370f81 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,317 @@
+2015-10-13 Jakub Jelinek <jakub@redhat.com>
+ Aldy Hernandez <aldyh@redhat.com>
+ Ilya Verbin <ilya.verbin@intel.com>
+
+ * config/linux/affinity.c (omp_get_place_num_procs,
+ omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions.
+ * config/linux/doacross.h: New file.
+ * config/posix/affinity.c (omp_get_place_num_procs,
+ omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions.
+ * config/posix/doacross.h: New file.
+ * env.c: Include gomp-constants.h.
+ (struct gomp_task_icv): Rename run_sched_modifier to
+ run_sched_chunk_size.
+ (gomp_max_task_priority_var): New variable.
+ (parse_schedule): Rename run_sched_modifier to run_sched_chunk_size.
+ (handle_omp_display_env): Change _OPENMP value from 201307 to
+ 201511. Print OMP_MAX_TASK_PRIORITY.
+ (initialize_env): Parse OMP_MAX_TASK_PRIORITY.
+ (omp_set_schedule, omp_get_schedule): Rename modifier argument to
+ chunk_size and run_sched_modifier to run_sched_chunk_size.
+ (omp_get_max_task_priority, omp_get_initial_device,
+ omp_get_num_places, omp_get_place_num, omp_get_partition_num_places,
+ omp_get_partition_place_nums): New functions.
+ * fortran.c (omp_set_schedule_, omp_set_schedule_8_,
+ omp_get_schedule_, omp_get_schedule_8_): Rename modifier argument
+ to chunk_size.
+ (omp_get_num_places_, omp_get_place_num_procs_,
+ omp_get_place_num_procs_8_, omp_get_place_proc_ids_,
+ omp_get_place_proc_ids_8_, omp_get_place_num_,
+ omp_get_partition_num_places_, omp_get_partition_place_nums_,
+ omp_get_partition_place_nums_8_, omp_get_initial_device_,
+ omp_get_max_task_priority_): New functions.
+ * libgomp_g.h (GOMP_loop_doacross_static_start,
+ GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start,
+ GOMP_loop_doacross_runtime_start, GOMP_loop_ull_doacross_static_start,
+ GOMP_loop_ull_doacross_dynamic_start,
+ GOMP_loop_ull_doacross_guided_start,
+ GOMP_loop_ull_doacross_runtime_start, GOMP_doacross_post,
+ GOMP_doacross_wait, GOMP_doacross_ull_post, GOMP_doacross_wait,
+ GOMP_taskloop, GOMP_taskloop_ull, GOMP_target_41,
+ GOMP_target_data_41, GOMP_target_update_41,
+ GOMP_target_enter_exit_data): New prototypes.
+ (GOMP_task): Add prototype argument.
+ * libgomp.h (_LIBGOMP_CHECKING_): Define to 0 if not yet defined.
+ (struct gomp_doacross_work_share): New type.
+ (struct gomp_work_share): Add doacross field.
+ (struct gomp_task_icv): Rename run_sched_modifier to
+ run_sched_chunk_size.
+ (enum gomp_task_kind): Rename GOMP_TASK_IFFALSE to
+ GOMP_TASK_UNDEFERRED. Add comments.
+ (struct gomp_task_depend_entry): Add comments.
+ (struct gomp_task): Likewise.
+ (struct gomp_taskgroup): Likewise.
+ (struct gomp_target_task): New type.
+ (struct gomp_team): Add comment.
+ (gomp_get_place_proc_ids_8, gomp_doacross_init,
+ gomp_doacross_ull_init, gomp_task_maybe_wait_for_dependencies,
+ gomp_create_target_task, gomp_target_task_fn): New prototypes.
+ (struct target_var_desc): New type.
+ (struct target_mem_desc): Adjust comment. Use struct
+ target_var_desc instead of splay_tree_key for list.
+ (REFCOUNT_INFINITY): Define.
+ (struct splay_tree_key_s): Remove copy_from field.
+ (struct gomp_device_descr): Add dev2dev_func field.
+ (enum gomp_map_vars_kind): New enum.
+ (gomp_map_vars): Add one argument.
+ * libgomp.map (OMP_4.5): Export omp_get_max_task_priority,
+ omp_get_max_task_priority_, omp_get_num_places, omp_get_num_places_,
+ omp_get_place_num_procs, omp_get_place_num_procs_,
+ omp_get_place_num_procs_8_, omp_get_place_proc_ids,
+ omp_get_place_proc_ids_, omp_get_place_proc_ids_8_, omp_get_place_num,
+ omp_get_place_num_, omp_get_partition_num_places,
+ omp_get_partition_num_places_, omp_get_partition_place_nums,
+ omp_get_partition_place_nums_, omp_get_partition_place_nums_8_,
+ omp_get_initial_device, omp_get_initial_device_, omp_target_alloc,
+ omp_target_free, omp_target_is_present, omp_target_memcpy,
+ omp_target_memcpy_rect, omp_target_associate_ptr and
+ omp_target_disassociate_ptr.
+ (GOMP_4.0.2): Renamed to ...
+ (GOMP_4.5): ... this. Export GOMP_target_41, GOMP_target_data_41,
+ GOMP_target_update_41, GOMP_target_enter_exit_data, GOMP_taskloop,
+ GOMP_taskloop_ull, GOMP_loop_doacross_dynamic_start,
+ GOMP_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start,
+ GOMP_loop_doacross_static_start, GOMP_doacross_post,
+ GOMP_doacross_wait, GOMP_loop_ull_doacross_dynamic_start,
+ GOMP_loop_ull_doacross_guided_start,
+ GOMP_loop_ull_doacross_runtime_start,
+ GOMP_loop_ull_doacross_static_start, GOMP_doacross_ull_post and
+ GOMP_doacross_ull_wait.
+ * libgomp.texi: Document omp_get_max_task_priority.
+ Rename modifier argument to chunk_size for omp_set_schedule and
+ omp_get_schedule. Document OMP_MAX_TASK_PRIORITY env var.
+ * loop.c (GOMP_loop_runtime_start): Adjust for run_sched_modifier
+ to run_sched_chunk_size renaming.
+ (GOMP_loop_ordered_runtime_start): Likewise.
+ (gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
+ gomp_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start,
+ GOMP_parallel_loop_runtime_start): New functions.
+ (GOMP_parallel_loop_runtime): Adjust for run_sched_modifier
+ to run_sched_chunk_size renaming.
+ (GOMP_loop_doacross_static_start, GOMP_loop_doacross_dynamic_start,
+ GOMP_loop_doacross_guided_start): New functions or aliases.
+ * loop_ull.c (GOMP_loop_ull_runtime_start): Adjust for
+ run_sched_modifier to run_sched_chunk_size renaming.
+ (GOMP_loop_ull_ordered_runtime_start): Likewise.
+ (gomp_loop_ull_doacross_static_start,
+ gomp_loop_ull_doacross_dynamic_start,
+ gomp_loop_ull_doacross_guided_start,
+ GOMP_loop_ull_doacross_runtime_start): New functions.
+ (GOMP_loop_ull_doacross_static_start,
+ GOMP_loop_ull_doacross_dynamic_start,
+ GOMP_loop_ull_doacross_guided_start): New functions or aliases.
+ * oacc-mem.c (acc_map_data, present_create_copy,
+ gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false
+ to gomp_map_vars.
+ (gomp_acc_remove_pointer): Use copy_from from target_var_desc.
+ * oacc-parallel.c (GOACC_data_start): Pass GOMP_MAP_VARS_OPENACC
+ instead of false to gomp_map_vars.
+ (GOACC_parallel_keyed): Likewise. Use copy_from from target_var_desc.
+ * omp.h.in (omp_lock_hint_t): New type.
+ (omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
+ omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
+ omp_get_place_num, omp_get_partition_num_places,
+ omp_get_partition_place_nums, omp_get_initial_device,
+ omp_get_max_task_priority, omp_target_alloc, omp_target_free,
+ omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect,
+ omp_target_associate_ptr, omp_target_disassociate_ptr): New
+ prototypes.
+ * omp_lib.f90.in (omp_lock_hint_kind): New parameter.
+ (omp_lock_hint_none, omp_lock_hint_uncontended,
+ omp_lock_hint_contended, omp_lock_hint_nonspeculative,
+ omp_lock_hint_speculative): New parameters.
+ (omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
+ omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
+ omp_get_place_num, omp_get_partition_num_places,
+ omp_get_partition_place_nums, omp_get_initial_device,
+ omp_get_max_task_priority): New interfaces.
+ (omp_set_schedule, omp_get_schedule): Rename modifier argument
+ to chunk_size.
+ * omp_lib.h.in (omp_lock_hint_kind): New parameter.
+ (omp_lock_hint_none, omp_lock_hint_uncontended,
+ omp_lock_hint_contended, omp_lock_hint_nonspeculative,
+ omp_lock_hint_speculative): New parameters.
+ (omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
+ omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
+ omp_get_place_num, omp_get_partition_num_places,
+ omp_get_partition_place_nums, omp_get_initial_device,
+ omp_get_max_task_priority): New functions and subroutines.
+ * ordered.c: Include stdarg.h and string.h.
+ (MAX_COLLAPSED_BITS): Define.
+ (gomp_doacross_init, GOMP_doacross_post, GOMP_doacross_wait,
+ gomp_doacross_ull_init, GOMP_doacross_ull_post,
+ GOMP_doacross_ull_wait): New functions.
+ * target.c: Include errno.h.
+ (resolve_device): If device is not initialized, call
+ gomp_init_device on it.
+ (gomp_map_lookup): New function.
+ (gomp_map_vars_existing): Add tgt_var argument, fill it in.
+ Don't bump refcount if REFCOUNT_INFINITY. Handle
+ GOMP_MAP_ALWAYS_TO_P.
+ (get_kind): Rename is_openacc argument to short_mapkind.
+ (gomp_map_pointer): Use gomp_map_lookup.
+ (gomp_map_fields_existing): New function.
+ (gomp_map_vars): Rename is_openacc argument to short_mapkind
+ and is_target to pragma_kind. Handle GOMP_MAP_VARS_ENTER_DATA,
+ handle GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_STRUCT,
+ GOMP_MAP_USE_DEVICE_PTR, GOMP_MAP_ZERO_LEN_ARRAY_SECTION.
+ Adjust for tgt->list changed type and copy_from living in there.
+ (gomp_copy_from_async): Adjust for tgt->list changed type and
+ copy_from living in there.
+ (gomp_unmap_vars): Likewise.
+ (gomp_update): Likewise. Rename is_openacc argument to
+ short_mapkind. Don't fail if object is not mapped.
+ (gomp_load_image_to_device): Initialize refcount to
+ REFCOUNT_INFINITY.
+ (gomp_target_fallback): New function.
+ (gomp_get_target_fn_addr): Likewise.
+ (GOMP_target): Adjust gomp_map_vars caller, use
+ gomp_get_target_fn_addr and gomp_target_fallback.
+ (GOMP_target_41): New function.
+ (gomp_target_data_fallback): New function.
+ (GOMP_target_data): Use it, adjust gomp_map_vars caller.
+ (GOMP_target_data_41): New function.
+ (GOMP_target_update): Adjust gomp_update caller.
+ (GOMP_target_update_41): New function.
+ (gomp_exit_data, GOMP_target_enter_exit_data,
+ gomp_target_task_fn, omp_target_alloc, omp_target_free,
+ omp_target_is_present, omp_target_memcpy,
+ omp_target_memcpy_rect_worker, omp_target_memcpy_rect,
+ omp_target_associate_ptr, omp_target_disassociate_ptr,
+ gomp_load_plugin_for_device): New functions.
+ * task.c: Include gomp-constants.h. Include taskloop.c
+ twice to get GOMP_taskloop and GOMP_taskloop_ull definitions.
+ (gomp_task_handle_depend): New function.
+ (GOMP_task): Use it. Add priority argument. Use
+ gomp-constant.h constants instead of hardcoded numbers.
+ Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED.
+ (gomp_create_target_task): New function.
+ (verify_children_queue, verify_taskgroup_queue,
+ verify_task_queue): New functions.
+ (gomp_task_run_pre): Call verify_*_queue functions.
+ If an upcoming tied task is about to leave the sibling or
+ taskgroup queues in an invalid state, adjust appropriately.
+ Remove taskgroup argument. Add comments.
+ (gomp_task_run_post_handle_dependers): Add comments.
+ (gomp_task_run_post_remove_parent): Likewise.
+ (gomp_barrier_handle_tasks): Adjust gomp_task_run_pre caller.
+ (GOMP_taskwait): Likewise. Add comments.
+ (gomp_task_maybe_wait_for_dependencies): Fix scheduling
+ problem such that the first non parent_depends_on task does not
+ end up at the end of the children queue.
+ (GOMP_taskgroup_start): Rename GOMP_TASK_IFFALSE to
+ GOMP_TASK_UNDEFERRED.
+ (GOMP_taskgroup_end): Adjust gomp_task_run_pre caller.
+ * taskloop.c: New file.
+ * testsuite/lib/libgomp.exp
+ (check_effective_target_offload_device_nonshared_as): New proc.
+ * testsuite/libgomp.c/affinity-2.c: New test.
+ * testsuite/libgomp.c/doacross-1.c: New test.
+ * testsuite/libgomp.c/doacross-2.c: New test.
+ * testsuite/libgomp.c/examples-4/declare_target-1.c (fib_wrapper):
+ Add map clause to target.
+ * testsuite/libgomp.c/examples-4/declare_target-4.c (accum): Likewise.
+ * testsuite/libgomp.c/examples-4/declare_target-5.c (accum): Likewise.
+ * testsuite/libgomp.c/examples-4/device-1.c (main): Likewise.
+ * testsuite/libgomp.c/examples-4/device-3.c (main): Likewise.
+ * testsuite/libgomp.c/examples-4/target_data-3.c (gramSchmidt):
+ Likewise.
+ * testsuite/libgomp.c/examples-4/teams-2.c (dotprod): Likewise.
+ * testsuite/libgomp.c/examples-4/teams-3.c (dotprod): Likewise.
+ * testsuite/libgomp.c/examples-4/teams-4.c (dotprod): Likewise.
+ * testsuite/libgomp.c/for-2.h (OMPTGT, OMPTO, OMPFROM): Define if
+ not defined. Use those where needed.
+ * testsuite/libgomp.c/for-4.c: New test.
+ * testsuite/libgomp.c/for-5.c: New test.
+ * testsuite/libgomp.c/for-6.c: New test.
+ * testsuite/libgomp.c/linear-1.c: New test.
+ * testsuite/libgomp.c/ordered-4.c: New test.
+ * testsuite/libgomp.c/pr66199-2.c (f2): Adjust for linear clause
+ only allowed on the loop iterator.
+ * testsuite/libgomp.c/pr66199-3.c: New test.
+ * testsuite/libgomp.c/pr66199-4.c: New test.
+ * testsuite/libgomp.c/reduction-7.c: New test.
+ * testsuite/libgomp.c/reduction-8.c: New test.
+ * testsuite/libgomp.c/reduction-9.c: New test.
+ * testsuite/libgomp.c/reduction-10.c: New test.
+ * testsuite/libgomp.c/target-1.c (fn2, fn3, fn4): Add
+ map(tofrom:s).
+ * testsuite/libgomp.c/target-2.c (fn2, fn3, fn4): Likewise.
+ * testsuite/libgomp.c/target-7.c (foo): Add map(h) where needed.
+ * testsuite/libgomp.c/target-11.c: New test.
+ * testsuite/libgomp.c/target-12.c: New test.
+ * testsuite/libgomp.c/target-13.c: New test.
+ * testsuite/libgomp.c/target-14.c: New test.
+ * testsuite/libgomp.c/target-15.c: New test.
+ * testsuite/libgomp.c/target-16.c: New test.
+ * testsuite/libgomp.c/target-17.c: New test.
+ * testsuite/libgomp.c/target-18.c: New test.
+ * testsuite/libgomp.c/target-19.c: New test.
+ * testsuite/libgomp.c/target-20.c: New test.
+ * testsuite/libgomp.c/target-21.c: New test.
+ * testsuite/libgomp.c/target-22.c: New test.
+ * testsuite/libgomp.c/target-23.c: New test.
+ * testsuite/libgomp.c/target-24.c: New test.
+ * testsuite/libgomp.c/target-25.c: New test.
+ * testsuite/libgomp.c/target-26.c: New test.
+ * testsuite/libgomp.c/target-27.c: New test.
+ * testsuite/libgomp.c/taskloop-1.c: New test.
+ * testsuite/libgomp.c/taskloop-2.c: New test.
+ * testsuite/libgomp.c/taskloop-3.c: New test.
+ * testsuite/libgomp.c/taskloop-4.c: New test.
+ * testsuite/libgomp.c++/ctor-13.C: New test.
+ * testsuite/libgomp.c++/doacross-1.C: New test.
+ * testsuite/libgomp.c++/examples-4/declare_target-2.C:
+ Replace offload_device with offload_device_nonshared_as.
+ * testsuite/libgomp.c++/for-12.C: New test.
+ * testsuite/libgomp.c++/for-13.C: New test.
+ * testsuite/libgomp.c++/for-14.C: New test.
+ * testsuite/libgomp.c++/linear-1.C: New test.
+ * testsuite/libgomp.c++/member-1.C: New test.
+ * testsuite/libgomp.c++/member-2.C: New test.
+ * testsuite/libgomp.c++/member-3.C: New test.
+ * testsuite/libgomp.c++/member-4.C: New test.
+ * testsuite/libgomp.c++/member-5.C: New test.
+ * testsuite/libgomp.c++/ordered-1.C: New test.
+ * testsuite/libgomp.c++/reduction-5.C: New test.
+ * testsuite/libgomp.c++/reduction-6.C: New test.
+ * testsuite/libgomp.c++/reduction-7.C: New test.
+ * testsuite/libgomp.c++/reduction-8.C: New test.
+ * testsuite/libgomp.c++/reduction-9.C: New test.
+ * testsuite/libgomp.c++/reduction-10.C: New test.
+ * testsuite/libgomp.c++/reference-1.C: New test.
+ * testsuite/libgomp.c++/simd14.C: New test.
+ * testsuite/libgomp.c++/target-2.C (fn2): Add map(tofrom: s) clause.
+ * testsuite/libgomp.c++/target-5.C: New test.
+ * testsuite/libgomp.c++/target-6.C: New test.
+ * testsuite/libgomp.c++/target-7.C: New test.
+ * testsuite/libgomp.c++/target-8.C: New test.
+ * testsuite/libgomp.c++/target-9.C: New test.
+ * testsuite/libgomp.c++/target-10.C: New test.
+ * testsuite/libgomp.c++/target-11.C: New test.
+ * testsuite/libgomp.c++/target-12.C: New test.
+ * testsuite/libgomp.c++/taskloop-1.C: New test.
+ * testsuite/libgomp.c++/taskloop-2.C: New test.
+ * testsuite/libgomp.c++/taskloop-3.C: New test.
+ * testsuite/libgomp.c++/taskloop-4.C: New test.
+ * testsuite/libgomp.c++/taskloop-5.C: New test.
+ * testsuite/libgomp.c++/taskloop-6.C: New test.
+ * testsuite/libgomp.c++/taskloop-7.C: New test.
+ * testsuite/libgomp.c++/taskloop-8.C: New test.
+ * testsuite/libgomp.c++/taskloop-9.C: New test.
+ * testsuite/libgomp.fortran/affinity1.f90: New test.
+ * testsuite/libgomp.fortran/affinity2.f90: New test.
+
2015-10-13 Tom de Vries <tom@codesourcery.com>
PR tree-optimization/67476
diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c
index 17b65afb49a..775ee0a7fdf 100644
--- a/libgomp/config/linux/affinity.c
+++ b/libgomp/config/linux/affinity.c
@@ -353,6 +353,45 @@ gomp_affinity_print_place (void *p)
fprintf (stderr, ":%lu", len);
}
+int
+omp_get_place_num_procs (int place_num)
+{
+ if (place_num < 0 || place_num >= gomp_places_list_len)
+ return 0;
+
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
+ return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp);
+}
+
+void
+omp_get_place_proc_ids (int place_num, int *ids)
+{
+ if (place_num < 0 || place_num >= gomp_places_list_len)
+ return;
+
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
+ unsigned long i, max = 8 * gomp_cpuset_size;
+ for (i = 0; i < max; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
+ *ids++ = i;
+}
+
+void
+gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
+{
+ if (place_num < 0 || place_num >= gomp_places_list_len)
+ return;
+
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
+ unsigned long i, max = 8 * gomp_cpuset_size;
+ for (i = 0; i < max; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
+ *ids++ = i;
+}
+
+ialias(omp_get_place_num_procs)
+ialias(omp_get_place_proc_ids)
+
#else
#include "../posix/affinity.c"
diff --git a/libgomp/config/linux/doacross.h b/libgomp/config/linux/doacross.h
new file mode 100644
index 00000000000..7a5a645f3cf
--- /dev/null
+++ b/libgomp/config/linux/doacross.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a Linux specific implementation of doacross spinning. */
+
+#ifndef GOMP_DOACROSS_H
+#define GOMP_DOACROSS_H 1
+
+#include "libgomp.h"
+#include <errno.h>
+#include "wait.h"
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility push(hidden)
+#endif
+
+static inline void doacross_spin (unsigned long *addr, unsigned long expected,
+ unsigned long cur)
+{
+ /* FIXME: back off depending on how large expected - cur is. */
+ do
+ {
+ cpu_relax ();
+ cur = __atomic_load_n (addr, MEMMODEL_RELAXED);
+ if (expected < cur)
+ return;
+ }
+ while (1);
+}
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility pop
+#endif
+
+#endif /* GOMP_DOACROSS_H */
diff --git a/libgomp/config/posix/affinity.c b/libgomp/config/posix/affinity.c
index 6840d3a727d..9008853c953 100644
--- a/libgomp/config/posix/affinity.c
+++ b/libgomp/config/posix/affinity.c
@@ -114,3 +114,27 @@ gomp_affinity_print_place (void *p)
{
(void) p;
}
+
+int
+omp_get_place_num_procs (int place_num)
+{
+ (void) place_num;
+ return 0;
+}
+
+void
+omp_get_place_proc_ids (int place_num, int *ids)
+{
+ (void) place_num;
+ (void) ids;
+}
+
+void
+gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
+{
+ (void) place_num;
+ (void) ids;
+}
+
+ialias(omp_get_place_num_procs)
+ialias(omp_get_place_proc_ids)
diff --git a/libgomp/config/posix/doacross.h b/libgomp/config/posix/doacross.h
new file mode 100644
index 00000000000..537bcbba51c
--- /dev/null
+++ b/libgomp/config/posix/doacross.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a generic implementation of doacross spinning. */
+
+#ifndef GOMP_DOACROSS_H
+#define GOMP_DOACROSS_H 1
+
+#include "libgomp.h"
+#include <errno.h>
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility push(hidden)
+#endif
+
+static inline void
+cpu_relax (void)
+{
+ __asm volatile ("" : : : "memory");
+}
+
+static inline void doacross_spin (unsigned long *addr, unsigned long expected,
+ unsigned long cur)
+{
+ /* FIXME: back off depending on how large expected - cur is. */
+ do
+ {
+ cpu_relax ();
+ cur = __atomic_load_n (addr, MEMMODEL_RELAXED);
+ if (expected < cur)
+ return;
+ }
+ while (1);
+}
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility pop
+#endif
+
+#endif /* GOMP_DOACROSS_H */
diff --git a/libgomp/env.c b/libgomp/env.c
index 6b5e963c4ea..5d6cdcf0184 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -29,6 +29,7 @@
#include "libgomp.h"
#include "libgomp_f.h"
#include "oacc-int.h"
+#include "gomp-constants.h"
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
@@ -58,7 +59,7 @@ struct gomp_task_icv gomp_global_icv = {
.nthreads_var = 1,
.thread_limit_var = UINT_MAX,
.run_sched_var = GFS_DYNAMIC,
- .run_sched_modifier = 1,
+ .run_sched_chunk_size = 1,
.default_device_var = 0,
.dyn_var = false,
.nest_var = false,
@@ -68,6 +69,7 @@ struct gomp_task_icv gomp_global_icv = {
unsigned long gomp_max_active_levels_var = INT_MAX;
bool gomp_cancel_var = false;
+int gomp_max_task_priority_var = 0;
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_t gomp_managed_threads_lock;
#endif
@@ -123,7 +125,7 @@ parse_schedule (void)
++env;
if (*env == '\0')
{
- gomp_global_icv.run_sched_modifier
+ gomp_global_icv.run_sched_chunk_size
= gomp_global_icv.run_sched_var != GFS_STATIC;
return;
}
@@ -149,7 +151,7 @@ parse_schedule (void)
if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
value = 1;
- gomp_global_icv.run_sched_modifier = value;
+ gomp_global_icv.run_sched_chunk_size = value;
return;
unknown:
@@ -1069,7 +1071,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr);
- fputs (" _OPENMP = '201307'\n", stderr);
+ fputs (" _OPENMP = '201511'\n", stderr);
fprintf (stderr, " OMP_DYNAMIC = '%s'\n",
gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_NESTED = '%s'\n",
@@ -1157,6 +1159,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
gomp_cancel_var ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n",
gomp_global_icv.default_device_var);
+ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n",
+ gomp_max_task_priority_var);
if (verbose)
{
@@ -1189,6 +1193,7 @@ initialize_env (void)
parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
+ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
true);
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
@@ -1337,21 +1342,21 @@ omp_get_nested (void)
}
void
-omp_set_schedule (omp_sched_t kind, int modifier)
+omp_set_schedule (omp_sched_t kind, int chunk_size)
{
struct gomp_task_icv *icv = gomp_icv (true);
switch (kind)
{
case omp_sched_static:
- if (modifier < 1)
- modifier = 0;
- icv->run_sched_modifier = modifier;
+ if (chunk_size < 1)
+ chunk_size = 0;
+ icv->run_sched_chunk_size = chunk_size;
break;
case omp_sched_dynamic:
case omp_sched_guided:
- if (modifier < 1)
- modifier = 1;
- icv->run_sched_modifier = modifier;
+ if (chunk_size < 1)
+ chunk_size = 1;
+ icv->run_sched_chunk_size = chunk_size;
break;
case omp_sched_auto:
break;
@@ -1362,11 +1367,11 @@ omp_set_schedule (omp_sched_t kind, int modifier)
}
void
-omp_get_schedule (omp_sched_t *kind, int *modifier)
+omp_get_schedule (omp_sched_t *kind, int *chunk_size)
{
struct gomp_task_icv *icv = gomp_icv (false);
*kind = icv->run_sched_var;
- *modifier = icv->run_sched_modifier;
+ *chunk_size = icv->run_sched_chunk_size;
}
int
@@ -1402,6 +1407,12 @@ omp_get_cancellation (void)
return gomp_cancel_var;
}
+int
+omp_get_max_task_priority (void)
+{
+ return gomp_max_task_priority_var;
+}
+
omp_proc_bind_t
omp_get_proc_bind (void)
{
@@ -1450,6 +1461,59 @@ omp_is_initial_device (void)
return 1;
}
+int
+omp_get_initial_device (void)
+{
+ return GOMP_DEVICE_HOST_FALLBACK;
+}
+
+int
+omp_get_num_places (void)
+{
+ return gomp_places_list_len;
+}
+
+int
+omp_get_place_num (void)
+{
+ if (gomp_places_list == NULL)
+ return -1;
+
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->place == 0)
+ gomp_init_affinity ();
+
+ return (int) thr->place - 1;
+}
+
+int
+omp_get_partition_num_places (void)
+{
+ if (gomp_places_list == NULL)
+ return 0;
+
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->place == 0)
+ gomp_init_affinity ();
+
+ return thr->ts.place_partition_len;
+}
+
+void
+omp_get_partition_place_nums (int *place_nums)
+{
+ if (gomp_places_list == NULL)
+ return;
+
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->place == 0)
+ gomp_init_affinity ();
+
+ unsigned int i;
+ for (i = 0; i < thr->ts.place_partition_len; i++)
+ *place_nums++ = thr->ts.place_partition_off + i;
+}
+
ialias (omp_set_dynamic)
ialias (omp_set_nested)
ialias (omp_set_num_threads)
@@ -1469,3 +1533,9 @@ ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
+ialias (omp_get_initial_device)
+ialias (omp_get_max_task_priority)
+ialias (omp_get_num_places)
+ialias (omp_get_place_num)
+ialias (omp_get_partition_num_places)
+ialias (omp_get_partition_place_nums)
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 993145f8890..ceff9ac48e6 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -68,12 +68,20 @@ ialias_redirect (omp_get_active_level)
ialias_redirect (omp_in_final)
ialias_redirect (omp_get_cancellation)
ialias_redirect (omp_get_proc_bind)
+ialias_redirect (omp_get_num_places)
+ialias_redirect (omp_get_place_num_procs)
+ialias_redirect (omp_get_place_proc_ids)
+ialias_redirect (omp_get_place_num)
+ialias_redirect (omp_get_partition_num_places)
+ialias_redirect (omp_get_partition_place_nums)
ialias_redirect (omp_set_default_device)
ialias_redirect (omp_get_default_device)
ialias_redirect (omp_get_num_devices)
ialias_redirect (omp_get_num_teams)
ialias_redirect (omp_get_team_num)
ialias_redirect (omp_is_initial_device)
+ialias_redirect (omp_get_initial_device)
+ialias_redirect (omp_get_max_task_priority)
#endif
#ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
@@ -343,35 +351,35 @@ omp_get_wtime_ (void)
}
void
-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier)
+omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size)
{
- omp_set_schedule (*kind, *modifier);
+ omp_set_schedule (*kind, *chunk_size);
}
void
-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier)
+omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size)
{
- omp_set_schedule (*kind, TO_INT (*modifier));
+ omp_set_schedule (*kind, TO_INT (*chunk_size));
}
void
-omp_get_schedule_ (int32_t *kind, int32_t *modifier)
+omp_get_schedule_ (int32_t *kind, int32_t *chunk_size)
{
omp_sched_t k;
- int m;
- omp_get_schedule (&k, &m);
+ int cs;
+ omp_get_schedule (&k, &cs);
*kind = k;
- *modifier = m;
+ *chunk_size = cs;
}
void
-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier)
+omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size)
{
omp_sched_t k;
- int m;
- omp_get_schedule (&k, &m);
+ int cs;
+ omp_get_schedule (&k, &cs);
*kind = k;
- *modifier = m;
+ *chunk_size = cs;
}
int32_t
@@ -452,6 +460,69 @@ omp_get_proc_bind_ (void)
return omp_get_proc_bind ();
}
+int32_t
+omp_get_num_places_ (void)
+{
+ return omp_get_num_places ();
+}
+
+int32_t
+omp_get_place_num_procs_ (const int32_t *place_num)
+{
+ return omp_get_place_num_procs (*place_num);
+}
+
+int32_t
+omp_get_place_num_procs_8_ (const int64_t *place_num)
+{
+ return omp_get_place_num_procs (TO_INT (*place_num));
+}
+
+void
+omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids)
+{
+ omp_get_place_proc_ids (*place_num, ids);
+}
+
+void
+omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids)
+{
+ gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids);
+}
+
+int32_t
+omp_get_place_num_ (void)
+{
+ return omp_get_place_num ();
+}
+
+int32_t
+omp_get_partition_num_places_ (void)
+{
+ return omp_get_partition_num_places ();
+}
+
+void
+omp_get_partition_place_nums_ (int32_t *place_nums)
+{
+ omp_get_partition_place_nums (place_nums);
+}
+
+void
+omp_get_partition_place_nums_8_ (int64_t *place_nums)
+{
+ if (gomp_places_list == NULL)
+ return;
+
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->place == 0)
+ gomp_init_affinity ();
+
+ unsigned int i;
+ for (i = 0; i < thr->ts.place_partition_len; i++)
+ *place_nums++ = (int64_t) thr->ts.place_partition_off + i;
+}
+
void
omp_set_default_device_ (const int32_t *device_num)
{
@@ -493,3 +564,15 @@ omp_is_initial_device_ (void)
{
return omp_is_initial_device ();
}
+
+int32_t
+omp_get_initial_device_ (void)
+{
+ return omp_get_initial_device ();
+}
+
+int32_t
+omp_get_max_task_priority_ (void)
+{
+ return omp_get_max_task_priority ();
+}
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 04262c4ab28..9c8b1fb8744 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -36,6 +36,11 @@
#ifndef LIBGOMP_H
#define LIBGOMP_H 1
+#ifndef _LIBGOMP_CHECKING_
+/* Define to 1 to perform internal sanity checks. */
+#define _LIBGOMP_CHECKING_ 0
+#endif
+
#include "config.h"
#include "gstdint.h"
#include "libgomp-plugin.h"
@@ -78,6 +83,44 @@ enum gomp_schedule_type
GFS_AUTO
};
+struct gomp_doacross_work_share
+{
+ union {
+ /* chunk_size copy, as ws->chunk_size is multiplied by incr for
+ GFS_DYNAMIC. */
+ long chunk_size;
+ /* Likewise, but for ull implementation. */
+ unsigned long long chunk_size_ull;
+ /* For schedule(static,0) this is the number
+ of iterations assigned to the last thread, i.e. number of
+ iterations / number of threads. */
+ long q;
+ /* Likewise, but for ull implementation. */
+ unsigned long long q_ull;
+ };
+ /* Size of each array entry (padded to cache line size). */
+ unsigned long elt_sz;
+ /* Number of dimensions in sink vectors. */
+ unsigned int ncounts;
+ /* True if the iterations can be flattened. */
+ bool flattened;
+ /* Actual array (of elt_sz sized units), aligned to cache line size.
+ This is indexed by team_id for GFS_STATIC and outermost iteration
+ / chunk_size for other schedules. */
+ unsigned char *array;
+ /* These two are only used for schedule(static,0). */
+ /* This one is number of iterations % number of threads. */
+ long t;
+ union {
+ /* And this one is cached t * (q + 1). */
+ long boundary;
+ /* Likewise, but for the ull implementation. */
+ unsigned long long boundary_ull;
+ };
+ /* Array of shift counts for each dimension if they can be flattened. */
+ unsigned int shift_counts[];
+};
+
struct gomp_work_share
{
/* This member records the SCHEDULE clause to be used for this construct.
@@ -109,13 +152,18 @@ struct gomp_work_share
};
};
- /* This is a circular queue that details which threads will be allowed
- into the ordered region and in which order. When a thread allocates
- iterations on which it is going to work, it also registers itself at
- the end of the array. When a thread reaches the ordered region, it
- checks to see if it is the one at the head of the queue. If not, it
- blocks on its RELEASE semaphore. */
- unsigned *ordered_team_ids;
+ union {
+ /* This is a circular queue that details which threads will be allowed
+ into the ordered region and in which order. When a thread allocates
+ iterations on which it is going to work, it also registers itself at
+ the end of the array. When a thread reaches the ordered region, it
+ checks to see if it is the one at the head of the queue. If not, it
+ blocks on its RELEASE semaphore. */
+ unsigned *ordered_team_ids;
+
+ /* This is a pointer to DOACROSS work share data. */
+ struct gomp_doacross_work_share *doacross;
+ };
/* This is the number of threads that have registered themselves in
the circular queue ordered_team_ids. */
@@ -234,7 +282,7 @@ struct gomp_task_icv
{
unsigned long nthreads_var;
enum gomp_schedule_type run_sched_var;
- int run_sched_modifier;
+ int run_sched_chunk_size;
int default_device_var;
unsigned int thread_limit_var;
bool dyn_var;
@@ -263,9 +311,13 @@ extern char *goacc_device_type;
enum gomp_task_kind
{
+ /* Implicit task. */
GOMP_TASK_IMPLICIT,
- GOMP_TASK_IFFALSE,
+ /* Undeferred task. */
+ GOMP_TASK_UNDEFERRED,
+ /* Task created by GOMP_task and waiting to be run. */
GOMP_TASK_WAITING,
+ /* Task currently executing or scheduled and about to execute. */
GOMP_TASK_TIED
};
@@ -275,10 +327,13 @@ struct htab;
struct gomp_task_depend_entry
{
+ /* Address of dependency. */
void *addr;
struct gomp_task_depend_entry *next;
struct gomp_task_depend_entry *prev;
+ /* Task that provides the dependency in ADDR. */
struct gomp_task *task;
+ /* Depend entry is of type "IN". */
bool is_in;
bool redundant;
bool redundant_out;
@@ -306,19 +361,35 @@ struct gomp_taskwait
struct gomp_task
{
+ /* Parent circular list. See children description below. */
struct gomp_task *parent;
+ /* Circular list representing the children of this task.
+
+ In this list we first have parent_depends_on ready to run tasks,
+ then !parent_depends_on ready to run tasks, and finally already
+ running tasks. */
struct gomp_task *children;
struct gomp_task *next_child;
struct gomp_task *prev_child;
+ /* Circular task_queue in `struct gomp_team'.
+
+ GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */
struct gomp_task *next_queue;
struct gomp_task *prev_queue;
+ /* Circular queue in gomp_taskgroup->children.
+
+ GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */
struct gomp_task *next_taskgroup;
struct gomp_task *prev_taskgroup;
+ /* Taskgroup this task belongs in. */
struct gomp_taskgroup *taskgroup;
+ /* Tasks that depend on this task. */
struct gomp_dependers_vec *dependers;
struct htab *depend_hash;
struct gomp_taskwait *taskwait;
+ /* Number of items in DEPEND. */
size_t depend_count;
+ /* Number of tasks in the DEPENDERS field above. */
size_t num_dependees;
struct gomp_task_icv icv;
void (*fn) (void *);
@@ -327,13 +398,23 @@ struct gomp_task
bool in_tied_task;
bool final_task;
bool copy_ctors_done;
+ /* Set for undeferred tasks with unsatisfied dependencies which
+ block further execution of their parent until the dependencies
+ are satisfied. */
bool parent_depends_on;
+ /* Dependencies provided and/or needed for this task. DEPEND_COUNT
+ is the number of items available. */
struct gomp_task_depend_entry depend[];
};
struct gomp_taskgroup
{
struct gomp_taskgroup *prev;
+ /* Circular list of tasks that belong in this taskgroup.
+
+ Tasks are chained by next/prev_taskgroup within gomp_task, and
+ are sorted by GOMP_TASK_WAITING tasks, and then GOMP_TASK_TIED
+ tasks. */
struct gomp_task *children;
bool in_taskgroup_wait;
bool cancelled;
@@ -341,6 +422,17 @@ struct gomp_taskgroup
size_t num_children;
};
+struct gomp_target_task
+{
+ struct gomp_device_descr *devicep;
+ void (*fn) (void *);
+ size_t mapnum;
+ size_t *sizes;
+ unsigned short *kinds;
+ unsigned int flags;
+ void *hostaddrs[];
+};
+
/* This structure describes a "team" of threads. These are the threads
that are spawned by a PARALLEL constructs, as well as the work sharing
constructs that the team encounters. */
@@ -403,6 +495,8 @@ struct gomp_team
struct gomp_work_share work_shares[8];
gomp_mutex_t task_lock;
+ /* Scheduled tasks. Chain fields are next/prev_queue within a
+ gomp_task. */
struct gomp_task *task_queue;
/* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */
unsigned int task_count;
@@ -531,6 +625,7 @@ extern bool gomp_affinity_same_place (void *, void *);
extern bool gomp_affinity_finalize_place_list (bool);
extern bool gomp_affinity_init_level (int, unsigned long, bool);
extern void gomp_affinity_print_place (void *);
+extern void gomp_get_place_proc_ids_8 (int, int64_t *);
/* alloc.c */
@@ -600,6 +695,9 @@ extern void gomp_ordered_next (void);
extern void gomp_ordered_static_init (void);
extern void gomp_ordered_static_next (void);
extern void gomp_ordered_sync (void);
+extern void gomp_doacross_init (unsigned, long *, long);
+extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
+ unsigned long long);
/* parallel.c */
@@ -616,6 +714,11 @@ extern void gomp_init_task (struct gomp_task *, struct gomp_task *,
struct gomp_task_icv *);
extern void gomp_end_task (void);
extern void gomp_barrier_handle_tasks (gomp_barrier_state_t);
+extern void gomp_task_maybe_wait_for_dependencies (void **);
+extern void gomp_create_target_task (struct gomp_device_descr *,
+ void (*) (void *), size_t, void **,
+ size_t *, unsigned short *, unsigned int,
+ void **);
static void inline
gomp_finish_task (struct gomp_task *task)
@@ -636,11 +739,25 @@ extern void gomp_free_thread (void *);
extern void gomp_init_targets_once (void);
extern int gomp_get_num_devices (void);
+extern void gomp_target_task_fn (void *);
typedef struct splay_tree_node_s *splay_tree_node;
typedef struct splay_tree_s *splay_tree;
typedef struct splay_tree_key_s *splay_tree_key;
+struct target_var_desc {
+ /* Splay key. */
+ splay_tree_key key;
+ /* True if data should be copied from device to host at the end. */
+ bool copy_from;
+ /* True if data always should be copied from device to host at the end. */
+ bool always_copy_from;
+ /* Relative offset against key host_start. */
+ uintptr_t offset;
+ /* Actual length. */
+ uintptr_t length;
+};
+
struct target_mem_desc {
/* Reference count. */
uintptr_t refcount;
@@ -660,11 +777,14 @@ struct target_mem_desc {
/* Corresponding target device descriptor. */
struct gomp_device_descr *device_descr;
- /* List of splay keys to remove (or decrease refcount)
+ /* List of target items to remove (or decrease refcount)
at the end of region. */
- splay_tree_key list[];
+ struct target_var_desc list[];
};
+/* Special value for refcount - infinity. */
+#define REFCOUNT_INFINITY (~(uintptr_t) 0)
+
struct splay_tree_key_s {
/* Address of the host object. */
uintptr_t host_start;
@@ -678,8 +798,6 @@ struct splay_tree_key_s {
uintptr_t refcount;
/* Asynchronous reference count. */
uintptr_t async_refcount;
- /* True if data should be copied from device to host at the end. */
- bool copy_from;
};
#include "splay-tree.h"
@@ -757,6 +875,7 @@ struct gomp_device_descr
void (*free_func) (int, void *);
void *(*dev2host_func) (int, void *, const void *, size_t);
void *(*host2dev_func) (int, void *, const void *, size_t);
+ void *(*dev2dev_func) (int, void *, const void *, size_t);
void (*run_func) (int, void *, void *);
/* Splay tree containing information about mapped memory regions. */
@@ -774,12 +893,22 @@ struct gomp_device_descr
acc_dispatch_t openacc;
};
+/* Kind of the pragma, for which gomp_map_vars () is called. */
+enum gomp_map_vars_kind
+{
+ GOMP_MAP_VARS_OPENACC,
+ GOMP_MAP_VARS_TARGET,
+ GOMP_MAP_VARS_DATA,
+ GOMP_MAP_VARS_ENTER_DATA
+};
+
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
extern void gomp_acc_remove_pointer (void *, bool, int, int);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
- size_t *, void *, bool, bool);
+ size_t *, void *, bool,
+ enum gomp_map_vars_kind);
extern void gomp_copy_from_async (struct target_mem_desc *);
extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_init_device (struct gomp_device_descr *);
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 3b3e0c2ac73..2153661ed5a 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -134,6 +134,36 @@ OMP_4.0 {
omp_is_initial_device_;
} OMP_3.1;
+OMP_4.5 {
+ global:
+ omp_get_max_task_priority;
+ omp_get_max_task_priority_;
+ omp_get_num_places;
+ omp_get_num_places_;
+ omp_get_place_num_procs;
+ omp_get_place_num_procs_;
+ omp_get_place_num_procs_8_;
+ omp_get_place_proc_ids;
+ omp_get_place_proc_ids_;
+ omp_get_place_proc_ids_8_;
+ omp_get_place_num;
+ omp_get_place_num_;
+ omp_get_partition_num_places;
+ omp_get_partition_num_places_;
+ omp_get_partition_place_nums;
+ omp_get_partition_place_nums_;
+ omp_get_partition_place_nums_8_;
+ omp_get_initial_device;
+ omp_get_initial_device_;
+ omp_target_alloc;
+ omp_target_free;
+ omp_target_is_present;
+ omp_target_memcpy;
+ omp_target_memcpy_rect;
+ omp_target_associate_ptr;
+ omp_target_disassociate_ptr;
+} OMP_4.0;
+
GOMP_1.0 {
global:
GOMP_atomic_end;
@@ -234,10 +264,28 @@ GOMP_4.0.1 {
GOMP_offload_unregister;
} GOMP_4.0;
-GOMP_4.0.2 {
+GOMP_4.5 {
global:
+ GOMP_target_41;
+ GOMP_target_data_41;
+ GOMP_target_update_41;
+ GOMP_target_enter_exit_data;
+ GOMP_taskloop;
+ GOMP_taskloop_ull;
GOMP_offload_register_ver;
GOMP_offload_unregister_ver;
+ GOMP_loop_doacross_dynamic_start;
+ GOMP_loop_doacross_guided_start;
+ GOMP_loop_doacross_runtime_start;
+ GOMP_loop_doacross_static_start;
+ GOMP_doacross_post;
+ GOMP_doacross_wait;
+ GOMP_loop_ull_doacross_dynamic_start;
+ GOMP_loop_ull_doacross_guided_start;
+ GOMP_loop_ull_doacross_runtime_start;
+ GOMP_loop_ull_doacross_static_start;
+ GOMP_doacross_ull_post;
+ GOMP_doacross_ull_wait;
} GOMP_4.0.1;
OACC_2.0 {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 06b1c67fc02..67e6d199066 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -155,6 +155,7 @@ linkage, and do not throw exceptions.
* omp_get_dynamic:: Dynamic teams setting
* omp_get_level:: Number of parallel regions
* omp_get_max_active_levels:: Maximum number of active regions
+* omp_get_max_task_priority:: Maximum task priority value that can be set
* omp_get_max_threads:: Maximum number of threads of parallel region
* omp_get_nested:: Nested parallel regions
* omp_get_num_devices:: Number of target devices
@@ -388,6 +389,27 @@ This function obtains the maximum allowed number of nested, active parallel regi
@end table
+@node omp_get_max_task_priority
+@section @code{omp_get_max_task_priority} -- Maximum priority value
+that can be set for tasks.
+@table @asis
+@item @emph{Description}:
+This function obtains the maximum allowed priority number for tasks.
+
+@item @emph{C/C++}
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{int omp_get_max_task_priority(void);}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{integer function omp_get_max_task_priority()}
+@end multitable
+
+@item @emph{Reference}:
+@uref{http://www.openmp.org/, OpenMP specification v4.5}, Section 3.2.29.
+@end table
+
@node omp_get_max_threads
@section @code{omp_get_max_threads} -- Maximum number of threads of parallel region
@@ -581,18 +603,18 @@ set via @env{OMP_PROC_BIND}. Possible values are @code{omp_proc_bind_false},
Obtain the runtime scheduling method. The @var{kind} argument will be
set to the value @code{omp_sched_static}, @code{omp_sched_dynamic},
@code{omp_sched_guided} or @code{omp_sched_auto}. The second argument,
-@var{modifier}, is set to the chunk size.
+@var{chunk_size}, is set to the chunk size.
@item @emph{C/C++}
@multitable @columnfractions .20 .80
-@item @emph{Prototype}: @tab @code{void omp_get_schedule(omp_sched_t *kind, int *modifier);}
+@item @emph{Prototype}: @tab @code{void omp_get_schedule(omp_sched_t *kind, int *chunk_size);}
@end multitable
@item @emph{Fortran}:
@multitable @columnfractions .20 .80
-@item @emph{Interface}: @tab @code{subroutine omp_get_schedule(kind, modifier)}
+@item @emph{Interface}: @tab @code{subroutine omp_get_schedule(kind, chunk_size)}
@item @tab @code{integer(kind=omp_sched_kind) kind}
-@item @tab @code{integer modifier}
+@item @tab @code{integer chunk_size}
@end multitable
@item @emph{See also}:
@@ -929,19 +951,19 @@ Sets the runtime scheduling method. The @var{kind} argument can have the
value @code{omp_sched_static}, @code{omp_sched_dynamic},
@code{omp_sched_guided} or @code{omp_sched_auto}. Except for
@code{omp_sched_auto}, the chunk size is set to the value of
-@var{modifier} if positive, or to the default value if zero or negative.
-For @code{omp_sched_auto} the @var{modifier} argument is ignored.
+@var{chunk_size} if positive, or to the default value if zero or negative.
+For @code{omp_sched_auto} the @var{chunk_size} argument is ignored.
@item @emph{C/C++}
@multitable @columnfractions .20 .80
-@item @emph{Prototype}: @tab @code{void omp_set_schedule(omp_sched_t kind, int modifier);}
+@item @emph{Prototype}: @tab @code{void omp_set_schedule(omp_sched_t kind, int chunk_size);}
@end multitable
@item @emph{Fortran}:
@multitable @columnfractions .20 .80
-@item @emph{Interface}: @tab @code{subroutine omp_set_schedule(kind, modifier)}
+@item @emph{Interface}: @tab @code{subroutine omp_set_schedule(kind, chunk_size)}
@item @tab @code{integer(kind=omp_sched_kind) kind}
-@item @tab @code{integer modifier}
+@item @tab @code{integer chunk_size}
@end multitable
@item @emph{See also}:
@@ -1311,6 +1333,7 @@ beginning with @env{GOMP_} are GNU extensions.
* OMP_DEFAULT_DEVICE:: Set the device used in target regions
* OMP_DYNAMIC:: Dynamic adjustment of threads
* OMP_MAX_ACTIVE_LEVELS:: Set the maximum number of nested parallel regions
+* OMP_MAX_TASK_PRIORITY:: Set the maximum task priority value
* OMP_NESTED:: Nested parallel regions
* OMP_NUM_THREADS:: Specifies the number of threads to use
* OMP_PROC_BIND:: Whether theads may be moved between CPUs
@@ -1420,6 +1443,26 @@ If undefined, the number of active levels is unlimited.
+@node OMP_MAX_TASK_PRIORITY
+@section @env{OMP_MAX_TASK_PRIORITY} -- Set the maximum priority
+number that can be set for a task.
+@cindex Environment Variable
+@table @asis
+@item @emph{Description}:
+Specifies the initial value for the maximum priority value that can be
+set for a task. The value of this variable shall be a non-negative
+integer, and zero is allowed. If undefined, the default priority is
+0.
+
+@item @emph{See also}:
+@ref{omp_get_max_task_priority}
+
+@item @emph{Reference}:
+@uref{http://www.openmp.org/, OpenMP specification v4.5}, Section 4.14
+@end table
+
+
+
@node OMP_NESTED
@section @env{OMP_NESTED} -- Nested parallel regions
@cindex Environment Variable
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index e7f4effaf48..c28ad2116dc 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -71,6 +71,15 @@ extern bool GOMP_loop_ordered_dynamic_next (long *, long *);
extern bool GOMP_loop_ordered_guided_next (long *, long *);
extern bool GOMP_loop_ordered_runtime_next (long *, long *);
+extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *,
+ long *);
+extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *,
+ long *);
+extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *,
+ long *);
+extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
+ long *);
+
extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
unsigned, long, long, long, long);
extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *,
@@ -164,10 +173,34 @@ extern bool GOMP_loop_ull_ordered_guided_next (unsigned long long *,
extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *,
unsigned long long *);
+extern bool GOMP_loop_ull_doacross_static_start (unsigned,
+ unsigned long long *,
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *);
+extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned,
+ unsigned long long *,
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *);
+extern bool GOMP_loop_ull_doacross_guided_start (unsigned,
+ unsigned long long *,
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *);
+extern bool GOMP_loop_ull_doacross_runtime_start (unsigned,
+ unsigned long long *,
+ unsigned long long *,
+ unsigned long long *);
+
/* ordered.c */
extern void GOMP_ordered_start (void);
extern void GOMP_ordered_end (void);
+extern void GOMP_doacross_post (long *);
+extern void GOMP_doacross_wait (long, ...);
+extern void GOMP_doacross_ull_post (unsigned long long *);
+extern void GOMP_doacross_ull_wait (unsigned long long, ...);
/* parallel.c */
@@ -180,7 +213,15 @@ extern bool GOMP_cancellation_point (int);
/* task.c */
extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
- long, long, bool, unsigned, void **);
+ long, long, bool, unsigned, void **, int);
+extern void GOMP_taskloop (void (*) (void *), void *,
+ void (*) (void *, void *), long, long, unsigned,
+ unsigned long, int, long, long, long);
+extern void GOMP_taskloop_ull (void (*) (void *), void *,
+ void (*) (void *, void *), long, long,
+ unsigned, unsigned long, int,
+ unsigned long long, unsigned long long,
+ unsigned long long);
extern void GOMP_taskwait (void);
extern void GOMP_taskyield (void);
extern void GOMP_taskgroup_start (void);
@@ -208,11 +249,20 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_target (int, void (*) (void *), const void *,
size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_41 (int, void (*) (void *), size_t, void **, size_t *,
+ unsigned short *, unsigned int, void **);
extern void GOMP_target_data (int, const void *,
size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_data_41 (int, size_t, void **, size_t *,
+ unsigned short *);
extern void GOMP_target_end_data (void);
extern void GOMP_target_update (int, const void *,
size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_update_41 (int, size_t, void **, size_t *,
+ unsigned short *, unsigned int, void **);
+extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
+ unsigned short *, unsigned int,
+ void **);
extern void GOMP_teams (unsigned int, unsigned int);
/* oacc-parallel.c */
diff --git a/libgomp/loop.c b/libgomp/loop.c
index 27d78db7a56..812f66cd725 100644
--- a/libgomp/loop.c
+++ b/libgomp/loop.c
@@ -169,13 +169,16 @@ GOMP_loop_runtime_start (long start, long end, long incr,
switch (icv->run_sched_var)
{
case GFS_STATIC:
- return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
+ return gomp_loop_static_start (start, end, incr,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_DYNAMIC:
- return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
+ return gomp_loop_dynamic_start (start, end, incr,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_GUIDED:
- return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
+ return gomp_loop_guided_start (start, end, incr,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_AUTO:
/* For now map to schedule(static), later on we could play with feedback
@@ -266,15 +269,15 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr,
{
case GFS_STATIC:
return gomp_loop_ordered_static_start (start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_DYNAMIC:
return gomp_loop_ordered_dynamic_start (start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_GUIDED:
return gomp_loop_ordered_guided_start (start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_AUTO:
/* For now map to schedule(static), later on we could play with feedback
@@ -286,6 +289,111 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr,
}
}
+/* The *_doacross_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+ and other COUNTS array elements tell the library number of iterations
+ in the ordered inner loops. */
+
+static bool
+gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_STATIC, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+ return !gomp_iter_static_next (istart, iend);
+}
+
+static bool
+gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_DYNAMIC, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+#ifdef HAVE_SYNC_BUILTINS
+ ret = gomp_iter_dynamic_next (istart, iend);
+#else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ ret = gomp_iter_dynamic_next_locked (istart, iend);
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+ return ret;
+}
+
+static bool
+gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ GFS_GUIDED, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+#ifdef HAVE_SYNC_BUILTINS
+ ret = gomp_iter_guided_next (istart, iend);
+#else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ ret = gomp_iter_guided_next_locked (istart, iend);
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+ return ret;
+}
+
+bool
+GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
+ long *istart, long *iend)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ switch (icv->run_sched_var)
+ {
+ case GFS_STATIC:
+ return gomp_loop_doacross_static_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_DYNAMIC:
+ return gomp_loop_doacross_dynamic_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_GUIDED:
+ return gomp_loop_doacross_guided_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_AUTO:
+ /* For now map to schedule(static), later on we could play with feedback
+ driven choice. */
+ return gomp_loop_doacross_static_start (ncounts, counts,
+ 0, istart, iend);
+ default:
+ abort ();
+ }
+}
+
/* The *_next routines are called when the thread completes processing of
the iteration block currently assigned to it. If the work-share
construct is bound directly to a parallel construct, then the iteration
@@ -484,7 +592,7 @@ GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
{
struct gomp_task_icv *icv = gomp_icv (false);
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- icv->run_sched_var, icv->run_sched_modifier, 0);
+ icv->run_sched_var, icv->run_sched_chunk_size, 0);
}
ialias_redirect (GOMP_parallel_end)
@@ -529,7 +637,7 @@ GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
{
struct gomp_task_icv *icv = gomp_icv (false);
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- icv->run_sched_var, icv->run_sched_modifier,
+ icv->run_sched_var, icv->run_sched_chunk_size,
flags);
fn (data);
GOMP_parallel_end ();
@@ -578,6 +686,13 @@ extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
__attribute__((alias ("gomp_loop_ordered_guided_start")));
+extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
+ __attribute__((alias ("gomp_loop_doacross_static_start")));
+extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
+ __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
+extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
+ __attribute__((alias ("gomp_loop_doacross_guided_start")));
+
extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
@@ -638,6 +753,30 @@ GOMP_loop_ordered_guided_start (long start, long end, long incr,
}
bool
+GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
+GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
+GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
+ long chunk_size, long *istart, long *iend)
+{
+ return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
GOMP_loop_static_next (long *istart, long *iend)
{
return gomp_loop_static_next (istart, iend);
diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c
index de56ae0b7ce..1f2ed546024 100644
--- a/libgomp/loop_ull.c
+++ b/libgomp/loop_ull.c
@@ -175,15 +175,15 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
{
case GFS_STATIC:
return gomp_loop_ull_static_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_DYNAMIC:
return gomp_loop_ull_dynamic_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_GUIDED:
return gomp_loop_ull_guided_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_AUTO:
/* For now map to schedule(static), later on we could play with feedback
@@ -279,15 +279,15 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
{
case GFS_STATIC:
return gomp_loop_ull_ordered_static_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_DYNAMIC:
return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_GUIDED:
return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
- icv->run_sched_modifier,
+ icv->run_sched_chunk_size,
istart, iend);
case GFS_AUTO:
/* For now map to schedule(static), later on we could play with feedback
@@ -299,6 +299,114 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
}
}
+/* The *_doacross_*_start routines are similar. The only difference is that
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+ and other COUNTS array elements tell the library number of iterations
+ in the ordered inner loops. */
+
+static bool
+gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_STATIC, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+ return !gomp_iter_ull_static_next (istart, iend);
+}
+
+static bool
+gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_DYNAMIC, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+ ret = gomp_iter_ull_dynamic_next (istart, iend);
+#else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+ return ret;
+}
+
+static bool
+gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ bool ret;
+
+ if (gomp_work_share_start (false))
+ {
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ GFS_GUIDED, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_work_share_init_done ();
+ }
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+ ret = gomp_iter_ull_guided_next (istart, iend);
+#else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ ret = gomp_iter_ull_guided_next_locked (istart, iend);
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+ return ret;
+}
+
+bool
+GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull *istart, gomp_ull *iend)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ switch (icv->run_sched_var)
+ {
+ case GFS_STATIC:
+ return gomp_loop_ull_doacross_static_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_DYNAMIC:
+ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_GUIDED:
+ return gomp_loop_ull_doacross_guided_start (ncounts, counts,
+ icv->run_sched_chunk_size,
+ istart, iend);
+ case GFS_AUTO:
+ /* For now map to schedule(static), later on we could play with feedback
+ driven choice. */
+ return gomp_loop_ull_doacross_static_start (ncounts, counts,
+ 0, istart, iend);
+ default:
+ abort ();
+ }
+}
+
/* The *_next routines are called when the thread completes processing of
the iteration block currently assigned to it. If the work-share
construct is bound directly to a parallel construct, then the iteration
@@ -466,6 +574,13 @@ extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynam
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
+extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
+ __attribute__((alias ("gomp_loop_ull_doacross_static_start")));
+extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
+ __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
+extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
+ __attribute__((alias ("gomp_loop_ull_doacross_guided_start")));
+
extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
@@ -535,6 +650,33 @@ GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
}
bool
+GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
+GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
+GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, gomp_ull *istart,
+ gomp_ull *iend)
+{
+ return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
+ istart, iend);
+}
+
+bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
return gomp_loop_ull_static_next (istart, iend);
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 90d43eb2b8a..af067d6e73c 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -289,7 +289,8 @@ acc_map_data (void *h, void *d, size_t s)
if (d != h)
gomp_fatal ("cannot map data on shared-memory system");
- tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+ tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ GOMP_MAP_VARS_OPENACC);
}
else
{
@@ -318,7 +319,7 @@ acc_map_data (void *h, void *d, size_t s)
gomp_mutex_unlock (&acc_dev->lock);
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
- &kinds, true, false);
+ &kinds, true, GOMP_MAP_VARS_OPENACC);
}
gomp_mutex_lock (&acc_dev->lock);
@@ -447,7 +448,7 @@ present_create_copy (unsigned f, void *h, size_t s)
gomp_mutex_unlock (&acc_dev->lock);
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
- false);
+ GOMP_MAP_VARS_OPENACC);
gomp_mutex_lock (&acc_dev->lock);
@@ -594,7 +595,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
- NULL, sizes, kinds, true, false);
+ NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
gomp_mutex_lock (&acc_dev->lock);
@@ -651,7 +652,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
}
if (force_copyfrom)
- t->list[0]->copy_from = 1;
+ t->list[0].copy_from = 1;
gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index e31bc0a7bc6..b150106981e 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -168,12 +168,12 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
tgt_fn = (void (*)) fn;
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- false);
+ GOMP_MAP_VARS_OPENACC);
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
- devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset);
+ devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset);
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes,
kinds, async, dims, tgt);
@@ -228,7 +228,8 @@ GOACC_data_start (int device, size_t mapnum,
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|| host_fallback)
{
- tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+ tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ GOMP_MAP_VARS_OPENACC);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
@@ -237,7 +238,7 @@ GOACC_data_start (int device, size_t mapnum,
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- false);
+ GOMP_MAP_VARS_OPENACC);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index dac3e8ad6ef..090498ad784 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -62,6 +62,15 @@ typedef enum omp_proc_bind_t
omp_proc_bind_spread = 4
} omp_proc_bind_t;
+typedef enum omp_lock_hint_t
+{
+ omp_lock_hint_none = 0,
+ omp_lock_hint_uncontended = 1,
+ omp_lock_hint_contended = 2,
+ omp_lock_hint_nonspeculative = 4,
+ omp_lock_hint_speculative = 8,
+} omp_lock_hint_t;
+
#ifdef __cplusplus
extern "C" {
# define __GOMP_NOTHROW throw ()
@@ -84,12 +93,16 @@ extern void omp_set_nested (int) __GOMP_NOTHROW;
extern int omp_get_nested (void) __GOMP_NOTHROW;
extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW;
+extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
+ __GOMP_NOTHROW;
extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_unset_lock (omp_lock_t *) __GOMP_NOTHROW;
extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_init_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
+extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
+ __GOMP_NOTHROW;
extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern void omp_unset_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
@@ -112,6 +125,12 @@ extern int omp_in_final (void) __GOMP_NOTHROW;
extern int omp_get_cancellation (void) __GOMP_NOTHROW;
extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
+extern int omp_get_num_places (void) __GOMP_NOTHROW;
+extern int omp_get_place_num_procs (int) __GOMP_NOTHROW;
+extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW;
+extern int omp_get_place_num (void) __GOMP_NOTHROW;
+extern int omp_get_partition_num_places (void) __GOMP_NOTHROW;
+extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW;
extern void omp_set_default_device (int) __GOMP_NOTHROW;
extern int omp_get_default_device (void) __GOMP_NOTHROW;
@@ -120,6 +139,24 @@ extern int omp_get_num_teams (void) __GOMP_NOTHROW;
extern int omp_get_team_num (void) __GOMP_NOTHROW;
extern int omp_is_initial_device (void) __GOMP_NOTHROW;
+extern int omp_get_initial_device (void) __GOMP_NOTHROW;
+extern int omp_get_max_task_priority (void) __GOMP_NOTHROW;
+
+extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern void omp_target_free (void *, int) __GOMP_NOTHROW;
+extern int omp_target_is_present (void *, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
+ __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *, int, int)
+ __GOMP_NOTHROW;
+extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
+ __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;
#ifdef __cplusplus
}
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 122563e625a..28df9c1664e 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -29,15 +29,31 @@
integer, parameter :: omp_nest_lock_kind = @OMP_NEST_LOCK_KIND@
integer, parameter :: omp_sched_kind = 4
integer, parameter :: omp_proc_bind_kind = 4
+ integer, parameter :: omp_lock_hint_kind = 4
integer (omp_sched_kind), parameter :: omp_sched_static = 1
integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2
integer (omp_sched_kind), parameter :: omp_sched_guided = 3
integer (omp_sched_kind), parameter :: omp_sched_auto = 4
- integer (omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+ integer (omp_proc_bind_kind), &
+ parameter :: omp_proc_bind_false = 0
+ integer (omp_proc_bind_kind), &
+ parameter :: omp_proc_bind_true = 1
+ integer (omp_proc_bind_kind), &
+ parameter :: omp_proc_bind_master = 2
+ integer (omp_proc_bind_kind), &
+ parameter :: omp_proc_bind_close = 3
+ integer (omp_proc_bind_kind), &
+ parameter :: omp_proc_bind_spread = 4
+ integer (omp_lock_hint_kind), &
+ parameter :: omp_lock_hint_none = 0
+ integer (omp_lock_hint_kind), &
+ parameter :: omp_lock_hint_uncontended = 1
+ integer (omp_lock_hint_kind), &
+ parameter :: omp_lock_hint_contended = 2
+ integer (omp_lock_hint_kind), &
+ parameter :: omp_lock_hint_nonspeculative = 4
+ integer (omp_lock_hint_kind), &
+ parameter :: omp_lock_hint_speculative = 8
end module
module omp_lib
@@ -53,6 +69,14 @@
end interface
interface
+ subroutine omp_init_lock_with_hint (svar, hint)
+ use omp_lib_kinds
+ integer (omp_lock_kind), intent (out) :: svar
+ integer (omp_lock_hint_kind), intent (in) :: hint
+ end subroutine omp_init_lock_with_hint
+ end interface
+
+ interface
subroutine omp_init_nest_lock (nvar)
use omp_lib_kinds
integer (omp_nest_lock_kind), intent (out) :: nvar
@@ -60,6 +84,14 @@
end interface
interface
+ subroutine omp_init_nest_lock_with_hint (nvar, hint)
+ use omp_lib_kinds
+ integer (omp_nest_lock_kind), intent (out) :: nvar
+ integer (omp_lock_hint_kind), intent (in) :: hint
+ end subroutine omp_init_nest_lock_with_hint
+ end interface
+
+ interface
subroutine omp_destroy_lock (svar)
use omp_lib_kinds
integer (omp_lock_kind), intent (inout) :: svar
@@ -199,28 +231,28 @@
end interface
interface omp_set_schedule
- subroutine omp_set_schedule (kind, modifier)
+ subroutine omp_set_schedule (kind, chunk_size)
use omp_lib_kinds
integer (omp_sched_kind), intent (in) :: kind
- integer (4), intent (in) :: modifier
+ integer (4), intent (in) :: chunk_size
end subroutine omp_set_schedule
- subroutine omp_set_schedule_8 (kind, modifier)
+ subroutine omp_set_schedule_8 (kind, chunk_size)
use omp_lib_kinds
integer (omp_sched_kind), intent (in) :: kind
- integer (8), intent (in) :: modifier
+ integer (8), intent (in) :: chunk_size
end subroutine omp_set_schedule_8
end interface
interface omp_get_schedule
- subroutine omp_get_schedule (kind, modifier)
+ subroutine omp_get_schedule (kind, chunk_size)
use omp_lib_kinds
integer (omp_sched_kind), intent (out) :: kind
- integer (4), intent (out) :: modifier
+ integer (4), intent (out) :: chunk_size
end subroutine omp_get_schedule
- subroutine omp_get_schedule_8 (kind, modifier)
+ subroutine omp_get_schedule_8 (kind, chunk_size)
use omp_lib_kinds
integer (omp_sched_kind), intent (out) :: kind
- integer (8), intent (out) :: modifier
+ integer (8), intent (out) :: chunk_size
end subroutine omp_get_schedule_8
end interface
@@ -298,6 +330,58 @@
end function omp_get_proc_bind
end interface
+ interface
+ function omp_get_num_places ()
+ integer (4) :: omp_get_num_places
+ end function omp_get_num_places
+ end interface
+
+ interface omp_get_place_num_procs
+ function omp_get_place_num_procs (place_num)
+ integer (4), intent(in) :: place_num
+ integer (4) :: omp_get_place_num_procs
+ end function omp_get_place_num_procs
+
+ function omp_get_place_num_procs_8 (place_num)
+ integer (8), intent(in) :: place_num
+ integer (4) :: omp_get_place_num_procs_8
+ end function omp_get_place_num_procs_8
+ end interface
+
+ interface omp_get_place_proc_ids
+ subroutine omp_get_place_proc_ids (place_num, ids)
+ integer (4), intent(in) :: place_num
+ integer (4), intent(out) :: ids(*)
+ end subroutine omp_get_place_proc_ids
+
+ subroutine omp_get_place_proc_ids_8 (place_num, ids)
+ integer (8), intent(in) :: place_num
+ integer (8), intent(out) :: ids(*)
+ end subroutine omp_get_place_proc_ids_8
+ end interface
+
+ interface
+ function omp_get_place_num ()
+ integer (4) :: omp_get_place_num
+ end function omp_get_place_num
+ end interface
+
+ interface
+ function omp_get_partition_num_places ()
+ integer (4) :: omp_get_partition_num_places
+ end function omp_get_partition_num_places
+ end interface
+
+ interface omp_get_partition_place_nums
+ subroutine omp_get_partition_place_nums (place_nums)
+ integer (4), intent(out) :: place_nums(*)
+ end subroutine omp_get_partition_place_nums
+
+ subroutine omp_get_partition_place_nums_8 (place_nums)
+ integer (8), intent(out) :: place_nums(*)
+ end subroutine omp_get_partition_place_nums_8
+ end interface
+
interface omp_set_default_device
subroutine omp_set_default_device (device_num)
integer (4), intent (in) :: device_num
@@ -337,4 +421,16 @@
end function omp_is_initial_device
end interface
+ interface
+ function omp_get_initial_device ()
+ integer (4) :: omp_get_initial_device
+ end function omp_get_initial_device
+ end interface
+
+ interface
+ function omp_get_max_task_priority ()
+ integer (4) :: omp_get_max_task_priority
+ end function omp_get_max_task_priority
+ end interface
+
end module omp_lib
diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in
index d590bc15135..81662424500 100644
--- a/libgomp/omp_lib.h.in
+++ b/libgomp/omp_lib.h.in
@@ -46,9 +46,23 @@
parameter (omp_proc_bind_master = 2)
parameter (omp_proc_bind_close = 3)
parameter (omp_proc_bind_spread = 4)
+ integer omp_lock_hint_kind
+ parameter (omp_lock_hint_kind = 4)
+ integer (omp_lock_hint_kind) omp_lock_hint_none
+ integer (omp_lock_hint_kind) omp_lock_hint_uncontended
+ integer (omp_lock_hint_kind) omp_lock_hint_contended
+ integer (omp_lock_hint_kind) omp_lock_hint_nonspeculative
+ integer (omp_lock_hint_kind) omp_lock_hint_speculative
+ parameter (omp_lock_hint_none = 0)
+ parameter (omp_lock_hint_uncontended = 1)
+ parameter (omp_lock_hint_contended = 2)
+ parameter (omp_lock_hint_nonspeculative = 4)
+ parameter (omp_lock_hint_speculative = 8)
parameter (openmp_version = 201307)
external omp_init_lock, omp_init_nest_lock
+ external omp_init_lock_with_hint
+ external omp_init_nest_lock_with_hint
external omp_destroy_lock, omp_destroy_nest_lock
external omp_set_lock, omp_set_nest_lock
external omp_unset_lock, omp_unset_nest_lock
@@ -88,6 +102,17 @@
external omp_get_proc_bind
integer(omp_proc_bind_kind) omp_get_proc_bind
+ integer(4) omp_get_num_places
+ external omp_get_num_places
+ integer(4) omp_get_place_num_procs
+ external omp_get_place_num_procs
+ external omp_get_place_proc_ids
+ integer(4) omp_get_place_num
+ external omp_get_place_num
+ integer(4) omp_get_partition_num_places
+ external omp_get_partition_num_places
+ external omp_get_partition_place_nums
+
external omp_set_default_device, omp_get_default_device
external omp_get_num_devices, omp_get_num_teams
external omp_get_team_num
@@ -96,3 +121,8 @@
external omp_is_initial_device
logical(4) omp_is_initial_device
+ external omp_get_initial_device
+ integer(4) omp_get_initial_device
+
+ external omp_get_max_task_priority
+ integer(4) omp_get_max_task_priority
diff --git a/libgomp/ordered.c b/libgomp/ordered.c
index 69ca217b4d5..fdac3ee8f58 100644
--- a/libgomp/ordered.c
+++ b/libgomp/ordered.c
@@ -26,6 +26,9 @@
/* This file handles the ORDERED construct. */
#include "libgomp.h"
+#include <stdarg.h>
+#include <string.h>
+#include "doacross.h"
/* This function is called when first allocating an iteration block. That
@@ -250,3 +253,521 @@ void
GOMP_ordered_end (void)
{
}
+
+/* DOACROSS initialization. */
+
+#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
+
+void
+gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_work_share *ws = thr->ts.work_share;
+ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0;
+ unsigned long ent, num_ents, elt_sz, shift_sz;
+ struct gomp_doacross_work_share *doacross;
+
+ if (team == NULL || team->nthreads == 1)
+ return;
+
+ for (i = 0; i < ncounts; i++)
+ {
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
+ if (counts[i] == 0)
+ return;
+
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int this_bits;
+ if (counts[i] == 1)
+ this_bits = 1;
+ else
+ this_bits = __SIZEOF_LONG__ * __CHAR_BIT__
+ - __builtin_clzl (counts[i] - 1);
+ if (num_bits + this_bits <= MAX_COLLAPSED_BITS)
+ {
+ bits[i] = this_bits;
+ num_bits += this_bits;
+ }
+ else
+ num_bits = MAX_COLLAPSED_BITS + 1;
+ }
+ }
+
+ if (ws->sched == GFS_STATIC)
+ num_ents = team->nthreads;
+ else
+ num_ents = (counts[0] - 1) / chunk_size + 1;
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ elt_sz = sizeof (unsigned long);
+ shift_sz = ncounts * sizeof (unsigned int);
+ }
+ else
+ {
+ elt_sz = sizeof (unsigned long) * ncounts;
+ shift_sz = 0;
+ }
+ elt_sz = (elt_sz + 63) & ~63UL;
+
+ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
+ + shift_sz);
+ doacross->chunk_size = chunk_size;
+ doacross->elt_sz = elt_sz;
+ doacross->ncounts = ncounts;
+ doacross->flattened = false;
+ doacross->array = (unsigned char *)
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ & ~(uintptr_t) 63);
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int shift_count = 0;
+ doacross->flattened = true;
+ for (i = ncounts; i > 0; i--)
+ {
+ doacross->shift_counts[i - 1] = shift_count;
+ shift_count += bits[i - 1];
+ }
+ for (ent = 0; ent < num_ents; ent++)
+ *(unsigned long *) (doacross->array + ent * elt_sz) = 0;
+ }
+ else
+ for (ent = 0; ent < num_ents; ent++)
+ memset (doacross->array + ent * elt_sz, '\0',
+ sizeof (unsigned long) * ncounts);
+ if (ws->sched == GFS_STATIC && chunk_size == 0)
+ {
+ unsigned long q = counts[0] / num_ents;
+ unsigned long t = counts[0] % num_ents;
+ doacross->boundary = t * (q + 1);
+ doacross->q = q;
+ doacross->t = t;
+ }
+ ws->doacross = doacross;
+}
+
+/* DOACROSS POST operation. */
+
+void
+GOMP_doacross_post (long *counts)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_work_share *ws = thr->ts.work_share;
+ struct gomp_doacross_work_share *doacross = ws->doacross;
+ unsigned long ent;
+ unsigned int i;
+
+ if (__builtin_expect (doacross == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+ }
+
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1))
+ ent = thr->ts.team_id;
+ else
+ ent = counts[0] / doacross->chunk_size;
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+
+ if (__builtin_expect (doacross->flattened, 1))
+ {
+ unsigned long flattened
+ = (unsigned long) counts[0] << doacross->shift_counts[0];
+
+ for (i = 1; i < doacross->ncounts; i++)
+ flattened |= (unsigned long) counts[i]
+ << doacross->shift_counts[i];
+ flattened++;
+ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE))
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ else
+ __atomic_store_n (array, flattened, MEMMODEL_RELEASE);
+ return;
+ }
+
+ __atomic_thread_fence (MEMMODEL_ACQUIRE);
+ for (i = doacross->ncounts; i-- > 0; )
+ {
+ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED))
+ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE);
+ }
+}
+
+/* DOACROSS WAIT operation. */
+
+void
+GOMP_doacross_wait (long first, ...)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_work_share *ws = thr->ts.work_share;
+ struct gomp_doacross_work_share *doacross = ws->doacross;
+ va_list ap;
+ unsigned long ent;
+ unsigned int i;
+
+ if (__builtin_expect (doacross == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+ }
+
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1))
+ {
+ if (ws->chunk_size == 0)
+ {
+ if (first < doacross->boundary)
+ ent = first / (doacross->q + 1);
+ else
+ ent = (first - doacross->boundary) / doacross->q
+ + doacross->t;
+ }
+ else
+ ent = first / ws->chunk_size % thr->ts.team->nthreads;
+ }
+ else
+ ent = first / doacross->chunk_size;
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+
+ if (__builtin_expect (doacross->flattened, 1))
+ {
+ unsigned long flattened
+ = (unsigned long) first << doacross->shift_counts[0];
+ unsigned long cur;
+
+ va_start (ap, first);
+ for (i = 1; i < doacross->ncounts; i++)
+ flattened |= (unsigned long) va_arg (ap, long)
+ << doacross->shift_counts[i];
+ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE);
+ if (flattened < cur)
+ {
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ va_end (ap);
+ return;
+ }
+ doacross_spin (array, flattened, cur);
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ va_end (ap);
+ return;
+ }
+
+ do
+ {
+ va_start (ap, first);
+ for (i = 0; i < doacross->ncounts; i++)
+ {
+ unsigned long thisv
+ = (unsigned long) (i ? va_arg (ap, long) : first) + 1;
+ unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED);
+ if (thisv < cur)
+ {
+ i = doacross->ncounts;
+ break;
+ }
+ if (thisv > cur)
+ break;
+ }
+ va_end (ap);
+ if (i == doacross->ncounts)
+ break;
+ cpu_relax ();
+ }
+ while (1);
+ __sync_synchronize ();
+}
+
+typedef unsigned long long gomp_ull;
+
+void
+gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_work_share *ws = thr->ts.work_share;
+ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0;
+ unsigned long ent, num_ents, elt_sz, shift_sz;
+ struct gomp_doacross_work_share *doacross;
+
+ if (team == NULL || team->nthreads == 1)
+ return;
+
+ for (i = 0; i < ncounts; i++)
+ {
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
+ if (counts[i] == 0)
+ return;
+
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int this_bits;
+ if (counts[i] == 1)
+ this_bits = 1;
+ else
+ this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__
+ - __builtin_clzll (counts[i] - 1);
+ if (num_bits + this_bits <= MAX_COLLAPSED_BITS)
+ {
+ bits[i] = this_bits;
+ num_bits += this_bits;
+ }
+ else
+ num_bits = MAX_COLLAPSED_BITS + 1;
+ }
+ }
+
+ if (ws->sched == GFS_STATIC)
+ num_ents = team->nthreads;
+ else
+ num_ents = (counts[0] - 1) / chunk_size + 1;
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ elt_sz = sizeof (unsigned long);
+ shift_sz = ncounts * sizeof (unsigned int);
+ }
+ else
+ {
+ if (sizeof (gomp_ull) == sizeof (unsigned long))
+ elt_sz = sizeof (gomp_ull) * ncounts;
+ else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long))
+ elt_sz = sizeof (unsigned long) * 2 * ncounts;
+ else
+ abort ();
+ shift_sz = 0;
+ }
+ elt_sz = (elt_sz + 63) & ~63UL;
+
+ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
+ + shift_sz);
+ doacross->chunk_size_ull = chunk_size;
+ doacross->elt_sz = elt_sz;
+ doacross->ncounts = ncounts;
+ doacross->flattened = false;
+ doacross->boundary = 0;
+ doacross->array = (unsigned char *)
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ & ~(uintptr_t) 63);
+ if (num_bits <= MAX_COLLAPSED_BITS)
+ {
+ unsigned int shift_count = 0;
+ doacross->flattened = true;
+ for (i = ncounts; i > 0; i--)
+ {
+ doacross->shift_counts[i - 1] = shift_count;
+ shift_count += bits[i - 1];
+ }
+ for (ent = 0; ent < num_ents; ent++)
+ *(unsigned long *) (doacross->array + ent * elt_sz) = 0;
+ }
+ else
+ for (ent = 0; ent < num_ents; ent++)
+ memset (doacross->array + ent * elt_sz, '\0',
+ sizeof (unsigned long) * ncounts);
+ if (ws->sched == GFS_STATIC && chunk_size == 0)
+ {
+ gomp_ull q = counts[0] / num_ents;
+ gomp_ull t = counts[0] % num_ents;
+ doacross->boundary_ull = t * (q + 1);
+ doacross->q_ull = q;
+ doacross->t = t;
+ }
+ ws->doacross = doacross;
+}
+
+/* DOACROSS POST operation. */
+
+void
+GOMP_doacross_ull_post (gomp_ull *counts)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_work_share *ws = thr->ts.work_share;
+ struct gomp_doacross_work_share *doacross = ws->doacross;
+ unsigned long ent;
+ unsigned int i;
+
+ if (__builtin_expect (doacross == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+ }
+
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1))
+ ent = thr->ts.team_id;
+ else
+ ent = counts[0] / doacross->chunk_size_ull;
+
+ if (__builtin_expect (doacross->flattened, 1))
+ {
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+ gomp_ull flattened
+ = counts[0] << doacross->shift_counts[0];
+
+ for (i = 1; i < doacross->ncounts; i++)
+ flattened |= counts[i] << doacross->shift_counts[i];
+ flattened++;
+ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE))
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ else
+ __atomic_store_n (array, flattened, MEMMODEL_RELEASE);
+ return;
+ }
+
+ __atomic_thread_fence (MEMMODEL_ACQUIRE);
+ if (sizeof (gomp_ull) == sizeof (unsigned long))
+ {
+ gomp_ull *array = (gomp_ull *) (doacross->array
+ + ent * doacross->elt_sz);
+
+ for (i = doacross->ncounts; i-- > 0; )
+ {
+ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED))
+ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE);
+ }
+ }
+ else
+ {
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+
+ for (i = doacross->ncounts; i-- > 0; )
+ {
+ gomp_ull cull = counts[i] + 1UL;
+ unsigned long c = (unsigned long) cull;
+ if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED))
+ __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE);
+ c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2);
+ if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED))
+ __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE);
+ }
+ }
+}
+
+/* DOACROSS WAIT operation. */
+
+void
+GOMP_doacross_ull_wait (gomp_ull first, ...)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_work_share *ws = thr->ts.work_share;
+ struct gomp_doacross_work_share *doacross = ws->doacross;
+ va_list ap;
+ unsigned long ent;
+ unsigned int i;
+
+ if (__builtin_expect (doacross == NULL, 0))
+ {
+ __sync_synchronize ();
+ return;
+ }
+
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1))
+ {
+ if (ws->chunk_size_ull == 0)
+ {
+ if (first < doacross->boundary_ull)
+ ent = first / (doacross->q_ull + 1);
+ else
+ ent = (first - doacross->boundary_ull) / doacross->q_ull
+ + doacross->t;
+ }
+ else
+ ent = first / ws->chunk_size_ull % thr->ts.team->nthreads;
+ }
+ else
+ ent = first / doacross->chunk_size_ull;
+
+ if (__builtin_expect (doacross->flattened, 1))
+ {
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+ gomp_ull flattened = first << doacross->shift_counts[0];
+ unsigned long cur;
+
+ va_start (ap, first);
+ for (i = 1; i < doacross->ncounts; i++)
+ flattened |= va_arg (ap, gomp_ull)
+ << doacross->shift_counts[i];
+ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE);
+ if (flattened < cur)
+ {
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ va_end (ap);
+ return;
+ }
+ doacross_spin (array, flattened, cur);
+ __atomic_thread_fence (MEMMODEL_RELEASE);
+ va_end (ap);
+ return;
+ }
+
+ if (sizeof (gomp_ull) == sizeof (unsigned long))
+ {
+ gomp_ull *array = (gomp_ull *) (doacross->array
+ + ent * doacross->elt_sz);
+ do
+ {
+ va_start (ap, first);
+ for (i = 0; i < doacross->ncounts; i++)
+ {
+ gomp_ull thisv
+ = (i ? va_arg (ap, gomp_ull) : first) + 1;
+ gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED);
+ if (thisv < cur)
+ {
+ i = doacross->ncounts;
+ break;
+ }
+ if (thisv > cur)
+ break;
+ }
+ va_end (ap);
+ if (i == doacross->ncounts)
+ break;
+ cpu_relax ();
+ }
+ while (1);
+ }
+ else
+ {
+ unsigned long *array = (unsigned long *) (doacross->array
+ + ent * doacross->elt_sz);
+ do
+ {
+ va_start (ap, first);
+ for (i = 0; i < doacross->ncounts; i++)
+ {
+ gomp_ull thisv
+ = (i ? va_arg (ap, gomp_ull) : first) + 1;
+ unsigned long t
+ = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2);
+ unsigned long cur
+ = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED);
+ if (t < cur)
+ {
+ i = doacross->ncounts;
+ break;
+ }
+ if (t > cur)
+ break;
+ t = thisv;
+ cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED);
+ if (t < cur)
+ {
+ i = doacross->ncounts;
+ break;
+ }
+ if (t > cur)
+ break;
+ }
+ va_end (ap);
+ if (i == doacross->ncounts)
+ break;
+ cpu_relax ();
+ }
+ while (1);
+ }
+ __sync_synchronize ();
+}
diff --git a/libgomp/target.c b/libgomp/target.c
index 758ece5d78c..de6a2c9c9c5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -38,6 +38,7 @@
#endif
#include <string.h>
#include <assert.h>
+#include <errno.h>
#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
@@ -133,17 +134,48 @@ resolve_device (int device_id)
if (device_id < 0 || device_id >= gomp_get_num_devices ())
return NULL;
+ gomp_mutex_lock (&devices[device_id].lock);
+ if (!devices[device_id].is_initialized)
+ gomp_init_device (&devices[device_id]);
+ gomp_mutex_unlock (&devices[device_id].lock);
+
return &devices[device_id];
}
-/* Handle the case where splay_tree_lookup found oldn for newn.
+static inline splay_tree_key
+gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
+{
+ if (key->host_start != key->host_end)
+ return splay_tree_lookup (mem_map, key);
+
+ key->host_end++;
+ splay_tree_key n = splay_tree_lookup (mem_map, key);
+ key->host_end--;
+ if (n)
+ return n;
+ key->host_start--;
+ n = splay_tree_lookup (mem_map, key);
+ key->host_start++;
+ if (n)
+ return n;
+ return splay_tree_lookup (mem_map, key);
+}
+
+/* Handle the case where gomp_map_lookup found oldn for newn.
Helper function of gomp_map_vars. */
static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
- splay_tree_key newn, unsigned char kind)
+ splay_tree_key newn, struct target_var_desc *tgt_var,
+ unsigned char kind)
{
+ tgt_var->key = oldn;
+ tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
+ tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
+ tgt_var->offset = newn->host_start - oldn->host_start;
+ tgt_var->length = newn->host_end - newn->host_start;
+
if ((kind & GOMP_MAP_FLAG_FORCE)
|| oldn->host_start > newn->host_start
|| oldn->host_end < newn->host_end)
@@ -154,14 +186,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
(void *) newn->host_start, (void *) newn->host_end,
(void *) oldn->host_start, (void *) oldn->host_end);
}
- oldn->refcount++;
+
+ if (GOMP_MAP_ALWAYS_TO_P (kind))
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ + newn->host_start - oldn->host_start),
+ (void *) newn->host_start,
+ newn->host_end - newn->host_start);
+ if (oldn->refcount != REFCOUNT_INFINITY)
+ oldn->refcount++;
}
static int
-get_kind (bool is_openacc, void *kinds, int idx)
+get_kind (bool short_mapkind, void *kinds, int idx)
{
- return is_openacc ? ((unsigned short *) kinds)[idx]
- : ((unsigned char *) kinds)[idx];
+ return short_mapkind ? ((unsigned short *) kinds)[idx]
+ : ((unsigned char *) kinds)[idx];
}
static void
@@ -185,20 +225,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
}
/* Add bias to the pointer value. */
cur_node.host_start += bias;
- cur_node.host_end = cur_node.host_start + 1;
- splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
- if (n == NULL)
- {
- /* Could be possibly zero size array section. */
- cur_node.host_end--;
- n = splay_tree_lookup (mem_map, &cur_node);
- if (n == NULL)
- {
- cur_node.host_start--;
- n = splay_tree_lookup (mem_map, &cur_node);
- cur_node.host_start++;
- }
- }
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
@@ -218,20 +246,81 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
sizeof (void *));
}
+static void
+gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+ size_t first, size_t i, void **hostaddrs,
+ size_t *sizes, void *kinds)
+{
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+ int kind;
+ const bool short_mapkind = true;
+ const int typemask = short_mapkind ? 0xff : 0x7;
+
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + sizes[i];
+ splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
+ kind = get_kind (short_mapkind, kinds, i);
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node,
+ &tgt->list[i], kind & typemask);
+ return;
+ }
+ if (sizes[i] == 0)
+ {
+ if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
+ {
+ cur_node.host_start--;
+ n2 = splay_tree_lookup (mem_map, &cur_node);
+ cur_node.host_start++;
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start
+ == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+ kind & typemask);
+ return;
+ }
+ }
+ cur_node.host_end++;
+ n2 = splay_tree_lookup (mem_map, &cur_node);
+ cur_node.host_end--;
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+ kind & typemask);
+ return;
+ }
+ }
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Trying to map into device [%p..%p) structure element when "
+ "other mapped elements from the same structure weren't mapped "
+ "together with it", (void *) cur_node.host_start,
+ (void *) cur_node.host_end);
+}
+
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
- bool is_openacc, bool is_target)
+ bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
- const int rshift = is_openacc ? 8 : 3;
- const int typemask = is_openacc ? 0xff : 0x7;
+ bool has_firstprivate = false;
+ const int rshift = short_mapkind ? 8 : 3;
+ const int typemask = short_mapkind ? 0xff : 0x7;
struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
struct target_mem_desc *tgt
= gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
tgt->list_count = mapnum;
- tgt->refcount = 1;
+ tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
tgt->device_descr = devicep;
if (mapnum == 0)
@@ -239,7 +328,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt_align = sizeof (void *);
tgt_size = 0;
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
size_t align = 4 * sizeof (void *);
tgt_align = align;
@@ -250,10 +339,61 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
for (i = 0; i < mapnum; i++)
{
- int kind = get_kind (is_openacc, kinds, i);
- if (hostaddrs[i] == NULL)
+ int kind = get_kind (short_mapkind, kinds, i);
+ if (hostaddrs[i] == NULL
+ || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 0;
+ continue;
+ }
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
+ {
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("use_device_ptr pointer wasn't mapped");
+ }
+ cur_node.host_start -= n->host_start;
+ hostaddrs[i]
+ = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start);
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 0;
+ continue;
+ }
+ else if ((kind & typemask) == GOMP_MAP_STRUCT)
+ {
+ size_t first = i + 1;
+ size_t last = i + sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = (uintptr_t) hostaddrs[last]
+ + sizes[last];
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 2;
+ splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ size_t align = (size_t) 1 << (kind >> rshift);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size -= (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
+ not_found_cnt += last - i;
+ for (i = first; i <= last; i++)
+ tgt->list[i].key = NULL;
+ i--;
+ continue;
+ }
+ for (i = first; i <= last; i++)
+ gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ sizes, kinds);
+ i--;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
@@ -261,15 +401,37 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
- splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
- if (n)
+ if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ tgt->list[i].key = NULL;
+
+ size_t align = (size_t) 1 << (kind >> rshift);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += cur_node.host_end - cur_node.host_start;
+ has_firstprivate = true;
+ continue;
+ }
+ splay_tree_key n;
+ if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
{
- tgt->list[i] = n;
- gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
+ n = gomp_map_lookup (mem_map, &cur_node);
+ if (!n)
+ {
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 1;
+ continue;
+ }
}
else
+ n = splay_tree_lookup (mem_map, &cur_node);
+ if (n)
+ gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+ kind & typemask);
+ else
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
size_t align = (size_t) 1 << (kind >> rshift);
not_found_cnt++;
@@ -281,7 +443,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
{
size_t j;
for (j = i + 1; j < mapnum; j++)
- if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
@@ -290,7 +452,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
else
{
- tgt->list[j] = NULL;
+ tgt->list[j].key = NULL;
i++;
}
}
@@ -308,7 +470,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_end = tgt->tgt_start + sizes[0];
}
- else if (not_found_cnt || is_target)
+ else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
{
/* Allocate tgt_align aligned tgt_size block of memory. */
/* FIXME: Perhaps change interface to allocate properly aligned
@@ -327,22 +489,74 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
tgt_size = 0;
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
tgt_size = mapnum * sizeof (void *);
tgt->array = NULL;
- if (not_found_cnt)
+ if (not_found_cnt || has_firstprivate)
{
- tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
+ if (not_found_cnt)
+ tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
splay_tree_node array = tgt->array;
- size_t j;
+ size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
+ uintptr_t field_tgt_base = 0;
for (i = 0; i < mapnum; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
{
- int kind = get_kind (is_openacc, kinds, i);
+ int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
continue;
+ switch (kind & typemask)
+ {
+ size_t align, len, first, last;
+ splay_tree_key n;
+ case GOMP_MAP_FIRSTPRIVATE:
+ align = (size_t) 1 << (kind >> rshift);
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt->list[i].offset = tgt_size;
+ len = sizes[i];
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (tgt->tgt_start + tgt_size),
+ (void *) hostaddrs[i], len);
+ tgt_size += len;
+ continue;
+ case GOMP_MAP_FIRSTPRIVATE_INT:
+ case GOMP_MAP_USE_DEVICE_PTR:
+ case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+ continue;
+ case GOMP_MAP_STRUCT:
+ first = i + 1;
+ last = i + sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = (uintptr_t) hostaddrs[last]
+ + sizes[last];
+ if (tgt->list[first].key != NULL)
+ continue;
+ n = splay_tree_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ size_t align = (size_t) 1 << (kind >> rshift);
+ tgt_size -= (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ field_tgt_base = (uintptr_t) hostaddrs[first];
+ field_tgt_offset = tgt_size;
+ field_tgt_clear = last;
+ tgt_size += cur_node.host_end
+ - (uintptr_t) hostaddrs[first];
+ continue;
+ }
+ for (i = first; i <= last; i++)
+ gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ sizes, kinds);
+ i--;
+ continue;
+ default:
+ break;
+ }
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) hostaddrs[i];
if (!GOMP_MAP_POINTER_P (kind & typemask))
@@ -351,19 +565,31 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
k->host_end = k->host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n)
- {
- tgt->list[i] = n;
- gomp_map_vars_existing (devicep, n, k, kind & typemask);
- }
+ gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+ kind & typemask);
else
{
size_t align = (size_t) 1 << (kind >> rshift);
- tgt->list[i] = k;
- tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt->list[i].key = k;
k->tgt = tgt;
- k->tgt_offset = tgt_size;
- tgt_size += k->host_end - k->host_start;
- k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ if (field_tgt_clear != ~(size_t) 0)
+ {
+ k->tgt_offset = k->host_start - field_tgt_base
+ + field_tgt_offset;
+ if (i == field_tgt_clear)
+ field_tgt_clear = ~(size_t) 0;
+ }
+ else
+ {
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ k->tgt_offset = tgt_size;
+ tgt_size += k->host_end - k->host_start;
+ }
+ tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].always_copy_from
+ = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
+ tgt->list[i].offset = 0;
+ tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
k->async_refcount = 0;
tgt->refcount++;
@@ -376,11 +602,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_ALLOC:
case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
break;
case GOMP_MAP_TO:
case GOMP_MAP_TOFROM:
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_FORCE_TOFROM:
+ case GOMP_MAP_ALWAYS_TO:
+ case GOMP_MAP_ALWAYS_TOFROM:
/* FIXME: Perhaps add some smarts, like if copying
several adjacent fields from host to target, use some
host buffer to avoid sending each var individually. */
@@ -403,7 +632,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
k->host_end - k->host_start);
for (j = i + 1; j < mapnum; j++)
- if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
+ j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < k->host_start
@@ -412,8 +642,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
else
{
- tgt->list[j] = k;
- k->refcount++;
+ tgt->list[j].key = k;
+ tgt->list[j].copy_from = false;
+ tgt->list[j].always_copy_from = false;
+ if (k->refcount != REFCOUNT_INFINITY)
+ k->refcount++;
gomp_map_pointer (tgt,
(uintptr_t) *(void **) hostaddrs[j],
k->tgt_offset
@@ -460,15 +693,30 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
}
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
for (i = 0; i < mapnum; i++)
{
- if (tgt->list[i] == NULL)
- cur_node.tgt_offset = (uintptr_t) NULL;
+ if (tgt->list[i].key == NULL)
+ {
+ if (tgt->list[i].offset == ~(uintptr_t) 0)
+ cur_node.tgt_offset = (uintptr_t) hostaddrs[i];
+ else if (tgt->list[i].offset == ~(uintptr_t) 1)
+ cur_node.tgt_offset = 0;
+ else if (tgt->list[i].offset == ~(uintptr_t) 2)
+ cur_node.tgt_offset = tgt->list[i + 1].key->tgt->tgt_start
+ + tgt->list[i + 1].key->tgt_offset
+ + tgt->list[i + 1].offset
+ + (uintptr_t) hostaddrs[i]
+ - (uintptr_t) hostaddrs[i + 1];
+ else
+ cur_node.tgt_offset = tgt->tgt_start
+ + tgt->list[i].offset;
+ }
else
- cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset;
+ cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset
+ + tgt->list[i].offset;
/* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
@@ -478,6 +726,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
}
+ /* If the variable from "omp target enter data" map-list was already mapped,
+ tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
+ gomp_exit_data. */
+ if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
+ {
+ free (tgt);
+ tgt = NULL;
+ }
+
gomp_mutex_unlock (&devicep->lock);
return tgt;
}
@@ -508,17 +765,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
gomp_mutex_lock (&devicep->lock);
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
;
- else if (tgt->list[i]->refcount > 1)
+ else if (tgt->list[i].key->refcount > 1)
{
- tgt->list[i]->refcount--;
- tgt->list[i]->async_refcount++;
+ tgt->list[i].key->refcount--;
+ tgt->list[i].key->async_refcount++;
}
else
{
- splay_tree_key k = tgt->list[i];
- if (k->copy_from)
+ splay_tree_key k = tgt->list[i].key;
+ if (tgt->list[i].copy_from)
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset),
k->host_end - k->host_start);
@@ -546,25 +803,41 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
size_t i;
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
- ;
- else if (tgt->list[i]->refcount > 1)
- tgt->list[i]->refcount--;
- else if (tgt->list[i]->async_refcount > 0)
- tgt->list[i]->async_refcount--;
- else
- {
- splay_tree_key k = tgt->list[i];
- if (k->copy_from && do_copyfrom)
- devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
- (void *) (k->tgt->tgt_start + k->tgt_offset),
- k->host_end - k->host_start);
- splay_tree_remove (&devicep->mem_map, k);
- if (k->tgt->refcount > 1)
- k->tgt->refcount--;
- else
- gomp_unmap_tgt (k->tgt);
- }
+ {
+ splay_tree_key k = tgt->list[i].key;
+ if (k == NULL)
+ continue;
+
+ bool do_unmap = false;
+ if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+ k->refcount--;
+ else if (k->refcount == 1)
+ {
+ if (k->async_refcount > 0)
+ k->async_refcount--;
+ else
+ {
+ k->refcount--;
+ do_unmap = true;
+ }
+ }
+
+ if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+ || tgt->list[i].always_copy_from)
+ devicep->dev2host_func (devicep->target_id,
+ (void *) (k->host_start + tgt->list[i].offset),
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + tgt->list[i].offset),
+ tgt->list[i].length);
+ if (do_unmap)
+ {
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ gomp_unmap_tgt (k->tgt);
+ }
+ }
if (tgt->refcount > 1)
tgt->refcount--;
@@ -576,11 +849,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
- size_t *sizes, void *kinds, bool is_openacc)
+ size_t *sizes, void *kinds, bool short_mapkind)
{
size_t i;
struct splay_tree_key_s cur_node;
- const int typemask = is_openacc ? 0xff : 0x7;
+ const int typemask = short_mapkind ? 0xff : 0x7;
if (!devicep)
return;
@@ -597,7 +870,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
if (n)
{
- int kind = get_kind (is_openacc, kinds, i);
+ int kind = get_kind (short_mapkind, kinds, i);
if (n->host_start > cur_node.host_start
|| n->host_end < cur_node.host_end)
{
@@ -626,13 +899,6 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
- n->host_start),
cur_node.host_end - cur_node.host_start);
}
- else
- {
- gomp_mutex_unlock (&devicep->lock);
- gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
- (void *) cur_node.host_start,
- (void *) cur_node.host_end);
- }
}
gomp_mutex_unlock (&devicep->lock);
}
@@ -678,7 +944,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
/* Insert host-target address mapping into splay tree. */
struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
- tgt->refcount = 1;
+ tgt->refcount = REFCOUNT_INFINITY;
tgt->tgt_start = 0;
tgt->tgt_end = 0;
tgt->to_free = NULL;
@@ -694,9 +960,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->host_end = k->host_start + 1;
k->tgt = tgt;
k->tgt_offset = target_table[i].start;
- k->refcount = 1;
+ k->refcount = REFCOUNT_INFINITY;
k->async_refcount = 0;
- k->copy_from = false;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
@@ -720,9 +985,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
k->tgt = tgt;
k->tgt_offset = target_var->start;
- k->refcount = 1;
+ k->refcount = REFCOUNT_INFINITY;
k->async_refcount = 0;
- k->copy_from = false;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
@@ -945,6 +1209,47 @@ gomp_fini_device (struct gomp_device_descr *devicep)
devicep->is_initialized = false;
}
+/* Host fallback for GOMP_target{,_41} routines. */
+
+static void
+gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
+{
+ struct gomp_thread old_thr, *thr = gomp_thread ();
+ old_thr = *thr;
+ memset (thr, '\0', sizeof (*thr));
+ if (gomp_places_list)
+ {
+ thr->place = old_thr.place;
+ thr->ts.place_partition_len = gomp_places_list_len;
+ }
+ fn (hostaddrs);
+ gomp_free_thread (thr);
+ *thr = old_thr;
+}
+
+/* Helper function of GOMP_target{,_41} routines. */
+
+static void *
+gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
+ void (*host_fn) (void *))
+{
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
+ return (void *) host_fn;
+ else
+ {
+ gomp_mutex_lock (&devicep->lock);
+ struct splay_tree_key_s k;
+ k.host_start = (uintptr_t) host_fn;
+ k.host_end = k.host_start + 1;
+ splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
+ gomp_mutex_unlock (&devicep->lock);
+ if (tgt_fn == NULL)
+ gomp_fatal ("Target function wasn't mapped");
+
+ return (void *) tgt_fn->tgt_offset;
+ }
+}
+
/* Called when encountering a target directive. If DEVICE
is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
GOMP_DEVICE_HOST_FALLBACK (or any value
@@ -964,51 +1269,85 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return gomp_target_fallback (fn, hostaddrs);
+
+ void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+
+ struct target_mem_desc *tgt_vars
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
+ GOMP_MAP_VARS_TARGET);
+ struct gomp_thread old_thr, *thr = gomp_thread ();
+ old_thr = *thr;
+ memset (thr, '\0', sizeof (*thr));
+ if (gomp_places_list)
{
- /* Host fallback. */
- struct gomp_thread old_thr, *thr = gomp_thread ();
- old_thr = *thr;
- memset (thr, '\0', sizeof (*thr));
- if (gomp_places_list)
- {
- thr->place = old_thr.place;
- thr->ts.place_partition_len = gomp_places_list_len;
- }
- fn (hostaddrs);
- gomp_free_thread (thr);
- *thr = old_thr;
- return;
+ thr->place = old_thr.place;
+ thr->ts.place_partition_len = gomp_places_list_len;
}
+ devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+ gomp_free_thread (thr);
+ *thr = old_thr;
+ gomp_unmap_vars (tgt_vars, true);
+}
- gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->lock);
+void
+GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
- void *fn_addr;
+ /* If there are depend clauses, but nowait is not present,
+ block the parent task until the dependencies are resolved
+ and then just continue with the rest of the function as if it
+ is a merged task. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
- if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
- fn_addr = (void *) fn;
- else
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
{
- gomp_mutex_lock (&devicep->lock);
- struct splay_tree_key_s k;
- k.host_start = (uintptr_t) fn;
- k.host_end = k.host_start + 1;
- splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
- if (tgt_fn == NULL)
+ size_t i, tgt_align = 0, tgt_size = 0;
+ char *tgt = NULL;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += sizes[i];
+ }
+ if (tgt_align)
{
- gomp_mutex_unlock (&devicep->lock);
- gomp_fatal ("Target function wasn't mapped");
+ tgt = gomp_alloca (tgt_size + tgt_align - 1);
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+ if (al)
+ tgt += tgt_align - al;
+ tgt_size = 0;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+ hostaddrs[i] = tgt + tgt_size;
+ tgt_size = tgt_size + sizes[i];
+ }
}
- gomp_mutex_unlock (&devicep->lock);
-
- fn_addr = (void *) tgt_fn->tgt_offset;
+ gomp_target_fallback (fn, hostaddrs);
+ return;
}
+ void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+
struct target_mem_desc *tgt_vars
- = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
- true);
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_TARGET);
struct gomp_thread old_thr, *thr = gomp_thread ();
old_thr = *thr;
memset (thr, '\0', sizeof (*thr));
@@ -1023,6 +1362,26 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
gomp_unmap_vars (tgt_vars, true);
}
+/* Host fallback for GOMP_target_data{,_41} routines. */
+
+static void
+gomp_target_data_fallback (void)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ if (icv->target_data)
+ {
+ /* Even when doing a host fallback, if there are any active
+ #pragma omp target data constructs, need to remember the
+ new #pragma omp target data, otherwise GOMP_target_end_data
+ would get out of sync. */
+ struct target_mem_desc *tgt
+ = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
+ GOMP_MAP_VARS_DATA);
+ tgt->prev = icv->target_data;
+ icv->target_data = tgt;
+ }
+}
+
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned char *kinds)
@@ -1031,31 +1390,29 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
- {
- /* Host fallback. */
- struct gomp_task_icv *icv = gomp_icv (false);
- if (icv->target_data)
- {
- /* Even when doing a host fallback, if there are any active
- #pragma omp target data constructs, need to remember the
- new #pragma omp target data, otherwise GOMP_target_end_data
- would get out of sync. */
- struct target_mem_desc *tgt
- = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
- tgt->prev = icv->target_data;
- icv->target_data = tgt;
- }
- return;
- }
-
- gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->lock);
+ return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
- false);
+ GOMP_MAP_VARS_DATA);
+ struct gomp_task_icv *icv = gomp_icv (true);
+ tgt->prev = icv->target_data;
+ icv->target_data = tgt;
+}
+
+void
+GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return gomp_target_data_fallback ();
+
+ struct target_mem_desc *tgt
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_DATA);
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
@@ -1083,12 +1440,230 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
return;
+ gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
+}
+
+void
+GOMP_target_update_41 (int device, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ /* If there are depend clauses, but nowait is not present,
+ block the parent task until the dependencies are resolved
+ and then just continue with the rest of the function as if it
+ is a merged task. Until we are able to schedule task during
+ variable mapping or unmapping, ignore nowait if depend clauses
+ are not present. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ {
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+ && thr->ts.team
+ && !thr->task->final_task)
+ {
+ gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags | GOMP_TARGET_FLAG_UPDATE,
+ depend);
+ return;
+ }
+
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+ }
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return;
+
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
+}
+
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+ const int typemask = 0xff;
+ size_t i;
gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
+ for (i = 0; i < mapnum; i++)
+ {
+ struct splay_tree_key_s cur_node;
+ unsigned char kind = kinds[i] & typemask;
+ switch (kind)
+ {
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+ case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+ case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + sizes[i];
+ splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
+ || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
+ ? gomp_map_lookup (&devicep->mem_map, &cur_node)
+ : splay_tree_lookup (&devicep->mem_map, &cur_node);
+ if (!k)
+ continue;
+
+ if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+ k->refcount--;
+ if ((kind == GOMP_MAP_DELETE
+ || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
+ && k->refcount != REFCOUNT_INFINITY)
+ k->refcount = 0;
+
+ if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+ || kind == GOMP_MAP_ALWAYS_FROM)
+ devicep->dev2host_func (devicep->target_id,
+ (void *) cur_node.host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + cur_node.host_start
+ - k->host_start),
+ cur_node.host_end - cur_node.host_start);
+ if (k->refcount == 0)
+ {
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ gomp_unmap_tgt (k->tgt);
+ }
+
+ break;
+ default:
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+ kind);
+ }
+ }
+
gomp_mutex_unlock (&devicep->lock);
+}
- gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
+void
+GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ /* If there are depend clauses, but nowait is not present,
+ block the parent task until the dependencies are resolved
+ and then just continue with the rest of the function as if it
+ is a merged task. Until we are able to schedule task during
+ variable mapping or unmapping, ignore nowait if depend clauses
+ are not present. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ {
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+ && thr->ts.team
+ && !thr->task->final_task)
+ {
+ gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags, depend);
+ return;
+ }
+
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+ }
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return;
+
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
+ size_t i;
+ if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+ {
+ gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
+ &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ i += sizes[i];
+ }
+ else
+ gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
+ true, GOMP_MAP_VARS_ENTER_DATA);
+ else
+ gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
+}
+
+void
+gomp_target_task_fn (void *data)
+{
+ struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+ if (ttask->fn != NULL)
+ {
+ /* GOMP_target_41 */
+ }
+ else if (ttask->devicep == NULL
+ || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return;
+
+ size_t i;
+ if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
+ gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ ttask->kinds, true);
+ else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+ for (i = 0; i < ttask->mapnum; i++)
+ if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+ {
+ gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1,
+ &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+ &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ i += ttask->sizes[i];
+ }
+ else
+ gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL,
+ &ttask->sizes[i], &ttask->kinds[i],
+ true, GOMP_MAP_VARS_ENTER_DATA);
+ else
+ gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs,
+ ttask->sizes, ttask->kinds);
}
void
@@ -1103,6 +1678,384 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
(void) num_teams;
}
+void *
+omp_target_alloc (size_t size, int device_num)
+{
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return malloc (size);
+
+ if (device_num < 0)
+ return NULL;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return NULL;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return malloc (size);
+
+ gomp_mutex_lock (&devicep->lock);
+ void *ret = devicep->alloc_func (devicep->target_id, size);
+ gomp_mutex_unlock (&devicep->lock);
+ return ret;
+}
+
+void
+omp_target_free (void *device_ptr, int device_num)
+{
+ if (device_ptr == NULL)
+ return;
+
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ {
+ free (device_ptr);
+ return;
+ }
+
+ if (device_num < 0)
+ return;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ free (device_ptr);
+ return;
+ }
+
+ gomp_mutex_lock (&devicep->lock);
+ devicep->free_func (devicep->target_id, device_ptr);
+ gomp_mutex_unlock (&devicep->lock);
+}
+
+int
+omp_target_is_present (void *ptr, int device_num)
+{
+ if (ptr == NULL)
+ return 1;
+
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return 1;
+
+ if (device_num < 0)
+ return 0;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return 0;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return 1;
+
+ gomp_mutex_lock (&devicep->lock);
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+
+ cur_node.host_start = (uintptr_t) ptr;
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ int ret = n != NULL;
+ gomp_mutex_unlock (&devicep->lock);
+ return ret;
+}
+
+int
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+ size_t src_offset, int dst_device_num, int src_device_num)
+{
+ struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+ {
+ if (dst_device_num < 0)
+ return EINVAL;
+
+ dst_devicep = resolve_device (dst_device_num);
+ if (dst_devicep == NULL)
+ return EINVAL;
+
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ dst_devicep = NULL;
+ }
+ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+ {
+ if (src_device_num < 0)
+ return EINVAL;
+
+ src_devicep = resolve_device (src_device_num);
+ if (src_devicep == NULL)
+ return EINVAL;
+
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ src_devicep = NULL;
+ }
+ if (src_devicep == NULL && dst_devicep == NULL)
+ {
+ memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
+ return 0;
+ }
+ if (src_devicep == NULL)
+ {
+ gomp_mutex_lock (&dst_devicep->lock);
+ dst_devicep->host2dev_func (dst_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
+ gomp_mutex_unlock (&dst_devicep->lock);
+ return 0;
+ }
+ if (dst_devicep == NULL)
+ {
+ gomp_mutex_lock (&src_devicep->lock);
+ src_devicep->dev2host_func (src_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
+ gomp_mutex_unlock (&src_devicep->lock);
+ return 0;
+ }
+ if (src_devicep == dst_devicep)
+ {
+ gomp_mutex_lock (&src_devicep->lock);
+ src_devicep->dev2dev_func (src_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
+ gomp_mutex_unlock (&src_devicep->lock);
+ return 0;
+ }
+ return EINVAL;
+}
+
+static int
+omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+ int num_dims, const size_t *volume,
+ const size_t *dst_offsets,
+ const size_t *src_offsets,
+ const size_t *dst_dimensions,
+ const size_t *src_dimensions,
+ struct gomp_device_descr *dst_devicep,
+ struct gomp_device_descr *src_devicep)
+{
+ size_t dst_slice = element_size;
+ size_t src_slice = element_size;
+ size_t j, dst_off, src_off, length;
+ int i, ret;
+
+ if (num_dims == 1)
+ {
+ if (__builtin_mul_overflow (element_size, volume[0], &length)
+ || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
+ || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+ return EINVAL;
+ if (dst_devicep == NULL && src_devicep == NULL)
+ memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+ else if (src_devicep == NULL)
+ dst_devicep->host2dev_func (dst_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
+ else if (dst_devicep == NULL)
+ src_devicep->dev2host_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
+ else if (src_devicep == dst_devicep)
+ src_devicep->dev2dev_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
+ else
+ return EINVAL;
+ return 0;
+ }
+
+ /* FIXME: it would be nice to have some plugin function to handle
+ num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
+ be handled in the generic recursion below, and for host-host it
+ should be used even for any num_dims >= 2. */
+
+ for (i = 1; i < num_dims; i++)
+ if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
+ || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
+ return EINVAL;
+ if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
+ || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
+ return EINVAL;
+ for (j = 0; j < volume[0]; j++)
+ {
+ ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
+ (char *) src + src_off,
+ element_size, num_dims - 1,
+ volume + 1, dst_offsets + 1,
+ src_offsets + 1, dst_dimensions + 1,
+ src_dimensions + 1, dst_devicep,
+ src_devicep);
+ if (ret)
+ return ret;
+ dst_off += dst_slice;
+ src_off += src_slice;
+ }
+ return 0;
+}
+
+int
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+ int num_dims, const size_t *volume,
+ const size_t *dst_offsets,
+ const size_t *src_offsets,
+ const size_t *dst_dimensions,
+ const size_t *src_dimensions,
+ int dst_device_num, int src_device_num)
+{
+ struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+ if (!dst && !src)
+ return INT_MAX;
+
+ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+ {
+ if (dst_device_num < 0)
+ return EINVAL;
+
+ dst_devicep = resolve_device (dst_device_num);
+ if (dst_devicep == NULL)
+ return EINVAL;
+
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ dst_devicep = NULL;
+ }
+ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+ {
+ if (src_device_num < 0)
+ return EINVAL;
+
+ src_devicep = resolve_device (src_device_num);
+ if (src_devicep == NULL)
+ return EINVAL;
+
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ src_devicep = NULL;
+ }
+
+ if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
+ return EINVAL;
+
+ if (src_devicep)
+ gomp_mutex_lock (&src_devicep->lock);
+ else if (dst_devicep)
+ gomp_mutex_lock (&dst_devicep->lock);
+ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
+ volume, dst_offsets, src_offsets,
+ dst_dimensions, src_dimensions,
+ dst_devicep, src_devicep);
+ if (src_devicep)
+ gomp_mutex_unlock (&src_devicep->lock);
+ else if (dst_devicep)
+ gomp_mutex_unlock (&dst_devicep->lock);
+ return ret;
+}
+
+int
+omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+ size_t device_offset, int device_num)
+{
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return EINVAL;
+
+ if (device_num < 0)
+ return EINVAL;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return EINVAL;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return EINVAL;
+
+ gomp_mutex_lock (&devicep->lock);
+
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+ int ret = EINVAL;
+
+ cur_node.host_start = (uintptr_t) host_ptr;
+ cur_node.host_end = cur_node.host_start + size;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n)
+ {
+ if (n->tgt->tgt_start + n->tgt_offset
+ == (uintptr_t) device_ptr + device_offset
+ && n->host_start <= cur_node.host_start
+ && n->host_end >= cur_node.host_end)
+ ret = 0;
+ }
+ else
+ {
+ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+ tgt->array = gomp_malloc (sizeof (*tgt->array));
+ tgt->refcount = 1;
+ tgt->tgt_start = 0;
+ tgt->tgt_end = 0;
+ tgt->to_free = NULL;
+ tgt->prev = NULL;
+ tgt->list_count = 0;
+ tgt->device_descr = devicep;
+ splay_tree_node array = tgt->array;
+ splay_tree_key k = &array->key;
+ k->host_start = cur_node.host_start;
+ k->host_end = cur_node.host_end;
+ k->tgt = tgt;
+ k->tgt_offset = (uintptr_t) device_ptr + device_offset;
+ k->refcount = REFCOUNT_INFINITY;
+ k->async_refcount = 0;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ ret = 0;
+ }
+ gomp_mutex_unlock (&devicep->lock);
+ return ret;
+}
+
+int
+omp_target_disassociate_ptr (void *ptr, int device_num)
+{
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return EINVAL;
+
+ if (device_num < 0)
+ return EINVAL;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return EINVAL;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return EINVAL;
+
+ gomp_mutex_lock (&devicep->lock);
+
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+ int ret = EINVAL;
+
+ cur_node.host_start = (uintptr_t) ptr;
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n
+ && n->host_start == cur_node.host_start
+ && n->refcount == REFCOUNT_INFINITY
+ && n->tgt->tgt_start == 0
+ && n->tgt->to_free == NULL
+ && n->tgt->refcount == 1
+ && n->tgt->list_count == 0)
+ {
+ splay_tree_remove (&devicep->mem_map, n);
+ gomp_unmap_tgt (n->tgt);
+ ret = 0;
+ }
+
+ gomp_mutex_unlock (&devicep->lock);
+ return ret;
+}
+
#ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
@@ -1153,7 +2106,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (host2dev);
device->capabilities = device->get_caps_func ();
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
- DLSYM (run);
+ {
+ DLSYM (run);
+ DLSYM (dev2dev);
+ }
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
if (!DLSYM_OPT (openacc.exec, openacc_parallel)
diff --git a/libgomp/task.c b/libgomp/task.c
index 74920d5ddb8..1246c6ae318 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -29,6 +29,7 @@
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
+#include "gomp-constants.h"
typedef struct gomp_task_depend_entry *hash_entry_type;
@@ -91,6 +92,8 @@ gomp_end_task (void)
thr->task = task->parent;
}
+/* Orphan the task in CHILDREN and all its siblings. */
+
static inline void
gomp_clear_parent (struct gomp_task *children)
{
@@ -105,16 +108,136 @@ gomp_clear_parent (struct gomp_task *children)
while (task != children);
}
-static void gomp_task_maybe_wait_for_dependencies (void **depend);
+/* Helper function for GOMP_task and gomp_create_target_task. Depend clause
+ handling for undeferred task creation. */
+
+static void
+gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
+ void **depend)
+{
+ size_t ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ hash_entry_type ent;
+
+ task->depend_count = ndepend;
+ task->num_dependees = 0;
+ if (parent->depend_hash == NULL)
+ parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+ for (i = 0; i < ndepend; i++)
+ {
+ task->depend[i].addr = depend[2 + i];
+ task->depend[i].next = NULL;
+ task->depend[i].prev = NULL;
+ task->depend[i].task = task;
+ task->depend[i].is_in = i >= nout;
+ task->depend[i].redundant = false;
+ task->depend[i].redundant_out = false;
+
+ hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
+ &task->depend[i], INSERT);
+ hash_entry_type out = NULL, last = NULL;
+ if (*slot)
+ {
+ /* If multiple depends on the same task are the same, all but the
+ first one are redundant. As inout/out come first, if any of them
+ is inout/out, it will win, which is the right semantics. */
+ if ((*slot)->task == task)
+ {
+ task->depend[i].redundant = true;
+ continue;
+ }
+ for (ent = *slot; ent; ent = ent->next)
+ {
+ if (ent->redundant_out)
+ break;
+
+ last = ent;
+
+ /* depend(in:...) doesn't depend on earlier depend(in:...). */
+ if (i >= nout && ent->is_in)
+ continue;
+
+ if (!ent->is_in)
+ out = ent;
+
+ struct gomp_task *tsk = ent->task;
+ if (tsk->dependers == NULL)
+ {
+ tsk->dependers
+ = gomp_malloc (sizeof (struct gomp_dependers_vec)
+ + 6 * sizeof (struct gomp_task *));
+ tsk->dependers->n_elem = 1;
+ tsk->dependers->allocated = 6;
+ tsk->dependers->elem[0] = task;
+ task->num_dependees++;
+ continue;
+ }
+ /* We already have some other dependency on tsk from earlier
+ depend clause. */
+ else if (tsk->dependers->n_elem
+ && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
+ == task))
+ continue;
+ else if (tsk->dependers->n_elem == tsk->dependers->allocated)
+ {
+ tsk->dependers->allocated
+ = tsk->dependers->allocated * 2 + 2;
+ tsk->dependers
+ = gomp_realloc (tsk->dependers,
+ sizeof (struct gomp_dependers_vec)
+ + (tsk->dependers->allocated
+ * sizeof (struct gomp_task *)));
+ }
+ tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+ task->num_dependees++;
+ }
+ task->depend[i].next = *slot;
+ (*slot)->prev = &task->depend[i];
+ }
+ *slot = &task->depend[i];
+
+ /* There is no need to store more than one depend({,in}out:) task per
+ address in the hash table chain for the purpose of creation of
+ deferred tasks, because each out depends on all earlier outs, thus it
+ is enough to record just the last depend({,in}out:). For depend(in:),
+ we need to keep all of the previous ones not terminated yet, because
+ a later depend({,in}out:) might need to depend on all of them. So, if
+ the new task's clause is depend({,in}out:), we know there is at most
+ one other depend({,in}out:) clause in the list (out). For
+ non-deferred tasks we want to see all outs, so they are moved to the
+ end of the chain, after first redundant_out entry all following
+ entries should be redundant_out. */
+ if (!task->depend[i].is_in && out)
+ {
+ if (out != last)
+ {
+ out->next->prev = out->prev;
+ out->prev->next = out->next;
+ out->next = last->next;
+ out->prev = last;
+ last->next = out;
+ if (out->next)
+ out->next->prev = out;
+ }
+ out->redundant_out = true;
+ }
+ }
+}
/* Called when encountering an explicit task directive. If IF_CLAUSE is
false, then we must not delay in executing the task. If UNTIED is true,
- then the task may be executed by any member of the team. */
+ then the task may be executed by any member of the team.
+
+ DEPEND is an array containing:
+ depend[0]: number of depend elements.
+ depend[1]: number of depend elements of type "out".
+ depend[2..N+1]: address of [1..N]th depend element. */
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
long arg_size, long arg_align, bool if_clause, unsigned flags,
- void **depend)
+ void **depend, int priority)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
@@ -126,8 +249,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
might be running on different thread than FN. */
if (cpyfn)
if_clause = false;
- if (flags & 1)
- flags &= ~1;
+ flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
@@ -136,6 +258,11 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|| (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
return;
+ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
+ priority = 0;
+ /* FIXME, use priority. */
+ (void) priority;
+
if (!if_clause || team == NULL
|| (thr->task && thr->task->final_task)
|| team->task_count > 64 * team->nthreads)
@@ -148,12 +275,14 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
depend clauses for non-deferred tasks other than this, because
the parent task is suspended until the child task finishes and thus
it can't start further child tasks. */
- if ((flags & 8) && thr->task && thr->task->depend_hash)
+ if ((flags & GOMP_TASK_FLAG_DEPEND)
+ && thr->task && thr->task->depend_hash)
gomp_task_maybe_wait_for_dependencies (depend);
gomp_init_task (&task, thr->task, gomp_icv (false));
- task.kind = GOMP_TASK_IFFALSE;
- task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
+ task.kind = GOMP_TASK_UNDEFERRED;
+ task.final_task = (thr->task && thr->task->final_task)
+ || (flags & GOMP_TASK_FLAG_FINAL);
if (thr->task)
{
task.in_tied_task = thr->task->in_tied_task;
@@ -196,7 +325,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
bool do_wake;
size_t depend_size = 0;
- if (flags & 8)
+ if (flags & GOMP_TASK_FLAG_DEPEND)
depend_size = ((uintptr_t) depend[0]
* sizeof (struct gomp_task_depend_entry));
task = gomp_malloc (sizeof (*task) + depend_size
@@ -204,7 +333,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
& ~(uintptr_t) (arg_align - 1));
gomp_init_task (task, parent, gomp_icv (false));
- task->kind = GOMP_TASK_IFFALSE;
+ task->kind = GOMP_TASK_UNDEFERRED;
task->in_tied_task = parent->in_tied_task;
task->taskgroup = taskgroup;
thr->task = task;
@@ -219,7 +348,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
task->kind = GOMP_TASK_WAITING;
task->fn = fn;
task->fn_data = arg;
- task->final_task = (flags & 2) >> 1;
+ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
@@ -236,123 +365,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
taskgroup->num_children++;
if (depend_size)
{
- size_t ndepend = (uintptr_t) depend[0];
- size_t nout = (uintptr_t) depend[1];
- size_t i;
- hash_entry_type ent;
-
- task->depend_count = ndepend;
- task->num_dependees = 0;
- if (parent->depend_hash == NULL)
- parent->depend_hash
- = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
- for (i = 0; i < ndepend; i++)
- {
- task->depend[i].addr = depend[2 + i];
- task->depend[i].next = NULL;
- task->depend[i].prev = NULL;
- task->depend[i].task = task;
- task->depend[i].is_in = i >= nout;
- task->depend[i].redundant = false;
- task->depend[i].redundant_out = false;
-
- hash_entry_type *slot
- = htab_find_slot (&parent->depend_hash, &task->depend[i],
- INSERT);
- hash_entry_type out = NULL, last = NULL;
- if (*slot)
- {
- /* If multiple depends on the same task are the
- same, all but the first one are redundant.
- As inout/out come first, if any of them is
- inout/out, it will win, which is the right
- semantics. */
- if ((*slot)->task == task)
- {
- task->depend[i].redundant = true;
- continue;
- }
- for (ent = *slot; ent; ent = ent->next)
- {
- if (ent->redundant_out)
- break;
-
- last = ent;
-
- /* depend(in:...) doesn't depend on earlier
- depend(in:...). */
- if (i >= nout && ent->is_in)
- continue;
-
- if (!ent->is_in)
- out = ent;
-
- struct gomp_task *tsk = ent->task;
- if (tsk->dependers == NULL)
- {
- tsk->dependers
- = gomp_malloc (sizeof (struct gomp_dependers_vec)
- + 6 * sizeof (struct gomp_task *));
- tsk->dependers->n_elem = 1;
- tsk->dependers->allocated = 6;
- tsk->dependers->elem[0] = task;
- task->num_dependees++;
- continue;
- }
- /* We already have some other dependency on tsk
- from earlier depend clause. */
- else if (tsk->dependers->n_elem
- && (tsk->dependers->elem[tsk->dependers->n_elem
- - 1]
- == task))
- continue;
- else if (tsk->dependers->n_elem
- == tsk->dependers->allocated)
- {
- tsk->dependers->allocated
- = tsk->dependers->allocated * 2 + 2;
- tsk->dependers
- = gomp_realloc (tsk->dependers,
- sizeof (struct gomp_dependers_vec)
- + (tsk->dependers->allocated
- * sizeof (struct gomp_task *)));
- }
- tsk->dependers->elem[tsk->dependers->n_elem++] = task;
- task->num_dependees++;
- }
- task->depend[i].next = *slot;
- (*slot)->prev = &task->depend[i];
- }
- *slot = &task->depend[i];
-
- /* There is no need to store more than one depend({,in}out:)
- task per address in the hash table chain for the purpose
- of creation of deferred tasks, because each out
- depends on all earlier outs, thus it is enough to record
- just the last depend({,in}out:). For depend(in:), we need
- to keep all of the previous ones not terminated yet, because
- a later depend({,in}out:) might need to depend on all of
- them. So, if the new task's clause is depend({,in}out:),
- we know there is at most one other depend({,in}out:) clause
- in the list (out). For non-deferred tasks we want to see
- all outs, so they are moved to the end of the chain,
- after first redundant_out entry all following entries
- should be redundant_out. */
- if (!task->depend[i].is_in && out)
- {
- if (out != last)
- {
- out->next->prev = out->prev;
- out->prev->next = out->next;
- out->next = last->next;
- out->prev = last;
- last->next = out;
- if (out->next)
- out->next->prev = out;
- }
- out->redundant_out = true;
- }
- }
+ gomp_task_handle_depend (task, parent, depend);
if (task->num_dependees)
{
gomp_mutex_unlock (&team->task_lock);
@@ -374,6 +387,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
parent->children = task;
if (taskgroup)
{
+ /* If applicable, place task into its taskgroup. */
if (taskgroup->children)
{
task->next_taskgroup = taskgroup->children;
@@ -412,26 +426,340 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
}
}
+ialias (GOMP_taskgroup_start)
+ialias (GOMP_taskgroup_end)
+
+#define TYPE long
+#define UTYPE unsigned long
+#define TYPE_is_long 1
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef TYPE_is_long
+
+#define TYPE unsigned long long
+#define UTYPE TYPE
+#define GOMP_taskloop GOMP_taskloop_ull
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef GOMP_taskloop
+
+/* Called for nowait target tasks. */
+
+void
+gomp_create_target_task (struct gomp_device_descr *devicep,
+ void (*fn) (void *), size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
+ struct gomp_target_task *ttask;
+ struct gomp_task *task;
+ struct gomp_task *parent = thr->task;
+ struct gomp_taskgroup *taskgroup = parent->taskgroup;
+ bool do_wake;
+ size_t depend_size = 0;
+
+ if (depend != NULL)
+ depend_size = ((uintptr_t) depend[0]
+ * sizeof (struct gomp_task_depend_entry));
+ task = gomp_malloc (sizeof (*task) + depend_size
+ + sizeof (*ttask)
+ + mapnum * (sizeof (void *) + sizeof (size_t)
+ + sizeof (unsigned short)));
+ gomp_init_task (task, parent, gomp_icv (false));
+ task->kind = GOMP_TASK_WAITING;
+ task->in_tied_task = parent->in_tied_task;
+ task->taskgroup = taskgroup;
+ ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
+ ttask->devicep = devicep;
+ ttask->fn = fn;
+ ttask->mapnum = mapnum;
+ memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
+ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
+ memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
+ ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
+ memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
+ ttask->flags = flags;
+ task->fn = gomp_target_task_fn;
+ task->fn_data = ttask;
+ task->final_task = 0;
+ gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
+ || (taskgroup && taskgroup->cancelled), 0))
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_finish_task (task);
+ free (task);
+ return;
+ }
+ if (taskgroup)
+ taskgroup->num_children++;
+ if (depend_size)
+ {
+ gomp_task_handle_depend (task, parent, depend);
+ if (task->num_dependees)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+ }
+ if (parent->children)
+ {
+ task->next_child = parent->children;
+ task->prev_child = parent->children->prev_child;
+ task->next_child->prev_child = task;
+ task->prev_child->next_child = task;
+ }
+ else
+ {
+ task->next_child = task;
+ task->prev_child = task;
+ }
+ parent->children = task;
+ if (taskgroup)
+ {
+ /* If applicable, place task into its taskgroup. */
+ if (taskgroup->children)
+ {
+ task->next_taskgroup = taskgroup->children;
+ task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ task->next_taskgroup->prev_taskgroup = task;
+ task->prev_taskgroup->next_taskgroup = task;
+ }
+ else
+ {
+ task->next_taskgroup = task;
+ task->prev_taskgroup = task;
+ }
+ taskgroup->children = task;
+ }
+ if (team->task_queue)
+ {
+ task->next_queue = team->task_queue;
+ task->prev_queue = team->task_queue->prev_queue;
+ task->next_queue->prev_queue = task;
+ task->prev_queue->next_queue = task;
+ }
+ else
+ {
+ task->next_queue = task;
+ task->prev_queue = task;
+ team->task_queue = task;
+ }
+ ++team->task_count;
+ ++team->task_queued_count;
+ gomp_team_barrier_set_task_pending (&team->barrier);
+ do_wake = team->task_running_count + !parent->in_tied_task
+ < team->nthreads;
+ gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ gomp_team_barrier_wake (&team->barrier, 1);
+}
+
+#if _LIBGOMP_CHECKING
+/* Sanity check TASK to make sure it is in its parent's children
+ queue, and that the tasks therein are in the right order.
+
+ The expected order is:
+ parent_depends_on WAITING tasks
+ !parent_depends_on WAITING tasks
+ TIED tasks
+
+ PARENT is the alleged parent of TASK. */
+
+static void
+verify_children_queue (struct gomp_task *task, struct gomp_task *parent)
+{
+ if (task->parent != parent)
+ gomp_fatal ("verify_children_queue: incompatible parents");
+ /* It's OK, Annie was an orphan and she turned out all right. */
+ if (!parent)
+ return;
+
+ bool seen_tied = false;
+ bool seen_plain_waiting = false;
+ bool found = false;
+ struct gomp_task *t = parent->children;
+ while (1)
+ {
+ if (t == task)
+ found = true;
+ if (seen_tied && t->kind == GOMP_TASK_WAITING)
+ gomp_fatal ("verify_children_queue: WAITING task after TIED");
+ if (t->kind == GOMP_TASK_TIED)
+ seen_tied = true;
+ else if (t->kind == GOMP_TASK_WAITING)
+ {
+ if (t->parent_depends_on)
+ {
+ if (seen_plain_waiting)
+ gomp_fatal ("verify_children_queue: parent_depends_on after "
+ "!parent_depends_on");
+ }
+ else
+ seen_plain_waiting = true;
+ }
+ t = t->next_child;
+ if (t == parent->children)
+ break;
+ }
+ if (!found)
+ gomp_fatal ("verify_children_queue: child not found in parent queue");
+}
+
+/* Sanity check TASK to make sure it is in its taskgroup queue (if
+ applicable), and that the tasks therein are in the right order.
+
+ The expected order is that GOMP_TASK_WAITING tasks must come before
+ GOMP_TASK_TIED tasks.
+
+ TASK is the task. */
+
+static void
+verify_taskgroup_queue (struct gomp_task *task)
+{
+ struct gomp_taskgroup *taskgroup = task->taskgroup;
+ if (!taskgroup)
+ return;
+
+ bool seen_tied = false;
+ bool found = false;
+ struct gomp_task *t = taskgroup->children;
+ while (1)
+ {
+ if (t == task)
+ found = true;
+ if (t->kind == GOMP_TASK_WAITING && seen_tied)
+ gomp_fatal ("verify_taskgroup_queue: WAITING task after TIED");
+ if (t->kind == GOMP_TASK_TIED)
+ seen_tied = true;
+ t = t->next_taskgroup;
+ if (t == taskgroup->children)
+ break;
+ }
+ if (!found)
+ gomp_fatal ("verify_taskgroup_queue: child not found in parent queue");
+}
+
+/* Verify that TASK is in the team's task queue. */
+
+static void
+verify_task_queue (struct gomp_task *task, struct gomp_team *team)
+{
+ struct gomp_task *t = team->task_queue;
+ if (team)
+ while (1)
+ {
+ if (t == task)
+ return;
+ t = t->next_queue;
+ if (t == team->task_queue)
+ break;
+ }
+ gomp_fatal ("verify_team_queue: child not in team");
+}
+#endif
+
static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
- struct gomp_taskgroup *taskgroup, struct gomp_team *team)
+ struct gomp_team *team)
{
+#if _LIBGOMP_CHECKING
+ verify_children_queue (child_task, parent);
+ verify_taskgroup_queue (child_task);
+ verify_task_queue (child_task, team);
+#endif
+
if (parent)
{
+ /* Adjust children such that it will point to a next child,
+ while the current one is scheduled to be executed. This way,
+ GOMP_taskwait (and others) can schedule a next task while
+ waiting.
+
+ Do not remove it entirely from the circular list, as it is
+ still a child, though not one we should consider first (say
+ by GOMP_taskwait). */
if (parent->children == child_task)
parent->children = child_task->next_child;
+ /* TIED tasks cannot come before WAITING tasks. If we're about
+ to make this task TIED, rewire things appropriately.
+ However, a TIED task at the end is perfectly fine. */
+ else if (child_task->next_child->kind == GOMP_TASK_WAITING
+ && child_task->next_child != parent->children)
+ {
+ /* Remove from the list. */
+ child_task->prev_child->next_child = child_task->next_child;
+ child_task->next_child->prev_child = child_task->prev_child;
+ /* Rewire at the end of its siblings. */
+ child_task->next_child = parent->children;
+ child_task->prev_child = parent->children->prev_child;
+ parent->children->prev_child->next_child = child_task;
+ parent->children->prev_child = child_task;
+ }
+
+ /* If the current task (child_task) is at the top of the
+ parent's last_parent_depends_on, it's about to be removed
+ from it. Adjust last_parent_depends_on appropriately. */
if (__builtin_expect (child_task->parent_depends_on, 0)
&& parent->taskwait->last_parent_depends_on == child_task)
{
+ /* The last_parent_depends_on list was built with all
+ parent_depends_on entries linked to the prev_child. Grab
+ the next last_parent_depends_on head from this prev_child if
+ available... */
if (child_task->prev_child->kind == GOMP_TASK_WAITING
&& child_task->prev_child->parent_depends_on)
parent->taskwait->last_parent_depends_on = child_task->prev_child;
else
- parent->taskwait->last_parent_depends_on = NULL;
+ {
+ /* ...otherwise, there are no more parent_depends_on
+ entries waiting to run. In which case, clear the
+ list. */
+ parent->taskwait->last_parent_depends_on = NULL;
+ }
}
}
- if (taskgroup && taskgroup->children == child_task)
- taskgroup->children = child_task->next_taskgroup;
+
+ /* Adjust taskgroup to point to the next taskgroup. See note above
+ regarding adjustment of children as to why the child_task is not
+ removed entirely from the circular list. */
+ struct gomp_taskgroup *taskgroup = child_task->taskgroup;
+ if (taskgroup)
+ {
+ if (taskgroup->children == child_task)
+ taskgroup->children = child_task->next_taskgroup;
+ /* TIED tasks cannot come before WAITING tasks. If we're about
+ to make this task TIED, rewire things appropriately.
+ However, a TIED task at the end is perfectly fine. */
+ else if (child_task->next_taskgroup->kind == GOMP_TASK_WAITING
+ && child_task->next_taskgroup != taskgroup->children)
+ {
+ /* Remove from the list. */
+ child_task->prev_taskgroup->next_taskgroup
+ = child_task->next_taskgroup;
+ child_task->next_taskgroup->prev_taskgroup
+ = child_task->prev_taskgroup;
+ /* Rewire at the end of its taskgroup. */
+ child_task->next_taskgroup = taskgroup->children;
+ child_task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ taskgroup->children->prev_taskgroup->next_taskgroup = child_task;
+ taskgroup->children->prev_taskgroup = child_task;
+ }
+ }
+
+ /* Remove child_task from the task_queue. */
child_task->prev_queue->next_queue = child_task->next_queue;
child_task->next_queue->prev_queue = child_task->prev_queue;
if (team->task_queue == child_task)
@@ -442,6 +770,7 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
team->task_queue = NULL;
}
child_task->kind = GOMP_TASK_TIED;
+
if (--team->task_queued_count == 0)
gomp_team_barrier_clear_task_pending (&team->barrier);
if ((gomp_team_barrier_cancelled (&team->barrier)
@@ -479,6 +808,11 @@ gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
}
}
+/* After CHILD_TASK has been run, adjust the various task queues to
+ give higher priority to the tasks that depend on CHILD_TASK.
+
+ TEAM is the team to which CHILD_TASK belongs to. */
+
static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
struct gomp_team *team)
@@ -502,6 +836,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
if (parent->taskwait && parent->taskwait->last_parent_depends_on
&& !task->parent_depends_on)
{
+ /* Put depender in last_parent_depends_on. */
struct gomp_task *last_parent_depends_on
= parent->taskwait->last_parent_depends_on;
task->next_child = last_parent_depends_on->next_child;
@@ -509,6 +844,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
}
else
{
+ /* Make depender a sibling of child_task, and place
+ it at the top of said sibling list. */
task->next_child = parent->children;
task->prev_child = parent->children->prev_child;
parent->children = task;
@@ -518,6 +855,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
}
else
{
+ /* Make depender a sibling of child_task. */
task->next_child = task;
task->prev_child = task;
parent->children = task;
@@ -539,6 +877,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
parent->taskwait->last_parent_depends_on = task;
}
}
+ /* If depender is in a taskgroup, put it at the TOP of its
+ taskgroup. */
if (taskgroup)
{
if (taskgroup->children)
@@ -560,6 +900,8 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
gomp_sem_post (&taskgroup->taskgroup_sem);
}
}
+ /* Put depender of child_task at the END of the team's
+ task_queue. */
if (team->task_queue)
{
task->next_queue = team->task_queue;
@@ -602,12 +944,18 @@ gomp_task_run_post_handle_depend (struct gomp_task *child_task,
return gomp_task_run_post_handle_dependers (child_task, team);
}
+/* Remove CHILD_TASK from its parent. */
+
static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
struct gomp_task *parent = child_task->parent;
if (parent == NULL)
return;
+
+ /* If this was the last task the parent was depending on,
+ synchronize with gomp_task_maybe_wait_for_dependencies so it can
+ clean up and return. */
if (__builtin_expect (child_task->parent_depends_on, 0)
&& --parent->taskwait->n_depend == 0
&& parent->taskwait->in_depend_wait)
@@ -615,6 +963,8 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task)
parent->taskwait->in_depend_wait = false;
gomp_sem_post (&parent->taskwait->taskwait_sem);
}
+
+ /* Remove CHILD_TASK from its sibling list. */
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
if (parent->children != child_task)
@@ -637,6 +987,8 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task)
}
}
+/* Remove CHILD_TASK from its taskgroup. */
+
static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
@@ -701,7 +1053,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
child_task = team->task_queue;
cancelled = gomp_task_run_pre (child_task, child_task->parent,
- child_task->taskgroup, team);
+ team);
if (__builtin_expect (cancelled, 0))
{
if (to_free)
@@ -766,7 +1118,9 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
}
}
-/* Called when encountering a taskwait directive. */
+/* Called when encountering a taskwait directive.
+
+ Wait for all children of the current task. */
void
GOMP_taskwait (void)
@@ -812,8 +1166,7 @@ GOMP_taskwait (void)
{
child_task = task->children;
cancelled
- = gomp_task_run_pre (child_task, task, child_task->taskgroup,
- team);
+ = gomp_task_run_pre (child_task, task, team);
if (__builtin_expect (cancelled, 0))
{
if (to_free)
@@ -863,6 +1216,9 @@ GOMP_taskwait (void)
finish_cancelled:;
size_t new_tasks
= gomp_task_run_post_handle_depend (child_task, team);
+
+ /* Remove child_task from children list, and set up the next
+ sibling to be run. */
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
if (task->children == child_task)
@@ -872,8 +1228,12 @@ GOMP_taskwait (void)
else
task->children = NULL;
}
+ /* Orphan all the children of CHILD_TASK. */
gomp_clear_parent (child_task->children);
+
+ /* Remove CHILD_TASK from its taskgroup. */
gomp_task_run_post_remove_taskgroup (child_task);
+
to_free = child_task;
child_task = NULL;
team->task_count--;
@@ -889,9 +1249,11 @@ GOMP_taskwait (void)
}
/* This is like GOMP_taskwait, but we only wait for tasks that the
- upcoming task depends on. */
+ upcoming task depends on.
-static void
+ DEPEND is as in GOMP_task. */
+
+void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
struct gomp_thread *thr = gomp_thread ();
@@ -923,11 +1285,33 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
{
tsk->parent_depends_on = true;
++num_awaited;
+ /* If a task we need to wait for is not already
+ running and is ready to be scheduled, move it to
+ front, so that we run it as soon as possible.
+
+ We rearrange the children queue such that all
+ parent_depends_on tasks are first, and
+ last_parent_depends_on points to the last such task
+ we rearranged. For example, given the following
+ children where PD[123] are the parent_depends_on
+ tasks:
+
+ task->children
+ |
+ V
+ C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
+
+ We rearrange such that:
+
+ task->children
+ | +--- last_parent_depends_on
+ | |
+ V V
+ PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4
+ */
+
if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
{
- /* If a task we need to wait for is not already
- running and is ready to be scheduled, move it
- to front, so that we run it as soon as possible. */
if (last_parent_depends_on)
{
tsk->prev_child->next_child = tsk->next_child;
@@ -941,8 +1325,8 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
{
tsk->prev_child->next_child = tsk->next_child;
tsk->next_child->prev_child = tsk->prev_child;
- tsk->prev_child = task->children;
- tsk->next_child = task->children->next_child;
+ tsk->prev_child = task->children->prev_child;
+ tsk->next_child = task->children;
task->children = tsk;
tsk->prev_child->next_child = tsk;
tsk->next_child->prev_child = tsk;
@@ -983,8 +1367,7 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
{
child_task = task->children;
cancelled
- = gomp_task_run_pre (child_task, task, child_task->taskgroup,
- team);
+ = gomp_task_run_pre (child_task, task, team);
if (__builtin_expect (cancelled, 0))
{
if (to_free)
@@ -1028,6 +1411,8 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
= gomp_task_run_post_handle_depend (child_task, team);
if (child_task->parent_depends_on)
--taskwait.n_depend;
+
+ /* Remove child_task from sibling list. */
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
if (task->children == child_task)
@@ -1037,6 +1422,7 @@ gomp_task_maybe_wait_for_dependencies (void **depend)
else
task->children = NULL;
}
+
gomp_clear_parent (child_task->children);
gomp_task_run_post_remove_taskgroup (child_task);
to_free = child_task;
@@ -1070,7 +1456,7 @@ GOMP_taskgroup_start (void)
struct gomp_taskgroup *taskgroup;
/* If team is NULL, all tasks are executed as
- GOMP_TASK_IFFALSE tasks and thus all children tasks of
+ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
taskgroup and their descendant tasks will be finished
by the time GOMP_taskgroup_end is called. */
if (team == NULL)
@@ -1137,8 +1523,7 @@ GOMP_taskgroup_end (void)
if (child_task->kind == GOMP_TASK_WAITING)
{
cancelled
- = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
- team);
+ = gomp_task_run_pre (child_task, child_task->parent, team);
if (__builtin_expect (cancelled, 0))
{
if (to_free)
diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c
new file mode 100644
index 00000000000..f57a5a16ef2
--- /dev/null
+++ b/libgomp/taskloop.c
@@ -0,0 +1,363 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the taskloop construct. It is included twice, once
+ for the long and once for unsigned long long variant. */
+
+/* Called when encountering an explicit task directive. If IF_CLAUSE is
+ false, then we must not delay in executing the task. If UNTIED is true,
+ then the task may be executed by any member of the team. */
+
+void
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+ long arg_size, long arg_align, unsigned flags,
+ unsigned long num_tasks, int priority,
+ TYPE start, TYPE end, TYPE step)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
+ /* If pthread_mutex_* is used for omp_*lock*, then each task must be
+ tied to one thread all the time. This means UNTIED tasks must be
+ tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
+ might be running on different thread than FN. */
+ if (cpyfn)
+ flags &= ~GOMP_TASK_FLAG_IF;
+ flags &= ~GOMP_TASK_FLAG_UNTIED;
+#endif
+
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team && gomp_team_barrier_cancelled (&team->barrier))
+ return;
+
+#ifdef TYPE_is_long
+ TYPE s = step;
+ if (step > 0)
+ {
+ if (start >= end)
+ return;
+ s--;
+ }
+ else
+ {
+ if (start <= end)
+ return;
+ s++;
+ }
+ UTYPE n = (end - start + s) / step;
+#else
+ UTYPE n;
+ if (flags & GOMP_TASK_FLAG_UP)
+ {
+ if (start >= end)
+ return;
+ n = (end - start + step - 1) / step;
+ }
+ else
+ {
+ if (start <= end)
+ return;
+ n = (start - end - step - 1) / -step;
+ }
+#endif
+
+ TYPE task_step = step;
+ unsigned long nfirst = n;
+ if (flags & GOMP_TASK_FLAG_GRAINSIZE)
+ {
+ unsigned long grainsize = num_tasks;
+#ifdef TYPE_is_long
+ num_tasks = n / grainsize;
+#else
+ UTYPE ndiv = n / grainsize;
+ num_tasks = ndiv;
+ if (num_tasks != ndiv)
+ num_tasks = ~0UL;
+#endif
+ if (num_tasks <= 1)
+ {
+ num_tasks = 1;
+ task_step = end - start;
+ }
+ else if (num_tasks >= grainsize
+#ifndef TYPE_is_long
+ && num_tasks != ~0UL
+#endif
+ )
+ {
+ UTYPE mul = num_tasks * grainsize;
+ task_step = (TYPE) grainsize * step;
+ if (mul != n)
+ {
+ task_step += step;
+ nfirst = n - mul - 1;
+ }
+ }
+ else
+ {
+ UTYPE div = n / num_tasks;
+ UTYPE mod = n % num_tasks;
+ task_step = (TYPE) div * step;
+ if (mod)
+ {
+ task_step += step;
+ nfirst = mod - 1;
+ }
+ }
+ }
+ else
+ {
+ if (num_tasks == 0)
+ num_tasks = team ? team->nthreads : 1;
+ if (num_tasks >= n)
+ num_tasks = n;
+ else
+ {
+ UTYPE div = n / num_tasks;
+ UTYPE mod = n % num_tasks;
+ task_step = (TYPE) div * step;
+ if (mod)
+ {
+ task_step += step;
+ nfirst = mod - 1;
+ }
+ }
+ }
+
+ if (flags & GOMP_TASK_FLAG_NOGROUP)
+ {
+ if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+ return;
+ }
+ else
+ ialias_call (GOMP_taskgroup_start) ();
+
+ /* FIXME, use priority. */
+ (void) priority;
+
+ if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
+ || (thr->task && thr->task->final_task)
+ || team->task_count + num_tasks > 64 * team->nthreads)
+ {
+ unsigned long i;
+ if (__builtin_expect (cpyfn != NULL, 0))
+ {
+ struct gomp_task task[num_tasks];
+ struct gomp_task *parent = thr->task;
+ arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
+ char buf[num_tasks * arg_size + arg_align - 1];
+ char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
+ & ~(uintptr_t) (arg_align - 1));
+ char *orig_arg = arg;
+ for (i = 0; i < num_tasks; i++)
+ {
+ gomp_init_task (&task[i], parent, gomp_icv (false));
+ task[i].kind = GOMP_TASK_UNDEFERRED;
+ task[i].final_task = (thr->task && thr->task->final_task)
+ || (flags & GOMP_TASK_FLAG_FINAL);
+ if (thr->task)
+ {
+ task[i].in_tied_task = thr->task->in_tied_task;
+ task[i].taskgroup = thr->task->taskgroup;
+ }
+ thr->task = &task[i];
+ cpyfn (arg, data);
+ arg += arg_size;
+ }
+ arg = orig_arg;
+ for (i = 0; i < num_tasks; i++)
+ {
+ thr->task = &task[i];
+ ((TYPE *)arg)[0] = start;
+ start += task_step;
+ ((TYPE *)arg)[1] = start;
+ if (i == nfirst)
+ task_step -= step;
+ fn (arg);
+ arg += arg_size;
+ if (task[i].children != NULL)
+ {
+ gomp_mutex_lock (&team->task_lock);
+ gomp_clear_parent (task[i].children);
+ gomp_mutex_unlock (&team->task_lock);
+ }
+ gomp_end_task ();
+ }
+ }
+ else
+ for (i = 0; i < num_tasks; i++)
+ {
+ struct gomp_task task;
+
+ gomp_init_task (&task, thr->task, gomp_icv (false));
+ task.kind = GOMP_TASK_UNDEFERRED;
+ task.final_task = (thr->task && thr->task->final_task)
+ || (flags & GOMP_TASK_FLAG_FINAL);
+ if (thr->task)
+ {
+ task.in_tied_task = thr->task->in_tied_task;
+ task.taskgroup = thr->task->taskgroup;
+ }
+ thr->task = &task;
+ ((TYPE *)data)[0] = start;
+ start += task_step;
+ ((TYPE *)data)[1] = start;
+ if (i == nfirst)
+ task_step -= step;
+ fn (data);
+ if (task.children != NULL)
+ {
+ gomp_mutex_lock (&team->task_lock);
+ gomp_clear_parent (task.children);
+ gomp_mutex_unlock (&team->task_lock);
+ }
+ gomp_end_task ();
+ }
+ }
+ else
+ {
+ struct gomp_task *tasks[num_tasks];
+ struct gomp_task *parent = thr->task;
+ struct gomp_taskgroup *taskgroup = parent->taskgroup;
+ char *arg;
+ int do_wake;
+ unsigned long i;
+
+ for (i = 0; i < num_tasks; i++)
+ {
+ struct gomp_task *task
+ = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
+ tasks[i] = task;
+ arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+ & ~(uintptr_t) (arg_align - 1));
+ gomp_init_task (task, parent, gomp_icv (false));
+ task->kind = GOMP_TASK_UNDEFERRED;
+ task->in_tied_task = parent->in_tied_task;
+ task->taskgroup = taskgroup;
+ thr->task = task;
+ if (cpyfn)
+ {
+ cpyfn (arg, data);
+ task->copy_ctors_done = true;
+ }
+ else
+ memcpy (arg, data, arg_size);
+ ((TYPE *)arg)[0] = start;
+ start += task_step;
+ ((TYPE *)arg)[1] = start;
+ if (i == nfirst)
+ task_step -= step;
+ thr->task = parent;
+ task->kind = GOMP_TASK_WAITING;
+ task->fn = fn;
+ task->fn_data = arg;
+ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+ }
+ gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+ || (taskgroup && taskgroup->cancelled))
+ && cpyfn == NULL, 0))
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ for (i = 0; i < num_tasks; i++)
+ {
+ gomp_finish_task (tasks[i]);
+ free (tasks[i]);
+ }
+ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+ ialias_call (GOMP_taskgroup_end) ();
+ return;
+ }
+ if (taskgroup)
+ taskgroup->num_children += num_tasks;
+ for (i = 0; i < num_tasks; i++)
+ {
+ struct gomp_task *task = tasks[i];
+ if (parent->children)
+ {
+ task->next_child = parent->children;
+ task->prev_child = parent->children->prev_child;
+ task->next_child->prev_child = task;
+ task->prev_child->next_child = task;
+ }
+ else
+ {
+ task->next_child = task;
+ task->prev_child = task;
+ }
+ parent->children = task;
+ if (taskgroup)
+ {
+ if (taskgroup->children)
+ {
+ task->next_taskgroup = taskgroup->children;
+ task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ task->next_taskgroup->prev_taskgroup = task;
+ task->prev_taskgroup->next_taskgroup = task;
+ }
+ else
+ {
+ task->next_taskgroup = task;
+ task->prev_taskgroup = task;
+ }
+ taskgroup->children = task;
+ }
+ if (team->task_queue)
+ {
+ task->next_queue = team->task_queue;
+ task->prev_queue = team->task_queue->prev_queue;
+ task->next_queue->prev_queue = task;
+ task->prev_queue->next_queue = task;
+ }
+ else
+ {
+ task->next_queue = task;
+ task->prev_queue = task;
+ team->task_queue = task;
+ }
+ ++team->task_count;
+ ++team->task_queued_count;
+ }
+ gomp_team_barrier_set_task_pending (&team->barrier);
+ if (team->task_running_count + !parent->in_tied_task
+ < team->nthreads)
+ {
+ do_wake = team->nthreads - team->task_running_count
+ - !parent->in_tied_task;
+ if ((unsigned long) do_wake > num_tasks)
+ do_wake = num_tasks;
+ }
+ else
+ do_wake = 0;
+ gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ }
+ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+ ialias_call (GOMP_taskgroup_end) ();
+}
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 1040c29e0eb..6dc1e8ef3ca 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -321,6 +321,19 @@ proc check_effective_target_offload_device { } {
} ]
}
+# Return 1 if offload device is available and it has non-shared address space.
+proc check_effective_target_offload_device_nonshared_as { } {
+ return [check_runtime_nocache offload_device_nonshared_as {
+ int main ()
+ {
+ int a = 8;
+ #pragma omp target map(to: a)
+ a++;
+ return a != 8;
+ }
+ } ]
+}
+
# Return 1 if at least one nvidia board is present.
proc check_effective_target_openacc_nvidia_accel_present { } {
diff --git a/libgomp/testsuite/libgomp.c++/ctor-13.C b/libgomp/testsuite/libgomp.c++/ctor-13.C
new file mode 100644
index 00000000000..8c7a09f315d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/ctor-13.C
@@ -0,0 +1,242 @@
+// { dg-do run }
+
+#include <omp.h>
+#include <assert.h>
+
+struct B
+{
+ static int ic, dc, xc, ac, cc;
+
+ B();
+ B(const B &);
+ ~B();
+ B& operator=(const B &);
+ void doit();
+ static void clear();
+};
+
+int B::ic;
+int B::dc;
+int B::xc;
+int B::cc;
+int B::ac;
+
+B::B()
+{
+ #pragma omp atomic
+ ic++;
+}
+
+B::~B()
+{
+ #pragma omp atomic
+ dc++;
+}
+
+B::B(const B &)
+{
+ #pragma omp atomic
+ cc++;
+}
+
+B& B::operator=(const B &)
+{
+ #pragma omp atomic
+ ac++;
+ return *this;
+}
+
+void B::doit()
+{
+ #pragma omp atomic
+ xc++;
+}
+
+void B::clear()
+{
+ ic = 0;
+ dc = 0;
+ cc = 0;
+ ac = 0;
+ xc = 0;
+}
+
+static int n;
+
+void f1(B &a)
+{
+ B b;
+ B &c = b;
+ #pragma omp parallel default(none) private(a, c) shared (n)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ a.doit();
+ c.doit();
+ }
+}
+
+void f2(B &a)
+{
+ B b;
+ B &c = b;
+ #pragma omp parallel default(none) firstprivate(a, c) shared(n)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ a.doit();
+ c.doit();
+ }
+}
+
+void f3(B &a)
+{
+ B b;
+ B &c = b;
+ #pragma omp parallel default(none) shared(n, a, c)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ #pragma omp for lastprivate (a, c)
+ for (int i = 0; i < omp_get_num_threads (); i++)
+ {
+ a.doit();
+ c.doit();
+ }
+ }
+}
+
+void f4()
+{
+ B b;
+ B &c = b;
+ #pragma omp parallel default(none) private (c) shared (n)
+ {
+ B d;
+ B &e = d;
+ #pragma omp single copyprivate (c, e)
+ {
+ c.doit();
+ e.doit();
+ }
+ c.doit();
+ e.doit();
+ }
+}
+
+void f5(B (&a)[2])
+{
+ B b[2];
+ B (&c)[2] = b;
+ #pragma omp parallel default(none) private(a, c) shared (n)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ a[0].doit();
+ a[1].doit();
+ c[0].doit();
+ c[1].doit();
+ }
+}
+
+void f6(B (&a)[2])
+{
+ B b[2];
+ B (&c)[2] = b;
+ #pragma omp parallel default(none) firstprivate(a, c) shared (n)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ a[0].doit();
+ a[1].doit();
+ c[0].doit();
+ c[1].doit();
+ }
+}
+
+void f7(B (&a)[2])
+{
+ B b[2];
+ B (&c)[2] = b;
+ #pragma omp parallel default(none) shared(n, a, c)
+ {
+ #pragma omp master
+ n = omp_get_num_threads ();
+ #pragma omp for lastprivate (a, c)
+ for (int i = 0; i < omp_get_num_threads (); i++)
+ {
+ a[0].doit();
+ a[1].doit();
+ c[0].doit();
+ c[1].doit();
+ }
+ }
+}
+
+void f8()
+{
+ B b[2];
+ B (&c)[2] = b;
+ #pragma omp parallel default(none) private (c) shared (n)
+ {
+ B d[2];
+ B (&e)[2] = d;
+ #pragma omp single copyprivate (c, e)
+ {
+ c[0].doit();
+ c[1].doit();
+ e[0].doit();
+ e[1].doit();
+ }
+ c[0].doit();
+ c[1].doit();
+ e[0].doit();
+ e[1].doit();
+ }
+}
+
+int main()
+{
+ {
+ B a;
+ f1(a);
+ }
+ assert (B::xc == 2*n && B::ic == 2*n+2 && B::dc == 2*n+2 && B::ac == 0 && B::cc == 0);
+ B::clear();
+ {
+ B a;
+ f2(a);
+ }
+ assert (B::xc == 2*n && B::ic == 2 && B::dc == 2*n+2 && B::ac == 0 && B::cc == 2*n);
+ B::clear();
+ {
+ B a;
+ f3(a);
+ }
+ assert (B::xc == 2*n && B::ic == 2*n+2 && B::dc == 2*n+2 && B::ac == 2 && B::cc == 0);
+ B::clear();
+ f4();
+ assert (B::xc == 2*n+2 && B::ic == 2*n+1 && B::dc == 2*n+1 && B::ac == 2*n-2 && B::cc == 0);
+ B::clear();
+ {
+ B a[2];
+ f5(a);
+ }
+ assert (B::xc == 4*n && B::ic == 4*n+4 && B::dc == 4*n+4 && B::ac == 0 && B::cc == 0);
+ B::clear();
+ {
+ B a[2];
+ f6(a);
+ }
+ assert (B::xc == 4*n && B::ic == 4 && B::dc == 4*n+4 && B::ac == 0 && B::cc == 4*n);
+ B::clear();
+ {
+ B a[2];
+ f7(a);
+ }
+ assert (B::xc == 4*n && B::ic == 4*n+4 && B::dc == 4*n+4 && B::ac == 4 && B::cc == 0);
+ B::clear();
+ f8();
+ assert (B::xc == 4*n+4 && B::ic == 4*n+2 && B::dc == 4*n+2 && B::ac == 4*n-4 && B::cc == 0);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/doacross-1.C b/libgomp/testsuite/libgomp.c++/doacross-1.C
new file mode 100644
index 00000000000..bc53ee6e8a2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/doacross-1.C
@@ -0,0 +1,294 @@
+// { dg-do run }
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I
+{
+public:
+ typedef ptrdiff_t difference_type;
+ I ();
+ ~I ();
+ I (T *);
+ I (const I &);
+ T &operator * ();
+ T *operator -> ();
+ T &operator [] (const difference_type &) const;
+ I &operator = (const I &);
+ I &operator ++ ();
+ I operator ++ (int);
+ I &operator -- ();
+ I operator -- (int);
+ I &operator += (const difference_type &);
+ I &operator -= (const difference_type &);
+ I operator + (const difference_type &) const;
+ I operator - (const difference_type &) const;
+ template <typename S> friend bool operator == (I<S> &, I<S> &);
+ template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator < (I<S> &, I<S> &);
+ template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator <= (I<S> &, I<S> &);
+ template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator > (I<S> &, I<S> &);
+ template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator >= (I<S> &, I<S> &);
+ template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+ template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+ T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () {}
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+int results[2048];
+
+template <typename T>
+void
+baz (I<T> &i, I<T> &j, I<T> &k, T &l)
+{
+ if (*i < 0 || *i >= 16)
+ abort ();
+ if (*j < 0 || *j >= 16)
+ abort ();
+ if (*k < 0 || *k >= 16)
+ abort ();
+ if (l < 0 || l >= 16)
+ abort ();
+ #pragma omp atomic
+ results[512 * *i + 64 * *j + 8 * *k + l]++;
+}
+
+template <typename T>
+void
+baz (T &i, T &j, T &k, T &l)
+{
+ if (i < 0 || i >= 16)
+ abort ();
+ if (j < 0 || j >= 16)
+ abort ();
+ if (k < 0 || k >= 16)
+ abort ();
+ if (l < 0 || l >= 16)
+ abort ();
+ #pragma omp atomic
+ results[512 * i + 64 * j + 8 * k + l]++;
+}
+
+void
+f1 (const I<int> &a, const I<int> &b, const I<int> &c, const I<int> &d,
+ const I<int> &e, const I<int> &f, int g, int h,
+ I<int> &r1, I<int> &r2, I<int> &r3)
+{
+ I<int> i, j, k;
+ int l;
+#pragma omp parallel for ordered(4) lastprivate (i, j, k) schedule(static, 1)
+ for (i = a; i <= b; i++)
+ for (j = c; j < d; j++)
+ for (k = e; k < f; k++)
+ for (l = g; l < h; l++)
+ {
+ #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2)
+ baz (i, j, k, l);
+ if (i > a && k < f - 1 && l > g + 1)
+ {
+ int m;
+ #pragma omp atomic read
+ m = results[512 * *(i - 1) + 64 * *j + 8 * *(k + 1) + l - 2];
+ if (m == 0)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ }
+ r1 = i;
+ r2 = j;
+ r3 = k;
+}
+
+void
+f2 (int a, int b, int c, int d, int e, int f, int g, int h, int &r1, int &r2, int &r3)
+{
+ int i, j, k, l;
+#pragma omp parallel for collapse (1) ordered(4) lastprivate (i, j, k) schedule(static, 2)
+ for (i = a; i <= b; i++)
+ for (j = c; j < d; j++)
+ for (k = e; k < f; k++)
+ for (l = g; l < h; l++)
+ {
+ #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2)
+ baz (i, j, k, l);
+ if (i > a && k < f - 1 && l > g + 1)
+ {
+ int m;
+ #pragma omp atomic read
+ m = results[512 * (i - 1) + 64 * j + 8 * (k + 1) + l - 2];
+ if (m == 0)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ }
+ r1 = i;
+ r2 = j;
+ r3 = k;
+}
+
+void
+f3 (const I<int> &a, const I<int> &b, const I<int> &c, const I<int> &d,
+ const I<int> &e, const I<int> &f, int g, int h,
+ I<int> &r1, I<int> &r2, I<int> &r3)
+{
+ I<int> i, j, k;
+ int l;
+#pragma omp parallel for collapse (2) ordered(4) lastprivate (i, j, k) schedule(static, 1)
+ for (i = a; i <= b; i++)
+ for (j = c; j < d; j++)
+ for (k = e; k < f; k++)
+ for (l = g; l < h; l++)
+ {
+ #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2)
+ baz (i, j, k, l);
+ if (i > a && k < f - 1 && l > g + 1)
+ {
+ int m;
+ #pragma omp atomic read
+ m = results[512 * *(i - 1) + 64 * *j + 8 * *(k + 1) + l - 2];
+ if (m == 0)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ }
+ r1 = i;
+ r2 = j;
+ r3 = k;
+}
+
+void
+f4 (int a, int b, int c, int d, int e, int f, int g, int h, int &r1, int &r2, int &r3)
+{
+ int i, j, k, l;
+#pragma omp parallel for collapse (2) ordered(4) lastprivate (i, j, k) schedule(static, 2)
+ for (i = a; i <= b; i++)
+ for (j = c; j < d; j++)
+ for (k = e; k < f; k++)
+ for (l = g; l < h; l++)
+ {
+ #pragma omp ordered depend(sink: i - 1, j, k + 1, l - 2)
+ baz (i, j, k, l);
+ if (i > a && k < f - 1 && l > g + 1)
+ {
+ int m;
+ #pragma omp atomic read
+ m = results[512 * (i - 1) + 64 * j + 8 * (k + 1) + l - 2];
+ if (m == 0)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ }
+ r1 = i;
+ r2 = j;
+ r3 = k;
+}
+
+#define check(expr) \
+ for (int i = 0; i < 2048; i++) \
+ if (expr) \
+ { \
+ if (results[i] != 1) \
+ abort (); \
+ results[i] = 0; \
+ } \
+ else if (results[i]) \
+ abort ()
+
+int
+main ()
+{
+ int a[16], s1, s2, s3;
+ I<int> r1, r2, r3;
+ for (int i = 0; i < 16; i++)
+ a[i] = i;
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f1 (&a[1], &a[3], &a[2], &a[5], &a[1], &a[3], 0, 5, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 5 || *r3 != 3)
+ abort ();
+ check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5);
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f1 (&a[1], &a[3], &a[1], &a[4], &a[1], &a[5], 1, 0, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 4 || *r3 != 5)
+ abort ();
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f1 (&a[1], &a[3], &a[1], &a[9], &a[7], &a[2], 0, 7, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 9 || *r3 != 7)
+ abort ();
+ s1 = 15; s2 = 15; s3 = 15;
+ f2 (1, 3, 2, 5, 1, 3, 0, 5, s1, s2, s3);
+ if (s1 != 4 || s2 != 5 || s3 != 3)
+ abort ();
+ check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5);
+ s1 = 15; s2 = 15; s3 = 15;
+ f2 (1, 3, 1, 4, 1, 5, 1, 0, s1, s2, s3);
+ if (s1 != 4 || s2 != 4 || s3 != 5)
+ abort ();
+ s1 = 15; s2 = 15; s3 = 15;
+ f2 (1, 3, 1, 9, 7, 2, 0, 7, s1, s2, s3);
+ if (s1 != 4 || s2 != 9 || s3 != 7)
+ abort ();
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f3 (&a[1], &a[3], &a[2], &a[5], &a[1], &a[3], 0, 5, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 5 || *r3 != 3)
+ abort ();
+ check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5);
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f3 (&a[1], &a[3], &a[1], &a[4], &a[1], &a[5], 1, 0, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 4 || *r3 != 5)
+ abort ();
+ r1 = &a[15]; r2 = &a[15]; r3 = &a[15];
+ f3 (&a[1], &a[3], &a[1], &a[9], &a[7], &a[2], 0, 7, r1, r2, r3);
+ if (*r1 != 4 || *r2 != 9 || *r3 != 7)
+ abort ();
+ s1 = 15; s2 = 15; s3 = 15;
+ f4 (1, 3, 2, 5, 1, 3, 0, 5, s1, s2, s3);
+ if (s1 != 4 || s2 != 5 || s3 != 3)
+ abort ();
+ check ((i / 512) - 1U < 3U && ((i / 64) & 7) - 2U < 3U && ((i / 8) & 7) - 1U < 2U && (i & 7) < 5);
+ s1 = 15; s2 = 15; s3 = 15;
+ f4 (1, 3, 1, 4, 1, 5, 1, 0, s1, s2, s3);
+ if (s1 != 4 || s2 != 4 || s3 != 5)
+ abort ();
+ s1 = 15; s2 = 15; s3 = 15;
+ f4 (1, 3, 1, 9, 7, 2, 0, 7, s1, s2, s3);
+ if (s1 != 4 || s2 != 9 || s3 != 7)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C b/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C
index 75276e7c5c6..6d5b5e47990 100644
--- a/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C
+++ b/libgomp/testsuite/libgomp.c++/examples-4/declare_target-2.C
@@ -1,5 +1,5 @@
// { dg-do run }
-// { dg-require-effective-target offload_device }
+// { dg-require-effective-target offload_device_nonshared_as }
#include <stdlib.h>
diff --git a/libgomp/testsuite/libgomp.c++/for-12.C b/libgomp/testsuite/libgomp.c++/for-12.C
new file mode 100644
index 00000000000..ea32192e45d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/for-12.C
@@ -0,0 +1,42 @@
+/* { dg-options "-fopenmp" } */
+
+extern "C" void abort (void);
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F taskloop
+#define G taskloop
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F taskloop simd
+#define G taskloop_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+int
+main ()
+{
+ int err = 0;
+ #pragma omp parallel reduction(|:err)
+ #pragma omp single
+ {
+ if (test_taskloop_normal ()
+ || test_taskloop_simd_normal ())
+ err = 1;
+ }
+ if (err)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/for-13.C b/libgomp/testsuite/libgomp.c++/for-13.C
new file mode 100644
index 00000000000..ac1601a766f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/for-13.C
@@ -0,0 +1,151 @@
+extern "C" void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+
+#define F for
+#define G f
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#pragma omp end declare target
+
+#undef OMPFROM
+#undef OMPTO
+#define DO_PRAGMA(x) _Pragma (#x)
+#define OMPFROM(v) DO_PRAGMA (omp target update from(v))
+#define OMPTO(v) DO_PRAGMA (omp target update to(v))
+
+#define F target parallel for
+#define G tpf
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F target simd
+#define G t_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target parallel for simd
+#define G tpf_simd
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute
+#define G ttd
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute
+#define G ttd_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute simd
+#define G ttds
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute simd
+#define G ttds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute parallel for
+#define G ttdpf
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for dist_schedule(static, 128)
+#define G ttdpf_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for simd
+#define G ttdpfs
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for simd dist_schedule(static, 128)
+#define G ttdpfs_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+int
+main ()
+{
+ if (test_tpf_static ()
+ || test_tpf_static32 ()
+ || test_tpf_auto ()
+ || test_tpf_guided32 ()
+ || test_tpf_runtime ()
+ || test_t_simd_normal ()
+ || test_tpf_simd_static ()
+ || test_tpf_simd_static32 ()
+ || test_tpf_simd_auto ()
+ || test_tpf_simd_guided32 ()
+ || test_tpf_simd_runtime ()
+ || test_ttd_normal ()
+ || test_ttd_ds128_normal ()
+ || test_ttds_normal ()
+ || test_ttds_ds128_normal ()
+ || test_ttdpf_static ()
+ || test_ttdpf_static32 ()
+ || test_ttdpf_auto ()
+ || test_ttdpf_guided32 ()
+ || test_ttdpf_runtime ()
+ || test_ttdpf_ds128_static ()
+ || test_ttdpf_ds128_static32 ()
+ || test_ttdpf_ds128_auto ()
+ || test_ttdpf_ds128_guided32 ()
+ || test_ttdpf_ds128_runtime ()
+ || test_ttdpfs_static ()
+ || test_ttdpfs_static32 ()
+ || test_ttdpfs_auto ()
+ || test_ttdpfs_guided32 ()
+ || test_ttdpfs_runtime ()
+ || test_ttdpfs_ds128_static ()
+ || test_ttdpfs_ds128_static32 ()
+ || test_ttdpfs_ds128_auto ()
+ || test_ttdpfs_ds128_guided32 ()
+ || test_ttdpfs_ds128_runtime ())
+ abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/for-14.C b/libgomp/testsuite/libgomp.c++/for-14.C
new file mode 100644
index 00000000000..7738473b601
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/for-14.C
@@ -0,0 +1,120 @@
+extern "C" void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+
+#define F for
+#define G f
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#pragma omp end declare target
+
+#undef OMPTGT
+#undef OMPFROM
+#undef OMPTO
+#define DO_PRAGMA(x) _Pragma (#x)
+#define OMPTGT DO_PRAGMA (omp target)
+#define OMPFROM(v) DO_PRAGMA (omp target update from(v))
+#define OMPTO(v) DO_PRAGMA (omp target update to(v))
+
+#define F teams distribute
+#define G td
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute
+#define G td_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute simd
+#define G tds
+#define S
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute simd
+#define G tds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "../libgomp.c/for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute parallel for
+#define G tdpf
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for dist_schedule(static, 128)
+#define G tdpf_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for simd
+#define G tdpfs
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for simd dist_schedule(static, 128)
+#define G tdpfs_ds128
+#include "../libgomp.c/for-1.h"
+#undef F
+#undef G
+
+int
+main ()
+{
+ if (test_td_normal ()
+ || test_td_ds128_normal ()
+ || test_tds_normal ()
+ || test_tds_ds128_normal ()
+ || test_tdpf_static ()
+ || test_tdpf_static32 ()
+ || test_tdpf_auto ()
+ || test_tdpf_guided32 ()
+ || test_tdpf_runtime ()
+ || test_tdpf_ds128_static ()
+ || test_tdpf_ds128_static32 ()
+ || test_tdpf_ds128_auto ()
+ || test_tdpf_ds128_guided32 ()
+ || test_tdpf_ds128_runtime ()
+ || test_tdpfs_static ()
+ || test_tdpfs_static32 ()
+ || test_tdpfs_auto ()
+ || test_tdpfs_guided32 ()
+ || test_tdpfs_runtime ()
+ || test_tdpfs_ds128_static ()
+ || test_tdpfs_ds128_static32 ()
+ || test_tdpfs_ds128_auto ()
+ || test_tdpfs_ds128_guided32 ()
+ || test_tdpfs_ds128_runtime ())
+ abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/linear-1.C b/libgomp/testsuite/libgomp.c++/linear-1.C
new file mode 100644
index 00000000000..1dd1ffc8939
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/linear-1.C
@@ -0,0 +1,268 @@
+int a[256];
+
+__attribute__((noinline, noclone)) int
+f1 (int i)
+{
+ #pragma omp parallel for linear (i: 4)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int &
+f2 (short int &i, char k)
+{
+ #pragma omp parallel for linear (i: k + 1)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+template <typename T>
+__attribute__((noinline, noclone)) T
+f3 (T i, T k)
+{
+ #pragma omp parallel for linear (i: k)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+template <typename T>
+__attribute__((noinline, noclone)) T &
+f4 (T &i)
+{
+ #pragma omp parallel for linear (i: 4) schedule(static, 3)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f5 (short int i, char &k)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(static, 5)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+template <int N>
+__attribute__((noinline, noclone)) long long int
+f6 (long long int i, long long int k)
+{
+ #pragma omp parallel for linear (i: k) schedule(static, 7)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f7 (int &i)
+{
+ #pragma omp parallel for linear (i: 4) schedule(dynamic, 3)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f8 (short int i, char k)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f9 (long long int i, long long int k)
+{
+ #pragma omp parallel for linear (i: k) schedule(dynamic, 7)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+template <typename T>
+__attribute__((noinline, noclone)) T &
+f10 (T &i, long &step)
+{
+ #pragma omp parallel for linear (i: 4)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f11 (short int i, char k, char step)
+{
+ #pragma omp parallel for linear (i: k + 1)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f12 (long long int i, long long int k, int step)
+{
+ #pragma omp parallel for linear (i: k)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int &i, long long int step)
+{
+ #pragma omp parallel for linear (i: 4) schedule(static, 3)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f14 (short int &i, char &k, int &step)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(static, 5)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+template <int N>
+__attribute__((noinline, noclone)) long long int
+f15 (long long int i, long long int k, long int step)
+{
+ #pragma omp parallel for linear (i: k) schedule(static, 7)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int i, long long int step)
+{
+ #pragma omp parallel for linear (i: 4) schedule(dynamic, 3)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f17 (short int i, char k, int step)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+template <typename T>
+__attribute__((noinline, noclone)) T
+f18 (T i, T k, long int step)
+{
+ #pragma omp parallel for linear (i: k) schedule(dynamic, 7)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+int
+main ()
+{
+#define TEST(x) \
+ if (x != 8 + 48 * 4) \
+ __builtin_abort (); \
+ for (int i = 0; i < 256; i++) \
+ if (a[i] != (((i & 3) == 0 && i >= 8 \
+ && i < 8 + 48 * 4) \
+ ? ((i - 8) / 4) + 16 : 0)) \
+ __builtin_abort (); \
+ __builtin_memset (a, 0, sizeof (a))
+ TEST (f1 (8));
+ short int vs = 8;
+ TEST (f2 (vs, 3));
+ TEST (f3 (8LL, 4LL));
+ int vi = 8;
+ TEST (f4 (vi));
+ char vk = 3;
+ TEST (f5 (8, vk));
+ TEST (f6<7> (8LL, 4LL));
+ vi = 8;
+ TEST (f7 (vi));
+ TEST (f8 (8, 3));
+ TEST (f9 (8LL, 4LL));
+ vi = 8;
+ long vl = 2;
+ TEST (f10 (vi, vl));
+ TEST (f11 (8, 3, 2));
+ TEST (f12 (8LL, 4LL, 2));
+ vi = 8;
+ TEST (f13 (vi, 2));
+ vs = 8;
+ vk = 3;
+ vi = 2;
+ TEST (f14 (vs, vk, vi));
+ TEST (f15<9> (8LL, 4LL, 2));
+ TEST (f16 (8, 2));
+ TEST (f17 (8, 3, 2));
+ long long int vll1 = 8LL;
+ long long int vll2 = 4LL;
+ TEST (f18<long long int &> (vll1, vll2, 2));
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/member-1.C b/libgomp/testsuite/libgomp.c++/member-1.C
new file mode 100644
index 00000000000..d2d0c5b2667
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/member-1.C
@@ -0,0 +1,206 @@
+// { dg-do run }
+
+#include <omp.h>
+
+struct R { R () {}; ~R () {}; int r; };
+struct T { T () {}; virtual ~T () {}; int t; };
+int c;
+struct A : public R, virtual public T { A () : b(c) {} int a; int &b; void m1 (); };
+
+void
+take (int &a, int &b, int &c, int &d)
+{
+ asm volatile ("" : : "g" (&a), "g" (&b), "g" (&c), "g" (&d) : "memory");
+}
+
+void
+A::m1 ()
+{
+ #pragma omp parallel private (a, r, T::t, A::b)
+ {
+ int q = omp_get_thread_num ();
+ a = q;
+ r = 2 * q;
+ t = 3 * q;
+ b = 4 * q;
+ take (a, r, t, b);
+ #pragma omp barrier
+ if (A::a != q || R::r != 2 * q || T::t != 3 * q || A::b != 4 * q)
+ __builtin_abort ();
+ }
+ a = 7;
+ r = 8;
+ t = 9;
+ b = 10;
+ #pragma omp parallel firstprivate (A::a, R::r, t, b)
+ {
+ int q = omp_get_thread_num ();
+ take (A::a, R::r, T::t, A::b);
+ if (a != 7 || r != 8 || t != 9 || b != 10)
+ __builtin_abort ();
+ A::a = 5 * q;
+ R::r = 6 * q;
+ T::t = 7 * q;
+ A::b = 8 * q;
+ take (a, r, t, b);
+ #pragma omp barrier
+ if (a != 5 * q || r != 6 * q || t != 7 * q || b != 8 * q)
+ __builtin_abort ();
+ }
+ bool f = false;
+ a = -5;
+ b = -4;
+ r = -3;
+ t = -2;
+ int n;
+ #pragma omp parallel for firstprivate (a, T::t, b, f) lastprivate (A::a, r, t, n)
+ for (int i = 0; i < omp_get_num_threads (); i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (A::a != -5 || A::b != -4 || T::t != -2)
+ __builtin_abort ();
+ }
+ else if (a != q || b != 2 * q || r != 3 * q || t != 4 * q)
+ __builtin_abort ();
+ take (a, r, t, b);
+ A::a = q;
+ A::b = 2 * q;
+ R::r = 3 * q;
+ T::t = 4 * q;
+ n = q;
+ f = true;
+ }
+ if (a != n || r != 3 * n || T::t != 4 * n)
+ __builtin_abort ();
+ b = 8;
+ #pragma omp parallel
+ #pragma omp single
+ for (int i = 0; i < 5; i++)
+ #pragma omp task firstprivate (t, b, n) private (a, R::r)
+ {
+ if (t != 4 * n || b != 8)
+ __builtin_abort ();
+ a = 9;
+ r = 8;
+ t = 12;
+ b = 18;
+ take (a, r, t, b);
+ if (a != 9 || r != 8 || t != 12 || b != 18)
+ __builtin_abort ();
+ }
+ a = 1;
+ b = 2;
+ R::r = 3;
+ t = 4;
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop firstprivate (r, T::t, b, f) lastprivate (a, t, b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (R::r != 3 || A::b != 2 || T::t != 4)
+ __builtin_abort ();
+ }
+ else if (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q)
+ __builtin_abort ();
+ take (a, r, t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T::t = 10 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (a != 7 * n || b != 8 * n || t != 10 * n)
+ __builtin_abort ();
+ a = 1;
+ b = 2;
+ R::r = 3;
+ t = 4;
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop firstprivate (r, T::t, b, A::a, f)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (A::a != 1 || R::r != 3 || A::b != 2 || T::t != 4)
+ __builtin_abort ();
+ }
+ else if (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q)
+ __builtin_abort ();
+ take (a, r, t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T::t = 10 * q;
+ f = true;
+ }
+ }
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop lastprivate (a, t, b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (f && (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q))
+ __builtin_abort ();
+ take (a, r, t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T::t = 10 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (a != 7 * n || b != 8 * n || t != 10 * n)
+ __builtin_abort ();
+ #pragma omp parallel private (a, T::t, A::b, r)
+ {
+ int q = omp_get_thread_num ();
+ a = q;
+ b = 2 * q;
+ r = 3 * q;
+ t = 4 * q;
+ take (a, b, r, t);
+ #pragma omp single copyprivate (A::a, t, b, R::r)
+ n = q;
+ if (a != n || b != 2 * n || r != 3 * n || t != 4 * n)
+ __builtin_abort ();
+ }
+ a = 0;
+ b = 0;
+ R::r = 0;
+ t = 0;
+ #pragma omp parallel for reduction (+: A::a, t, b, R::r)
+ for (int i = 0; i < 30; i++)
+ {
+ a += i;
+ A::b += 2 * i;
+ r += 3 * i;
+ T::t += 4 * i;
+ take (a, b, r, t);
+ }
+ if (A::a != 435 || b != 2 * 435 || R::r != 3 * 435 || t != 4 * 435)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ A a;
+ a.m1 ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/member-2.C b/libgomp/testsuite/libgomp.c++/member-2.C
new file mode 100644
index 00000000000..bb348d8a822
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/member-2.C
@@ -0,0 +1,211 @@
+// { dg-do run }
+
+#include <omp.h>
+
+int c, d, e;
+struct R { R () {}; ~R () {}; int r; };
+template <typename Q>
+struct T { T () : t(d) {}; virtual ~T () {}; Q t; };
+template <typename Q>
+struct A : public R, virtual public T<Q> { A () : b(c), a(e) {} Q a; int &b; void m1 (); };
+
+void
+take (int &a, int &b, int &c, int &d)
+{
+ asm volatile ("" : : "g" (&a), "g" (&b), "g" (&c), "g" (&d) : "memory");
+}
+
+template <typename Q>
+void
+A<Q>::m1 ()
+{
+ #pragma omp parallel private (a, r, T<Q>::t, A::b)
+ {
+ int q = omp_get_thread_num ();
+ a = q;
+ r = 2 * q;
+ T<Q>::t = 3 * q;
+ b = 4 * q;
+ take (a, r, T<Q>::t, b);
+ #pragma omp barrier
+ if (A::a != q || R::r != 2 * q || T<Q>::t != 3 * q || A::b != 4 * q)
+ __builtin_abort ();
+ }
+ a = 7;
+ r = 8;
+ T<Q>::t = 9;
+ b = 10;
+ #pragma omp parallel firstprivate (A::a, R::r, T<Q>::t, b)
+ {
+ int q = omp_get_thread_num ();
+ take (A::a, R::r, T<Q>::t, A::b);
+ if (a != 7 || r != 8 || T<Q>::t != 9 || b != 10)
+ __builtin_abort ();
+ A::a = 5 * q;
+ R::r = 6 * q;
+ T<Q>::t = 7 * q;
+ A::b = 8 * q;
+ take (a, r, T<Q>::t, b);
+ #pragma omp barrier
+ if (a != 5 * q || r != 6 * q || T<Q>::t != 7 * q || b != 8 * q)
+ __builtin_abort ();
+ }
+ bool f = false;
+ a = -5;
+ b = -4;
+ r = -3;
+ T<Q>::t = -2;
+ int n;
+ #pragma omp parallel for firstprivate (a, T<Q>::t, b, f) lastprivate (A::a, r, T<Q>::t, n)
+ for (int i = 0; i < omp_get_num_threads (); i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (A::a != -5 || A::b != -4 || T<Q>::t != -2)
+ __builtin_abort ();
+ }
+ else if (a != q || b != 2 * q || r != 3 * q || T<Q>::t != 4 * q)
+ __builtin_abort ();
+ take (a, r, T<Q>::t, b);
+ A::a = q;
+ A::b = 2 * q;
+ R::r = 3 * q;
+ T<Q>::t = 4 * q;
+ n = q;
+ f = true;
+ }
+ if (a != n || r != 3 * n || T<Q>::t != 4 * n)
+ __builtin_abort ();
+ b = 8;
+ #pragma omp parallel
+ #pragma omp single
+ for (int i = 0; i < 5; i++)
+ #pragma omp task firstprivate (T<Q>::t, b, n) private (a, R::r)
+ {
+ if (T<Q>::t != 4 * n || b != 8)
+ __builtin_abort ();
+ a = 9;
+ r = 8;
+ T<Q>::t = 12;
+ b = 18;
+ take (a, r, T<Q>::t, b);
+ if (a != 9 || r != 8 || T<Q>::t != 12 || b != 18)
+ __builtin_abort ();
+ }
+ a = 1;
+ b = 2;
+ R::r = 3;
+ T<Q>::t = 4;
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop firstprivate (r, T<Q>::t, b, f) lastprivate (a, T<Q>::t, b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (R::r != 3 || A::b != 2 || T<Q>::t != 4)
+ __builtin_abort ();
+ }
+ else if (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q)
+ __builtin_abort ();
+ take (a, r, T<Q>::t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T<Q>::t = 10 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (a != 7 * n || b != 8 * n || T<Q>::t != 10 * n)
+ __builtin_abort ();
+ a = 1;
+ b = 2;
+ R::r = 3;
+ T<Q>::t = 4;
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop firstprivate (r, T<Q>::t, b, A::a, f)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (A::a != 1 || R::r != 3 || A::b != 2 || T<Q>::t != 4)
+ __builtin_abort ();
+ }
+ else if (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q)
+ __builtin_abort ();
+ take (a, r, T<Q>::t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T<Q>::t = 10 * q;
+ f = true;
+ }
+ }
+ #pragma omp parallel private (f)
+ {
+ f = false;
+ #pragma omp single
+ #pragma omp taskloop lastprivate (a, T<Q>::t, b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (f && (a != 7 * q || b != 8 * q || r != 9 * q || T<Q>::t != 10 * q))
+ __builtin_abort ();
+ take (a, r, T<Q>::t, b);
+ A::a = 7 * q;
+ A::b = 8 * q;
+ R::r = 9 * q;
+ T<Q>::t = 10 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (a != 7 * n || b != 8 * n || T<Q>::t != 10 * n)
+ __builtin_abort ();
+ #pragma omp parallel private (a, T<Q>::t, A::b, r)
+ {
+ int q = omp_get_thread_num ();
+ a = q;
+ b = 2 * q;
+ r = 3 * q;
+ T<Q>::t = 4 * q;
+ take (a, b, r, T<Q>::t);
+ #pragma omp single copyprivate (A::a, T<Q>::t, b, R::r)
+ n = q;
+ if (a != n || b != 2 * n || r != 3 * n || T<Q>::t != 4 * n)
+ __builtin_abort ();
+ }
+ a = 0;
+ b = 0;
+ R::r = 0;
+ T<Q>::t = 0;
+ #pragma omp parallel for reduction (+: A::a, T<Q>::t, b, R::r)
+ for (int i = 0; i < 30; i++)
+ {
+ a += i;
+ A::b += 2 * i;
+ r += 3 * i;
+ T<Q>::t += 4 * i;
+ take (a, b, r, T<Q>::t);
+ }
+ if (A::a != 435 || b != 2 * 435 || R::r != 3 * 435 || T<Q>::t != 4 * 435)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ A<int> a;
+ a.m1 ();
+ A<int &> b;
+ b.m1 ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/member-3.C b/libgomp/testsuite/libgomp.c++/member-3.C
new file mode 100644
index 00000000000..50bd587d86b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/member-3.C
@@ -0,0 +1,105 @@
+// { dg-do run }
+
+struct R { R () {}; ~R () {}; int r; };
+struct T { T () {}; virtual ~T () {}; int t; };
+int c;
+struct A : public R, virtual public T { A () : b(c) {} int a; int &b; void m1 (); };
+int d[64];
+
+void
+A::m1 ()
+{
+ r = 0;
+ #pragma omp parallel for private (a) reduction(|:R::r)
+ for (a = 0; A::a < 31; a += 2)
+ r |= (1 << A::a);
+ if (r != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for simd linear (R::r)
+ for (R::r = 0; r < 32; R::r++)
+ d[r + 8] |= 1;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != ((i >= 8 && i < 32 + 8) ? 1 : 0))
+ __builtin_abort ();
+ #pragma omp parallel for lastprivate (t)
+ for (T::t = 0; t < 32; t += 3)
+ d[T::t + 2] |= 2;
+ if (T::t != 33)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)))
+ __builtin_abort ();
+ #pragma omp simd linear (t)
+ for (t = 0; t < 32; t++)
+ d[T::t + 9] |= 4;
+ if (t != 32)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)))
+ __builtin_abort ();
+ r = 0;
+ #pragma omp parallel for reduction(|:r)
+ for (a = 0; A::a < 31; a += 2)
+ r |= (1 << A::a);
+ if (r != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for simd
+ for (R::r = 0; r < 32; R::r += 2)
+ d[r + 8] |= 8;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)))
+ __builtin_abort ();
+ #pragma omp simd collapse(2)
+ for (T::t = 0; t < 7; t += 2)
+ for (a = 0; A::a < 8; a++)
+ d[((t << 2) | a) + 3] |= 16;
+ if (t != 8 || A::a != 8)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? 16 : 0)))
+ __builtin_abort ();
+ T::t = 32;
+ a = 16;
+ #pragma omp parallel
+ #pragma omp single
+ #pragma omp taskloop simd collapse(2)
+ for (t = 0; T::t < 7; T::t += 2)
+ for (A::a = 0; a < 8; A::a++)
+ d[((t << 2) | A::a) + 3] |= 32;
+ if (T::t != 8 || a != 8)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0)))
+ __builtin_abort ();
+ #pragma omp parallel
+ #pragma omp single
+ #pragma omp taskloop simd
+ for (R::r = 0; r < 31; R::r += 2)
+ d[r + 8] |= 64;
+ if (r != 32)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (64 | 8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0)))
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ A a;
+ a.m1 ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/member-4.C b/libgomp/testsuite/libgomp.c++/member-4.C
new file mode 100644
index 00000000000..f76695de6fb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/member-4.C
@@ -0,0 +1,108 @@
+// { dg-do run }
+
+int c, d, e;
+struct R { R () {}; ~R () {}; int r; };
+template <typename Q>
+struct T { T () : t(d) {}; virtual ~T () {}; Q t; };
+template <typename Q>
+struct A : public R, virtual public T<Q> { A () : b(c), a(e) {} Q a; int &b; void m1 (); };
+int f[64];
+
+template <typename Q>
+void
+A<Q>::m1 ()
+{
+ r = 0;
+ #pragma omp parallel for private (a) reduction(|:R::r)
+ for (a = 0; A::a < 31; a += 2)
+ r |= (1 << A::a);
+ if (r != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for simd linear (R::r)
+ for (R::r = 0; r < 32; R::r++)
+ f[r + 8] |= 1;
+ for (int i = 0; i < 64; i++)
+ if (f[i] != ((i >= 8 && i < 32 + 8) ? 1 : 0))
+ __builtin_abort ();
+ #pragma omp parallel for lastprivate (T<Q>::t)
+ for (T<Q>::t = 0; T<Q>::t < 32; T<Q>::t += 3)
+ f[T<Q>::t + 2] |= 2;
+ if (T<Q>::t != 33)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)))
+ __builtin_abort ();
+ #pragma omp simd linear (T<Q>::t)
+ for (T<Q>::t = 0; T<Q>::t < 32; T<Q>::t++)
+ f[T<Q>::t + 9] |= 4;
+ if (T<Q>::t != 32)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? 1 : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)))
+ __builtin_abort ();
+ r = 0;
+ #pragma omp parallel for reduction(|:r)
+ for (a = 0; A::a < 31; a += 2)
+ r |= (1 << A::a);
+ if (r != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for simd
+ for (R::r = 0; r < 32; R::r += 2)
+ f[r + 8] |= 8;
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)))
+ __builtin_abort ();
+ #pragma omp simd collapse(2)
+ for (T<Q>::t = 0; T<Q>::t < 7; T<Q>::t += 2)
+ for (a = 0; A::a < 8; a++)
+ f[((T<Q>::t << 2) | a) + 3] |= 16;
+ if (T<Q>::t != 8 || A::a != 8)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? 16 : 0)))
+ __builtin_abort ();
+ T<Q>::t = 32;
+ a = 16;
+ #pragma omp parallel
+ #pragma omp single
+ #pragma omp taskloop simd collapse(2)
+ for (T<Q>::t = 0; T<Q>::t < 7; T<Q>::t += 2)
+ for (A::a = 0; a < 8; A::a++)
+ f[((T<Q>::t << 2) | A::a) + 3] |= 32;
+ if (T<Q>::t != 8 || a != 8)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0)))
+ __builtin_abort ();
+ #pragma omp parallel
+ #pragma omp single
+ #pragma omp taskloop simd
+ for (R::r = 0; r < 31; R::r += 2)
+ f[r + 8] |= 64;
+ if (r != 32)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (f[i] != (((i >= 8 && i < 32 + 8) ? ((i & 1) ? 1 : (64 | 8 | 1)) : 0)
+ | ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 2 : 0)
+ | ((i >= 9 && i < 32 + 9) ? 4 : 0)
+ | ((i >= 3 && i < 32 + 3) ? (16 | 32) : 0)))
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ A<int> a;
+ a.m1 ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/member-5.C b/libgomp/testsuite/libgomp.c++/member-5.C
new file mode 100644
index 00000000000..d6fec7a841a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/member-5.C
@@ -0,0 +1,183 @@
+// { dg-do run }
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+template <typename T>
+class I
+{
+public:
+ typedef ptrdiff_t difference_type;
+ I ();
+ ~I ();
+ I (T *);
+ I (const I &);
+ T &operator * ();
+ T *operator -> ();
+ T &operator [] (const difference_type &) const;
+ I &operator = (const I &);
+ I &operator ++ ();
+ I operator ++ (int);
+ I &operator -- ();
+ I operator -- (int);
+ I &operator += (const difference_type &);
+ I &operator -= (const difference_type &);
+ I operator + (const difference_type &) const;
+ I operator - (const difference_type &) const;
+ template <typename S> friend bool operator == (I<S> &, I<S> &);
+ template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator < (I<S> &, I<S> &);
+ template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator <= (I<S> &, I<S> &);
+ template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator > (I<S> &, I<S> &);
+ template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator >= (I<S> &, I<S> &);
+ template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+ template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+ T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () {}
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+struct R { R () {}; ~R () {}; I<int> r; };
+struct T { T () {}; virtual ~T () {}; I<int> t; };
+struct A : public R, virtual public T { A () {} I<int> a; void m1 (const I<int> &, const I<int> &); };
+template <typename Q>
+struct U { U () {}; virtual ~U () {}; Q t; };
+template <typename Q>
+struct B : public R, virtual public U<Q> { B () {} Q a; void m2 (const Q &, const Q &, const I<int> &, const I<int> &); };
+
+int d[64];
+
+void
+A::m1 (const I<int> &x, const I<int> &y)
+{
+ int w = 0;
+ #pragma omp parallel for private (a) reduction(|:w)
+ for (a = x; A::a < y - 33; a += 2)
+ w |= (1 << *A::a);
+ if (w != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for lastprivate (t)
+ for (T::t = x; t < y - 32; t += 3)
+ d[*T::t + 2] |= 1;
+ if (*T::t != 33)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0))
+ __builtin_abort ();
+ w = 0;
+ #pragma omp parallel for reduction(|:w)
+ for (a = x; A::a < y - 33; a += 2)
+ w |= (1 << *A::a);
+ if (w != 0x55555555)
+ __builtin_abort ();
+ #pragma omp taskloop
+ for (R::r = x; r < y - 32; R::r += 2)
+ d[*r + 8] |= 2;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)
+ | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0)))
+ __builtin_abort ();
+ #pragma omp taskloop collapse(2)
+ for (T::t = x; t < y - 57; t += 2)
+ for (a = x; A::a < y - 56; a++)
+ d[((*t << 2) | *a) + 3] |= 4;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)
+ | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0)
+ | ((i >= 3 && i < 32 + 3) ? 4 : 0)))
+ __builtin_abort ();
+}
+
+template <typename Q>
+void
+B<Q>::m2 (const Q &u, const Q &v, const I<int> &x, const I<int> &y)
+{
+ int w = 0;
+ #pragma omp parallel for private (a) reduction(|:w)
+ for (a = u; B::a < v - 33; a += 2)
+ w |= (1 << *B::a);
+ if (w != 0x55555555)
+ __builtin_abort ();
+ #pragma omp parallel for lastprivate (U<Q>::t)
+ for (U<Q>::t = u; U<Q>::t < v - 32; U<Q>::t += 3)
+ d[*U<Q>::t + 2] |= 1;
+ if (*U<Q>::t != 33)
+ __builtin_abort ();
+ for (int i = 0; i < 64; i++)
+ if (d[i] != ((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0))
+ __builtin_abort ();
+ w = 0;
+ #pragma omp parallel for reduction(|:w)
+ for (a = u; B::a < v - 33; a += 2)
+ w |= (1 << *B::a);
+ if (w != 0x55555555)
+ __builtin_abort ();
+ #pragma omp taskloop
+ for (R::r = x; r < y - 32; R::r += 2)
+ d[*r + 8] |= 2;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)
+ | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0)))
+ __builtin_abort ();
+ #pragma omp taskloop collapse(2)
+ for (U<Q>::t = u; U<Q>::t < v - 57; U<Q>::t += 2)
+ for (a = u; B::a < v - 56; a++)
+ d[((*U<Q>::t << 2) | *a) + 3] |= 4;
+ for (int i = 0; i < 64; i++)
+ if (d[i] != (((i >= 2 && i < 32 + 2 && (i - 2) % 3 == 0) ? 1 : 0)
+ | ((i >= 8 && i < 32 + 8 && (i & 1) == 0) ? 2 : 0)
+ | ((i >= 3 && i < 32 + 3) ? 4 : 0)))
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ A a;
+ int b[128];
+ for (int i = 0; i < 128; i++)
+ b[i] = i - 32;
+ a.m1 (&b[32], &b[96]);
+ for (int i = 0; i < 64; i++)
+ d[i] = 0;
+ B<I<int> > c;
+ c.m2 (&b[32], &b[96], &b[32], &b[96]);
+ for (int i = 0; i < 64; i++)
+ d[i] = 0;
+ B<int *> d;
+ d.m2 (&b[32], &b[96], &b[32], &b[96]);
+}
diff --git a/libgomp/testsuite/libgomp.c++/ordered-1.C b/libgomp/testsuite/libgomp.c++/ordered-1.C
new file mode 100644
index 00000000000..a1bedd808ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/ordered-1.C
@@ -0,0 +1 @@
+#include "../libgomp.c/ordered-4.c"
diff --git a/libgomp/testsuite/libgomp.c++/reduction-10.C b/libgomp/testsuite/libgomp.c++/reduction-10.C
new file mode 100644
index 00000000000..2254430f168
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-10.C
@@ -0,0 +1,201 @@
+template <typename T>
+struct A
+{
+ A () { t = 0; }
+ A (T x) { t = x; }
+ A (const A &x) { t = x.t; }
+ ~A () {}
+ T t;
+};
+template <typename T>
+struct M
+{
+ M () { t = 1; }
+ M (T x) { t = x; }
+ M (const M &x) { t = x.t; }
+ ~M () {}
+ T t;
+};
+template <typename T>
+struct B
+{
+ B () { t = ~(T) 0; }
+ B (T x) { t = x; }
+ B (const B &x) { t = x.t; }
+ ~B () {}
+ T t;
+};
+template <typename T>
+void
+add (T &x, T &y)
+{
+ x.t += y.t;
+}
+template <typename T>
+void
+zero (T &x)
+{
+ x.t = 0;
+}
+template <typename T>
+void
+orit (T *x, T *y)
+{
+ y->t |= x->t;
+}
+B<long> bb;
+#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv))
+#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1)
+#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig))
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+A<char> z[10];
+
+template <int N>
+__attribute__((noinline, noclone)) void
+foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3, int p4,
+ int p5, long p6, short p7)
+{
+ A<unsigned long long> a[p7 + 4];
+ short bb[p7];
+ short (&b)[p7] = bb;
+ for (int i = 0; i < p7; i++)
+ bb[i] = -6;
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2 + N - 2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5 - N + 2]) \
+ reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == N)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[N].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[N])
+ b[N] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[N] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
+
+A<int> a3[4][3][2];
+A<int> (*p3)[3][2] = &a3[1];
+M<int> y3[5] = { 0, 1, 1, 1, 0 };
+B<long> w3[1][2];
+
+template <int N>
+struct S
+{
+ A<int> (*&x)[3][N];
+ M<int> *y;
+ B<long> (&w)[1][N];
+ A<char> z[10];
+ short b[5];
+ A<unsigned long long> a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b() {}
+ __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short);
+};
+
+template <int N>
+void
+S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
+{
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:N], z[:p3 + N - 2]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 3 + N][:p6]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == N)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[N].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[N])
+ b[N] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ A<int> a[4][3][2];
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ A<int> (*p)[3][2] = &a[1];
+ M<int> y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ B<long> w[1][2];
+ foo<2> (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5);
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (z[i].t != z2[i])
+ __builtin_abort ();
+ if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ S<2> s;
+ s.foo (1, 3L, 4L, 3, 4, 2L, 5);
+ for (int i = 0; i < 9; i++)
+ if (s.a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a3[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y3[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (s.z[i].t != z2[i])
+ __builtin_abort ();
+ if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22
+ || s.b[3] != 84 || s.b[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reduction-5.C b/libgomp/testsuite/libgomp.c++/reduction-5.C
new file mode 100644
index 00000000000..212fd69be58
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-5.C
@@ -0,0 +1,127 @@
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*&x)[3][2], int *y, long (&w)[1][2])
+{
+ unsigned long long a[9] = {};
+ short b[5] = {};
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:2]) reduction(max:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int a3[4][3][2];
+int (*p3)[3][2] = &a3[1];
+int y3[5] = { 0, 1, 1, 1, 0 };
+long w3[1][2] = { ~0L, ~0L };
+short bb[5];
+
+struct S
+{
+ int (*&x)[3][2];
+ int *y;
+ long (&w)[1][2];
+ char z[10];
+ short (&b)[5];
+ unsigned long long a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {}
+ __attribute__((noinline, noclone)) void foo ();
+};
+
+void
+S::foo ()
+{
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:2]) reduction(max:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ int a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ int (*p)[3][2] = &a[1];
+ int y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ long w[1][2] = { ~0L, ~0L };
+ foo (p, y + 1, w);
+ if (__builtin_memcmp (a, a2, sizeof (a))
+ || __builtin_memcmp (y, y2, sizeof (y))
+ || __builtin_memcmp (z, z2, sizeof (z))
+ || w[0][0] != ~0x249249L
+ || w[0][1] != ~0x249249L)
+ __builtin_abort ();
+ S s;
+ s.foo ();
+ for (int i = 0; i < 9; i++)
+ if (s.a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (__builtin_memcmp (a3, a2, sizeof (a3))
+ || __builtin_memcmp (y3, y2, sizeof (y3))
+ || __builtin_memcmp (s.z, z2, sizeof (s.z))
+ || w3[0][0] != ~0x249249L
+ || w3[0][1] != ~0x249249L)
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reduction-6.C b/libgomp/testsuite/libgomp.c++/reduction-6.C
new file mode 100644
index 00000000000..f180ca35edd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-6.C
@@ -0,0 +1,195 @@
+template <typename T>
+struct A
+{
+ A () { t = 0; }
+ A (T x) { t = x; }
+ A (const A &x) { t = x.t; }
+ ~A () {}
+ T t;
+};
+template <typename T>
+struct M
+{
+ M () { t = 1; }
+ M (T x) { t = x; }
+ M (const M &x) { t = x.t; }
+ ~M () {}
+ T t;
+};
+template <typename T>
+struct B
+{
+ B () { t = ~(T) 0; }
+ B (T x) { t = x; }
+ B (const B &x) { t = x.t; }
+ ~B () {}
+ T t;
+};
+template <typename T>
+void
+add (T &x, T &y)
+{
+ x.t += y.t;
+}
+template <typename T>
+void
+zero (T &x)
+{
+ x.t = 0;
+}
+template <typename T>
+void
+orit (T *x, T *y)
+{
+ y->t |= x->t;
+}
+B<long> bb;
+#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv))
+#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1)
+#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig))
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+A<char> z[10];
+
+__attribute__((noinline, noclone)) void
+foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2])
+{
+ A<unsigned long long> a[9];
+ short bb[5] = {};
+ short (&b)[5] = bb;
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:2]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
+
+A<int> a3[4][3][2];
+A<int> (*p3)[3][2] = &a3[1];
+M<int> y3[5] = { 0, 1, 1, 1, 0 };
+B<long> w3[1][2];
+
+struct S
+{
+ A<int> (*&x)[3][2];
+ M<int> *y;
+ B<long> (&w)[1][2];
+ A<char> z[10];
+ short b[5];
+ A<unsigned long long> a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b() {}
+ __attribute__((noinline, noclone)) void foo ();
+};
+
+void
+S::foo ()
+{
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:2]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ A<int> a[4][3][2];
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ A<int> (*p)[3][2] = &a[1];
+ M<int> y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ B<long> w[1][2];
+ foo (p, y + 1, w);
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (z[i].t != z2[i])
+ __builtin_abort ();
+ if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ S s;
+ s.foo ();
+ for (int i = 0; i < 9; i++)
+ if (s.a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a3[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y3[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (s.z[i].t != z2[i])
+ __builtin_abort ();
+ if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22
+ || s.b[3] != 84 || s.b[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reduction-7.C b/libgomp/testsuite/libgomp.c++/reduction-7.C
new file mode 100644
index 00000000000..75f9d08aac4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-7.C
@@ -0,0 +1,134 @@
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*&x)[3][2], int *y, long (&w)[1][2], int p1, long p2, long p3, int p4,
+ int p5, long p6, short p7)
+{
+ unsigned long long a[p7 + 4];
+ short b[p7];
+ for (int i = 0; i < p7 + 4; i++)
+ {
+ if (i < p7)
+ b[i] = -6;
+ a[i] = 0;
+ }
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(max:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int a3[4][3][2];
+int (*p3)[3][2] = &a3[1];
+int y3[5] = { 0, 1, 1, 1, 0 };
+long w3[1][2] = { ~0L, ~0L };
+short bb[5];
+
+struct S
+{
+ int (*&x)[3][2];
+ int *y;
+ long (&w)[1][2];
+ char z[10];
+ short (&b)[5];
+ unsigned long long a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {}
+ __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short);
+};
+
+void
+S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
+{
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(max:b[0:p7])
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ int a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ int (*p)[3][2] = &a[1];
+ int y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ long w[1][2] = { ~0L, ~0L };
+ foo (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5);
+ if (__builtin_memcmp (a, a2, sizeof (a))
+ || __builtin_memcmp (y, y2, sizeof (y))
+ || __builtin_memcmp (z, z2, sizeof (z))
+ || w[0][0] != ~0x249249L
+ || w[0][1] != ~0x249249L)
+ __builtin_abort ();
+ S s;
+ s.foo (1, 3L, 4L, 3, 4, 2L, 5);
+ for (int i = 0; i < 9; i++)
+ if (s.a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (__builtin_memcmp (a3, a2, sizeof (a3))
+ || __builtin_memcmp (y3, y2, sizeof (y3))
+ || __builtin_memcmp (s.z, z2, sizeof (s.z))
+ || w3[0][0] != ~0x249249L
+ || w3[0][1] != ~0x249249L)
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reduction-8.C b/libgomp/testsuite/libgomp.c++/reduction-8.C
new file mode 100644
index 00000000000..cffd7cc2d4c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-8.C
@@ -0,0 +1,198 @@
+template <typename T>
+struct A
+{
+ A () { t = 0; }
+ A (T x) { t = x; }
+ A (const A &x) { t = x.t; }
+ ~A () {}
+ T t;
+};
+template <typename T>
+struct M
+{
+ M () { t = 1; }
+ M (T x) { t = x; }
+ M (const M &x) { t = x.t; }
+ ~M () {}
+ T t;
+};
+template <typename T>
+struct B
+{
+ B () { t = ~(T) 0; }
+ B (T x) { t = x; }
+ B (const B &x) { t = x.t; }
+ ~B () {}
+ T t;
+};
+template <typename T>
+void
+add (T &x, T &y)
+{
+ x.t += y.t;
+}
+template <typename T>
+void
+zero (T &x)
+{
+ x.t = 0;
+}
+template <typename T>
+void
+orit (T *x, T *y)
+{
+ y->t |= x->t;
+}
+B<long> bb;
+#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv))
+#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1)
+#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig))
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+A<char> z[10];
+
+__attribute__((noinline, noclone)) void
+foo (A<int> (*&x)[3][2], M<int> *y, B<long> (&w)[1][2], int p1, long p2, long p3, int p4,
+ int p5, long p6, short p7)
+{
+ A<unsigned long long> a[p7 + 4];
+ short bb[p7];
+ short (&b)[p7] = bb;
+ for (int i = 0; i < p7; i++)
+ bb[i] = -6;
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
+
+A<int> a3[4][3][2];
+A<int> (*p3)[3][2] = &a3[1];
+M<int> y3[5] = { 0, 1, 1, 1, 0 };
+B<long> w3[1][2];
+
+struct S
+{
+ A<int> (*&x)[3][2];
+ M<int> *y;
+ B<long> (&w)[1][2];
+ A<char> z[10];
+ short b[5];
+ A<unsigned long long> a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b() {}
+ __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short);
+};
+
+void
+S::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7)
+{
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2][0:2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ A<int> a[4][3][2];
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ A<int> (*p)[3][2] = &a[1];
+ M<int> y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ B<long> w[1][2];
+ foo (p, y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5);
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (z[i].t != z2[i])
+ __builtin_abort ();
+ if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ S s;
+ s.foo (1, 3L, 4L, 3, 4, 2L, 5);
+ for (int i = 0; i < 9; i++)
+ if (s.a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 2; k++)
+ if (a3[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (int i = 0; i < 5; i++)
+ if (y3[i].t != y2[i])
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (s.z[i].t != z2[i])
+ __builtin_abort ();
+ if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ if (s.b[0] != 78 || s.b[1] != 12 || s.b[2] != 22
+ || s.b[3] != 84 || s.b[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reduction-9.C b/libgomp/testsuite/libgomp.c++/reduction-9.C
new file mode 100644
index 00000000000..117a8f66c52
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reduction-9.C
@@ -0,0 +1,130 @@
+char z[10] = { 0 };
+
+template <int N>
+__attribute__((noinline, noclone)) void
+foo (int (*&x)[3][N], int *y, long (&w)[1][N])
+{
+ unsigned long long a[9] = {};
+ short b[5] = {};
+ #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:N]) reduction(max:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == N)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[N] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[N])
+ b[N] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (int i = 0; i < 9; i++)
+ if (a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[N] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int a3[4][3][2];
+int (*p3)[3][2] = &a3[1];
+int y3[5] = { 0, 1, 1, 1, 0 };
+long w3[1][2] = { ~0L, ~0L };
+short bb[5];
+
+template <int N>
+struct S
+{
+ int (*&x)[3][N];
+ int *y;
+ long (&w)[1][N];
+ char z[10];
+ short (&b)[5];
+ unsigned long long a[9];
+ S() : x(p3), y(y3+1), w(w3), z(), a(), b(bb) {}
+ __attribute__((noinline, noclone)) void foo ();
+};
+
+template <int N>
+void
+S<N>::foo ()
+{
+ #pragma omp parallel for reduction(+:x[0:N][:][0:N], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:][:N]) reduction(max:b)
+ for (int i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == N)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[N] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[N])
+ b[N] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+}
+
+int
+main ()
+{
+ int a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ int (*p)[3][2] = &a[1];
+ int y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ long w[1][2] = { ~0L, ~0L };
+ foo<2> (p, y + 1, w);
+ if (__builtin_memcmp (a, a2, sizeof (a))
+ || __builtin_memcmp (y, y2, sizeof (y))
+ || __builtin_memcmp (z, z2, sizeof (z))
+ || w[0][0] != ~0x249249L
+ || w[0][1] != ~0x249249L)
+ __builtin_abort ();
+ S<2> s;
+ s.foo ();
+ for (int i = 0; i < 9; i++)
+ if (s.a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (__builtin_memcmp (a3, a2, sizeof (a3))
+ || __builtin_memcmp (y3, y2, sizeof (y3))
+ || __builtin_memcmp (s.z, z2, sizeof (s.z))
+ || w3[0][0] != ~0x249249L
+ || w3[0][1] != ~0x249249L)
+ __builtin_abort ();
+ if (bb[0] != 78 || bb[1] != 12 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/reference-1.C b/libgomp/testsuite/libgomp.c++/reference-1.C
new file mode 100644
index 00000000000..f2a78614a13
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/reference-1.C
@@ -0,0 +1,57 @@
+// { dg-do run }
+
+#include <omp.h>
+
+__attribute__((noinline, noclone)) void
+foo (int &a, short &d, char &g)
+{
+ unsigned long b = 12;
+ unsigned long &c = b;
+ long long e = 21;
+ long long &f = e;
+ unsigned int h = 12;
+ unsigned int &k = h;
+ #pragma omp parallel default(none) private(a, c) firstprivate(d, f) shared(g, k)
+ {
+ int i = omp_get_thread_num ();
+ a = i;
+ c = 2 * i;
+ if (d != 27 || f != 21)
+ __builtin_abort ();
+ d = 3 * (i & 0xfff);
+ f = 4 * i;
+ #pragma omp barrier
+ if (a != i || c != 2 * i || d != 3 * (i & 0xfff) || f != 4 * i)
+ __builtin_abort ();
+ #pragma omp for lastprivate(g, k)
+ for (int j = 0; j < 32; j++)
+ {
+ g = j;
+ k = 3 * j;
+ }
+ }
+ if (g != 31 || k != 31 * 3)
+ __builtin_abort ();
+ #pragma omp parallel for firstprivate (g, k) lastprivate (g, k)
+ for (int j = 0; j < 32; j++)
+ {
+ if (g != 31 || k != 31 * 3)
+ __builtin_abort ();
+ if (j == 31)
+ {
+ g = 29;
+ k = 138;
+ }
+ }
+ if (g != 29 || k != 138)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ int a = 5;
+ short d = 27;
+ char g = ' ';
+ foo (a, d, g);
+}
diff --git a/libgomp/testsuite/libgomp.c++/simd14.C b/libgomp/testsuite/libgomp.c++/simd14.C
new file mode 100644
index 00000000000..dc18cb619ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/simd14.C
@@ -0,0 +1,43 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+int a[1024];
+short b[2048];
+
+static inline void
+bar (int &x, unsigned long long &y, short *&z)
+{
+ a[x] = x + y + *z;
+ x++;
+ y += 17;
+ z += 2;
+}
+
+__attribute__((noinline, noclone)) int
+foo (unsigned long long &s, short *&t)
+{
+ int i, j = 0;
+ int &r = j;
+#pragma omp parallel for simd linear(r) linear(s:17ULL) linear(t:2)
+ for (i = 0; i < 1024; i++)
+ bar (r, s, t);
+ return j;
+}
+
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 2048; i++)
+ b[i] = 3 * i;
+ unsigned long long s = 12;
+ short *t = b;
+ int j = foo (s, t);
+ for (i = 0; i < 1024; i++)
+ if (a[i] != 12 + 24 * i)
+ __builtin_abort ();
+ if (j != 1024 || s != 12 + 1024 * 17ULL || t != &b[2048])
+ __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-10.C b/libgomp/testsuite/libgomp.c++/target-10.C
new file mode 100644
index 00000000000..860773eed15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-10.C
@@ -0,0 +1,154 @@
+extern "C" void abort (void);
+union U { int x; long long y; };
+struct T { int a; union U b; int c; };
+struct S { int s; int u; T v; int x[10]; union U w; int y[10]; int z[10]; };
+volatile int z;
+
+template <typename R>
+void
+foo ()
+{
+ R s;
+ s.template s = 0;
+ s.u = 1;
+ s.v.a = 2;
+ s.v.b.y = 3LL;
+ s.v.c = 19;
+ s.w.x = 4;
+ s.template x[0] = 7;
+ s.x[1] = 8;
+ s.y[3] = 9;
+ s.y[4] = 10;
+ s.y[5] = 11;
+ int err = 0;
+ #pragma omp target map (to:s.template v.template b, s.u, s.x[0:z + 2]) \
+ map (tofrom:s.y[3:3]) \
+ map (from: s.w, s.template z[z + 1:z + 3], err)
+ {
+ err = 0;
+ if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8
+ || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11)
+ err = 1;
+ s.w.x = 6;
+ s.y[3] = 12;
+ s.y[4] = 13;
+ s.y[5] = 14;
+ s.z[1] = 15;
+ s.z[2] = 16;
+ s.z[3] = 17;
+ }
+ if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14
+ || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[1] = 18;
+ s.z[0] = 19;
+ #pragma omp target data map (tofrom: s)
+ #pragma omp target map (always to: s.template w, s.x[1], err) map (alloc:s.u, s. template v.template b, s.z[z:z + 1])
+ {
+ err = 0;
+ if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19)
+ err = 1;
+ s.w.x = 8;
+ s.x[1] = 20;
+ s.z[0] = 21;
+ }
+ if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[0] = 22;
+ s.x[1] = 23;
+ #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u)
+ #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b)
+ {
+ err = 0;
+ if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23)
+ err = 1;
+ s.w.x = 11;
+ s.x[0] = 24;
+ s.x[1] = 25;
+ }
+ if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25)
+ abort ();
+}
+
+int
+main ()
+{
+ S s;
+ s.s = 0;
+ s.u = 1;
+ s.v.a = 2;
+ s.v.b.y = 3LL;
+ s.v.c = 19;
+ s.w.x = 4;
+ s.x[0] = 7;
+ s.x[1] = 8;
+ s.y[3] = 9;
+ s.y[4] = 10;
+ s.y[5] = 11;
+ int err = 0;
+ #pragma omp target map (to:s.v.b, s.u, s.x[0:z + 2]) \
+ map (tofrom:s.y[3:3]) \
+ map (from: s.w, s.z[z + 1:z + 3], err)
+ {
+ err = 0;
+ if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8
+ || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11)
+ err = 1;
+ s.w.x = 6;
+ s.y[3] = 12;
+ s.y[4] = 13;
+ s.y[5] = 14;
+ s.z[1] = 15;
+ s.z[2] = 16;
+ s.z[3] = 17;
+ }
+ if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14
+ || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[1] = 18;
+ s.z[0] = 19;
+ #pragma omp target data map (tofrom: s)
+ #pragma omp target map (always to: s.w, s.x[1], err) map (alloc:s.u, s.v.b, s.z[z:z + 1])
+ {
+ err = 0;
+ if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19)
+ err = 1;
+ s.w.x = 8;
+ s.x[1] = 20;
+ s.z[0] = 21;
+ }
+ if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[0] = 22;
+ s.x[1] = 23;
+ #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u)
+ #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b)
+ {
+ err = 0;
+ if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23)
+ err = 1;
+ s.w.x = 11;
+ s.x[0] = 24;
+ s.x[1] = 25;
+ }
+ if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25)
+ abort ();
+ foo <S> ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-11.C b/libgomp/testsuite/libgomp.c++/target-11.C
new file mode 100644
index 00000000000..fe99603351d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-11.C
@@ -0,0 +1,121 @@
+extern "C" void abort ();
+struct T { int a; int *b; int c; char (&d)[10]; };
+struct S { int *s; char *u; T v; short *w; short *&x; };
+volatile int z;
+
+template <typename A, typename B, typename C, typename D>
+void
+foo ()
+{
+ A d[10];
+ B *e;
+ C a[32], i;
+ A b[32];
+ B c[32];
+ for (i = 0; i < 32; i++)
+ {
+ a[i] = i;
+ b[i] = 32 + i;
+ c[i] = 64 + i;
+ }
+ for (i = 0; i < 10; i++)
+ d[i] = 17 + i;
+ e = c + 18;
+ D s = { a, b + 2, { 0, a + 16, 0, d }, c + 3, e };
+ int err = 0;
+ #pragma omp target map (to:s.v.b[0:z + 7], s.template u[z + 1:z + 4]) \
+ map (tofrom:s.s[3:3], s. template v. template d[z + 1:z + 3]) \
+ map (from: s.w[z:4], s.x[1:3], err) private (i)
+ {
+ err = 0;
+ for (i = 0; i < 7; i++)
+ if (s.v.b[i] != 16 + i)
+ err = 1;
+ for (i = 1; i < 5; i++)
+ if (s.u[i] != 34 + i)
+ err = 1;
+ for (i = 3; i < 6; i++)
+ if (s.s[i] != i)
+ err = 1;
+ else
+ s.s[i] = 128 + i;
+ for (i = 1; i < 4; i++)
+ if (s.v.d[i] != 17 + i)
+ err = 1;
+ else
+ s.v.d[i] = 23 + i;
+ for (i = 0; i < 4; i++)
+ s.w[i] = 96 + i;
+ for (i = 1; i < 4; i++)
+ s.x[i] = 173 + i;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < 32; i++)
+ if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i)
+ || b[i] != 32 + i
+ || c[i] != ((i >= 3 && i < 7) ? 93 + i : ((i >= 19 && i < 22) ? 155 + i : 64 + i)))
+ abort ();
+ for (i = 0; i < 10; i++)
+ if (d[i] != ((i >= 1 && i < 4) ? 23 + i : 17 + i))
+ abort ();
+}
+
+int
+main ()
+{
+ char d[10];
+ short *e;
+ int a[32], i;
+ char b[32];
+ short c[32];
+ for (i = 0; i < 32; i++)
+ {
+ a[i] = i;
+ b[i] = 32 + i;
+ c[i] = 64 + i;
+ }
+ for (i = 0; i < 10; i++)
+ d[i] = 17 + i;
+ e = c + 18;
+ S s = { a, b + 2, { 0, a + 16, 0, d }, c + 3, e };
+ int err = 0;
+ #pragma omp target map (to:s.v.b[0:z + 7], s.u[z + 1:z + 4]) \
+ map (tofrom:s.s[3:3], s.v.d[z + 1:z + 3]) \
+ map (from: s.w[z:4], s.x[1:3], err) private (i)
+ {
+ err = 0;
+ for (i = 0; i < 7; i++)
+ if (s.v.b[i] != 16 + i)
+ err = 1;
+ for (i = 1; i < 5; i++)
+ if (s.u[i] != 34 + i)
+ err = 1;
+ for (i = 3; i < 6; i++)
+ if (s.s[i] != i)
+ err = 1;
+ else
+ s.s[i] = 128 + i;
+ for (i = 1; i < 4; i++)
+ if (s.v.d[i] != 17 + i)
+ err = 1;
+ else
+ s.v.d[i] = 23 + i;
+ for (i = 0; i < 4; i++)
+ s.w[i] = 96 + i;
+ for (i = 1; i < 4; i++)
+ s.x[i] = 173 + i;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < 32; i++)
+ if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i)
+ || b[i] != 32 + i
+ || c[i] != ((i >= 3 && i < 7) ? 93 + i : ((i >= 19 && i < 22) ? 155 + i : 64 + i)))
+ abort ();
+ for (i = 0; i < 10; i++)
+ if (d[i] != ((i >= 1 && i < 4) ? 23 + i : 17 + i))
+ abort ();
+ foo <char, short, int, S> ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-12.C b/libgomp/testsuite/libgomp.c++/target-12.C
new file mode 100644
index 00000000000..3b4ed57df68
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-12.C
@@ -0,0 +1,93 @@
+extern "C" void abort (void);
+struct S { int s; int *u; int v[5]; };
+volatile int z;
+
+template <typename T>
+void
+foo ()
+{
+ int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0;
+ T s = { 9, u + 3, { 10, 11, 12, 13, 14 } };
+ int *v = u + 4;
+ #pragma omp target enter data map (to: s.s, s.template u[0:5]) map (alloc: s.template v[1:3])
+ s.s++;
+ u[3]++;
+ s.v[1]++;
+ #pragma omp target update to (s.template s) to (s.u[0:2], s.v[1:3])
+ #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err)
+ {
+ err = 0;
+ if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13)
+ err = 1;
+ if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7)
+ err = 1;
+ s.s++;
+ s.v[2] += 2;
+ v[-1] = 5;
+ v[3] = 9;
+ }
+ if (err)
+ abort ();
+ #pragma omp target map (alloc: s.u[0:5])
+ {
+ err = 0;
+ if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9)
+ err = 1;
+ s.u[1] = 12;
+ }
+ #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3])
+ if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5
+ || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8
+ || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14
+ || s.v[3] != 13 || s.v[4] != 14)
+ abort ();
+ #pragma omp target exit data map (release: s.s)
+ #pragma omp target exit data map (release: s.u[0:5])
+ #pragma omp target exit data map (delete: s.v[1:3])
+ #pragma omp target exit data map (release: s.s)
+}
+
+int
+main ()
+{
+ int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0;
+ S s = { 9, u + 3, { 10, 11, 12, 13, 14 } };
+ int *v = u + 4;
+ #pragma omp target enter data map (to: s.s, s.u[0:5]) map (alloc: s.v[1:3])
+ s.s++;
+ u[3]++;
+ s.v[1]++;
+ #pragma omp target update to (s.s) to (s.u[0:2], s.v[1:3])
+ #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err)
+ {
+ err = 0;
+ if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13)
+ err = 1;
+ if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7)
+ err = 1;
+ s.s++;
+ s.v[2] += 2;
+ v[-1] = 5;
+ v[3] = 9;
+ }
+ if (err)
+ abort ();
+ #pragma omp target map (alloc: s.u[0:5])
+ {
+ err = 0;
+ if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9)
+ err = 1;
+ s.u[1] = 12;
+ }
+ #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3])
+ if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5
+ || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8
+ || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14
+ || s.v[3] != 13 || s.v[4] != 14)
+ abort ();
+ #pragma omp target exit data map (release: s.s)
+ #pragma omp target exit data map (release: s.u[0:5])
+ #pragma omp target exit data map (always, delete: s.v[1:3])
+ #pragma omp target exit data map (release: s.s)
+ #pragma omp target exit data map (always delete : s.v[1:3])
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-2.C b/libgomp/testsuite/libgomp.c++/target-2.C
index 35e910acc2e..1eab7f29b4a 100644
--- a/libgomp/testsuite/libgomp.c++/target-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-2.C
@@ -33,7 +33,8 @@ fn2 (int x, double (&dr) [1024], double *&er)
int j;
fn1 (hr + 2 * x, ir + 2 * x, x);
#pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \
- map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x])
+ map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) \
+ map(tofrom: s)
#pragma omp parallel for reduction(+:s)
for (j = 0; j < x; j++)
s += br[j] * cr[j] + dr[x + j] + er[x + j]
diff --git a/libgomp/testsuite/libgomp.c++/target-5.C b/libgomp/testsuite/libgomp.c++/target-5.C
new file mode 100644
index 00000000000..6639be394c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-5.C
@@ -0,0 +1 @@
+#include "../libgomp.c/target-13.c"
diff --git a/libgomp/testsuite/libgomp.c++/target-6.C b/libgomp/testsuite/libgomp.c++/target-6.C
new file mode 100644
index 00000000000..8dbafb0437b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-6.C
@@ -0,0 +1,64 @@
+extern "C" void abort (void);
+struct S { int s, t; };
+
+void
+foo (int &x, int &y, S &u, S &v, double &s, double &t)
+{
+ int err = 0, i;
+ int a[y - 2], b[y - 2];
+ int (&c)[y - 2] = a, (&d)[y - 2] = b;
+ for (i = 0; i < y - 2; i++)
+ {
+ c[i] = i;
+ d[i] = 3 + i;
+ }
+ #pragma omp target private (x, u, s, c, i) firstprivate (y, v, t, d) map(from:err)
+ {
+ x = y;
+ u = v;
+ s = t;
+ for (i = 0; i < y - 2; i++)
+ c[i] = d[i];
+ err = (x != 6 || y != 6
+ || u.s != 9 || u.t != 10 || v.s != 9 || v.t != 10
+ || s != 12.5 || t != 12.5);
+ for (i = 0; i < y - 2; i++)
+ if (d[i] != 3 + i || c[i] != 3 + i)
+ err = 1;
+ else
+ {
+ c[i] += 2 * i;
+ d[i] += i;
+ }
+ x += 1;
+ y += 2;
+ u.s += 3;
+ v.t += 4;
+ s += 2.5;
+ t += 3.0;
+ if (x != 7 || y != 8
+ || u.s != 12 || u.t != 10 || v.s != 9 || v.t != 14
+ || s != 15.0 || t != 15.5)
+ err = 1;
+ for (i = 0; i < y - 4; i++)
+ if (d[i] != 3 + 2 * i || c[i] != 3 + 3 * i)
+ err = 1;
+ }
+ if (err || x != 5 || y != 6
+ || u.s != 7 || u.t != 8 || v.s != 9 || v.t != 10
+ || s != 11.5 || t != 12.5)
+ abort ();
+ for (i = 0; i < y - 2; i++)
+ if (d[i] != 3 + i || c[i] != i)
+ abort ();
+}
+
+int
+main ()
+{
+ int x = 5, y = 6;
+ S u = { 7, 8 }, v = { 9, 10 };
+ double s = 11.5, t = 12.5;
+ foo (x, y, u, v, s, t);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-7.C b/libgomp/testsuite/libgomp.c++/target-7.C
new file mode 100644
index 00000000000..e13c50f26da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-7.C
@@ -0,0 +1,90 @@
+extern "C" void abort ();
+
+void
+foo (int *x, int *&y, int (&z)[15])
+{
+ int a[10], b[15], err, i;
+ for (i = 0; i < 10; i++)
+ a[i] = 7 * i;
+ for (i = 0; i < 15; i++)
+ b[i] = 8 * i;
+ #pragma omp target map(to:x[5:10], y[5:10], z[5:10], a[0:10], b[5:10]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if (x[5 + i] != 20 + 4 * i
+ || y[5 + i] != 25 + 5 * i
+ || z[5 + i] != 30 + 6 * i
+ || a[i] != 7 * i
+ || b[5 + i] != 40 + 8 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+void
+bar (int n, int v)
+{
+ int a[n], b[n], c[n], d[n], e[n], err, i;
+ int (*x)[n] = &c;
+ int (*y2)[n] = &d;
+ int (*&y)[n] = y2;
+ int (&z)[n] = e;
+ for (i = 0; i < n; i++)
+ {
+ (*x)[i] = 4 * i;
+ (*y)[i] = 5 * i;
+ z[i] = 6 * i;
+ a[i] = 7 * i;
+ b[i] = 8 * i;
+ }
+ #pragma omp target map(to:x[0][5:10], y[0][5:10], z[5:10], a[0:10], b[5:10]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if ((*x)[5 + i] != 20 + 4 * i
+ || (*y)[5 + i] != 25 + 5 * i
+ || z[5 + i] != 30 + 6 * i
+ || a[i] != 7 * i
+ || b[5 + i] != 40 + 8 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ {
+ (*x)[i] = 9 * i;
+ (*y)[i] = 10 * i;
+ z[i] = 11 * i;
+ a[i] = 12 * i;
+ b[i] = 13 * i;
+ }
+ #pragma omp target map(to:x[0][v:v+5], y[0][v:v+5], z[v:v+5], a[v-5:v+5], b[v:v+5]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if ((*x)[5 + i] != 45 + 9 * i
+ || (*y)[5 + i] != 50 + 10 * i
+ || z[5 + i] != 55 + 11 * i
+ || a[i] != 12 * i
+ || b[5 + i] != 65 + 13 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+int
+main ()
+{
+ int x[15], y2[15], z[15], *y = y2, i;
+ for (i = 0; i < 15; i++)
+ {
+ x[i] = 4 * i;
+ y[i] = 5 * i;
+ z[i] = 6 * i;
+ }
+ foo (x, y, z);
+ bar (15, 5);
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-8.C b/libgomp/testsuite/libgomp.c++/target-8.C
new file mode 100644
index 00000000000..d886b476754
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-8.C
@@ -0,0 +1,58 @@
+extern "C" void abort ();
+struct S { int a; };
+#ifdef __SIZEOF_INT128__
+typedef __int128 T;
+#else
+typedef long long int T;
+#endif
+
+void
+foo (T a, int b, struct S c)
+{
+ int err;
+ #pragma omp target firstprivate (a, b, c) map(from:err)
+ {
+ err = 0;
+ if (a != 131 || b != 276 || c.a != 59)
+ err = 1;
+ a = 936;
+ b = 27;
+ c.a = 98;
+ if (a != 936 || b != 27 || c.a != 98)
+ err = 1;
+ }
+ if (err || a != 131 || b != 276 || c.a != 59)
+ abort ();
+}
+
+void
+bar (T &a, int &b, struct S &c)
+{
+ int err;
+ #pragma omp target firstprivate (a, b, c) map(from:err)
+ {
+ err = 0;
+ if (a != 131 || b != 276 || c.a != 59)
+ err = 1;
+ a = 936;
+ b = 27;
+ c.a = 98;
+ if (a != 936 || b != 27 || c.a != 98)
+ err = 1;
+ }
+ if (err || a != 131 || b != 276 || c.a != 59)
+ abort ();
+}
+
+int
+main ()
+{
+ T a = 131;
+ int b = 276;
+ struct S c;
+ c.a = 59;
+ foo (a, b, c);
+ bar (a, b, c);
+ if (a != 131 || b != 276 || c.a != 59)
+ abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-9.C b/libgomp/testsuite/libgomp.c++/target-9.C
new file mode 100644
index 00000000000..a5d171b0b3d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-9.C
@@ -0,0 +1,73 @@
+extern "C" void abort (void);
+
+void
+foo (int *&p, int (&s)[5], int n)
+{
+ int a[4] = { 7, 8, 9, 10 }, b[n], c[3] = { 20, 21, 22 };
+ int *r = a + 1, *q = p - 1, i, err;
+ for (i = 0; i < n; i++)
+ b[i] = 9 + i;
+ #pragma omp target data map(to:a)
+ #pragma omp target data use_device_ptr(r) map(from:err)
+ #pragma omp target is_device_ptr(r) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 4; i++)
+ if (r[i - 1] != 7 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:q[:4])
+ #pragma omp target data use_device_ptr(p) map(from:err)
+ #pragma omp target is_device_ptr(p) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 4; i++)
+ if (p[i - 1] != i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:b)
+ #pragma omp target data use_device_ptr(b) map(from:err)
+ #pragma omp target is_device_ptr(b) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (b[i] != 9 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:c)
+ #pragma omp target data use_device_ptr(c) map(from:err)
+ #pragma omp target is_device_ptr(c) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 3; i++)
+ if (c[i] != 20 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:s[:5])
+ #pragma omp target data use_device_ptr(s) map(from:err)
+ #pragma omp target is_device_ptr(s) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 5; i++)
+ if (s[i] != 17 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+int
+main ()
+{
+ int a[4] = { 0, 1, 2, 3 }, b[5] = { 17, 18, 19, 20, 21 };
+ int *p = a + 1;
+ foo (p, b, 9);
+}
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-1.C b/libgomp/testsuite/libgomp.c++/taskloop-1.C
new file mode 100644
index 00000000000..66f8e0b1d7c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-1.C
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-options "-O2 -fopenmp" }
+
+#include "../libgomp.c/taskloop-1.c"
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-2.C b/libgomp/testsuite/libgomp.c++/taskloop-2.C
new file mode 100644
index 00000000000..67a0e92717e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-2.C
@@ -0,0 +1,6 @@
+// { dg-do run }
+// { dg-options "-O2" }
+// { dg-additional-options "-msse2" { target sse2_runtime } }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+#include "../libgomp.c/taskloop-2.c"
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-3.C b/libgomp/testsuite/libgomp.c++/taskloop-3.C
new file mode 100644
index 00000000000..bfd793c1c58
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-3.C
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-options "-O2 -fopenmp" }
+
+#include "../libgomp.c/taskloop-3.c"
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-4.C b/libgomp/testsuite/libgomp.c++/taskloop-4.C
new file mode 100644
index 00000000000..937cfcc0029
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-4.C
@@ -0,0 +1,4 @@
+// { dg-do run }
+// { dg-options "-O2 -fopenmp" }
+
+#include "../libgomp.c/taskloop-4.c"
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-5.C b/libgomp/testsuite/libgomp.c++/taskloop-5.C
new file mode 100644
index 00000000000..eb464467b66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-5.C
@@ -0,0 +1,73 @@
+#include <omp.h>
+
+__attribute__((noinline, noclone)) void
+foo (int &b)
+{
+#pragma omp parallel
+#pragma omp single
+ {
+ bool f = false;
+ #pragma omp taskloop firstprivate (b, f)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (b != 2)
+ __builtin_abort ();
+ }
+ else if (b != 8 * q)
+ __builtin_abort ();
+ b = 8 * q;
+ f = true;
+ }
+ }
+ int n;
+#pragma omp parallel
+#pragma omp single
+ {
+ bool f = false;
+ #pragma omp taskloop firstprivate (f) lastprivate (b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (f && b != 8 * q)
+ __builtin_abort ();
+ b = 8 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (b != 8 * n)
+ __builtin_abort ();
+ b = 9;
+#pragma omp parallel
+#pragma omp single
+ {
+ bool f = false;
+ #pragma omp taskloop firstprivate (b, f) lastprivate (b, n)
+ for (int i = 0; i < 30; i++)
+ {
+ int q = omp_get_thread_num ();
+ if (!f)
+ {
+ if (b != 9)
+ __builtin_abort ();
+ }
+ else if (b != 11 * q)
+ __builtin_abort ();
+ b = 11 * q;
+ n = q;
+ f = true;
+ }
+ }
+ if (b != 11 * n)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ int b = 2;
+ foo (b);
+}
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-6.C b/libgomp/testsuite/libgomp.c++/taskloop-6.C
new file mode 100644
index 00000000000..edf7f7a371b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-6.C
@@ -0,0 +1,442 @@
+// { dg-do run }
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I
+{
+public:
+ typedef ptrdiff_t difference_type;
+ I ();
+ ~I ();
+ I (T *);
+ I (const I &);
+ T &operator * ();
+ T *operator -> ();
+ T &operator [] (const difference_type &) const;
+ I &operator = (const I &);
+ I &operator ++ ();
+ I operator ++ (int);
+ I &operator -- ();
+ I operator -- (int);
+ I &operator += (const difference_type &);
+ I &operator -= (const difference_type &);
+ I operator + (const difference_type &) const;
+ I operator - (const difference_type &) const;
+ template <typename S> friend bool operator == (I<S> &, I<S> &);
+ template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator < (I<S> &, I<S> &);
+ template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator <= (I<S> &, I<S> &);
+ template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator > (I<S> &, I<S> &);
+ template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator >= (I<S> &, I<S> &);
+ template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+ template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+ T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () {}
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+template <typename T>
+class J
+{
+public:
+ J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+ const I<T> &begin ();
+ const I<T> &end ();
+private:
+ I<T> b, e;
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }
+template <typename T> const I<T> &J<T>::end () { return e; }
+
+int results[2000];
+
+template <typename T>
+void
+baz (I<T> &i)
+{
+ if (*i < 0 || *i >= 2000)
+ abort ();
+ results[*i]++;
+}
+
+void
+f1 (const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop num_tasks(22)
+ for (I<int> i = x; i <= y; i += 6)
+ baz (i);
+}
+
+void
+f2 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop grainsize(384) private(i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+}
+
+template <typename T>
+void
+f3 (const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop default(none) firstprivate (x, y)
+ for (I<int> i = x; i <= y; i = i + 9 - 8)
+ baz (i);
+}
+
+template <typename T>
+void
+f4 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+}
+
+void
+f5 (const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (I<int> i = x + 2000 - 64; i > y + 10; i -= 10)
+ baz (i);
+}
+
+template <int N>
+void
+f6 (const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (I<int> i = x + 2000 - 64; i > y + 10; i = i - 12 + 2)
+ {
+ I<int> j = i + N;
+ baz (j);
+ }
+}
+
+template <int N>
+void
+f7 (I<int> i, const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop default(none) firstprivate (x, y)
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+}
+
+template <int N>
+void
+f8 (J<int> j)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop default(none) num_tasks(*I<int> (j.begin ())) firstprivate (j)
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f9 (const I<T> &x, const I<T> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop grainsize(163)
+ for (I<T> i = x; i <= y; i = i + N)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f10 (const I<T> &x, const I<T> &y)
+{
+ I<T> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x; i > y; i = i + N)
+ baz (i);
+}
+
+template <typename T>
+void
+f11 (const T &x, const T &y)
+{
+#pragma omp parallel
+ {
+#pragma omp single nowait
+#pragma omp taskloop nogroup
+ for (T i = x; i <= y; i += 3)
+ baz (i);
+#pragma omp single nowait
+ {
+ T j = y + 3;
+ baz (j);
+ }
+ }
+}
+
+template <typename T>
+void
+f12 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x; i > y; --i)
+ baz (i);
+}
+
+template <int N>
+struct K
+{
+ template <typename T>
+ static void
+ f13 (const T &x, const T &y)
+ {
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (T i = x; i <= y + N; i += N)
+ baz (i);
+ }
+};
+
+I<int>
+f14 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+I<int>
+f15 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+ return i;
+}
+
+template <int N>
+I<int>
+f16 (I<int> i, const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+ return i;
+}
+
+template <int N>
+I<int>
+f17 (J<int> j)
+{
+ static I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+ return i;
+}
+
+template <typename T, int N>
+I<T>
+f18 (const I<T> &x, const I<T> &y)
+{
+ static I<T> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i > y; i = i + N)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+T
+f19 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+ {
+#pragma omp single nowait
+#pragma omp taskloop nogroup lastprivate(i)
+ for (i = x; i <= y; i += 3)
+ baz (i);
+#pragma omp single nowait
+ {
+ T j = y + 3;
+ baz (j);
+ }
+ }
+ return i;
+}
+
+template <typename T>
+T
+f20 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i > y; --i)
+ baz (i);
+ return i;
+}
+
+#define check(expr) \
+ for (int i = 0; i < 2000; i++) \
+ if (expr) \
+ { \
+ if (results[i] != 1) \
+ abort (); \
+ results[i] = 0; \
+ } \
+ else if (results[i]) \
+ abort ()
+
+int
+main ()
+{
+ int a[2000];
+ long b[2000];
+ for (int i = 0; i < 2000; i++)
+ {
+ a[i] = i;
+ b[i] = i;
+ }
+ f1 (&a[10], &a[1990]);
+ check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0);
+ f2 (&a[0], &a[1999]);
+ check (i < 1998 && (i & 1) == 0);
+ f3<char> (&a[20], &a[1837]);
+ check (i >= 20 && i <= 1837);
+ f4<int> (&a[0], &a[30]);
+ check (i > 40 && i <= 2000 - 64);
+ f5 (&a[0], &a[100]);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f6<-10> (&a[10], &a[110]);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f7<6> (I<int> (), &a[12], &a[1800]);
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ f8<121> (J<int> (&a[14], &a[1803]));
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ f9<int, 7> (&a[33], &a[1967]);
+ check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0);
+ f10<int, -7> (&a[1939], &a[17]);
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ f11<I<int> > (&a[16], &a[1981]);
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ f12<I<int> > (&a[1761], &a[37]);
+ check (i > 37 && i <= 1761);
+ K<5>::f13<I<int> > (&a[1], &a[1935]);
+ check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0);
+ if (f14 (&a[0], &a[1999]) != I<int>(&a[1998]))
+ abort ();
+ check (i < 1998 && (i & 1) == 0);
+ if (f15<int> (&a[0], &a[30]) != I<int>(&a[40]))
+ abort ();
+ check (i > 40 && i <= 2000 - 64);
+ if (f16<6> (I<int> (), &a[12], &a[1800]) != I<int>(&a[1814]))
+ abort ();
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ if (f17<121> (J<int> (&a[14], &a[1803])) != I<int>(&a[1926]))
+ abort ();
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ if (f18<int, -7> (&a[1939], &a[17]) != I<int>(&a[14]))
+ abort ();
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ if (f19<I<int> > (&a[16], &a[1981]) != I<int>(&a[1984]))
+ abort ();
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ if (f20<I<int> > (&a[1761], &a[37]) != I<int>(&a[37]))
+ abort ();
+ check (i > 37 && i <= 1761);
+ f9<long, 7> (&b[33], &b[1967]);
+ check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0);
+ f10<long, -7> (&b[1939], &b[17]);
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ f11<I<long> > (&b[16], &b[1981]);
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ f12<I<long> > (&b[1761], &b[37]);
+ check (i > 37 && i <= 1761);
+ K<5>::f13<I<long> > (&b[1], &b[1935]);
+ check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0);
+ if (f18<long, -7> (&b[1939], &b[17]) != I<long>(&b[14]))
+ abort ();
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ if (f19<I<long> > (&b[16], &b[1981]) != I<long>(&b[1984]))
+ abort ();
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ if (f20<I<long> > (&b[1761], &b[37]) != I<long>(&b[37]))
+ abort ();
+ check (i > 37 && i <= 1761);
+}
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-7.C b/libgomp/testsuite/libgomp.c++/taskloop-7.C
new file mode 100644
index 00000000000..b9a3c81e381
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-7.C
@@ -0,0 +1,400 @@
+// { dg-do run }
+
+#include <vector>
+#include <cstdlib>
+
+template <typename T>
+class J
+{
+public:
+ typedef typename std::vector<T>::const_iterator const_iterator;
+ J(const const_iterator &x, const const_iterator &y) : b (x), e (y) {}
+ const const_iterator &begin ();
+ const const_iterator &end ();
+private:
+ const_iterator b, e;
+};
+
+template <typename T>
+const typename std::vector<T>::const_iterator &J<T>::begin () { return b; }
+template <typename T>
+const typename std::vector<T>::const_iterator &J<T>::end () { return e; }
+
+int results[2000];
+
+template <typename T>
+void
+baz (T &i)
+{
+ if (*i < 0 || *i >= 2000)
+ std::abort ();
+ results[*i]++;
+}
+
+void
+f1 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::vector<int>::const_iterator i = x; i <= y; i += 6)
+ baz (i);
+}
+
+void
+f2 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+ std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop private(i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+}
+
+template <typename T>
+void
+f3 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::vector<int>::const_iterator i = x; i <= y; i = i + 9 - 8)
+ baz (i);
+}
+
+template <typename T>
+void
+f4 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+ std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+}
+
+void
+f5 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::vector<int>::const_iterator i = x + 2000 - 64; i > y + 10; i -= 10)
+ baz (i);
+}
+
+template <int N>
+void
+f6 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::vector<int>::const_iterator i = x + 2000 - 64;
+ i > y + 10; i = i - 12 + 2)
+ {
+ std::vector<int>::const_iterator j = i + N;
+ baz (j);
+ }
+}
+
+template <int N>
+void
+f7 (std::vector<int>::const_iterator i,
+ const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+}
+
+template <int N>
+void
+f8 (J<int> j)
+{
+ std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f9 (const typename std::vector<T>::const_iterator &x,
+ const typename std::vector<T>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (typename std::vector<T>::const_iterator i = x; i <= y; i = i + N)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f10 (const typename std::vector<T>::const_iterator &x,
+ const typename std::vector<T>::const_iterator &y)
+{
+ typename std::vector<T>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x; i > y; i = i + N)
+ baz (i);
+}
+
+template <typename T>
+void
+f11 (const T &x, const T &y)
+{
+#pragma omp parallel
+ {
+#pragma omp single nowait
+#pragma omp taskloop nogroup
+ for (T i = x; i <= y; i += 3)
+ baz (i);
+#pragma omp single nowait
+ {
+ T j = y + 3;
+ baz (j);
+ }
+ }
+}
+
+template <typename T>
+void
+f12 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x; i > y; --i)
+ baz (i);
+}
+
+template <int N>
+struct K
+{
+ template <typename T>
+ static void
+ f13 (const T &x, const T &y)
+ {
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (T i = x; i <= y + N; i += N)
+ baz (i);
+ }
+};
+
+std::vector<int>::const_iterator
+f14 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+ std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+std::vector<int>::const_iterator
+f15 (const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+ std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+ return i;
+}
+
+template <int N>
+std::vector<int>::const_iterator
+f16 (std::vector<int>::const_iterator i,
+ const std::vector<int>::const_iterator &x,
+ const std::vector<int>::const_iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+ return i;
+}
+
+template <int N>
+std::vector<int>::const_iterator
+f17 (J<int> j)
+{
+ static std::vector<int>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+ return i;
+}
+
+template <typename T, int N>
+typename std::vector<T>::const_iterator
+f18 (const typename std::vector<T>::const_iterator &x,
+ const typename std::vector<T>::const_iterator &y)
+{
+ static typename std::vector<T>::const_iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i > y; i = i + N)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+T
+f19 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+ {
+#pragma omp single nowait
+#pragma omp taskloop nogroup lastprivate(i)
+ for (i = x; i <= y; i += 3)
+ baz (i);
+#pragma omp single nowait
+ {
+ T j = y + 3;
+ baz (j);
+ }
+ }
+ return i;
+}
+
+template <typename T>
+T
+f20 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x; i > y; --i)
+ baz (i);
+ return i;
+}
+
+#define check(expr) \
+ for (int i = 0; i < 2000; i++) \
+ if (expr) \
+ { \
+ if (results[i] != 1) \
+ std::abort (); \
+ results[i] = 0; \
+ } \
+ else if (results[i]) \
+ std::abort ()
+
+int
+main ()
+{
+ std::vector<int> a(2000);
+ std::vector<long> b(2000);
+ for (int i = 0; i < 2000; i++)
+ {
+ a[i] = i;
+ b[i] = i;
+ }
+ f1 (a.begin () + 10, a.begin () + 1990);
+ check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0);
+ f2 (a.begin () + 0, a.begin () + 1999);
+ check (i < 1998 && (i & 1) == 0);
+ f3<char> (a.begin () + 20, a.begin () + 1837);
+ check (i >= 20 && i <= 1837);
+ f4<int> (a.begin () + 0, a.begin () + 30);
+ check (i > 40 && i <= 2000 - 64);
+ f5 (a.begin () + 0, a.begin () + 100);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f6<-10> (a.begin () + 10, a.begin () + 110);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f7<6> (std::vector<int>::const_iterator (), a.begin () + 12,
+ a.begin () + 1800);
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ f8<121> (J<int> (a.begin () + 14, a.begin () + 1803));
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ f9<int, 7> (a.begin () + 33, a.begin () + 1967);
+ check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0);
+ f10<int, -7> (a.begin () + 1939, a.begin () + 17);
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ f11<std::vector<int>::const_iterator > (a.begin () + 16, a.begin () + 1981);
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ f12<std::vector<int>::const_iterator > (a.begin () + 1761, a.begin () + 37);
+ check (i > 37 && i <= 1761);
+ K<5>::f13<std::vector<int>::const_iterator > (a.begin () + 1,
+ a.begin () + 1935);
+ check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0);
+ if (f14 (a.begin () + 0, a.begin () + 1999) != a.begin () + 1998)
+ std::abort ();
+ check (i < 1998 && (i & 1) == 0);
+ if (f15<int> (a.begin () + 0, a.begin () + 30) != a.begin () + 40)
+ std::abort ();
+ check (i > 40 && i <= 2000 - 64);
+ if (f16<6> (std::vector<int>::const_iterator (), a.begin () + 12,
+ a.begin () + 1800) != a.begin () + 1814)
+ std::abort ();
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ if (f17<121> (J<int> (a.begin () + 14, a.begin () + 1803)) != a.begin () + 1926)
+ std::abort ();
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ if (f18<int, -7> (a.begin () + 1939, a.begin () + 17) != a.begin () + 14)
+ std::abort ();
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ if (f19<std::vector<int>::const_iterator > (a.begin () + 16, a.begin () + 1981)
+ != a.begin () + 1984)
+ std::abort ();
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ if (f20<std::vector<int>::const_iterator > (a.begin () + 1761, a.begin () + 37)
+ != a.begin () + 37)
+ std::abort ();
+ check (i > 37 && i <= 1761);
+ f9<long, 7> (b.begin () + 33, b.begin () + 1967);
+ check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0);
+ f10<long, -7> (b.begin () + 1939, b.begin () + 17);
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ f11<std::vector<long>::const_iterator > (b.begin () + 16, b.begin () + 1981);
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ f12<std::vector<long>::const_iterator > (b.begin () + 1761, b.begin () + 37);
+ check (i > 37 && i <= 1761);
+ K<5>::f13<std::vector<long>::const_iterator > (b.begin () + 1,
+ b.begin () + 1935);
+ check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0);
+ if (f18<long, -7> (b.begin () + 1939, b.begin () + 17) != b.begin () + 14)
+ std::abort ();
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ if (f19<std::vector<long>::const_iterator > (b.begin () + 16, b.begin () + 1981)
+ != b.begin () + 1984)
+ std::abort ();
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ if (f20<std::vector<long>::const_iterator > (b.begin () + 1761, b.begin () + 37)
+ != b.begin () + 37)
+ std::abort ();
+ check (i > 37 && i <= 1761);
+}
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-8.C b/libgomp/testsuite/libgomp.c++/taskloop-8.C
new file mode 100644
index 00000000000..d164907d1d6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-8.C
@@ -0,0 +1,250 @@
+// { dg-do run }
+
+#include <string>
+#include <cstdlib>
+
+template <typename T>
+class J
+{
+public:
+ typedef typename std::basic_string<T>::iterator iterator;
+ J(const iterator &x, const iterator &y) : b (x), e (y) {}
+ const iterator &begin ();
+ const iterator &end ();
+private:
+ iterator b, e;
+};
+
+template <typename T>
+const typename std::basic_string<T>::iterator &J<T>::begin () { return b; }
+template <typename T>
+const typename std::basic_string<T>::iterator &J<T>::end () { return e; }
+
+template <typename T>
+void
+baz (T &i)
+{
+ if (*i < L'a' || *i >= L'a' + 2000)
+ std::abort ();
+ (*i)++;
+}
+
+void
+f1 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::basic_string<wchar_t>::iterator i = x; i <= y; i += 6)
+ baz (i);
+}
+
+void
+f2 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+ std::basic_string<wchar_t>::iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop private(i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+}
+
+template <typename T>
+void
+f3 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::basic_string<wchar_t>::iterator i = x; i <= y; i = i + 9 - 8)
+ baz (i);
+}
+
+template <typename T>
+void
+f4 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+ std::basic_string<wchar_t>::iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate(i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+}
+
+void
+f5 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::basic_string<wchar_t>::iterator i = x + 2000 - 64;
+ i > y + 10; i -= 10)
+ baz (i);
+}
+
+template <int N>
+void
+f6 (const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (std::basic_string<wchar_t>::iterator i = x + 2000 - 64;
+ i > y + 10; i = i - 12 + 2)
+ {
+ std::basic_string<wchar_t>::iterator j = i + N;
+ baz (j);
+ }
+}
+
+template <int N>
+void
+f7 (std::basic_string<wchar_t>::iterator i,
+ const std::basic_string<wchar_t>::iterator &x,
+ const std::basic_string<wchar_t>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+}
+
+template <wchar_t N>
+void
+f8 (J<wchar_t> j)
+{
+ std::basic_string<wchar_t>::iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f9 (const typename std::basic_string<T>::iterator &x,
+ const typename std::basic_string<T>::iterator &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (typename std::basic_string<T>::iterator i = x; i <= y; i = i + N)
+ baz (i);
+}
+
+template <typename T, int N>
+void
+f10 (const typename std::basic_string<T>::iterator &x,
+ const typename std::basic_string<T>::iterator &y)
+{
+ typename std::basic_string<T>::iterator i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (i = x; i > y; i = i + N)
+ baz (i);
+}
+
+template <typename T>
+void
+f11 (const T &x, const T &y)
+{
+#pragma omp parallel
+ {
+#pragma omp single nowait
+#pragma omp taskloop nogroup
+ for (T i = x; i <= y; i += 3)
+ baz (i);
+#pragma omp single nowait
+ {
+ T j = y + 3;
+ baz (j);
+ }
+ }
+}
+
+template <typename T>
+void
+f12 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop private(i)
+ for (i = x; i > y; --i)
+ baz (i);
+}
+
+template <int N>
+struct K
+{
+ template <typename T>
+ static void
+ f13 (const T &x, const T &y)
+ {
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop
+ for (T i = x; i <= y + N; i += N)
+ baz (i);
+ }
+};
+
+#define check(expr) \
+ for (int i = 0; i < 2000; i++) \
+ if (expr) \
+ { \
+ if (a[i] != L'a' + i + 1) \
+ std::abort (); \
+ a[i] = L'a' + i; \
+ } \
+ else if (a[i] != L'a' + i) \
+ std::abort ()
+
+int
+main ()
+{
+ std::basic_string<wchar_t> a = L"";
+ for (int i = 0; i < 2000; i++)
+ a += L'a' + i;
+ f1 (a.begin () + 10, a.begin () + 1990);
+ check (i >= 10 && i <= 1990 && (i - 10) % 6 == 0);
+ f2 (a.begin () + 0, a.begin () + 1999);
+ check (i < 1998 && (i & 1) == 0);
+ f3<char> (a.begin () + 20, a.begin () + 1837);
+ check (i >= 20 && i <= 1837);
+ f4<int> (a.begin () + 0, a.begin () + 30);
+ check (i > 40 && i <= 2000 - 64);
+ f5 (a.begin () + 0, a.begin () + 100);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f6<-10> (a.begin () + 10, a.begin () + 110);
+ check (i >= 116 && i <= 2000 - 64 && (i - 116) % 10 == 0);
+ f7<6> (std::basic_string<wchar_t>::iterator (), a.begin () + 12,
+ a.begin () + 1800);
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ f8<121> (J<wchar_t> (a.begin () + 14, a.begin () + 1803));
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ f9<wchar_t, 7> (a.begin () + 33, a.begin () + 1967);
+ check (i >= 33 && i <= 1967 && (i - 33) % 7 == 0);
+ f10<wchar_t, -7> (a.begin () + 1939, a.begin () + 17);
+ check (i >= 21 && i <= 1939 && (i - 21) % 7 == 0);
+ f11<std::basic_string<wchar_t>::iterator > (a.begin () + 16,
+ a.begin () + 1981);
+ check (i >= 16 && i <= 1984 && (i - 16) % 3 == 0);
+ f12<std::basic_string<wchar_t>::iterator > (a.begin () + 1761,
+ a.begin () + 37);
+ check (i > 37 && i <= 1761);
+ K<5>::f13<std::basic_string<wchar_t>::iterator > (a.begin () + 1,
+ a.begin () + 1935);
+ check (i >= 1 && i <= 1936 && (i - 1) % 5 == 0);
+}
diff --git a/libgomp/testsuite/libgomp.c++/taskloop-9.C b/libgomp/testsuite/libgomp.c++/taskloop-9.C
new file mode 100644
index 00000000000..65abc31ff8d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/taskloop-9.C
@@ -0,0 +1,323 @@
+// { dg-do run }
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I
+{
+public:
+ typedef ptrdiff_t difference_type;
+ I ();
+ ~I ();
+ I (T *);
+ I (const I &);
+ T &operator * ();
+ T *operator -> ();
+ T &operator [] (const difference_type &) const;
+ I &operator = (const I &);
+ I &operator ++ ();
+ I operator ++ (int);
+ I &operator -- ();
+ I operator -- (int);
+ I &operator += (const difference_type &);
+ I &operator -= (const difference_type &);
+ I operator + (const difference_type &) const;
+ I operator - (const difference_type &) const;
+ template <typename S> friend bool operator == (I<S> &, I<S> &);
+ template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator < (I<S> &, I<S> &);
+ template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator <= (I<S> &, I<S> &);
+ template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator > (I<S> &, I<S> &);
+ template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+ template <typename S> friend bool operator >= (I<S> &, I<S> &);
+ template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+ template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+ template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+ T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () { p = (T *) 0; }
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+template <typename T>
+class J
+{
+public:
+ J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+ const I<T> &begin ();
+ const I<T> &end ();
+private:
+ I<T> b, e;
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }
+template <typename T> const I<T> &J<T>::end () { return e; }
+
+int results[2000];
+
+template <typename T>
+void
+baz (I<T> &i)
+{
+ if (*i < 0 || *i >= 2000)
+ abort ();
+ results[*i]++;
+}
+
+I<int>
+f1 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel shared (i)
+ {
+ #pragma omp single
+ #pragma omp taskloop lastprivate (i)
+ for (i = x; i < y - 1; ++i)
+ baz (i);
+ #pragma omp single
+ i += 3;
+ }
+ return I<int> (i);
+}
+
+I<int>
+f2 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x; i < y - 1; i = 1 - 6 + 7 + i)
+ baz (i);
+ return I<int> (i);
+}
+
+template <typename T>
+I<int>
+f3 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x + 1000 - 64; i <= y - 10; i++)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+I<int>
+f4 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x + 2000 - 64; i > y + 10; --i)
+ baz (i);
+ return I<int> (i);
+}
+
+template <typename T>
+I<int>
+f5 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x; i > y + T (6); i--)
+ baz (i);
+ return i;
+}
+
+template <typename T>
+I<int>
+f6 (const I<int> &x, const I<int> &y)
+{
+ I<int> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x - T (7); i > y; i -= T (2))
+ baz (i);
+ return I<int> (i);
+}
+
+template <int N>
+I<int>
+f7 (I<int> i, const I<int> &x, const I<int> &y)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x - 10; i <= y + 10; i += N)
+ baz (i);
+ return I<int> (i);
+}
+
+template <int N>
+I<int>
+f8 (J<int> j)
+{
+ I<int> i;
+#pragma omp parallel shared (i)
+ #pragma omp single
+ #pragma omp taskloop lastprivate (i)
+ for (i = j.begin (); i <= j.end () + N; i += 2)
+ baz (i);
+ return i;
+}
+
+I<int> i9;
+
+template <long N>
+I<int> &
+f9 (J<int> j)
+{
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i9)
+ for (i9 = j.begin () + N; i9 <= j.end () - N; i9 = i9 - N)
+ baz (i9);
+ return i9;
+}
+
+template <typename T, int N>
+I<T>
+f10 (const I<T> &x, const I<T> &y)
+{
+ I<T> i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x; i > y; i = i + N)
+ baz (i);
+ return i;
+}
+
+template <typename T, typename U>
+T
+f11 (T i, const T &x, const T &y)
+{
+#pragma omp parallel
+ #pragma omp single
+ #pragma omp taskloop lastprivate (i)
+ for (i = x + U (2); i <= y + U (1); i = U (2) + U (3) + i)
+ baz (i);
+ return T (i);
+}
+
+template <typename T>
+T
+f12 (const T &x, const T &y)
+{
+ T i;
+#pragma omp parallel
+#pragma omp single
+#pragma omp taskloop lastprivate (i)
+ for (i = x; i > y; --i)
+ baz (i);
+ return i;
+}
+
+#define check(expr) \
+ for (int i = 0; i < 2000; i++) \
+ if (expr) \
+ { \
+ if (results[i] != 1) \
+ abort (); \
+ results[i] = 0; \
+ } \
+ else if (results[i]) \
+ abort ()
+
+int
+main ()
+{
+ int a[2000];
+ long b[2000];
+ for (int i = 0; i < 2000; i++)
+ {
+ a[i] = i;
+ b[i] = i;
+ }
+ if (*f1 (&a[10], &a[1873]) != 1875)
+ abort ();
+ check (i >= 10 && i < 1872);
+ if (*f2 (&a[0], &a[1998]) != 1998)
+ abort ();
+ check (i < 1997 && (i & 1) == 0);
+ if (*f3<int> (&a[10], &a[1971]) != 1962)
+ abort ();
+ check (i >= 946 && i <= 1961);
+ if (*f4<int> (&a[0], &a[30]) != 40)
+ abort ();
+ check (i > 40 && i <= 2000 - 64);
+ if (*f5<short> (&a[1931], &a[17]) != 23)
+ abort ();
+ check (i > 23 && i <= 1931);
+ if (*f6<long> (&a[1931], &a[17]) != 16)
+ abort ();
+ check (i > 17 && i <= 1924 && (i & 1) == 0);
+ if (*f7<6> (I<int> (), &a[12], &a[1800]) != 1814)
+ abort ();
+ check (i >= 2 && i <= 1808 && (i - 2) % 6 == 0);
+ if (*f8<121> (J<int> (&a[14], &a[1803])) != 1926)
+ abort ();
+ check (i >= 14 && i <= 1924 && (i & 1) == 0);
+ if (*f9<-3L> (J<int> (&a[27], &a[1761])) != 1767)
+ abort ();
+ check (i >= 24 && i <= 1764 && (i % 3) == 0);
+ if (*f10<int, -7> (&a[1939], &a[17]) != 14)
+ abort ();
+ check (i >= 21 && i <= 1939 && i % 7 == 0);
+ if (*f11<I<int>, short> (I<int> (), &a[71], &a[1941]) != 1943)
+ abort ();
+ check (i >= 73 && i <= 1938 && (i - 73) % 5 == 0);
+ if (*f12<I<int> > (&a[1761], &a[37]) != 37)
+ abort ();
+ check (i > 37 && i <= 1761);
+ if (*f10<long, -7> (&b[1939], &b[17]) != 14)
+ abort ();
+ check (i >= 21 && i <= 1939 && i % 7 == 0);
+ if (*f11<I<long>, short> (I<long> (), &b[71], &b[1941]) != 1943)
+ abort ();
+ check (i >= 73 && i <= 1938 && (i - 73) % 5 == 0);
+ if (*f12<I<long> > (&b[1761], &b[37]) != 37)
+ abort ();
+ check (i > 37 && i <= 1761);
+}
diff --git a/libgomp/testsuite/libgomp.c/affinity-2.c b/libgomp/testsuite/libgomp.c/affinity-2.c
new file mode 100644
index 00000000000..f8216574704
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/affinity-2.c
@@ -0,0 +1,89 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_PROC_BIND "spread,close" } */
+/* { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" } */
+/* { dg-set-target-env-var OMP_NUM_THREADS "2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int *
+get_buf (int nump)
+{
+ static int *buf;
+ static size_t buf_size;
+ if ((size_t) nump > buf_size)
+ {
+ buf_size *= 2;
+ if (nump > buf_size)
+ buf_size = nump + 64;
+ int *bufn = realloc (buf, buf_size * sizeof (int));
+ if (bufn == NULL)
+ {
+ fprintf (stderr, "memory allocation error\n");
+ exit (1);
+ }
+ buf = bufn;
+ }
+ return buf;
+}
+
+void
+print_place (int count, int *ids)
+{
+ int i, j;
+ printf ("{");
+ for (i = 0; i < count; i++)
+ {
+ for (j = i + 1; j < count; j++)
+ if (ids[j] != ids[i] + (j - i))
+ break;
+ if (i)
+ printf (",");
+ if (j == i + 1)
+ printf ("%d", ids[i]);
+ else
+ {
+ printf ("%d:%d", ids[i], j - i);
+ i = j - 1;
+ }
+ }
+ printf ("}\n");
+}
+
+void
+print_place_var (void)
+{
+ int place = omp_get_place_num ();
+ int num_places = omp_get_partition_num_places ();
+ int *ids = get_buf (num_places);
+ omp_get_partition_place_nums (ids);
+ printf ("place %d\n", place);
+ if (num_places)
+ printf ("partition %d-%d\n", ids[0], ids[num_places - 1]);
+}
+
+int
+main ()
+{
+ int i, num = omp_get_num_places (), nump, *ids;
+ printf ("omp_get_num_places () == %d\n", num);
+ for (i = 0; i < num; i++)
+ {
+ printf ("place %d ", i);
+ nump = omp_get_place_num_procs (i);
+ ids = get_buf (nump);
+ omp_get_place_proc_ids (i, ids);
+ print_place (nump, ids);
+ }
+ print_place_var ();
+ omp_set_nested (1);
+ #pragma omp parallel
+ if (omp_get_thread_num () == omp_get_num_threads () - 1)
+ {
+ #pragma omp parallel
+ if (omp_get_thread_num () == omp_get_num_threads () - 1)
+ print_place_var ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/doacross-1.c b/libgomp/testsuite/libgomp.c/doacross-1.c
new file mode 100644
index 00000000000..0794c80ec2e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/doacross-1.c
@@ -0,0 +1,181 @@
+extern void abort (void);
+
+#define N 256
+int a[N], b[N / 16][8][4], c[N / 32][8][8];
+volatile int d, e;
+
+int
+main ()
+{
+ int i, j, k, l, m;
+ #pragma omp parallel private (l)
+ {
+ #pragma omp for schedule(static, 1) ordered (1) nowait
+ for (i = 0; i < N; i++)
+ {
+ #pragma omp atomic write
+ a[i] = 1;
+ #pragma omp ordered depend(sink: i - 1)
+ if (i)
+ {
+ #pragma omp atomic read
+ l = a[i - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ a[i] = 2;
+ if (i < N - 1)
+ {
+ #pragma omp atomic read
+ l = a[i + 1];
+ if (l == 3)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ a[i] = 3;
+ }
+ #pragma omp for schedule(static, 0) ordered (3) nowait
+ for (i = 2; i < N / 16 - 1; i++)
+ for (j = 0; j < 8; j += 2)
+ for (k = 1; k <= 3; k++)
+ {
+ #pragma omp atomic write
+ b[i][j][k] = 1;
+ #pragma omp ordered depend(sink: i, j - 2, k - 1) \
+ depend(sink: i - 2, j - 2, k + 1)
+ #pragma omp ordered depend(sink: i - 3, j + 2, k - 2)
+ if (j >= 2 && k > 1)
+ {
+ #pragma omp atomic read
+ l = b[i][j - 2][k - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ b[i][j][k] = 2;
+ if (i >= 4 && j >= 2 && k < 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 2][j - 2][k + 1];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 5 && j < N / 16 - 3 && k == 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 3][j + 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ b[i][j][k] = 3;
+ }
+#define A(n) int n;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3)
+ D(m)
+#undef A
+ #pragma omp for collapse (2) ordered(61) schedule(dynamic, 15)
+ for (i = 0; i < N / 32; i++)
+ for (j = 7; j > 1; j--)
+ for (k = 6; k >= 0; k -= 2)
+#define A(n) for (n = 4; n < 5; n++)
+ D(m)
+#undef A
+ {
+ #pragma omp atomic write
+ c[i][j][k] = 1;
+#define A(n) ,n
+#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321)
+ #pragma omp ordered depend (sink: i, j, k + 2 E(m)) \
+ depend (sink:i - 2, j + 1, k - 4 E(m)) \
+ depend(sink: i - 1, j - 2, k - 2 E(m))
+ if (k <= 4)
+ {
+ l = c[i][j][k + 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ c[i][j][k] = 2;
+ if (i >= 2 && j < 7 && k >= 4)
+ {
+ l = c[i - 2][j + 1][k - 4];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 1 && j >= 4 && k >= 2)
+ {
+ l = c[i - 1][j - 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend (source)
+ #pragma omp atomic write
+ c[i][j][k] = 3;
+ }
+
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k)
+ for (i = 0; i < d + 1; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d; k++)
+ for (l = 0; l < d + 2; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp single
+ {
+ if (i != 1 || j != -1 || k != 0)
+ abort ();
+ i = 8; j = 9; k = 10;
+ }
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m)
+ for (i = 0; i < d + 1; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (m = 0; m < d; m++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m)
+ abort ();
+ }
+ #pragma omp single
+ if (i != 1 || j != -1 || k != 2 || m != 0)
+ abort ();
+ #pragma omp for collapse(2) ordered(4) nowait
+ for (i = 0; i < d + 1; i++)
+ for (j = d; j > 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (l = 0; l < d + 4; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp for nowait
+ for (i = 0; i < N; i++)
+ if (a[i] != 3)
+ abort ();
+ #pragma omp for collapse(2) private(k) nowait
+ for (i = 0; i < N / 16; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 4; k++)
+ if (b[i][j][k] != 3 * (i >= 2 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1))
+ abort ();
+ #pragma omp for collapse(3) nowait
+ for (i = 0; i < N / 32; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ if (c[i][j][k] != 3 * (j >= 2 && (k & 1) == 0))
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/doacross-2.c b/libgomp/testsuite/libgomp.c/doacross-2.c
new file mode 100644
index 00000000000..e491bb22965
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/doacross-2.c
@@ -0,0 +1,225 @@
+extern void abort (void);
+
+#define N 256
+int a[N], b[N / 16][8][4], c[N / 32][8][8], g[N / 16][8][6];
+volatile int d, e;
+volatile unsigned long long f;
+
+int
+main ()
+{
+ unsigned long long i;
+ int j, k, l, m;
+ #pragma omp parallel private (l)
+ {
+ #pragma omp for schedule(static, 1) ordered (1) nowait
+ for (i = 1; i < N + f; i++)
+ {
+ #pragma omp atomic write
+ a[i] = 1;
+ #pragma omp ordered depend(sink: i - 1)
+ if (i > 1)
+ {
+ #pragma omp atomic read
+ l = a[i - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ a[i] = 2;
+ if (i < N - 1)
+ {
+ #pragma omp atomic read
+ l = a[i + 1];
+ if (l == 3)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ a[i] = 3;
+ }
+ #pragma omp for schedule(static, 0) ordered (3) nowait
+ for (i = 3; i < N / 16 - 1 + f; i++)
+ for (j = 0; j < 8; j += 2)
+ for (k = 1; k <= 3; k++)
+ {
+ #pragma omp atomic write
+ b[i][j][k] = 1;
+ #pragma omp ordered depend(sink: i, j - 2, k - 1) \
+ depend(sink: i - 2, j - 2, k + 1)
+ #pragma omp ordered depend(sink: i - 3, j + 2, k - 2)
+ if (j >= 2 && k > 1)
+ {
+ #pragma omp atomic read
+ l = b[i][j - 2][k - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ b[i][j][k] = 2;
+ if (i >= 5 && j >= 2 && k < 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 2][j - 2][k + 1];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 6 && j < N / 16 - 3 && k == 3)
+ {
+ #pragma omp atomic read
+ l = b[i - 3][j + 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ b[i][j][k] = 3;
+ }
+#define A(n) int n;
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3)
+ D(m)
+#undef A
+ #pragma omp for collapse (2) ordered(61) schedule(dynamic, 15)
+ for (i = 2; i < N / 32 + f; i++)
+ for (j = 7; j > 1; j--)
+ for (k = 6; k >= 0; k -= 2)
+#define A(n) for (n = 4; n < 5; n++)
+ D(m)
+#undef A
+ {
+ #pragma omp atomic write
+ c[i][j][k] = 1;
+#define A(n) ,n
+#define E(n) C(n##0) C(n##1) C(n##2) B(n##30) B(n##31) A(n##320) A(n##321)
+ #pragma omp ordered depend (sink: i, j, k + 2 E(m)) \
+ depend (sink:i - 2, j + 1, k - 4 E(m)) \
+ depend(sink: i - 1, j - 2, k - 2 E(m))
+ if (k <= 4)
+ {
+ l = c[i][j][k + 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ c[i][j][k] = 2;
+ if (i >= 4 && j < 7 && k >= 4)
+ {
+ l = c[i - 2][j + 1][k - 4];
+ if (l < 2)
+ abort ();
+ }
+ if (i >= 3 && j >= 4 && k >= 2)
+ {
+ l = c[i - 1][j - 2][k - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend (source)
+ #pragma omp atomic write
+ c[i][j][k] = 3;
+ }
+ #pragma omp for schedule(static, 0) ordered (3) nowait
+ for (j = 0; j < N / 16 - 1; j++)
+ for (k = 0; k < 8; k += 2)
+ for (i = 3; i <= 5 + f; i++)
+ {
+ #pragma omp atomic write
+ g[j][k][i] = 1;
+ #pragma omp ordered depend(sink: j, k - 2, i - 1) \
+ depend(sink: j - 2, k - 2, i + 1)
+ #pragma omp ordered depend(sink: j - 3, k + 2, i - 2)
+ if (k >= 2 && i > 3)
+ {
+ #pragma omp atomic read
+ l = g[j][k - 2][i - 1];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp atomic write
+ g[j][k][i] = 2;
+ if (j >= 2 && k >= 2 && i < 5)
+ {
+ #pragma omp atomic read
+ l = g[j - 2][k - 2][i + 1];
+ if (l < 2)
+ abort ();
+ }
+ if (j >= 3 && k < N / 16 - 3 && i == 5)
+ {
+ #pragma omp atomic read
+ l = g[j - 3][k + 2][i - 2];
+ if (l < 2)
+ abort ();
+ }
+ #pragma omp ordered depend(source)
+ #pragma omp atomic write
+ g[j][k][i] = 3;
+ }
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k)
+ for (i = 2; i < f + 3; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d; k++)
+ for (l = 0; l < d + 2; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp single
+ {
+ if (i != 3 || j != -1 || k != 0)
+ abort ();
+ i = 8; j = 9; k = 10;
+ }
+ #pragma omp for collapse(2) ordered(4) lastprivate (i, j, k, m)
+ for (i = 2; i < f + 3; i++)
+ for (j = d + 1; j >= 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (m = 0; m < d; m++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, m)
+ abort ();
+ }
+ #pragma omp single
+ if (i != 3 || j != -1 || k != 2 || m != 0)
+ abort ();
+ #pragma omp for collapse(2) ordered(4) nowait
+ for (i = 2; i < f + 3; i++)
+ for (j = d; j > 0; j--)
+ for (k = 0; k < d + 2; k++)
+ for (l = 0; l < d + 4; l++)
+ {
+ #pragma omp ordered depend (source)
+ #pragma omp ordered depend (sink:i - 2, j + 2, k - 2, l)
+ if (!e)
+ abort ();
+ }
+ #pragma omp for nowait
+ for (i = 0; i < N; i++)
+ if (a[i] != 3 * (i >= 1))
+ abort ();
+ #pragma omp for collapse(2) private(k) nowait
+ for (i = 0; i < N / 16; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 4; k++)
+ if (b[i][j][k] != 3 * (i >= 3 && i < N / 16 - 1 && (j & 1) == 0 && k >= 1))
+ abort ();
+ #pragma omp for collapse(3) nowait
+ for (i = 0; i < N / 32; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ if (c[i][j][k] != 3 * (i >= 2 && j >= 2 && (k & 1) == 0))
+ abort ();
+ #pragma omp for collapse(2) private(k) nowait
+ for (i = 0; i < N / 16; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 6; k++)
+ if (g[i][j][k] != 3 * (i < N / 16 - 1 && (j & 1) == 0 && k >= 3))
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c
index beca8555780..6d4bc4fac12 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-1.c
@@ -20,7 +20,7 @@ int fib_wrapper (int n)
{
int x = 0;
- #pragma omp target if(n > THRESHOLD)
+ #pragma omp target if(n > THRESHOLD) map(from:x)
x = fib (n);
return x;
diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c
index db70460b309..f2414366951 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-4.c
@@ -41,7 +41,7 @@ float accum (int k)
int i;
float tmp = 0.0;
- #pragma omp target
+ #pragma omp target map(tofrom:tmp)
#pragma omp parallel for reduction(+:tmp)
for (i = 0; i < N; i++)
tmp += Pfun (i, k);
diff --git a/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c b/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c
index b550f1ff540..33d6137afd5 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/declare_target-5.c
@@ -48,7 +48,7 @@ float accum ()
int i, k;
float tmp = 0.0;
- #pragma omp target
+ #pragma omp target map(tofrom:tmp)
#pragma omp parallel for reduction(+:tmp)
for (i = 0; i < N; i++)
{
diff --git a/libgomp/testsuite/libgomp.c/examples-4/device-1.c b/libgomp/testsuite/libgomp.c/examples-4/device-1.c
index f7c84fb4c14..dad8572f8f0 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/device-1.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/device-1.c
@@ -10,11 +10,11 @@ int main ()
int b = 0;
int c, d;
- #pragma omp target if(a > 200 && a < 400)
+ #pragma omp target if(a > 200 && a < 400) map(from: c)
c = omp_is_initial_device ();
#pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
+ #pragma omp target map(from: b, d)
{
b = 100;
d = omp_is_initial_device ();
@@ -26,11 +26,11 @@ int main ()
a += 200;
b = 0;
- #pragma omp target if(a > 200 && a < 400)
+ #pragma omp target if(a > 200 && a < 400) map(from: c)
c = omp_is_initial_device ();
#pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
+ #pragma omp target map(from: b, d)
{
b = 100;
d = omp_is_initial_device ();
@@ -42,11 +42,11 @@ int main ()
a += 200;
b = 0;
- #pragma omp target if(a > 200 && a < 400)
+ #pragma omp target if(a > 200 && a < 400) map(from: c)
c = omp_is_initial_device ();
#pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
+ #pragma omp target map(from: b, d)
{
b = 100;
d = omp_is_initial_device ();
diff --git a/libgomp/testsuite/libgomp.c/examples-4/device-3.c b/libgomp/testsuite/libgomp.c/examples-4/device-3.c
index 8a0cf7c200d..af086533278 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/device-3.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/device-3.c
@@ -9,7 +9,7 @@ int main ()
int res;
int default_device = omp_get_default_device ();
- #pragma omp target
+ #pragma omp target map(from: res)
res = omp_is_initial_device ();
if (res)
@@ -17,7 +17,7 @@ int main ()
omp_set_default_device (omp_get_num_devices ());
- #pragma omp target
+ #pragma omp target map(from: res)
res = omp_is_initial_device ();
if (!res)
diff --git a/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c b/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c
index abb283801f8..46b674013d0 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/target_data-3.c
@@ -47,7 +47,7 @@ void gramSchmidt (int Q[][COLS], const int rows, const int cols)
{
int tmp = 0;
- #pragma omp target
+ #pragma omp target map(tofrom:tmp)
#pragma omp parallel for reduction(+:tmp)
for (i = 0; i < rows; i++)
tmp += (Q[i][k] * Q[i][k]);
diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-2.c b/libgomp/testsuite/libgomp.c/examples-4/teams-2.c
index 8bbbc355b17..7d0a60ebb51 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/teams-2.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/teams-2.c
@@ -32,7 +32,7 @@ float dotprod (float B[], float C[], int n, int block_size,
int i, i0;
float sum = 0;
- #pragma omp target map(to: B[0:n], C[0:n])
+ #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
#pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
reduction(+:sum)
#pragma omp distribute
diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-3.c b/libgomp/testsuite/libgomp.c/examples-4/teams-3.c
index b6708785884..5fe63a68a4b 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/teams-3.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/teams-3.c
@@ -31,7 +31,7 @@ float dotprod (float B[], float C[], int n)
int i;
float sum = 0;
- #pragma omp target teams map(to: B[0:n], C[0:n])
+ #pragma omp target teams map(to: B[0:n], C[0:n]) map(tofrom: sum)
#pragma omp distribute parallel for reduction(+:sum)
for (i = 0; i < n; i++)
sum += B[i] * C[i];
diff --git a/libgomp/testsuite/libgomp.c/examples-4/teams-4.c b/libgomp/testsuite/libgomp.c/examples-4/teams-4.c
index 9aef78ecfba..6136eabef66 100644
--- a/libgomp/testsuite/libgomp.c/examples-4/teams-4.c
+++ b/libgomp/testsuite/libgomp.c/examples-4/teams-4.c
@@ -31,7 +31,7 @@ float dotprod (float B[], float C[], int n)
int i;
float sum = 0;
- #pragma omp target map(to: B[0:n], C[0:n])
+ #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom:sum)
#pragma omp teams num_teams(8) thread_limit(16)
#pragma omp distribute parallel for reduction(+:sum) \
dist_schedule(static, 1024) \
diff --git a/libgomp/testsuite/libgomp.c/for-2.h b/libgomp/testsuite/libgomp.c/for-2.h
index 920d23b5202..0bd116c5aec 100644
--- a/libgomp/testsuite/libgomp.c/for-2.h
+++ b/libgomp/testsuite/libgomp.c/for-2.h
@@ -11,11 +11,21 @@ noreturn (void)
#ifndef SC
#define SC
#endif
+#ifndef OMPTGT
+#define OMPTGT
+#endif
+#ifndef OMPTO
+#define OMPTO(v) do {} while (0)
+#endif
+#ifndef OMPFROM
+#define OMPFROM(v) do {} while (0)
+#endif
__attribute__((noinline, noclone)) void
N(f0) (void)
{
int i;
+ OMPTGT
#pragma omp F S
for (i = 0; i < 1500; i++)
a[i] += 2;
@@ -24,6 +34,7 @@ N(f0) (void)
__attribute__((noinline, noclone)) void
N(f1) (void)
{
+ OMPTGT
#pragma omp F S
for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2)
a[(i - __INT_MAX__) >> 1] -= 2;
@@ -33,6 +44,7 @@ __attribute__((noinline, noclone)) void
N(f2) (void)
{
unsigned long long i;
+ OMPTGT
#pragma omp F S
for (i = __LONG_LONG_MAX__ + 4500ULL - 27;
i > __LONG_LONG_MAX__ - 27ULL; i -= 3)
@@ -42,6 +54,7 @@ N(f2) (void)
__attribute__((noinline, noclone)) void
N(f3) (long long n1, long long n2, long long s3)
{
+ OMPTGT
#pragma omp F S
for (long long i = n1 + 23; i > n2 - 25; i -= s3)
a[i + 48] += 7;
@@ -51,6 +64,7 @@ __attribute__((noinline, noclone)) void
N(f4) (void)
{
unsigned int i;
+ OMPTGT
#pragma omp F S
for (i = 30; i < 20; i += 2)
a[i] += 10;
@@ -61,6 +75,7 @@ N(f5) (int n11, int n12, int n21, int n22, int n31, int n32,
int s1, int s2, int s3)
{
SC int v1, v2, v3;
+ OMPTGT
#pragma omp F S collapse(3)
for (v1 = n11; v1 < n12; v1 += s1)
for (v2 = n21; v2 < n22; v2 += s2)
@@ -74,6 +89,7 @@ N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32,
{
SC int v1, v2;
SC long long v3;
+ OMPTGT
#pragma omp F S collapse(3)
for (v1 = n11; v1 > n12; v1 += s1)
for (v2 = n21; v2 > n22; v2 += s2)
@@ -86,6 +102,7 @@ N(f7) (void)
{
SC unsigned int v1, v3;
SC unsigned long long v2;
+ OMPTGT
#pragma omp F S collapse(3)
for (v1 = 0; v1 < 20; v1 += 2)
for (v2 = __LONG_LONG_MAX__ + 16ULL;
@@ -98,6 +115,7 @@ __attribute__((noinline, noclone)) void
N(f8) (void)
{
SC long long v1, v2, v3;
+ OMPTGT
#pragma omp F S collapse(3)
for (v1 = 0; v1 < 20; v1 += 2)
for (v2 = 30; v2 < 20; v2++)
@@ -109,6 +127,7 @@ __attribute__((noinline, noclone)) void
N(f9) (void)
{
int i;
+ OMPTGT
#pragma omp F S
for (i = 20; i < 10; i++)
{
@@ -122,6 +141,7 @@ __attribute__((noinline, noclone)) void
N(f10) (void)
{
SC int i;
+ OMPTGT
#pragma omp F S collapse(3)
for (i = 0; i < 10; i++)
for (int j = 10; j < 8; j++)
@@ -137,6 +157,7 @@ __attribute__((noinline, noclone)) void
N(f11) (int n)
{
int i;
+ OMPTGT
#pragma omp F S
for (i = 20; i < n; i++)
{
@@ -150,6 +171,7 @@ __attribute__((noinline, noclone)) void
N(f12) (int n)
{
SC int i;
+ OMPTGT
#pragma omp F S collapse(3)
for (i = 0; i < 10; i++)
for (int j = n; j < 8; j++)
@@ -165,6 +187,7 @@ __attribute__((noinline, noclone)) void
N(f13) (void)
{
int *i;
+ OMPTGT
#pragma omp F S
for (i = a; i < &a[1500]; i++)
i[0] += 2;
@@ -174,6 +197,7 @@ __attribute__((noinline, noclone)) void
N(f14) (void)
{
SC float *i;
+ OMPTGT
#pragma omp F S collapse(3)
for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)
@@ -188,27 +212,34 @@ N(test) (void)
int i, j, k;
for (i = 0; i < 1500; i++)
a[i] = i - 25;
+ OMPTO (a);
N(f0) ();
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 23)
return 1;
N(f1) ();
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 25)
return 1;
N(f2) ();
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 29)
return 1;
N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL);
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 22)
return 1;
N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL);
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 22)
return 1;
N(f4) ();
+ OMPFROM (a);
for (i = 0; i < 1500; i++)
if (a[i] != i - 22)
return 1;
@@ -216,31 +247,37 @@ N(test) (void)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k;
+ OMPTO (b);
N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1);
+ OMPFROM (b);
for (i = 0; i < 10; i++)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
return 1;
N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6);
+ OMPFROM (b);
for (i = 0; i < 10; i++)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
return 1;
N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1);
+ OMPFROM (b);
for (i = 0; i < 10; i++)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k)
return 1;
N(f7) ();
+ OMPFROM (b);
for (i = 0; i < 10; i++)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
return 1;
N(f8) ();
+ OMPFROM (b);
for (i = 0; i < 10; i++)
for (j = 0; j < 15; j++)
for (k = 0; k < 10; k++)
@@ -250,6 +287,8 @@ N(test) (void)
N(f10) ();
N(f11) (10);
N(f12) (12);
+ OMPFROM (a);
+ OMPFROM (b);
for (i = 0; i < 1500; i++)
if (a[i] != i - 22)
return 1;
@@ -260,6 +299,8 @@ N(test) (void)
return 1;
N(f13) ();
N(f14) ();
+ OMPFROM (a);
+ OMPFROM (b);
for (i = 0; i < 1500; i++)
if (a[i] != i - 20)
return 1;
diff --git a/libgomp/testsuite/libgomp.c/for-4.c b/libgomp/testsuite/libgomp.c/for-4.c
new file mode 100644
index 00000000000..ef5465e1e76
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/for-4.c
@@ -0,0 +1,42 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F taskloop
+#define G taskloop
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F taskloop simd
+#define G taskloop_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+int
+main ()
+{
+ int err = 0;
+ #pragma omp parallel reduction(|:err)
+ #pragma omp single
+ {
+ if (test_taskloop_normal ()
+ || test_taskloop_simd_normal ())
+ err = 1;
+ }
+ if (err)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/for-5.c b/libgomp/testsuite/libgomp.c/for-5.c
new file mode 100644
index 00000000000..84e636ab0f9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/for-5.c
@@ -0,0 +1,154 @@
+/* { dg-additional-options "-std=gnu99" } */
+
+extern void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+
+#define F for
+#define G f
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#pragma omp end declare target
+
+#undef OMPFROM
+#undef OMPTO
+#define DO_PRAGMA(x) _Pragma (#x)
+#define OMPFROM(v) DO_PRAGMA (omp target update from(v))
+#define OMPTO(v) DO_PRAGMA (omp target update to(v))
+
+#define F target parallel for
+#define G tpf
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F target simd
+#define G t_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target parallel for simd
+#define G tpf_simd
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute
+#define G ttd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute
+#define G ttd_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute simd
+#define G ttds
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute simd
+#define G ttds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F target teams distribute parallel for
+#define G ttdpf
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for dist_schedule(static, 128)
+#define G ttdpf_ds128
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for simd
+#define G ttdpfs
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F target teams distribute parallel for simd dist_schedule(static, 128)
+#define G ttdpfs_ds128
+#include "for-1.h"
+#undef F
+#undef G
+
+int
+main ()
+{
+ if (test_tpf_static ()
+ || test_tpf_static32 ()
+ || test_tpf_auto ()
+ || test_tpf_guided32 ()
+ || test_tpf_runtime ()
+ || test_t_simd_normal ()
+ || test_tpf_simd_static ()
+ || test_tpf_simd_static32 ()
+ || test_tpf_simd_auto ()
+ || test_tpf_simd_guided32 ()
+ || test_tpf_simd_runtime ()
+ || test_ttd_normal ()
+ || test_ttd_ds128_normal ()
+ || test_ttds_normal ()
+ || test_ttds_ds128_normal ()
+ || test_ttdpf_static ()
+ || test_ttdpf_static32 ()
+ || test_ttdpf_auto ()
+ || test_ttdpf_guided32 ()
+ || test_ttdpf_runtime ()
+ || test_ttdpf_ds128_static ()
+ || test_ttdpf_ds128_static32 ()
+ || test_ttdpf_ds128_auto ()
+ || test_ttdpf_ds128_guided32 ()
+ || test_ttdpf_ds128_runtime ()
+ || test_ttdpfs_static ()
+ || test_ttdpfs_static32 ()
+ || test_ttdpfs_auto ()
+ || test_ttdpfs_guided32 ()
+ || test_ttdpfs_runtime ()
+ || test_ttdpfs_ds128_static ()
+ || test_ttdpfs_ds128_static32 ()
+ || test_ttdpfs_ds128_auto ()
+ || test_ttdpfs_ds128_guided32 ()
+ || test_ttdpfs_ds128_runtime ())
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/for-6.c b/libgomp/testsuite/libgomp.c/for-6.c
new file mode 100644
index 00000000000..7f3c65e82b1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/for-6.c
@@ -0,0 +1,123 @@
+/* { dg-additional-options "-std=gnu99" } */
+
+extern void abort ();
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#pragma omp declare target
+
+#define F for
+#define G f
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#pragma omp end declare target
+
+#undef OMPTGT
+#undef OMPFROM
+#undef OMPTO
+#define DO_PRAGMA(x) _Pragma (#x)
+#define OMPTGT DO_PRAGMA (omp target)
+#define OMPFROM(v) DO_PRAGMA (omp target update from(v))
+#define OMPTO(v) DO_PRAGMA (omp target update to(v))
+
+#define F teams distribute
+#define G td
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute
+#define G td_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute simd
+#define G tds
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute simd
+#define G tds_ds128
+#define S dist_schedule(static, 128)
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F teams distribute parallel for
+#define G tdpf
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for dist_schedule(static, 128)
+#define G tdpf_ds128
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for simd
+#define G tdpfs
+#include "for-1.h"
+#undef F
+#undef G
+
+#define F teams distribute parallel for simd dist_schedule(static, 128)
+#define G tdpfs_ds128
+#include "for-1.h"
+#undef F
+#undef G
+
+int
+main ()
+{
+ if (test_td_normal ()
+ || test_td_ds128_normal ()
+ || test_tds_normal ()
+ || test_tds_ds128_normal ()
+ || test_tdpf_static ()
+ || test_tdpf_static32 ()
+ || test_tdpf_auto ()
+ || test_tdpf_guided32 ()
+ || test_tdpf_runtime ()
+ || test_tdpf_ds128_static ()
+ || test_tdpf_ds128_static32 ()
+ || test_tdpf_ds128_auto ()
+ || test_tdpf_ds128_guided32 ()
+ || test_tdpf_ds128_runtime ()
+ || test_tdpfs_static ()
+ || test_tdpfs_static32 ()
+ || test_tdpfs_auto ()
+ || test_tdpfs_guided32 ()
+ || test_tdpfs_runtime ()
+ || test_tdpfs_ds128_static ()
+ || test_tdpfs_ds128_static32 ()
+ || test_tdpfs_ds128_auto ()
+ || test_tdpfs_ds128_guided32 ()
+ || test_tdpfs_ds128_runtime ())
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/linear-1.c b/libgomp/testsuite/libgomp.c/linear-1.c
new file mode 100644
index 00000000000..f86fb33c5da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/linear-1.c
@@ -0,0 +1,250 @@
+int a[256];
+
+__attribute__((noinline, noclone)) int
+f1 (int i)
+{
+ #pragma omp parallel for linear (i: 4)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f2 (short int i, char k)
+{
+ #pragma omp parallel for linear (i: k + 1)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f3 (long long int i, long long int k)
+{
+ #pragma omp parallel for linear (i: k)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int i)
+{
+ #pragma omp parallel for linear (i: 4) schedule(static, 3)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f5 (short int i, char k)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(static, 5)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f6 (long long int i, long long int k)
+{
+ #pragma omp parallel for linear (i: k) schedule(static, 7)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f7 (int i)
+{
+ #pragma omp parallel for linear (i: 4) schedule(dynamic, 3)
+ for (int j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f8 (short int i, char k)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5)
+ for (long j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f9 (long long int i, long long int k)
+{
+ #pragma omp parallel for linear (i: k) schedule(dynamic, 7)
+ for (short j = 16; j < 64; j++)
+ {
+ a[i] = j;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int i, long step)
+{
+ #pragma omp parallel for linear (i: 4)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f11 (short int i, char k, char step)
+{
+ #pragma omp parallel for linear (i: k + 1)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f12 (long long int i, long long int k, int step)
+{
+ #pragma omp parallel for linear (i: k)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int i, long long int step)
+{
+ #pragma omp parallel for linear (i: 4) schedule(static, 3)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f14 (short int i, char k, int step)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(static, 5)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f15 (long long int i, long long int k, long int step)
+{
+ #pragma omp parallel for linear (i: k) schedule(static, 7)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int i, long long int step)
+{
+ #pragma omp parallel for linear (i: 4) schedule(dynamic, 3)
+ for (int j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) short int
+f17 (short int i, char k, int step)
+{
+ #pragma omp parallel for linear (i: k + 1) schedule(dynamic, 5)
+ for (long j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+__attribute__((noinline, noclone)) long long int
+f18 (long long int i, long long int k, long int step)
+{
+ #pragma omp parallel for linear (i: k) schedule(dynamic, 7)
+ for (short j = 16; j < 112; j += step)
+ {
+ a[i] = j / 2 + 8;
+ i += 4;
+ }
+ return i;
+}
+
+int
+main ()
+{
+#define TEST(x) \
+ if (x != 8 + 48 * 4) \
+ __builtin_abort (); \
+ for (int i = 0; i < 256; i++) \
+ if (a[i] != (((i & 3) == 0 && i >= 8 \
+ && i < 8 + 48 * 4) \
+ ? ((i - 8) / 4) + 16 : 0)) \
+ __builtin_abort (); \
+ __builtin_memset (a, 0, sizeof (a))
+ TEST (f1 (8));
+ TEST (f2 (8, 3));
+ TEST (f3 (8LL, 4LL));
+ TEST (f4 (8));
+ TEST (f5 (8, 3));
+ TEST (f6 (8LL, 4LL));
+ TEST (f7 (8));
+ TEST (f8 (8, 3));
+ TEST (f9 (8LL, 4LL));
+ TEST (f10 (8, 2));
+ TEST (f11 (8, 3, 2));
+ TEST (f12 (8LL, 4LL, 2));
+ TEST (f13 (8, 2));
+ TEST (f14 (8, 3, 2));
+ TEST (f15 (8LL, 4LL, 2));
+ TEST (f16 (8, 2));
+ TEST (f17 (8, 3, 2));
+ TEST (f18 (8LL, 4LL, 2));
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/ordered-4.c b/libgomp/testsuite/libgomp.c/ordered-4.c
new file mode 100644
index 00000000000..8412d4715c3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ordered-4.c
@@ -0,0 +1,83 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+foo (int i, char *j)
+{
+ #pragma omp atomic
+ j[i]++;
+ #pragma omp ordered threads
+ {
+ int t;
+ #pragma omp atomic read
+ t = j[i];
+ if (t != 3)
+ abort ();
+ if (i > 1)
+ {
+ #pragma omp atomic read
+ t = j[i - 1];
+ if (t == 2)
+ abort ();
+ }
+ if (i < 127)
+ {
+ #pragma omp atomic read
+ t = j[i + 1];
+ if (t == 4)
+ abort ();
+ }
+ }
+ #pragma omp atomic
+ j[i]++;
+}
+
+int
+main ()
+{
+ int i;
+ char j[128];
+ #pragma omp parallel
+ {
+ #pragma omp for
+ for (i = 0; i < 128; i++)
+ j[i] = 0;
+ #pragma omp for ordered schedule(dynamic, 1)
+ for (i = 0; i < 128; i++)
+ {
+ #pragma omp atomic
+ j[i]++;
+ #pragma omp ordered threads
+ {
+ int t;
+ #pragma omp atomic read
+ t = j[i];
+ if (t != 1)
+ abort ();
+ if (i > 1)
+ {
+ #pragma omp atomic read
+ t = j[i - 1];
+ if (t == 0)
+ abort ();
+ }
+ if (i < 127)
+ {
+ #pragma omp atomic read
+ t = j[i + 1];
+ if (t == 2)
+ abort ();
+ }
+ }
+ #pragma omp atomic
+ j[i]++;
+ }
+ #pragma omp for ordered schedule(static, 1)
+ for (i = 0; i < 128; i++)
+ foo (i, j);
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/pr66199-2.c b/libgomp/testsuite/libgomp.c/pr66199-2.c
index ddb79de8943..2fc9eec529a 100644
--- a/libgomp/testsuite/libgomp.c/pr66199-2.c
+++ b/libgomp/testsuite/libgomp.c/pr66199-2.c
@@ -18,12 +18,11 @@ __attribute__((noinline, noclone)) void
f2 (long a, long b, long c)
{
long d, e;
- #pragma omp target teams distribute parallel for simd default(none) firstprivate (a, b) shared(u, v, w) linear(d) linear(c:5) lastprivate(e)
+ #pragma omp target teams distribute parallel for simd default(none) firstprivate (a, b, c) shared(u, v, w) linear(d) lastprivate(e)
for (d = a; d < b; d++)
{
u[d] = v[d] + w[d];
- c += 5;
- e = c;
+ e = c + d * 5;
}
}
diff --git a/libgomp/testsuite/libgomp.c/pr66199-3.c b/libgomp/testsuite/libgomp.c/pr66199-3.c
new file mode 100644
index 00000000000..fe0ccb47197
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr66199-3.c
@@ -0,0 +1,50 @@
+/* PR middle-end/66199 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp" } */
+
+int u[1024], v[1024], w[1024];
+
+__attribute__((noinline, noclone)) long
+f1 (long a, long b)
+{
+ long d;
+ #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w)
+ for (d = a; d < b; d++)
+ u[d] = v[d] + w[d];
+ return d;
+}
+
+__attribute__((noinline, noclone)) long
+f2 (long a, long b, long c)
+{
+ long d, e;
+ #pragma omp parallel for lastprivate (d) default(none) firstprivate (a, b) shared(u, v, w) linear(c:5) lastprivate(e)
+ for (d = a; d < b; d++)
+ {
+ u[d] = v[d] + w[d];
+ c += 5;
+ e = c;
+ }
+ return d + c + e;
+}
+
+__attribute__((noinline, noclone)) long
+f3 (long a1, long b1, long a2, long b2)
+{
+ long d1, d2;
+ #pragma omp parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2)
+ for (d1 = a1; d1 < b1; d1++)
+ for (d2 = a2; d2 < b2; d2++)
+ u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2];
+ return d1 + d2;
+}
+
+int
+main ()
+{
+ if (f1 (0, 1024) != 1024
+ || f2 (0, 1024, 17) != 1024 + 2 * (17 + 5 * 1024)
+ || f3 (0, 32, 0, 32) != 64)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/pr66199-4.c b/libgomp/testsuite/libgomp.c/pr66199-4.c
new file mode 100644
index 00000000000..a9b1bb8a59e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr66199-4.c
@@ -0,0 +1,58 @@
+/* PR middle-end/66199 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp" } */
+
+#pragma omp declare target
+int u[1024], v[1024], w[1024];
+#pragma omp end declare target
+
+__attribute__((noinline, noclone)) void
+f1 (long a, long b)
+{
+ long d;
+ #pragma omp target teams distribute parallel for default(none) firstprivate (a, b) shared(u, v, w)
+ for (d = a; d < b; d++)
+ u[d] = v[d] + w[d];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (long a, long b, long c)
+{
+ long d, e;
+ #pragma omp target teams distribute parallel for default(none) firstprivate (a, b, c) shared(u, v, w) lastprivate(d, e)
+ for (d = a; d < b; d++)
+ {
+ u[d] = v[d] + w[d];
+ e = c + d * 5;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (long a1, long b1, long a2, long b2)
+{
+ long d1, d2;
+ #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) lastprivate(d1, d2) collapse(2)
+ for (d1 = a1; d1 < b1; d1++)
+ for (d2 = a2; d2 < b2; d2++)
+ u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (long a1, long b1, long a2, long b2)
+{
+ long d1, d2;
+ #pragma omp target teams distribute parallel for default(none) firstprivate (a1, b1, a2, b2) shared(u, v, w) collapse(2)
+ for (d1 = a1; d1 < b1; d1++)
+ for (d2 = a2; d2 < b2; d2++)
+ u[d1 * 32 + d2] = v[d1 * 32 + d2] + w[d1 * 32 + d2];
+}
+
+int
+main ()
+{
+ f1 (0, 1024);
+ f2 (0, 1024, 17);
+ f3 (0, 32, 0, 32);
+ f4 (0, 32, 0, 32);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/reduction-10.c b/libgomp/testsuite/libgomp.c/reduction-10.c
new file mode 100644
index 00000000000..3c95ebd4a4b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/reduction-10.c
@@ -0,0 +1,105 @@
+struct A { int t; };
+struct B { char t; };
+struct C { unsigned long long t; };
+struct D { long t; };
+void
+add (struct B *x, struct B *y)
+{
+ x->t += y->t;
+}
+void
+zero (struct B *x)
+{
+ x->t = 0;
+}
+void
+orit (struct C *x, struct C *y)
+{
+ y->t |= x->t;
+}
+#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv))
+#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 })
+#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L })
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+struct B z[10];
+
+__attribute__((noinline, noclone)) void
+foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int p1, long p2, long p3, int p4,
+ int p5, long p6, short p7)
+{
+ struct C a[p7 + 4];
+ short b[p7];
+ int i;
+ for (i = 0; i < p7 + 4; i++)
+ {
+ if (i < p7)
+ b[i] = -6;
+ a[i].t = 0;
+ }
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(maxb:b)
+ for (i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (i = 0; i < 9; i++)
+ if (a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ struct A a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ struct D w[1][2] = { { { ~0L }, { ~0L } } };
+ foo (&a[1], y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5);
+ int i, j, k;
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 3; j++)
+ for (k = 0; k < 2; k++)
+ if (a[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (i = 0; i < 5; i++)
+ if (y[i].t != y2[i])
+ __builtin_abort ();
+ for (i = 0; i < 10; i++)
+ if (z[i].t != z2[i])
+ __builtin_abort ();
+ if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/reduction-7.c b/libgomp/testsuite/libgomp.c/reduction-7.c
new file mode 100644
index 00000000000..347c26f46d3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/reduction-7.c
@@ -0,0 +1,64 @@
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*x)[3][2], int *y, long w[1][2])
+{
+ unsigned long long a[9] = {};
+ short b[5] = {};
+ int i;
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:1][:2]) reduction(max:b)
+ for (i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (i = 0; i < 9; i++)
+ if (a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ int a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ int y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ long w[1][2] = { ~0L, ~0L };
+ foo (&a[1], y + 1, w);
+ if (__builtin_memcmp (a, a2, sizeof (a))
+ || __builtin_memcmp (y, y2, sizeof (y))
+ || __builtin_memcmp (z, z2, sizeof (z))
+ || w[0][0] != ~0x249249L
+ || w[0][1] != ~0x249249L)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/reduction-8.c b/libgomp/testsuite/libgomp.c/reduction-8.c
new file mode 100644
index 00000000000..f4ec03aabea
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/reduction-8.c
@@ -0,0 +1,98 @@
+struct A { int t; };
+struct B { char t; };
+struct C { unsigned long long t; };
+struct D { long t; };
+void
+add (struct B *x, struct B *y)
+{
+ x->t += y->t;
+}
+void
+zero (struct B *x)
+{
+ x->t = 0;
+}
+void
+orit (struct C *x, struct C *y)
+{
+ y->t |= x->t;
+}
+#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv))
+#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 })
+#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L })
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+struct B z[10];
+
+__attribute__((noinline, noclone)) void
+foo (struct A (*x)[3][2], struct A *y, struct D w[1][2])
+{
+ struct C a[9] = {};
+ short b[5] = {};
+ int i;
+ #pragma omp parallel for reduction(+:x[0:2][:][0:2], z[:4]) \
+ reduction(*:y[:3]) reduction(|:a[:4]) \
+ reduction(&:w[0:1][:2]) reduction(maxb:b)
+ for (i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1].t += i;
+ if ((i & 15) == 1)
+ y[0].t *= 3;
+ if ((i & 31) == 2)
+ y[1].t *= 7;
+ if ((i & 63) == 3)
+ y[2].t *= 17;
+ z[i / 32].t += (i & 3);
+ if (i < 4)
+ z[i].t += i;
+ a[i / 32].t |= 1ULL << (i & 30);
+ w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (i = 0; i < 9; i++)
+ if (a[i].t != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ struct A a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ struct D w[1][2] = { { { ~0L }, { ~0L } } };
+ foo (&a[1], y + 1, w);
+ int i, j, k;
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 3; j++)
+ for (k = 0; k < 2; k++)
+ if (a[i][j][k].t != a2[i][j][k])
+ __builtin_abort ();
+ for (i = 0; i < 5; i++)
+ if (y[i].t != y2[i])
+ __builtin_abort ();
+ for (i = 0; i < 10; i++)
+ if (z[i].t != z2[i])
+ __builtin_abort ();
+ if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/reduction-9.c b/libgomp/testsuite/libgomp.c/reduction-9.c
new file mode 100644
index 00000000000..13605c1ab88
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/reduction-9.c
@@ -0,0 +1,71 @@
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*x)[3][2], int *y, long w[1][2], int p1, long p2, long p3, int p4,
+ int p5, long p6, short p7)
+{
+ unsigned long long a[p7 + 4];
+ short b[p7];
+ int i;
+ for (i = 0; i < p7 + 4; i++)
+ {
+ if (i < p7)
+ b[i] = -6;
+ a[i] = 0;
+ }
+ #pragma omp parallel for reduction(+:x[0:p1 + 1][:p2], z[:p3]) \
+ reduction(*:y[:p4]) reduction(|:a[:p5]) \
+ reduction(&:w[0:p6 - 1][:p6]) reduction(max:b)
+ for (i = 0; i < 128; i++)
+ {
+ x[i / 64][i % 3][(i / 4) & 1] += i;
+ if ((i & 15) == 1)
+ y[0] *= 3;
+ if ((i & 31) == 2)
+ y[1] *= 7;
+ if ((i & 63) == 3)
+ y[2] *= 17;
+ z[i / 32] += (i & 3);
+ if (i < 4)
+ z[i] += i;
+ a[i / 32] |= 1ULL << (i & 30);
+ w[0][i & 1] &= ~(1L << (i / 17 * 3));
+ if ((i % 79) > b[0])
+ b[0] = i % 79;
+ if ((i % 13) > b[1])
+ b[1] = i % 13;
+ if ((i % 23) > b[2])
+ b[2] = i % 23;
+ if ((i % 85) > b[3])
+ b[3] = i % 85;
+ if ((i % 192) > b[4])
+ b[4] = i % 192;
+ }
+ for (i = 0; i < 9; i++)
+ if (a[i] != (i < 4 ? 0x55555555ULL : 0))
+ __builtin_abort ();
+ if (b[0] != 78 || b[1] != 12 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ int a[4][3][2] = {};
+ static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+ {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+ {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+ {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+ int y[5] = { 0, 1, 1, 1, 0 };
+ int y2[5] = { 0, 6561, 2401, 289, 0 };
+ char z2[10] = { 48, 49, 50, 51, 0, 0, 0, 0, 0, 0 };
+ long w[1][2] = { ~0L, ~0L };
+ foo (&a[1], y + 1, w, 1, 3L, 4L, 3, 4, 2L, 5);
+ if (__builtin_memcmp (a, a2, sizeof (a))
+ || __builtin_memcmp (y, y2, sizeof (y))
+ || __builtin_memcmp (z, z2, sizeof (z))
+ || w[0][0] != ~0x249249L
+ || w[0][1] != ~0x249249L)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-1.c b/libgomp/testsuite/libgomp.c/target-1.c
index f734d3c279d..c7abb008182 100644
--- a/libgomp/testsuite/libgomp.c/target-1.c
+++ b/libgomp/testsuite/libgomp.c/target-1.c
@@ -34,7 +34,7 @@ fn2 (int x, int y, int z)
fn1 (b, c, x);
#pragma omp target data map(to: b)
{
- #pragma omp target map(tofrom: c)
+ #pragma omp target map(tofrom: c, s)
#pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
#pragma omp distribute dist_schedule(static, 4) collapse(1)
for (j=0; j < x; j += y)
@@ -52,7 +52,7 @@ fn3 (int x)
double b[1024], c[1024], s = 0;
int i;
fn1 (b, c, x);
- #pragma omp target map(to: b, c)
+ #pragma omp target map(to: b, c) map(tofrom:s)
#pragma omp parallel for reduction(+:s)
for (i = 0; i < x; i++)
tgt (), s += b[i] * c[i];
@@ -66,7 +66,8 @@ fn4 (int x, double *p)
int i;
fn1 (b, c, x);
fn1 (d + x, p + x, x);
- #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)])
+ #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
+ map(tofrom: s)
#pragma omp parallel for reduction(+:s)
for (i = 0; i < x; i++)
s += b[i] * c[i] + d[x + i] + p[x + i];
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
new file mode 100644
index 00000000000..625c2863f4b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 32
+
+void test_array_section (int *p)
+{
+ #pragma omp target data map(alloc: p[0:N])
+ {
+ int ok = 1;
+ for (int i = 10; i < 10 + 4; i++)
+ p[i] = 997 * i;
+
+ #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+ for (int i = 10; i < 10 + 4; i++)
+ if (p[i] != 997 * i)
+ ok = 0;
+
+ assert (ok);
+
+ #pragma omp target map(always from:p[7:9])
+ for (int i = 0; i < N; i++)
+ p[i] = i;
+ }
+}
+
+int main ()
+{
+ int aa = 0, bb = 0, cc = 0, dd = 0;
+
+ #pragma omp target data map(tofrom: aa) map(to: bb) map(from: cc, dd)
+ {
+ int ok;
+ aa = bb = cc = 1;
+
+ /* Set dd on target to 0 for the further check. */
+ #pragma omp target map(always to: dd)
+ ;
+
+ dd = 1;
+ #pragma omp target map(tofrom: aa) map(always to: bb) \
+ map(always from: cc) map(to: dd) map(from: ok)
+ {
+ /* bb is always to, aa and dd are not. */
+ ok = (aa == 0) && (bb == 1) && (dd == 0);
+ aa = bb = cc = dd = 2;
+ }
+
+ assert (ok);
+ assert (aa == 1);
+ assert (bb == 1);
+ assert (cc == 2); /* cc is always from. */
+ assert (dd == 1);
+
+ dd = 3;
+ #pragma omp target map(from: cc) map(always to: dd) map(from: ok)
+ {
+ ok = (dd == 3); /* dd is always to. */
+ cc = dd = 4;
+ }
+
+ assert (ok);
+ assert (cc == 2);
+ assert (dd == 3);
+ }
+
+ assert (aa == 2);
+ assert (bb == 1);
+ assert (cc == 4);
+ assert (dd == 4);
+
+ int *array = calloc (N, sizeof (int));
+ test_array_section (array);
+
+ for (int i = 0; i < 7; i++)
+ assert (array[i] == 0);
+ for (int i = 7; i < 7 + 9; i++)
+ assert (array[i] == i);
+ for (int i = 7 + 9; i < N; i++)
+ assert (array[i] == 0);
+
+ free (array);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c
new file mode 100644
index 00000000000..e6b009463ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-12.c
@@ -0,0 +1,130 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ int d = omp_get_default_device ();
+ int id = omp_get_initial_device ();
+ int err;
+ int q[128], i;
+ void *p;
+
+ if (d < 0 || d >= omp_get_num_devices ())
+ d = id;
+
+ for (i = 0; i < 128; i++)
+ q[i] = i;
+
+ p = omp_target_alloc (130 * sizeof (int), d);
+ if (p == NULL)
+ return 0;
+
+ if (omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL,
+ d, id) < 3
+ || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
+ NULL, id, d) < 3
+ || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
+ NULL, id, id) < 3)
+ abort ();
+
+ if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) == 0)
+ {
+ size_t volume[3] = { 128, 0, 0 };
+ size_t dst_offsets[3] = { 0, 0, 0 };
+ size_t src_offsets[3] = { 1, 0, 0 };
+ size_t dst_dimensions[3] = { 128, 0, 0 };
+ size_t src_dimensions[3] = { 128, 0, 0 };
+
+ if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) != 0)
+ abort ();
+
+ if (omp_target_is_present (q, d) != 1
+ || omp_target_is_present (&q[32], d) != 1
+ || omp_target_is_present (&q[128], d) != 1)
+ abort ();
+
+ if (omp_target_memcpy (p, q, 128 * sizeof (int), sizeof (int), 0,
+ d, id) != 0)
+ abort ();
+
+ #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) map(alloc:q[0:32]) map(from:err)
+ {
+ int j;
+ err = 0;
+ for (j = 0; j < 128; j++)
+ if (q[j] != j)
+ err = 1;
+ else
+ q[j] += 4;
+ }
+
+ if (err)
+ abort ();
+
+ if (omp_target_memcpy_rect (q, p, sizeof (int), 1, volume,
+ dst_offsets, src_offsets, dst_dimensions,
+ src_dimensions, id, d) != 0)
+ abort ();
+
+ for (i = 0; i < 128; i++)
+ if (q[i] != i + 4)
+ abort ();
+
+ volume[2] = 2;
+ volume[1] = 3;
+ volume[0] = 6;
+ dst_offsets[2] = 1;
+ dst_offsets[1] = 0;
+ dst_offsets[0] = 0;
+ src_offsets[2] = 1;
+ src_offsets[1] = 0;
+ src_offsets[0] = 3;
+ dst_dimensions[2] = 2;
+ dst_dimensions[1] = 3;
+ dst_dimensions[0] = 6;
+ src_dimensions[2] = 3;
+ src_dimensions[1] = 4;
+ src_dimensions[0] = 6;
+ if (omp_target_memcpy_rect (p, q, sizeof (int), 3, volume,
+ dst_offsets, src_offsets, dst_dimensions,
+ src_dimensions, d, id) != 0)
+ abort ();
+
+ #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) map(alloc:q[0:32]) map(from:err)
+ {
+ int j, k, l;
+ err = 0;
+ for (j = 0; j < 6; j++)
+ for (k = 0; k < 3; k++)
+ for (l = 0; l < 2; l++)
+ if (q[j * 6 + k * 2 + l] != 3 * 12 + 4 + 1 + l + k * 3 + j * 12)
+ err = 1;
+ }
+
+ if (err)
+ abort ();
+
+ if (omp_target_memcpy (p, p, 10 * sizeof (int), 51 * sizeof (int),
+ 111 * sizeof (int), d, d) != 0)
+ abort ();
+
+ #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) map(alloc:q[0:32]) map(from:err)
+ {
+ int j;
+ err = 0;
+ for (j = 0; j < 10; j++)
+ if (q[50 + j] != q[110 + j])
+ err = 1;
+ }
+
+ if (err)
+ abort ();
+
+ if (omp_target_disassociate_ptr (q, d) != 0)
+ abort ();
+ }
+
+ omp_target_free (p, d);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-13.c b/libgomp/testsuite/libgomp.c/target-13.c
new file mode 100644
index 00000000000..168850b507c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-13.c
@@ -0,0 +1,45 @@
+#ifdef __cplusplus
+extern "C"
+#else
+extern
+#endif
+void abort (void);
+struct S { int s, t; };
+
+void
+foo ()
+{
+ int x = 5, y = 6, err = 0;
+ struct S u = { 7, 8 }, v = { 9, 10 };
+ double s = 11.5, t = 12.5;
+ #pragma omp target private (x, u, s) firstprivate (y, v, t) map(from:err)
+ {
+ x = y;
+ u = v;
+ s = t;
+ err = (x != 6 || y != 6
+ || u.s != 9 || u.t != 10 || v.s != 9 || v.t != 10
+ || s != 12.5 || t != 12.5);
+ x += 1;
+ y += 2;
+ u.s += 3;
+ v.t += 4;
+ s += 2.5;
+ t += 3.0;
+ if (x != 7 || y != 8
+ || u.s != 12 || u.t != 10 || v.s != 9 || v.t != 14
+ || s != 15.0 || t != 15.5)
+ err = 1;
+ }
+ if (err || x != 5 || y != 6
+ || u.s != 7 || u.t != 8 || v.s != 9 || v.t != 10
+ || s != 11.5 || t != 12.5)
+ abort ();
+}
+
+int
+main ()
+{
+ foo ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-14.c b/libgomp/testsuite/libgomp.c/target-14.c
new file mode 100644
index 00000000000..17d383407a2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-14.c
@@ -0,0 +1,38 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ int d = omp_get_default_device ();
+ int id = omp_get_initial_device ();
+ int err;
+ void *p;
+
+ if (d < 0 || d >= omp_get_num_devices ())
+ d = id;
+
+ p = omp_target_alloc (128 * sizeof (int), d);
+ if (p == NULL)
+ return 0;
+
+ #pragma omp target is_device_ptr (p) if (d >= 0) device (d >= 0 ? d : 0)
+ {
+ int i, *q = (int *) p;
+ for (i = 0; i < 128; i++)
+ q[i] = i + 7;
+ }
+ #pragma omp target is_device_ptr (p) if (d >= 0) device (d >= 0 ? d : 0) map(from:err)
+ {
+ int i;
+ err = 0;
+ for (i = 0; i < 128; i++)
+ if (((int *) p)[i] != i + 7)
+ err = 1;
+ }
+ if (err)
+ abort ();
+
+ omp_target_free (p, d);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-15.c b/libgomp/testsuite/libgomp.c/target-15.c
new file mode 100644
index 00000000000..fee9252ef3d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-15.c
@@ -0,0 +1,74 @@
+extern void abort (void);
+
+void
+foo (int *x)
+{
+ int a[10], b[15], err, i;
+ for (i = 0; i < 10; i++)
+ a[i] = 7 * i;
+ for (i = 0; i < 15; i++)
+ b[i] = 8 * i;
+ #pragma omp target map(to:x[5:10], a[0:10], b[5:10]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if (x[5 + i] != 20 + 4 * i
+ || a[i] != 7 * i
+ || b[5 + i] != 40 + 8 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+void
+bar (int n, int v)
+{
+ int a[n], b[n], c[n], d[n], e[n], err, i;
+ int (*x)[n] = &c;
+ for (i = 0; i < n; i++)
+ {
+ (*x)[i] = 4 * i;
+ a[i] = 7 * i;
+ b[i] = 8 * i;
+ }
+ #pragma omp target map(to:x[0][5:10], a[0:10], b[5:10]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if ((*x)[5 + i] != 20 + 4 * i
+ || a[i] != 7 * i
+ || b[5 + i] != 40 + 8 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ {
+ (*x)[i] = 9 * i;
+ a[i] = 12 * i;
+ b[i] = 13 * i;
+ }
+ #pragma omp target map(to:x[0][v:v+5], a[v-5:v+5], b[v:v+5]) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 10; i++)
+ if ((*x)[5 + i] != 45 + 9 * i
+ || a[i] != 12 * i
+ || b[5 + i] != 65 + 13 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+int
+main ()
+{
+ int x[15], i;
+ for (i = 0; i < 15; i++)
+ x[i] = 4 * i;
+ foo (x);
+ bar (15, 5);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-16.c b/libgomp/testsuite/libgomp.c/target-16.c
new file mode 100644
index 00000000000..7b0919b1b00
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-16.c
@@ -0,0 +1,45 @@
+extern void abort (void);
+
+void
+foo (int n)
+{
+ int a[n], i, err;
+ for (i = 0; i < n; i++)
+ a[i] = 7 * i;
+ #pragma omp target firstprivate (a) map(from:err) private (i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 7 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+void
+bar (int n)
+{
+ int a[n], i, err;
+ #pragma omp target private (a) map(from:err)
+ {
+ #pragma omp parallel for
+ for (i = 0; i < n; i++)
+ a[i] = 7 * i;
+ err = 0;
+ #pragma omp parallel for reduction(|:err)
+ for (i = 0; i < n; i++)
+ if (a[i] != 7 * i)
+ err |= 1;
+ }
+ if (err)
+ abort ();
+}
+
+int
+main ()
+{
+ foo (7);
+ bar (7);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-17.c b/libgomp/testsuite/libgomp.c/target-17.c
new file mode 100644
index 00000000000..4a762012eaf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-17.c
@@ -0,0 +1,99 @@
+extern void abort (void);
+
+void
+foo (int n)
+{
+ int a[n], i, err;
+ for (i = 0; i < n; i++)
+ a[i] = 5 * i;
+ #pragma omp target map(to:a) map(from:err) private(i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 5 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ a[i] += i;
+ #pragma omp target map(from:err) private(i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 6 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ a[i] += i;
+ #pragma omp target firstprivate (a) map(from:err) private(i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 7 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ int on = n;
+ #pragma omp target firstprivate (n) map(tofrom: n)
+ {
+ n++;
+ }
+ if (on != n)
+ abort ();
+ #pragma omp target map(tofrom: n) private (n)
+ {
+ n = 25;
+ }
+ if (on != n)
+ abort ();
+ for (i = 0; i < n; i++)
+ a[i] += i;
+ #pragma omp target map(to:a) firstprivate (a) map(from:err) private(i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 8 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ a[i] += i;
+ #pragma omp target firstprivate (a) map(to:a) map(from:err) private(i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ if (a[i] != 9 * i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ a[i] += i;
+ #pragma omp target map(tofrom:a) map(from:err) private(a, i)
+ {
+ err = 0;
+ for (i = 0; i < n; i++)
+ a[i] = 7;
+ #pragma omp parallel for reduction(|:err)
+ for (i = 0; i < n; i++)
+ if (a[i] != 7)
+ err |= 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < n; i++)
+ if (a[i] != 10 * i)
+ abort ();
+}
+
+int
+main ()
+{
+ foo (9);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-18.c b/libgomp/testsuite/libgomp.c/target-18.c
new file mode 100644
index 00000000000..cbacaf6a77a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-18.c
@@ -0,0 +1,52 @@
+extern void abort (void);
+
+void
+foo (int n)
+{
+ int a[4] = { 0, 1, 2, 3 }, b[n];
+ int *p = a + 1, i, err;
+ for (i = 0; i < n; i++)
+ b[i] = 9 + i;
+ #pragma omp target data map(to:a)
+ #pragma omp target data use_device_ptr(p) map(from:err)
+ #pragma omp target is_device_ptr(p) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 4; i++)
+ if (p[i - 1] != i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < 4; i++)
+ a[i] = 23 + i;
+ #pragma omp target data map(to:a)
+ #pragma omp target data use_device_ptr(a) map(from:err)
+ #pragma omp target is_device_ptr(a) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 4; i++)
+ if (a[i] != 23 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:b)
+ #pragma omp target data use_device_ptr(b) map(from:err)
+ #pragma omp target is_device_ptr(b) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 4; i++)
+ if (b[i] != 9 + i)
+ err = 1;
+ }
+ if (err)
+ abort ();
+}
+
+int
+main ()
+{
+ foo (9);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-19.c b/libgomp/testsuite/libgomp.c/target-19.c
new file mode 100644
index 00000000000..710c5078ff6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-19.c
@@ -0,0 +1,127 @@
+extern void abort (void);
+
+void
+foo (int *p, int *q, int *r, int n, int m)
+{
+ int i, err, *s = r;
+ #pragma omp target data map(to:p[0:8])
+ {
+ /* For zero length array sections, p points to the start of
+ already mapped range, q to the end of it, and r does not point
+ to an mapped range. */
+ #pragma omp target map(alloc:p[:0]) map(to:q[:0]) map(from:r[:0]) private(i) map(from:err) firstprivate (s)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1 || q[i - 8] != i + 1)
+ err = 1;
+ if (p + 8 != q || (r != (int *) 0 && r != s))
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* Implicit mapping of pointers behaves the same way. */
+ #pragma omp target private(i) map(from:err) firstprivate (s)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1 || q[i - 8] != i + 1)
+ err = 1;
+ if (p + 8 != q || (r != (int *) 0 && r != s))
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* And zero-length array sections, though not known at compile
+ time, behave the same. */
+ #pragma omp target map(p[:n]) map(tofrom:q[:n]) map(alloc:r[:n]) private(i) map(from:err) firstprivate (s)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1 || q[i - 8] != i + 1)
+ err = 1;
+ if (p + 8 != q || (r != (int *) 0 && r != s))
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* Non-zero length array sections, though not known at compile,
+ behave differently. */
+ #pragma omp target map(p[:m]) map(tofrom:q[:m]) map(to:r[:m]) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1)
+ err = 1;
+ if (q[0] != 9 || r[0] != 10)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ #pragma omp target data map(to:q[0:1])
+ {
+ /* For zero length array sections, p points to the start of
+ already mapped range, q points to the start of another one,
+ and r to the end of the second one. */
+ #pragma omp target map(to:p[:0]) map(from:q[:0]) map(tofrom:r[:0]) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1)
+ err = 1;
+ if (q[0] != 9 || r != q + 1)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* Implicit mapping of pointers behaves the same way. */
+ #pragma omp target private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1)
+ err = 1;
+ if (q[0] != 9 || r != q + 1)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* And zero-length array sections, though not known at compile
+ time, behave the same. */
+ #pragma omp target map(p[:n]) map(alloc:q[:n]) map(from:r[:n]) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1)
+ err = 1;
+ if (q[0] != 9 || r != q + 1)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ /* Non-zero length array sections, though not known at compile,
+ behave differently. */
+ #pragma omp target map(p[:m]) map(alloc:q[:m]) map(tofrom:r[:m]) private(i) map(from:err)
+ {
+ err = 0;
+ for (i = 0; i < 8; i++)
+ if (p[i] != i + 1)
+ err = 1;
+ if (q[0] != 9 || r[0] != 10)
+ err = 1;
+ }
+ if (err)
+ abort ();
+ }
+ }
+}
+
+int
+main ()
+{
+ int a[32], i;
+ for (i = 0; i < 32; i++)
+ a[i] = i;
+ foo (a + 1, a + 9, a + 10, 0, 1);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-2.c b/libgomp/testsuite/libgomp.c/target-2.c
index ada8dad81ad..0ba766c0a82 100644
--- a/libgomp/testsuite/libgomp.c/target-2.c
+++ b/libgomp/testsuite/libgomp.c/target-2.c
@@ -23,7 +23,7 @@ fn2 (int x)
int i;
fn1 (b, c, x);
fn1 (e, d + x, x);
- #pragma omp target map(to: b, c[:x], d[x:x], e)
+ #pragma omp target map(to: b, c[:x], d[x:x], e) map(tofrom: s)
#pragma omp parallel for reduction(+:s)
for (i = 0; i < x; i++)
s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c);
@@ -38,7 +38,7 @@ fn3 (int x)
int i;
fn1 (b, c, x);
fn1 (e, d, x);
- #pragma omp target
+ #pragma omp target map(tofrom: s)
#pragma omp parallel for reduction(+:s)
for (i = 0; i < x; i++)
s += b[i] * c[i] + d[i];
@@ -56,7 +56,7 @@ fn4 (int x)
#pragma omp target data map(from: b, c[:x], d[x:x], e)
{
#pragma omp target update to(b, c[:x], d[x:x], e)
- #pragma omp target map(c[:x], d[x:x])
+ #pragma omp target map(c[:x], d[x:x], s)
#pragma omp parallel for reduction(+:s)
for (i = 0; i < x; i++)
{
diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
new file mode 100644
index 00000000000..3f4e798a755
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-20.c
@@ -0,0 +1,120 @@
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 40
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+#pragma omp declare target
+int D[N];
+#pragma omp end declare target
+
+void enter_data (int *X)
+{
+ #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_0 (int *D)
+{
+ #pragma omp target exit data map(delete: D[:N])
+}
+
+void exit_data_1 ()
+{
+ #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 (int *X)
+{
+ #pragma omp target exit data map(from: var2) map(release: X[:N], sum)
+}
+
+void exit_data_3 (int *p)
+{
+ #pragma omp target exit data map(from: p[:0])
+}
+
+void test_nested ()
+{
+ int X = 0, Y = 0, Z = 0;
+
+ #pragma omp target data map(from: X, Y, Z)
+ {
+ #pragma omp target data map(from: X, Y, Z)
+ {
+ #pragma omp target map(from: X, Y, Z)
+ X = Y = Z = 1337;
+ assert (X == 0);
+ assert (Y == 0);
+ assert (Z == 0);
+
+ #pragma omp target exit data map(from: X) map(release: Y)
+ assert (X == 0);
+ assert (Y == 0);
+
+ #pragma omp target exit data map(release: Y) map(delete: Z)
+ assert (Y == 0);
+ assert (Z == 0);
+ }
+ assert (X == 1337);
+ assert (Y == 0);
+ assert (Z == 0);
+
+ #pragma omp target map(from: X)
+ X = 2448;
+ assert (X == 2448);
+ assert (Y == 0);
+ assert (Z == 0);
+
+ X = 4896;
+ }
+ assert (X == 4896);
+ assert (Y == 0);
+ assert (Z == 0);
+}
+
+int main ()
+{
+ int *X = malloc (N * sizeof (int));
+ int *Y = malloc (N * sizeof (int));
+ X[10] = 10;
+ Y[20] = 20;
+ enter_data (X);
+
+ exit_data_0 (D); /* This should have no effect on D. */
+
+ #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) map(always from: sum)
+ {
+ var1 += X[10];
+ var2 += Y[20];
+ sum = var1 + var2;
+ D[sum]++;
+ }
+
+ assert (var1 == 1);
+ assert (var2 == 2);
+ assert (sum == 33);
+
+ exit_data_1 ();
+ assert (var1 == 11);
+ assert (var2 == 2);
+
+ /* Increase refcount of already mapped X[0:N]. */
+ #pragma omp target enter data map(alloc: X[16:1])
+
+ exit_data_2 (X);
+ assert (var2 == 22);
+
+ exit_data_3 (X + 5); /* Unmap X[0:N]. */
+
+ free (X);
+ free (Y);
+
+ test_nested ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-21.c b/libgomp/testsuite/libgomp.c/target-21.c
new file mode 100644
index 00000000000..41498cf2148
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-21.c
@@ -0,0 +1,79 @@
+extern void abort (void);
+union U { int x; long long y; };
+struct T { int a; union U b; int c; };
+struct S { int s; int u; struct T v; int x[10]; union U w; int y[10]; int z[10]; };
+volatile int z;
+
+int
+main ()
+{
+ struct S s;
+ s.s = 0;
+ s.u = 1;
+ s.v.a = 2;
+ s.v.b.y = 3LL;
+ s.v.c = 19;
+ s.w.x = 4;
+ s.x[0] = 7;
+ s.x[1] = 8;
+ s.y[3] = 9;
+ s.y[4] = 10;
+ s.y[5] = 11;
+ int err = 0;
+ #pragma omp target map (to:s.v.b, s.u, s.x[0:z + 2]) \
+ map (tofrom:s.y[3:3]) \
+ map (from: s.w, s.z[z + 1:z + 3], err)
+ {
+ err = 0;
+ if (s.u != 1 || s.v.b.y != 3LL || s.x[0] != 7 || s.x[1] != 8
+ || s.y[3] != 9 || s.y[4] != 10 || s.y[5] != 11)
+ err = 1;
+ s.w.x = 6;
+ s.y[3] = 12;
+ s.y[4] = 13;
+ s.y[5] = 14;
+ s.z[1] = 15;
+ s.z[2] = 16;
+ s.z[3] = 17;
+ }
+ if (err || s.w.x != 6 || s.y[3] != 12 || s.y[4] != 13 || s.y[5] != 14
+ || s.z[1] != 15 || s.z[2] != 16 || s.z[3] != 17)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[1] = 18;
+ s.z[0] = 19;
+ #pragma omp target data map (tofrom: s)
+ #pragma omp target map (always to: s.w, s.x[1], err) map (alloc:s.u, s.v.b, s.z[z:z + 1])
+ {
+ err = 0;
+ if (s.u != 2 || s.v.b.y != 4LL || s.w.x != 7 || s.x[1] != 18 || s.z[0] != 19)
+ err = 1;
+ s.w.x = 8;
+ s.x[1] = 20;
+ s.z[0] = 21;
+ }
+ if (err || s.w.x != 8 || s.x[1] != 20 || s.z[0] != 21)
+ abort ();
+ s.u++;
+ s.v.a++;
+ s.v.b.y++;
+ s.w.x++;
+ s.x[0] = 22;
+ s.x[1] = 23;
+ #pragma omp target data map (from: s.w, s.x[0:2]) map (to: s.v.b, s.u)
+ #pragma omp target map (always to: s.w, s.x[0:2], err) map (alloc:s.u, s.v.b)
+ {
+ err = 0;
+ if (s.u != 3 || s.v.b.y != 5LL || s.w.x != 9 || s.x[0] != 22 || s.x[1] != 23)
+ err = 1;
+ s.w.x = 11;
+ s.x[0] = 24;
+ s.x[1] = 25;
+ }
+ if (err || s.w.x != 11 || s.x[0] != 24 || s.x[1] != 25)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-22.c b/libgomp/testsuite/libgomp.c/target-22.c
new file mode 100644
index 00000000000..aad8a0a09df
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-22.c
@@ -0,0 +1,51 @@
+extern void abort (void);
+struct T { int a; int *b; int c; };
+struct S { int *s; char *u; struct T v; short *w; };
+volatile int z;
+
+int
+main ()
+{
+ struct S s;
+ int a[32], i;
+ char b[32];
+ short c[32];
+ for (i = 0; i < 32; i++)
+ {
+ a[i] = i;
+ b[i] = 32 + i;
+ c[i] = 64 + i;
+ }
+ s.s = a;
+ s.u = b + 2;
+ s.v.b = a + 16;
+ s.w = c + 3;
+ int err = 0;
+ #pragma omp target map (to:s.v.b[0:z + 7], s.u[z + 1:z + 4]) \
+ map (tofrom:s.s[3:3]) \
+ map (from: s.w[z:4], err) private (i)
+ {
+ err = 0;
+ for (i = 0; i < 7; i++)
+ if (s.v.b[i] != 16 + i)
+ err = 1;
+ for (i = 1; i < 5; i++)
+ if (s.u[i] != 34 + i)
+ err = 1;
+ for (i = 3; i < 6; i++)
+ if (s.s[i] != i)
+ err = 1;
+ else
+ s.s[i] = 128 + i;
+ for (i = 0; i < 4; i++)
+ s.w[i] = 96 + i;
+ }
+ if (err)
+ abort ();
+ for (i = 0; i < 32; i++)
+ if (a[i] != ((i >= 3 && i < 6) ? 128 + i : i)
+ || b[i] != 32 + i
+ || c[i] != ((i >= 3 && i < 7) ? 93 + i : 64 + i))
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-23.c b/libgomp/testsuite/libgomp.c/target-23.c
new file mode 100644
index 00000000000..fb1532a07b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-23.c
@@ -0,0 +1,48 @@
+extern void abort (void);
+struct S { int s; int *u; int v[5]; };
+volatile int z;
+
+int
+main ()
+{
+ int u[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, err = 0;
+ struct S s = { 9, u + 3, { 10, 11, 12, 13, 14 } };
+ int *v = u + 4;
+ #pragma omp target enter data map (to: s.s, s.u[0:5]) map (alloc: s.v[1:3])
+ s.s++;
+ u[3]++;
+ s.v[1]++;
+ #pragma omp target update to (s.s) to (s.u[0:2], s.v[1:3])
+ #pragma omp target map (alloc: s.s, s.v[1:3]) map (from: err)
+ {
+ err = 0;
+ if (s.s != 10 || s.v[1] != 12 || s.v[2] != 12 || s.v[3] != 13)
+ err = 1;
+ if (v[-1] != 4 || v[0] != 4 || v[1] != 5 || v[2] != 6 || v[3] != 7)
+ err = 1;
+ s.s++;
+ s.v[2] += 2;
+ v[-1] = 5;
+ v[3] = 9;
+ }
+ if (err)
+ abort ();
+ #pragma omp target map (alloc: s.u[0:5])
+ {
+ err = 0;
+ if (s.u[0] != 5 || s.u[1] != 4 || s.u[2] != 5 || s.u[3] != 6 || s.u[4] != 9)
+ err = 1;
+ s.u[1] = 12;
+ }
+ #pragma omp target update from (s.s, s.u[0:5]) from (s.v[1:3])
+ if (err || s.s != 11 || u[0] != 0 || u[1] != 1 || u[2] != 2 || u[3] != 5
+ || u[4] != 12 || u[5] != 5 || u[6] != 6 || u[7] != 9 || u[8] != 8
+ || u[9] != 9 || s.v[0] != 10 || s.v[1] != 12 || s.v[2] != 14
+ || s.v[3] != 13 || s.v[4] != 14)
+ abort ();
+ #pragma omp target exit data map (release: s.s)
+ #pragma omp target exit data map (release: s.u[0:5])
+ #pragma omp target exit data map (delete: s.v[1:3])
+ #pragma omp target exit data map (release: s.s)
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-24.c b/libgomp/testsuite/libgomp.c/target-24.c
new file mode 100644
index 00000000000..e0ff29aaee8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-24.c
@@ -0,0 +1,43 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+ int d = omp_get_default_device ();
+ int id = omp_get_initial_device ();
+
+ if (d < 0 || d >= omp_get_num_devices ())
+ d = id;
+
+ int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ int *b = a;
+ int shared_mem = 0;
+ #pragma omp target map (alloc: shared_mem)
+ shared_mem = 1;
+ if (omp_target_is_present (b, d) != shared_mem)
+ abort ();
+ #pragma omp target enter data map (to: a)
+ if (omp_target_is_present (b, d) == 0)
+ abort ();
+ #pragma omp target enter data map (alloc: b[:0])
+ if (omp_target_is_present (b, d) == 0)
+ abort ();
+ #pragma omp target exit data map (release: b[:0])
+ if (omp_target_is_present (b, d) == 0)
+ abort ();
+ #pragma omp target exit data map (release: b[:0])
+ if (omp_target_is_present (b, d) != shared_mem)
+ abort ();
+ #pragma omp target enter data map (to: a)
+ if (omp_target_is_present (b, d) == 0)
+ abort ();
+ #pragma omp target enter data map (always, to: b[:0])
+ if (omp_target_is_present (b, d) == 0)
+ abort ();
+ #pragma omp target exit data map (delete: b[:0])
+ if (omp_target_is_present (b, d) != shared_mem)
+ abort ();
+ #pragma omp target exit data map (from: b[:0])
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-25.c b/libgomp/testsuite/libgomp.c/target-25.c
new file mode 100644
index 00000000000..aeb19aee510
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-25.c
@@ -0,0 +1,84 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+ int x = 0, y = 0, z = 0, s = 11, t = 12, u = 13, w = 7, err;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task depend(in: x)
+ {
+ usleep (5000);
+ x = 1;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (6000);
+ y = 2;
+ }
+ #pragma omp task depend(out: z)
+ {
+ usleep (7000);
+ z = 3;
+ }
+ #pragma omp target map(tofrom: x) map(from: err) firstprivate (y) depend(inout: x, z)
+ err = (x != 1 || y != 2 || z != 3);
+ if (err)
+ abort ();
+ #pragma omp task depend(in: x)
+ {
+ usleep (5000);
+ x = 4;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (4000);
+ y = 5;
+ }
+ #pragma omp task depend(in: z)
+ {
+ usleep (3000);
+ z = 6;
+ }
+ #pragma omp target enter data nowait map (to: w)
+ #pragma omp target enter data depend (inout: x, z) map (to: x, y, z)
+ #pragma omp target map (alloc: x, y, z) map(from: err)
+ {
+ err = (x != 4 || y != 5 || z != 6);
+ x = 7;
+ y = 8;
+ z = 9;
+ }
+ if (err)
+ abort ();
+ #pragma omp taskwait
+ #pragma omp target map (alloc: w) map(from: err)
+ {
+ err = w != 7;
+ w = 17;
+ }
+ if (err)
+ abort ();
+ #pragma omp task depend(in: x)
+ {
+ usleep (2000);
+ s = 14;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (3000);
+ t = 15;
+ }
+ #pragma omp task depend(in: z)
+ {
+ usleep (4000);
+ u = 16;
+ }
+ #pragma omp target exit data depend (inout: x, z) map (from: x, y, z, w)
+ if (x != 7 || y != 8 || z != 9 || s != 14 || t != 15 || u != 16 || w != 17)
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-26.c b/libgomp/testsuite/libgomp.c/target-26.c
new file mode 100644
index 00000000000..fa6b52598da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-26.c
@@ -0,0 +1,36 @@
+extern void abort (void);
+#pragma omp declare target
+int a[4] = { 2, 3, 4, 5 }, *b;
+#pragma omp end declare target
+
+int
+main ()
+{
+ int err;
+ int c[3] = { 6, 7, 8 };
+ b = c;
+ #pragma omp target map(to: a[0:2], b[0:2]) map(from: err)
+ err = a[0] != 2 || a[1] != 3 || a[2] != 4 || a[3] != 5 || b[0] != 6 || b[1] != 7;
+ if (err)
+ abort ();
+ a[1] = 9;
+ a[2] = 10;
+ #pragma omp target map(always,to:a[1:2]) map(from: err)
+ err = a[0] != 2 || a[1] != 9 || a[2] != 10 || a[3] != 5;
+ if (err)
+ abort ();
+ #pragma omp parallel firstprivate(a, b, c, err) num_threads (2)
+ #pragma omp single
+ {
+ b = c + 1;
+ a[0] = 11;
+ a[2] = 13;
+ c[1] = 14;
+ int d = 0;
+ #pragma omp target map(to: a[0:3], b[d:2]) map (from: err)
+ err = a[0] != 11 || a[1] != 9 || a[2] != 13 || b[0] != 14 || b[1] != 8;
+ if (err)
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-27.c b/libgomp/testsuite/libgomp.c/target-27.c
new file mode 100644
index 00000000000..c86651b02e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-27.c
@@ -0,0 +1,67 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main ()
+{
+ int x = 0, y = 0, z = 0, err;
+ int shared_mem = 0;
+ #pragma omp target map(to: shared_mem)
+ shared_mem = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ #pragma omp task depend(in: x)
+ {
+ usleep (5000);
+ x = 1;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (6000);
+ y = 2;
+ }
+ #pragma omp task depend(out: z)
+ {
+ usleep (7000);
+ z = 3;
+ }
+ #pragma omp target enter data map(to: x, y, z) depend(inout: x, z) nowait
+ #pragma omp task depend(inout: x, z)
+ {
+ x++; y++; z++;
+ }
+ #pragma omp target update to(x, y) depend(inout: x) nowait
+ #pragma omp target enter data map(always, to: z) depend(inout: z) nowait
+ #pragma omp target map (alloc: x, y, z) map (from: err) depend(inout: x, z)
+ {
+ err = x != 2 || y != 3 || z != 4;
+ x = 5; y = 6; z = 7;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (5000);
+ if (!shared_mem)
+ x = 1;
+ }
+ #pragma omp task depend(in: x)
+ {
+ usleep (6000);
+ if (!shared_mem)
+ y = 2;
+ }
+ #pragma omp task depend(out: z)
+ {
+ usleep (3000);
+ if (!shared_mem)
+ z = 3;
+ }
+ #pragma omp target exit data map(release: z) depend(inout: z) nowait
+ #pragma omp target exit data map(from: x, y) depend(inout: x) nowait
+ #pragma omp target exit data map(from: z) depend(inout: z) nowait
+ #pragma omp taskwait
+ if (err || x != 5 || y != 6 || z != 7)
+ abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-7.c b/libgomp/testsuite/libgomp.c/target-7.c
index 0fe6150283d..41a1332bb0c 100644
--- a/libgomp/testsuite/libgomp.c/target-7.c
+++ b/libgomp/testsuite/libgomp.c/target-7.c
@@ -37,63 +37,63 @@ foo (int f)
abort ();
#pragma omp target data device (d) map (to: h)
{
- #pragma omp target device (d)
+ #pragma omp target device (d) map (h)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 5)
abort ();
#pragma omp target update device (d) from (h)
}
#pragma omp target data if (v > 1) map (to: h)
{
- #pragma omp target if (v > 1)
+ #pragma omp target if (v > 1) map(h)
if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 6)
abort ();
#pragma omp target update if (v > 1) from (h)
}
#pragma omp target data device (d) if (v > 1) map (to: h)
{
- #pragma omp target device (d) if (v > 1)
+ #pragma omp target device (d) if (v > 1) map(h)
if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 7)
abort ();
#pragma omp target update device (d) if (v > 1) from (h)
}
#pragma omp target data if (v <= 1) map (to: h)
{
- #pragma omp target if (v <= 1)
+ #pragma omp target if (v <= 1) map (tofrom: h)
if (omp_get_level () != 0 || h++ != 8)
abort ();
#pragma omp target update if (v <= 1) from (h)
}
#pragma omp target data device (d) if (v <= 1) map (to: h)
{
- #pragma omp target device (d) if (v <= 1)
+ #pragma omp target device (d) if (v <= 1) map (h)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 9)
abort ();
#pragma omp target update device (d) if (v <= 1) from (h)
}
#pragma omp target data if (0) map (to: h)
{
- #pragma omp target if (0)
+ #pragma omp target if (0) map (h)
if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 10)
abort ();
#pragma omp target update if (0) from (h)
}
#pragma omp target data device (d) if (0) map (to: h)
{
- #pragma omp target device (d) if (0)
+ #pragma omp target device (d) if (0) map (h)
if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 11)
abort ();
#pragma omp target update device (d) if (0) from (h)
}
#pragma omp target data if (1) map (to: h)
{
- #pragma omp target if (1)
+ #pragma omp target if (1) map (tofrom: h)
if (omp_get_level () != 0 || h++ != 12)
abort ();
#pragma omp target update if (1) from (h)
}
#pragma omp target data device (d) if (1) map (to: h)
{
- #pragma omp target device (d) if (1)
+ #pragma omp target device (d) if (1) map (tofrom: h)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 13)
abort ();
#pragma omp target update device (d) if (1) from (h)
diff --git a/libgomp/testsuite/libgomp.c/taskloop-1.c b/libgomp/testsuite/libgomp.c/taskloop-1.c
new file mode 100644
index 00000000000..21551f2950c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/taskloop-1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int q, r, e;
+
+__attribute__((noinline, noclone)) void
+foo (long a, long b)
+{
+ #pragma omp taskloop lastprivate (q) nogroup
+ for (long d = a; d < b; d += 2)
+ {
+ q = d;
+ if (d < 2 || d > 6 || (d & 1))
+ #pragma omp atomic
+ e |= 1;
+ }
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a, int b)
+{
+ int q = 7;
+ #pragma omp taskloop lastprivate (q)
+ for (int d = a; d < b; d++)
+ {
+ if (d < 12 || d > 17)
+ #pragma omp atomic
+ e |= 1;
+ q = d;
+ }
+ return q;
+}
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ foo (2, 7);
+ r = bar (12, 18);
+ }
+ if (q != 6 || r != 17 || e)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/taskloop-2.c b/libgomp/testsuite/libgomp.c/taskloop-2.c
new file mode 100644
index 00000000000..be893ebf80a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/taskloop-2.c
@@ -0,0 +1,147 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+int u[1024], v[1024], w[1024], m;
+
+__attribute__((noinline, noclone)) void
+f1 (long a, long b)
+{
+ #pragma omp taskloop simd default(none) shared(u, v, w) nogroup
+ for (long d = a; d < b; d++)
+ u[d] = v[d] + w[d];
+}
+
+__attribute__((noinline, noclone)) int
+f2 (long a, long b, long c)
+{
+ int d, e;
+ #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e)
+ for (d = a; d < b; d++)
+ {
+ u[d] = v[d] + w[d];
+ c = c + 5;
+ e = c + 9;
+ }
+ return d + c + e;
+}
+
+__attribute__((noinline, noclone)) int
+f3 (long a, long b)
+{
+ int d;
+ #pragma omp taskloop simd default(none) shared(u, v, w)
+ for (d = a; d < b; d++)
+ {
+ int *p = &d;
+ u[d] = v[d] + w[d];
+ }
+ return d;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (long a, long b, long c, long d)
+{
+ int e, f, g;
+ #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g)
+ for (e = a; e < b; e++)
+ for (f = c; f < d; f++)
+ {
+ int *p = &e;
+ int *q = &f;
+ int r = 32 * e + f;
+ u[r] = v[r] + w[r];
+ g = r;
+ }
+ return e + f + g;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (long a, long b, long c, long d)
+{
+ int e, f;
+ #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2)
+ for (e = a; e < b; e++)
+ for (f = c; f < d; f++)
+ {
+ int r = 32 * e + f;
+ u[r] = v[r] + w[r];
+ }
+ return e + f;
+}
+
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 1024; i++)
+ {
+ v[i] = i;
+ w[i] = i + 1;
+ }
+ #pragma omp parallel
+ #pragma omp single
+ f1 (0, 1024);
+ for (i = 0; i < 1024; i++)
+ if (u[i] != 2 * i + 1)
+ __builtin_abort ();
+ else
+ {
+ v[i] = 1024 - i;
+ w[i] = 512 - i;
+ }
+ #pragma omp parallel
+ #pragma omp single
+ m = f2 (2, 1022, 17);
+ for (i = 0; i < 1024; i++)
+ if ((i < 2 || i >= 1022) ? u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i)
+ __builtin_abort ();
+ else
+ {
+ v[i] = i;
+ w[i] = i + 1;
+ }
+ if (m != 1022 + 2 * (1020 * 5 + 17) + 9)
+ __builtin_abort ();
+ #pragma omp parallel
+ #pragma omp single
+ m = f3 (0, 1024);
+ for (i = 0; i < 1024; i++)
+ if (u[i] != 2 * i + 1)
+ __builtin_abort ();
+ else
+ {
+ v[i] = 1024 - i;
+ w[i] = 512 - i;
+ }
+ if (m != 1024)
+ __builtin_abort ();
+ #pragma omp parallel
+ #pragma omp single
+ m = f4 (0, 32, 0, 32);
+ for (i = 0; i < 1024; i++)
+ if (u[i] != 1536 - 2 * i)
+ __builtin_abort ();
+ else
+ {
+ v[i] = i;
+ w[i] = i + 1;
+ }
+ if (m != 32 + 32 + 1023)
+ __builtin_abort ();
+ #pragma omp parallel
+ #pragma omp single
+ m = f5 (0, 32, 0, 32);
+ for (i = 0; i < 1024; i++)
+ if (u[i] != 2 * i + 1)
+ __builtin_abort ();
+ else
+ {
+ v[i] = 1024 - i;
+ w[i] = 512 - i;
+ }
+ if (m != 32 + 32)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/taskloop-3.c b/libgomp/testsuite/libgomp.c/taskloop-3.c
new file mode 100644
index 00000000000..5356d7f0251
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/taskloop-3.c
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int g;
+int a[1024];
+
+__attribute__((noinline, noclone)) int
+f1 (int x)
+{
+ #pragma omp taskloop firstprivate (x) lastprivate (x)
+ for (int i = 0; i < 64; i++)
+ {
+ if (x != 74)
+ __builtin_abort ();
+ if (i == 63)
+ x = i + 4;
+ }
+ return x;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup
+ for (int i = 0; i < 64; i++)
+ {
+ if (g != 77)
+ __builtin_abort ();
+ if (i == 63)
+ g = i + 9;
+ }
+}
+
+__attribute__((noinline, noclone)) long long
+f3 (long long a, long long b, long long c)
+{
+ long long i;
+ int l;
+ #pragma omp taskloop default (none) lastprivate (i, l)
+ for (i = a; i < b; i += c)
+ l = i;
+ return l * 7 + i;
+}
+
+__attribute__((noinline, noclone)) long long
+f4 (long long a, long long b, long long c, long long d,
+ long long e, long long f, int k)
+{
+ long long i, j;
+ int l;
+ #pragma omp taskloop default (none) collapse(2) \
+ firstprivate (k) lastprivate (i, j, k, l)
+ for (i = a; i < b; i += e)
+ for (j = c; j < d; j += f)
+ {
+ if (k != 73)
+ __builtin_abort ();
+ if (i == 31 && j == 46)
+ k = i;
+ l = j;
+ }
+ return i + 5 * j + 11 * k + 17 * l;
+}
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ if (f1 (74) != 63 + 4)
+ __builtin_abort ();
+ g = 77;
+ f2 ();
+ #pragma omp taskwait
+ if (g != 63 + 9)
+ __builtin_abort ();
+ if (f3 (7, 12, 2) != 11 * 7 + 13)
+ __builtin_abort ();
+ if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46)
+ __builtin_abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/taskloop-4.c b/libgomp/testsuite/libgomp.c/taskloop-4.c
new file mode 100644
index 00000000000..a69be19c9c2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/taskloop-4.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp" } */
+
+int u[64], v;
+
+__attribute__((noinline, noclone)) int
+test (int a, int b, int c, int d, void (*fn) (int, int, int, int),
+ int *num_tasks, int *min_iters, int *max_iters)
+{
+ int i, t = 0;
+ __builtin_memset (u, 0, sizeof u);
+ v = 0;
+ fn (a, b, c, d);
+ *min_iters = 0;
+ *max_iters = 0;
+ *num_tasks = v;
+ if (v)
+ {
+ *min_iters = u[0];
+ *max_iters = u[0];
+ t = u[0];
+ for (i = 1; i < v; i++)
+ {
+ if (*min_iters > u[i])
+ *min_iters = u[i];
+ if (*max_iters < u[i])
+ *max_iters = u[i];
+ t += u[i];
+ }
+ }
+ return t;
+}
+
+void
+grainsize (int a, int b, int c, int d)
+{
+ int i, j = 0, k = 0;
+ #pragma omp taskloop firstprivate (j, k) grainsize(d)
+ for (i = a; i < b; i += c)
+ {
+ if (j == 0)
+ {
+ #pragma omp atomic capture
+ k = v++;
+ if (k >= 64)
+ __builtin_abort ();
+ }
+ u[k] = ++j;
+ }
+}
+
+void
+num_tasks (int a, int b, int c, int d)
+{
+ int i, j = 0, k = 0;
+ #pragma omp taskloop firstprivate (j, k) num_tasks(d)
+ for (i = a; i < b; i += c)
+ {
+ if (j == 0)
+ {
+ #pragma omp atomic capture
+ k = v++;
+ if (k >= 64)
+ __builtin_abort ();
+ }
+ u[k] = ++j;
+ }
+}
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int min_iters, max_iters, ntasks;
+ /* If grainsize is present, # of task loop iters is >= grainsize && < 2 * grainsize,
+ unless # of loop iterations is smaller than grainsize. */
+ if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters) != 79
+ || min_iters < 17 || max_iters >= 17 * 2)
+ __builtin_abort ();
+ if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters) != 370
+ || min_iters < 28 || max_iters >= 28 * 2)
+ __builtin_abort ();
+ if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7
+ || ntasks != 1 || min_iters != 7 || max_iters != 7)
+ __builtin_abort ();
+ /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */
+ if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54
+ || ntasks != 9)
+ __builtin_abort ();
+ if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters) != 13
+ || ntasks != 13)
+ __builtin_abort ();
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/affinity1.f90 b/libgomp/testsuite/libgomp.fortran/affinity1.f90
new file mode 100644
index 00000000000..26b5185ba3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/affinity1.f90
@@ -0,0 +1,49 @@
+! { dg-do run }
+! { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O2" } }
+! { dg-set-target-env-var OMP_PROC_BIND "spread,close" }
+! { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" }
+! { dg-set-target-env-var OMP_NUM_THREADS "2" }
+
+ use omp_lib
+ integer :: num, i, nump
+ num = omp_get_num_places ()
+ print *, 'omp_get_num_places () == ', num
+ do i = 0, num - 1
+ nump = omp_get_place_num_procs (place_num = i)
+ if (nump .eq. 0) then
+ print *, 'place ', i, ' {}'
+ else
+ call print_place (i, nump)
+ end if
+ end do
+ call print_place_var
+ call omp_set_nested (nested = .true.)
+ !$omp parallel
+ if (omp_get_thread_num () == omp_get_num_threads () - 1) then
+ !$omp parallel
+ if (omp_get_thread_num () == omp_get_num_threads () - 1) &
+ call print_place_var
+ !$omp end parallel
+ end if
+ !$omp end parallel
+contains
+ subroutine print_place (i, nump)
+ integer, intent (in) :: i, nump
+ integer :: ids(nump)
+ call omp_get_place_proc_ids (place_num = i, ids = ids)
+ print *, 'place ', i, ' {', ids, '}'
+ end subroutine
+ subroutine print_place_var
+ integer :: place, num_places
+ place = omp_get_place_num ()
+ num_places = omp_get_partition_num_places ()
+ print *, 'place ', place
+ if (num_places .gt. 0) call print_partition (num_places)
+ end subroutine
+ subroutine print_partition (num_places)
+ integer, intent (in) :: num_places
+ integer :: place_nums(num_places)
+ call omp_get_partition_place_nums (place_nums = place_nums)
+ print *, 'partition ', place_nums(1), '-', place_nums(num_places)
+ end subroutine
+end
diff --git a/libgomp/testsuite/libgomp.fortran/affinity2.f90 b/libgomp/testsuite/libgomp.fortran/affinity2.f90
new file mode 100644
index 00000000000..338f0e8bb93
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/affinity2.f90
@@ -0,0 +1,8 @@
+! { dg-do run }
+! { dg-additional-options "-fdefault-integer-8" }
+! { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O2" } }
+! { dg-set-target-env-var OMP_PROC_BIND "spread,close" }
+! { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" }
+! { dg-set-target-env-var OMP_NUM_THREADS "2" }
+
+include 'affinity1.f90'