diff options
author | Jakub Jelinek <jakub@gcc.gnu.org> | 2015-10-13 21:06:23 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2015-10-13 21:06:23 +0200 |
commit | d9a6bd32adc40a7e1e5c72692a330f14453ad7f0 (patch) | |
tree | e8508f7b6cd5600095f6c36ccd08e6440d82340c /libgomp/target.c | |
parent | 1a6e82b8c099145a2ced78c0573eeeb90e3e2cfa (diff) | |
download | gcc-d9a6bd32adc40a7e1e5c72692a330f14453ad7f0.tar.gz |
builtin-types.def (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR, [...]): New.
gcc/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
* builtin-types.def (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
BT_FN_BOOL_UINT_LONGPTR_LONG_LONGPTR_LONGPTR,
BT_FN_BOOL_UINT_ULLPTR_ULL_ULLPTR_ULLPTR,
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_ULL_ULL_ULL,
BT_FN_VOID_LONG_VAR, BT_FN_VOID_ULL_VAR): New.
(BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): Remove.
* cgraph.h (enum cgraph_simd_clone_arg_type): Add
SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP,
SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP and
SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP.
(struct cgraph_simd_clone_arg): Adjust comment.
* coretypes.h (struct gomp_ordered): New forward decl.
* gimple.c (gimple_build_omp_critical): Add CLAUSES argument,
set critical clauses to it.
(gimple_build_omp_ordered): Return gomp_ordered * instead of
gimple *. Add CLAUSES argument, set ordered clauses to it.
(gimple_copy): Unshare clauses on GIMPLE_OMP_CRITICAL and
GIMPLE_OMP_ORDERED.
* gimple.def (GIMPLE_OMP_ORDERED): Change from GSS_OMP to
GSS_OMP_SINGLE_LAYOUT, move it after GIMPLE_OMP_TEAMS.
* gimple.h (enum gf_mask): Add GF_OMP_TASK_TASKLOOP. Add another bit
to GF_OMP_FOR_KIND_MASK mask. Add GF_OMP_FOR_KIND_TASKLOOP, renumber
GF_OMP_FOR_KIND_CILKFOR and GF_OMP_FOR_KIND_OACC_LOOP. Adjust
GF_OMP_FOR_SIMD, GF_OMP_FOR_COMBINED and GF_OMP_FOR_COMBINED_INTO.
Add another bit to GF_OMP_TARGET_KIND_MASK mask. Add
GF_OMP_TARGET_KIND_ENTER_DATA and GF_OMP_TARGET_KIND_EXIT_DATA,
renumber
GF_OMP_TARGET_KIND_OACC_{PARALLEL,KERNELS,DATA,UPDATE,ENTER_EXIT_DATA}.
(gomp_critical): Add clauses field.
(gomp_ordered): New struct.
(is_a_helper <gomp_ordered *>::test): New inline.
(gimple_build_omp_critical): Add CLAUSES argument.
(gimple_build_omp_ordered): Likewise. Return gomp_ordered *
instead of gimple *.
(gimple_omp_critical_clauses, gimple_omp_critical_clauses_ptr,
gimple_omp_critical_set_clauses, gimple_omp_ordered_clauses,
gimple_omp_ordered_clauses_ptr, gimple_omp_ordered_set_clauses,
gimple_omp_task_taskloop_p, gimple_omp_task_set_taskloop_p): New
inline functions.
* gimple-pretty-print.c (dump_gimple_omp_for): Handle taskloop.
(dump_gimple_omp_target): Handle enter data and exit data.
(dump_gimple_omp_block): Don't handle GIMPLE_OMP_ORDERED here.
(dump_gimple_omp_critical): Print clauses.
(dump_gimple_omp_ordered): New function.
(dump_gimple_omp_task): Handle taskloop.
(pp_gimple_stmt_1): Use dump_gimple_omp_ordered for
GIMPLE_OMP_ORDERED.
* gimple-walk.c (walk_gimple_op): Walk clauses on
GIMPLE_OMP_CRITICAL and GIMPLE_OMP_ORDERED.
* gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_0LEN_ARRAY.
(enum omp_region_type): Add ORT_COMBINED_TARGET and ORT_NONE.
(struct gimplify_omp_ctx): Add loop_iter_var,
target_map_scalars_firstprivate, target_map_pointers_as_0len_arrays
and target_firstprivatize_array_bases fields.
(delete_omp_context): Release loop_iter_var.
(gimplify_bind_expr): Handle ORT_NONE.
(maybe_fold_stmt): Adjust check for ORT_TARGET for the addition of
ORT_COMBINED_TARGET.
(is_gimple_stmt): Return true for OMP_TASKLOOP, OMP_TEAMS and
OMP_TARGET{,_DATA,_UPDATE,_ENTER_DATA,_EXIT_DATA}.
(omp_firstprivatize_variable): Handle ORT_NONE. Adjust check for
ORT_TARGET for the addition of ORT_COMBINED_TARGET. Handle
ctx->target_map_scalars_firstprivate.
(omp_add_variable): Handle ORT_NONE. Allow map clause together with
data sharing clauses. For data sharing clause with VLA decl
on omp target/target data don't add firstprivate for the pointer.
Call omp_notice_variable on TYPE_SIZE_UNIT only if it is a DECL_P.
(omp_notice_threadprivate_variable): Adjust check for ORT_TARGET for
the addition of ORT_COMBINED_TARGET.
(omp_notice_variable): Handle ORT_NONE. Adjust check for ORT_TARGET
for the addition of ORT_COMBINED_TARGET. Handle implicit mapping of
pointers as zero length array sections and
ctx->target_map_scalars_firstprivate mapping of scalars as firstprivate
data sharing.
(omp_check_private): Handle omp_member_access_dummy_var vars.
(find_decl_expr): New function.
(gimplify_scan_omp_clauses): Add CODE argument. For OMP_CLAUSE_IF
complain if OMP_CLAUSE_IF_MODIFIER is present and does not match code.
Handle OMP_CLAUSE_GANG separately. Handle
OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD,SIMDLEN}
clauses. Diagnose linear clause on combined
distribute {, parallel for} simd construct, unless it is the loop
iterator. Handle struct element GOMP_MAP_FIRSTPRIVATE_POINTER.
Handle map clauses with COMPONENT_REF. Initialize
ctx->target_map_scalars_firstprivate,
ctx->target_firstprivatize_array_bases and
ctx->target_map_pointers_as_0len_arrays. Add firstprivate for
linear clause even to target region if combined. Remove
map clauses with GOMP_MAP_FIRSTPRIVATE_POINTER kind from
OMP_TARGET_{,ENTER_,EXIT_}DATA. For GOMP_MAP_FIRSTPRIVATE_POINTER
map kind with non-INTEGER_CST OMP_CLAUSE_SIZE firstprivatize the bias.
Handle OMP_CLAUSE_DEPEND_{SINK,SOURCE}. Handle
OMP_CLAUSE_{{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}.
For linear clause on worksharing loop combined with parallel add
shared clause on the parallel. Handle OMP_CLAUSE_REDUCTION
with MEM_REF OMP_CLAUSE_DECL. Set DECL_NAME on
omp_member_access_dummy_var vars. Add lastprivate clause to outer
taskloop if needed.
(gimplify_adjust_omp_clauses_1): Handle GOVD_MAP_0LEN_ARRAY.
If gimplify_omp_ctxp->target_firstprivatize_array_bases, use
GOMP_MAP_FIRSTPRIVATE_POINTER map kind instead of
GOMP_MAP_POINTER.
(gimplify_adjust_omp_clauses): Add CODE argument. Handle removal
of GOMP_MAP_FIRSTPRIVATE_POINTER struct elements for struct not seen
in target body. Handle removal of struct mapping if struct is not
seen in target body. Remove GOMP_MAP_STRUCT map clause on
OMP_TARGET_EXIT_DATA. Adjust check for ORT_TARGET for the
addition of ORT_COMBINED_TARGET. Use GOMP_MAP_FIRSTPRIVATE_POINTER
instead of GOMP_MAP_POINTER if ctx->target_firstprivatize_array_bases
for VLAs. Set OMP_CLAUSE_MAP_PRIVATE if both data sharing and map
clause appear together. Handle
OMP_CLAUSE_{{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}. Don't remove map
clause if it has map-type-modifier always. Handle
OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD,SIMDLEN}
clauses.
(gimplify_oacc_cache, gimplify_omp_parallel, gimplify_omp_task):
Adjust gimplify_scan_omp_clauses and gimplify_adjust_omp_clauses
callers.
(gimplify_omp_for): Likewise. Handle OMP_TASKLOOP. Initialize
loop_iter_var. Use OMP_FOR_ORIG_DECLS. Fix handling of lastprivate
iterators in doacross loops.
(gimplify_omp_workshare): Adjust gimplify_scan_omp_clauses and
gimplify_adjust_omp_clauses callers. Use ORT_COMBINED_TARGET
for OMP_TARGET_COMBINED. Adjust check for ORT_TARGET
for the addition of ORT_COMBINED_TARGET.
(gimplify_omp_target_update): Adjust gimplify_scan_omp_clauses and
gimplify_adjust_omp_clauses callers. Handle OMP_TARGET_ENTER_DATA
and OMP_TARGET_EXIT_DATA.
(gimplify_omp_ordered): New function.
(gimplify_expr): Handle OMP_TASKLOOP, OMP_TARGET_ENTER_DATA and
OMP_TARGET_EXIT_DATA. Use gimplify_omp_ordered for OMP_ORDERED.
Gimplify clauses on OMP_CRITICAL.
* internal-fn.c (expand_GOMP_SIMD_ORDERED_START,
expand_GOMP_SIMD_ORDERED_END): New functions.
* internal-fn.def (GOMP_SIMD_ORDERED_START,
GOMP_SIMD_ORDERED_END): New internal functions.
* omp-builtins.def (BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START,
BUILT_IN_GOMP_LOOP_DOACROSS_DYNAMIC_START,
BUILT_IN_GOMP_LOOP_DOACROSS_GUIDED_START,
BUILT_IN_GOMP_LOOP_DOACROSS_RUNTIME_START,
BUILT_IN_GOMP_LOOP_ULL_DOACROSS_STATIC_START,
BUILT_IN_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START,
BUILT_IN_GOMP_LOOP_ULL_DOACROSS_GUIDED_START,
BUILT_IN_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START,
BUILT_IN_GOMP_DOACROSS_POST, BUILT_IN_GOMP_DOACROSS_WAIT,
BUILT_IN_GOMP_DOACROSS_ULL_POST, BUILT_IN_GOMP_DOACROSS_ULL_WAIT,
BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA, BUILT_IN_GOMP_TASKLOOP,
BUILT_IN_GOMP_TASKLOOP_ULL): New built-ins.
(BUILT_IN_GOMP_TASK): Add INT argument to the end.
(BUILT_IN_GOMP_TARGET): Rename from GOMP_target to GOMP_target_41,
adjust type.
(BUILT_IN_GOMP_TARGET_DATA): Rename from GOMP_target_data to
GOMP_target_data_41, adjust type.
(BUILT_IN_GOMP_TARGET_UPDATE): Rename from GOMP_target_update to
GOMP_target_update_41, adjust type.
* omp-low.c (struct omp_region): Adjust comments, add ord_stmt
field.
(struct omp_for_data): Add ordered and simd_schedule fields.
(omp_member_access_dummy_var, unshare_and_remap_1,
unshare_and_remap, is_taskloop_ctx): New functions.
(is_taskreg_ctx): Use is_parallel_ctx and is_task_ctx.
(extract_omp_for_data): Handle taskloops and doacross loops
and simd schedule modifier.
(omp_adjust_chunk_size): New function.
(get_ws_args_for): Use it.
(lookup_sfield): Change first argument to splay_tree_key,
add overload with first argument tree.
(maybe_lookup_field): Likewise.
(use_pointer_for_field): Handle omp_member_access_dummy_var.
(omp_copy_decl_2): If var is TREE_ADDRESSABLE listed in
task_shared_vars, clear TREE_ADDRESSABLE on the copy.
(build_outer_var_ref): Add LASTPRIVATE argument, handle
taskloops and omp_member_access_dummy_var vars.
(build_sender_ref): Change first argument to splay_tree_key,
add overload with first argument tree.
(install_var_field): For mask & 8 use &DECL_UID as key instead
of the tree itself.
(fixup_child_record_type): Const qualify *.omp_data_i.
(scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE,
C/C++ array reductions, OMP_CLAUSE_{IS,USE}_DEVICE_PTR clauses,
OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,SIMDLEN,THREADS,SIMD} and
OMP_CLAUSE_{NOGROUP,DEFAULTMAP} clauses, OMP_CLAUSE__LOOPTEMP_ clause
on taskloop, GOMP_MAP_FIRSTPRIVATE_POINTER, OMP_CLAUSE_MAP_PRIVATE.
(create_omp_child_function): Set TREE_READONLY on .omp_data_i.
(find_combined_for): Allow searching for different GIMPLE_OMP_FOR
kinds.
(add_taskreg_looptemp_clauses): New function.
(scan_omp_parallel): Use it.
(scan_omp_task): Likewise.
(finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
For taskloop, move fields for the first two _LOOPTEMP_ clauses first.
(check_omp_nesting_restrictions): Handle GF_OMP_TARGET_KIND_ENTER_DATA
and GF_OMP_TARGET_KIND_EXIT_DATA. Formatting fixes. Allow the
sandwiched taskloop constructs. Type check
OMP_CLAUSE_DEPEND_{KIND,SOURCE}. Allow ordered simd inside of simd
region. Diagnose depend(source) or depend(sink:...) on
target constructs or task/taskloop.
(handle_simd_reference): Use get_name.
(lower_rec_input_clauses): Likewise. Ignore all
OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct.
Allow _LOOPTEMP_ clause on GOMP_TASK. Unshare new_var
before passing it to omp_clause_{default,copy}_ctor. Handle
OMP_CLAUSE_REDUCTION with MEM_REF OMP_CLAUSE_DECL. Set
lastprivate_firstprivate flag for linear that needs copyin and
copyout. Use BUILT_IN_ALLOCA_WITH_ALIGN instead of BUILT_IN_ALLOCA.
(lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE
on taskloop lookup decl in outer context. Pass true to
build_outer_var_ref lastprivate argument. Handle
OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV lastprivate if the decl is global
outside of outer taskloop for.
(lower_reduction_clauses): Handle OMP_CLAUSE_REDUCTION with MEM_REF
OMP_CLAUSE_DECL.
(lower_send_clauses): Ignore first two _LOOPTEMP_ clauses in taskloop
GOMP_TASK. Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. Handle
omp_member_access_dummy_var vars. Handle OMP_CLAUSE_REDUCTION
with MEM_REF OMP_CLAUSE_DECL. Use new lookup_sfield overload.
(lower_send_shared_vars): Ignore fields with NULL or FIELD_DECL
abstract origin. Handle omp_member_access_dummy_var vars.
(expand_parallel_call): Use expand_omp_build_assign.
(expand_task_call): Handle taskloop construct expansion. Add
REGION argument. Use GOMP_TASK_* defines instead of hardcoded
integers. Add priority argument to GOMP_task* calls. Or in
GOMP_TASK_FLAG_PRIORITY into flags if priority is present for
GOMP_task call.
(expand_omp_build_assign): Add prototype. Add AFTER
argument, if true emit statements after *GSI_P and continue linking.
(expand_omp_taskreg): Adjust expand_task_call caller.
(expand_omp_for_init_counts): Rename zero_iter_bb argument to
zero_iter1_bb and first_zero_iter to first_zero_iter1, add
zero_iter2_bb and first_zero_iter2 arguments, handle computation
of counts even for ordered loops.
(expand_omp_for_init_vars): Handle GOMP_TASK inner_stmt.
(expand_omp_ordered_source, expand_omp_ordered_sink,
expand_omp_ordered_source_sink, expand_omp_for_ordered_loops): New
functions.
(expand_omp_for_generic): Use omp_adjust_chunk_size. Handle linear
clauses on worksharing loop. Handle DOACROSS loop expansion.
(expand_omp_for_static_nochunk): Handle linear clauses on
worksharing loop. Adjust expand_omp_for_init_counts
callers.
(expand_omp_for_static_chunk): Likewise. Use omp_adjust_chunk_size.
(expand_omp_simd): Handle addressable fd->loop.v. Adjust
expand_omp_for_init_counts callers.
(expand_omp_taskloop_for_outer, expand_omp_taskloop_for_inner): New
functions.
(expand_omp_for): Call expand_omp_taskloop_for_* for taskloop.
Handle doacross loops.
(expand_omp_target): Handle GF_OMP_TARGET_KIND_ENTER_DATA and
GF_OMP_TARGET_KIND_EXIT_DATA. Pass flags and depend arguments to
GOMP_target_{41,update_41,enter_exit_data} libcalls.
(expand_omp): Don't expand ordered depend constructs here, record
ord_stmt instead for later expand_omp_for_generic.
(build_omp_regions_1): Handle GF_OMP_TARGET_KIND_ENTER_DATA and
GF_OMP_TARGET_KIND_EXIT_DATA. Treat GIMPLE_OMP_ORDERED with depend
clause as stand-alone directive.
(lower_omp_ordered_clauses): New function.
(lower_omp_ordered): Handle OMP_CLAUSE_SIMD, for OMP_CLAUSE_DEPEND
don't lower anything.
(lower_omp_for_lastprivate): Use last _looptemp_ clause
on taskloop for comparison.
(lower_omp_for): Handle taskloop constructs. Adjust OMP_CLAUSE_DECL
and OMP_CLAUSE_LINEAR_STEP so that expand_omp_for_* can use it during
expansion for linear adjustments.
(create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
(lower_depend_clauses): Assert not seeing sink/source depend kinds.
Set TREE_ADDRESSABLE on array. Change first argument from gimple *
to tree * pointing to the stmt's clauses.
(lower_omp_taskreg): Adjust lower_depend_clauses caller.
(lower_omp_target): Handle GF_OMP_TARGET_KIND_ENTER_DATA
and GF_OMP_TARGET_KIND_EXIT_DATA, depend clauses,
GOMP_MAP_{RELEASE,ALWAYS_{TO,FROM,TOFROM},FIRSTPRIVATE_POINTER,STRUCT}
map kinds, OMP_CLAUSE_{FIRSTPRIVATE,PRIVATE,{IS,USE}_DEVICE_PTR
clauses. Always use short kind and 8-bit align shift.
(lower_omp_regimplify_p): Use IS_TYPE_OR_DECL_P macro.
(struct lower_omp_regimplify_operands_data): New type.
(lower_omp_regimplify_operands_p, lower_omp_regimplify_operands):
New functions.
(lower_omp_1): Use lower_omp_regimplify_operands instead of
gimple_regimplify_operands.
(make_gimple_omp_edges): Handle GF_OMP_TARGET_KIND_ENTER_DATA and
GF_OMP_TARGET_KIND_EXIT_DATA. Treat GIMPLE_OMP_ORDERED with depend
clause as stand-alone directive.
(simd_clone_clauses_extract): Honor OMP_CLAUSE_LINEAR_KIND.
(simd_clone_mangle): Mangle the various linear kinds
per the new ABI.
(simd_clone_adjust_argument_types): Handle
SIMD_CLONE_ARG_TYPE_LINEAR_*_CONSTANT_STEP.
(simd_clone_init_simd_arrays): Don't do anything for uval.
(simd_clone_adjust): Handle
SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP like
SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP.
Handle SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP.
* omp-low.h (omp_member_access_dummy_var): New prototype.
* passes.def (pass_simduid_cleanup): Schedule another copy of the
pass after all optimizations.
* tree.c (omp_clause_code_name): Add entries for
OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}
and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD}.
(omp_clause_num_ops): Likewise. Bump number of OMP_CLAUSE_REDUCTION
arguments to 5 and for OMP_CLAUSE_ORDERED to 1.
(walk_tree_1): Adjust for OMP_CLAUSE_ORDERED having 1 argument and
OMP_CLAUSE_REDUCTION 5 arguments. Handle
OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}
and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD}
clauses.
* tree-core.h (enum omp_clause_linear_kind): New.
(struct tree_omp_clause): Change type of map_kind
from unsigned char to unsigned int. Add subcode.if_modifier
and subcode.linear_kind fields.
(enum omp_clause_code): Add
OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,DEFAULTMAP,HINT}
and OMP_CLAUSE_{PRIORITY,GRAINSIZE,NUM_TASKS,NOGROUP,THREADS,SIMD}.
(OMP_CLAUSE_REDUCTION): Document
OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER.
(enum omp_clause_depend_kind): Add OMP_CLAUSE_DEPEND_{SOURCE,SINK}.
* tree.def (OMP_FOR): Add OMP_FOR_ORIG_DECLS operand.
(OMP_CRITICAL): Move before OMP_SINGLE. Add OMP_CRITICAL_CLAUSES
operand.
(OMP_ORDERED): Move before OMP_SINGLE. Add OMP_ORDERED_CLAUSES
operand.
(OMP_TASKLOOP, OMP_TARGET_ENTER_DATA, OMP_TARGET_EXIT_DATA): New tree
codes.
* tree.h (OMP_BODY): Replace OMP_CRITICAL with OMP_TASKGROUP.
(OMP_CLAUSE_SET_MAP_KIND): Cast to unsigned int rather than unsigned
char.
(OMP_CRITICAL_NAME): Adjust to be 3rd operand instead of 2nd.
(OMP_CLAUSE_NUM_TASKS_EXPR): Formatting fix.
(OMP_STANDALONE_CLAUSES): Adjust to cover OMP_TARGET_{ENTER,EXIT}_DATA.
(OMP_CLAUSE_DEPEND_SINK_NEGATIVE, OMP_TARGET_COMBINED,
OMP_CLAUSE_MAP_PRIVATE, OMP_FOR_ORIG_DECLS, OMP_CLAUSE_IF_MODIFIER,
OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION, OMP_CRITICAL_CLAUSES,
OMP_CLAUSE_PRIVATE_TASKLOOP_IV, OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV,
OMP_CLAUSE_HINT_EXPR, OMP_CLAUSE_SCHEDULE_SIMD,
OMP_CLAUSE_LINEAR_KIND, OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER,
OMP_CLAUSE_SHARED_FIRSTPRIVATE, OMP_ORDERED_CLAUSES,
OMP_TARGET_ENTER_DATA_CLAUSES, OMP_TARGET_EXIT_DATA_CLAUSES,
OMP_CLAUSE_NUM_TASKS_EXPR, OMP_CLAUSE_GRAINSIZE_EXPR,
OMP_CLAUSE_PRIORITY_EXPR, OMP_CLAUSE_ORDERED_EXPR): Define.
* tree-inline.c (remap_gimple_stmt): Handle clauses on
GIMPLE_OMP_ORDERED and GIMPLE_OMP_CRITICAL. For
IFN_GOMP_SIMD_ORDERED_{START,END} set has_simduid_loops.
* tree-nested.c (convert_nonlocal_omp_clauses): Handle
OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,SIMDLEN,PRIORITY,SIMD}
and OMP_CLAUSE_{GRAINSIZE,NUM_TASKS,HINT,NOGROUP,THREADS,DEFAULTMAP}
clauses. Handle OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER.
(convert_local_omp_clauses): Likewise.
* tree-pretty-print.c (dump_omp_clause): Handle
OMP_CLAUSE_{TO_DECLARE,LINK,{USE,IS}_DEVICE_PTR,SIMDLEN,PRIORITY,SIMD}
and OMP_CLAUSE_{GRAINSIZE,NUM_TASKS,HINT,NOGROUP,THREADS,DEFAULTMAP}
clauses. Handle OMP_CLAUSE_IF_MODIFIER, OMP_CLAUSE_ORDERED_EXPR,
OMP_CLAUSE_SCHEDULE_SIMD, OMP_CLAUSE_LINEAR_KIND,
OMP_CLAUSE_DEPEND_{SOURCE,SINK}. Use "delete" for
GOMP_MAP_FORCE_DEALLOC. Handle
GOMP_MAP_{ALWAYS_{TO,FROM,TOFROM},RELEASE,FIRSTPRIVATE_POINTER,STRUCT}.
(dump_generic_node): Handle OMP_TASKLOOP, OMP_TARGET_{ENTER,EXIT}_DATA
and clauses on OMP_ORDERED and OMP_CRITICAL.
* tree-vectorizer.c (adjust_simduid_builtins): Adjust comment.
Remove IFN_GOMP_SIMD_ORDERED_{START,END}.
(vectorize_loops): Adjust comments.
(pass_simduid_cleanup::execute): Likewise.
* tree-vect-stmts.c (vectorizable_simd_clone_call): Handle
SIMD_CLONE_ARG_TYPE_LINEAR_{REF,VAL,UVAL}_CONSTANT_STEP.
* wide-int.h (wi::gcd): New.
gcc/c-family/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
* c-common.c (enum c_builtin_type): Define DEF_FUNCTION_TYPE_9,
DEF_FUNCTION_TYPE_10 and DEF_FUNCTION_TYPE_11.
(c_define_builtins): Likewise.
* c-common.h (enum c_omp_clause_split): Add
C_OMP_CLAUSE_SPLIT_TASKLOOP.
(c_finish_omp_critical, c_finish_omp_ordered): Add CLAUSES argument.
(c_finish_omp_for): Add ORIG_DECLV argument.
* c-cppbuiltin.c (c_cpp_builtins): Predefine _OPENMP as
201511 instead of 201307.
* c-omp.c (c_finish_omp_critical): Add CLAUSES argument, set
OMP_CRITICAL_CLAUSES to it.
(c_finish_omp_ordered): Add CLAUSES argument, set
OMP_ORDERED_CLAUSES to it.
(c_finish_omp_for): Add ORIG_DECLV argument, set OMP_FOR_ORIG_DECLS
to it if OMP_FOR. Clear DECL_INITIAL on the IVs.
(c_omp_split_clauses): Handle OpenMP 4.5 combined/composite
constructs and new OpenMP 4.5 clauses. Clear
OMP_CLAUSE_SCHEDULE_SIMD if not combined with OMP_SIMD. Add
verification code.
* c-pragma.c (omp_pragmas_simd): Add taskloop.
* c-pragma.h (enum pragma_kind): Add PRAGMA_OMP_TASKLOOP.
(enum pragma_omp_clause): Add
PRAGMA_OMP_CLAUSE_{DEFAULTMAP,GRAINSIZE,HINT,{IS,USE}_DEVICE_PTR}
and PRAGMA_OMP_CLAUSE_{LINK,NOGROUP,NUM_TASKS,PRIORITY,SIMD,THREADS}.
gcc/c/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
* c-parser.c (c_parser_pragma): Handle PRAGMA_OMP_ORDERED here.
(c_parser_omp_clause_name): Handle OpenMP 4.5 clauses.
(c_parser_omp_variable_list): Handle structure elements for
map, to and from clauses. Handle array sections in reduction
clause. Formatting fixes.
(c_parser_omp_clause_if): Add IS_OMP argument, handle parsing of
if clause modifiers.
(c_parser_omp_clause_num_tasks, c_parser_omp_clause_grainsize,
c_parser_omp_clause_priority, c_parser_omp_clause_hint,
c_parser_omp_clause_defaultmap, c_parser_omp_clause_use_device_ptr,
c_parser_omp_clause_is_device_ptr): New functions.
(c_parser_omp_clause_ordered): Parse optional parameter.
(c_parser_omp_clause_reduction): Handle array reductions.
(c_parser_omp_clause_schedule): Parse optional simd modifier.
(c_parser_omp_clause_nogroup, c_parser_omp_clause_orderedkind): New
functions.
(c_parser_omp_clause_linear): Parse linear clause modifiers.
(c_parser_omp_clause_depend_sink): New function.
(c_parser_omp_clause_depend): Parse source/sink depend kinds.
(c_parser_omp_clause_map): Parse release/delete map kinds and
optional always modifier.
(c_parser_oacc_all_clauses): Adjust c_parser_omp_clause_if
and c_finish_omp_clauses callers.
(c_parser_omp_all_clauses): Likewise. Parse OpenMP 4.5 clauses.
Parse "to" as OMP_CLAUSE_TO_DECLARE if on declare target directive.
(c_parser_oacc_cache): Adjust c_finish_omp_clauses caller.
(OMP_CRITICAL_CLAUSE_MASK): Define.
(c_parser_omp_critical): Parse critical clauses.
(c_parser_omp_for_loop): Handle doacross loops, adjust
c_finish_omp_for and c_finish_omp_clauses callers.
(OMP_SIMD_CLAUSE_MASK): Add simdlen clause.
(c_parser_omp_simd): Allow ordered clause if it has no parameter.
(OMP_FOR_CLAUSE_MASK): Add linear clause.
(c_parser_omp_for): Disallow ordered clause when combined with
distribute. Disallow linear clause when combined with distribute
and not combined with simd.
(OMP_ORDERED_CLAUSE_MASK, OMP_ORDERED_DEPEND_CLAUSE_MASK): Define.
(c_parser_omp_ordered): Add CONTEXT argument, remove LOC argument,
parse clauses and if depend clause is found, don't parse a body.
(c_parser_omp_parallel): Disallow copyin clause on target parallel.
Allow target parallel without for after it.
(OMP_TASK_CLAUSE_MASK): Add priority clause.
(OMP_TARGET_DATA_CLAUSE_MASK): Add use_device_ptr clause.
(c_parser_omp_target_data): Diagnose no map clauses or clauses with
invalid kinds.
(OMP_TARGET_UPDATE_CLAUSE_MASK): Add depend and nowait clauses.
(OMP_TARGET_ENTER_DATA_CLAUSE_MASK,
OMP_TARGET_EXIT_DATA_CLAUSE_MASK): Define.
(c_parser_omp_target_enter_data, c_parser_omp_target_exit_data): New
functions.
(OMP_TARGET_CLAUSE_MASK): Add depend, nowait, private, firstprivate,
defaultmap and is_device_ptr clauses.
(c_parser_omp_target): Parse target parallel and target simd. Set
OMP_TARGET_COMBINED on combined constructs. Parse target enter data
and target exit data. Diagnose invalid map kinds.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Define.
(c_parser_omp_declare_target): Parse OpenMP 4.5 forms of this
construct.
(c_parser_omp_declare_reduction): Use STRIP_NOPS when checking for
&omp_priv.
(OMP_TASKLOOP_CLAUSE_MASK): Define.
(c_parser_omp_taskloop): New function.
(c_parser_omp_construct): Don't handle PRAGMA_OMP_ORDERED here,
handle PRAGMA_OMP_TASKLOOP.
(c_parser_cilk_for): Adjust c_finish_omp_clauses callers.
* c-tree.h (c_finish_omp_clauses): Add two new arguments.
* c-typeck.c (handle_omp_array_sections_1): Fix comment typo.
Add IS_OMP argument, handle structure element bases, diagnose
bitfields, pass IS_OMP recursively, diagnose known zero length
array sections in depend clauses, handle array sections in reduction
clause, diagnose negative length even for pointers.
(handle_omp_array_sections): Add IS_OMP argument, use auto_vec for
types, pass IS_OMP down to handle_omp_array_sections_1, handle
array sections in reduction clause, set
OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION if map could be zero
length array section, use GOMP_MAP_FIRSTPRIVATE_POINTER for IS_OMP.
(c_finish_omp_clauses): Add IS_OMP and DECLARE_SIMD arguments.
Handle new OpenMP 4.5 clauses and new restrictions for the old ones.
gcc/cp/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
* class.c (finish_struct_1): Call finish_omp_declare_simd_methods.
* cp-gimplify.c (cp_gimplify_expr): Handle OMP_TASKLOOP.
(cp_genericize_r): Likewise.
(cxx_omp_finish_clause): Don't diagnose references.
(cxx_omp_disregard_value_expr): New function.
* cp-objcp-common.h (LANG_HOOKS_OMP_DISREGARD_VALUE_EXPR): Redefine.
* cp-tree.h (OMP_FOR_GIMPLIFYING_P): Document for OMP_TASKLOOP.
(DECL_OMP_PRIVATIZED_MEMBER): Define.
(finish_omp_declare_simd_methods, push_omp_privatization_clauses,
pop_omp_privatization_clauses, save_omp_privatization_clauses,
restore_omp_privatization_clauses, omp_privatize_field,
cxx_omp_disregard_value_expr): New prototypes.
(finish_omp_clauses): Add two new arguments.
(finish_omp_for): Add ORIG_DECLV argument.
* parser.c (cp_parser_lambda_body): Call
save_omp_privatization_clauses and restore_omp_privatization_clauses.
(cp_parser_omp_clause_name): Handle OpenMP 4.5 clauses.
(cp_parser_omp_var_list_no_open): Handle structure elements for
map, to and from clauses. Handle array sections in reduction
clause. Parse this keyword. Formatting fixes.
(cp_parser_omp_clause_if): Add IS_OMP argument, handle parsing of
if clause modifiers.
(cp_parser_omp_clause_num_tasks, cp_parser_omp_clause_grainsize,
cp_parser_omp_clause_priority, cp_parser_omp_clause_hint,
cp_parser_omp_clause_defaultmap): New functions.
(cp_parser_omp_clause_ordered): Parse optional parameter.
(cp_parser_omp_clause_reduction): Handle array reductions.
(cp_parser_omp_clause_schedule): Parse optional simd modifier.
(cp_parser_omp_clause_nogroup, cp_parser_omp_clause_orderedkind):
New functions.
(cp_parser_omp_clause_linear): Parse linear clause modifiers.
(cp_parser_omp_clause_depend_sink): New function.
(cp_parser_omp_clause_depend): Parse source/sink depend kinds.
(cp_parser_omp_clause_map): Parse release/delete map kinds and
optional always modifier.
(cp_parser_oacc_all_clauses): Adjust cp_parser_omp_clause_if
and finish_omp_clauses callers.
(cp_parser_omp_all_clauses): Likewise. Parse OpenMP 4.5 clauses.
Parse "to" as OMP_CLAUSE_TO_DECLARE if on declare target directive.
(OMP_CRITICAL_CLAUSE_MASK): Define.
(cp_parser_omp_critical): Parse critical clauses.
(cp_parser_omp_for_incr): Use cp_tree_equal if
processing_template_decl.
(cp_parser_omp_for_loop_init): Return tree instead of bool. Handle
non-static data member iterators.
(cp_parser_omp_for_loop): Handle doacross loops, adjust
finish_omp_for and finish_omp_clauses callers.
(cp_omp_split_clauses): Adjust finish_omp_clauses caller.
(OMP_SIMD_CLAUSE_MASK): Add simdlen clause.
(cp_parser_omp_simd): Allow ordered clause if it has no parameter.
(OMP_FOR_CLAUSE_MASK): Add linear clause.
(cp_parser_omp_for): Disallow ordered clause when combined with
distribute. Disallow linear clause when combined with distribute
and not combined with simd.
(OMP_ORDERED_CLAUSE_MASK, OMP_ORDERED_DEPEND_CLAUSE_MASK): Define.
(cp_parser_omp_ordered): Add CONTEXT argument, return bool instead
of tree, parse clauses and if depend clause is found, don't parse
a body.
(cp_parser_omp_parallel): Disallow copyin clause on target parallel.
Allow target parallel without for after it.
(OMP_TASK_CLAUSE_MASK): Add priority clause.
(OMP_TARGET_DATA_CLAUSE_MASK): Add use_device_ptr clause.
(cp_parser_omp_target_data): Diagnose no map clauses or clauses with
invalid kinds.
(OMP_TARGET_UPDATE_CLAUSE_MASK): Add depend and nowait clauses.
(OMP_TARGET_ENTER_DATA_CLAUSE_MASK,
OMP_TARGET_EXIT_DATA_CLAUSE_MASK): Define.
(cp_parser_omp_target_enter_data, cp_parser_omp_target_exit_data): New
functions.
(OMP_TARGET_CLAUSE_MASK): Add depend, nowait, private, firstprivate,
defaultmap and is_device_ptr clauses.
(cp_parser_omp_target): Parse target parallel and target simd. Set
OMP_TARGET_COMBINED on combined constructs. Parse target enter data
and target exit data. Diagnose invalid map kinds.
(cp_parser_oacc_cache): Adjust finish_omp_clauses caller.
(OMP_DECLARE_TARGET_CLAUSE_MASK): Define.
(cp_parser_omp_declare_target): Parse OpenMP 4.5 forms of this
construct.
(OMP_TASKLOOP_CLAUSE_MASK): Define.
(cp_parser_omp_taskloop): New function.
(cp_parser_omp_construct): Don't handle PRAGMA_OMP_ORDERED here,
handle PRAGMA_OMP_TASKLOOP.
(cp_parser_pragma): Handle PRAGMA_OMP_ORDERED here directly,
handle PRAGMA_OMP_TASKLOOP, call push_omp_privatization_clauses
and pop_omp_privatization_clauses around parsing calls.
(cp_parser_cilk_for): Adjust finish_omp_clauses caller.
* pt.c (apply_late_template_attributes): Adjust tsubst_omp_clauses
and finish_omp_clauses callers.
(tsubst_omp_clause_decl): Return NULL if decl is NULL.
For TREE_LIST, copy over OMP_CLAUSE_DEPEND_SINK_NEGATIVE bit.
Use tsubst_expr instead of tsubst_copy, undo convert_from_reference
effects.
(tsubst_omp_clauses): Add ALLOW_FIELDS argument. Handle new
OpenMP 4.5 clauses. Use tsubst_omp_clause_decl for more clauses.
If ALLOW_FIELDS, handle non-static data members in the clauses.
Clear OMP_CLAUSE_LINEAR_STEP if it has been cleared before.
(omp_parallel_combined_clauses): New variable.
(tsubst_omp_for_iterator): Add ORIG_DECLV argument, recur on
OMP_FOR_ORIG_DECLS, handle non-static data member iterators.
Improve handling of clauses on combined constructs.
(tsubst_expr): Call push_omp_privatization_clauses and
pop_omp_privatization_clauses around instantiation of certain
OpenMP constructs, improve handling of clauses on combined
constructs, handle OMP_TASKLOOP, adjust tsubst_omp_for_iterator,
tsubst_omp_clauses and finish_omp_for callers, handle clauses on
critical and ordered, handle OMP_TARGET_{ENTER,EXIT}_DATA.
(instantiate_decl): Call save_omp_privatization_clauses and
restore_omp_privatization_clauses around instantiation.
(dependent_omp_for_p): Fix up comment typo. Handle SCOPE_REF.
* semantics.c (omp_private_member_map, omp_private_member_vec,
omp_private_member_ignore_next): New variables.
(finish_non_static_data_member): Return dummy decl for privatized
non-static data members.
(omp_clause_decl_field, omp_clause_printable_decl,
omp_note_field_privatization, omp_privatize_field): New functions.
(handle_omp_array_sections_1): Fix comment typo.
Add IS_OMP argument, handle structure element bases, diagnose
bitfields, pass IS_OMP recursively, diagnose known zero length
array sections in depend clauses, handle array sections in reduction
clause, diagnose negative length even for pointers.
(handle_omp_array_sections): Add IS_OMP argument, use auto_vec for
types, pass IS_OMP down to handle_omp_array_sections_1, handle
array sections in reduction clause, set
OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION if map could be zero
length array section, use GOMP_MAP_FIRSTPRIVATE_POINTER for IS_OMP.
(finish_omp_reduction_clause): Handle array sections and arrays.
Use omp_clause_printable_decl.
(finish_omp_declare_simd_methods, cp_finish_omp_clause_depend_sink):
New functions.
(finish_omp_clauses): Add ALLOW_FIELDS and DECLARE_SIMD arguments.
Handle new OpenMP 4.5 clauses and new restrictions for the old
ones, handle non-static data members, reject this keyword when not
allowed.
(push_omp_privatization_clauses, pop_omp_privatization_clauses,
save_omp_privatization_clauses, restore_omp_privatization_clauses):
New functions.
(handle_omp_for_class_iterator): Handle OMP_TASKLOOP class iterators.
Add collapse and ordered arguments. Fix handling of lastprivate
iterators in doacross loops.
(finish_omp_for): Add ORIG_DECLV argument, handle doacross loops,
adjust c_finish_omp_for, handle_omp_for_class_iterator and
finish_omp_clauses callers. Fill in OMP_CLAUSE_LINEAR_STEP on simd
loops with non-static data member iterators.
gcc/fortran/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
* f95-lang.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11, DEF_FUNCTION_TYPE_VAR_1): Define.
* trans-openmp.c (gfc_trans_omp_clauses): Set
OMP_CLAUSE_IF_MODIFIER to ERROR_MARK, OMP_CLAUSE_ORDERED_EXPR
to NULL.
(gfc_trans_omp_critical): Adjust for addition of clauses.
(gfc_trans_omp_ordered): Likewise.
* types.def (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
BT_FN_BOOL_UINT_LONGPTR_LONG_LONGPTR_LONGPTR,
BT_FN_BOOL_UINT_ULLPTR_ULL_ULLPTR_ULLPTR,
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_ULL_ULL_ULL,
BT_FN_VOID_LONG_VAR, BT_FN_VOID_ULL_VAR): New.
(BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): Remove.
gcc/lto/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
* lto-lang.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
gcc/jit/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
* jit-builtins.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
* jit-builtins.h (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
gcc/ada/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
gcc/testsuite/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
* c-c++-common/gomp/cancel-1.c (f2): Add map clause to target data.
* c-c++-common/gomp/clauses-1.c: New test.
* c-c++-common/gomp/clauses-2.c: New test.
* c-c++-common/gomp/clauses-3.c: New test.
* c-c++-common/gomp/clauses-4.c: New test.
* c-c++-common/gomp/declare-target-1.c: New test.
* c-c++-common/gomp/declare-target-2.c: New test.
* c-c++-common/gomp/depend-3.c: New test.
* c-c++-common/gomp/depend-4.c: New test.
* c-c++-common/gomp/doacross-1.c: New test.
* c-c++-common/gomp/if-1.c: New test.
* c-c++-common/gomp/if-2.c: New test.
* c-c++-common/gomp/linear-1.c: New test.
* c-c++-common/gomp/map-2.c: New test.
* c-c++-common/gomp/map-3.c: New test.
* c-c++-common/gomp/nesting-1.c (f_omp_parallel,
f_omp_target_data): Add map clause to target data.
* c-c++-common/gomp/nesting-warn-1.c (f_omp_target): Likewise.
* c-c++-common/gomp/ordered-1.c: New test.
* c-c++-common/gomp/ordered-2.c: New test.
* c-c++-common/gomp/ordered-3.c: New test.
* c-c++-common/gomp/pr61486-1.c (foo): Remove linear clause
on non-iterator.
* c-c++-common/gomp/pr61486-2.c (test, test2): Remove ordered
clause and ordered construct where no longer allowed.
* c-c++-common/gomp/priority-1.c: New test.
* c-c++-common/gomp/reduction-1.c: New test.
* c-c++-common/gomp/schedule-simd-1.c: New test.
* c-c++-common/gomp/sink-1.c: New test.
* c-c++-common/gomp/sink-2.c: New test.
* c-c++-common/gomp/sink-3.c: New test.
* c-c++-common/gomp/sink-4.c: New test.
* c-c++-common/gomp/udr-1.c: New test.
* c-c++-common/taskloop-1.c: New test.
* c-c++-common/cpp/openmp-define-3.c: Adjust for the new
value of _OPENMP macro.
* c-c++-common/cilk-plus/PS/body.c (foo): Adjust expected diagnostics.
* c-c++-common/goacc-gomp/nesting-fail-1.c (f_acc_parallel,
f_acc_kernels, f_acc_data, f_acc_loop): Add map clause to target data.
* gcc.dg/gomp/clause-1.c:
* gcc.dg/gomp/reduction-1.c: New test.
* gcc.dg/gomp/sink-fold-1.c: New test.
* gcc.dg/gomp/sink-fold-2.c: New test.
* gcc.dg/gomp/sink-fold-3.c: New test.
* gcc.dg/vect/vect-simd-clone-15.c: New test.
* g++.dg/gomp/clause-1.C (T::test): Remove dg-error on privatization
of non-static data members.
* g++.dg/gomp/clause-3.C (foo): Remove one dg-error directive.
Add some linear clause tests.
* g++.dg/gomp/declare-simd-3.C: New test.
* g++.dg/gomp/linear-1.C: New test.
* g++.dg/gomp/member-1.C: New test.
* g++.dg/gomp/member-2.C: New test.
* g++.dg/gomp/pr66571-2.C: New test.
* g++.dg/gomp/pr67504.C (foo): Add test for ordered clause with
dependent argument.
* g++.dg/gomp/pr67522.C (foo): Add test for invalid array section
in reduction clause.
* g++.dg/gomp/reference-1.C: New test.
* g++.dg/gomp/sink-1.C: New test.
* g++.dg/gomp/sink-2.C: New test.
* g++.dg/gomp/sink-3.C: New test.
* g++.dg/gomp/task-1.C: Remove both dg-error directives.
* g++.dg/gomp/this-1.C: New test.
* g++.dg/gomp/this-2.C: New test.
* g++.dg/vect/simd-clone-2.cc: New test.
* g++.dg/vect/simd-clone-2.h: New test.
* g++.dg/vect/simd-clone-3.cc: New test.
* g++.dg/vect/simd-clone-4.cc: New test.
* g++.dg/vect/simd-clone-4.h: New test.
* g++.dg/vect/simd-clone-5.cc: New test.
include/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
* gomp-constants.h (GOMP_MAP_FLAG_ALWAYS): Define.
(enum gomp_map_kind): Add GOMP_MAP_FIRSTPRIVATE,
GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_USE_DEVICE_PTR,
GOMP_MAP_ZERO_LEN_ARRAY_SECTION, GOMP_MAP_ALWAYS_TO,
GOMP_MAP_ALWAYS_FROM, GOMP_MAP_ALWAYS_TOFROM, GOMP_MAP_STRUCT,
GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION, GOMP_MAP_DELETE,
GOMP_MAP_RELEASE, GOMP_MAP_FIRSTPRIVATE_POINTER.
(GOMP_MAP_ALWAYS_TO_P, GOMP_MAP_ALWAYS_FROM_P): Define.
(GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL, GOMP_TASK_FLAG_MERGEABLE,
GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_PRIORITY, GOMP_TASK_FLAG_UP,
GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP,
GOMP_TARGET_FLAG_NOWAIT, GOMP_TARGET_FLAG_EXIT_DATA,
GOMP_TARGET_FLAG_UPDATE): Define.
libgomp/
2015-10-13 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
* config/linux/affinity.c (omp_get_place_num_procs,
omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions.
* config/linux/doacross.h: New file.
* config/posix/affinity.c (omp_get_place_num_procs,
omp_get_place_proc_ids, gomp_get_place_proc_ids_8): New functions.
* config/posix/doacross.h: New file.
* env.c: Include gomp-constants.h.
(struct gomp_task_icv): Rename run_sched_modifier to
run_sched_chunk_size.
(gomp_max_task_priority_var): New variable.
(parse_schedule): Rename run_sched_modifier to run_sched_chunk_size.
(handle_omp_display_env): Change _OPENMP value from 201307 to
201511. Print OMP_MAX_TASK_PRIORITY.
(initialize_env): Parse OMP_MAX_TASK_PRIORITY.
(omp_set_schedule, omp_get_schedule): Rename modifier argument to
chunk_size and run_sched_modifier to run_sched_chunk_size.
(omp_get_max_task_priority, omp_get_initial_device,
omp_get_num_places, omp_get_place_num, omp_get_partition_num_places,
omp_get_partition_place_nums): New functions.
* fortran.c (omp_set_schedule_, omp_set_schedule_8_,
omp_get_schedule_, omp_get_schedule_8_): Rename modifier argument
to chunk_size.
(omp_get_num_places_, omp_get_place_num_procs_,
omp_get_place_num_procs_8_, omp_get_place_proc_ids_,
omp_get_place_proc_ids_8_, omp_get_place_num_,
omp_get_partition_num_places_, omp_get_partition_place_nums_,
omp_get_partition_place_nums_8_, omp_get_initial_device_,
omp_get_max_task_priority_): New functions.
* libgomp_g.h (GOMP_loop_doacross_static_start,
GOMP_loop_doacross_dynamic_start, GOMP_loop_doacross_guided_start,
GOMP_loop_doacross_runtime_start, GOMP_loop_ull_doacross_static_start,
GOMP_loop_ull_doacross_dynamic_start,
GOMP_loop_ull_doacross_guided_start,
GOMP_loop_ull_doacross_runtime_start, GOMP_doacross_post,
GOMP_doacross_wait, GOMP_doacross_ull_post, GOMP_doacross_wait,
GOMP_taskloop, GOMP_taskloop_ull, GOMP_target_41,
GOMP_target_data_41, GOMP_target_update_41,
GOMP_target_enter_exit_data): New prototypes.
(GOMP_task): Add prototype argument.
* libgomp.h (_LIBGOMP_CHECKING_): Define to 0 if not yet defined.
(struct gomp_doacross_work_share): New type.
(struct gomp_work_share): Add doacross field.
(struct gomp_task_icv): Rename run_sched_modifier to
run_sched_chunk_size.
(enum gomp_task_kind): Rename GOMP_TASK_IFFALSE to
GOMP_TASK_UNDEFERRED. Add comments.
(struct gomp_task_depend_entry): Add comments.
(struct gomp_task): Likewise.
(struct gomp_taskgroup): Likewise.
(struct gomp_target_task): New type.
(struct gomp_team): Add comment.
(gomp_get_place_proc_ids_8, gomp_doacross_init,
gomp_doacross_ull_init, gomp_task_maybe_wait_for_dependencies,
gomp_create_target_task, gomp_target_task_fn): New prototypes.
(struct target_var_desc): New type.
(struct target_mem_desc): Adjust comment. Use struct
target_var_desc instead of splay_tree_key for list.
(REFCOUNT_INFINITY): Define.
(struct splay_tree_key_s): Remove copy_from field.
(struct gomp_device_descr): Add dev2dev_func field.
(enum gomp_map_vars_kind): New enum.
(gomp_map_vars): Add one argument.
* libgomp.map (OMP_4.5): Export omp_get_max_task_priority,
omp_get_max_task_priority_, omp_get_num_places, omp_get_num_places_,
omp_get_place_num_procs, omp_get_place_num_procs_,
omp_get_place_num_procs_8_, omp_get_place_proc_ids,
omp_get_place_proc_ids_, omp_get_place_proc_ids_8_, omp_get_place_num,
omp_get_place_num_, omp_get_partition_num_places,
omp_get_partition_num_places_, omp_get_partition_place_nums,
omp_get_partition_place_nums_, omp_get_partition_place_nums_8_,
omp_get_initial_device, omp_get_initial_device_, omp_target_alloc,
omp_target_free, omp_target_is_present, omp_target_memcpy,
omp_target_memcpy_rect, omp_target_associate_ptr and
omp_target_disassociate_ptr.
(GOMP_4.0.2): Renamed to ...
(GOMP_4.5): ... this. Export GOMP_target_41, GOMP_target_data_41,
GOMP_target_update_41, GOMP_target_enter_exit_data, GOMP_taskloop,
GOMP_taskloop_ull, GOMP_loop_doacross_dynamic_start,
GOMP_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start,
GOMP_loop_doacross_static_start, GOMP_doacross_post,
GOMP_doacross_wait, GOMP_loop_ull_doacross_dynamic_start,
GOMP_loop_ull_doacross_guided_start,
GOMP_loop_ull_doacross_runtime_start,
GOMP_loop_ull_doacross_static_start, GOMP_doacross_ull_post and
GOMP_doacross_ull_wait.
* libgomp.texi: Document omp_get_max_task_priority.
Rename modifier argument to chunk_size for omp_set_schedule and
omp_get_schedule. Document OMP_MAX_TASK_PRIORITY env var.
* loop.c (GOMP_loop_runtime_start): Adjust for run_sched_modifier
to run_sched_chunk_size renaming.
(GOMP_loop_ordered_runtime_start): Likewise.
(gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
gomp_loop_doacross_guided_start, GOMP_loop_doacross_runtime_start,
GOMP_parallel_loop_runtime_start): New functions.
(GOMP_parallel_loop_runtime): Adjust for run_sched_modifier
to run_sched_chunk_size renaming.
(GOMP_loop_doacross_static_start, GOMP_loop_doacross_dynamic_start,
GOMP_loop_doacross_guided_start): New functions or aliases.
* loop_ull.c (GOMP_loop_ull_runtime_start): Adjust for
run_sched_modifier to run_sched_chunk_size renaming.
(GOMP_loop_ull_ordered_runtime_start): Likewise.
(gomp_loop_ull_doacross_static_start,
gomp_loop_ull_doacross_dynamic_start,
gomp_loop_ull_doacross_guided_start,
GOMP_loop_ull_doacross_runtime_start): New functions.
(GOMP_loop_ull_doacross_static_start,
GOMP_loop_ull_doacross_dynamic_start,
GOMP_loop_ull_doacross_guided_start): New functions or aliases.
* oacc-mem.c (acc_map_data, present_create_copy,
gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false
to gomp_map_vars.
(gomp_acc_remove_pointer): Use copy_from from target_var_desc.
* oacc-parallel.c (GOACC_data_start): Pass GOMP_MAP_VARS_OPENACC
instead of false to gomp_map_vars.
(GOACC_parallel_keyed): Likewise. Use copy_from from target_var_desc.
* omp.h.in (omp_lock_hint_t): New type.
(omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
omp_get_place_num, omp_get_partition_num_places,
omp_get_partition_place_nums, omp_get_initial_device,
omp_get_max_task_priority, omp_target_alloc, omp_target_free,
omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect,
omp_target_associate_ptr, omp_target_disassociate_ptr): New
prototypes.
* omp_lib.f90.in (omp_lock_hint_kind): New parameter.
(omp_lock_hint_none, omp_lock_hint_uncontended,
omp_lock_hint_contended, omp_lock_hint_nonspeculative,
omp_lock_hint_speculative): New parameters.
(omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
omp_get_place_num, omp_get_partition_num_places,
omp_get_partition_place_nums, omp_get_initial_device,
omp_get_max_task_priority): New interfaces.
(omp_set_schedule, omp_get_schedule): Rename modifier argument
to chunk_size.
* omp_lib.h.in (omp_lock_hint_kind): New parameter.
(omp_lock_hint_none, omp_lock_hint_uncontended,
omp_lock_hint_contended, omp_lock_hint_nonspeculative,
omp_lock_hint_speculative): New parameters.
(omp_init_lock_with_hint, omp_init_nest_lock_with_hint,
omp_get_num_places, omp_get_place_num_procs, omp_get_place_proc_ids,
omp_get_place_num, omp_get_partition_num_places,
omp_get_partition_place_nums, omp_get_initial_device,
omp_get_max_task_priority): New functions and subroutines.
* ordered.c: Include stdarg.h and string.h.
(MAX_COLLAPSED_BITS): Define.
(gomp_doacross_init, GOMP_doacross_post, GOMP_doacross_wait,
gomp_doacross_ull_init, GOMP_doacross_ull_post,
GOMP_doacross_ull_wait): New functions.
* target.c: Include errno.h.
(resolve_device): If device is not initialized, call
gomp_init_device on it.
(gomp_map_lookup): New function.
(gomp_map_vars_existing): Add tgt_var argument, fill it in.
Don't bump refcount if REFCOUNT_INFINITY. Handle
GOMP_MAP_ALWAYS_TO_P.
(get_kind): Rename is_openacc argument to short_mapkind.
(gomp_map_pointer): Use gomp_map_lookup.
(gomp_map_fields_existing): New function.
(gomp_map_vars): Rename is_openacc argument to short_mapkind
and is_target to pragma_kind. Handle GOMP_MAP_VARS_ENTER_DATA,
handle GOMP_MAP_FIRSTPRIVATE_INT, GOMP_MAP_STRUCT,
GOMP_MAP_USE_DEVICE_PTR, GOMP_MAP_ZERO_LEN_ARRAY_SECTION.
Adjust for tgt->list changed type and copy_from living in there.
(gomp_copy_from_async): Adjust for tgt->list changed type and
copy_from living in there.
(gomp_unmap_vars): Likewise.
(gomp_update): Likewise. Rename is_openacc argument to
short_mapkind. Don't fail if object is not mapped.
(gomp_load_image_to_device): Initialize refcount to
REFCOUNT_INFINITY.
(gomp_target_fallback): New function.
(gomp_get_target_fn_addr): Likewise.
(GOMP_target): Adjust gomp_map_vars caller, use
gomp_get_target_fn_addr and gomp_target_fallback.
(GOMP_target_41): New function.
(gomp_target_data_fallback): New function.
(GOMP_target_data): Use it, adjust gomp_map_vars caller.
(GOMP_target_data_41): New function.
(GOMP_target_update): Adjust gomp_update caller.
(GOMP_target_update_41): New function.
(gomp_exit_data, GOMP_target_enter_exit_data,
gomp_target_task_fn, omp_target_alloc, omp_target_free,
omp_target_is_present, omp_target_memcpy,
omp_target_memcpy_rect_worker, omp_target_memcpy_rect,
omp_target_associate_ptr, omp_target_disassociate_ptr,
gomp_load_plugin_for_device): New functions.
* task.c: Include gomp-constants.h. Include taskloop.c
twice to get GOMP_taskloop and GOMP_taskloop_ull definitions.
(gomp_task_handle_depend): New function.
(GOMP_task): Use it. Add priority argument. Use
gomp-constant.h constants instead of hardcoded numbers.
Rename GOMP_TASK_IFFALSE to GOMP_TASK_UNDEFERRED.
(gomp_create_target_task): New function.
(verify_children_queue, verify_taskgroup_queue,
verify_task_queue): New functions.
(gomp_task_run_pre): Call verify_*_queue functions.
If an upcoming tied task is about to leave the sibling or
taskgroup queues in an invalid state, adjust appropriately.
Remove taskgroup argument. Add comments.
(gomp_task_run_post_handle_dependers): Add comments.
(gomp_task_run_post_remove_parent): Likewise.
(gomp_barrier_handle_tasks): Adjust gomp_task_run_pre caller.
(GOMP_taskwait): Likewise. Add comments.
(gomp_task_maybe_wait_for_dependencies): Fix scheduling
problem such that the first non parent_depends_on task does not
end up at the end of the children queue.
(GOMP_taskgroup_start): Rename GOMP_TASK_IFFALSE to
GOMP_TASK_UNDEFERRED.
(GOMP_taskgroup_end): Adjust gomp_task_run_pre caller.
* taskloop.c: New file.
* testsuite/lib/libgomp.exp
(check_effective_target_offload_device_nonshared_as): New proc.
* testsuite/libgomp.c/affinity-2.c: New test.
* testsuite/libgomp.c/doacross-1.c: New test.
* testsuite/libgomp.c/doacross-2.c: New test.
* testsuite/libgomp.c/examples-4/declare_target-1.c (fib_wrapper):
Add map clause to target.
* testsuite/libgomp.c/examples-4/declare_target-4.c (accum): Likewise.
* testsuite/libgomp.c/examples-4/declare_target-5.c (accum): Likewise.
* testsuite/libgomp.c/examples-4/device-1.c (main): Likewise.
* testsuite/libgomp.c/examples-4/device-3.c (main): Likewise.
* testsuite/libgomp.c/examples-4/target_data-3.c (gramSchmidt):
Likewise.
* testsuite/libgomp.c/examples-4/teams-2.c (dotprod): Likewise.
* testsuite/libgomp.c/examples-4/teams-3.c (dotprod): Likewise.
* testsuite/libgomp.c/examples-4/teams-4.c (dotprod): Likewise.
* testsuite/libgomp.c/for-2.h (OMPTGT, OMPTO, OMPFROM): Define if
not defined. Use those where needed.
* testsuite/libgomp.c/for-4.c: New test.
* testsuite/libgomp.c/for-5.c: New test.
* testsuite/libgomp.c/for-6.c: New test.
* testsuite/libgomp.c/linear-1.c: New test.
* testsuite/libgomp.c/ordered-4.c: New test.
* testsuite/libgomp.c/pr66199-2.c (f2): Adjust for linear clause
only allowed on the loop iterator.
* testsuite/libgomp.c/pr66199-3.c: New test.
* testsuite/libgomp.c/pr66199-4.c: New test.
* testsuite/libgomp.c/reduction-7.c: New test.
* testsuite/libgomp.c/reduction-8.c: New test.
* testsuite/libgomp.c/reduction-9.c: New test.
* testsuite/libgomp.c/reduction-10.c: New test.
* testsuite/libgomp.c/target-1.c (fn2, fn3, fn4): Add
map(tofrom:s).
* testsuite/libgomp.c/target-2.c (fn2, fn3, fn4): Likewise.
* testsuite/libgomp.c/target-7.c (foo): Add map(h) where needed.
* testsuite/libgomp.c/target-11.c: New test.
* testsuite/libgomp.c/target-12.c: New test.
* testsuite/libgomp.c/target-13.c: New test.
* testsuite/libgomp.c/target-14.c: New test.
* testsuite/libgomp.c/target-15.c: New test.
* testsuite/libgomp.c/target-16.c: New test.
* testsuite/libgomp.c/target-17.c: New test.
* testsuite/libgomp.c/target-18.c: New test.
* testsuite/libgomp.c/target-19.c: New test.
* testsuite/libgomp.c/target-20.c: New test.
* testsuite/libgomp.c/target-21.c: New test.
* testsuite/libgomp.c/target-22.c: New test.
* testsuite/libgomp.c/target-23.c: New test.
* testsuite/libgomp.c/target-24.c: New test.
* testsuite/libgomp.c/target-25.c: New test.
* testsuite/libgomp.c/target-26.c: New test.
* testsuite/libgomp.c/target-27.c: New test.
* testsuite/libgomp.c/taskloop-1.c: New test.
* testsuite/libgomp.c/taskloop-2.c: New test.
* testsuite/libgomp.c/taskloop-3.c: New test.
* testsuite/libgomp.c/taskloop-4.c: New test.
* testsuite/libgomp.c++/ctor-13.C: New test.
* testsuite/libgomp.c++/doacross-1.C: New test.
* testsuite/libgomp.c++/examples-4/declare_target-2.C:
Replace offload_device with offload_device_nonshared_as.
* testsuite/libgomp.c++/for-12.C: New test.
* testsuite/libgomp.c++/for-13.C: New test.
* testsuite/libgomp.c++/for-14.C: New test.
* testsuite/libgomp.c++/linear-1.C: New test.
* testsuite/libgomp.c++/member-1.C: New test.
* testsuite/libgomp.c++/member-2.C: New test.
* testsuite/libgomp.c++/member-3.C: New test.
* testsuite/libgomp.c++/member-4.C: New test.
* testsuite/libgomp.c++/member-5.C: New test.
* testsuite/libgomp.c++/ordered-1.C: New test.
* testsuite/libgomp.c++/reduction-5.C: New test.
* testsuite/libgomp.c++/reduction-6.C: New test.
* testsuite/libgomp.c++/reduction-7.C: New test.
* testsuite/libgomp.c++/reduction-8.C: New test.
* testsuite/libgomp.c++/reduction-9.C: New test.
* testsuite/libgomp.c++/reduction-10.C: New test.
* testsuite/libgomp.c++/reference-1.C: New test.
* testsuite/libgomp.c++/simd14.C: New test.
* testsuite/libgomp.c++/target-2.C (fn2): Add map(tofrom: s) clause.
* testsuite/libgomp.c++/target-5.C: New test.
* testsuite/libgomp.c++/target-6.C: New test.
* testsuite/libgomp.c++/target-7.C: New test.
* testsuite/libgomp.c++/target-8.C: New test.
* testsuite/libgomp.c++/target-9.C: New test.
* testsuite/libgomp.c++/target-10.C: New test.
* testsuite/libgomp.c++/target-11.C: New test.
* testsuite/libgomp.c++/target-12.C: New test.
* testsuite/libgomp.c++/taskloop-1.C: New test.
* testsuite/libgomp.c++/taskloop-2.C: New test.
* testsuite/libgomp.c++/taskloop-3.C: New test.
* testsuite/libgomp.c++/taskloop-4.C: New test.
* testsuite/libgomp.c++/taskloop-5.C: New test.
* testsuite/libgomp.c++/taskloop-6.C: New test.
* testsuite/libgomp.c++/taskloop-7.C: New test.
* testsuite/libgomp.c++/taskloop-8.C: New test.
* testsuite/libgomp.c++/taskloop-9.C: New test.
* testsuite/libgomp.fortran/affinity1.f90: New test.
* testsuite/libgomp.fortran/affinity2.f90: New test.
liboffloadmic/
2015-10-13 Ilya Verbin <ilya.verbin@intel.com>
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_dev2dev): New
function.
* plugin/offload_target_main.cpp (__offload_target_tgt2tgt): New
static function, register it in liboffloadmic.
From-SVN: r228777
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- | libgomp/target.c | 1274 |
1 files changed, 1115 insertions, 159 deletions
diff --git a/libgomp/target.c b/libgomp/target.c index 758ece5d78c..de6a2c9c9c5 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -38,6 +38,7 @@ #endif #include <string.h> #include <assert.h> +#include <errno.h> #ifdef PLUGIN_SUPPORT #include <dlfcn.h> @@ -133,17 +134,48 @@ resolve_device (int device_id) if (device_id < 0 || device_id >= gomp_get_num_devices ()) return NULL; + gomp_mutex_lock (&devices[device_id].lock); + if (!devices[device_id].is_initialized) + gomp_init_device (&devices[device_id]); + gomp_mutex_unlock (&devices[device_id].lock); + return &devices[device_id]; } -/* Handle the case where splay_tree_lookup found oldn for newn. +static inline splay_tree_key +gomp_map_lookup (splay_tree mem_map, splay_tree_key key) +{ + if (key->host_start != key->host_end) + return splay_tree_lookup (mem_map, key); + + key->host_end++; + splay_tree_key n = splay_tree_lookup (mem_map, key); + key->host_end--; + if (n) + return n; + key->host_start--; + n = splay_tree_lookup (mem_map, key); + key->host_start++; + if (n) + return n; + return splay_tree_lookup (mem_map, key); +} + +/* Handle the case where gomp_map_lookup found oldn for newn. Helper function of gomp_map_vars. */ static inline void gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, - splay_tree_key newn, unsigned char kind) + splay_tree_key newn, struct target_var_desc *tgt_var, + unsigned char kind) { + tgt_var->key = oldn; + tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); + tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); + tgt_var->offset = newn->host_start - oldn->host_start; + tgt_var->length = newn->host_end - newn->host_start; + if ((kind & GOMP_MAP_FLAG_FORCE) || oldn->host_start > newn->host_start || oldn->host_end < newn->host_end) @@ -154,14 +186,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, (void *) newn->host_start, (void *) newn->host_end, (void *) oldn->host_start, (void *) oldn->host_end); } - oldn->refcount++; + + if (GOMP_MAP_ALWAYS_TO_P (kind)) + devicep->host2dev_func (devicep->target_id, + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), + (void *) newn->host_start, + newn->host_end - newn->host_start); + if (oldn->refcount != REFCOUNT_INFINITY) + oldn->refcount++; } static int -get_kind (bool is_openacc, void *kinds, int idx) +get_kind (bool short_mapkind, void *kinds, int idx) { - return is_openacc ? ((unsigned short *) kinds)[idx] - : ((unsigned char *) kinds)[idx]; + return short_mapkind ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; } static void @@ -185,20 +225,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, } /* Add bias to the pointer value. */ cur_node.host_start += bias; - cur_node.host_end = cur_node.host_start + 1; - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - /* Could be possibly zero size array section. */ - cur_node.host_end--; - n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - cur_node.host_start--; - n = splay_tree_lookup (mem_map, &cur_node); - cur_node.host_start++; - } - } + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); if (n == NULL) { gomp_mutex_unlock (&devicep->lock); @@ -218,20 +246,81 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, sizeof (void *)); } +static void +gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, + size_t first, size_t i, void **hostaddrs, + size_t *sizes, void *kinds) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int kind; + const bool short_mapkind = true; + const int typemask = short_mapkind ? 0xff : 0x7; + + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); + kind = get_kind (short_mapkind, kinds, i); + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, + &tgt->list[i], kind & typemask); + return; + } + if (sizes[i] == 0) + { + if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) + { + cur_node.host_start--; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_start++; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start + == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + cur_node.host_end++; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_end--; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to map into device [%p..%p) structure element when " + "other mapped elements from the same structure weren't mapped " + "together with it", (void *) cur_node.host_start, + (void *) cur_node.host_end); +} + attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool is_openacc, bool is_target) + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; - const int rshift = is_openacc ? 8 : 3; - const int typemask = is_openacc ? 0xff : 0x7; + bool has_firstprivate = false; + const int rshift = short_mapkind ? 8 : 3; + const int typemask = short_mapkind ? 0xff : 0x7; struct splay_tree_s *mem_map = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; if (mapnum == 0) @@ -239,7 +328,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_align = sizeof (void *); tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -250,10 +339,61 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, for (i = 0; i < mapnum; i++) { - int kind = get_kind (is_openacc, kinds, i); - if (hostaddrs[i] == NULL) + int kind = get_kind (short_mapkind, kinds, i); + if (hostaddrs[i] == NULL + || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("use_device_ptr pointer wasn't mapped"); + } + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_STRUCT) + { + size_t first = i + 1; + size_t last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 2; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i]; + not_found_cnt += last - i; + for (i = first; i <= last; i++) + tgt->list[i].key = NULL; + i--; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; @@ -261,15 +401,37 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n) + if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) + { + tgt->list[i].key = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - cur_node.host_start; + has_firstprivate = true; + continue; + } + splay_tree_key n; + if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask); + n = gomp_map_lookup (mem_map, &cur_node); + if (!n) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 1; + continue; + } } else + n = splay_tree_lookup (mem_map, &cur_node); + if (n) + gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], + kind & typemask); + else { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; size_t align = (size_t) 1 << (kind >> rshift); not_found_cnt++; @@ -281,7 +443,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { size_t j; for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] < cur_node.host_start @@ -290,7 +452,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = NULL; + tgt->list[j].key = NULL; i++; } } @@ -308,7 +470,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || is_target) + else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -327,22 +489,74 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; - if (not_found_cnt) + if (not_found_cnt || has_firstprivate) { - tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); + if (not_found_cnt) + tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); splay_tree_node array = tgt->array; - size_t j; + size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; + uintptr_t field_tgt_base = 0; for (i = 0; i < mapnum; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (hostaddrs[i] == NULL) continue; + switch (kind & typemask) + { + size_t align, len, first, last; + splay_tree_key n; + case GOMP_MAP_FIRSTPRIVATE: + align = (size_t) 1 << (kind >> rshift); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].offset = tgt_size; + len = sizes[i]; + devicep->host2dev_func (devicep->target_id, + (void *) (tgt->tgt_start + tgt_size), + (void *) hostaddrs[i], len); + tgt_size += len; + continue; + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + continue; + case GOMP_MAP_STRUCT: + first = i + 1; + last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + if (tgt->list[first].key != NULL) + continue; + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + field_tgt_base = (uintptr_t) hostaddrs[first]; + field_tgt_offset = tgt_size; + field_tgt_clear = last; + tgt_size += cur_node.host_end + - (uintptr_t) hostaddrs[first]; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; + continue; + default: + break; + } splay_tree_key k = &array->key; k->host_start = (uintptr_t) hostaddrs[i]; if (!GOMP_MAP_POINTER_P (kind & typemask)) @@ -351,19 +565,31 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); if (n) - { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, k, kind & typemask); - } + gomp_map_vars_existing (devicep, n, k, &tgt->list[i], + kind & typemask); else { size_t align = (size_t) 1 << (kind >> rshift); - tgt->list[i] = k; - tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].key = k; k->tgt = tgt; - k->tgt_offset = tgt_size; - tgt_size += k->host_end - k->host_start; - k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + if (field_tgt_clear != ~(size_t) 0) + { + k->tgt_offset = k->host_start - field_tgt_base + + field_tgt_offset; + if (i == field_tgt_clear) + field_tgt_clear = ~(size_t) 0; + } + else + { + tgt_size = (tgt_size + align - 1) & ~(align - 1); + k->tgt_offset = tgt_size; + tgt_size += k->host_end - k->host_start; + } + tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + tgt->list[i].always_copy_from + = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + tgt->list[i].offset = 0; + tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; k->async_refcount = 0; tgt->refcount++; @@ -376,11 +602,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, case GOMP_MAP_FROM: case GOMP_MAP_FORCE_ALLOC: case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: break; case GOMP_MAP_TO: case GOMP_MAP_TOFROM: case GOMP_MAP_FORCE_TO: case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_TOFROM: /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ @@ -403,7 +632,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end - k->host_start); for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, + j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] < k->host_start @@ -412,8 +642,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = k; - k->refcount++; + tgt->list[j].key = k; + tgt->list[j].copy_from = false; + tgt->list[j].always_copy_from = false; + if (k->refcount != REFCOUNT_INFINITY) + k->refcount++; gomp_map_pointer (tgt, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset @@ -460,15 +693,30 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { - if (tgt->list[i] == NULL) - cur_node.tgt_offset = (uintptr_t) NULL; + if (tgt->list[i].key == NULL) + { + if (tgt->list[i].offset == ~(uintptr_t) 0) + cur_node.tgt_offset = (uintptr_t) hostaddrs[i]; + else if (tgt->list[i].offset == ~(uintptr_t) 1) + cur_node.tgt_offset = 0; + else if (tgt->list[i].offset == ~(uintptr_t) 2) + cur_node.tgt_offset = tgt->list[i + 1].key->tgt->tgt_start + + tgt->list[i + 1].key->tgt_offset + + tgt->list[i + 1].offset + + (uintptr_t) hostaddrs[i] + - (uintptr_t) hostaddrs[i + 1]; + else + cur_node.tgt_offset = tgt->tgt_start + + tgt->list[i].offset; + } else - cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start - + tgt->list[i]->tgt_offset; + cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset; /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start @@ -478,6 +726,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } + /* If the variable from "omp target enter data" map-list was already mapped, + tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or + gomp_exit_data. */ + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { + free (tgt); + tgt = NULL; + } + gomp_mutex_unlock (&devicep->lock); return tgt; } @@ -508,17 +765,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt) gomp_mutex_lock (&devicep->lock); for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) ; - else if (tgt->list[i]->refcount > 1) + else if (tgt->list[i].key->refcount > 1) { - tgt->list[i]->refcount--; - tgt->list[i]->async_refcount++; + tgt->list[i].key->refcount--; + tgt->list[i].key->async_refcount++; } else { - splay_tree_key k = tgt->list[i]; - if (k->copy_from) + splay_tree_key k = tgt->list[i].key; + if (tgt->list[i].copy_from) devicep->dev2host_func (devicep->target_id, (void *) k->host_start, (void *) (k->tgt->tgt_start + k->tgt_offset), k->host_end - k->host_start); @@ -546,25 +803,41 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) size_t i; for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) - ; - else if (tgt->list[i]->refcount > 1) - tgt->list[i]->refcount--; - else if (tgt->list[i]->async_refcount > 0) - tgt->list[i]->async_refcount--; - else - { - splay_tree_key k = tgt->list[i]; - if (k->copy_from && do_copyfrom) - devicep->dev2host_func (devicep->target_id, (void *) k->host_start, - (void *) (k->tgt->tgt_start + k->tgt_offset), - k->host_end - k->host_start); - splay_tree_remove (&devicep->mem_map, k); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + { + splay_tree_key k = tgt->list[i].key; + if (k == NULL) + continue; + + bool do_unmap = false; + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + else if (k->refcount == 1) + { + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } + } + + if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + || tgt->list[i].always_copy_from) + devicep->dev2host_func (devicep->target_id, + (void *) (k->host_start + tgt->list[i].offset), + (void *) (k->tgt->tgt_start + k->tgt_offset + + tgt->list[i].offset), + tgt->list[i].length); + if (do_unmap) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + } if (tgt->refcount > 1) tgt->refcount--; @@ -576,11 +849,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) static void gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - size_t *sizes, void *kinds, bool is_openacc) + size_t *sizes, void *kinds, bool short_mapkind) { size_t i; struct splay_tree_key_s cur_node; - const int typemask = is_openacc ? 0xff : 0x7; + const int typemask = short_mapkind ? 0xff : 0x7; if (!devicep) return; @@ -597,7 +870,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); if (n) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (n->host_start > cur_node.host_start || n->host_end < cur_node.host_end) { @@ -626,13 +899,6 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - n->host_start), cur_node.host_end - cur_node.host_start); } - else - { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Trying to update [%p..%p) object that is not mapped", - (void *) cur_node.host_start, - (void *) cur_node.host_end); - } } gomp_mutex_unlock (&devicep->lock); } @@ -678,7 +944,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); - tgt->refcount = 1; + tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; @@ -694,9 +960,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + 1; k->tgt = tgt; k->tgt_offset = target_table[i].start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -720,9 +985,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; k->tgt = tgt; k->tgt_offset = target_var->start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -945,6 +1209,47 @@ gomp_fini_device (struct gomp_device_descr *devicep) devicep->is_initialized = false; } +/* Host fallback for GOMP_target{,_41} routines. */ + +static void +gomp_target_fallback (void (*fn) (void *), void **hostaddrs) +{ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) + { + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; + } + fn (hostaddrs); + gomp_free_thread (thr); + *thr = old_thr; +} + +/* Helper function of GOMP_target{,_41} routines. */ + +static void * +gomp_get_target_fn_addr (struct gomp_device_descr *devicep, + void (*host_fn) (void *)) +{ + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + return (void *) host_fn; + else + { + gomp_mutex_lock (&devicep->lock); + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); + gomp_mutex_unlock (&devicep->lock); + if (tgt_fn == NULL) + gomp_fatal ("Target function wasn't mapped"); + + return (void *) tgt_fn->tgt_offset; + } +} + /* Called when encountering a target directive. If DEVICE is GOMP_DEVICE_ICV, it means use device-var ICV. If it is GOMP_DEVICE_HOST_FALLBACK (or any value @@ -964,51 +1269,85 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_fallback (fn, hostaddrs); + + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + + struct target_mem_desc *tgt_vars + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + GOMP_MAP_VARS_TARGET); + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) { - /* Host fallback. */ - struct gomp_thread old_thr, *thr = gomp_thread (); - old_thr = *thr; - memset (thr, '\0', sizeof (*thr)); - if (gomp_places_list) - { - thr->place = old_thr.place; - thr->ts.place_partition_len = gomp_places_list_len; - } - fn (hostaddrs); - gomp_free_thread (thr); - *thr = old_thr; - return; + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; } + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); + gomp_free_thread (thr); + *thr = old_thr; + gomp_unmap_vars (tgt_vars, true); +} - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); +void +GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); - void *fn_addr; + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + } - if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) - fn_addr = (void *) fn; - else + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { - gomp_mutex_lock (&devicep->lock); - struct splay_tree_key_s k; - k.host_start = (uintptr_t) fn; - k.host_end = k.host_start + 1; - splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); - if (tgt_fn == NULL) + size_t i, tgt_align = 0, tgt_size = 0; + char *tgt = NULL; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += sizes[i]; + } + if (tgt_align) { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Target function wasn't mapped"); + tgt = gomp_alloca (tgt_size + tgt_align - 1); + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; + } } - gomp_mutex_unlock (&devicep->lock); - - fn_addr = (void *) tgt_fn->tgt_offset; + gomp_target_fallback (fn, hostaddrs); + return; } + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - true); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_TARGET); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -1023,6 +1362,26 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, gomp_unmap_vars (tgt_vars, true); } +/* Host fallback for GOMP_target_data{,_41} routines. */ + +static void +gomp_target_data_fallback (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + /* Even when doing a host fallback, if there are any active + #pragma omp target data constructs, need to remember the + new #pragma omp target data, otherwise GOMP_target_end_data + would get out of sync. */ + struct target_mem_desc *tgt + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, + GOMP_MAP_VARS_DATA); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } +} + void GOMP_target_data (int device, const void *unused, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) @@ -1031,31 +1390,29 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) - { - /* Host fallback. */ - struct gomp_task_icv *icv = gomp_icv (false); - if (icv->target_data) - { - /* Even when doing a host fallback, if there are any active - #pragma omp target data constructs, need to remember the - new #pragma omp target data, otherwise GOMP_target_end_data - would get out of sync. */ - struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); - tgt->prev = icv->target_data; - icv->target_data = tgt; - } - return; - } - - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); + return gomp_target_data_fallback (); struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - false); + GOMP_MAP_VARS_DATA); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; +} + +void +GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_data_fallback (); + + struct target_mem_desc *tgt + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -1083,12 +1440,230 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) return; + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +} + +void +GOMP_target_update_41 (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags | GOMP_TARGET_FLAG_UPDATE, + depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); +} + +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) + ? gomp_map_lookup (&devicep->mem_map, &cur_node) + : splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + if ((kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) + && k->refcount != REFCOUNT_INFINITY) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + gomp_mutex_unlock (&devicep->lock); +} - gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags, depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + size_t i; + if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += sizes[i]; + } + else + gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); +} + +void +gomp_target_task_fn (void *data) +{ + struct gomp_target_task *ttask = (struct gomp_target_task *) data; + if (ttask->fn != NULL) + { + /* GOMP_target_41 */ + } + else if (ttask->devicep == NULL + || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + size_t i; + if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) + gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, true); + else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1, + &ttask->hostaddrs[i], NULL, &ttask->sizes[i], + &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL, + &ttask->sizes[i], &ttask->kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs, + ttask->sizes, ttask->kinds); } void @@ -1103,6 +1678,384 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit) (void) num_teams; } +void * +omp_target_alloc (size_t size, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return malloc (size); + + if (device_num < 0) + return NULL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return NULL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return malloc (size); + + gomp_mutex_lock (&devicep->lock); + void *ret = devicep->alloc_func (devicep->target_id, size); + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +void +omp_target_free (void *device_ptr, int device_num) +{ + if (device_ptr == NULL) + return; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + { + free (device_ptr); + return; + } + + if (device_num < 0) + return; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + { + free (device_ptr); + return; + } + + gomp_mutex_lock (&devicep->lock); + devicep->free_func (devicep->target_id, device_ptr); + gomp_mutex_unlock (&devicep->lock); +} + +int +omp_target_is_present (void *ptr, int device_num) +{ + if (ptr == NULL) + return 1; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 1; + + if (device_num < 0) + return 0; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return 0; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return 1; + + gomp_mutex_lock (&devicep->lock); + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + int ret = n != NULL; + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, + size_t src_offset, int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + if (src_devicep == NULL && dst_devicep == NULL) + { + memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); + return 0; + } + if (src_devicep == NULL) + { + gomp_mutex_lock (&dst_devicep->lock); + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&dst_devicep->lock); + return 0; + } + if (dst_devicep == NULL) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + if (src_devicep == dst_devicep) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + return EINVAL; +} + +static int +omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + struct gomp_device_descr *dst_devicep, + struct gomp_device_descr *src_devicep) +{ + size_t dst_slice = element_size; + size_t src_slice = element_size; + size_t j, dst_off, src_off, length; + int i, ret; + + if (num_dims == 1) + { + if (__builtin_mul_overflow (element_size, volume[0], &length) + || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) + return EINVAL; + if (dst_devicep == NULL && src_devicep == NULL) + memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + else if (src_devicep == NULL) + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (dst_devicep == NULL) + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (src_devicep == dst_devicep) + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else + return EINVAL; + return 0; + } + + /* FIXME: it would be nice to have some plugin function to handle + num_dims == 2 and num_dims == 3 more efficiently. Larger ones can + be handled in the generic recursion below, and for host-host it + should be used even for any num_dims >= 2. */ + + for (i = 1; i < num_dims; i++) + if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) + || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) + return EINVAL; + if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) + return EINVAL; + for (j = 0; j < volume[0]; j++) + { + ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, + (char *) src + src_off, + element_size, num_dims - 1, + volume + 1, dst_offsets + 1, + src_offsets + 1, dst_dimensions + 1, + src_dimensions + 1, dst_devicep, + src_devicep); + if (ret) + return ret; + dst_off += dst_slice; + src_off += src_slice; + } + return 0; +} + +int +omp_target_memcpy_rect (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (!dst && !src) + return INT_MAX; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + + if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) + return EINVAL; + + if (src_devicep) + gomp_mutex_lock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_lock (&dst_devicep->lock); + int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, + volume, dst_offsets, src_offsets, + dst_dimensions, src_dimensions, + dst_devicep, src_devicep); + if (src_devicep) + gomp_mutex_unlock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_unlock (&dst_devicep->lock); + return ret; +} + +int +omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, + size_t device_offset, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) host_ptr; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n) + { + if (n->tgt->tgt_start + n->tgt_offset + == (uintptr_t) device_ptr + device_offset + && n->host_start <= cur_node.host_start + && n->host_end >= cur_node.host_end) + ret = 0; + } + else + { + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->array = gomp_malloc (sizeof (*tgt->array)); + tgt->refcount = 1; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = NULL; + tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node array = tgt->array; + splay_tree_key k = &array->key; + k->host_start = cur_node.host_start; + k->host_end = cur_node.host_end; + k->tgt = tgt; + k->tgt_offset = (uintptr_t) device_ptr + device_offset; + k->refcount = REFCOUNT_INFINITY; + k->async_refcount = 0; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + ret = 0; + } + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_disassociate_ptr (void *ptr, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n + && n->host_start == cur_node.host_start + && n->refcount == REFCOUNT_INFINITY + && n->tgt->tgt_start == 0 + && n->tgt->to_free == NULL + && n->tgt->refcount == 1 + && n->tgt->list_count == 0) + { + splay_tree_remove (&devicep->mem_map, n); + gomp_unmap_tgt (n->tgt); + ret = 0; + } + + gomp_mutex_unlock (&devicep->lock); + return ret; +} + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed @@ -1153,7 +2106,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (host2dev); device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - DLSYM (run); + { + DLSYM (run); + DLSYM (dev2dev); + } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { if (!DLSYM_OPT (openacc.exec, openacc_parallel) |