diff options
author | Alexander Monakov <amonakov@ispras.ru> | 2016-01-15 13:37:30 +0300 |
---|---|---|
committer | Alexander Monakov <amonakov@ispras.ru> | 2016-01-20 18:35:27 +0300 |
commit | 9599235d847610670c186e6d672250c7a28f0d38 (patch) | |
tree | b195b212181a39ed54a468bd9d16c17fc5322d5e | |
parent | 01f92ec03161b4539fb5466f49ab1c547e9bd5cd (diff) | |
download | gcc-9599235d847610670c186e6d672250c7a28f0d38.tar.gz |
pick GOMP_target_ext changes from the hsa branch
This adds necessary plumbing to spawn multiple teams.
To be reverted on this branch prior to merge.
-rw-r--r-- | gcc/builtin-types.def | 7 | ||||
-rw-r--r-- | gcc/fortran/types.def | 5 | ||||
-rw-r--r-- | gcc/omp-builtins.def | 2 | ||||
-rw-r--r-- | gcc/omp-low.c | 149 | ||||
-rw-r--r-- | include/gomp-constants.h | 21 | ||||
-rw-r--r-- | libgomp/libgomp.h | 12 | ||||
-rw-r--r-- | libgomp/libgomp_g.h | 3 | ||||
-rw-r--r-- | libgomp/oacc-host.c | 3 | ||||
-rw-r--r-- | libgomp/target.c | 179 | ||||
-rw-r--r-- | libgomp/task.c | 3 | ||||
-rw-r--r-- | liboffloadmic/plugin/libgomp-plugin-intelmic.cpp | 4 |
11 files changed, 299 insertions, 89 deletions
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index c68fb198c85..33bee1d1e49 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -555,10 +555,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG, BT_BOOL, BT_UINT, BT_PTR, BT_INT) - -DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, - BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, - BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT) +DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, + BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, + BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index a37e856708b..5838f04868a 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -220,10 +220,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG, BT_BOOL, BT_UINT, BT_PTR, BT_INT) - -DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, +DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, - BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT) + BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 35f5014a62b..35c2724f6f1 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -341,7 +341,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start", DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end", BT_FN_VOID_PTR, 
ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext", - BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, + BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext", BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 8996b8dc04e..2e02c6f8bbd 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -12731,6 +12731,130 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry, loop->in_oacc_kernels_region = true; } +/* Build target argument identifier from the DEVICE identifier, value + identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ + +static tree +get_target_argument_identifier_1 (int device, bool subseqent_param, int id) +{ + tree t = build_int_cst (integer_type_node, device); + if (subseqent_param) + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, + GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, id)); + return t; +} + +/* Like above but return it in type that can be directly stored as an element + of the argument array. */ + +static tree +get_target_argument_identifier (int device, bool subseqent_param, int id) +{ + tree t = get_target_argument_identifier_1 (device, subseqent_param, id); + return fold_convert (ptr_type_node, t); +} + +/* Return a target argument consisiting of DEVICE identifier, value identifier + ID, and the actual VALUE. 
*/ + +static tree +get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, + tree value) +{ + tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, + fold_convert (integer_type_node, value), + build_int_cst (unsigned_type_node, + GOMP_TARGET_ARG_VALUE_SHIFT)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + get_target_argument_identifier_1 (device, false, id)); + t = fold_convert (ptr_type_node, t); + return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); +} + +/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, + push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it, + otherwise push an identifier (with DEVICE and ID) and the VALUE in two + arguments. */ + +static void +push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, + int id, tree value, vec <tree> *args) +{ + if (tree_fits_shwi_p (value) + && tree_to_shwi (value) > -(1 << 15) + && tree_to_shwi (value) < (1 << 15)) + args->quick_push (get_target_argument_value (gsi, device, id, value)); + else + { + args->quick_push (get_target_argument_identifier (device, true, id)); + value = fold_convert (ptr_type_node, value); + value = force_gimple_operand_gsi (gsi, value, true, NULL, true, + GSI_SAME_STMT); + args->quick_push (value); + } +} + +/* Create an array of arguments that is then passed to GOMP_target.
*/ + +static tree +get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) +{ + auto_vec <tree, 6> args; + tree clauses = gimple_omp_target_clauses (tgt_stmt); + tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); + if (c) + t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_NUM_TEAMS, t, &args); + + c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); + if (c) + t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_THREAD_LIMIT, t, + &args); + +#if 0 + /* Add HSA-specific grid sizes, if available. */ + if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + { + t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, + GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES); + args.quick_push (t); + args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); + } +#endif + + /* Produce more, perhaps device specific, arguments here. */ + + tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, + args.length () + 1), + ".omp_target_args"); + for (unsigned i = 0; i < args.length (); i++) + { + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, i), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), + GSI_SAME_STMT); + } + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, args.length ()), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), + GSI_SAME_STMT); + TREE_ADDRESSABLE (argarray) = 1; + return build_fold_addr_expr (argarray); +} + /* Expand the GIMPLE_OMP_TARGET starting at REGION. 
*/ static void @@ -13148,30 +13272,7 @@ expand_omp_target (struct omp_region *region) depend = build_int_cst (ptr_type_node, 0); args.quick_push (depend); if (start_ix == BUILT_IN_GOMP_TARGET) - { - c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); - if (c) - { - t = fold_convert (integer_type_node, - OMP_CLAUSE_NUM_TEAMS_EXPR (c)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL, - true, GSI_SAME_STMT); - } - else - t = integer_minus_one_node; - args.quick_push (t); - c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); - if (c) - { - t = fold_convert (integer_type_node, - OMP_CLAUSE_THREAD_LIMIT_EXPR (c)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL, - true, GSI_SAME_STMT); - } - else - t = integer_minus_one_node; - args.quick_push (t); - } + args.quick_push (get_target_arguments (&gsi, entry_stmt)); break; case BUILT_IN_GOACC_PARALLEL: { diff --git a/include/gomp-constants.h b/include/gomp-constants.h index dffd631aff6..fef27e4c52c 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -228,4 +228,25 @@ enum gomp_map_kind #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) #define GOMP_LAUNCH_OP_MAX 0xffff +/* Bitmask to apply in order to find out the intended device of a target + argument. */ +#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) +/* The target argument is significant for all devices. */ +#define GOMP_TARGET_ARG_DEVICE_ALL 0 + +/* Flag set when the subsequent element in the device-specific argument + values. */ +#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) + +/* Bitmask to apply to a target argument to find out the value identifier. */ +#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) +/* Target argument index of NUM_TEAMS. */ +#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) +/* Target argument index of THREAD_LIMIT. */ +#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) + +/* If the value is directly embeded in target argument, it should be a 16-bit + at most and shifted by this many bits. 
*/ +#define GOMP_TARGET_ARG_VALUE_SHIFT 16 + #endif diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 31ffba04d1c..1d137f102b3 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -499,6 +499,10 @@ struct gomp_target_task struct target_mem_desc *tgt; struct gomp_task *task; struct gomp_team *team; + /* Copies of firstprivate mapped data for shared memory accelerators. */ + void *firstprivate_copies; + /* Device-specific target arguments. */ + void **args; void *hostaddrs[]; }; @@ -765,7 +769,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **); extern bool gomp_create_target_task (struct gomp_device_descr *, void (*) (void *), size_t, void **, size_t *, unsigned short *, unsigned int, - void **, enum gomp_target_task_state); + void **, void **, + enum gomp_target_task_state); static void inline gomp_finish_task (struct gomp_task *task) @@ -939,8 +944,9 @@ struct gomp_device_descr void *(*dev2host_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t); void *(*dev2dev_func) (int, void *, const void *, size_t); - void (*run_func) (int, void *, void *); - void (*async_run_func) (int, void *, void *, void *); + bool (*can_run_func) (void *); + void (*run_func) (int, void *, void *, void **); + void (*async_run_func) (int, void *, void *, void **, void *); /* Splay tree containing information about mapped memory regions. 
*/ struct splay_tree_s mem_map; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index c238e6a7556..9c90d59598e 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *); extern void GOMP_target (int, void (*) (void *), const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, - unsigned short *, unsigned int, void **, - int, int); + unsigned short *, unsigned int, void **, void **); extern void GOMP_target_data (int, const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_data_ext (int, size_t, void **, size_t *, diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 98748041bfc..a7692116e94 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)), } static void -host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars) +host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, + void **args __attribute__((unused))) { void (*fn)(void *) = (void (*)(void *)) fn_ptr; diff --git a/libgomp/target.c b/libgomp/target.c index cf9d0e64c7c..f990a9e7966 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1261,15 +1261,38 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs) *thr = old_thr; } -/* Host fallback with firstprivate map-type handling. */ +/* Calculate alignment and size requirements of a private copy of data shared + as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. 
*/ -static void -gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, - void **hostaddrs, size_t *sizes, - unsigned short *kinds) +static inline void +calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, + unsigned short *kinds, size_t *tgt_align, + size_t *tgt_size) { - size_t i, tgt_align = 0, tgt_size = 0; - char *tgt = NULL; + size_t i; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (*tgt_align < align) + *tgt_align = align; + *tgt_size = (*tgt_size + align - 1) & ~(align - 1); + *tgt_size += sizes[i]; + } +} + +/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */ + +static inline void +copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, size_t tgt_align, + size_t tgt_size) +{ + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + size_t i; for (i = 0; i < mapnum; i++) if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) { @@ -1277,28 +1300,53 @@ gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - tgt_size += sizes[i]; + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; } +} + +/* Host fallback with firstprivate map-type handling.
*/ + +static void +gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + size_t tgt_align = 0, tgt_size = 0; + calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, + &tgt_size); if (tgt_align) { - tgt = gomp_alloca (tgt_size + tgt_align - 1); - uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); - if (al) - tgt += tgt_align - al; - tgt_size = 0; - for (i = 0; i < mapnum; i++) - if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) - { - size_t align = (size_t) 1 << (kinds[i] >> 8); - tgt_size = (tgt_size + align - 1) & ~(align - 1); - memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); - hostaddrs[i] = tgt + tgt_size; - tgt_size = tgt_size + sizes[i]; - } + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, + tgt_size); } gomp_target_fallback (fn, hostaddrs); } +/* Handle firstprivate map-type for shared memory devices and the host + fallback. Return the pointer of firstprivate copies which has to be freed + after use. */ + +static void * +gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + size_t tgt_align = 0, tgt_size = 0; + char *tgt = NULL; + + calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, + &tgt_size); + if (tgt_align) + { + tgt = gomp_malloc (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, + tgt_size); + } + return tgt; +} + /* Helper function of GOMP_target{,_ext} routines. 
*/ static void * @@ -1348,7 +1396,8 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, GOMP_MAP_VARS_TARGET); - devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, + NULL); gomp_unmap_vars (tgt_vars, true); } @@ -1356,6 +1405,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, and several arguments have been added: FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. DEPEND is array of dependencies, see GOMP_task for details. + + ARGS is a pointer to an array consisting of a variable number of both + device-independent and device-specific arguments, which can take one or two + elements where the first specifies for which device it is intended, the type + and optionally also the value. If the value is not present in the first + one, the whole second element is the actual value. The last element of the + array is a single NULL. Among the device-independent arguments can be, for example, + NUM_TEAMS and THREAD_LIMIT.
+ NUM_TEAMS is positive if GOMP_teams will be called in the body with that value, or 1 if teams construct is not present, or 0, if teams construct does not have num_teams clause and so the choice is @@ -1369,14 +1427,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, void GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, int num_teams, - int thread_limit) + unsigned int flags, void **depend, void **args) { struct gomp_device_descr *devicep = resolve_device (device); - (void) num_teams; - (void) thread_limit; - if (flags & GOMP_TARGET_FLAG_NOWAIT) { struct gomp_thread *thr = gomp_thread (); @@ -1413,7 +1467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, && !thr->task->final_task) { gomp_create_target_task (devicep, fn, mapnum, hostaddrs, - sizes, kinds, flags, depend, + sizes, kinds, flags, depend, args, GOMP_TARGET_TASK_BEFORE_MAP); return; } @@ -1430,20 +1484,33 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, gomp_task_maybe_wait_for_dependencies (depend); } + void *fn_addr; if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds); return; } - void *fn_addr = gomp_get_target_fn_addr (devicep, fn); - - struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_TARGET); - devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); - gomp_unmap_vars (tgt_vars, true); + struct target_mem_desc *tgt_vars; + void *fpc = NULL; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, 
sizes, kinds); + tgt_vars = NULL; + } + else + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, + true, GOMP_MAP_VARS_TARGET); + devicep->run_func (devicep->target_id, fn_addr, + tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, + args); + if (tgt_vars) + gomp_unmap_vars (tgt_vars, true); + else + free (fpc); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -1552,7 +1619,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, flags | GOMP_TARGET_FLAG_UPDATE, - depend, GOMP_TARGET_TASK_DATA)) + depend, NULL, GOMP_TARGET_TASK_DATA)) return; } else @@ -1673,7 +1740,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, { if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, - flags, depend, + flags, depend, NULL, GOMP_TARGET_TASK_DATA)) return; } @@ -1729,8 +1796,11 @@ gomp_target_task_fn (void *data) if (ttask->fn != NULL) { + void *fn_addr; if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum, @@ -1745,19 +1815,31 @@ gomp_target_task_fn (void *data) return false; } - void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn); - ttask->tgt - = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL, - ttask->sizes, ttask->kinds, true, - GOMP_MAP_VARS_TARGET); + void *actual_arguments; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + ttask->tgt = NULL; + ttask->firstprivate_copies + = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs, + ttask->sizes, ttask->kinds); + actual_arguments = 
ttask->hostaddrs; + } + else + { + ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, + NULL, ttask->sizes, ttask->kinds, true, + GOMP_MAP_VARS_TARGET); + actual_arguments = (void *) ttask->tgt->tgt_start; + } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; - devicep->async_run_func (devicep->target_id, fn_addr, - (void *) ttask->tgt->tgt_start, (void *) ttask); + devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, + ttask->args, (void *) ttask); return true; } else if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return false; size_t i; @@ -2225,6 +2307,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, { DLSYM (run); DLSYM (async_run); + DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) diff --git a/libgomp/task.c b/libgomp/task.c index 620facd3fea..f3b05e5ee73 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -593,7 +593,7 @@ bool gomp_create_target_task (struct gomp_device_descr *devicep, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, + unsigned int flags, void **depend, void **args, enum gomp_target_task_state state) { struct gomp_thread *thr = gomp_thread (); @@ -653,6 +653,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, ttask->devicep = devicep; ttask->fn = fn; ttask->mapnum = mapnum; + ttask->args = args; memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp index f8c172562ae..48599dd0d85 100644 --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp +++ 
b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @@ -539,7 +539,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr, extern "C" void GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, - void *async_data) + void **, void *async_data) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device, tgt_fn, tgt_vars, async_data); @@ -555,7 +555,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, } extern "C" void -GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars) +GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars); |