summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Monakov <amonakov@ispras.ru>2016-01-15 13:37:30 +0300
committerAlexander Monakov <amonakov@ispras.ru>2016-01-20 18:35:27 +0300
commit9599235d847610670c186e6d672250c7a28f0d38 (patch)
treeb195b212181a39ed54a468bd9d16c17fc5322d5e
parent01f92ec03161b4539fb5466f49ab1c547e9bd5cd (diff)
downloadgcc-9599235d847610670c186e6d672250c7a28f0d38.tar.gz
pick GOMP_target_ext changes from the hsa branch
This adds necessary plumbing to spawn multiple teams. To be reverted on this branch prior to merge.
-rw-r--r--gcc/builtin-types.def7
-rw-r--r--gcc/fortran/types.def5
-rw-r--r--gcc/omp-builtins.def2
-rw-r--r--gcc/omp-low.c149
-rw-r--r--include/gomp-constants.h21
-rw-r--r--libgomp/libgomp.h12
-rw-r--r--libgomp/libgomp_g.h3
-rw-r--r--libgomp/oacc-host.c3
-rw-r--r--libgomp/target.c179
-rw-r--r--libgomp/task.c3
-rw-r--r--liboffloadmic/plugin/libgomp-plugin-intelmic.cpp4
11 files changed, 299 insertions, 89 deletions
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index c68fb198c85..33bee1d1e49 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -555,10 +555,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
- BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
- BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+ BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+ BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index a37e856708b..5838f04868a 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -220,10 +220,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
- BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+ BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 35f5014a62b..35c2724f6f1 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -341,7 +341,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start",
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end",
BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext",
- BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+ BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 8996b8dc04e..2e02c6f8bbd 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12731,6 +12731,130 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry,
loop->in_oacc_kernels_region = true;
}
+/* Build target argument identifier from the DEVICE identifier, value
+ identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
+
+static tree
+get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
+{
+ tree t = build_int_cst (integer_type_node, device);
+ if (subseqent_param)
+ t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+ build_int_cst (integer_type_node,
+ GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
+ t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+ build_int_cst (integer_type_node, id));
+ return t;
+}
+
+/* Like above but return it in type that can be directly stored as an element
+ of the argument array. */
+
+static tree
+get_target_argument_identifier (int device, bool subseqent_param, int id)
+{
+ tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
+ return fold_convert (ptr_type_node, t);
+}
+
+/* Return a target argument consisting of DEVICE identifier, value identifier
+ ID, and the actual VALUE. */
+
+static tree
+get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
+ tree value)
+{
+ tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
+ fold_convert (integer_type_node, value),
+ build_int_cst (unsigned_type_node,
+ GOMP_TARGET_ARG_VALUE_SHIFT));
+ t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+ get_target_argument_identifier_1 (device, false, id));
+ t = fold_convert (ptr_type_node, t);
+ return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+}
+
+/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
+ push one argument to ARGS with all of DEVICE, ID and VALUE embedded in it,
+ otherwise push an identifier (with DEVICE and ID) and the VALUE in two
+ arguments. */
+
+static void
+push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
+ int id, tree value, vec <tree> *args)
+{
+ if (tree_fits_shwi_p (value)
+ && tree_to_shwi (value) > -(1 << 15)
+ && tree_to_shwi (value) < (1 << 15))
+ args->quick_push (get_target_argument_value (gsi, device, id, value));
+ else
+ {
+ args->quick_push (get_target_argument_identifier (device, true, id));
+ value = fold_convert (ptr_type_node, value);
+ value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
+ GSI_SAME_STMT);
+ args->quick_push (value);
+ }
+}
+
+/* Create an array of arguments that is then passed to GOMP_target_ext. */
+
+static tree
+get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
+{
+ auto_vec <tree, 6> args;
+ tree clauses = gimple_omp_target_clauses (tgt_stmt);
+ tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
+ if (c)
+ t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
+ else
+ t = integer_minus_one_node;
+ push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+ GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
+
+ c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
+ if (c)
+ t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
+ else
+ t = integer_minus_one_node;
+ push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+ GOMP_TARGET_ARG_THREAD_LIMIT, t,
+ &args);
+
+#if 0
+ /* Add HSA-specific grid sizes, if available. */
+ if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
+ OMP_CLAUSE__GRIDDIM_))
+ {
+ t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
+ GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
+ args.quick_push (t);
+ args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
+ }
+#endif
+
+ /* Produce more, perhaps device specific, arguments here. */
+
+ tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
+ args.length () + 1),
+ ".omp_target_args");
+ for (unsigned i = 0; i < args.length (); i++)
+ {
+ tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+ build_int_cst (integer_type_node, i),
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
+ GSI_SAME_STMT);
+ }
+ tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+ build_int_cst (integer_type_node, args.length ()),
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
+ GSI_SAME_STMT);
+ TREE_ADDRESSABLE (argarray) = 1;
+ return build_fold_addr_expr (argarray);
+}
+
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
static void
@@ -13148,30 +13272,7 @@ expand_omp_target (struct omp_region *region)
depend = build_int_cst (ptr_type_node, 0);
args.quick_push (depend);
if (start_ix == BUILT_IN_GOMP_TARGET)
- {
- c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
- if (c)
- {
- t = fold_convert (integer_type_node,
- OMP_CLAUSE_NUM_TEAMS_EXPR (c));
- t = force_gimple_operand_gsi (&gsi, t, true, NULL,
- true, GSI_SAME_STMT);
- }
- else
- t = integer_minus_one_node;
- args.quick_push (t);
- c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
- if (c)
- {
- t = fold_convert (integer_type_node,
- OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
- t = force_gimple_operand_gsi (&gsi, t, true, NULL,
- true, GSI_SAME_STMT);
- }
- else
- t = integer_minus_one_node;
- args.quick_push (t);
- }
+ args.quick_push (get_target_arguments (&gsi, entry_stmt));
break;
case BUILT_IN_GOACC_PARALLEL:
{
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index dffd631aff6..fef27e4c52c 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -228,4 +228,25 @@ enum gomp_map_kind
#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
#define GOMP_LAUNCH_OP_MAX 0xffff
+/* Bitmask to apply in order to find out the intended device of a target
+ argument. */
+#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1)
+/* The target argument is significant for all devices. */
+#define GOMP_TARGET_ARG_DEVICE_ALL 0
+
+/* Flag set when the subsequent element of the argument array holds the
+ value of this target argument. */
+#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7)
+
+/* Bitmask to apply to a target argument to find out the value identifier. */
+#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8)
+/* Target argument index of NUM_TEAMS. */
+#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8)
+/* Target argument index of THREAD_LIMIT. */
+#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8)
+
+/* If the value is directly embedded in the target argument, it should be at
+ most 16 bits wide and is shifted left by this many bits. */
+#define GOMP_TARGET_ARG_VALUE_SHIFT 16
+
#endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 31ffba04d1c..1d137f102b3 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -499,6 +499,10 @@ struct gomp_target_task
struct target_mem_desc *tgt;
struct gomp_task *task;
struct gomp_team *team;
+ /* Copies of firstprivate mapped data for shared memory accelerators. */
+ void *firstprivate_copies;
+ /* Device-specific target arguments. */
+ void **args;
void *hostaddrs[];
};
@@ -765,7 +769,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **);
extern bool gomp_create_target_task (struct gomp_device_descr *,
void (*) (void *), size_t, void **,
size_t *, unsigned short *, unsigned int,
- void **, enum gomp_target_task_state);
+ void **, void **,
+ enum gomp_target_task_state);
static void inline
gomp_finish_task (struct gomp_task *task)
@@ -939,8 +944,9 @@ struct gomp_device_descr
void *(*dev2host_func) (int, void *, const void *, size_t);
void *(*host2dev_func) (int, void *, const void *, size_t);
void *(*dev2dev_func) (int, void *, const void *, size_t);
- void (*run_func) (int, void *, void *);
- void (*async_run_func) (int, void *, void *, void *);
+ bool (*can_run_func) (void *);
+ void (*run_func) (int, void *, void *, void **);
+ void (*async_run_func) (int, void *, void *, void **, void *);
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index c238e6a7556..9c90d59598e 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_target (int, void (*) (void *), const void *,
size_t, void **, size_t *, unsigned char *);
extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
- unsigned short *, unsigned int, void **,
- int, int);
+ unsigned short *, unsigned int, void **, void **);
extern void GOMP_target_data (int, const void *,
size_t, void **, size_t *, unsigned char *);
extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 98748041bfc..a7692116e94 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)),
}
static void
-host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
+ void **args __attribute__((unused)))
{
void (*fn)(void *) = (void (*)(void *)) fn_ptr;
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e64c7c..f990a9e7966 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1261,15 +1261,38 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
*thr = old_thr;
}
-/* Host fallback with firstprivate map-type handling. */
+/* Calculate alignment and size requirements of a private copy of data shared
+ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
-static void
-gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
- void **hostaddrs, size_t *sizes,
- unsigned short *kinds)
+static inline void
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
+ unsigned short *kinds, size_t *tgt_align,
+ size_t *tgt_size)
{
- size_t i, tgt_align = 0, tgt_size = 0;
- char *tgt = NULL;
+ size_t i;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ if (*tgt_align < align)
+ *tgt_align = align;
+ *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
+ *tgt_size += sizes[i];
+ }
+}
+
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
+
+static inline void
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds, size_t tgt_align,
+ size_t tgt_size)
+{
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+ if (al)
+ tgt += tgt_align - al;
+ tgt_size = 0;
+ size_t i;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
{
@@ -1277,28 +1300,53 @@ gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
if (tgt_align < align)
tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
- tgt_size += sizes[i];
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+ hostaddrs[i] = tgt + tgt_size;
+ tgt_size = tgt_size + sizes[i];
}
+}
+
+/* Host fallback with firstprivate map-type handling. */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
+{
+ size_t tgt_align = 0, tgt_size = 0;
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+ &tgt_size);
if (tgt_align)
{
- tgt = gomp_alloca (tgt_size + tgt_align - 1);
- uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
- if (al)
- tgt += tgt_align - al;
- tgt_size = 0;
- for (i = 0; i < mapnum; i++)
- if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
- {
- size_t align = (size_t) 1 << (kinds[i] >> 8);
- tgt_size = (tgt_size + align - 1) & ~(align - 1);
- memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
- hostaddrs[i] = tgt + tgt_size;
- tgt_size = tgt_size + sizes[i];
- }
+ char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+ tgt_size);
}
gomp_target_fallback (fn, hostaddrs);
}
+/* Handle firstprivate map-type for shared memory devices and the host
+ fallback. Return a pointer to the firstprivate copies, which has to be
+ freed after use. */
+
+static void *
+gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds)
+{
+ size_t tgt_align = 0, tgt_size = 0;
+ char *tgt = NULL;
+
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+ &tgt_size);
+ if (tgt_align)
+ {
+ tgt = gomp_malloc (tgt_size + tgt_align - 1);
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+ tgt_size);
+ }
+ return tgt;
+}
+
/* Helper function of GOMP_target{,_ext} routines. */
static void *
@@ -1348,7 +1396,8 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_TARGET);
- devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+ devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+ NULL);
gomp_unmap_vars (tgt_vars, true);
}
@@ -1356,6 +1405,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
DEPEND is array of dependencies, see GOMP_task for details.
+
+ ARGS is a pointer to an array consisting of a variable number of both
+ device-independent and device-specific arguments, each of which takes one
+ or two elements; the first specifies for which device it is intended, the
+ type and optionally also the value. If the value is not present in the
+ first element, the whole second element is the actual value. The last
+ element of the array is a single NULL. The device-independent arguments
+ include, for example, NUM_TEAMS and THREAD_LIMIT.
+
NUM_TEAMS is positive if GOMP_teams will be called in the body with
that value, or 1 if teams construct is not present, or 0, if
teams construct does not have num_teams clause and so the choice is
@@ -1369,14 +1427,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend, int num_teams,
- int thread_limit)
+ unsigned int flags, void **depend, void **args)
{
struct gomp_device_descr *devicep = resolve_device (device);
- (void) num_teams;
- (void) thread_limit;
-
if (flags & GOMP_TARGET_FLAG_NOWAIT)
{
struct gomp_thread *thr = gomp_thread ();
@@ -1413,7 +1467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
&& !thr->task->final_task)
{
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
- sizes, kinds, flags, depend,
+ sizes, kinds, flags, depend, args,
GOMP_TARGET_TASK_BEFORE_MAP);
return;
}
@@ -1430,20 +1484,33 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
gomp_task_maybe_wait_for_dependencies (depend);
}
+ void *fn_addr;
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
+ || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
return;
}
- void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
- struct target_mem_desc *tgt_vars
- = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
- GOMP_MAP_VARS_TARGET);
- devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
- gomp_unmap_vars (tgt_vars, true);
+ struct target_mem_desc *tgt_vars;
+ void *fpc = NULL;
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+ fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds);
+ tgt_vars = NULL;
+ }
+ else
+ tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+ true, GOMP_MAP_VARS_TARGET);
+ devicep->run_func (devicep->target_id, fn_addr,
+ tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
+ args);
+ if (tgt_vars)
+ gomp_unmap_vars (tgt_vars, true);
+ else
+ free (fpc);
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
@@ -1552,7 +1619,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
mapnum, hostaddrs, sizes, kinds,
flags | GOMP_TARGET_FLAG_UPDATE,
- depend, GOMP_TARGET_TASK_DATA))
+ depend, NULL, GOMP_TARGET_TASK_DATA))
return;
}
else
@@ -1673,7 +1740,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
{
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
mapnum, hostaddrs, sizes, kinds,
- flags, depend,
+ flags, depend, NULL,
GOMP_TARGET_TASK_DATA))
return;
}
@@ -1729,8 +1796,11 @@ gomp_target_task_fn (void *data)
if (ttask->fn != NULL)
{
+ void *fn_addr;
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
+ || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
ttask->state = GOMP_TARGET_TASK_FALLBACK;
gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1745,19 +1815,31 @@ gomp_target_task_fn (void *data)
return false;
}
- void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
- ttask->tgt
- = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
- ttask->sizes, ttask->kinds, true,
- GOMP_MAP_VARS_TARGET);
+ void *actual_arguments;
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+ ttask->tgt = NULL;
+ ttask->firstprivate_copies
+ = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs,
+ ttask->sizes, ttask->kinds);
+ actual_arguments = ttask->hostaddrs;
+ }
+ else
+ {
+ ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
+ NULL, ttask->sizes, ttask->kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ actual_arguments = (void *) ttask->tgt->tgt_start;
+ }
ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
- devicep->async_run_func (devicep->target_id, fn_addr,
- (void *) ttask->tgt->tgt_start, (void *) ttask);
+ devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+ ttask->args, (void *) ttask);
return true;
}
else if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return false;
size_t i;
@@ -2225,6 +2307,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
{
DLSYM (run);
DLSYM (async_run);
+ DLSYM_OPT (can_run, can_run);
DLSYM (dev2dev);
}
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
diff --git a/libgomp/task.c b/libgomp/task.c
index 620facd3fea..f3b05e5ee73 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -593,7 +593,7 @@ bool
gomp_create_target_task (struct gomp_device_descr *devicep,
void (*fn) (void *), size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend,
+ unsigned int flags, void **depend, void **args,
enum gomp_target_task_state state)
{
struct gomp_thread *thr = gomp_thread ();
@@ -653,6 +653,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
ttask->devicep = devicep;
ttask->fn = fn;
ttask->mapnum = mapnum;
+ ttask->args = args;
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c172562ae..48599dd0d85 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -539,7 +539,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
extern "C" void
GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
- void *async_data)
+ void **, void *async_data)
{
TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
tgt_fn, tgt_vars, async_data);
@@ -555,7 +555,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
}
extern "C" void
-GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
{
TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);