diff options
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- | libgomp/target.c | 1274 |
1 files changed, 1115 insertions, 159 deletions
diff --git a/libgomp/target.c b/libgomp/target.c index 758ece5d78c..de6a2c9c9c5 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -38,6 +38,7 @@ #endif #include <string.h> #include <assert.h> +#include <errno.h> #ifdef PLUGIN_SUPPORT #include <dlfcn.h> @@ -133,17 +134,48 @@ resolve_device (int device_id) if (device_id < 0 || device_id >= gomp_get_num_devices ()) return NULL; + gomp_mutex_lock (&devices[device_id].lock); + if (!devices[device_id].is_initialized) + gomp_init_device (&devices[device_id]); + gomp_mutex_unlock (&devices[device_id].lock); + return &devices[device_id]; } -/* Handle the case where splay_tree_lookup found oldn for newn. +static inline splay_tree_key +gomp_map_lookup (splay_tree mem_map, splay_tree_key key) +{ + if (key->host_start != key->host_end) + return splay_tree_lookup (mem_map, key); + + key->host_end++; + splay_tree_key n = splay_tree_lookup (mem_map, key); + key->host_end--; + if (n) + return n; + key->host_start--; + n = splay_tree_lookup (mem_map, key); + key->host_start++; + if (n) + return n; + return splay_tree_lookup (mem_map, key); +} + +/* Handle the case where gomp_map_lookup found oldn for newn. Helper function of gomp_map_vars. */ static inline void gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, - splay_tree_key newn, unsigned char kind) + splay_tree_key newn, struct target_var_desc *tgt_var, + unsigned char kind) { + tgt_var->key = oldn; + tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); + tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); + tgt_var->offset = newn->host_start - oldn->host_start; + tgt_var->length = newn->host_end - newn->host_start; + if ((kind & GOMP_MAP_FLAG_FORCE) || oldn->host_start > newn->host_start || oldn->host_end < newn->host_end) @@ -154,14 +186,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, (void *) newn->host_start, (void *) newn->host_end, (void *) oldn->host_start, (void *) oldn->host_end); } - oldn->refcount++; + + if (GOMP_MAP_ALWAYS_TO_P (kind)) + devicep->host2dev_func (devicep->target_id, + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), + (void *) newn->host_start, + newn->host_end - newn->host_start); + if (oldn->refcount != REFCOUNT_INFINITY) + oldn->refcount++; } static int -get_kind (bool is_openacc, void *kinds, int idx) +get_kind (bool short_mapkind, void *kinds, int idx) { - return is_openacc ? ((unsigned short *) kinds)[idx] - : ((unsigned char *) kinds)[idx]; + return short_mapkind ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; } static void @@ -185,20 +225,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, } /* Add bias to the pointer value. */ cur_node.host_start += bias; - cur_node.host_end = cur_node.host_start + 1; - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - /* Could be possibly zero size array section. */ - cur_node.host_end--; - n = splay_tree_lookup (mem_map, &cur_node); - if (n == NULL) - { - cur_node.host_start--; - n = splay_tree_lookup (mem_map, &cur_node); - cur_node.host_start++; - } - } + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); if (n == NULL) { gomp_mutex_unlock (&devicep->lock); @@ -218,20 +246,81 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, sizeof (void *)); } +static void +gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, + size_t first, size_t i, void **hostaddrs, + size_t *sizes, void *kinds) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int kind; + const bool short_mapkind = true; + const int typemask = short_mapkind ? 0xff : 0x7; + + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); + kind = get_kind (short_mapkind, kinds, i); + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, + &tgt->list[i], kind & typemask); + return; + } + if (sizes[i] == 0) + { + if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) + { + cur_node.host_start--; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_start++; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start + == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + cur_node.host_end++; + n2 = splay_tree_lookup (mem_map, &cur_node); + cur_node.host_end--; + if (n2 + && n2->tgt == n->tgt + && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) + { + gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + kind & typemask); + return; + } + } + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Trying to map into device [%p..%p) structure element when " + "other mapped elements from the same structure weren't mapped " + "together with it", (void *) cur_node.host_start, + (void *) cur_node.host_end); +} + attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool is_openacc, bool is_target) + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; - const int rshift = is_openacc ? 8 : 3; - const int typemask = is_openacc ? 0xff : 0x7; + bool has_firstprivate = false; + const int rshift = short_mapkind ? 8 : 3; + const int typemask = short_mapkind ? 0xff : 0x7; struct splay_tree_s *mem_map = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; if (mapnum == 0) @@ -239,7 +328,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_align = sizeof (void *); tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -250,10 +339,61 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, for (i = 0; i < mapnum; i++) { - int kind = get_kind (is_openacc, kinds, i); - if (hostaddrs[i] == NULL) + int kind = get_kind (short_mapkind, kinds, i); + if (hostaddrs[i] == NULL + || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("use_device_ptr pointer wasn't mapped"); + } + cur_node.host_start -= n->host_start; + hostaddrs[i] + = (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start); + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 0; + continue; + } + else if ((kind & typemask) == GOMP_MAP_STRUCT) + { + size_t first = i + 1; + size_t last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 2; + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i]; + not_found_cnt += last - i; + for (i = first; i <= last; i++) + tgt->list[i].key = NULL; + i--; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; @@ -261,15 +401,37 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); - splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); - if (n) + if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) + { + tgt->list[i].key = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - cur_node.host_start; + has_firstprivate = true; + continue; + } + splay_tree_key n; + if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask); + n = gomp_map_lookup (mem_map, &cur_node); + if (!n) + { + tgt->list[i].key = NULL; + tgt->list[i].offset = ~(uintptr_t) 1; + continue; + } } else + n = splay_tree_lookup (mem_map, &cur_node); + if (n) + gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], + kind & typemask); + else { - tgt->list[i] = NULL; + tgt->list[i].key = NULL; size_t align = (size_t) 1 << (kind >> rshift); not_found_cnt++; @@ -281,7 +443,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { size_t j; for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] < cur_node.host_start @@ -290,7 +452,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = NULL; + tgt->list[j].key = NULL; i++; } } @@ -308,7 +470,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || is_target) + else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -327,22 +489,74 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; - if (not_found_cnt) + if (not_found_cnt || has_firstprivate) { - tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); + if (not_found_cnt) + tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); splay_tree_node array = tgt->array; - size_t j; + size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; + uintptr_t field_tgt_base = 0; for (i = 0; i < mapnum; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (hostaddrs[i] == NULL) continue; + switch (kind & typemask) + { + size_t align, len, first, last; + splay_tree_key n; + case GOMP_MAP_FIRSTPRIVATE: + align = (size_t) 1 << (kind >> rshift); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].offset = tgt_size; + len = sizes[i]; + devicep->host2dev_func (devicep->target_id, + (void *) (tgt->tgt_start + tgt_size), + (void *) hostaddrs[i], len); + tgt_size += len; + continue; + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + continue; + case GOMP_MAP_STRUCT: + first = i + 1; + last = i + sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = (uintptr_t) hostaddrs[last] + + sizes[last]; + if (tgt->list[first].key != NULL) + continue; + n = splay_tree_lookup (mem_map, &cur_node); + if (n == NULL) + { + size_t align = (size_t) 1 << (kind >> rshift); + tgt_size -= (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += (uintptr_t) hostaddrs[first] + - (uintptr_t) hostaddrs[i]; + field_tgt_base = (uintptr_t) hostaddrs[first]; + field_tgt_offset = tgt_size; + field_tgt_clear = last; + tgt_size += cur_node.host_end + - (uintptr_t) hostaddrs[first]; + continue; + } + for (i = first; i <= last; i++) + gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + sizes, kinds); + i--; + continue; + default: + break; + } splay_tree_key k = &array->key; k->host_start = (uintptr_t) hostaddrs[i]; if (!GOMP_MAP_POINTER_P (kind & typemask)) @@ -351,19 +565,31 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); if (n) - { - tgt->list[i] = n; - gomp_map_vars_existing (devicep, n, k, kind & typemask); - } + gomp_map_vars_existing (devicep, n, k, &tgt->list[i], + kind & typemask); else { size_t align = (size_t) 1 << (kind >> rshift); - tgt->list[i] = k; - tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt->list[i].key = k; k->tgt = tgt; - k->tgt_offset = tgt_size; - tgt_size += k->host_end - k->host_start; - k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + if (field_tgt_clear != ~(size_t) 0) + { + k->tgt_offset = k->host_start - field_tgt_base + + field_tgt_offset; + if (i == field_tgt_clear) + field_tgt_clear = ~(size_t) 0; + } + else + { + tgt_size = (tgt_size + align - 1) & ~(align - 1); + k->tgt_offset = tgt_size; + tgt_size += k->host_end - k->host_start; + } + tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); + tgt->list[i].always_copy_from + = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + tgt->list[i].offset = 0; + tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; k->async_refcount = 0; tgt->refcount++; @@ -376,11 +602,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, case GOMP_MAP_FROM: case GOMP_MAP_FORCE_ALLOC: case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: break; case GOMP_MAP_TO: case GOMP_MAP_TOFROM: case GOMP_MAP_FORCE_TO: case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_TOFROM: /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ @@ -403,7 +632,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end - k->host_start); for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, + j) & typemask)) break; else if ((uintptr_t) hostaddrs[j] < k->host_start @@ -412,8 +642,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; else { - tgt->list[j] = k; - k->refcount++; + tgt->list[j].key = k; + tgt->list[j].copy_from = false; + tgt->list[j].always_copy_from = false; + if (k->refcount != REFCOUNT_INFINITY) + k->refcount++; gomp_map_pointer (tgt, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset @@ -460,15 +693,30 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { - if (tgt->list[i] == NULL) - cur_node.tgt_offset = (uintptr_t) NULL; + if (tgt->list[i].key == NULL) + { + if (tgt->list[i].offset == ~(uintptr_t) 0) + cur_node.tgt_offset = (uintptr_t) hostaddrs[i]; + else if (tgt->list[i].offset == ~(uintptr_t) 1) + cur_node.tgt_offset = 0; + else if (tgt->list[i].offset == ~(uintptr_t) 2) + cur_node.tgt_offset = tgt->list[i + 1].key->tgt->tgt_start + + tgt->list[i + 1].key->tgt_offset + + tgt->list[i + 1].offset + + (uintptr_t) hostaddrs[i] + - (uintptr_t) hostaddrs[i + 1]; + else + cur_node.tgt_offset = tgt->tgt_start + + tgt->list[i].offset; + } else - cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start - + tgt->list[i]->tgt_offset; + cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset; /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start @@ -478,6 +726,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } + /* If the variable from "omp target enter data" map-list was already mapped, + tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or + gomp_exit_data. */ + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { + free (tgt); + tgt = NULL; + } + gomp_mutex_unlock (&devicep->lock); return tgt; } @@ -508,17 +765,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt) gomp_mutex_lock (&devicep->lock); for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) + if (tgt->list[i].key == NULL) ; - else if (tgt->list[i]->refcount > 1) + else if (tgt->list[i].key->refcount > 1) { - tgt->list[i]->refcount--; - tgt->list[i]->async_refcount++; + tgt->list[i].key->refcount--; + tgt->list[i].key->async_refcount++; } else { - splay_tree_key k = tgt->list[i]; - if (k->copy_from) + splay_tree_key k = tgt->list[i].key; + if (tgt->list[i].copy_from) devicep->dev2host_func (devicep->target_id, (void *) k->host_start, (void *) (k->tgt->tgt_start + k->tgt_offset), k->host_end - k->host_start); @@ -546,25 +803,41 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) size_t i; for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i] == NULL) - ; - else if (tgt->list[i]->refcount > 1) - tgt->list[i]->refcount--; - else if (tgt->list[i]->async_refcount > 0) - tgt->list[i]->async_refcount--; - else - { - splay_tree_key k = tgt->list[i]; - if (k->copy_from && do_copyfrom) - devicep->dev2host_func (devicep->target_id, (void *) k->host_start, - (void *) (k->tgt->tgt_start + k->tgt_offset), - k->host_end - k->host_start); - splay_tree_remove (&devicep->mem_map, k); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + { + splay_tree_key k = tgt->list[i].key; + if (k == NULL) + continue; + + bool do_unmap = false; + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + else if (k->refcount == 1) + { + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } + } + + if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + || tgt->list[i].always_copy_from) + devicep->dev2host_func (devicep->target_id, + (void *) (k->host_start + tgt->list[i].offset), + (void *) (k->tgt->tgt_start + k->tgt_offset + + tgt->list[i].offset), + tgt->list[i].length); + if (do_unmap) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + } if (tgt->refcount > 1) tgt->refcount--; @@ -576,11 +849,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) static void gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - size_t *sizes, void *kinds, bool is_openacc) + size_t *sizes, void *kinds, bool short_mapkind) { size_t i; struct splay_tree_key_s cur_node; - const int typemask = is_openacc ? 0xff : 0x7; + const int typemask = short_mapkind ? 0xff : 0x7; if (!devicep) return; @@ -597,7 +870,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); if (n) { - int kind = get_kind (is_openacc, kinds, i); + int kind = get_kind (short_mapkind, kinds, i); if (n->host_start > cur_node.host_start || n->host_end < cur_node.host_end) { @@ -626,13 +899,6 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, - n->host_start), cur_node.host_end - cur_node.host_start); } - else - { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Trying to update [%p..%p) object that is not mapped", - (void *) cur_node.host_start, - (void *) cur_node.host_end); - } } gomp_mutex_unlock (&devicep->lock); } @@ -678,7 +944,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); - tgt->refcount = 1; + tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; @@ -694,9 +960,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + 1; k->tgt = tgt; k->tgt_offset = target_table[i].start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -720,9 +985,8 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; k->tgt = tgt; k->tgt_offset = target_var->start; - k->refcount = 1; + k->refcount = REFCOUNT_INFINITY; k->async_refcount = 0; - k->copy_from = false; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -945,6 +1209,47 @@ gomp_fini_device (struct gomp_device_descr *devicep) devicep->is_initialized = false; } +/* Host fallback for GOMP_target{,_41} routines. */ + +static void +gomp_target_fallback (void (*fn) (void *), void **hostaddrs) +{ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) + { + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; + } + fn (hostaddrs); + gomp_free_thread (thr); + *thr = old_thr; +} + +/* Helper function of GOMP_target{,_41} routines. */ + +static void * +gomp_get_target_fn_addr (struct gomp_device_descr *devicep, + void (*host_fn) (void *)) +{ + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + return (void *) host_fn; + else + { + gomp_mutex_lock (&devicep->lock); + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); + gomp_mutex_unlock (&devicep->lock); + if (tgt_fn == NULL) + gomp_fatal ("Target function wasn't mapped"); + + return (void *) tgt_fn->tgt_offset; + } +} + /* Called when encountering a target directive. If DEVICE is GOMP_DEVICE_ICV, it means use device-var ICV. If it is GOMP_DEVICE_HOST_FALLBACK (or any value @@ -964,51 +1269,85 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_fallback (fn, hostaddrs); + + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + + struct target_mem_desc *tgt_vars + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + GOMP_MAP_VARS_TARGET); + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) { - /* Host fallback. */ - struct gomp_thread old_thr, *thr = gomp_thread (); - old_thr = *thr; - memset (thr, '\0', sizeof (*thr)); - if (gomp_places_list) - { - thr->place = old_thr.place; - thr->ts.place_partition_len = gomp_places_list_len; - } - fn (hostaddrs); - gomp_free_thread (thr); - *thr = old_thr; - return; + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; } + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); + gomp_free_thread (thr); + *thr = old_thr; + gomp_unmap_vars (tgt_vars, true); +} - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); +void +GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); - void *fn_addr; + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + } - if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) - fn_addr = (void *) fn; - else + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { - gomp_mutex_lock (&devicep->lock); - struct splay_tree_key_s k; - k.host_start = (uintptr_t) fn; - k.host_end = k.host_start + 1; - splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); - if (tgt_fn == NULL) + size_t i, tgt_align = 0, tgt_size = 0; + char *tgt = NULL; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += sizes[i]; + } + if (tgt_align) { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Target function wasn't mapped"); + tgt = gomp_alloca (tgt_size + tgt_align - 1); + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + tgt_size = (tgt_size + align - 1) & ~(align - 1); + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; + } } - gomp_mutex_unlock (&devicep->lock); - - fn_addr = (void *) tgt_fn->tgt_offset; + gomp_target_fallback (fn, hostaddrs); + return; } + void *fn_addr = gomp_get_target_fn_addr (devicep, fn); + struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - true); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_TARGET); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -1023,6 +1362,26 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, gomp_unmap_vars (tgt_vars, true); } +/* Host fallback for GOMP_target_data{,_41} routines. */ + +static void +gomp_target_data_fallback (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + /* Even when doing a host fallback, if there are any active + #pragma omp target data constructs, need to remember the + new #pragma omp target data, otherwise GOMP_target_end_data + would get out of sync. */ + struct target_mem_desc *tgt + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, + GOMP_MAP_VARS_DATA); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } +} + void GOMP_target_data (int device, const void *unused, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) @@ -1031,31 +1390,29 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) - { - /* Host fallback. */ - struct gomp_task_icv *icv = gomp_icv (false); - if (icv->target_data) - { - /* Even when doing a host fallback, if there are any active - #pragma omp target data constructs, need to remember the - new #pragma omp target data, otherwise GOMP_target_end_data - would get out of sync. */ - struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); - tgt->prev = icv->target_data; - icv->target_data = tgt; - } - return; - } - - gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->lock); + return gomp_target_data_fallback (); struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - false); + GOMP_MAP_VARS_DATA); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; +} + +void +GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return gomp_target_data_fallback (); + + struct target_mem_desc *tgt + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -1083,12 +1440,230 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) return; + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +} + +void +GOMP_target_update_41 (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags | GOMP_TARGET_FLAG_UPDATE, + depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); +} + +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; gomp_mutex_lock (&devicep->lock); - if (!devicep->is_initialized) - gomp_init_device (devicep); + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) + ? gomp_map_lookup (&devicep->mem_map, &cur_node) + : splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + if ((kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) + && k->refcount != REFCOUNT_INFINITY) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + gomp_mutex_unlock (&devicep->lock); +} - gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); +void +GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, + unsigned int flags, void **depend) +{ + struct gomp_device_descr *devicep = resolve_device (device); + + /* If there are depend clauses, but nowait is not present, + block the parent task until the dependencies are resolved + and then just continue with the rest of the function as if it + is a merged task. Until we are able to schedule task during + variable mapping or unmapping, ignore nowait if depend clauses + are not present. */ + if (depend != NULL) + { + struct gomp_thread *thr = gomp_thread (); + if (thr->task && thr->task->depend_hash) + { + if ((flags & GOMP_TARGET_FLAG_NOWAIT) + && thr->ts.team + && !thr->task->final_task) + { + gomp_create_target_task (devicep, (void (*) (void *)) NULL, + mapnum, hostaddrs, sizes, kinds, + flags, depend); + return; + } + + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup + && thr->task->taskgroup->cancelled))) + return; + + gomp_task_maybe_wait_for_dependencies (depend); + } + } + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + + size_t i; + if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += sizes[i]; + } + else + gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); +} + +void +gomp_target_task_fn (void *data) +{ + struct gomp_target_task *ttask = (struct gomp_target_task *) data; + if (ttask->fn != NULL) + { + /* GOMP_target_41 */ + } + else if (ttask->devicep == NULL + || !(ttask->devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return; + + size_t i; + if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) + gomp_update (ttask->devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, true); + else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (ttask->devicep, ttask->sizes[i] + 1, + &ttask->hostaddrs[i], NULL, &ttask->sizes[i], + &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (ttask->devicep, 1, &ttask->hostaddrs[i], NULL, + &ttask->sizes[i], &ttask->kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (ttask->devicep, ttask->mapnum, ttask->hostaddrs, + ttask->sizes, ttask->kinds); } void @@ -1103,6 +1678,384 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit) (void) num_teams; } +void * +omp_target_alloc (size_t size, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return malloc (size); + + if (device_num < 0) + return NULL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return NULL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return malloc (size); + + gomp_mutex_lock (&devicep->lock); + void *ret = devicep->alloc_func (devicep->target_id, size); + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +void +omp_target_free (void *device_ptr, int device_num) +{ + if (device_ptr == NULL) + return; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + { + free (device_ptr); + return; + } + + if (device_num < 0) + return; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + { + free (device_ptr); + return; + } + + gomp_mutex_lock (&devicep->lock); + devicep->free_func (devicep->target_id, device_ptr); + gomp_mutex_unlock (&devicep->lock); +} + +int +omp_target_is_present (void *ptr, int device_num) +{ + if (ptr == NULL) + return 1; + + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return 1; + + if (device_num < 0) + return 0; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return 0; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return 1; + + gomp_mutex_lock (&devicep->lock); + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + int ret = n != NULL; + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, + size_t src_offset, int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + if (src_devicep == NULL && dst_devicep == NULL) + { + memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); + return 0; + } + if (src_devicep == NULL) + { + gomp_mutex_lock (&dst_devicep->lock); + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&dst_devicep->lock); + return 0; + } + if (dst_devicep == NULL) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + if (src_devicep == dst_devicep) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } + return EINVAL; +} + +static int +omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + struct gomp_device_descr *dst_devicep, + struct gomp_device_descr *src_devicep) +{ + size_t dst_slice = element_size; + size_t src_slice = element_size; + size_t j, dst_off, src_off, length; + int i, ret; + + if (num_dims == 1) + { + if (__builtin_mul_overflow (element_size, volume[0], &length) + || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) + return EINVAL; + if (dst_devicep == NULL && src_devicep == NULL) + memcpy ((char *) dst + dst_off, (char *) src + src_off, length); + else if (src_devicep == NULL) + dst_devicep->host2dev_func (dst_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (dst_devicep == NULL) + src_devicep->dev2host_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else if (src_devicep == dst_devicep) + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); + else + return EINVAL; + return 0; + } + + /* FIXME: it would be nice to have some plugin function to handle + num_dims == 2 and num_dims == 3 more efficiently. Larger ones can + be handled in the generic recursion below, and for host-host it + should be used even for any num_dims >= 2. */ + + for (i = 1; i < num_dims; i++) + if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) + || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) + return EINVAL; + if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) + || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) + return EINVAL; + for (j = 0; j < volume[0]; j++) + { + ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, + (char *) src + src_off, + element_size, num_dims - 1, + volume + 1, dst_offsets + 1, + src_offsets + 1, dst_dimensions + 1, + src_dimensions + 1, dst_devicep, + src_devicep); + if (ret) + return ret; + dst_off += dst_slice; + src_off += src_slice; + } + return 0; +} + +int +omp_target_memcpy_rect (void *dst, void *src, size_t element_size, + int num_dims, const size_t *volume, + const size_t *dst_offsets, + const size_t *src_offsets, + const size_t *dst_dimensions, + const size_t *src_dimensions, + int dst_device_num, int src_device_num) +{ + struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; + + if (!dst && !src) + return INT_MAX; + + if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (dst_device_num < 0) + return EINVAL; + + dst_devicep = resolve_device (dst_device_num); + if (dst_devicep == NULL) + return EINVAL; + + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + dst_devicep = NULL; + } + if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) + { + if (src_device_num < 0) + return EINVAL; + + src_devicep = resolve_device (src_device_num); + if (src_devicep == NULL) + return EINVAL; + + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + src_devicep = NULL; + } + + if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) + return EINVAL; + + if (src_devicep) + gomp_mutex_lock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_lock (&dst_devicep->lock); + int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, + volume, dst_offsets, src_offsets, + dst_dimensions, src_dimensions, + dst_devicep, src_devicep); + if (src_devicep) + gomp_mutex_unlock (&src_devicep->lock); + else if (dst_devicep) + gomp_mutex_unlock (&dst_devicep->lock); + return ret; +} + +int +omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, + size_t device_offset, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) host_ptr; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n) + { + if (n->tgt->tgt_start + n->tgt_offset + == (uintptr_t) device_ptr + device_offset + && n->host_start <= cur_node.host_start + && n->host_end >= cur_node.host_end) + ret = 0; + } + else + { + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->array = gomp_malloc (sizeof (*tgt->array)); + tgt->refcount = 1; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = NULL; + tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node array = tgt->array; + splay_tree_key k = &array->key; + k->host_start = cur_node.host_start; + k->host_end = cur_node.host_end; + k->tgt = tgt; + k->tgt_offset = (uintptr_t) device_ptr + device_offset; + k->refcount = REFCOUNT_INFINITY; + k->async_refcount = 0; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + ret = 0; + } + gomp_mutex_unlock (&devicep->lock); + return ret; +} + +int +omp_target_disassociate_ptr (void *ptr, int device_num) +{ + if (device_num == GOMP_DEVICE_HOST_FALLBACK) + return EINVAL; + + if (device_num < 0) + return EINVAL; + + struct gomp_device_descr *devicep = resolve_device (device_num); + if (devicep == NULL) + return EINVAL; + + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + return EINVAL; + + gomp_mutex_lock (&devicep->lock); + + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + int ret = EINVAL; + + cur_node.host_start = (uintptr_t) ptr; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n + && n->host_start == cur_node.host_start + && n->refcount == REFCOUNT_INFINITY + && n->tgt->tgt_start == 0 + && n->tgt->to_free == NULL + && n->tgt->refcount == 1 + && n->tgt->list_count == 0) + { + splay_tree_remove (&devicep->mem_map, n); + gomp_unmap_tgt (n->tgt); + ret = 0; + } + + gomp_mutex_unlock (&devicep->lock); + return ret; +} + #ifdef PLUGIN_SUPPORT /* This function tries to load a plugin for DEVICE. Name of plugin is passed @@ -1153,7 +2106,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (host2dev); device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - DLSYM (run); + { + DLSYM (run); + DLSYM (dev2dev); + } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { if (!DLSYM_OPT (openacc.exec, openacc_parallel) |