summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJon Chesterfield <jonathanchesterfield@gmail.com>2021-11-19 20:18:23 +0000
committerJon Chesterfield <jonathanchesterfield@gmail.com>2021-11-19 20:18:23 +0000
commit04954824ee158fffe6653c7eb51c07347b36ff21 (patch)
treeff546e7f59b2bcaee5052322a4fb7337a66c900f
parentb07b5bd72716625e0976a84d23652d94d8d0165a (diff)
downloadllvm-04954824ee158fffe6653c7eb51c07347b36ff21.tar.gz
[openmp][amdgpu][nfc] Simplify implicit args handling
Removes a +x/-x pair on the only store/load of a variable and deletes some nearby dead code. Also reduces the size of the implicit struct to reflect the code currently emitted by clang. Differential Revision: https://reviews.llvm.org/D114270
-rw-r--r--openmp/libomptarget/plugins/amdgpu/impl/internal.h13
-rw-r--r--openmp/libomptarget/plugins/amdgpu/impl/system.cpp30
-rw-r--r--openmp/libomptarget/plugins/amdgpu/src/rtl.cpp12
3 files changed, 10 insertions, 45 deletions
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
index 8bca165dd5ae..bdac98c1e101 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -38,13 +38,11 @@ typedef struct impl_implicit_args_s {
unsigned long offset_y;
unsigned long offset_z;
unsigned long hostcall_ptr;
- char num_gpu_queues;
- unsigned long gpu_queue_ptr;
- char num_cpu_queues;
- unsigned long cpu_worker_signals;
- unsigned long cpu_queue_ptr;
- unsigned long kernarg_template_ptr;
+ unsigned long unused0;
+ unsigned long unused1;
+ unsigned long unused2;
} impl_implicit_args_t;
+static_assert(sizeof(impl_implicit_args_t) == 56, "");
// ---------------------- Kernel Start -------------
typedef struct atl_kernel_info_s {
@@ -57,9 +55,6 @@ typedef struct atl_kernel_info_s {
uint32_t vgpr_spill_count;
uint32_t kernel_segment_size;
uint32_t num_args;
- std::vector<uint64_t> arg_alignments;
- std::vector<uint64_t> arg_offsets;
- std::vector<uint64_t> arg_sizes;
} atl_kernel_info_t;
typedef struct atl_symbol_info_s {
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index 9151da95d6e4..4d661dc38100 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -62,26 +62,16 @@ public:
};
KernelArgMD()
- : name_(std::string()), typeName_(std::string()), size_(0), offset_(0),
- align_(0), valueKind_(ValueKind::Unknown) {}
+ : name_(std::string()), size_(0), offset_(0),
+ valueKind_(ValueKind::Unknown) {}
// fields
std::string name_;
- std::string typeName_;
uint32_t size_;
uint32_t offset_;
- uint32_t align_;
ValueKind valueKind_;
};
-class KernelMD {
-public:
- KernelMD() : kernargSegmentSize_(0ull) {}
-
- // fields
- uint64_t kernargSegmentSize_;
-};
-
static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
// v3
// {"by_value", KernelArgMD::ValueKind::ByValue},
@@ -320,10 +310,6 @@ int populate_kernelArgMD(msgpack::byte_range args_element,
foronly_string(value, [&](size_t N, const unsigned char *str) {
kernelarg->name_ = std::string(str, str + N);
});
- } else if (message_is_string(key, ".type_name")) {
- foronly_string(value, [&](size_t N, const unsigned char *str) {
- kernelarg->typeName_ = std::string(str, str + N);
- });
} else if (message_is_string(key, ".size")) {
foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; });
} else if (message_is_string(key, ".offset")) {
@@ -395,7 +381,7 @@ static hsa_status_t get_code_object_custom_metadata(
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
- atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}};
+ atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0};
uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
@@ -479,13 +465,10 @@ static hsa_status_t get_code_object_custom_metadata(
"iterate args map in kernel args metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
- // populate info with sizes and offsets
- info.arg_sizes.push_back(lcArg.size_);
// v3 has offset field and not align field
size_t new_offset = lcArg.offset_;
size_t padding = new_offset - offset;
offset = new_offset;
- info.arg_offsets.push_back(lcArg.offset_);
DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
lcArg.offset_);
offset += lcArg.size_;
@@ -501,12 +484,9 @@ static hsa_status_t get_code_object_custom_metadata(
}
}
- // add size of implicit args, e.g.: offset x, y and z and pipe pointer, but
- // do not count the compiler set implicit args, but set your own implicit
- // args by discounting the compiler set implicit args
+ // TODO: Probably don't want this arithmetic
info.kernel_segment_size =
- (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) +
- sizeof(impl_implicit_args_t);
+ (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
kernel_segment_size, info.kernel_segment_size);
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 906b856c6311..71321be3e83c 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1631,22 +1631,12 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
auto It = KernelInfoMap.find(kernelStr);
if (It != KernelInfoMap.end()) {
atl_kernel_info_t info = It->second;
- // return the size for non-implicit args
- kernarg_segment_size =
- info.kernel_segment_size - sizeof(impl_implicit_args_t);
+ kernarg_segment_size = info.kernel_segment_size;
} else {
err = HSA_STATUS_ERROR;
}
}
- // each arg is a void * in this openmp implementation
- uint32_t arg_num = kernarg_segment_size / sizeof(void *);
- std::vector<size_t> arg_sizes(arg_num);
- for (std::vector<size_t>::iterator it = arg_sizes.begin();
- it != arg_sizes.end(); it++) {
- *it = sizeof(void *);
- }
-
// default value GENERIC (in case symbol is missing from cubin file)
llvm::omp::OMPTgtExecModeFlags ExecModeVal =
llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;