Diffstat (limited to 'erts/emulator/beam/jit/x86/beam_asm.cpp')
-rw-r--r--  erts/emulator/beam/jit/x86/beam_asm.cpp  914
1 file changed, 914 insertions(+), 0 deletions(-)
diff --git a/erts/emulator/beam/jit/x86/beam_asm.cpp b/erts/emulator/beam/jit/x86/beam_asm.cpp
new file mode 100644
index 0000000000..4368cc9479
--- /dev/null
+++ b/erts/emulator/beam/jit/x86/beam_asm.cpp
@@ -0,0 +1,914 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2020-2020. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#include "beam_asm.hpp"
+
+extern "C"
+{
+#include "bif.h"
+#include "beam_common.h"
+#include "code_ix.h"
+#include "export.h"
+}
+
+/* Global configuration variables (under the `+J` prefix) */
+#ifdef HAVE_LINUX_PERF_SUPPORT
+int erts_jit_perf_support;
+#endif
+
+/*
+ * Special Beam instructions.
+ */
+
+ErtsCodePtr beam_apply;
+ErtsCodePtr beam_normal_exit;
+ErtsCodePtr beam_exit;
+ErtsCodePtr beam_export_trampoline;
+ErtsCodePtr beam_bif_export_trap;
+ErtsCodePtr beam_continue_exit;
+ErtsCodePtr beam_save_calls;
+
+/* NOTE: These should be the only variables containing trace instructions.
+ * Sometimes tests check the instruction value, and sometimes the variable
+ * reference (one of these), so rogue references will most likely cause
+ * chaos. */
+ErtsCodePtr beam_return_to_trace; /* OpCode(i_return_to_trace) */
+ErtsCodePtr beam_return_trace; /* OpCode(i_return_trace) */
+ErtsCodePtr beam_exception_trace; /* UGLY also OpCode(i_return_trace) */
+ErtsCodePtr beam_return_time_trace; /* OpCode(i_return_time_trace) */
+
+static JitAllocator *jit_allocator;
+
+static BeamGlobalAssembler *bga;
+static BeamModuleAssembler *bma;
+static CpuInfo cpuinfo;
+
+static void beamasm_init_gdb_jit_info(void);
+
+/*
+ * Enter all BIFs into the export table.
+ *
+ * Note that they will all call the error_handler until their modules have
+ * been loaded, which may prevent the system from booting if BIFs from
+ * non-preloaded modules are apply/3'd while loading code. Ordinary BIF
+ * calls will work fine, however, since they don't go through export
+ * entries.
+ */
+static void install_bifs(void) {
+ typedef Eterm (*bif_func_type)(Process *, Eterm *, ErtsCodePtr);
+ int i;
+
+ ASSERT(beam_export_trampoline != NULL);
+ ASSERT(beam_save_calls != NULL);
+
+ for (i = 0; i < BIF_SIZE; i++) {
+ BifEntry *entry;
+ Export *ep;
+ int j;
+
+ entry = &bif_table[i];
+
+ ep = erts_export_put(entry->module, entry->name, entry->arity);
+
+ ep->info.op = op_i_func_info_IaaI;
+ ep->info.mfa.module = entry->module;
+ ep->info.mfa.function = entry->name;
+ ep->info.mfa.arity = entry->arity;
+ ep->bif_number = i;
+
+ for (j = 0; j < ERTS_NUM_CODE_IX; j++) {
+ erts_activate_export_trampoline(ep, j);
+ }
+
+ /* Set up a hidden export entry so we can trap to this BIF without
+ * it being seen when tracing. */
+ erts_init_trap_export(BIF_TRAP_EXPORT(i),
+ entry->module,
+ entry->name,
+ entry->arity,
+ (bif_func_type)entry->f);
+ }
+}
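+
+/* Usage sketch (hypothetical BIF, illustrative only): the hidden trap
+ * export set up above lets a BIF reschedule itself without the extra call
+ * showing up in traces, along the lines of:
+ *
+ *     BIF_RETTYPE my_bif_2(BIF_ALIST_2) {
+ *         if (need_to_yield)
+ *             BIF_TRAP2(BIF_TRAP_EXPORT(BIF_my_bif_2), BIF_P,
+ *                       BIF_ARG_1, BIF_ARG_2);
+ *         BIF_RET(do_the_work(BIF_ARG_1, BIF_ARG_2));
+ *     }
+ */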
+
+static JitAllocator *create_allocator(JitAllocator::CreateParams *params) {
+ void *test_ro, *test_rw;
+ Error err;
+
+ auto *allocator = new JitAllocator(params);
+
+ err = allocator->alloc(&test_ro, &test_rw, 1);
+ allocator->release(test_ro);
+
+ if (err == ErrorCode::kErrorOk) {
+ return allocator;
+ }
+
+ delete allocator;
+ return nullptr;
+}
+
+static JitAllocator *pick_allocator() {
+ JitAllocator::CreateParams single_params;
+ single_params.reset();
+
+#if defined(HAVE_LINUX_PERF_SUPPORT)
+    /* `perf` has a hard time showing symbols for dual-mapped memory, so
+     * we'll fall back to single-mapped memory when perf support is
+     * enabled. */
+ if (erts_jit_perf_support & (BEAMASM_PERF_DUMP | BEAMASM_PERF_MAP)) {
+ if (auto *alloc = create_allocator(&single_params)) {
+ return alloc;
+ }
+
+ ERTS_INTERNAL_ERROR("jit: Failed to allocate executable+writable "
+ "memory. Either allow this or disable the "
+ "'+JPperf' option.");
+ }
+#endif
+
+#if !defined(VALGRIND)
+    /* Default to dual-mapped memory: separate executable and writable
+     * mappings of the same code. This is required on platforms that
+     * enforce W^X, and we prefer it when available to catch errors sooner.
+ *
+ * `blockSize` is analogous to "carrier size," and we pick something
+ * much larger than the default since dual-mapping implies having one
+ * file descriptor per block on most platforms. The block sizes do grow
+ * over time, but we don't want to waste half a dozen fds just to get to
+ * the shell on platforms that are very fd-constrained. */
+ JitAllocator::CreateParams dual_params;
+
+ dual_params.reset();
+    dual_params.options = JitAllocator::kOptionUseDualMapping;
+ dual_params.blockSize = 4 << 20;
+
+ if (auto *alloc = create_allocator(&dual_params)) {
+ return alloc;
+ } else if (auto *alloc = create_allocator(&single_params)) {
+ return alloc;
+ }
+
+ ERTS_INTERNAL_ERROR("jit: Cannot allocate executable memory. Use the "
+ "interpreter instead.");
+#elif defined(VALGRIND)
+ if (auto *alloc = create_allocator(&single_params)) {
+ return alloc;
+ }
+
+ ERTS_INTERNAL_ERROR("jit: the valgrind emulator requires the ability to "
+ "allocate executable+writable memory.");
+#endif
+}
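+
+/* Illustrative sketch (names local to this comment) of what dual-mapping
+ * means for the rest of this file: an allocation returns two aliases of
+ * the same underlying pages, one read-only+executable and one writable.
+ * Code is emitted through the writable alias and entered through the
+ * executable one:
+ *
+ *     void *exec, *rw;
+ *     if (jit_allocator->alloc(&exec, &rw, size) == ErrorCode::kErrorOk) {
+ *         sys_memcpy(rw, emitted_code, size); // write through RW alias
+ *         ((void (*)(void))exec)();           // execute through RX alias
+ *     }
+ */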
+
+void beamasm_init() {
+ unsigned label = 1;
+
+ ASSERT(bga == nullptr && bma == nullptr);
+
+ struct operands {
+ Eterm name;
+ BeamInstr operand;
+ ErtsCodePtr *target;
+ };
+
+ std::vector<struct operands> operands = {
+ {am_exit, op_error_action_code, &beam_exit},
+ {am_continue_exit, op_continue_exit, &beam_continue_exit},
+ {am_return_trace, op_return_trace, &beam_return_trace},
+ {am_return_to_trace, op_i_return_to_trace, &beam_return_to_trace},
+ {am_return_time_trace,
+ op_i_return_time_trace,
+ &beam_return_time_trace},
+ {am_exception_trace, op_return_trace, &beam_exception_trace}};
+
+ Eterm mod_name;
+ ERTS_DECL_AM(erts_beamasm);
+ mod_name = AM_erts_beamasm;
+
+ beamasm_init_perf();
+ beamasm_init_gdb_jit_info();
+
+ /*
+ * Ensure that commonly used fields in the PCB can be accessed with
+ * short instructions. Before removing any of these assertions, please
+ * consider the effect it will have on code size and/or performance.
+ */
+
+ ERTS_CT_ASSERT(offsetof(Process, htop) < 128);
+ ERTS_CT_ASSERT(offsetof(Process, stop) < 128);
+ ERTS_CT_ASSERT(offsetof(Process, fcalls) < 128);
+ ERTS_CT_ASSERT(offsetof(Process, freason) < 128);
+ ERTS_CT_ASSERT(offsetof(Process, fvalue) < 128);
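+
+    /* For example, with `htop` below offset 128 the load
+     *
+     *     a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, htop)));
+     *
+     * encodes with a one-byte (disp8) displacement; an offset of 128 or
+     * more would force a four-byte (disp32) displacement on every such
+     * access. */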
+
+ cpuinfo = CpuInfo::host();
+
+ jit_allocator = pick_allocator();
+
+ bga = new BeamGlobalAssembler(jit_allocator);
+
+ bma = new BeamModuleAssembler(bga, mod_name, 4 + operands.size() * 2);
+
+ for (auto &op : operands) {
+ unsigned func_label, entry_label;
+
+ func_label = label++;
+ entry_label = label++;
+
+ bma->emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, func_label),
+ ArgVal(ArgVal::u, sizeof(UWord))});
+ bma->emit(op_i_func_info_IaaI,
+ {ArgVal(ArgVal::i, func_label),
+ ArgVal(ArgVal::i, am_erts_internal),
+ ArgVal(ArgVal::i, op.name),
+ ArgVal(ArgVal::i, 0)});
+ bma->emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, entry_label),
+ ArgVal(ArgVal::u, sizeof(UWord))});
+ bma->emit(op.operand, {});
+
+ op.operand = entry_label;
+ }
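+
+    /* Each op.operand now holds an entry label rather than an opcode; the
+     * labels are resolved to addresses with getCode() once codegen has
+     * run (see the loop near the end of this function). */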
+
+ {
+ unsigned func_label, apply_label, normal_exit_label;
+
+ func_label = label++;
+ apply_label = label++;
+ normal_exit_label = label++;
+
+ bma->emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, func_label),
+ ArgVal(ArgVal::u, sizeof(UWord))});
+ bma->emit(op_i_func_info_IaaI,
+ {ArgVal(ArgVal::i, func_label),
+ ArgVal(ArgVal::i, am_erts_internal),
+ ArgVal(ArgVal::i, am_apply),
+ ArgVal(ArgVal::i, 3)});
+ bma->emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, apply_label),
+ ArgVal(ArgVal::u, sizeof(UWord))});
+ bma->emit(op_i_apply, {});
+ bma->emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, normal_exit_label),
+ ArgVal(ArgVal::u, sizeof(UWord))});
+ bma->emit(op_normal_exit, {});
+
+ bma->emit(op_int_code_end, {});
+
+ {
+ /* We have no need of the module pointers as we use `getCode(...)`
+ * for everything, and the code will live as long as the emulator
+ * itself. */
+ const void *_ignored_exec;
+ void *_ignored_rw;
+ bma->codegen(jit_allocator, &_ignored_exec, &_ignored_rw);
+ }
+
+ beam_apply = bma->getCode(apply_label);
+ beam_normal_exit = bma->getCode(normal_exit_label);
+ }
+
+ for (auto op : operands) {
+ if (op.target) {
+ *op.target = bma->getCode(op.operand);
+ }
+ }
+
+    /* This instruction relies on register contents, and can only be reached
+     * from a `call_ext_*` instruction, hence the lack of a wrapper
+     * function. */
+ beam_save_calls = (ErtsCodePtr)bga->get_dispatch_save_calls();
+ beam_export_trampoline = (ErtsCodePtr)bga->get_export_trampoline();
+ beam_bif_export_trap = (ErtsCodePtr)bga->get_bif_export_trap();
+}
+
+bool BeamAssembler::hasCpuFeature(uint32_t featureId) {
+ return cpuinfo.hasFeature(featureId);
+}
+
+void init_emulator(void) {
+ install_bifs();
+}
+
+void process_main(ErtsSchedulerData *esdp) {
+ typedef void (*pmain_type)(ErtsSchedulerData *);
+
+ pmain_type pmain = (pmain_type)bga->get_process_main();
+ pmain(esdp);
+}
+
+#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK)
+static Process *erts_debug_schedule(ErtsSchedulerData *esdp,
+ Process *c_p,
+ int calls) {
+ PROCESS_MAIN_CHK_LOCKS(c_p);
+ ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
+ ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
+ c_p = erts_schedule(esdp, c_p, calls);
+ ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
+ ERTS_REQ_PROC_MAIN_LOCK(c_p);
+ PROCESS_MAIN_CHK_LOCKS(c_p);
+ return c_p;
+}
+#endif
+
+/* void process_main(ErtsSchedulerData *esdp); */
+void BeamGlobalAssembler::emit_process_main() {
+ Label context_switch_local = a.newLabel(),
+ context_switch_simplified_local = a.newLabel(),
+ do_schedule_local = a.newLabel(), schedule_next = a.newLabel();
+
+ const x86::Mem start_time_i =
+ getSchedulerRegRef(offsetof(ErtsSchedulerRegisters, start_time_i));
+ const x86::Mem start_time =
+ getSchedulerRegRef(offsetof(ErtsSchedulerRegisters, start_time));
+
+ /* Allocate the register structure on the stack to allow computing the
+ * runtime stack address from it, greatly reducing the cost of stack
+ * swapping. */
+ a.sub(x86::rsp, imm(sizeof(ErtsSchedulerRegisters) + ERTS_CACHE_LINE_SIZE));
+ a.and_(x86::rsp, imm(~ERTS_CACHE_LINE_MASK));
+
+ a.mov(x86::qword_ptr(ARG1, offsetof(ErtsSchedulerData, registers)),
+ x86::rsp);
+
+ /* Center `registers` at the base of x_reg_array so we can use negative
+ * 8-bit displacement to address the commonly used aux_regs, located at the
+ * start of the ErtsSchedulerRegisters struct. */
+ a.lea(registers,
+ x86::qword_ptr(x86::rsp,
+ offsetof(ErtsSchedulerRegisters, x_reg_array.d)));
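+
+    /* E.g. aux_regs sits at the start of ErtsSchedulerRegisters, before
+     * x_reg_array, so getSchedulerRegRef() on it yields a small negative
+     * offset from `registers` that encodes as an 8-bit displacement. */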
+
+#if defined(DEBUG) && defined(NATIVE_ERLANG_STACK)
+ /* Save stack bounds so they can be tested without clobbering anything. */
+ a.call(erts_get_stacklimit);
+
+ a.mov(getSchedulerRegRef(
+ offsetof(ErtsSchedulerRegisters, runtime_stack_end)),
+ RET);
+ a.mov(getSchedulerRegRef(
+ offsetof(ErtsSchedulerRegisters, runtime_stack_start)),
+ x86::rsp);
+#elif !defined(NATIVE_ERLANG_STACK)
+ /* Save the initial SP of the thread so that we can verify that it
+ * doesn't grow. */
+# ifdef JIT_HARD_DEBUG
+ a.mov(getInitialSPRef(), x86::rsp);
+# endif
+
+ /* Manually do an `emit_enter_runtime` to match the `emit_leave_runtime`
+ * below. We avoid `emit_enter_runtime` because it may do additional
+ * assertions that may currently fail.
+ *
+ * IMPORTANT: We must ensure that this sequence leaves the stack
+ * aligned on a 16-byte boundary. */
+ a.mov(getRuntimeStackRef(), x86::rsp);
+ a.sub(x86::rsp, imm(15));
+ a.and_(x86::rsp, imm(-16));
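+    /* Worked example: rsp = ...0x38 -> sub 15 -> ...0x29 -> and -16 ->
+     * ...0x20, the nearest 16-byte boundary at or below the original
+     * stack pointer. */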
+#endif
+
+ load_erl_bits_state(ARG1);
+ runtime_call<1>(erts_bits_init_state);
+
+ a.mov(start_time_i, imm(0));
+ a.mov(start_time, imm(0));
+
+ mov_imm(c_p, 0);
+ mov_imm(FCALLS, 0);
+ mov_imm(ARG3, 0); /* Set reds_used for erts_schedule call */
+
+ a.jmp(schedule_next);
+
+ a.bind(do_schedule_local);
+ {
+ /* Figure out reds_used. def_arg_reg[5] = REDS_IN */
+ a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5])));
+ a.sub(ARG3, FCALLS);
+
+ a.jmp(schedule_next);
+ }
+
+ a.bind(context_switch_local);
+ comment("Context switch, unknown arity/MFA");
+ {
+ Sint arity_offset = offsetof(ErtsCodeMFA, arity) - sizeof(ErtsCodeMFA);
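+
+        /* ARG3 points to the next instruction, which is preceded by an
+         * ErtsCodeMFA, so the MFA fields are read at negative offsets
+         * from it (arity_offset is negative). */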
+
+ a.mov(ARG1, x86::qword_ptr(ARG3, arity_offset));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG1);
+
+ a.lea(ARG1, x86::qword_ptr(ARG3, -(Sint)sizeof(ErtsCodeMFA)));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG1);
+
+ /* !! Fall through !! */
+ }
+
+ a.bind(context_switch_simplified_local);
+ comment("Context switch, known arity and MFA");
+ {
+ Label not_exiting = a.newLabel();
+
+#ifdef DEBUG
+ Label check_i = a.newLabel();
+ /* Check that ARG3 is set to a valid CP. */
+ a.test(ARG3, imm(_CPMASK));
+ a.je(check_i);
+ a.ud2();
+ a.bind(check_i);
+#endif
+
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, i)), ARG3);
+
+#ifdef WIN32
+ a.mov(ARG1d, x86::dword_ptr(c_p, offsetof(Process, state.value)));
+#else
+ a.mov(ARG1d, x86::dword_ptr(c_p, offsetof(Process, state.counter)));
+#endif
+
+ a.test(ARG1d, imm(ERTS_PSFLG_EXITING));
+ a.short_().je(not_exiting);
+ {
+ comment("Process exiting");
+
+            /* We load beam_exit from memory because it has not yet been
+             * set when the global assembler is created. */
+ a.mov(ARG1, imm(&beam_exit));
+ a.mov(ARG1, x86::qword_ptr(ARG1));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, i)), ARG1);
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), imm(0));
+ a.jmp(do_schedule_local);
+ }
+ a.bind(not_exiting);
+
+ /* Figure out reds_used. def_arg_reg[5] = REDS_IN */
+ a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5])));
+ a.sub(ARG3, FCALLS);
+
+ /* Spill reds_used to FCALLS as we no longer need that value */
+ a.mov(FCALLS, ARG3);
+
+ a.mov(ARG1, c_p);
+ load_x_reg_array(ARG2);
+ runtime_call<2>(copy_out_registers);
+
+ /* Restore reds_used from FCALLS */
+ a.mov(ARG3, FCALLS);
+
+ /* !! Fall through !! */
+ }
+
+ a.bind(schedule_next);
+ comment("schedule_next");
+ {
+ Label schedule = a.newLabel(), skip_long_schedule = a.newLabel();
+
+ /* ARG3 contains reds_used at this point */
+
+ a.cmp(start_time, imm(0));
+ a.short_().je(schedule);
+ {
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, start_time);
+
+ /* Spill reds_used in start_time slot */
+ a.mov(start_time, ARG3);
+
+ a.mov(ARG3, start_time_i);
+ runtime_call<3>(check_monitor_long_schedule);
+
+ /* Restore reds_used */
+ a.mov(ARG3, start_time);
+ }
+ a.bind(schedule);
+
+ mov_imm(ARG1, 0);
+ a.mov(ARG2, c_p);
+#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK)
+ runtime_call<3>(erts_debug_schedule);
+#else
+ runtime_call<3>(erts_schedule);
+#endif
+ a.mov(c_p, RET);
+
+#ifdef ERTS_MSACC_EXTENDED_STATES
+ a.lea(ARG1,
+ x86::qword_ptr(registers,
+ offsetof(ErtsSchedulerRegisters,
+ aux_regs.d.erts_msacc_cache)));
+ runtime_call<1>(erts_msacc_update_cache);
+#endif
+
+ a.mov(ARG1, imm((UWord)&erts_system_monitor_long_schedule));
+ a.cmp(x86::qword_ptr(ARG1), imm(0));
+ a.mov(start_time, imm(0));
+ a.short_().je(skip_long_schedule);
+ {
+ /* Enable long schedule test */
+ runtime_call<0>(erts_timestamp_millis);
+ a.mov(start_time, RET);
+ a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, i)));
+ a.mov(start_time_i, RET);
+ }
+ a.bind(skip_long_schedule);
+
+ /* Copy arguments */
+ a.mov(ARG1, c_p);
+ load_x_reg_array(ARG2);
+ runtime_call<2>(copy_in_registers);
+
+ /* Setup reduction counting */
+ a.mov(FCALLS, x86::qword_ptr(c_p, offsetof(Process, fcalls)));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5])), FCALLS);
+
+#ifdef DEBUG
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, debug_reds_in)), FCALLS);
+#endif
+
+ /* Check whether save calls is on */
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, imm(ERTS_PSD_SAVED_CALLS_BUF));
+ runtime_call<2>(erts_psd_get);
+
+ /* Read the active code index, overriding it with
+ * ERTS_SAVE_CALLS_CODE_IX when save_calls is enabled (RET != 0). */
+ a.test(RET, RET);
+ a.mov(ARG1, imm(&the_active_code_index));
+ a.mov(ARG2, imm(ERTS_SAVE_CALLS_CODE_IX));
+ a.mov(active_code_ix.r32(), x86::dword_ptr(ARG1));
+ a.cmovnz(active_code_ix, ARG2);
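+
+        /* C equivalent of the test/cmovnz sequence above, where `buf` is
+         * the erts_psd_get() result in RET:
+         *
+         *     ix = the_active_code_index;
+         *     if (buf != NULL)
+         *         ix = ERTS_SAVE_CALLS_CODE_IX;
+         */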
+
+ /* Start executing the Erlang process. Note that reductions have
+ * already been set up above. */
+ emit_leave_runtime<Update::eStack | Update::eHeap>();
+
+        /* Check whether we are returning from a dirty NIF/BIF call, and if
+         * so, do a bit of cleanup before continuing. */
+ a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, i)));
+ a.cmp(x86::qword_ptr(RET), imm(op_call_nif_WWW));
+ a.je(labels[dispatch_nif]);
+ a.cmp(x86::qword_ptr(RET), imm(op_call_bif_W));
+ a.je(labels[dispatch_bif]);
+ a.jmp(RET);
+ }
+
+ /* Processes may jump to the exported entry points below, executing on the
+ * Erlang stack when entering. These are separate from the `_local` labels
+ * above as we don't want to worry about which stack we're on when the
+ * cases overlap. */
+
+ /* `ga->get_context_switch()`
+ *
+ * The *next* instruction pointer is provided in ARG3, and must be preceded
+ * by an ErtsCodeMFA. */
+ a.bind(labels[context_switch]);
+ {
+ emit_enter_runtime<Update::eStack | Update::eHeap>();
+
+ a.jmp(context_switch_local);
+ }
+
+ /* `ga->get_context_switch_simplified()`
+ *
+ * The next instruction pointer is provided in ARG3, which does not need to
+ * point past an ErtsCodeMFA as the process structure has already been
+ * updated. */
+ a.bind(labels[context_switch_simplified]);
+ {
+ emit_enter_runtime<Update::eStack | Update::eHeap>();
+
+ a.jmp(context_switch_simplified_local);
+ }
+
+ /* `ga->get_do_schedule()`
+ *
+ * `c_p->i` must be set prior to jumping here. */
+ a.bind(labels[do_schedule]);
+ {
+ emit_enter_runtime<Update::eStack | Update::eHeap>();
+
+ a.jmp(do_schedule_local);
+ }
+}
+
+enum jit_actions : uint32_t {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN,
+};
+
+struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+ uint32_t version;
+ jit_actions action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+};
+
+extern "C"
+{
+ extern void ERTS_NOINLINE __jit_debug_register_code(void);
+
+ /* Make sure to specify the version statically, because the
+ * debugger may check the version before we can set it. */
+ struct jit_descriptor __jit_debug_descriptor = {1,
+ JIT_NOACTION,
+ NULL,
+ NULL};
+} /* extern "C" */
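+
+/* This follows GDB's JIT compilation interface: the debugger plants a
+ * breakpoint on __jit_debug_register_code() and, each time it is called,
+ * reads __jit_debug_descriptor.relevant_entry and action_flag to load or
+ * unload the in-memory symbol file the entry describes. */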
+
+static void beamasm_init_gdb_jit_info(void) {
+ Sint symfile_size = sizeof(uint64_t) * 2;
+ uint64_t *symfile = (uint64_t *)malloc(symfile_size);
+ jit_code_entry *entry;
+
+ symfile[0] = 0;
+ symfile[1] = (uint64_t)beam_normal_exit;
+
+ entry = (jit_code_entry *)malloc(sizeof(jit_code_entry));
+
+ /* Add address description */
+ entry->symfile_addr = (char *)symfile;
+ entry->symfile_size = symfile_size;
+
+    /* Insert at the head of the linked list */
+    entry->prev_entry = nullptr;
+    entry->next_entry = __jit_debug_descriptor.first_entry;
+
+    if (entry->next_entry) {
+        entry->next_entry->prev_entry = entry;
+    }
+
+    /* Register the new entry with the debugger */
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+ __jit_debug_descriptor.first_entry = entry;
+ __jit_debug_descriptor.relevant_entry = entry;
+ __jit_debug_register_code();
+}
+
+void BeamAssembler::update_gdb_jit_info(std::string modulename,
+ std::vector<AsmRange> &functions) {
+ Sint symfile_size = sizeof(uint64_t) * 3 + modulename.size() + 1;
+
+ for (auto fun : functions) {
+ symfile_size += sizeof(uint64_t) * 2;
+ symfile_size += fun.name.size() + 1;
+ }
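+
+    /* The symfile blob built below is laid out as (integers are uint64_t):
+     *
+     *     function_count | base_address | code_size
+     *     module_name (NUL-terminated)
+     *     then, per function: start | stop | name (NUL-terminated)
+     */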
+
+ char *symfile = (char *)malloc(symfile_size);
+ jit_code_entry *entry;
+
+ entry = (jit_code_entry *)malloc(sizeof(jit_code_entry));
+
+ /* Add address description */
+ entry->symfile_addr = symfile;
+ entry->symfile_size = symfile_size;
+
+ ((uint64_t *)symfile)[0] = functions.size();
+ ((uint64_t *)symfile)[1] = code.baseAddress();
+ ((uint64_t *)symfile)[2] = (uint64_t)code.codeSize();
+
+ symfile += sizeof(uint64_t) * 3;
+
+ sys_memcpy(symfile, modulename.c_str(), modulename.size() + 1);
+ symfile += modulename.size() + 1;
+
+ for (unsigned i = 0; i < functions.size(); i++) {
+ ((uint64_t *)symfile)[0] = (uint64_t)functions[i].start;
+ ((uint64_t *)symfile)[1] = (uint64_t)functions[i].stop;
+
+ ASSERT(functions[i].start <= functions[i].stop);
+
+ symfile += sizeof(uint64_t) * 2;
+
+ sys_memcpy(symfile,
+ functions[i].name.c_str(),
+ functions[i].name.size() + 1);
+ symfile += functions[i].name.size() + 1;
+ }
+
+ ASSERT(symfile_size == (symfile - entry->symfile_addr));
+
+    /* Insert at the head of the linked list */
+    entry->prev_entry = nullptr;
+    entry->next_entry = __jit_debug_descriptor.first_entry;
+
+    if (entry->next_entry) {
+        entry->next_entry->prev_entry = entry;
+    }
+
+    /* Register the new entry with the debugger */
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+ __jit_debug_descriptor.first_entry = entry;
+ __jit_debug_descriptor.relevant_entry = entry;
+ __jit_debug_register_code();
+}
+
+extern "C"
+{
+ int erts_beam_jump_table(void) {
+#if defined(NO_JUMP_TABLE)
+ return 0;
+#else
+ return 1;
+#endif
+ }
+
+ void *beamasm_new_assembler(Eterm mod, int num_labels, int num_functions) {
+ return new BeamModuleAssembler(bga, mod, num_labels, num_functions);
+ }
+
+ int beamasm_emit(void *instance, unsigned specific_op, BeamOp *op) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ const std::vector<ArgVal> args(&op->a[0], &op->a[op->arity]);
+
+ return ba->emit(specific_op, args);
+ }
+
+ void beamasm_emit_call_nif(const ErtsCodeInfo *info,
+ void *normal_fptr,
+ void *lib,
+ void *dirty_fptr,
+ char *buff,
+ unsigned buff_len) {
+ BeamModuleAssembler ba(bga, info->mfa.module, 3);
+
+ ba.emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, 1), ArgVal(ArgVal::u, sizeof(UWord))});
+ ba.emit(op_i_func_info_IaaI,
+ {ArgVal(ArgVal::i, 1),
+ ArgVal(ArgVal::i, info->mfa.module),
+ ArgVal(ArgVal::i, info->mfa.function),
+ ArgVal(ArgVal::i, info->mfa.arity)});
+ ba.emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, 2), ArgVal(ArgVal::u, sizeof(UWord))});
+ ba.emit(op_i_breakpoint_trampoline, {});
+ ba.emit(op_call_nif_WWW,
+ {ArgVal(ArgVal::i, (BeamInstr)normal_fptr),
+ ArgVal(ArgVal::i, (BeamInstr)lib),
+ ArgVal(ArgVal::i, (BeamInstr)dirty_fptr)});
+
+ ba.codegen(buff, buff_len);
+ }
+
+ void beamasm_emit_call_bif(const ErtsCodeInfo *info,
+ Eterm (*bif)(BIF_ALIST),
+ char *buff,
+ unsigned buff_len) {
+ BeamModuleAssembler ba(bga, info->mfa.module, 3);
+
+ ba.emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, 1), ArgVal(ArgVal::u, sizeof(UWord))});
+ ba.emit(op_i_func_info_IaaI,
+ {ArgVal(ArgVal::i, 1),
+ ArgVal(ArgVal::i, info->mfa.module),
+ ArgVal(ArgVal::i, info->mfa.function),
+ ArgVal(ArgVal::i, info->mfa.arity)});
+ ba.emit(op_aligned_label_Lt,
+ {ArgVal(ArgVal::i, 2), ArgVal(ArgVal::u, sizeof(UWord))});
+ ba.emit(op_i_breakpoint_trampoline, {});
+ ba.emit(op_call_bif_W, {ArgVal(ArgVal::i, (BeamInstr)bif)});
+
+ ba.codegen(buff, buff_len);
+ }
+
+ void beamasm_delete_assembler(void *instance) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ delete ba;
+ }
+
+ void beamasm_purge_module(const void *native_module_exec,
+ void *native_module_rw) {
+ ASSERT(native_module_exec != native_module_rw);
+
+ jit_allocator->release(const_cast<void *>(native_module_exec));
+ }
+
+ ErtsCodePtr beamasm_get_code(void *instance, int label) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return reinterpret_cast<ErtsCodePtr>(ba->getCode(label));
+ }
+
+ const byte *beamasm_get_rodata(void *instance, char *label) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return reinterpret_cast<const byte *>(ba->getCode(label));
+ }
+
+ void beamasm_embed_rodata(void *instance,
+ const char *labelName,
+ const char *buff,
+ size_t size) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ if (size) {
+ ba->embed_rodata(labelName, buff, size);
+ }
+ }
+
+ void beamasm_embed_bss(void *instance, char *labelName, size_t size) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ if (size) {
+ ba->embed_bss(labelName, size);
+ }
+ }
+
+ void beamasm_codegen(void *instance,
+ const void **native_module_exec,
+ void **native_module_rw,
+ const BeamCodeHeader *in_hdr,
+ const BeamCodeHeader **out_exec_hdr,
+ BeamCodeHeader **out_rw_hdr) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+
+ ba->codegen(jit_allocator,
+ native_module_exec,
+ native_module_rw,
+ in_hdr,
+ out_exec_hdr,
+ out_rw_hdr);
+ }
+
+ Uint beamasm_get_header(void *instance, const BeamCodeHeader **hdr) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+
+ *hdr = ba->getCodeHeader();
+
+ return ba->getCodeSize();
+ }
+
+ char *beamasm_get_base(void *instance) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return (char *)ba->getBaseAddress();
+ }
+
+ size_t beamasm_get_offset(void *instance) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return ba->getOffset();
+ }
+
+ const ErtsCodeInfo *beamasm_get_on_load(void *instance) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return ba->getOnLoad();
+ }
+
+ unsigned int beamasm_patch_catches(void *instance, char *rw_base) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ return ba->patchCatches(rw_base);
+ }
+
+ void beamasm_patch_import(void *instance,
+ char *rw_base,
+ int index,
+ BeamInstr import) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ ba->patchImport(rw_base, index, import);
+ }
+
+ void beamasm_patch_literal(void *instance,
+ char *rw_base,
+ int index,
+ Eterm lit) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ ba->patchLiteral(rw_base, index, lit);
+ }
+
+ void beamasm_patch_lambda(void *instance,
+ char *rw_base,
+ int index,
+ BeamInstr fe) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ ba->patchLambda(rw_base, index, fe);
+ }
+
+ void beamasm_patch_strings(void *instance,
+ char *rw_base,
+ const byte *string_table) {
+ BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
+ ba->patchStrings(rw_base, string_table);
+ }
+}
+
+/* Intel-recommended one-, two-, and three-byte NOP encodings. */
+const uint8_t *BeamAssembler::nops[3] = {nop1, nop2, nop3};
+const uint8_t BeamAssembler::nop1[1] = {0x90};             /* nop */
+const uint8_t BeamAssembler::nop2[2] = {0x66, 0x90};       /* 66 nop */
+const uint8_t BeamAssembler::nop3[3] = {0x0F, 0x1F, 0x00}; /* nop DWORD ptr [rax] */