diff options
Diffstat (limited to 'erts/emulator/beam/jit/arm/instr_bif.cpp')
-rw-r--r-- | erts/emulator/beam/jit/arm/instr_bif.cpp | 995 |
1 files changed, 995 insertions, 0 deletions
diff --git a/erts/emulator/beam/jit/arm/instr_bif.cpp b/erts/emulator/beam/jit/arm/instr_bif.cpp new file mode 100644 index 0000000000..43890fc9a5 --- /dev/null +++ b/erts/emulator/beam/jit/arm/instr_bif.cpp @@ -0,0 +1,995 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2020-2023. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include "beam_asm.hpp" + +extern "C" +{ +#include "beam_common.h" +#include "code_ix.h" +#include "erl_bif_table.h" +#include "erl_nfunc_sched.h" +#include "bif.h" +#include "erl_msacc.h" +} + +/* ARG2 = argument vector, ARG4 (!) = bif function pointer + * + * Result is returned in ARG1 (will be THE_NON_VALUE if the BIF call failed). */ +void BeamGlobalAssembler::emit_i_bif_guard_shared() { + /* We use the X register array for the arguments for the BIF. The + * actual contents of the first three X registers are kept safe in + * callee-saved machine registers (XREG0 through XREG2). + */ + ERTS_CT_ASSERT(ERTS_HIGHEST_CALLEE_SAVE_XREG >= 2); + + emit_enter_runtime_frame(); + emit_enter_runtime<Update::eReductions>(); + + a.mov(ARG1, c_p); + lea(ARG2, getXRef(0)); + mov_imm(ARG3, 0); + runtime_call(ARG4, 3); /* ARG3 is never used by guard BIFs. */ + + emit_leave_runtime<Update::eReductions>(); + emit_leave_runtime_frame(); + a.ret(a64::x30); +} + +/* ARG2 = argument vector, ARG4 (!) = bif function pointer + * + * Result is returned in RET. */ +void BeamGlobalAssembler::emit_i_bif_body_shared() { + Label error = a.newLabel(); + + /* See comment in emit_i_bif_guard_shared. */ + ERTS_CT_ASSERT(ERTS_HIGHEST_CALLEE_SAVE_XREG >= 2); + + emit_enter_runtime_frame(); + emit_enter_runtime<Update::eReductions>(); + + /* Save current BIF for the error path. */ + a.mov(ARG1, c_p); + lea(ARG2, getXRef(0)); + a.str(ARG4, TMP_MEM1q); + mov_imm(ARG3, 0); /* ARG3 is never used by guard BIFs. */ + + runtime_call(ARG4, 3); + emit_branch_if_not_value(ARG1, error); + + emit_leave_runtime<Update::eReductions>(); + + emit_leave_runtime_frame(); + a.ret(a64::x30); + + a.bind(error); + { + /* Find the correct MFA from the BIF's function address. */ + a.ldr(ARG1, TMP_MEM1q); + runtime_call<1>(ubif2mfa); + + /* The argument registers must be reloaded on error, as the machine + * registers may contain garbage, which will later be swapped into the + * register array in the `raise_exception` fragment. */ + emit_leave_runtime<Update::eReductions | Update::eXRegs>(3); + emit_leave_runtime_frame(); + + a.mov(ARG4, ARG1); + a.b(labels[raise_exception]); + } +} + +void BeamModuleAssembler::emit_i_bif1(const ArgSource &Src1, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { + auto src1 = load_source(Src1, TMP1); + + a.str(src1.reg, getXRef(0)); + + emit_i_bif(Fail, Bif, Dst); +} + +void BeamModuleAssembler::emit_i_bif2(const ArgSource &Src1, + const ArgSource &Src2, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { + auto [src1, src2] = load_sources(Src1, TMP1, Src2, TMP2); + + a.stp(src1.reg, src2.reg, getXRef(0)); + + emit_i_bif(Fail, Bif, Dst); +} + +void BeamModuleAssembler::emit_i_bif3(const ArgSource &Src1, + const ArgSource &Src2, + const ArgSource &Src3, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { + auto [src1, src2] = load_sources(Src1, TMP1, Src2, TMP2); + auto src3 = load_source(Src3, TMP3); + + a.stp(src1.reg, src2.reg, getXRef(0)); + a.str(src3.reg, getXRef(2)); + + emit_i_bif(Fail, Bif, Dst); +} + +void BeamModuleAssembler::emit_i_bif(const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { + mov_arg(ARG4, Bif); + + if (Fail.get() != 0) { + fragment_call(ga->get_i_bif_guard_shared()); + emit_branch_if_not_value(ARG1, resolve_beam_label(Fail, dispUnknown)); + } else { + fragment_call(ga->get_i_bif_body_shared()); + } + + mov_arg(Dst, ARG1); +} + +/* + * Emit code for guard BIFs that can't fail (e.g. is_list/1). We + * don't need to test for failure. + */ + +void BeamModuleAssembler::emit_nofail_bif1(const ArgSource &Src1, + const ArgWord &Bif, + const ArgRegister &Dst) { + auto src1 = load_source(Src1, TMP1); + + a.str(src1.reg, getXRef(0)); + + mov_arg(ARG4, Bif); + fragment_call(ga->get_i_bif_guard_shared()); + mov_arg(Dst, ARG1); +} + +void BeamModuleAssembler::emit_nofail_bif2(const ArgSource &Src1, + const ArgSource &Src2, + const ArgWord &Bif, + const ArgRegister &Dst) { + auto [src1, src2] = load_sources(Src1, TMP1, Src2, TMP2); + + a.stp(src1.reg, src2.reg, getXRef(0)); + + mov_arg(ARG4, Bif); + fragment_call(ga->get_i_bif_guard_shared()); + mov_arg(Dst, ARG1); +} + +void BeamModuleAssembler::emit_i_length_setup(const ArgLabel &Fail, + const ArgWord &Live, + const ArgSource &Src) { + mov_arg(TMP1, Src); + mov_imm(TMP2, make_small(0)); + + /* Store trap state after the currently live registers. There are + * 3 extra registers beyond the ordinary ones that we're free to + * use for whatever purpose. */ + ERTS_CT_ASSERT(ERTS_X_REGS_ALLOCATED - MAX_REG >= 3); + mov_arg(ArgXRegister(Live.get() + 0), TMP1); + mov_arg(ArgXRegister(Live.get() + 1), TMP2); + + /* Store original argument. This is only needed for exceptions and can be + * safely skipped in guards. */ + if (Fail.get() == 0) { + mov_arg(ArgXRegister(Live.get() + 2), TMP1); + } +} + +/* ARG2 = live registers, ARG3 = entry address + * + * Result is returned in RET. */ +void BeamGlobalAssembler::emit_i_length_common(Label fail, int state_size) { + Label trap_or_error = a.newLabel(); + + ASSERT(state_size >= 2 && state_size <= ERTS_X_REGS_ALLOCATED - MAX_REG); + + /* Save arguments for error/trapping path. */ + a.stp(ARG2, ARG3, TMP_MEM1q); + + emit_enter_runtime_frame(); + emit_enter_runtime<Update::eReductions | Update::eXRegs>(); + + a.mov(ARG1, c_p); + lea(TMP1, getXRef(0)); + a.add(ARG2, TMP1, ARG2, arm::lsl(3)); + runtime_call<2>(erts_trapping_length_1); + + emit_branch_if_not_value(ARG1, trap_or_error); + + emit_leave_runtime<Update::eReductions | Update::eXRegs>(); + emit_leave_runtime_frame(); + + a.ret(a64::x30); + + a.bind(trap_or_error); + { + a.ldp(ARG2, ARG3, TMP_MEM1q); + a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, freason))); + a.cmp(TMP1, imm(TRAP)); + a.b_ne(fail); + + emit_leave_runtime<Update::eReductions | Update::eXRegs>(); + emit_leave_runtime_frame(); + + /* The trap state is stored in the registers above the current live + * ones, so we add the state size (in words) to keep it alive. */ + a.add(ARG2, ARG2, imm(state_size)); + + a.str(ZERO, arm::Mem(c_p, offsetof(Process, current))); + a.str(ARG2, arm::Mem(c_p, offsetof(Process, arity))); + + /* We'll find our way back through the entry address (ARG3). */ + a.b(labels[context_switch_simplified]); + } +} + +/* ARG2 = live registers, ARG3 = entry address + * + * Result is returned in RET. */ +void BeamGlobalAssembler::emit_i_length_body_shared() { + Label error = a.newLabel(); + /* `state_size = 3` to include the original argument. */ + emit_i_length_common(error, 3); + + a.bind(error); + { + static const ErtsCodeMFA bif_mfa = {am_erlang, am_length, 1}; + + /* Move the original argument to x0. It's stored in the third word of + * the trap state. */ + lea(TMP1, getXRef(0)); + a.add(ARG2, TMP1, ARG2, arm::lsl(3)); + a.ldr(TMP1, arm::Mem(ARG2, sizeof(Eterm[2]))); + + emit_leave_runtime<Update::eReductions | Update::eXRegs>(); + emit_leave_runtime_frame(); + + a.mov(XREG0, TMP1); + + mov_imm(ARG4, &bif_mfa); + emit_raise_exception(); + } +} + +/* ARG2 = live registers, ARG3 = entry address + * + * Result is returned in ARG. Error is indicated by THE_NON_VALUE. */ +void BeamGlobalAssembler::emit_i_length_guard_shared() { + Label error = a.newLabel(); + + emit_i_length_common(error, 2); + + a.bind(error); + { + emit_leave_runtime<Update::eReductions | Update::eXRegs>(); + emit_leave_runtime_frame(); + + a.ret(a64::x30); + } +} + +void BeamModuleAssembler::emit_i_length(const ArgLabel &Fail, + const ArgWord &Live, + const ArgRegister &Dst) { + Label entry = a.newLabel(); + + a.bind(entry); + + mov_arg(ARG2, Live); + a.adr(ARG3, entry); + if (Fail.get() != 0) { + fragment_call(ga->get_i_length_guard_shared()); + emit_branch_if_not_value(ARG1, resolve_beam_label(Fail, dispUnknown)); + } else { + fragment_call(ga->get_i_length_body_shared()); + } + + mov_arg(Dst, ARG1); +} + +#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) + +static Eterm debug_call_light_bif(Process *c_p, + Eterm *reg, + ErtsCodePtr I, + ErtsBifFunc vbf) { + Eterm result; + + ERTS_UNREQ_PROC_MAIN_LOCK(c_p); + { + ERTS_CHK_MBUF_SZ(c_p); + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + result = vbf(c_p, reg, I); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result)); + ERTS_CHK_MBUF_SZ(c_p); + + ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); + ERTS_HOLE_CHECK(c_p); + } + PROCESS_MAIN_CHK_LOCKS(c_p); + ERTS_REQ_PROC_MAIN_LOCK(c_p); + + return result; +} +#endif + +/* It is important that the below code is as optimized as possible. + * When doing any changes, make sure to look at the estone bif_dispatch + * benchmark to make sure you don't introduce any regressions. + * + * ARG3 = entry + * ARG4 = export entry + * ARG8 = BIF pointer + */ +void BeamGlobalAssembler::emit_call_light_bif_shared() { + /* We use the HTOP, FCALLS, and XREG1 registers as they are not + * used on the runtime-stack and are caller save. */ + + arm::Mem entry_mem = TMP_MEM1q, export_mem = TMP_MEM2q, + mbuf_mem = TMP_MEM3q; + + Label trace = a.newLabel(), yield = a.newLabel(); + + /* Spill everything we may need on the error and GC paths. */ + a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, mbuf))); + a.stp(ARG3, ARG4, TMP_MEM1q); + a.str(TMP1, mbuf_mem); + + /* Check if we should trace this bif call or handle save_calls. Both + * variants dispatch through the export entry. */ + a.ldr(TMP1.w(), arm::Mem(ARG4, offsetof(Export, is_bif_traced))); + a.cmp(TMP1, imm(0)); + a.ccmp(active_code_ix, + imm(ERTS_SAVE_CALLS_CODE_IX), + imm(NZCV::kZF), + imm(arm::CondCode::kEQ)); + a.b_eq(trace); + + a.subs(FCALLS, FCALLS, imm(1)); + a.b_le(yield); + { + Label check_bif_return = a.newLabel(), gc_after_bif_call = a.newLabel(); + + emit_enter_runtime_frame(); + emit_enter_runtime<Update::eReductions | Update::eStack | + Update::eHeap | Update::eXRegs>(); + +#ifdef ERTS_MSACC_EXTENDED_STATES + { + Label skip_msacc = a.newLabel(); + + a.ldr(TMP1, erts_msacc_cache); + a.cbz(TMP1, skip_msacc); + + /* The values of the X registers are in the X register array, so we + * use XREG0 to save the entry pointer (ARG3) over this call. */ + a.mov(XREG0, ARG3); + + a.ldr(ARG1, erts_msacc_cache); + a.ldr(ARG2, arm::Mem(ARG4, offsetof(Export, info.mfa.module))); + a.mov(ARG3, ARG8); + runtime_call<3>(erts_msacc_set_bif_state); + + a.mov(ARG8, ARG1); + a.mov(ARG3, XREG0); + + a.bind(skip_msacc); + } +#endif + + { + /* Call the BIF proper. ARG3 and ARG8 have been set earlier. */ + a.mov(ARG1, c_p); + load_x_reg_array(ARG2); + +#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) + a.mov(ARG4, ARG8); + runtime_call<4>(debug_call_light_bif); +#else + runtime_call(ARG8, 3); +#endif + } + +#ifdef ERTS_MSACC_EXTENDED_STATES + { + Label skip_msacc = a.newLabel(); + + a.mov(XREG0, ARG1); + + a.ldr(TMP1, erts_msacc_cache); + a.cbz(TMP1, skip_msacc); + + lea(ARG1, erts_msacc_cache); + runtime_call<1>(erts_msacc_update_cache); + + /* Set state to emulator if msacc has been enabled */ + a.ldr(ARG1, erts_msacc_cache); + a.cbz(ARG1, skip_msacc); + + mov_imm(ARG2, ERTS_MSACC_STATE_EMULATOR); + mov_imm(ARG3, 1); + runtime_call<3>(erts_msacc_set_state_m__); + + a.bind(skip_msacc); + a.mov(ARG1, XREG0); + } +#endif + + /* We must update the active code index in case another process has + * loaded new code, as the result of this BIF may be observable on both + * ends. + * + * It doesn't matter whether the BIF modifies anything; if process A + * loads new code and calls erlang:monotonic_time/0 soon after, we'd + * break the illusion of atomic upgrades if process B still ran old code + * after seeing a later timestamp from its own call to + * erlang:monotonic_time/0. */ + emit_leave_runtime<Update::eReductions | Update::eCodeIndex | + Update::eHeap | Update::eStack | Update::eXRegs>(); + emit_leave_runtime_frame(); + + /* ERTS_IS_GC_DESIRED_INTERNAL */ + { + a.ldr(TMP1.w(), arm::Mem(c_p, offsetof(Process, flags))); + a.tst(TMP1, imm(F_FORCE_GC | F_DISABLE_GC)); + + a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, bin_vheap_sz))); + a.ldr(TMP2, arm::Mem(c_p, offsetof(Process, off_heap.overhead))); + + /* If neither F_FORCE_GC nor F_DISABLE_GC were set, + * test whether binary heap size should trigger GC. + * + * Otherwise, set the flags as if `off_heap.overhead > bin_vheap_sz` + * to force a GC. */ + a.ccmp(TMP2, TMP1, imm(NZCV::kCF), imm(arm::CondCode::kEQ)); + + a.sub(TMP1, E, HTOP); + a.asr(TMP1, TMP1, imm(3)); + a.ldr(TMP2, arm::Mem(c_p, offsetof(Process, mbuf_sz))); + + /* If our binary heap size was small enough not to need a GC, check + * whether the heap fragment size is larger than the remaining heap + * size. + * + * Otherwise, set the flags as if it is to force a GC. */ + a.ccmp(TMP1, TMP2, imm(NZCV::kVF), imm(arm::CondCode::kLS)); + a.b_lt(gc_after_bif_call); + } + + a.bind(check_bif_return); + { + Label error = a.newLabel(), trap = a.newLabel(); + + emit_branch_if_not_value(ARG1, trap); + + a.mov(XREG0, ARG1); + a.ret(a64::x30); + + a.bind(trap); + { + a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, freason))); + emit_branch_if_ne(TMP1, TRAP, error); + + /* Push our return address to the Erlang stack and trap out. + * + * The BIF_TRAP macros all set up c_p->arity and c_p->current, + * so we can use a simplified context switch. */ + emit_enter_erlang_frame(); + a.ldr(ARG3, arm::Mem(c_p, offsetof(Process, i))); + a.b(labels[context_switch_simplified]); + } + + a.bind(error); + { + /* raise_exception_shared expects current PC in ARG2 and MFA in + * ARG4. */ + a.ldp(ARG2, ARG4, entry_mem); + add(ARG4, ARG4, offsetof(Export, info.mfa)); + a.b(labels[raise_exception_shared]); + } + } + + a.bind(gc_after_bif_call); + { + emit_enter_runtime_frame(); + emit_enter_runtime<Update::eReductions | Update::eStack | + Update::eHeap | Update::eXRegs>(); + + a.mov(ARG3, ARG1); + + a.mov(ARG1, c_p); + a.ldr(ARG2, mbuf_mem); + load_x_reg_array(ARG4); + a.ldr(ARG5, export_mem); + a.ldr(ARG5, arm::Mem(ARG5, offsetof(Export, info.mfa.arity))); + runtime_call<5>(erts_gc_after_bif_call_lhf); + + emit_leave_runtime<Update::eReductions | Update::eStack | + Update::eHeap | Update::eXRegs>(); + emit_leave_runtime_frame(); + + a.b(check_bif_return); + } + } + + a.bind(trace); + { + /* Call the export entry instead of the BIF. */ + branch(emit_setup_dispatchable_call(ARG4)); + } + + a.bind(yield); + { + a.ldr(ARG2, arm::Mem(ARG4, offsetof(Export, info.mfa.arity))); + lea(ARG4, arm::Mem(ARG4, offsetof(Export, info.mfa))); + a.str(ARG2, arm::Mem(c_p, offsetof(Process, arity))); + a.str(ARG4, arm::Mem(c_p, offsetof(Process, current))); + + /* We'll find our way back through ARG3 (entry address). */ + a.b(labels[context_switch_simplified]); + } +} + +void BeamModuleAssembler::emit_call_light_bif(const ArgWord &Bif, + const ArgExport &Exp) { + Label entry = a.newLabel(); + + a.bind(entry); + + mov_arg(ARG4, Exp); + mov_arg(ARG8, Bif); + a.adr(ARG3, entry); + + fragment_call(ga->get_call_light_bif_shared()); +} + +void BeamModuleAssembler::emit_send() { + Label entry = a.newLabel(); + + /* This is essentially a mirror of call_light_bif, there's no point to + * specializing send/2 anymore. We do it here because it's far more work to + * do it in the loader. */ + a.bind(entry); + + a.ldr(ARG4, embed_constant(BIF_TRAP_EXPORT(BIF_send_2), disp32K)); + a.ldr(ARG8, embed_constant(send_2, disp32K)); + a.adr(ARG3, entry); + + fragment_call(ga->get_call_light_bif_shared()); +} + +void BeamModuleAssembler::emit_nif_start() { + /* load time only instruction */ +} + +void BeamGlobalAssembler::emit_bif_nif_epilogue(void) { + Label check_trap = a.newLabel(), trap = a.newLabel(), error = a.newLabel(); + +#ifdef ERTS_MSACC_EXTENDED_STATES + { + Label skip_msacc = a.newLabel(); + + a.ldr(TMP1, erts_msacc_cache); + a.cbz(TMP1, skip_msacc); + + /* The values of the X registers are in the X register array, + * so we can use XREG0 to save the contents of ARG1 during the + * call. */ + a.mov(XREG0, ARG1); + a.ldr(ARG1, erts_msacc_cache); + mov_imm(ARG2, ERTS_MSACC_STATE_EMULATOR); + mov_imm(ARG3, 1); + runtime_call<3>(erts_msacc_set_state_m__); + a.mov(ARG1, XREG0); + + a.bind(skip_msacc); + } +#endif + + /* Another process may have loaded new code and somehow notified us through + * this call, so we must update the active code index. */ + emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs | + Update::eReductions | Update::eCodeIndex>(); + + emit_branch_if_not_value(ARG1, check_trap); + + comment("Do return and dispatch to it"); + a.mov(XREG0, ARG1); + + emit_leave_erlang_frame(); + a.ret(a64::x30); + + a.bind(check_trap); + a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, freason))); + a.cmp(TMP1, imm(TRAP)); + a.b_ne(error); + { + comment("yield"); + + comment("test trap to hibernate"); + a.ldr(TMP1.w(), arm::Mem(c_p, offsetof(Process, flags))); + a.tbz(TMP1, imm(Support::ctz(F_HIBERNATE_SCHED)), trap); + + comment("do hibernate trap"); + a.and_(TMP1, TMP1, imm(~F_HIBERNATE_SCHED)); + a.str(TMP1.w(), arm::Mem(c_p, offsetof(Process, flags))); + a.b(labels[do_schedule]); + } + + a.bind(trap); + { + comment("do normal trap"); + + /* The BIF_TRAP macros all set up c_p->arity and c_p->current, so we + * can use a simplified context switch. */ + a.ldr(ARG3, arm::Mem(c_p, offsetof(Process, i))); + a.b(labels[context_switch_simplified]); + } + + a.bind(error); + { + a.mov(ARG2, E); + + emit_enter_runtime(); + + a.mov(ARG1, c_p); + runtime_call<2>(erts_printable_return_address); + + emit_leave_runtime(); + + a.mov(ARG2, ARG1); + a.ldr(ARG4, arm::Mem(c_p, offsetof(Process, current))); + a.b(labels[raise_exception_shared]); + } +} + +/* Used by call_bif, dispatch_bif, and export_trampoline. + * + * Note that we don't check reductions here as we may have jumped here through + * interpreted code (e.g. an ErtsNativeFunc or export entry) and it's very + * tricky to yield back. Reductions are checked in module code instead. + * + * ARG2 = BIF MFA + * ARG3 = I (rip), doesn't need to point past an MFA + * ARG4 = function to be called */ +void BeamGlobalAssembler::emit_call_bif_shared(void) { + /* "Heavy" BIFs need up-to-date values for `c_p->i`, `c_p->current`, and + * `c_p->arity`. */ + + emit_enter_runtime_frame(); + a.str(ARG2, arm::Mem(c_p, offsetof(Process, current))); + /* `call_bif` wants arity in ARG5. */ + a.ldr(ARG5, arm::Mem(ARG2, offsetof(ErtsCodeMFA, arity))); + a.str(ARG5, arm::Mem(c_p, offsetof(Process, arity))); + a.str(ARG3, arm::Mem(c_p, offsetof(Process, i))); + + /* The corresponding leave can be found in the epilogue. */ + emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs | + Update::eReductions>(); + +#ifdef ERTS_MSACC_EXTENDED_STATES + { + Label skip_msacc = a.newLabel(); + + a.ldr(TMP1, erts_msacc_cache); + a.cbz(TMP1, skip_msacc); + + /* The values of the X registers are in the X register array, so we can + * use XREG0 and XREG1 to save the contents of the ARG* registers + * during the call. */ + a.mov(XREG0, ARG3); + a.mov(XREG1, ARG5); + + a.ldr(ARG1, erts_msacc_cache); + a.ldr(ARG2, arm::Mem(ARG2, offsetof(ErtsCodeMFA, module))); + a.mov(ARG3, ARG4); + runtime_call<3>(erts_msacc_set_bif_state); + a.mov(ARG4, ARG1); + + a.mov(ARG3, XREG0); + a.mov(ARG5, XREG1); + + a.bind(skip_msacc); + } +#endif + + a.mov(ARG1, c_p); + load_x_reg_array(ARG2); + /* ARG3 (I), ARG4 (func), and ARG5 (arity) have already been provided. */ + runtime_call<5>(beam_jit_call_bif); + +#ifdef ERTS_MSACC_EXTENDED_STATES + /* The values of the X registers are in the X register array, so we can use + * XREG0 to save the contents of ARG1 during the call. */ + a.mov(XREG0, ARG1); + lea(ARG1, erts_msacc_cache); + runtime_call<1>(erts_msacc_update_cache); + a.mov(ARG1, XREG0); +#endif + + emit_leave_runtime_frame(); + emit_bif_nif_epilogue(); +} + +void BeamGlobalAssembler::emit_dispatch_bif(void) { + /* c_p->i points into the trampoline of a ErtsNativeFunc, right after the + * `info` structure. */ + a.ldr(ARG3, arm::Mem(c_p, offsetof(Process, i))); + + ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.call_bif_nif) == + sizeof(ErtsCodeInfo)); + + ssize_t mfa_offset = offsetof(ErtsNativeFunc, trampoline.call_bif_nif) - + offsetof(ErtsNativeFunc, trampoline.info.mfa); + + a.sub(ARG2, ARG3, imm(mfa_offset)); + + ssize_t dfunc_offset = offsetof(ErtsNativeFunc, trampoline.dfunc) - + offsetof(ErtsNativeFunc, trampoline.call_bif_nif); + a.ldr(ARG4, arm::Mem(ARG3, dfunc_offset)); + + a.b(labels[call_bif_shared]); +} + +/* This is only used for opcode compatibility with the interpreter, it's never + * actually called. */ +void BeamModuleAssembler::emit_call_bif(const ArgWord &Func) { + (void)Func; + + emit_nyi("emit_call_bif"); +} + +void BeamModuleAssembler::emit_call_bif_mfa(const ArgAtom &M, + const ArgAtom &F, + const ArgWord &A) { + BeamInstr func; + Export *e; + + e = erts_active_export_entry(M.get(), F.get(), A.get()); + ASSERT(e != NULL && e->bif_number != -1); + + func = (BeamInstr)bif_table[e->bif_number].f; + + a.adr(ARG3, current_label); + a.sub(ARG2, ARG3, imm(sizeof(ErtsCodeMFA))); + a.mov(ARG4, imm(func)); + + a.b(resolve_fragment(ga->get_call_bif_shared(), disp128MB)); +} + +void BeamGlobalAssembler::emit_call_nif_early() { + a.mov(ARG2, a64::x30); + a.sub(ARG2, ARG2, imm(BEAM_ASM_FUNC_PROLOGUE_SIZE + sizeof(ErtsCodeInfo))); + + emit_enter_runtime(); + + a.mov(ARG1, c_p); + runtime_call<2>(erts_call_nif_early); + + emit_leave_runtime(); + + /* Emulate `emit_call_nif`, loading the current (phony) instruction + * pointer into ARG3. + * + * Note that we "inherit" the frame that was pushed to the stack prior to + * running the breakpoint instruction, discarding the current content of + * LR (x30). */ + a.mov(ARG3, ARG1); + a.b(labels[call_nif_shared]); +} + +/* Used by call_nif, call_nif_early, and dispatch_nif. + * + * Note that we don't check reductions here as we may have jumped here through + * interpreted code (e.g. an ErtsNativeFunc or export entry) and it's very + * tricky to yield back. Reductions are checked in module code instead. + * + * ARG3 = current I, just past the end of an ErtsCodeInfo. */ +void BeamGlobalAssembler::emit_call_nif_shared(void) { + /* The corresponding leave can be found in the epilogue. */ + emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs | + Update::eReductions>(); + +#ifdef ERTS_MSACC_EXTENDED_STATES + { + Label skip_msacc = a.newLabel(); + + a.ldr(TMP1, erts_msacc_cache); + a.cbz(TMP1, skip_msacc); + + /* The values of the X registers are in the X register array, + * so we can use XREG0 to save the contents of ARG3 during the + * call. */ + a.mov(XREG0, ARG3); + a.ldr(ARG1, erts_msacc_cache); + mov_imm(ARG2, ERTS_MSACC_STATE_NIF); + mov_imm(ARG3, 1); + runtime_call<3>(erts_msacc_set_state_m__); + a.mov(ARG3, XREG0); + + a.bind(skip_msacc); + } +#endif + + a.mov(ARG1, c_p); + a.mov(ARG2, ARG3); + load_x_reg_array(ARG3); + ERTS_CT_ASSERT((4 + BEAM_ASM_FUNC_PROLOGUE_SIZE) % sizeof(UWord) == 0); + a.ldr(ARG4, arm::Mem(ARG2, 4 + BEAM_ASM_FUNC_PROLOGUE_SIZE)); + a.ldr(ARG5, arm::Mem(ARG2, 12 + BEAM_ASM_FUNC_PROLOGUE_SIZE)); + a.ldr(ARG6, arm::Mem(ARG2, 16 + BEAM_ASM_FUNC_PROLOGUE_SIZE)); + runtime_call<5>(beam_jit_call_nif); + + emit_bif_nif_epilogue(); +} + +void BeamGlobalAssembler::emit_dispatch_nif(void) { + /* c_p->i points into the trampoline of a ErtsNativeFunc, right after the + * `info` structure. + * + * ErtsNativeFunc already follows the NIF call layout, so we don't need to + * do anything beyond loading the address. */ + a.ldr(ARG3, arm::Mem(c_p, offsetof(Process, i))); + a.b(labels[call_nif_shared]); +} + +void BeamGlobalAssembler::emit_call_nif_yield_helper() { + Label yield = a.newLabel(); + + a.subs(FCALLS, FCALLS, imm(1)); + a.b_le(yield); + a.b(labels[call_nif_shared]); + + a.bind(yield); + { + int mfa_offset = sizeof(ErtsCodeMFA); + int arity_offset = offsetof(ErtsCodeMFA, arity) - mfa_offset; + + a.ldur(TMP1, arm::Mem(ARG3, arity_offset)); + a.str(TMP1, arm::Mem(c_p, offsetof(Process, arity))); + + a.sub(TMP1, ARG3, imm(mfa_offset)); + a.str(TMP1, arm::Mem(c_p, offsetof(Process, current))); + + /* Yield to `dispatch` rather than `entry` to avoid pushing too many + * frames to the stack. See `emit_call_nif` for details. */ + a.add(ARG3, ARG3, imm(BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3]))); + a.b(labels[context_switch_simplified]); + } +} + +/* WARNING: This stub is memcpy'd, so all code herein must be explicitly + * position-independent. */ +void BeamModuleAssembler::emit_call_nif(const ArgWord &Func, + const ArgWord &NifMod, + const ArgWord &DirtyFunc) { + Label entry = a.newLabel(), dispatch = a.newLabel(); + + /* The start of this function must mimic the layout of ErtsNativeFunc. + * + * We jump here on the very first entry. */ + a.bind(entry); + { + a.b(dispatch); + + /* Everything prior to this, including the breakpoint, is part of the + * `call_bif_nif` field. */ + ASSERT(a.offset() % sizeof(UWord) == 0); + + /* ErtsNativeFunc.func */ + a.embedUInt64(Func.get()); + + /* ErtsNativeFunc.m */ + a.embedUInt64(NifMod.get()); + + /* ErtsNativeFunc.dfunc */ + a.embedUInt64(DirtyFunc.get()); + } + + /* `emit_call_nif_yield_helper` relies on this to compute the address of + * `dispatch` */ + ASSERT((a.offset() - code.labelOffsetFromBase(current_label)) == + BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3])); + + a.bind(dispatch); + { + a.adr(ARG3, current_label); + pic_jmp(ga->get_call_nif_yield_helper()); + } +} + +static ErtsCodePtr get_on_load_address(Process *c_p, Eterm module) { + const Module *modp = erts_get_module(module, erts_active_code_ix()); + + if (modp && modp->on_load) { + const BeamCodeHeader *hdr = (modp->on_load)->code_hdr; + + if (hdr) { + return erts_codeinfo_to_code(hdr->on_load); + } + } + + c_p->freason = BADARG; + + return NULL; +} + +/* Implements the internal and undocumented erlang:call_on_load_function/1, + * which is very tricky to implement as a BIF. */ +void BeamModuleAssembler::emit_i_call_on_load_function() { + static ErtsCodeMFA mfa = {am_erlang, am_call_on_load_function, 1}; + Label next = a.newLabel(); + + a.mov(ARG2, XREG0); + + /* The first X register must be preserved for the error path. */ + emit_enter_runtime(1); + + a.mov(ARG1, c_p); + runtime_call<2>(get_on_load_address); + + emit_leave_runtime(1); + + a.cbnz(ARG1, next); + emit_raise_exception(&mfa); + + a.bind(next); + erlang_call(ARG1); +} + +void BeamModuleAssembler::emit_i_load_nif() { + static ErtsCodeMFA mfa = {am_erlang, am_load_nif, 2}; + + Label entry = a.newLabel(), next = a.newLabel(), schedule = a.newLabel(); + + a.bind(entry); + + emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs>(2); + + a.mov(ARG1, c_p); + a.adr(ARG2, current_label); + load_x_reg_array(ARG3); + runtime_call<3>(beam_jit_load_nif); + + emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs>(2); + + a.cmp(ARG1, imm(RET_NIF_yield)); + a.b_eq(schedule); + + a.cmp(ARG1, imm(RET_NIF_success)); + a.b_eq(next); + + emit_raise_exception(current_label, &mfa); + + a.bind(schedule); + { + a.adr(ARG3, entry); + a.b(resolve_fragment(ga->get_context_switch_simplified(), disp128MB)); + } + + a.bind(next); +} |