Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_bif.cpp')
-rw-r--r-- | erts/emulator/beam/jit/x86/instr_bif.cpp | 619
1 file changed, 344 insertions, 275 deletions
diff --git a/erts/emulator/beam/jit/x86/instr_bif.cpp b/erts/emulator/beam/jit/x86/instr_bif.cpp index bc390b0fa8..4c2d4f007e 100644 --- a/erts/emulator/beam/jit/x86/instr_bif.cpp +++ b/erts/emulator/beam/jit/x86/instr_bif.cpp @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2020-2022. All Rights Reserved. + * Copyright Ericsson AB 2020-2023. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,7 @@ extern "C" * * Result is returned in RET, error is indicated by ZF. */ void BeamGlobalAssembler::emit_i_bif_guard_shared() { + emit_enter_frame(); emit_enter_runtime<Update::eReductions>(); a.mov(ARG1, c_p); @@ -42,6 +43,7 @@ void BeamGlobalAssembler::emit_i_bif_guard_shared() { runtime_call(ARG4, 3); emit_leave_runtime<Update::eReductions>(); + emit_leave_frame(); emit_test_the_non_value(RET); a.ret(); @@ -53,6 +55,7 @@ void BeamGlobalAssembler::emit_i_bif_guard_shared() { void BeamGlobalAssembler::emit_i_bif_body_shared() { Label error = a.newLabel(); + emit_enter_frame(); emit_enter_runtime<Update::eReductions>(); /* Save current BIF and argument vector for the error path. */ @@ -68,6 +71,7 @@ void BeamGlobalAssembler::emit_i_bif_body_shared() { a.short_().je(error); emit_leave_runtime<Update::eReductions>(); + emit_leave_frame(); a.ret(); @@ -87,14 +91,15 @@ void BeamGlobalAssembler::emit_i_bif_body_shared() { runtime_call<1>(ubif2mfa); emit_leave_runtime<Update::eReductions>(); + emit_leave_frame(); a.mov(ARG4, RET); - a.jmp(labels[handle_error_shared_prologue]); + a.jmp(labels[raise_exception]); } } void BeamModuleAssembler::emit_setup_guard_bif(const std::vector<ArgVal> &args, - const ArgVal &bif) { + const ArgWord &bif) { bool is_contiguous_mem = false; ASSERT(args.size() > 0 && args.size() <= 3); @@ -102,12 +107,12 @@ void BeamModuleAssembler::emit_setup_guard_bif(const std::vector<ArgVal> &args, /* If the guard BIF's arguments are in memory and continuous, for example * `map_get(x0, x1)`, then we can pass the address of the first argument * instead of filling in the argument vector. 
*/ - is_contiguous_mem = args.size() && args[0].isMem(); + is_contiguous_mem = args.size() && args[0].isRegister(); for (size_t i = 1; i < args.size() && is_contiguous_mem; i++) { - const ArgVal &curr = args[i], &prev = args[i - 1]; + const ArgSource &curr = args[i], &prev = args[i - 1]; - is_contiguous_mem = curr.getType() == prev.getType() && - curr.getValue() == prev.getValue() + 1; + is_contiguous_mem = + ArgVal::memory_relation(prev, curr) == ArgVal::consecutive; } if (is_contiguous_mem) { @@ -123,15 +128,15 @@ void BeamModuleAssembler::emit_setup_guard_bif(const std::vector<ArgVal> &args, mov_arg(ARG4, bif); } -void BeamModuleAssembler::emit_i_bif1(const ArgVal &Src1, - const ArgVal &Fail, - const ArgVal &Bif, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_i_bif1(const ArgSource &Src1, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { emit_setup_guard_bif({Src1}, Bif); - if (Fail.getValue() != 0) { + if (Fail.get() != 0) { safe_fragment_call(ga->get_i_bif_guard_shared()); - a.je(labels[Fail.getValue()]); + a.je(resolve_beam_label(Fail)); } else { safe_fragment_call(ga->get_i_bif_body_shared()); } @@ -139,16 +144,16 @@ void BeamModuleAssembler::emit_i_bif1(const ArgVal &Src1, mov_arg(Dst, RET); } -void BeamModuleAssembler::emit_i_bif2(const ArgVal &Src1, - const ArgVal &Src2, - const ArgVal &Fail, - const ArgVal &Bif, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_i_bif2(const ArgSource &Src1, + const ArgSource &Src2, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { emit_setup_guard_bif({Src1, Src2}, Bif); - if (Fail.getValue() != 0) { + if (Fail.get() != 0) { safe_fragment_call(ga->get_i_bif_guard_shared()); - a.je(labels[Fail.getValue()]); + a.je(resolve_beam_label(Fail)); } else { safe_fragment_call(ga->get_i_bif_body_shared()); } @@ -156,17 +161,17 @@ void BeamModuleAssembler::emit_i_bif2(const ArgVal &Src1, mov_arg(Dst, RET); } -void BeamModuleAssembler::emit_i_bif3(const ArgVal &Src1, - const ArgVal &Src2, - const ArgVal &Src3, - const ArgVal &Fail, - const ArgVal &Bif, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_i_bif3(const ArgSource &Src1, + const ArgSource &Src2, + const ArgSource &Src3, + const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst) { emit_setup_guard_bif({Src1, Src2, Src3}, Bif); - if (Fail.getValue() != 0) { + if (Fail.get() != 0) { safe_fragment_call(ga->get_i_bif_guard_shared()); - a.je(labels[Fail.getValue()]); + a.je(resolve_beam_label(Fail)); } else { safe_fragment_call(ga->get_i_bif_body_shared()); } @@ -180,34 +185,34 @@ void BeamModuleAssembler::emit_i_bif3(const ArgVal &Src1, * to align the call targeting the shared fragment. 
*/ -void BeamModuleAssembler::emit_nofail_bif1(const ArgVal &Src1, - const ArgVal &Bif, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_nofail_bif1(const ArgSource &Src1, + const ArgWord &Bif, + const ArgRegister &Dst) { emit_setup_guard_bif({Src1}, Bif); safe_fragment_call(ga->get_i_bif_guard_shared()); mov_arg(Dst, RET); } -void BeamModuleAssembler::emit_nofail_bif2(const ArgVal &Src1, - const ArgVal &Src2, - const ArgVal &Bif, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_nofail_bif2(const ArgSource &Src1, + const ArgSource &Src2, + const ArgWord &Bif, + const ArgRegister &Dst) { emit_setup_guard_bif({Src1, Src2}, Bif); safe_fragment_call(ga->get_i_bif_guard_shared()); mov_arg(Dst, RET); } -void BeamModuleAssembler::emit_i_length_setup(const ArgVal &Fail, - const ArgVal &Live, - const ArgVal &Src) { +void BeamModuleAssembler::emit_i_length_setup(const ArgLabel &Fail, + const ArgWord &Live, + const ArgSource &Src) { x86::Mem trap_state; /* Store trap state after the currently live registers. There's an extra 3 * registers beyond the ordinary ones that we're free to use for whatever * purpose. */ ERTS_CT_ASSERT(ERTS_X_REGS_ALLOCATED - MAX_REG >= 3); - ASSERT(Live.getValue() <= MAX_REG); - trap_state = getXRef(Live.getValue()); + ASSERT(Live.get() <= MAX_REG); + trap_state = getXRef(Live.get()); /* Remainder of the list. */ mov_arg(trap_state, Src); @@ -217,7 +222,7 @@ void BeamModuleAssembler::emit_i_length_setup(const ArgVal &Fail, /* Original argument. This is only needed for exceptions and can be safely * skipped in guards. */ - if (Fail.getValue() == 0) { + if (Fail.get() == 0) { x86::Mem original_argument; original_argument = trap_state.cloneAdjusted(2 * sizeof(Eterm)); @@ -238,6 +243,8 @@ x86::Mem BeamGlobalAssembler::emit_i_length_common(Label fail, int state_size) { trap_state = getXRef(0); trap_state.setIndex(ARG2, 3); + emit_enter_frame(); + /* Save arguments for error/trapping path. */ a.mov(TMP_MEM1q, ARG2); a.mov(TMP_MEM2q, ARG3); @@ -249,6 +256,7 @@ x86::Mem BeamGlobalAssembler::emit_i_length_common(Label fail, int state_size) { runtime_call<2>(erts_trapping_length_1); emit_leave_runtime<Update::eReductions>(); + emit_leave_frame(); emit_test_the_non_value(RET); a.short_().je(trap); @@ -268,7 +276,7 @@ x86::Mem BeamGlobalAssembler::emit_i_length_common(Label fail, int state_size) { a.add(ARG2, imm(state_size)); /* We'll find our way back through the entry address (ARG3). */ - emit_discard_cp(); + a.add(x86::rsp, imm(sizeof(UWord))); a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), imm(0)); a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2); @@ -298,7 +306,7 @@ void BeamGlobalAssembler::emit_i_length_body_shared() { a.mov(getXRef(0), ARG1); a.mov(ARG4, imm(&bif_mfa)); - emit_handle_error(); + a.jmp(labels[raise_exception]); } } @@ -312,14 +320,14 @@ void BeamGlobalAssembler::emit_i_length_guard_shared() { a.bind(error); { - mov_imm(RET, 0); + a.sub(RET, RET); a.ret(); } } -void BeamModuleAssembler::emit_i_length(const ArgVal &Fail, - const ArgVal &Live, - const ArgVal &Dst) { +void BeamModuleAssembler::emit_i_length(const ArgLabel &Fail, + const ArgWord &Live, + const ArgRegister &Dst) { Label entry = a.newLabel(); align_erlang_cp(); @@ -328,13 +336,13 @@ void BeamModuleAssembler::emit_i_length(const ArgVal &Fail, mov_arg(ARG2, Live); a.lea(ARG3, x86::qword_ptr(entry)); - if (Fail.getValue() != 0) { + if (Fail.get() != 0) { /* The return address is discarded when yielding, so it doesn't need to * be aligned. 
*/ safe_fragment_call(ga->get_i_length_guard_shared()); - a.je(labels[Fail.getValue()]); + a.je(resolve_beam_label(Fail)); } else { - fragment_call(ga->get_i_length_body_shared()); + safe_fragment_call(ga->get_i_length_body_shared()); } mov_arg(Dst, RET); @@ -375,31 +383,35 @@ static Eterm debug_call_light_bif(Process *c_p, * RET = BIF pointer */ void BeamGlobalAssembler::emit_call_light_bif_shared() { - /* We use the HTOP and FCALLS registers as they are - not used on the runtime-stack and are caller save. */ + x86::Mem entry_mem = TMP_MEM1q, export_mem = TMP_MEM2q, + mbuf_mem = TMP_MEM3q; + + Label trace = a.newLabel(), yield = a.newLabel(); - x86::Gp I = HTOP, exp = FCALLS; + emit_enter_frame(); - Label error = a.newLabel(), trace = a.newLabel(), trap = a.newLabel(), - yield = a.newLabel(), call_save_calls = a.newLabel(), - call_bif = a.newLabel(), gc_after_bif_call = a.newLabel(), - check_bif_return = a.newLabel(); + /* Spill everything we may need on the error and GC paths. */ + a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, mbuf))); + a.mov(entry_mem, ARG3); + a.mov(export_mem, ARG4); + a.mov(mbuf_mem, ARG1); - /* Check if we should trace this bif call */ + /* Check if we should trace this bif call or handle save_calls. Both + * variants dispatch through the export entry. */ a.cmp(x86::dword_ptr(ARG4, offsetof(Export, is_bif_traced)), imm(0)); a.jne(trace); + a.cmp(active_code_ix, imm(ERTS_SAVE_CALLS_CODE_IX)); + a.je(trace); a.dec(FCALLS); a.jle(yield); { + Label check_bif_return = a.newLabel(), gc_after_bif_call = a.newLabel(); + emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(); - /* Spill the arguments we may need on the error path. */ - a.mov(I, ARG3); - a.mov(exp, ARG4); - #ifdef ERTS_MSACC_EXTENDED_STATES { Label skip_msacc = a.newLabel(); @@ -407,7 +419,7 @@ void BeamGlobalAssembler::emit_call_light_bif_shared() { a.cmp(erts_msacc_cache, imm(0)); a.short_().je(skip_msacc); - a.mov(TMP_MEM1q, RET); + a.mov(TMP_MEM4q, ARG3); a.mov(ARG1, erts_msacc_cache); a.mov(ARG2, @@ -415,29 +427,23 @@ void BeamGlobalAssembler::emit_call_light_bif_shared() { a.mov(ARG3, RET); runtime_call<3>(erts_msacc_set_bif_state); - a.mov(ARG3, I); - a.mov(RET, TMP_MEM1q); + a.mov(ARG3, TMP_MEM4q); a.bind(skip_msacc); } #endif - /* Check if we need to call save_calls */ - a.cmp(active_code_ix, imm(ERTS_SAVE_CALLS_CODE_IX)); - a.je(call_save_calls); - a.bind(call_bif); - a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, mbuf))); - a.mov(TMP_MEM1q, ARG1); - - /* ARG3 and RET have been set earlier. */ - a.mov(ARG1, c_p); - load_x_reg_array(ARG2); + { + /* Call the BIF proper. ARG3 and RET have been set earlier. */ + a.mov(ARG1, c_p); + load_x_reg_array(ARG2); #if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) - a.mov(ARG4, RET); - runtime_call<4>(debug_call_light_bif); + a.mov(ARG4, RET); + runtime_call<4>(debug_call_light_bif); #else - runtime_call(RET, 3); + runtime_call(RET, 3); #endif + } #ifdef ERTS_MSACC_EXTENDED_STATES { @@ -445,180 +451,153 @@ void BeamGlobalAssembler::emit_call_light_bif_shared() { a.cmp(erts_msacc_cache, imm(0)); a.short_().je(skip_msacc); - - /* update cache if it was changed in the bif. 
- TMP_MEM1q is already taken to save ARG1 above */ - a.mov(TMP_MEM2q, RET); - a.lea(ARG1, erts_msacc_cache); - runtime_call<1>(erts_msacc_update_cache); - a.mov(RET, TMP_MEM2q); - - /* set state to emulator if msacc has been enabled */ - a.cmp(erts_msacc_cache, imm(0)); - a.short_().je(skip_msacc); - a.mov(ARG1, erts_msacc_cache); - a.mov(ARG2, imm(ERTS_MSACC_STATE_EMULATOR)); - a.mov(ARG3, imm(1)); - runtime_call<3>(erts_msacc_set_state_m__); - a.mov(RET, TMP_MEM2q); - + { + /* Update cache if it was changed in the BIF, stashing the + * return value in TMP_MEM4q. */ + a.mov(TMP_MEM4q, RET); + a.lea(ARG1, erts_msacc_cache); + runtime_call<1>(erts_msacc_update_cache); + a.mov(RET, TMP_MEM4q); + + /* set state to emulator if msacc has been enabled */ + a.cmp(erts_msacc_cache, imm(0)); + a.short_().je(skip_msacc); + + a.mov(ARG1, erts_msacc_cache); + a.mov(ARG2, imm(ERTS_MSACC_STATE_EMULATOR)); + a.mov(ARG3, imm(1)); + runtime_call<3>(erts_msacc_set_state_m__); + a.mov(RET, TMP_MEM4q); + } a.bind(skip_msacc); } #endif + /* We must update the active code index in case another process has + * loaded new code, as the result of this BIF may be observable on + * both ends. + * + * It doesn't matter whether the BIF modifies anything; if process + * A loads new code and calls erlang:monotonic_time/0 soon after, + * we'd break the illusion of atomic upgrades if process B still + * ran old code after seeing a later timestamp from its own call to + * erlang:monotonic_time/0. */ + emit_leave_runtime<Update::eReductions | Update::eStack | + Update::eHeap | Update::eCodeIndex>(); + /* ERTS_IS_GC_DESIRED_INTERNAL */ { - a.mov(ARG2, x86::qword_ptr(c_p, offsetof(Process, stop))); - a.mov(ARG3, RET); - a.mov(ARG5, x86::qword_ptr(c_p, offsetof(Process, htop))); + /* Test whether GC is forced. */ + a.test(x86::dword_ptr(c_p, offsetof(Process, flags)), + imm(F_FORCE_GC | F_DISABLE_GC)); + a.jne(gc_after_bif_call); - /* Test if binary heap size should trigger gc */ - a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, bin_vheap_sz))); + /* Test if binary heap size should trigger GC. */ + a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, bin_vheap_sz))); a.cmp(x86::qword_ptr(c_p, offsetof(Process, off_heap.overhead)), - RET); - a.mov(RETd, x86::dword_ptr(c_p, offsetof(Process, flags))); - a.seta(x86::cl); /* Clobber ARG1 on windows and ARG4 on Linux */ - a.and_(RETd, imm(F_FORCE_GC | F_DISABLE_GC)); - a.or_(x86::cl, RETb); - a.jne(gc_after_bif_call); + ARG1); + a.ja(gc_after_bif_call); /* Test if heap fragment size is larger than remaining heap size. */ - a.mov(RET, ARG2); - a.sub(RET, ARG5); - a.sar(RET, imm(3)); - a.cmp(RET, x86::qword_ptr(c_p, offsetof(Process, mbuf_sz))); + a.mov(ARG2, x86::qword_ptr(c_p, offsetof(Process, mbuf_sz))); + a.lea(ARG1, x86::qword_ptr(HTOP, ARG2, 0, 3)); + a.cmp(E, ARG1); a.jl(gc_after_bif_call); } - /* - ARG2 is set to E - ARG3 is set to bif return - ARG5 is set to HTOP - - HTOP is exp - E_saved|E is I - */ + /* ! FALL THROUGH ! */ a.bind(check_bif_return); - emit_test_the_non_value(ARG3); - - /* NOTE: Short won't reach if JIT_HARD_DEBUG is defined. */ - a.je(trap); - - a.mov(HTOP, ARG5); -#ifdef NATIVE_ERLANG_STACK - a.mov(E_saved, ARG2); -#else - a.mov(E, ARG2); -#endif - - /* We must update the active code index in case another process has - * loaded new code, as the result of this BIF may be observable on both - * ends. 
- * - * It doesn't matter whether the BIF modifies anything; if process A - * loads new code and calls erlang:monotonic_time/0 soon after, we'd - * break the illusion of atomic upgrades if process B still ran old code - * after seeing a later timestamp from its own call to - * erlang:monotonic_time/0. */ - - emit_leave_runtime<Update::eReductions | Update::eCodeIndex>(); - - a.mov(getXRef(0), ARG3); - a.ret(); - - a.bind(call_save_calls); { - /* Stash the bif function pointer */ - a.mov(TMP_MEM1q, RET); + Label trap = a.newLabel(), error = a.newLabel(); - /* Setup the arguments to call */ - a.mov(ARG1, c_p); - a.mov(ARG2, exp); - runtime_call<2>(save_calls); - - /* Restore RET and ARG3 to the values expected - by the bif call */ - a.mov(RET, TMP_MEM1q); - a.mov(ARG3, I); - a.jmp(call_bif); - } + emit_test_the_non_value(RET); + a.short_().je(trap); - a.bind(trap); - { - a.cmp(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(TRAP)); - a.short_().jne(error); + a.mov(getXRef(0), RET); - emit_leave_runtime<Update::eHeap | Update::eStack | - Update::eReductions | Update::eCodeIndex>(); + emit_leave_frame(); + a.ret(); + + a.bind(trap); + { + a.cmp(x86::qword_ptr(c_p, offsetof(Process, freason)), + imm(TRAP)); + a.short_().jne(error); #if !defined(NATIVE_ERLANG_STACK) - a.pop(getCPRef()); + a.pop(getCPRef()); #endif - /* Trap out, our return address is on the Erlang stack. - * - * The BIF_TRAP macros all set up c_p->arity and c_p->current, so - * we can use a simplified context switch. */ - a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i))); - a.jmp(labels[context_switch_simplified]); - } - - a.bind(error); - { - a.mov(ARG4, exp); - a.mov(RET, I); - - /* Update::eCodeIndex clobbers ARG1 + ARG2 */ - emit_leave_runtime<Update::eHeap | Update::eStack | - Update::eReductions | Update::eCodeIndex>(); + /* Trap out, our return address is on the Erlang stack. + * + * The BIF_TRAP macros all set up c_p->arity and c_p->current, + * so we can use a simplified context switch. */ + a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i))); + a.jmp(labels[context_switch_simplified]); + } - /* handle_error_shared needs the entry address in ARG2 */ - a.mov(ARG2, RET); + a.bind(error); + { + a.mov(ARG2, entry_mem); + a.mov(ARG4, export_mem); + a.add(ARG4, imm(offsetof(Export, info.mfa))); #if !defined(NATIVE_ERLANG_STACK) - /* Discard the continuation pointer as it will never be used. */ - emit_discard_cp(); + /* Discard the continuation pointer as it will never be + * used. */ + emit_unwind_frame(); #endif - /* get_handle_error expects current PC in ARG2 and MFA in ARG4. */ - a.lea(ARG4, x86::qword_ptr(ARG4, offsetof(Export, info.mfa))); - - /* Overwrite the return address with the entry address to ensure - * that only the entry address ends up in the stack trace. */ - a.mov(x86::qword_ptr(E), ARG2); - - a.jmp(labels[handle_error_shared]); + /* Overwrite the return address with the entry address to + * ensure that only the entry address ends up in the stack + * trace. 
*/ + if (erts_frame_layout == ERTS_FRAME_LAYOUT_RA) { + a.mov(x86::qword_ptr(E), ARG2); + } else { + ASSERT(erts_frame_layout == ERTS_FRAME_LAYOUT_FP_RA); + a.mov(x86::qword_ptr(E, 8), ARG2); + } + + a.jmp(labels[raise_exception_shared]); + } } a.bind(gc_after_bif_call); { + a.mov(ARG2, mbuf_mem); + a.mov(ARG5, export_mem); + a.mov(ARG5, x86::qword_ptr(ARG5, offsetof(Export, info.mfa.arity))); + + emit_enter_runtime<Update::eReductions | Update::eStack | + Update::eHeap>(); + a.mov(ARG1, c_p); - a.mov(ARG2, TMP_MEM1q); - /* ARG3 already contains result */ + a.mov(ARG3, RET); load_x_reg_array(ARG4); - a.mov(ARG5, x86::qword_ptr(exp, offsetof(Export, info.mfa.arity))); runtime_call<5>(erts_gc_after_bif_call_lhf); - a.mov(ARG3, RET); - a.mov(ARG5, x86::qword_ptr(c_p, offsetof(Process, htop))); - a.mov(ARG2, x86::qword_ptr(c_p, offsetof(Process, stop))); + + emit_leave_runtime<Update::eReductions | Update::eStack | + Update::eHeap>(); + a.jmp(check_bif_return); } } a.bind(trace); { - /* Call the export entry instead of the BIF. If we use the - * native stack as the Erlang stack our return address is - * already on the Erlang stack. Otherwise we will have to move - * the return address from the native stack to the Erlang - * stack. */ + /* Tail call the export entry instead of the BIF. If we use the native + * stack as the Erlang stack our return address is already on the + * Erlang stack. Otherwise we will have to move the return address from + * the native stack to the Erlang stack. */ + + emit_leave_frame(); #if !defined(NATIVE_ERLANG_STACK) /* The return address must be on the Erlang stack. */ a.pop(getCPRef()); #endif - x86::Mem destination = emit_setup_export_call(ARG4); + x86::Mem destination = emit_setup_dispatchable_call(ARG4); a.jmp(destination); } @@ -630,21 +609,21 @@ void BeamGlobalAssembler::emit_call_light_bif_shared() { a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG4); /* We'll find our way back through ARG3 (entry address). */ - emit_discard_cp(); + emit_unwind_frame(); a.jmp(labels[context_switch_simplified]); } } -void BeamModuleAssembler::emit_call_light_bif(const ArgVal &Bif, - const ArgVal &Exp) { +void BeamModuleAssembler::emit_call_light_bif(const ArgWord &Bif, + const ArgExport &Exp) { Label entry = a.newLabel(); align_erlang_cp(); a.bind(entry); - make_move_patch(ARG4, imports[Exp.getValue()].patches); - a.mov(RET, imm(Bif.getValue())); + mov_arg(ARG4, Exp); + a.mov(RET, imm(Bif.get())); a.lea(ARG3, x86::qword_ptr(entry)); fragment_call(ga->get_call_light_bif_shared()); @@ -654,9 +633,8 @@ void BeamModuleAssembler::emit_send() { Label entry = a.newLabel(); /* This is essentially a mirror of call_light_bif, there's no point to - * specializing send/2 anymore. - * - * FIXME: Rewrite this to an ordinary BIF in the loader instead. */ + * specializing send/2 anymore. We do it here because it's far more work to + * do it in the loader. 
*/ align_erlang_cp(); a.bind(entry); @@ -667,6 +645,10 @@ void BeamModuleAssembler::emit_send() { fragment_call(ga->get_call_light_bif_shared()); } +void BeamModuleAssembler::emit_nif_start() { + /* load time only instruction */ +} + void BeamGlobalAssembler::emit_bif_nif_epilogue(void) { Label check_trap = a.newLabel(), trap = a.newLabel(), error = a.newLabel(); @@ -696,6 +678,9 @@ void BeamGlobalAssembler::emit_bif_nif_epilogue(void) { comment("Do return and dispatch to it"); a.mov(getXRef(0), RET); + + emit_leave_frame(); + #ifdef NATIVE_ERLANG_STACK a.ret(); #else @@ -745,7 +730,7 @@ void BeamGlobalAssembler::emit_bif_nif_epilogue(void) { a.mov(ARG2, RET); a.mov(ARG4, x86::qword_ptr(c_p, offsetof(Process, current))); - a.jmp(labels[handle_error_shared]); + a.jmp(labels[raise_exception_shared]); } } @@ -779,17 +764,16 @@ void BeamGlobalAssembler::emit_call_bif_shared(void) { a.short_().je(skip_msacc); a.mov(TMP_MEM1q, ARG3); - a.mov(TMP_MEM2q, ARG4); - a.mov(TMP_MEM3q, ARG5); + a.mov(TMP_MEM2q, ARG5); a.mov(ARG1, erts_msacc_cache); a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, module))); a.mov(ARG3, ARG4); runtime_call<3>(erts_msacc_set_bif_state); + a.mov(ARG4, RET); a.mov(ARG3, TMP_MEM1q); - a.mov(ARG4, TMP_MEM2q); - a.mov(ARG5, TMP_MEM3q); + a.mov(ARG5, TMP_MEM2q); a.bind(skip_msacc); } #endif @@ -814,41 +798,52 @@ void BeamGlobalAssembler::emit_dispatch_bif(void) { * `info` structure. */ a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i))); - ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.trace) == + ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.call_bif_nif) == sizeof(ErtsCodeInfo)); ssize_t mfa_offset = offsetof(ErtsNativeFunc, trampoline.info.mfa) - - offsetof(ErtsNativeFunc, trampoline.trace); + offsetof(ErtsNativeFunc, trampoline.call_bif_nif); a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset)); ssize_t dfunc_offset = offsetof(ErtsNativeFunc, trampoline.dfunc) - - offsetof(ErtsNativeFunc, trampoline.trace); + offsetof(ErtsNativeFunc, trampoline.call_bif_nif); a.mov(ARG4, x86::qword_ptr(ARG3, dfunc_offset)); a.jmp(labels[call_bif_shared]); } -void BeamModuleAssembler::emit_call_bif(const ArgVal &Func) { +void BeamModuleAssembler::emit_call_bif(const ArgWord &Func) { int mfa_offset = -(int)sizeof(ErtsCodeMFA); - a.lea(ARG2, x86::qword_ptr(currLabel, mfa_offset)); - a.lea(ARG3, x86::qword_ptr(currLabel)); - mov_arg(ARG4, Func); + Label entry = a.newLabel(); + + /* This is _always_ the first instruction in a function and replaces the + * yield test that would otherwise add a frame, so we must add a frame + * here. */ + emit_enter_frame(); + + /* Yield entry point; must be after entering frame. 
*/ + a.bind(entry); + { + a.lea(ARG2, x86::qword_ptr(current_label, mfa_offset)); + a.lea(ARG3, x86::qword_ptr(entry)); + mov_arg(ARG4, Func); - abs_jmp(ga->get_call_bif_shared()); + a.jmp(resolve_fragment(ga->get_call_bif_shared())); + } } -void BeamModuleAssembler::emit_call_bif_mfa(const ArgVal &M, - const ArgVal &F, - const ArgVal &A) { +void BeamModuleAssembler::emit_call_bif_mfa(const ArgAtom &M, + const ArgAtom &F, + const ArgWord &A) { BeamInstr func; Export *e; - e = erts_active_export_entry(M.getValue(), F.getValue(), A.getValue()); + e = erts_active_export_entry(M.get(), F.get(), A.get()); ASSERT(e != NULL && e->bif_number != -1); func = (BeamInstr)bif_table[e->bif_number].f; - emit_call_bif(ArgVal(ArgVal::i, func)); + emit_call_bif(ArgWord(func)); } void BeamGlobalAssembler::emit_call_nif_early() { @@ -866,6 +861,7 @@ void BeamGlobalAssembler::emit_call_nif_early() { a.test(ARG2, imm(sizeof(UWord) - 1)); a.short_().je(next); + comment("# Return address isn't word-aligned"); a.ud2(); a.bind(next); @@ -879,11 +875,14 @@ void BeamGlobalAssembler::emit_call_nif_early() { emit_leave_runtime(); - /* We won't return to the original code. */ - emit_discard_cp(); + /* We won't return to the original code. We KNOW that the stack points at + * a return address. */ + a.add(x86::rsp, imm(8)); /* Emulate `emit_call_nif`, loading the current (phony) instruction - * pointer into ARG2. */ + * pointer into ARG2. We push a (redundant) frame pointer to match the + * corresponding `emit_leave_frame` in `call_nif_shared`. */ + emit_enter_frame(); a.mov(ARG3, RET); a.jmp(labels[call_nif_shared]); } @@ -932,48 +931,71 @@ void BeamGlobalAssembler::emit_dispatch_nif(void) { * * ErtsNativeFunc already follows the NIF call layout, so we don't need to * do anything beyond loading the address. */ - ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.trace) == + ERTS_CT_ASSERT(offsetof(ErtsNativeFunc, trampoline.call_bif_nif) == sizeof(ErtsCodeInfo)); a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, i))); a.jmp(labels[call_nif_shared]); } -/* WARNING: This stub is memcpy'd, so all code herein must be explicitly - * position-independent. */ -void BeamModuleAssembler::emit_call_nif(const ArgVal &Func, - const ArgVal &NifMod, - const ArgVal &DirtyFunc) { - Label dispatch = a.newLabel(); - uint64_t val; - - /* The start of this function has to mimic the layout of ErtsNativeFunc. */ - a.jmp(dispatch); /* call_op */ - - a.align(AlignMode::kCode, 8); - /* ErtsNativeFunc.dfunc */ - val = Func.getValue(); - a.embed(&val, sizeof(val)); - /* ErtsNativeFunc.m */ - val = NifMod.getValue(); - a.embed(&val, sizeof(val)); - /* ErtsNativeFunc.func */ - val = DirtyFunc.getValue(); - a.embed(&val, sizeof(val)); - - /* The real code starts here */ - a.bind(dispatch); +void BeamGlobalAssembler::emit_call_nif_yield_helper() { + Label yield = a.newLabel(); + + a.dec(FCALLS); + a.short_().jl(yield); + a.jmp(labels[call_nif_shared]); + + a.bind(yield); { - Label yield = a.newLabel(); + int mfa_offset = -(int)sizeof(ErtsCodeMFA); + int arity_offset = mfa_offset + (int)offsetof(ErtsCodeMFA, arity); - a.lea(ARG3, x86::qword_ptr(currLabel)); + a.mov(ARG1, x86::qword_ptr(ARG3, arity_offset)); + a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG1); - a.dec(FCALLS); - a.jl(yield); + a.lea(ARG1, x86::qword_ptr(ARG3, mfa_offset)); + a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG1); - pic_jmp(ga->get_call_nif_shared()); + /* Yield to `dispatch` rather than `entry` to avoid pushing too many + * frames to the stack. 
See `emit_call_nif` for details. */ + a.add(ARG3, imm(BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3]))); + a.jmp(labels[context_switch_simplified]); + } +} - a.bind(yield); - pic_jmp(ga->get_context_switch()); +/* WARNING: This stub is memcpy'd, so all code herein must be explicitly + * position-independent. */ +void BeamModuleAssembler::emit_call_nif(const ArgWord &Func, + const ArgWord &NifMod, + const ArgWord &DirtyFunc) { + Label entry = a.newLabel(), dispatch = a.newLabel(); + + /* The start of this function must mimic the layout of ErtsNativeFunc. + * + * We jump here on the very first entry, pushing a stack frame if + * applicable. */ + a.bind(entry); + { + emit_enter_frame(); + a.short_().jmp(dispatch); /* call_op */ + + a.align(AlignMode::kCode, 8); + /* ErtsNativeFunc.dfunc */ + a.embedUInt64(Func.get()); + /* ErtsNativeFunc.m */ + a.embedUInt64(NifMod.get()); + /* ErtsNativeFunc.func */ + a.embedUInt64(DirtyFunc.get()); + } + + /* `emit_call_nif_yield_helper` relies on this to compute the address of + * `dispatch` */ + ASSERT((a.offset() - code.labelOffsetFromBase(current_label)) == + BEAM_ASM_NFUNC_SIZE + sizeof(UWord[3])); + + a.bind(dispatch); + { + a.lea(ARG3, x86::qword_ptr(current_label)); + pic_jmp(ga->get_call_nif_yield_helper()); } } @@ -996,15 +1018,18 @@ void BeamGlobalAssembler::emit_i_load_nif_shared() { a.cmp(RET, RET_NIF_yield); a.short_().je(yield); + + /* We entered the frame in module code. */ + emit_leave_frame(); + a.cmp(RET, RET_NIF_success); a.short_().jne(error); - a.ret(); a.bind(error); { a.mov(ARG4, imm(&bif_mfa)); - emit_handle_error(); + a.jmp(labels[raise_exception]); } a.bind(yield); @@ -1014,10 +1039,49 @@ void BeamGlobalAssembler::emit_i_load_nif_shared() { } } +static ErtsCodePtr get_on_load_address(Process *c_p, Eterm module) { + const Module *modp = erts_get_module(module, erts_active_code_ix()); + + if (modp && modp->on_load) { + const BeamCodeHeader *hdr = (modp->on_load)->code_hdr; + + if (hdr) { + return erts_codeinfo_to_code(hdr->on_load); + } + } + + c_p->freason = BADARG; + + return NULL; +} + +/* Implements the internal and undocumented erlang:call_on_load_function/1, + * which is tricky to implement in the face of frame pointers. */ +void BeamModuleAssembler::emit_i_call_on_load_function() { + static ErtsCodeMFA mfa = {am_erlang, am_call_on_load_function, 1}; + Label next = a.newLabel(); + + emit_enter_runtime(); + + a.mov(ARG1, c_p); + a.mov(ARG2, getXRef(0)); + runtime_call<2>(get_on_load_address); + + emit_leave_runtime(); + + a.test(RET, RET); + a.jne(next); + + emit_raise_exception(&mfa); + + a.bind(next); + erlang_call(RET, ARG1); +} + #ifdef NATIVE_ERLANG_STACK void BeamModuleAssembler::emit_i_load_nif() { - Label entry = a.newLabel(), next = a.newLabel(); + Label entry = a.newLabel(), yield = a.newLabel(), next = a.newLabel(); /* i_load_nif is a rewrite of a call_ext instruction, so we'll body-call * ourselves to ensure the stack is consistent with that. 
This greatly @@ -1028,8 +1092,13 @@ void BeamModuleAssembler::emit_i_load_nif() { align_erlang_cp(); a.bind(entry); { - a.lea(ARG2, x86::qword_ptr(entry)); - abs_jmp(ga->get_i_load_nif_shared()); + emit_enter_frame(); + + a.bind(yield); + { + a.lea(ARG2, x86::qword_ptr(yield)); + a.jmp(resolve_fragment(ga->get_i_load_nif_shared())); + } } a.bind(next); @@ -1048,7 +1117,7 @@ void BeamModuleAssembler::emit_i_load_nif() { emit_enter_runtime<Update::eStack | Update::eHeap>(); a.mov(ARG1, c_p); - a.lea(ARG2, x86::qword_ptr(currLabel)); + a.lea(ARG2, x86::qword_ptr(current_label)); load_x_reg_array(ARG3); runtime_call<3>(beam_jit_load_nif); @@ -1059,12 +1128,12 @@ void BeamModuleAssembler::emit_i_load_nif() { a.cmp(RET, imm(RET_NIF_success)); a.je(next); - emit_handle_error(currLabel, &mfa); + emit_raise_exception(current_label, &mfa); a.bind(schedule); { a.lea(ARG3, x86::qword_ptr(entry)); - abs_jmp(ga->get_context_switch_simplified()); + a.jmp(resolve_fragment(ga->get_context_switch_simplified())); } a.bind(next); |