diff options
Diffstat (limited to 'erts/emulator/beam/jit/x86/process_main.cpp')
-rw-r--r-- | erts/emulator/beam/jit/x86/process_main.cpp | 348 |
1 files changed, 348 insertions, 0 deletions
diff --git a/erts/emulator/beam/jit/x86/process_main.cpp b/erts/emulator/beam/jit/x86/process_main.cpp new file mode 100644 index 0000000000..210aecb0c2 --- /dev/null +++ b/erts/emulator/beam/jit/x86/process_main.cpp @@ -0,0 +1,348 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2020-2021. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include "beam_asm.hpp" + +extern "C" +{ +#include "bif.h" +#include "beam_common.h" +#include "code_ix.h" +#include "export.h" +} + +const uint8_t *BeamAssembler::nops[3] = {nop1, nop2, nop3}; +const uint8_t BeamAssembler::nop1[1] = {0x90}; +const uint8_t BeamAssembler::nop2[2] = {0x66, 0x90}; +const uint8_t BeamAssembler::nop3[3] = {0x0F, 0x1F, 0x00}; + +#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) +static Process *erts_debug_schedule(ErtsSchedulerData *esdp, + Process *c_p, + int calls) { + PROCESS_MAIN_CHK_LOCKS(c_p); + ERTS_UNREQ_PROC_MAIN_LOCK(c_p); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); + c_p = erts_schedule(esdp, c_p, calls); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); + ERTS_REQ_PROC_MAIN_LOCK(c_p); + PROCESS_MAIN_CHK_LOCKS(c_p); + return c_p; +} +#endif + +/* void process_main(ErtsSchedulerData *esdp); */ +void BeamGlobalAssembler::emit_process_main() { + Label context_switch_local = a.newLabel(), + context_switch_simplified_local = a.newLabel(), + do_schedule_local = a.newLabel(), schedule_next = a.newLabel(); + + const x86::Mem start_time_i = + getSchedulerRegRef(offsetof(ErtsSchedulerRegisters, start_time_i)); + const x86::Mem start_time = + getSchedulerRegRef(offsetof(ErtsSchedulerRegisters, start_time)); + + /* Allocate the register structure on the stack to allow computing the + * runtime stack address from it, greatly reducing the cost of stack + * swapping. */ + a.sub(x86::rsp, imm(sizeof(ErtsSchedulerRegisters) + ERTS_CACHE_LINE_SIZE)); + a.and_(x86::rsp, imm(~ERTS_CACHE_LINE_MASK)); + + a.mov(x86::qword_ptr(ARG1, offsetof(ErtsSchedulerData, registers)), + x86::rsp); + + /* Center `registers` at the base of x_reg_array so we can use negative + * 8-bit displacement to address the commonly used aux_regs, located at the + * start of the ErtsSchedulerRegisters struct. */ + a.lea(registers, + x86::qword_ptr(x86::rsp, + offsetof(ErtsSchedulerRegisters, x_reg_array.d))); + + load_erl_bits_state(ARG1); + runtime_call<1>(erts_bits_init_state); + +#if defined(DEBUG) && defined(NATIVE_ERLANG_STACK) + /* Save stack bounds so they can be tested without clobbering anything. */ + runtime_call<0>(erts_get_stacklimit); + + a.mov(getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, runtime_stack_end)), + RET); + a.mov(getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, runtime_stack_start)), + x86::rsp); +#elif !defined(NATIVE_ERLANG_STACK) + /* Save the initial SP of the thread so that we can verify that it + * doesn't grow. */ +# ifdef JIT_HARD_DEBUG + a.mov(getInitialSPRef(), x86::rsp); +# endif + + /* Manually do an `emit_enter_runtime` to match the `emit_leave_runtime` + * below. We avoid `emit_enter_runtime` because it may do additional + * assertions that may currently fail. + * + * IMPORTANT: We must ensure that this sequence leaves the stack + * aligned on a 16-byte boundary. */ + a.mov(getRuntimeStackRef(), x86::rsp); + a.sub(x86::rsp, imm(15)); + a.and_(x86::rsp, imm(-16)); +#endif + + a.mov(start_time_i, imm(0)); + a.mov(start_time, imm(0)); + + mov_imm(c_p, 0); + mov_imm(FCALLS, 0); + mov_imm(ARG3, 0); /* Set reds_used for erts_schedule call */ + + a.jmp(schedule_next); + + a.bind(do_schedule_local); + { + /* Figure out reds_used. def_arg_reg[5] = REDS_IN */ + a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5]))); + a.sub(ARG3, FCALLS); + + a.jmp(schedule_next); + } + + a.bind(context_switch_local); + comment("Context switch, unknown arity/MFA"); + { + Sint arity_offset = offsetof(ErtsCodeMFA, arity) - sizeof(ErtsCodeMFA); + + a.mov(ARG1, x86::qword_ptr(ARG3, arity_offset)); + a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG1); + + a.lea(ARG1, x86::qword_ptr(ARG3, -(Sint)sizeof(ErtsCodeMFA))); + a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG1); + + /* !! Fall through !! */ + } + + a.bind(context_switch_simplified_local); + comment("Context switch, known arity and MFA"); + { + Label not_exiting = a.newLabel(); + +#ifdef ERLANG_FRAME_POINTERS + /* Kill the current frame pointer to avoid confusing `perf` and similar + * tools. */ + a.sub(frame_pointer, frame_pointer); +#endif + +#ifdef DEBUG + Label check_i = a.newLabel(); + /* Check that ARG3 is set to a valid CP. */ + a.test(ARG3, imm(_CPMASK)); + a.je(check_i); + comment("# ARG3 is not a valid CP"); + a.ud2(); + a.bind(check_i); +#endif + + a.mov(x86::qword_ptr(c_p, offsetof(Process, i)), ARG3); + +#if defined(JIT_HARD_DEBUG) && defined(ERLANG_FRAME_POINTERS) + a.mov(ARG1, c_p); + a.mov(ARG2, x86::qword_ptr(c_p, offsetof(Process, frame_pointer))); + a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, stop))); + + runtime_call<3>(erts_validate_stack); +#endif + +#ifdef WIN32 + a.mov(ARG1d, x86::dword_ptr(c_p, offsetof(Process, state.value))); +#else + a.mov(ARG1d, x86::dword_ptr(c_p, offsetof(Process, state.counter))); +#endif + + a.test(ARG1d, imm(ERTS_PSFLG_EXITING)); + a.short_().je(not_exiting); + { + comment("Process exiting"); + + a.lea(ARG1, x86::qword_ptr(labels[process_exit])); + a.mov(x86::qword_ptr(c_p, offsetof(Process, i)), ARG1); + a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0)); + a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), imm(0)); + a.jmp(do_schedule_local); + } + a.bind(not_exiting); + + /* Figure out reds_used. def_arg_reg[5] = REDS_IN */ + a.mov(ARG3, x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5]))); + a.sub(ARG3, FCALLS); + + /* Spill reds_used to FCALLS as we no longer need that value */ + a.mov(FCALLS, ARG3); + + a.mov(ARG1, c_p); + load_x_reg_array(ARG2); + runtime_call<2>(copy_out_registers); + + /* Restore reds_used from FCALLS */ + a.mov(ARG3, FCALLS); + + /* !! Fall through !! */ + } + + a.bind(schedule_next); + comment("schedule_next"); + { + Label schedule = a.newLabel(), skip_long_schedule = a.newLabel(); + + /* ARG3 contains reds_used at this point */ + + a.cmp(start_time, imm(0)); + a.short_().je(schedule); + { + a.mov(ARG1, c_p); + a.mov(ARG2, start_time); + + /* Spill reds_used in start_time slot */ + a.mov(start_time, ARG3); + + a.mov(ARG3, start_time_i); + runtime_call<3>(check_monitor_long_schedule); + + /* Restore reds_used */ + a.mov(ARG3, start_time); + } + a.bind(schedule); + +#ifdef ERLANG_FRAME_POINTERS + if (erts_frame_layout == ERTS_FRAME_LAYOUT_FP_RA) { + /* Kill the current frame pointer so that misc jobs that execute + * during `erts_schedule` aren't attributed to the function we + * were scheduled out of. */ + a.sub(frame_pointer, frame_pointer); + } +#endif + + mov_imm(ARG1, 0); + a.mov(ARG2, c_p); +#if defined(DEBUG) || defined(ERTS_ENABLE_LOCK_CHECK) + runtime_call<3>(erts_debug_schedule); +#else + runtime_call<3>(erts_schedule); +#endif + a.mov(c_p, RET); + +#ifdef ERTS_MSACC_EXTENDED_STATES + a.lea(ARG1, + x86::qword_ptr(registers, + offsetof(ErtsSchedulerRegisters, + aux_regs.d.erts_msacc_cache))); + runtime_call<1>(erts_msacc_update_cache); +#endif + + a.mov(ARG1, imm((UWord)&erts_system_monitor_long_schedule)); + a.cmp(x86::qword_ptr(ARG1), imm(0)); + a.mov(start_time, imm(0)); + a.short_().je(skip_long_schedule); + { + /* Enable long schedule test */ + runtime_call<0>(erts_timestamp_millis); + a.mov(start_time, RET); + a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, i))); + a.mov(start_time_i, RET); + } + a.bind(skip_long_schedule); + + /* Copy arguments */ + a.mov(ARG1, c_p); + load_x_reg_array(ARG2); + runtime_call<2>(copy_in_registers); + + /* Setup reduction counting */ + a.mov(FCALLS, x86::qword_ptr(c_p, offsetof(Process, fcalls))); + a.mov(x86::qword_ptr(c_p, offsetof(Process, def_arg_reg[5])), FCALLS); + +#ifdef DEBUG + a.mov(x86::qword_ptr(c_p, offsetof(Process, debug_reds_in)), FCALLS); +#endif + + /* Check whether save calls is on */ + a.mov(ARG1, c_p); + a.mov(ARG2, imm(ERTS_PSD_SAVED_CALLS_BUF)); + runtime_call<2>(erts_psd_get); + + /* Read the active code index, overriding it with + * ERTS_SAVE_CALLS_CODE_IX when save_calls is enabled (RET != 0). */ + a.test(RET, RET); + a.mov(ARG1, imm(&the_active_code_index)); + a.mov(ARG2, imm(ERTS_SAVE_CALLS_CODE_IX)); + a.mov(active_code_ix.r32(), x86::dword_ptr(ARG1)); + a.cmovnz(active_code_ix, ARG2); + + /* Start executing the Erlang process. Note that reductions have + * already been set up above. */ + emit_leave_runtime<Update::eStack | Update::eHeap>(); + + /* Check if we are just returning from a dirty nif/bif call and if so we + * need to do a bit of cleaning up before continuing. */ + a.mov(RET, x86::qword_ptr(c_p, offsetof(Process, i))); + a.cmp(x86::qword_ptr(RET), imm(op_call_nif_WWW)); + a.je(labels[dispatch_nif]); + a.cmp(x86::qword_ptr(RET), imm(op_call_bif_W)); + a.je(labels[dispatch_bif]); + a.jmp(RET); + } + + /* Processes may jump to the exported entry points below, executing on the + * Erlang stack when entering. These are separate from the `_local` labels + * above as we don't want to worry about which stack we're on when the + * cases overlap. */ + + /* `ga->get_context_switch()` + * + * The *next* instruction pointer is provided in ARG3, and must be preceded + * by an ErtsCodeMFA. */ + a.bind(labels[context_switch]); + { + emit_enter_runtime<Update::eStack | Update::eHeap>(); + + a.jmp(context_switch_local); + } + + /* `ga->get_context_switch_simplified()` + * + * The next instruction pointer is provided in ARG3, which does not need to + * point past an ErtsCodeMFA as the process structure has already been + * updated. */ + a.bind(labels[context_switch_simplified]); + { + emit_enter_runtime<Update::eStack | Update::eHeap>(); + + a.jmp(context_switch_simplified_local); + } + + /* `ga->get_do_schedule()` + * + * `c_p->i` must be set prior to jumping here. */ + a.bind(labels[do_schedule]); + { + emit_enter_runtime<Update::eStack | Update::eHeap>(); + + a.jmp(do_schedule_local); + } +} |