Diffstat (limited to 'erts/emulator/beam/jit/arm/beam_asm.hpp')
-rw-r--r-- | erts/emulator/beam/jit/arm/beam_asm.hpp | 1700
1 file changed, 1700 insertions, 0 deletions
diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp new file mode 100644 index 0000000000..a2d28c27b5 --- /dev/null +++ b/erts/emulator/beam/jit/arm/beam_asm.hpp @@ -0,0 +1,1700 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2020-2023. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include <string> +#include <vector> +#include <unordered_map> +#include <queue> +#include <map> +#include <functional> +#include <algorithm> +#include <cmath> + +#ifndef ASMJIT_ASMJIT_H_INCLUDED +# include <asmjit/asmjit.hpp> +#endif + +#include <asmjit/a64.h> + +extern "C" +{ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "beam_catches.h" +#include "big.h" + +#include "beam_asm.h" +} + +#include "beam_jit_common.hpp" + +/* Is it safe to STP or LDP `Struct->Field1` and `Struct->Field2`? */ +#define ERTS_CT_ASSERT_FIELD_PAIR(Struct, Field1, Field2) \ + static_assert(std::is_standard_layout<Struct>::value && \ + (offsetof(Struct, Field2) - offsetof(Struct, Field1) == \ + sizeof(((Struct *)nullptr)->Field1)) && \ + (sizeof(((Struct *)nullptr)->Field1) == \ + sizeof(((Struct *)nullptr)->Field2))) + +using namespace asmjit; + +class BeamAssembler : public ErrorHandler { +protected: + /* Holds code and relocation information. */ + CodeHolder code; + + a64::Assembler a; + + FileLogger logger; + + Section *rodata = nullptr; + + /* * * * * * * * * */ + + /* Points at x_reg_array inside an ErtsSchedulerRegisters struct, allowing + * the aux_regs field to be addressed with an 8-bit displacement. */ + const arm::Gp scheduler_registers = a64::x19; + + const arm::Gp E = a64::x20; + const arm::Gp c_p = a64::x21; + const arm::Gp FCALLS = a64::x22; + const arm::Gp HTOP = a64::x23; + + /* Local copy of the active code index. + * + * This is set to ERTS_SAVE_CALLS_CODE_IX when save_calls is active, which + * routes us to a common handler routine that calls save_calls before + * jumping to the actual code. */ + const arm::Gp active_code_ix = a64::x24; + + /* X registers */ +#if defined(DEBUG) + /* + * To ensure that we thoroughly test flushing of caller-save X + * registers, define more caller-save X registers in a DEBUG + * build. + */ +# define ERTS_HIGHEST_CALLEE_SAVE_XREG 2 +# define ERTS_HIGHEST_CALLER_SAVE_XREG 5 + const arm::Gp XREG0 = a64::x25; + const arm::Gp XREG1 = a64::x26; + const arm::Gp XREG2 = a64::x27; + + /* + * Caller-save X registers. Must be flushed before calling C + * code. + */ + const arm::Gp XREG3 = a64::x15; + const arm::Gp XREG4 = a64::x16; + const arm::Gp XREG5 = a64::x17; +#else +# define ERTS_HIGHEST_CALLEE_SAVE_XREG 3 +# define ERTS_HIGHEST_CALLER_SAVE_XREG 5 + const arm::Gp XREG0 = a64::x25; + const arm::Gp XREG1 = a64::x26; + const arm::Gp XREG2 = a64::x27; + const arm::Gp XREG3 = a64::x28; + + /* + * Caller-save X registers. Must be flushed before calling C + * code. 
+ */ + const arm::Gp XREG4 = a64::x15; + const arm::Gp XREG5 = a64::x16; +#endif + +#define ERTS_LOWEST_CALLEE_SAVE_XREG (0) +#define ERTS_LOWEST_CALLER_SAVE_XREG (ERTS_HIGHEST_CALLEE_SAVE_XREG + 1) + + static const int num_register_backed_xregs = 6; + const arm::Gp register_backed_xregs[num_register_backed_xregs] = + {XREG0, XREG1, XREG2, XREG3, XREG4, XREG5}; + +#ifdef ERTS_MSACC_EXTENDED_STATES + const arm::Mem erts_msacc_cache = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.erts_msacc_cache)); +#endif + + /* * * * * * * * * */ + const arm::Gp ZERO = a64::xzr; + + /* + * All of the following registers are caller-save. + * + * Note that ARG1 is also the register for the return value. + */ + const arm::Gp ARG1 = a64::x0; + const arm::Gp ARG2 = a64::x1; + const arm::Gp ARG3 = a64::x2; + const arm::Gp ARG4 = a64::x3; + const arm::Gp ARG5 = a64::x4; + const arm::Gp ARG6 = a64::x5; + const arm::Gp ARG7 = a64::x6; + const arm::Gp ARG8 = a64::x7; + + const arm::Gp TMP1 = a64::x8; + const arm::Gp TMP2 = a64::x9; + const arm::Gp TMP3 = a64::x10; + const arm::Gp TMP4 = a64::x11; + const arm::Gp TMP5 = a64::x12; + const arm::Gp TMP6 = a64::x13; + + /* + * Assume that SUPER_TMP will be destroyed by any helper function. + */ + const arm::Gp SUPER_TMP = a64::x14; + + /* + * Note that x18 is reserved on Apple platforms and must not be used. + */ + + /* Callee-saved floating-point registers. + * + * Note that only the bottom 64 bits of these (128-bit) registers are + * callee-save, so we cannot pack two floats into each register. */ + const arm::VecD FREG0 = a64::d8; + const arm::VecD FREG1 = a64::d9; + const arm::VecD FREG2 = a64::d10; + const arm::VecD FREG3 = a64::d11; + const arm::VecD FREG4 = a64::d12; + const arm::VecD FREG5 = a64::d13; + const arm::VecD FREG6 = a64::d14; + const arm::VecD FREG7 = a64::d15; + static const int num_register_backed_fregs = 8; + const arm::VecD register_backed_fregs[num_register_backed_fregs] = + {FREG0, FREG1, FREG2, FREG3, FREG4, FREG5, FREG6, FREG7}; + + const arm::Mem TMP_MEM1q = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[0])); + const arm::Mem TMP_MEM2q = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[1])); + const arm::Mem TMP_MEM3q = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[2])); + const arm::Mem TMP_MEM4q = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[3])); + const arm::Mem TMP_MEM5q = getSchedulerRegRef( + offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[4])); + + /* Fill registers with undefined contents to find bugs faster. + * A boxed value is most likely to cause noticeable trouble. */ + static const Uint64 bad_boxed_ptr = 0xcafebad0000002UL; + + /* Number of highest element displacement for L/SDP and L/STR. */ + static const size_t MAX_LDP_STP_DISPLACEMENT = 0x3F; + static const size_t MAX_LDR_STR_DISPLACEMENT = 0xFFF; + + /* Constants for "alternate flag state" operands, which are distinct from + * `arm::CondCode::xyz`. Mainly used in `CCMP` instructions. 
*/ + enum NZCV : int { + kNF = 8, + kSigned = kNF, + + kZF = 4, + kEqual = kZF, + + kCF = 2, + kCarry = kCF, + + kVF = 1, + kOverflow = kVF, + + kNone = 0 + }; + +public: + static bool hasCpuFeature(uint32_t featureId); + + BeamAssembler(); + BeamAssembler(const std::string &log); + + ~BeamAssembler(); + + void *getBaseAddress(); + size_t getOffset(); + +protected: + void _codegen(JitAllocator *allocator, + const void **executable_ptr, + void **writable_ptr); + + void *getCode(Label label); + byte *getCode(char *labelName); + + void handleError(Error err, const char *message, BaseEmitter *origin); + +#ifdef JIT_HARD_DEBUG + constexpr arm::Mem getInitialSPRef() const { + int base = offsetof(ErtsSchedulerRegisters, initial_sp); + + return getSchedulerRegRef(base); + } +#endif + + constexpr arm::Mem getSchedulerRegRef(int offset) const { + ASSERT((offset & (sizeof(Eterm) - 1)) == 0); + return arm::Mem(scheduler_registers, offset); + } + + constexpr arm::Mem getFRef(int index, size_t size = sizeof(UWord)) const { + int base = offsetof(ErtsSchedulerRegisters, f_reg_array.d); + int offset = index * sizeof(FloatDef); + + ASSERT(0 <= index && index <= 1023); + return getSchedulerRegRef(base + offset); + } + + constexpr arm::Mem getXRef(int index) const { + int base = offsetof(ErtsSchedulerRegisters, x_reg_array.d); + int offset = index * sizeof(Eterm); + + ASSERT(0 <= index && index < ERTS_X_REGS_ALLOCATED); + return getSchedulerRegRef(base + offset); + } + + constexpr arm::Mem getYRef(int index) const { + ASSERT(0 <= index && index <= 1023); + + return arm::Mem(E, index * sizeof(Eterm)); + } + + constexpr arm::Mem getCARRef(arm::Gp Src) const { + return arm::Mem(Src, -TAG_PRIMARY_LIST); + } + + constexpr arm::Mem getCDRRef(arm::Gp Src, + size_t size = sizeof(UWord)) const { + return arm::Mem(Src, -TAG_PRIMARY_LIST + sizeof(Eterm)); + } + + /* Loads the X register array into `to`. Remember to sync the registers in + * `emit_enter_runtime`. */ + void load_x_reg_array(arm::Gp to) { + int offset = offsetof(ErtsSchedulerRegisters, x_reg_array.d); + + lea(to, getSchedulerRegRef(offset)); + } + + void load_erl_bits_state(arm::Gp to) { + int offset = + offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state); + + lea(to, getSchedulerRegRef(offset)); + } + + void emit_assert_redzone_unused() { +#ifdef JIT_HARD_DEBUG + const int REDZONE_BYTES = S_REDZONE * sizeof(Eterm); + Label next = a.newLabel(); + + a.sub(SUPER_TMP, E, imm(REDZONE_BYTES)); + a.cmp(HTOP, SUPER_TMP); + + a.b_ls(next); + a.udf(0xbeef); + + a.bind(next); +#endif + } + + /* + * Calls an Erlang function. + */ + template<typename Any> + void erlang_call(Any Target) { + emit_assert_redzone_unused(); + aligned_call(Target); + } + + void branch(arm::Mem target) { + a.ldr(SUPER_TMP, target); + a.br(SUPER_TMP); + } + + template<typename FuncPtr> + void aligned_call(FuncPtr(*target)) { + mov_imm(SUPER_TMP, target); + a.blr(SUPER_TMP); + } + + void aligned_call(Label target) { + a.bl(target); + } + + void aligned_call(arm::Gp target) { + a.blr(target); + } + + /* Calls the given address. In DEBUG builds, make + * sure that the CP is aligned. */ + template<typename OperandType> + void aligned_call(OperandType target) { + ERTS_CT_ASSERT(_CPMASK == 3); + ASSERT(is_CP(a.offset())); + a.ldr(TMP1, target); + a.blr(TMP1); + } + + void runtime_call(arm::Gp func, unsigned args) { + ASSERT(args < 5); + a.blr(func); + } + + template<typename T> + struct function_arity; + template<typename T, typename... 
Args> + struct function_arity<T(Args...)> + : std::integral_constant<int, sizeof...(Args)> {}; + + template<int expected_arity, typename T> + void runtime_call(T(*func)) { + static_assert(expected_arity == function_arity<T>()); + + a.mov(TMP1, func); + a.blr(TMP1); + } + + /* Explicitly position-independent absolute jump, for use in fragments that + * need to be memcpy'd for performance reasons (e.g. NIF stubs) */ + template<typename T> + void pic_jmp(T(*addr)) { + a.mov(SUPER_TMP, addr); + a.br(SUPER_TMP); + } + + constexpr arm::Mem getArgRef(const ArgRegister &arg) const { + if (arg.isXRegister()) { + return getXRef(arg.as<ArgXRegister>().get()); + } else if (arg.isYRegister()) { + return getYRef(arg.as<ArgYRegister>().get()); + } + + return getFRef(arg.as<ArgFRegister>().get()); + } + + /* Returns the current code address for the `Export` or `ErlFunEntry` in + * `Src`. + * + * Export tracing, save_calls, etc are implemented by shared fragments that + * assume that the respective entry is in ARG1, so we have to copy it over + * if it isn't already. */ + arm::Mem emit_setup_dispatchable_call(const arm::Gp &Src) { + return emit_setup_dispatchable_call(Src, active_code_ix); + } + + arm::Mem emit_setup_dispatchable_call(const arm::Gp &Src, + const arm::Gp &CodeIndex) { + if (ARG1 != Src) { + a.mov(ARG1, Src); + } + + ERTS_CT_ASSERT(offsetof(ErlFunEntry, dispatch) == 0); + ERTS_CT_ASSERT(offsetof(Export, dispatch) == 0); + ERTS_CT_ASSERT(offsetof(ErtsDispatchable, addresses) == 0); + + return arm::Mem(ARG1, CodeIndex, arm::lsl(3)); + } + + enum Update : int { + eStack = (1 << 0), + eHeap = (1 << 1), + eReductions = (1 << 2), + eCodeIndex = (1 << 3), + eXRegs = (1 << 4) + }; + + void emit_enter_erlang_frame() { + a.str(a64::x30, arm::Mem(E, -8).pre()); + } + + void emit_leave_erlang_frame() { + a.ldr(a64::x30, arm::Mem(E).post(8)); + } + + void emit_enter_runtime_frame() { + a.stp(a64::x29, a64::x30, arm::Mem(a64::sp, -16).pre()); + a.mov(a64::x29, a64::sp); + } + + void emit_leave_runtime_frame() { + a.mov(a64::sp, a64::x29); + a.ldp(a64::x29, a64::x30, arm::Mem(a64::sp).post(16)); + } + + /* We keep the first six X registers in machine registers. Some of those + * registers are callee-saved and some are caller-saved. + * + * We ignore the ones above `live` to reduce the save/restore traffic on + * these registers. It's enough for this figure to be at least as high as + * the number of actually live registers, and we default to all six + * registers when we don't know the exact number. + * + * Furthermore, we only save the callee-save registers when told to sync + * all registers with the `Update::eXRegs` flag, as this is very rarely + * needed. */ + + template<int Spec = 0> + void emit_enter_runtime(int live = num_register_backed_xregs) { + ERTS_CT_ASSERT((Spec & (Update::eReductions | Update::eStack | + Update::eHeap | Update::eXRegs)) == Spec); + + if ((Spec & Update::eStack) && (Spec & Update::eHeap)) { + /* Store HTOP and E in one go. */ + ERTS_CT_ASSERT_FIELD_PAIR(Process, htop, stop); + a.stp(HTOP, E, arm::Mem(c_p, offsetof(Process, htop))); + } else if (Spec & Update::eStack) { + a.str(E, arm::Mem(c_p, offsetof(Process, stop))); + } else if (Spec & Update::eHeap) { + a.str(HTOP, arm::Mem(c_p, offsetof(Process, htop))); + } + + if (Spec & Update::eReductions) { + a.str(FCALLS, arm::Mem(c_p, offsetof(Process, fcalls))); + } + + /* Save register-backed X registers to the X register array when + * needed. The backing registers must NOT be used afterwards. 
+ * + * In a DEBUG build, the backing X registers will be overwritten with + * garbage values. */ + if (live > 0) { + int num_to_save = MIN(live, ERTS_HIGHEST_CALLER_SAVE_XREG + 1); + int i; + + if (Spec & Update::eXRegs) { + i = ERTS_LOWEST_CALLEE_SAVE_XREG; + } else { + /* If we don't need to sync the X register array, then we can + * get away with saving only the fragile X registers. */ + i = ERTS_LOWEST_CALLER_SAVE_XREG; + } + +#ifdef DEBUG + /* Destroy the saved X registers to find bugs sooner.*/ + if (i < num_to_save) { + mov_imm(SUPER_TMP, bad_boxed_ptr + 0x20 + (Spec << 8)); + } +#endif + + while (i < num_to_save - 1) { + a.stp(register_backed_xregs[i + 0], + register_backed_xregs[i + 1], + getXRef(i)); + +#ifdef DEBUG + a.mov(register_backed_xregs[i + 0], SUPER_TMP); + a.mov(register_backed_xregs[i + 1], SUPER_TMP); +#endif + + i += 2; + } + + if (i < num_to_save) { + a.str(register_backed_xregs[i], getXRef(i)); + +#ifdef DEBUG + a.mov(register_backed_xregs[i], SUPER_TMP); +#endif + } + } + } + + template<int Spec = 0> + void emit_leave_runtime(int live = num_register_backed_xregs) { + ERTS_CT_ASSERT( + (Spec & (Update::eReductions | Update::eStack | Update::eHeap | + Update::eXRegs | Update::eCodeIndex)) == Spec); + + if ((Spec & Update::eStack) && (Spec & Update::eHeap)) { + /* Load HTOP and E in one go. */ + ERTS_CT_ASSERT_FIELD_PAIR(Process, htop, stop); + a.ldp(HTOP, E, arm::Mem(c_p, offsetof(Process, htop))); + } else if (Spec & Update::eHeap) { + a.ldr(HTOP, arm::Mem(c_p, offsetof(Process, htop))); + } else if (Spec & Update::eStack) { + a.ldr(E, arm::Mem(c_p, offsetof(Process, stop))); + } + + if (Spec & Update::eReductions) { + a.ldr(FCALLS, arm::Mem(c_p, offsetof(Process, fcalls))); + } + + if (Spec & Update::eCodeIndex) { + /* Updates the local copy of the active code index, retaining + * save_calls if active. */ + mov_imm(SUPER_TMP, &the_active_code_index); + a.ldr(SUPER_TMP.w(), arm::Mem(SUPER_TMP)); + a.cmp(active_code_ix, imm(ERTS_SAVE_CALLS_CODE_IX)); + a.csel(active_code_ix, + active_code_ix, + SUPER_TMP, + arm::CondCode::kEQ); + } + + /* Restore register-backed X registers from the X register array when + * needed. The register array must NOT be used afterwards. + * + * In a DEBUG build, the register array will be overwritten with + * garbage values. */ + if (live > 0) { + int num_to_restore = MIN(live, ERTS_HIGHEST_CALLER_SAVE_XREG + 1); + int i; + + if (Spec & Update::eXRegs) { + i = ERTS_LOWEST_CALLEE_SAVE_XREG; + } else { + /* If we don't need to sync the X register array, then we can + * get away with loading only the fragile X registers. 
*/ + i = ERTS_LOWEST_CALLER_SAVE_XREG; + } + +#ifdef DEBUG + /* Destroy the restored X registers to find bugs sooner.*/ + if (i < num_to_restore) { + mov_imm(SUPER_TMP, bad_boxed_ptr + 0x80 + (Spec << 8)); + } +#endif + + while (i < num_to_restore - 1) { + a.ldp(register_backed_xregs[i], + register_backed_xregs[i + 1], + getXRef(i)); + +#ifdef DEBUG + a.stp(SUPER_TMP, SUPER_TMP, getXRef(i)); +#endif + + i += 2; + } + + if (i < num_to_restore) { + a.ldr(register_backed_xregs[i], getXRef(i)); + +#ifdef DEBUG + a.str(SUPER_TMP, getXRef(i)); +#endif + } + } + } + + void emit_is_boxed(Label Fail, arm::Gp Src) { + const int bitNumber = 0; + ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == + (1 << bitNumber)); + a.tbnz(Src, imm(bitNumber), Fail); + } + + void emit_is_not_boxed(Label Fail, arm::Gp Src) { + const int bitNumber = 0; + ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == + (1 << bitNumber)); + a.tbz(Src, imm(bitNumber), Fail); + } + + arm::Gp emit_ptr_val(arm::Gp Dst, arm::Gp Src) { +#if !defined(TAG_LITERAL_PTR) + return Src; +#else + /* We intentionally skip TAG_PTR_MASK__ here, as we want to use + * plain `emit_boxed_val` when we know the argument can't be a literal, + * such as in bit-syntax matching. + * + * This comes at very little cost as `emit_boxed_val` nearly always has + * a displacement. */ + a.and_(Dst, Src, imm(~TAG_LITERAL_PTR)); + return Dst; +#endif + } + + void emit_untag_ptr(arm::Gp Dst, arm::Gp Src) { + a.and_(Dst, Src, imm(~TAG_PTR_MASK__)); + } + + constexpr arm::Mem emit_boxed_val(arm::Gp Src, int32_t bytes = 0) const { + ASSERT(bytes % sizeof(Eterm) == 0); + return arm::Mem(Src, bytes - TAG_PRIMARY_BOXED); + } + + void emit_branch_if_not_value(arm::Gp reg, Label lbl) { + emit_branch_if_eq(reg, THE_NON_VALUE, lbl); + } + + void emit_branch_if_value(arm::Gp reg, Label lbl) { + emit_branch_if_ne(reg, THE_NON_VALUE, lbl); + } + + void emit_branch_if_eq(arm::Gp reg, Uint value, Label lbl) { + if (value == 0) { + a.cbz(reg, lbl); + } else { + a.cmp(reg, imm(value)); + a.b_eq(lbl); + } + } + + void emit_branch_if_ne(arm::Gp reg, Uint value, Label lbl) { + if (value == 0) { + a.cbnz(reg, lbl); + } else { + a.cmp(reg, imm(value)); + a.b_ne(lbl); + } + } + + /* Set the Z flag if Reg1 and Reg2 are both immediates. */ + void emit_are_both_immediate(arm::Gp Reg1, arm::Gp Reg2) { + ERTS_CT_ASSERT(TAG_PRIMARY_IMMED1 == _TAG_PRIMARY_MASK); + a.and_(SUPER_TMP, Reg1, Reg2); + a.and_(SUPER_TMP, SUPER_TMP, imm(_TAG_PRIMARY_MASK)); + a.cmp(SUPER_TMP, imm(TAG_PRIMARY_IMMED1)); + } + + /* Set the Z flag if Reg1 and Reg2 are definitely not equal based + * on their tags alone. (They may still be equal if both are + * immediates and all other bits are equal too.) */ + void emit_is_unequal_based_on_tags(arm::Gp Reg1, arm::Gp Reg2) { + ERTS_CT_ASSERT(TAG_PRIMARY_IMMED1 == _TAG_PRIMARY_MASK); + ERTS_CT_ASSERT((TAG_PRIMARY_LIST | TAG_PRIMARY_BOXED) == + TAG_PRIMARY_IMMED1); + a.orr(SUPER_TMP, Reg1, Reg2); + a.and_(SUPER_TMP, SUPER_TMP, imm(_TAG_PRIMARY_MASK)); + + /* + * SUPER_TMP will be now be TAG_PRIMARY_IMMED1 if either + * one or both registers are immediates, or if one register + * is a list and the other a boxed. 
+ */ + a.cmp(SUPER_TMP, imm(TAG_PRIMARY_IMMED1)); + } + + template<typename T> + void mov_imm(arm::Gp to, T value) { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value); + if (value) { + a.mov(to, imm(value)); + } else { + a.mov(to, ZERO); + } + } + + void mov_imm(arm::Gp to, std::nullptr_t value) { + (void)value; + mov_imm(to, 0); + } + + void sub(arm::Gp to, arm::Gp src, int64_t val) { + if (val < 0) { + add(to, src, -val); + } else if (val == 0 && to != src) { + a.mov(to, src); + } else if (val < (1 << 24)) { + if (val & 0xFFF) { + a.sub(to, src, imm(val & 0xFFF)); + src = to; + } + + if (val & 0xFFF000) { + a.sub(to, src, imm(val & 0xFFF000)); + } + } else { + mov_imm(SUPER_TMP, val); + a.sub(to, src, SUPER_TMP); + } + } + + void add(arm::Gp to, arm::Gp src, int64_t val) { + if (val < 0) { + sub(to, src, -val); + } else if (val == 0 && to != src) { + a.mov(to, src); + } else if (val < (1 << 24)) { + if (val & 0xFFF) { + a.add(to, src, imm(val & 0xFFF)); + src = to; + } + + if (val & 0xFFF000) { + a.add(to, src, imm(val & 0xFFF000)); + } + } else { + mov_imm(SUPER_TMP, val); + a.add(to, src, SUPER_TMP); + } + } + + void cmp(arm::Gp src, int64_t val) { + if (Support::isUInt12(val)) { + a.cmp(src, imm(val)); + } else if (Support::isUInt12(-val)) { + a.cmn(src, imm(-val)); + } else { + mov_imm(SUPER_TMP, val); + a.cmp(src, SUPER_TMP); + } + } + + void ldur(arm::Gp reg, arm::Mem mem) { + safe_9bit_imm(a64::Inst::kIdLdur, reg, mem); + } + + void stur(arm::Gp reg, arm::Mem mem) { + safe_9bit_imm(a64::Inst::kIdStur, reg, mem); + } + + void safe_9bit_imm(uint32_t instId, arm::Gp reg, arm::Mem mem) { + int64_t offset = mem.offset(); + + ASSERT(mem.hasBaseReg() && !mem.hasIndex()); + + if (Support::isInt9(offset)) { + a.emit(instId, reg, mem); + } else { + lea(SUPER_TMP, mem); + a.emit(instId, reg, arm::Mem(SUPER_TMP)); + } + } + + /* + * ARM has no LEA instruction. Implement our own to enable us + * to use helpers based on getSchedulerRegRef() that return an + * arm::Mem class. + */ + void lea(arm::Gp to, arm::Mem mem) { + int64_t offset = mem.offset(); + + ASSERT(mem.hasBaseReg() && !mem.hasIndex()); + + if (offset == 0) { + a.mov(to, arm::GpX(mem.baseId())); + } else { + add(to, arm::GpX(mem.baseId()), offset); + } + } + +public: + void embed_rodata(const char *labelName, const char *buff, size_t size); + void embed_bss(const char *labelName, size_t size); + void embed_zeros(size_t size); + + void setLogger(std::string log); + void setLogger(FILE *log); + + void comment(const char *format) { + if (logger.file()) { + a.commentf("# %s", format); + } + } + + template<typename... Ts> + void comment(const char *format, Ts... args) { + if (logger.file()) { + char buff[1024]; + erts_snprintf(buff, sizeof(buff), format, args...); + a.commentf("# %s", buff); + } + } + + struct AsmRange { + ErtsCodePtr start; + ErtsCodePtr stop; + const std::string name; + + struct LineData { + ErtsCodePtr start; + const std::string file; + unsigned line; + }; + + const std::vector<LineData> lines; + }; + + void embed(void *data, uint32_t size) { + a.embed((char *)data, size); + } +}; + +#include "beam_asm_global.hpp" + +class BeamModuleAssembler : public BeamAssembler { + typedef unsigned BeamLabel; + + /* Map of BEAM label number to asmjit Label. These should not be used + * directly by most instructions because of displacement limits, use + * `resolve_beam_label` instead. 
*/ + typedef std::unordered_map<BeamLabel, const Label> LabelMap; + LabelMap rawLabels; + + /* Sequence number used to create unique named labels by + * resolve_label(). Only used when assembly output has been + * requested. */ + long labelSeq = 0; + + struct patch { + Label where; + uint64_t val_offs; + }; + + struct patch_catch { + struct patch patch; + Label handler; + }; + std::vector<struct patch_catch> catches; + + /* Map of import entry to patch labels and mfa */ + struct patch_import { + std::vector<struct patch> patches; + ErtsCodeMFA mfa; + }; + typedef std::unordered_map<unsigned, struct patch_import> ImportMap; + ImportMap imports; + + /* Map of fun entry to patch labels */ + struct patch_lambda { + std::vector<struct patch> patches; + Label trampoline; + }; + typedef std::unordered_map<unsigned, struct patch_lambda> LambdaMap; + LambdaMap lambdas; + + /* Map of literals to patch labels */ + struct patch_literal { + std::vector<struct patch> patches; + }; + typedef std::unordered_map<unsigned, struct patch_literal> LiteralMap; + LiteralMap literals; + + /* All string patches */ + std::vector<struct patch> strings; + + /* All functions that have been seen so far */ + std::vector<BeamLabel> functions; + + /* The BEAM file we've been loaded from, if any. */ + const BeamFile *beam; + + BeamGlobalAssembler *ga; + + /* Used by emit to populate the labelToMFA map */ + Label current_label; + + /* The offset of our BeamCodeHeader, if any. */ + Label code_header; + + /* The module's on_load function, if any. */ + Label on_load; + + /* The end of the last function. Note that the dispatch table, constants, + * and veneers may follow. */ + Label code_end; + + Eterm mod; + + /* Save the last PC for an error. */ + size_t last_error_offset = 0; + + static constexpr ptrdiff_t STUB_CHECK_INTERVAL = 4 << 10; + size_t last_stub_check_offset = 0; + + enum Displacement : size_t { + /* Pessimistic estimate for helper functions, where we don't know the + * branch displacement or whether it will be used near label + * resolution. + * + * Note that we subtract the size of one instruction to handle + * backward displacements. */ + dispUnknown = (32 << 10) - sizeof(Uint32) - STUB_CHECK_INTERVAL, + + /* +- 32KB: `tbz`, `tbnz`, `ldr` of 8-byte literal. */ + disp32K = (32 << 10) - sizeof(Uint32), + + /* +- 1MB: `adr`, `b.cond`, `cb.cond` */ + disp1MB = (1 << 20) - sizeof(Uint32), + + /* +- 128MB: `b`, `blr` */ + disp128MB = (128 << 20) - sizeof(Uint32), + + dispMin = dispUnknown, + dispMax = disp128MB + }; + + static_assert(dispMin <= dispUnknown && dispMax >= disp128MB); + static_assert(STUB_CHECK_INTERVAL < dispMin / 2); + + struct Veneer { + ssize_t latestOffset; + Label anchor; + + Label target; + + constexpr bool operator>(const Veneer &other) const { + return latestOffset > other.latestOffset; + } + }; + + struct Constant { + ssize_t latestOffset; + Label anchor; + + ArgVal value; + + constexpr bool operator>(const Constant &other) const { + return latestOffset > other.latestOffset; + } + }; + + /* ArgVal -> Constant + * + * `_pending_constants` points directly into this container, which is + * documented to be safe as long as we only insert elements. */ + std::unordered_multimap<ArgVal, const Constant, ArgVal::Hash> _constants; + + /* Label::id() -> Veneer + * + * `_pending_veneers` points directly into this container. 
*/ + std::unordered_multimap<uint32_t, const Veneer> _veneers; + + template<typename T> + using PendingStubs = + std::priority_queue<std::reference_wrapper<const T>, + std::deque<std::reference_wrapper<const T>>, + std::greater<const T &>>; + + /* All pending stubs, segregated by type and sorted by `latestOffset` in + * ascending order. + * + * We use separate queues to avoid interleaving them, as they have + * different sizes and alignment requirements. */ + PendingStubs<Constant> _pending_constants; + PendingStubs<Veneer> _pending_veneers; + + /* Maps code pointers to thunks that jump to them, letting us treat global + * fragments as if they were local. */ + std::unordered_map<void (*)(), Label> _dispatchTable; + +public: + BeamModuleAssembler(BeamGlobalAssembler *ga, + Eterm mod, + int num_labels, + const BeamFile *file = NULL); + BeamModuleAssembler(BeamGlobalAssembler *ga, + Eterm mod, + int num_labels, + int num_functions, + const BeamFile *file = NULL); + + bool emit(unsigned op, const Span<ArgVal> &args); + + void codegen(JitAllocator *allocator, + const void **executable_ptr, + void **writable_ptr, + const BeamCodeHeader *in_hdr, + const BeamCodeHeader **out_exec_hdr, + BeamCodeHeader **out_rw_hdr); + + void codegen(JitAllocator *allocator, + const void **executable_ptr, + void **writable_ptr); + + void codegen(char *buff, size_t len); + + void register_metadata(const BeamCodeHeader *header); + + ErtsCodePtr getCode(unsigned label); + ErtsCodePtr getLambda(unsigned index); + + void *getCode(Label label) { + return BeamAssembler::getCode(label); + } + + byte *getCode(char *labelName) { + return BeamAssembler::getCode(labelName); + } + + void embed_vararg_rodata(const Span<ArgVal> &args, a64::Gp reg); + + unsigned getCodeSize() { + ASSERT(code.hasBaseAddress()); + return code.codeSize(); + } + + void copyCodeHeader(BeamCodeHeader *hdr); + BeamCodeHeader *getCodeHeader(void); + const ErtsCodeInfo *getOnLoad(void); + + unsigned patchCatches(char *rw_base); + void patchLambda(char *rw_base, unsigned index, BeamInstr I); + void patchLiteral(char *rw_base, unsigned index, Eterm lit); + void patchImport(char *rw_base, unsigned index, BeamInstr I); + void patchStrings(char *rw_base, const byte *string); + +protected: + int getTypeUnion(const ArgSource &arg) const { + auto typeIndex = + arg.isRegister() ? arg.as<ArgRegister>().typeIndex() : 0; + + ASSERT(typeIndex < beam->types.count); + return beam->types.entries[typeIndex].type_union; + } + + auto getIntRange(const ArgSource &arg) const { + if (arg.isSmall()) { + Sint value = arg.as<ArgSmall>().getSigned(); + return std::make_pair(value, value); + } else { + auto typeIndex = + arg.isRegister() ? 
arg.as<ArgRegister>().typeIndex() : 0; + + ASSERT(typeIndex < beam->types.count); + const auto &entry = beam->types.entries[typeIndex]; + ASSERT(entry.type_union & BEAM_TYPE_INTEGER); + return std::make_pair(entry.min, entry.max); + } + } + + bool always_small(const ArgSource &arg) const { + if (arg.isSmall()) { + return true; + } + + int type_union = getTypeUnion(arg); + if (type_union == BEAM_TYPE_INTEGER) { + auto [min, max] = getIntRange(arg); + return min <= max; + } else { + return false; + } + } + + bool always_immediate(const ArgSource &arg) const { + if (arg.isImmed() || always_small(arg)) { + return true; + } + + int type_union = getTypeUnion(arg); + return (type_union & BEAM_TYPE_MASK_ALWAYS_IMMEDIATE) == type_union; + } + + bool always_same_types(const ArgSource &lhs, const ArgSource &rhs) const { + int lhs_types = getTypeUnion(lhs); + int rhs_types = getTypeUnion(rhs); + + /* We can only be certain that the types are the same when there's + * one possible type. For example, if one is a number and the other + * is an integer, they could differ if the former is a float. */ + if ((lhs_types & (lhs_types - 1)) == 0) { + return lhs_types == rhs_types; + } + + return false; + } + + bool always_one_of(const ArgSource &arg, int types) const { + if (arg.isImmed()) { + if (arg.isSmall()) { + return !!(types & BEAM_TYPE_INTEGER); + } else if (arg.isAtom()) { + return !!(types & BEAM_TYPE_ATOM); + } else if (arg.isNil()) { + return !!(types & BEAM_TYPE_NIL); + } + + return false; + } else { + int type_union = getTypeUnion(arg); + return type_union == (type_union & types); + } + } + + int masked_types(const ArgSource &arg, int mask) const { + if (arg.isImmed()) { + if (arg.isSmall()) { + return mask & BEAM_TYPE_INTEGER; + } else if (arg.isAtom()) { + return mask & BEAM_TYPE_ATOM; + } else if (arg.isNil()) { + return mask & BEAM_TYPE_NIL; + } + + return BEAM_TYPE_NONE; + } else { + return getTypeUnion(arg) & mask; + } + } + + bool exact_type(const ArgSource &arg, int type_id) const { + return always_one_of(arg, type_id); + } + + bool is_sum_small(const ArgSource &LHS, const ArgSource &RHS) { + if (!(always_small(LHS) && always_small(RHS))) { + return false; + } else { + Sint min, max; + auto [min1, max1] = getIntRange(LHS); + auto [min2, max2] = getIntRange(RHS); + min = min1 + min2; + max = max1 + max2; + return IS_SSMALL(min) && IS_SSMALL(max); + } + } + + bool is_difference_small(const ArgSource &LHS, const ArgSource &RHS) { + if (!(always_small(LHS) && always_small(RHS))) { + return false; + } else { + Sint min, max; + auto [min1, max1] = getIntRange(LHS); + auto [min2, max2] = getIntRange(RHS); + min = min1 - max2; + max = max1 - min2; + return IS_SSMALL(min) && IS_SSMALL(max); + } + } + + bool is_product_small(const ArgSource &LHS, const ArgSource &RHS) { + if (!(always_small(LHS) && always_small(RHS))) { + return false; + } else { + auto [min1, max1] = getIntRange(LHS); + auto [min2, max2] = getIntRange(RHS); + auto mag1 = std::max(std::abs(min1), std::abs(max1)); + auto mag2 = std::max(std::abs(min2), std::abs(max2)); + + /* + * mag1 * mag2 <= MAX_SMALL + * mag1 <= MAX_SMALL / mag2 (when mag2 != 0) + */ + ERTS_CT_ASSERT(MAX_SMALL < -MIN_SMALL); + return mag2 == 0 || mag1 <= MAX_SMALL / mag2; + } + } + + bool is_bsl_small(const ArgSource &LHS, const ArgSource &RHS) { + /* + * In the code compiled by scripts/diffable, there never + * seems to be any range information for the RHS. Therefore, + * don't bother unless RHS is an immediate small. 
+ */ + if (!(always_small(LHS) && RHS.isSmall())) { + return false; + } else { + auto [min1, max1] = getIntRange(LHS); + auto rhs_val = RHS.as<ArgSmall>().getSigned(); + + if (min1 < 0 || max1 == 0 || rhs_val < 0) { + return false; + } + + return rhs_val < Support::clz(max1) - _TAG_IMMED1_SIZE; + } + } + + /* Helpers */ + void emit_gc_test(const ArgWord &Stack, + const ArgWord &Heap, + const ArgWord &Live); + void emit_gc_test_preserve(const ArgWord &Need, + const ArgWord &Live, + arm::Gp term); + + arm::Mem emit_variable_apply(bool includeI); + arm::Mem emit_fixed_apply(const ArgWord &arity, bool includeI); + + arm::Gp emit_call_fun(bool skip_box_test = false, + bool skip_fun_test = false, + bool skip_arity_test = false); + + arm::Gp emit_is_binary(const ArgLabel &Fail, + const ArgSource &Src, + Label next, + Label subbin); + + void emit_is_boxed(Label Fail, arm::Gp Src) { + BeamAssembler::emit_is_boxed(Fail, Src); + } + + void emit_is_boxed(Label Fail, const ArgVal &Arg, arm::Gp Src) { + if (always_one_of(Arg, BEAM_TYPE_MASK_ALWAYS_BOXED)) { + comment("skipped box test since argument is always boxed"); + return; + } + + BeamAssembler::emit_is_boxed(Fail, Src); + } + + void emit_get_list(const arm::Gp boxed_ptr, + const ArgRegister &Hd, + const ArgRegister &Tl); + + void emit_div_rem(const ArgLabel &Fail, + const ArgSource &LHS, + const ArgSource &RHS, + const ErtsCodeMFA *error_mfa, + const ArgRegister &Quotient, + const ArgRegister &Remainder, + bool need_div, + bool need_rem); + + void emit_i_bif(const ArgLabel &Fail, + const ArgWord &Bif, + const ArgRegister &Dst); + + void emit_error(int code); + + int emit_bs_get_field_size(const ArgSource &Size, + int unit, + Label Fail, + const arm::Gp &out); + + void emit_bs_get_utf8(const ArgRegister &Ctx, const ArgLabel &Fail); + void emit_bs_get_utf16(const ArgRegister &Ctx, + const ArgLabel &Fail, + const ArgWord &Flags); + + void emit_raise_exception(); + void emit_raise_exception(const ErtsCodeMFA *exp); + void emit_raise_exception(Label I, const ErtsCodeMFA *exp); + + void emit_validate(const ArgWord &Arity); + void emit_bs_skip_bits(const ArgLabel &Fail, const ArgRegister &Ctx); + + void emit_linear_search(arm::Gp val, Label fail, const Span<ArgVal> &args); + + void emit_float_instr(uint32_t instId, + const ArgFRegister &LHS, + const ArgFRegister &RHS, + const ArgFRegister &Dst); + + void emit_validate_unicode(Label next, Label fail, arm::Gp value); + + void emit_bif_is_eq_ne_exact_immed(const ArgSource &LHS, + const ArgSource &RHS, + const ArgRegister &Dst, + Eterm fail_value, + Eterm succ_value); + + void emit_proc_lc_unrequire(void); + void emit_proc_lc_require(void); + + void emit_nyi(const char *msg); + void emit_nyi(void); + + /* Returns a vector of the untagged and rebased `args`. The adjusted + * `comparand` is stored in ARG1. */ + const std::vector<ArgVal> emit_select_untag(const Span<ArgVal> &args, + a64::Gp comparand, + Label fail, + UWord base, + int shift); + + void emit_binsearch_nodes(arm::Gp reg, + size_t Left, + size_t Right, + Label fail, + const Span<ArgVal> &args); + + bool emit_optimized_three_way_select(arm::Gp reg, + Label fail, + const Span<ArgVal> &args); + +#ifdef DEBUG + void emit_tuple_assertion(const ArgSource &Src, arm::Gp tuple_reg); +#endif + +#include "beamasm_protos.h" + + /* Resolves a BEAM label. + * + * When the branch type is not `dispUnknown`, this must be used + * _IMMEDIATELY BEFORE_ the instruction that the label is used in. 
*/ + const Label &resolve_beam_label(const ArgLabel &Label, + enum Displacement disp); + const Label &resolve_label(const Label &target, + enum Displacement disp, + const char *name = nullptr); + + /* Resolves a shared fragment, creating a trampoline that loads the + * appropriate address before jumping there. + * + * When the branch type is not `dispUnknown`, this must be used + * _IMMEDIATELY BEFORE_ the instruction that the label is used in. */ + const Label &resolve_fragment(void (*fragment)(), enum Displacement disp); + + /* Embeds a constant argument and returns its address. All kinds of + * constants are accepted, including labels and export entries. + * + * When the branch type is not `dispUnknown`, this must be used + * _IMMEDIATELY BEFORE_ the instruction that the label is used in. */ + arm::Mem embed_constant(const ArgVal &value, enum Displacement disp); + + /* Convenience wrapper for embedding raw pointers or immediates. */ + template<typename T, + std::enable_if_t<std::is_integral<T>::value || + std::is_pointer<T>::value, + bool> = true> + arm::Mem embed_constant(T data, enum Displacement disp) { + return embed_constant(ArgWord((UWord)data), disp); + } + + /* Binds a label and all related veneers that are within reach of it. */ + void bind_veneer_target(const Label &target); + + void emit_constant(const Constant &constant); + void emit_veneer(const Veneer &veneer); + + /* Unconditionally emits all veneers and constants that are due within + * `range` bytes. */ + void flush_pending_stubs(size_t range); + + /* Emits pending veneers when appropriate. Must be called at least once + * every `STUB_CHECK_INTERVAL` bytes for veneers and constants to work. */ + void check_pending_stubs(); + + /* Calls the given shared fragment, ensuring that the redzone is unused and + * that the return address forms a valid CP. */ + template<typename Any> + void fragment_call(Any target) { + emit_assert_redzone_unused(); + +#if defined(JIT_HARD_DEBUG) + /* Verify that the stack has not grown. */ + Label next = a.newLabel(); + a.ldr(SUPER_TMP, getInitialSPRef()); + a.cmp(a64::sp, SUPER_TMP); + a.b_eq(next); + a.udf(0xdead); + a.bind(next); +#endif + + a.bl(resolve_fragment((void (*)())target, disp128MB)); + } + + template<typename T> + struct function_arity; + template<typename T, typename... Args> + struct function_arity<T(Args...)> + : std::integral_constant<int, sizeof...(Args)> {}; + + template<int expected_arity, typename T> + void runtime_call(T(*func)) { + static_assert(expected_arity == function_arity<T>()); + + a.bl(resolve_fragment((void (*)())func, disp128MB)); + } + + bool isRegisterBacked(const ArgVal &arg) { + if (arg.isXRegister()) { + return arg.as<ArgXRegister>().get() < num_register_backed_xregs; + } else if (arg.isFRegister()) { + return arg.as<ArgFRegister>().get() < num_register_backed_fregs; + } + + return false; + } + + template<typename RegType = arm::Gp> + struct Variable { + RegType reg; + arm::Mem mem; + + Variable(RegType _r) : Variable(_r, arm::Mem()) { + } + Variable(RegType _r, arm::Mem _mem) : reg(_r), mem(_mem) { + } + }; + + Variable<arm::Gp> init_destination(const ArgVal &arg, arm::Gp tmp) { + /* We don't support storing into GpW since their maximum displacement + * is 16K, which means we have to check stubs far more often. 
*/ + ASSERT(tmp.isGpX()); + + if (isRegisterBacked(arg)) { + auto index = arg.as<ArgXRegister>().get(); + return Variable(register_backed_xregs[index]); + } else { + return Variable(tmp, getArgRef(arg)); + } + } + + Variable<arm::VecD> init_destination(const ArgVal &arg, arm::VecD tmp) { + if (isRegisterBacked(arg)) { + auto index = arg.as<ArgFRegister>().get(); + return Variable(register_backed_fregs[index]); + } else { + return Variable(tmp, getArgRef(arg)); + } + } + + Variable<arm::Gp> load_source(const ArgVal &arg, arm::Gp tmp) { + /* We don't support loading into GpW since their maximum displacement + * is 16K, which means we have to check stubs far more often. */ + ASSERT(tmp.isGpX()); + + if (arg.isLiteral()) { + a.ldr(tmp, embed_constant(arg, disp32K)); + return Variable(tmp); + } else if (arg.isRegister()) { + if (isRegisterBacked(arg)) { + auto index = arg.as<ArgXRegister>().get(); + return Variable(register_backed_xregs[index]); + } + + auto ref = getArgRef(arg); + a.ldr(tmp, ref); + return Variable(tmp, ref); + } else { + if (arg.isImmed() || arg.isWord()) { + auto val = arg.isImmed() ? arg.as<ArgImmed>().get() + : arg.as<ArgWord>().get(); + + if (Support::isIntOrUInt32(val)) { + mov_imm(tmp, val); + return Variable(tmp); + } + } + + a.ldr(tmp, embed_constant(arg, disp32K)); + return Variable(tmp); + } + } + + auto load_sources(const ArgVal &Src1, + arm::Gp tmp1, + const ArgVal &Src2, + arm::Gp tmp2) { + if (Src1.isRegister() && Src2.isRegister() && !isRegisterBacked(Src1) && + !isRegisterBacked(Src2)) { + switch (ArgVal::memory_relation(Src1, Src2)) { + case ArgVal::Relation::consecutive: + safe_ldp(tmp1, tmp2, Src1, Src2); + return std::make_pair(Variable(tmp1, getArgRef(Src1)), + Variable(tmp2, getArgRef(Src2))); + case ArgVal::Relation::reverse_consecutive: + safe_ldp(tmp2, tmp1, Src2, Src1); + return std::make_pair(Variable(tmp1, getArgRef(Src1)), + Variable(tmp2, getArgRef(Src2))); + case ArgVal::Relation::none: + break; + } + } + + return std::make_pair(load_source(Src1, tmp1), load_source(Src2, tmp2)); + } + + Variable<arm::VecD> load_source(const ArgVal &arg, arm::VecD tmp) { + if (isRegisterBacked(arg)) { + auto index = arg.as<ArgFRegister>().get(); + return Variable<arm::VecD>(register_backed_fregs[index]); + } + + a.ldr(tmp, getArgRef(arg)); + return Variable<arm::VecD>(tmp); + } + + template<typename Reg> + void mov_var(const Variable<Reg> &to, const Variable<Reg> &from) { + mov_var(to.reg, from); + } + + template<typename Reg> + void mov_var(const Variable<Reg> &to, Reg from) { + if (to.reg != from) { + a.mov(to.reg, from); + } + } + + template<typename Reg> + void mov_var(Reg to, const Variable<Reg> &from) { + if (to != from.reg) { + a.mov(to, from.reg); + } + } + + template<typename Reg> + void flush_var(const Variable<Reg> &to) { + if (to.mem.hasBase()) { + a.str(to.reg, to.mem); + } + } + + void flush_vars(const Variable<arm::Gp> &to1, + const Variable<arm::Gp> &to2) { + const arm::Mem &mem1 = to1.mem; + const arm::Mem &mem2 = to2.mem; + + if (mem1.hasBaseReg() && mem2.hasBaseReg() && + mem1.baseId() == mem2.baseId()) { + if (mem1.offset() + 8 == mem2.offset()) { + safe_stp(to1.reg, to2.reg, mem1); + return; + } else if (mem1.offset() == mem2.offset() + 8) { + safe_stp(to2.reg, to1.reg, mem2); + return; + } + } + + /* Not possible to optimize with stp. 
*/ + flush_var(to1); + flush_var(to2); + } + + void mov_arg(const ArgVal &To, const ArgVal &From) { + if (isRegisterBacked(To)) { + auto to = init_destination(To, SUPER_TMP); + auto from = load_source(From, to.reg); + mov_var(to, from); + flush_var(to); + } else { + auto from = load_source(From, SUPER_TMP); + auto to = init_destination(To, from.reg); + mov_var(to, from); + flush_var(to); + } + } + + void mov_arg(const ArgVal &To, arm::Mem From) { + auto to = init_destination(To, SUPER_TMP); + a.ldr(to.reg, From); + flush_var(to); + } + + void mov_arg(arm::Mem To, const ArgVal &From) { + auto from = load_source(From, SUPER_TMP); + auto to = Variable(from.reg, To); + flush_var(to); + } + + void mov_arg(arm::Gp to, const ArgVal &from) { + auto r = load_source(from, to); + if (r.reg != to) { + a.mov(to, r.reg); + } + } + + void mov_arg(const ArgVal &to, arm::Gp from) { + auto r = init_destination(to, from); + if (r.reg != from) { + a.mov(r.reg, from); + } + flush_var(r); + } + + void cmp_arg(arm::Gp gp, const ArgVal &arg) { + if (arg.isImmed() || arg.isWord()) { + Sint val = arg.isImmed() ? arg.as<ArgImmed>().get() + : arg.as<ArgWord>().get(); + + if (Support::isUInt12(val)) { + a.cmp(gp, imm(val)); + return; + } else if (Support::isUInt12(-val)) { + a.cmn(gp, imm(-val)); + return; + } + } + + mov_arg(SUPER_TMP, arg); + a.cmp(gp, SUPER_TMP); + } + + void safe_stp(arm::Gp gp1, + arm::Gp gp2, + const ArgVal &Dst1, + const ArgVal &Dst2) { + ASSERT(ArgVal::memory_relation(Dst1, Dst2) == + ArgVal::Relation::consecutive); + safe_stp(gp1, gp2, getArgRef(Dst1)); + } + + void safe_stp(arm::Gp gp1, arm::Gp gp2, arm::Mem mem) { + auto offset = mem.offset(); + + ASSERT(gp1.isGpX() && gp2.isGpX()); + + if (std::abs(offset) <= sizeof(Eterm) * MAX_LDP_STP_DISPLACEMENT) { + a.stp(gp1, gp2, mem); + } else if (std::abs(offset) < + sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) { + /* Note that we used `<` instead of `<=`, as we're loading two + * elements rather than one. */ + a.str(gp1, mem); + a.str(gp2, mem.cloneAdjusted(sizeof(Eterm))); + } else { + add(SUPER_TMP, arm::GpX(mem.baseId()), offset); + a.stp(gp1, gp2, arm::Mem(SUPER_TMP)); + } + } + + void safe_ldr(arm::Gp gp, arm::Mem mem) { + auto offset = mem.offset(); + + ASSERT(mem.hasBaseReg() && !mem.hasIndex()); + ASSERT(gp.isGpX()); + + if (std::abs(offset) <= sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) { + a.ldr(gp, mem); + } else { + add(SUPER_TMP, arm::GpX(mem.baseId()), offset); + a.ldr(gp, arm::Mem(SUPER_TMP)); + } + } + + void safe_ldp(arm::Gp gp1, + arm::Gp gp2, + const ArgVal &Src1, + const ArgVal &Src2) { + ASSERT(ArgVal::memory_relation(Src1, Src2) == + ArgVal::Relation::consecutive); + + safe_ldp(gp1, gp2, getArgRef(Src1)); + } + + void safe_ldp(arm::Gp gp1, arm::Gp gp2, arm::Mem mem) { + auto offset = mem.offset(); + + ASSERT(gp1.isGpX() && gp2.isGpX()); + ASSERT(gp1 != gp2); + + if (std::abs(offset) <= sizeof(Eterm) * MAX_LDP_STP_DISPLACEMENT) { + a.ldp(gp1, gp2, mem); + } else if (std::abs(offset) < + sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) { + /* Note that we used `<` instead of `<=`, as we're loading two + * elements rather than one. 
*/ + a.ldr(gp1, mem); + a.ldr(gp2, mem.cloneAdjusted(sizeof(Eterm))); + } else { + add(SUPER_TMP, arm::GpX(mem.baseId()), offset); + a.ldp(gp1, gp2, arm::Mem(SUPER_TMP)); + } + } +}; + +void beamasm_metadata_update( + std::string module_name, + ErtsCodePtr base_address, + size_t code_size, + const std::vector<BeamAssembler::AsmRange> &ranges); +void beamasm_metadata_early_init(); +void beamasm_metadata_late_init(); |
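
Illustrative usage (not part of the patch above): a minimal sketch of how an instruction emitter built on this header would typically wrap a call into C code with the enter/leave-runtime primitives and the Update flags, assuming the surrounding BeamModuleAssembler context. The helper erts_example_helper and the emitter name are hypothetical, chosen only to show the calling convention.

/* Hypothetical C helper; any one-argument C function works the same way. */
extern "C" void erts_example_helper(Process *p);

void BeamModuleAssembler::emit_example_c_call() {
    /* Sync E and HTOP into the process structure and spill the
     * caller-save (register-backed) X registers before leaving
     * "Erlang mode"; 3 is the number of live X registers here. */
    emit_enter_runtime<Update::eStack | Update::eHeap>(3);

    /* C calling convention: first argument in ARG1 (x0). */
    a.mov(ARG1, c_p);
    runtime_call<1>(erts_example_helper);

    /* Reload HTOP/E and the register-backed X registers afterwards,
     * since the helper may have moved the heap or stack. */
    emit_leave_runtime<Update::eStack | Update::eHeap>(3);
}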