Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_common.cpp')
-rw-r--r--	erts/emulator/beam/jit/x86/instr_common.cpp	1877
1 files changed, 1877 insertions, 0 deletions
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
new file mode 100644
index 0000000000..4ca7a10629
--- /dev/null
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -0,0 +1,1877 @@
/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

/*
 * Some notes on how to minimize the code size.
 *
 * Instructions that use 32-bit registers (e.g. eax) are generally
 * one byte shorter than instructions that use 64-bit registers
 * (e.g. rax). This does not apply to registers r8-r15 because they'll
 * always need a rex prefix. The `and`, `or`, and `cmp` instructions
 * are even shorter when operating on the RETb (al) register. The
 * `test` instruction with an immediate second operand is shorter
 * when operating on an 8-bit register.
 *
 * On both Unix and Windows, instructions can be shortened by using
 * RETd, ARG1d, or ARG2d instead of RET, ARG1, or ARG2, respectively.
 * On Unix, but not on Windows, ARG3d and ARG4d will also result in
 * shorter instructions.
 *
 * Here are some examples. If we know that the higher 32 bits of
 * a register are uninteresting or should be zeroed, we can write:
 *
 *     a.mov(RETd, ARG1d)
 *
 * (When writing to the lower 32 bits of a register, the high 32
 * bits are zeroed.)
 *
 * Here is a tag test on the contents of ARG1:
 *
 *     a.and_(ARG1d, 15)
 *     a.cmp(ARG1d, 15)
 *
 * The same tag test on RET can be even shorter if written like this:
 *
 *     a.and_(RETb, 15)
 *     a.cmp(RETb, 15)
 *
 * An alignment test can be written like this (when unit <= 256):
 *
 *     a.test(RETb, imm(unit - 1));
 *     a.test(ARG1.r8(), imm(unit - 1));
 *
 * ASMJIT will automatically encode backward jumps (jumps to bound
 * labels) in the shortest form possible. However, forward jumps
 * (jumps to unbound labels) will by default be encoded in the long
 * form (using a 32-bit relative address).
 *
 * Within a single BEAM instruction, a `short_()` prefix can be used
 * to emit short forward jumps (using a signed byte as an offset,
 * limiting the distance to about 128 bytes).
 *
 * Example:
 *
 *     a.short_().je(next);
 *     .
 *     .
 *     .
 *     a.bind(next);
 */

#include <algorithm>
#include "beam_asm.hpp"

extern "C"
{
#include "erl_bif_table.h"
#include "big.h"
#include "beam_catches.h"
#include "beam_common.h"
#include "code_ix.h"
}

using namespace asmjit;

/* Helpers */

void BeamModuleAssembler::emit_error(int reason) {
    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(reason));
    emit_handle_error();
}

void BeamModuleAssembler::emit_gc_test_preserve(const ArgVal &Need,
                                                const ArgVal &Live,
                                                x86::Gp term) {
    const int32_t bytes_needed = (Need.getValue() + S_RESERVED) * sizeof(Eterm);
    Label after_gc_check = a.newLabel();

    ASSERT(term != ARG3);

    a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
    a.cmp(ARG3, E);
    a.short_().jbe(after_gc_check);

    a.mov(getXRef(Live.getValue()), term);
    mov_imm(ARG4, Live.getValue() + 1);
    fragment_call(ga->get_garbage_collect());
    a.mov(term, getXRef(Live.getValue()));

    a.bind(after_gc_check);
}

void BeamModuleAssembler::emit_gc_test(const ArgVal &Ns,
                                       const ArgVal &Nh,
                                       const ArgVal &Live) {
    const int32_t bytes_needed =
            (Ns.getValue() + Nh.getValue() + S_RESERVED) * sizeof(Eterm);
    Label after_gc_check = a.newLabel();

    a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
    a.cmp(ARG3, E);
    a.short_().jbe(after_gc_check);

    mov_imm(ARG4, Live.getValue());

    fragment_call(ga->get_garbage_collect());
    a.bind(after_gc_check);
}
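/* A note on the two GC tests above: the Erlang heap and stack share a
 * single memory block, with the heap (HTOP) growing upward and the stack
 * (E) growing downward toward it. A single unsigned comparison of
 * HTOP + bytes_needed against E is therefore enough to tell whether the
 * allocation fits without collecting. */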
void BeamModuleAssembler::emit_validate(const ArgVal &arity) {
#ifdef DEBUG
    Label next = a.newLabel(), crash = a.newLabel();

    /* Crash if the Erlang heap is not word-aligned */
    a.test(HTOP, imm(sizeof(Eterm) - 1));
    a.jne(crash);

    /* Crash if the Erlang stack is not word-aligned */
    a.test(E, imm(sizeof(Eterm) - 1));
    a.jne(crash);

    /* Crash if we've overrun the stack */
    a.lea(ARG1, x86::qword_ptr(E, -(int32_t)(S_REDZONE * sizeof(Eterm))));
    a.cmp(HTOP, ARG1);
    a.ja(crash);

    a.jmp(next);
    a.bind(crash);
    a.hlt();
    a.bind(next);

# ifdef JIT_HARD_DEBUG
    emit_enter_runtime();

    for (unsigned i = 0; i < arity.getValue(); i++) {
        a.mov(ARG1, getXRef(i));
        runtime_call<1>(beam_jit_validate_term);
    }

    emit_leave_runtime();
# endif

#endif
}

/* Instrs */

void BeamModuleAssembler::emit_i_validate(const ArgVal &Arity) {
    emit_validate(Arity);
}

void BeamModuleAssembler::emit_allocate_heap(const ArgVal &NeedStack,
                                             const ArgVal &NeedHeap,
                                             const ArgVal &Live) {
    ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
    ASSERT(NeedStack.getValue() <= MAX_REG);
    ArgVal needed = NeedStack;

#if !defined(NATIVE_ERLANG_STACK)
    needed = needed + CP_SIZE;
#endif

    emit_gc_test(needed, NeedHeap, Live);

    if (needed.getValue() > 0) {
        a.sub(E, imm(needed.getValue() * sizeof(Eterm)));
    }

#if !defined(NATIVE_ERLANG_STACK)
    a.mov(getCPRef(), imm(NIL));
#endif
}

void BeamModuleAssembler::emit_allocate(const ArgVal &NeedStack,
                                        const ArgVal &Live) {
    emit_allocate_heap(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
}

void BeamModuleAssembler::emit_deallocate(const ArgVal &Deallocate) {
    ASSERT(Deallocate.getType() == ArgVal::TYPE::u);
    ASSERT(Deallocate.getValue() <= 1023);
    ArgVal dealloc = Deallocate;

#if !defined(NATIVE_ERLANG_STACK)
    dealloc = dealloc + CP_SIZE;
#endif

    if (dealloc.getValue() > 0) {
        a.add(E, imm(dealloc.getValue() * sizeof(Eterm)));
    }
}

void BeamModuleAssembler::emit_test_heap(const ArgVal &Nh, const ArgVal &Live) {
    emit_gc_test(ArgVal(ArgVal::u, 0), Nh, Live);
}
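/* A note on the CP_SIZE adjustments above: when the Erlang stack is
 * emulated rather than mapped onto the native stack (that is, when
 * NATIVE_ERLANG_STACK is not defined), every frame reserves one extra
 * word for the continuation pointer, which allocate initializes to NIL
 * and deallocate must account for when freeing the frame. */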
void BeamModuleAssembler::emit_normal_exit() {
    /* This is implicitly global; it does not normally appear in modules and
     * doesn't require size optimization. */

    emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
    emit_proc_lc_unrequire();

    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(EXC_NORMAL));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0));
    a.mov(ARG1, c_p);
    mov_imm(ARG2, am_normal);
    runtime_call<2>(erts_do_exit_process);

    emit_proc_lc_require();
    emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    abs_jmp(ga->get_do_schedule());
}

void BeamModuleAssembler::emit_continue_exit() {
    /* This is implicitly global; it does not normally appear in modules and
     * doesn't require size optimization. */

    emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
    emit_proc_lc_unrequire();

    a.mov(ARG1, c_p);
    runtime_call<1>(erts_continue_exit_process);

    emit_proc_lc_require();
    emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    abs_jmp(ga->get_do_schedule());
}

/* This is an alias for handle_error */
void BeamModuleAssembler::emit_error_action_code() {
    abs_jmp(ga->get_error_action_code());
}

/* Pseudo-instruction for signalling lambda load errors. Never actually
 * runs. */
void BeamModuleAssembler::emit_i_lambda_error(const ArgVal &Dummy) {
    a.hlt();
}

void BeamModuleAssembler::emit_i_make_fun3(const ArgVal &Fun,
                                           const ArgVal &Dst,
                                           const ArgVal &NumFree,
                                           const std::vector<ArgVal> &env) {
    size_t num_free = env.size();
    ASSERT(NumFree.getValue() == num_free);

    mov_arg(ARG3, NumFree);

    emit_enter_runtime<Update::eHeap>();

    a.mov(ARG1, c_p);
    make_move_patch(ARG2, lambdas[Fun.getValue()].patches);
    runtime_call<3>(new_fun_thing);

    emit_leave_runtime<Update::eHeap>();

    comment("Move fun environment");
    for (unsigned i = 0; i < num_free; i++) {
        mov_arg(x86::qword_ptr(RET,
                               offsetof(ErlFunThing, env) + i * sizeof(Eterm)),
                env[i]);
    }

    comment("Create boxed ptr");
    a.or_(RETb, TAG_PRIMARY_BOXED);
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_get_list(const x86::Gp src,
                                        const ArgVal &Hd,
                                        const ArgVal &Tl) {
    x86::Gp boxed_ptr = emit_ptr_val(src, src);

    switch (ArgVal::register_relation(Hd, Tl)) {
    case ArgVal::Relation::consecutive: {
        comment("(moving head and tail together)");
        x86::Mem dst_ptr = getArgRef(Hd, 16);
        x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
        a.movups(x86::xmm0, src_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        if (!hasCpuFeature(x86::Features::kAVX)) {
            goto fallback;
        }

        comment("(moving and swapping head and tail together)");
        x86::Mem dst_ptr = getArgRef(Tl, 16);
        x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
        a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
        a.vmovups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::none:
    fallback:
        a.mov(ARG2, getCARRef(boxed_ptr));
        a.mov(ARG3, getCDRRef(boxed_ptr));
        mov_arg(Hd, ARG2);
        mov_arg(Tl, ARG3);
        break;
    }
}
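/* The pattern above recurs throughout this file: when two source or
 * destination slots are adjacent in memory, both words are moved with a
 * single 128-bit movups load/store pair, and when they are adjacent but
 * in reverse order, AVX `vpermilpd` with an immediate of 1 loads the two
 * words and swaps them in one instruction. On CPUs without AVX the
 * reversed case falls back to two plain 64-bit moves. */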
void BeamModuleAssembler::emit_get_list(const ArgVal &Src,
                                        const ArgVal &Hd,
                                        const ArgVal &Tl) {
    mov_arg(ARG1, Src);
    emit_get_list(ARG1, Hd, Tl);
}

void BeamModuleAssembler::emit_get_hd(const ArgVal &Src, const ArgVal &Hd) {
    mov_arg(ARG1, Src);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);

    a.mov(ARG2, getCARRef(boxed_ptr));

    mov_arg(Hd, ARG2);
}

void BeamModuleAssembler::emit_get_tl(const ArgVal &Src, const ArgVal &Tl) {
    mov_arg(ARG1, Src);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);

    a.mov(ARG2, getCDRRef(boxed_ptr));

    mov_arg(Tl, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_list(const ArgVal &Fail,
                                                         const ArgVal &Src,
                                                         const ArgVal &Hd,
                                                         const ArgVal &Tl) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);
    emit_get_list(RET, Hd, Tl);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_hd(const ArgVal &Fail,
                                                       const ArgVal &Src,
                                                       const ArgVal &Hd) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);

    a.mov(ARG2, getCARRef(boxed_ptr));

    mov_arg(Hd, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_tl(const ArgVal &Fail,
                                                       const ArgVal &Src,
                                                       const ArgVal &Tl) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);

    a.mov(ARG2, getCDRRef(boxed_ptr));

    mov_arg(Tl, ARG2);
}
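/* About the `test RETb, _TAG_PRIMARY_MASK - TAG_PRIMARY_LIST` checks
 * above: list (cons) pointers have the primary tag 01, boxed pointers 10,
 * and immediates 11, so testing the single bit 10 is enough to reject
 * everything but a cons pointer (the remaining encoding, 00, is a header
 * word and can never appear in a register or stack slot). This saves one
 * instruction compared to the usual and/cmp sequence. */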
void BeamModuleAssembler::emit_i_get(const ArgVal &Src, const ArgVal &Dst) {
    mov_arg(ARG2, Src);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(erts_pd_hash_get);

    emit_leave_runtime();

    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_i_get_hash(const ArgVal &Src,
                                          const ArgVal &Hash,
                                          const ArgVal &Dst) {
    mov_arg(ARG2, Hash);
    mov_arg(ARG3, Src);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<3>(erts_pd_hash_get_with_hx);

    emit_leave_runtime();

    mov_arg(Dst, RET);
}

/* Store the pointer to a tuple in ARG2. Remove any LITERAL_PTR tag. */
void BeamModuleAssembler::emit_load_tuple_ptr(const ArgVal &Term) {
    mov_arg(ARG2, Term);
    (void)emit_ptr_val(ARG2, ARG2);
}

#ifdef DEBUG
/* Emit an assertion to ensure that tuple_reg points into the same
 * tuple as Src. */
void BeamModuleAssembler::emit_tuple_assertion(const ArgVal &Src,
                                               x86::Gp tuple_reg) {
    Label ok = a.newLabel(), fatal = a.newLabel();
    ASSERT(tuple_reg != RET);
    mov_arg(RET, Src);
    emit_is_boxed(fatal, RET, dShort);
    (void)emit_ptr_val(RET, RET);
    a.cmp(RET, tuple_reg);
    a.short_().je(ok);

    a.bind(fatal);
    { a.ud2(); }
    a.bind(ok);
}
#endif

/* Fetch an element from the tuple pointed to by the boxed pointer
 * in ARG2. */
void BeamModuleAssembler::emit_i_get_tuple_element(const ArgVal &Src,
                                                   const ArgVal &Element,
                                                   const ArgVal &Dst) {
#ifdef DEBUG
    emit_tuple_assertion(Src, ARG2);
#endif

    a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
    mov_arg(Dst, ARG1);
}

/* Fetch two consecutive tuple elements from the tuple pointed to by
 * the boxed pointer in ARG2. */
void BeamModuleAssembler::emit_get_two_tuple_elements(const ArgVal &Src,
                                                      const ArgVal &Element,
                                                      const ArgVal &Dst1,
                                                      const ArgVal &Dst2) {
#ifdef DEBUG
    emit_tuple_assertion(Src, ARG2);
#endif

    x86::Mem element_ptr =
            emit_boxed_val(ARG2, Element.getValue(), 2 * sizeof(Eterm));

    switch (ArgVal::register_relation(Dst1, Dst2)) {
    case ArgVal::Relation::consecutive: {
        x86::Mem dst_ptr = getArgRef(Dst1, 16);
        a.movups(x86::xmm0, element_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        if (!hasCpuFeature(x86::Features::kAVX)) {
            goto fallback;
        } else {
            x86::Mem dst_ptr = getArgRef(Dst2, 16);
            a.vpermilpd(x86::xmm0, element_ptr, 1); /* Load and swap */
            a.vmovups(dst_ptr, x86::xmm0);
            break;
        }
    }
    case ArgVal::Relation::none:
    fallback:
        a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
        a.mov(ARG3, emit_boxed_val(ARG2, (Element + sizeof(Eterm)).getValue()));
        mov_arg(Dst1, ARG1);
        mov_arg(Dst2, ARG3);
        break;
    }
}
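/* In the two functions above, Element is a byte offset into the tuple
 * (the arity word is at offset 0) rather than an element index, which is
 * why the second element of a pair is fetched at `Element + sizeof(Eterm)`
 * instead of `Element + 1`. */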
void BeamModuleAssembler::emit_init(const ArgVal &Y) {
    mov_arg(Y, NIL);
}

void BeamModuleAssembler::emit_init_yregs(const ArgVal &Size,
                                          const std::vector<ArgVal> &args) {
    unsigned count = Size.getValue();
    ASSERT(count == args.size());

    if (count == 1) {
        mov_arg(args.front(), NIL);
        return;
    }

    /* There are at least two slots. */
    unsigned i = 0;
    int y_ptr = -1;

    mov_imm(x86::rax, NIL);

    while (i < count) {
        unsigned slots = 1;
        unsigned first_y = args.at(i).getValue();

        while (i + slots < count) {
            ArgVal current_y = args.at(i + slots);

            if (first_y + slots != current_y.getValue()) {
                break;
            }

            slots++;
        }

        /*
         * Now first_y is the number of the first y register to be
         * initialized and slots is the number of y registers to be
         * initialized.
         */

        if (slots == 1) {
            a.mov(getYRef(first_y), x86::rax);
        } else {
            /*
             * There are at least two consecutive y registers to be
             * initialized. Use `stosq` with or without `rep`.
             */
            if (first_y == 0) {
#ifdef NATIVE_ERLANG_STACK
                /* `mov` is two bytes shorter than `lea`. */
                a.mov(x86::rdi, E);
#else
                /* y(0) is at E+8. Must use `lea` here. */
                a.lea(x86::rdi, getYRef(0));
#endif
                y_ptr = 0;
            } else if (y_ptr < 0) {
                /* Initialize rdi for the first time. */
                y_ptr = first_y;
                a.lea(x86::rdi, getYRef(y_ptr));
            } else {
                /* Update rdi using `add`. This is one byte shorter than
                 * using `lea`. */
                unsigned offset = (first_y - y_ptr) * sizeof(Eterm);
                a.add(x86::rdi, imm(offset));
                y_ptr = first_y;
            }

            if (slots <= 4) {
                /* Slightly more compact than `rep stosq`. */
                for (unsigned j = 0; j < slots; j++) {
                    a.stosq();
                }
            } else {
                mov_imm(x86::rcx, slots);
                a.rep().stosq();
            }

            /* Update y_ptr to account for the incrementing done by
             * `stosq`. */
            y_ptr += slots;
        }

        i += slots;
    }
}
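/* For reference: `stosq` stores rax to [rdi] and then advances rdi by 8,
 * which is why NIL is kept in rax, rdi tracks the next y register to be
 * written, and rcx holds the repeat count for the `rep` form. */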
comment("Move arity word"); + mov_arg(x86::qword_ptr(HTOP, 0), Arity); + + comment("Move tuple data"); + for (unsigned i = 0; i < size; i++) { + x86::Mem dst_ptr = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm)); + + if (i + 1 == size) { + mov_arg(dst_ptr, args[i]); + } else { + switch (ArgVal::register_relation(args[i], args[i + 1])) { + case ArgVal::consecutive: { + x86::Mem src_ptr = getArgRef(args[i], 16); + + comment("(moving two elements at once)"); + dst_ptr.setSize(16); + a.movups(x86::xmm0, src_ptr); + a.movups(dst_ptr, x86::xmm0); + i++; + break; + } + case ArgVal::reverse_consecutive: { + if (!hasCpuFeature(x86::Features::kAVX)) { + mov_arg(dst_ptr, args[i]); + } else { + x86::Mem src_ptr = getArgRef(args[i + 1], 16); + + comment("(moving and swapping two elements at once)"); + dst_ptr.setSize(16); + a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */ + a.vmovups(dst_ptr, x86::xmm0); + i++; + } + break; + } + case ArgVal::none: + mov_arg(dst_ptr, args[i]); + break; + } + } + } + + comment("Create boxed ptr"); + a.lea(ARG1, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED)); + a.add(HTOP, imm((size + 1) * sizeof(Eterm))); + + mov_arg(Dst, ARG1); +} + +void BeamModuleAssembler::emit_self(const ArgVal &Dst) { + a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, common.id))); + + mov_arg(Dst, ARG1); +} + +void BeamModuleAssembler::emit_set_tuple_element(const ArgVal &Element, + const ArgVal &Tuple, + const ArgVal &Offset) { + mov_arg(ARG1, Tuple); + + x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1); + mov_arg(emit_boxed_val(boxed_ptr, Offset.getValue()), Element, ARG2); +} + +void BeamModuleAssembler::emit_is_nonempty_list(const ArgVal &Fail, + const ArgVal &Src) { + x86::Mem list_ptr = getArgRef(Src, 1); + + a.test(list_ptr, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST)); + a.jne(labels[Fail.getValue()]); +} + +void BeamModuleAssembler::emit_jump(const ArgVal &Fail) { + a.jmp(labels[Fail.getValue()]); +} + +void BeamModuleAssembler::emit_is_atom(const ArgVal &Fail, const ArgVal &Src) { + mov_arg(RET, Src); + ERTS_CT_ASSERT(_TAG_IMMED2_MASK < 256); + a.and_(RETb, imm(_TAG_IMMED2_MASK)); + a.cmp(RETb, imm(_TAG_IMMED2_ATOM)); + a.jne(labels[Fail.getValue()]); +} + +void BeamModuleAssembler::emit_is_boolean(const ArgVal &Fail, + const ArgVal &Src) { + /* Since am_true and am_false differ by a single bit, we can simplify the + * check by clearing said bit and comparing against the lesser one. 
void BeamModuleAssembler::emit_put_tuple2(const ArgVal &Dst,
                                          const ArgVal &Arity,
                                          const std::vector<ArgVal> &args) {
    size_t size = args.size();

    ASSERT(arityval(Arity.getValue()) == size);

    comment("Move arity word");
    mov_arg(x86::qword_ptr(HTOP, 0), Arity);

    comment("Move tuple data");
    for (unsigned i = 0; i < size; i++) {
        x86::Mem dst_ptr = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm));

        if (i + 1 == size) {
            mov_arg(dst_ptr, args[i]);
        } else {
            switch (ArgVal::register_relation(args[i], args[i + 1])) {
            case ArgVal::consecutive: {
                x86::Mem src_ptr = getArgRef(args[i], 16);

                comment("(moving two elements at once)");
                dst_ptr.setSize(16);
                a.movups(x86::xmm0, src_ptr);
                a.movups(dst_ptr, x86::xmm0);
                i++;
                break;
            }
            case ArgVal::reverse_consecutive: {
                if (!hasCpuFeature(x86::Features::kAVX)) {
                    mov_arg(dst_ptr, args[i]);
                } else {
                    x86::Mem src_ptr = getArgRef(args[i + 1], 16);

                    comment("(moving and swapping two elements at once)");
                    dst_ptr.setSize(16);
                    a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
                    a.vmovups(dst_ptr, x86::xmm0);
                    i++;
                }
                break;
            }
            case ArgVal::none:
                mov_arg(dst_ptr, args[i]);
                break;
            }
        }
    }

    comment("Create boxed ptr");
    a.lea(ARG1, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
    a.add(HTOP, imm((size + 1) * sizeof(Eterm)));

    mov_arg(Dst, ARG1);
}

void BeamModuleAssembler::emit_self(const ArgVal &Dst) {
    a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, common.id)));

    mov_arg(Dst, ARG1);
}

void BeamModuleAssembler::emit_set_tuple_element(const ArgVal &Element,
                                                 const ArgVal &Tuple,
                                                 const ArgVal &Offset) {
    mov_arg(ARG1, Tuple);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    mov_arg(emit_boxed_val(boxed_ptr, Offset.getValue()), Element, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list(const ArgVal &Fail,
                                                const ArgVal &Src) {
    x86::Mem list_ptr = getArgRef(Src, 1);

    a.test(list_ptr, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_jump(const ArgVal &Fail) {
    a.jmp(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_atom(const ArgVal &Fail, const ArgVal &Src) {
    mov_arg(RET, Src);

    ERTS_CT_ASSERT(_TAG_IMMED2_MASK < 256);
    a.and_(RETb, imm(_TAG_IMMED2_MASK));
    a.cmp(RETb, imm(_TAG_IMMED2_ATOM));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_boolean(const ArgVal &Fail,
                                          const ArgVal &Src) {
    /* Since am_true and am_false differ by a single bit, we can simplify the
     * check by clearing said bit and comparing against the lesser one. */
    ERTS_CT_ASSERT(am_false == make_atom(0));
    ERTS_CT_ASSERT(am_true == make_atom(1));

    mov_arg(ARG1, Src);

    a.and_(ARG1, imm(~(am_true & ~_TAG_IMMED1_MASK)));
    a.cmp(ARG1, imm(am_false));
    a.jne(labels[Fail.getValue()]);
}
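/* To make the is_boolean trick concrete: atoms carry their table index
 * above a 6-bit tag, so am_false (index 0) and am_true (index 1) differ
 * only in bit 6. The `and` clears exactly that bit, mapping both booleans
 * to am_false and leaving every other term unequal to it. */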
void BeamModuleAssembler::emit_is_binary(Label fail,
                                         x86::Gp src,
                                         Label next,
                                         Label subbin) {
    ASSERT(src != RET && src != ARG2);

    emit_is_boxed(fail, src);

    x86::Gp boxed_ptr = emit_ptr_val(src, src);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));

    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_SUB_BIN));
    a.short_().je(subbin);

    ERTS_CT_ASSERT(_TAG_HEADER_REFC_BIN + 4 == _TAG_HEADER_HEAP_BIN);
    a.and_(RETb, imm(~4));
    a.cmp(RETb, imm(_TAG_HEADER_REFC_BIN));
    a.short_().je(next);
    a.jmp(fail);
}

void BeamModuleAssembler::emit_is_binary(const ArgVal &Fail,
                                         const ArgVal &Src) {
    Label next = a.newLabel(), subbin = a.newLabel();

    mov_arg(ARG1, Src);

    emit_is_binary(labels[Fail.getValue()], ARG1, next, subbin);

    a.bind(subbin);
    {
        /* emit_is_binary has already removed the literal tag from Src, if
         * applicable. */
        a.cmp(emit_boxed_val(ARG1, offsetof(ErlSubBin, bitsize), sizeof(byte)),
              imm(0));
        a.jne(labels[Fail.getValue()]);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_is_bitstring(const ArgVal &Fail,
                                            const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);

    emit_is_binary(labels[Fail.getValue()], ARG1, next, next);

    a.bind(next);
}
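/* Two tricks are used in the binary tests above. First, the header tags
 * for refc and heap binaries differ only in the bit with value 4 (as the
 * compile-time assertion verifies), so `and ~4` folds both cases into a
 * single compare. Second, is_binary and is_bitstring share the helper: a
 * sub binary counts as a binary only when its bitsize field is zero,
 * while for is_bitstring the sub-binary case jumps straight to success. */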
void BeamModuleAssembler::emit_is_float(const ArgVal &Fail, const ArgVal &Src) {
    mov_arg(ARG1, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG1);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_function(const ArgVal &Fail,
                                           const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.cmp(RET, imm(HEADER_FUN));
    a.short_().je(next);
    ERTS_CT_ASSERT(HEADER_EXPORT < 256);
    a.cmp(RETb, imm(HEADER_EXPORT));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_function2(const ArgVal &Fail,
                                            const ArgVal &Src,
                                            const ArgVal &Arity) {
    if (Arity.getType() != ArgVal::i) {
        /*
         * Non-literal arity - extremely uncommon. Generate simple code.
         */
        mov_arg(ARG2, Src);
        mov_arg(ARG3, Arity);

        emit_enter_runtime();

        a.mov(ARG1, c_p);
        runtime_call<3>(erl_is_function);

        emit_leave_runtime();

        a.cmp(RET, imm(am_true));
        a.jne(labels[Fail.getValue()]);
        return;
    }

    unsigned arity = unsigned_val(Arity.getValue());
    if (arity > MAX_ARG) {
        /* Arity is negative or too large. */
        a.jmp(labels[Fail.getValue()]);
        return;
    }

    Label next = a.newLabel(), fun = a.newLabel();

    mov_arg(ARG1, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG1);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.cmp(RETd, imm(HEADER_FUN));
    a.short_().je(fun);
    ERTS_CT_ASSERT(HEADER_EXPORT < 256);
    a.cmp(RETb, imm(HEADER_EXPORT));
    a.jne(labels[Fail.getValue()]);

    comment("Check arity of export fun");
    a.mov(ARG2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    a.cmp(x86::qword_ptr(ARG2, offsetof(Export, info.mfa.arity)), imm(arity));
    a.jne(labels[Fail.getValue()]);
    a.short_().jmp(next);

    comment("Check arity of fun");
    a.bind(fun);
    {
        a.cmp(emit_boxed_val(boxed_ptr, offsetof(ErlFunThing, arity)),
              imm(arity));
        a.jne(labels[Fail.getValue()]);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_is_integer(const ArgVal &Fail,
                                          const ArgVal &Src) {
    Label next = a.newLabel();
    Label fail = labels[Fail.getValue()];

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().je(next);

    emit_is_boxed(fail, RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));

    a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
    a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
    a.jne(fail);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_list(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    a.cmp(RET, imm(NIL));
    a.short_().je(next);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);
    a.bind(next);
}

void BeamModuleAssembler::emit_is_map(const ArgVal &Fail, const ArgVal &Src) {
    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_MAP));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_nil(const ArgVal &Fail, const ArgVal &Src) {
    a.cmp(getArgRef(Src), imm(NIL));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_number(const ArgVal &Fail,
                                         const ArgVal &Src) {
    Label next = a.newLabel();
    Label fail = labels[Fail.getValue()];

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().je(next);

    emit_is_boxed(fail, RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(ARG1, emit_boxed_val(boxed_ptr));

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
    a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
    a.short_().je(next);

    a.cmp(ARG1d, imm(HEADER_FLONUM));
    a.jne(fail);

    a.bind(next);
}
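/* In is_integer and is_number above, the positive and negative bignum
 * header tags differ only in _BIG_SIGN_BIT, so masking with
 * `_TAG_HEADER_MASK - _BIG_SIGN_BIT` and comparing against the positive
 * tag accepts bignums of either sign with a single compare. */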
void BeamModuleAssembler::emit_is_pid(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_PID));
    a.short_().je(next);

    /* Reuse RET as the important bits are still available. */
    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, _TAG_HEADER_MASK);
    a.cmp(RETb, _TAG_HEADER_EXTERNAL_PID);
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_port(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_PORT));
    a.short_().je(next);

    /* Reuse RET as the important bits are still available. */
    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PORT));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_reference(const ArgVal &Fail,
                                            const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_REF));
    a.short_().je(next);
    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_REF));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}
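/* The three tests above share a shape: pids and ports on the local node
 * are immediates, so the cheap immediate-tag test is tried first and
 * only terms from other nodes (external pids/ports) require following
 * the boxed pointer and inspecting the header word. References are
 * always boxed, which is why is_reference has no immediate case. */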
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple(const ArgVal &Fail,
                                                 const ArgVal &Src,
                                                 const ArgVal &Arity,
                                                 const ArgVal &Tag) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    x86::Gp boxed_ptr = emit_ptr_val(ARG2, ARG2);
    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);

    a.cmp(emit_boxed_val(boxed_ptr, sizeof(Eterm)), imm(Tag.getValue()));
    a.jne(labels[Fail.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple_ff(const ArgVal &NotTuple,
                                                    const ArgVal &NotRecord,
                                                    const ArgVal &Src,
                                                    const ArgVal &Arity,
                                                    const ArgVal &Tag) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[NotTuple.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);
    a.mov(ARG1, emit_boxed_val(ARG2));

    ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
    a.test(ARG1.r8(), imm(_TAG_HEADER_MASK));
    a.jne(labels[NotTuple.getValue()]);

    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(ARG1d, imm(Arity.getValue()));
    a.jne(labels[NotRecord.getValue()]);

    a.cmp(emit_boxed_val(ARG2, sizeof(Eterm)), imm(Tag.getValue()));
    a.jne(labels[NotRecord.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple(const ArgVal &Fail,
                                          const ArgVal &Src) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);
    ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
    a.test(emit_boxed_val(ARG2, 0, sizeof(byte)), imm(_TAG_HEADER_MASK));

    a.jne(labels[Fail.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple_of_arity(const ArgVal &Fail,
                                                   const ArgVal &Src,
                                                   const ArgVal &Arity) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);
    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_test_arity(const ArgVal &Fail,
                                            const ArgVal &Src,
                                            const ArgVal &Arity) {
    mov_arg(ARG2, Src);

    (void)emit_ptr_val(ARG2, ARG2);
    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_i_is_eq_exact_immed(const ArgVal &Fail,
                                                   const ArgVal &X,
                                                   const ArgVal &Y) {
    cmp_arg(getArgRef(X), Y);
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_i_is_ne_exact_immed(const ArgVal &Fail,
                                                   const ArgVal &X,
                                                   const ArgVal &Y) {
    cmp_arg(getArgRef(X), Y);
    a.je(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_eq_exact(const ArgVal &Fail,
                                           const ArgVal &X,
                                           const ArgVal &Y) {
    Label next = a.newLabel();

    mov_arg(ARG2, Y); /* May clobber ARG1 */
    mov_arg(ARG1, X);

    a.cmp(ARG1, ARG2);
#ifdef JIT_HARD_DEBUG
    a.je(next);
#else
    a.short_().je(next);
#endif

    /* Fancy way of checking if both are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.je(labels[Fail.getValue()]);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.je(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_i_is_eq_exact_literal(const ArgVal &Fail,
                                                     const ArgVal &Src,
                                                     const ArgVal &Literal,
                                                     const ArgVal &tag_test) {
    mov_arg(ARG2, Literal); /* May clobber ARG1 */
    mov_arg(ARG1, Src);

    /* Fail immediately unless Src is the same type of pointer as the
     * literal. */
    a.test(ARG1.r8(), imm(tag_test.getValue()));
    a.jne(labels[Fail.getValue()]);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jz(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_ne_exact(const ArgVal &Fail,
                                           const ArgVal &X,
                                           const ArgVal &Y) {
    Label next = a.newLabel();

    mov_arg(ARG2, Y); /* May clobber ARG1 */
    mov_arg(ARG1, X);

    a.cmp(ARG1, ARG2);
    a.je(labels[Fail.getValue()]);

    /* Fancy way of checking if both are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
#ifdef JIT_HARD_DEBUG
    a.je(next);
#else
    a.short_().je(next);
#endif

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jnz(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_i_is_ne_exact_literal(const ArgVal &Fail,
                                                     const ArgVal &Src,
                                                     const ArgVal &Literal) {
    Label next = a.newLabel();

    mov_arg(ARG2, Literal); /* May clobber ARG1 */
    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.short_().je(next);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jnz(labels[Fail.getValue()]);

    a.bind(next);
}
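/* The "fancy" immediate check above works because TAG_PRIMARY_IMMED1 has
 * both primary tag bits set: ANDing the two terms leaves both bits set
 * only when both terms are immediates. Two immediates that compared
 * unequal can never be exactly equal, so the call to eq() is skipped. */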
void BeamGlobalAssembler::emit_arith_eq_shared() {
    Label generic_compare = a.newLabel();

    /* Are both floats? */
    a.mov(ARG3d, ARG1d);
    a.or_(ARG3d, ARG2d);
    a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
    a.short_().jne(generic_compare);

    x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
    a.mov(ARG3, emit_boxed_val(boxed_ptr));
    boxed_ptr = emit_ptr_val(ARG5, ARG2);
    a.mov(ARG5, emit_boxed_val(boxed_ptr));
    a.and_(ARG3d, imm(_TAG_HEADER_MASK));
    a.and_(ARG5d, imm(_TAG_HEADER_MASK));
    a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
    a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
    a.or_(ARG3d, ARG5d);
    a.short_().jne(generic_compare);

    boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    boxed_ptr = emit_ptr_val(ARG2, ARG2);
    a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));

    /* All float terms are finite so our caller only needs to check ZF. We
     * don't need to check for errors (PF). */
    a.comisd(x86::xmm0, x86::xmm1);

    a.ret();

    a.bind(generic_compare);
    {
        emit_enter_runtime();

        /* Generic eq-only arithmetic comparison. */
        comment("erts_cmp_compound(X, Y, 0, 1);");
        mov_imm(ARG3, 0);
        mov_imm(ARG4, 1);
        runtime_call<4>(erts_cmp_compound);

        emit_leave_runtime();

        a.test(RET, RET);

        a.ret();
    }
}

void BeamModuleAssembler::emit_is_eq(const ArgVal &Fail,
                                     const ArgVal &A,
                                     const ArgVal &B) {
    Label fail = labels[Fail.getValue()], next = a.newLabel();

    mov_arg(ARG2, B); /* May clobber ARG1 */
    mov_arg(ARG1, A);

    a.cmp(ARG1, ARG2);
    a.short_().je(next);

    /* We can skip deep comparisons when both args are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.je(fail);

    safe_fragment_call(ga->get_arith_eq_shared());
    a.jne(fail);
    a.bind(next);
}

void BeamModuleAssembler::emit_is_ne(const ArgVal &Fail,
                                     const ArgVal &A,
                                     const ArgVal &B) {
    Label fail = labels[Fail.getValue()], next = a.newLabel();

    mov_arg(ARG2, B); /* May clobber ARG1 */
    mov_arg(ARG1, A);

    a.cmp(ARG1, ARG2);
    a.je(fail);

    /* We can skip deep comparisons when both args are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.short_().je(next);

    safe_fragment_call(ga->get_arith_eq_shared());
    a.je(fail);
    a.bind(next);
}

void BeamGlobalAssembler::emit_arith_compare_shared() {
    Label atom_compare, generic_compare;

    atom_compare = a.newLabel();
    generic_compare = a.newLabel();

    /* Are both floats?
     *
     * This is done first as relative comparisons on atoms don't make much
     * sense. */
    a.mov(ARG3d, ARG1d);
    a.or_(ARG3d, ARG2d);
    a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
    a.short_().jne(atom_compare);

    x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
    a.mov(ARG3, emit_boxed_val(boxed_ptr));
    boxed_ptr = emit_ptr_val(ARG5, ARG2);
    a.mov(ARG5, emit_boxed_val(boxed_ptr));
    a.and_(ARG3d, imm(_TAG_HEADER_MASK));
    a.and_(ARG5d, imm(_TAG_HEADER_MASK));
    a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
    a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
    a.or_(ARG3d, ARG5d);

    /* NOTE: Short won't reach if JIT_HARD_DEBUG is defined. */
    a.jne(generic_compare);

    boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    boxed_ptr = emit_ptr_val(ARG2, ARG2);
    a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    a.comisd(x86::xmm0, x86::xmm1);

    /* `comisd` doesn't set the flags the same way `test` and friends do,
     * so they need to be converted for jl/jge to work. */
    a.setae(x86::al);
    a.dec(x86::al);

    a.ret();

    a.bind(atom_compare);
    {
        /* Are both atoms? */
        a.mov(ARG3d, ARG1d);
        a.mov(ARG5d, ARG2d);
        a.and_(ARG3d, imm(_TAG_IMMED2_MASK));
        a.and_(ARG5d, imm(_TAG_IMMED2_MASK));
        a.sub(ARG3d, imm(_TAG_IMMED2_ATOM));
        a.sub(ARG5d, imm(_TAG_IMMED2_ATOM));
        a.or_(ARG3d, ARG5d);
        a.jne(generic_compare);

        emit_enter_runtime();

        runtime_call<2>(erts_cmp_atoms);

        emit_leave_runtime();

        /* !! erts_cmp_atoms returns int, not Sint !! */
        a.test(RETd, RETd);

        a.ret();
    }

    a.bind(generic_compare);
    {
        emit_enter_runtime();

        comment("erts_cmp_compound(X, Y, 0, 0);");
        mov_imm(ARG3, 0);
        mov_imm(ARG4, 0);
        runtime_call<4>(erts_cmp_compound);

        emit_leave_runtime();

        a.test(RET, RET);

        a.ret();
    }
}
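/* The setae/dec pair above converts comisd's unsigned-style flags into
 * signed-style ones: setae leaves al = 1 when xmm0 >= xmm1 (CF clear) and
 * al = 0 otherwise, and the following dec yields 0 or -1, setting SF and
 * ZF exactly as an integer comparison would, so the callers' jl/jge
 * instructions work unchanged for both the float and generic paths. */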
void BeamModuleAssembler::emit_is_lt(const ArgVal &Fail,
                                     const ArgVal &LHS,
                                     const ArgVal &RHS) {
    Label fail = labels[Fail.getValue()];
    Label generic = a.newLabel(), next = a.newLabel();

    mov_arg(ARG2, RHS); /* May clobber ARG1 */
    mov_arg(ARG1, LHS);

    a.cmp(ARG1, ARG2);
    a.je(fail);

    /* Relative comparisons are overwhelmingly likely to be used on smalls,
     * so we'll specialize those and keep the rest in a shared fragment. */

    if (RHS.isImmed() && is_small(RHS.getValue())) {
        a.mov(RETd, ARG1d);
    } else if (LHS.isImmed() && is_small(LHS.getValue())) {
        a.mov(RETd, ARG2d);
    } else {
        a.mov(RETd, ARG1d);
        a.and_(RETd, ARG2d);
    }

    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().jne(generic);

    a.cmp(ARG1, ARG2);
    a.short_().jl(next);
    a.jmp(fail);

    a.bind(generic);
    {
        safe_fragment_call(ga->get_arith_compare_shared());
        a.jge(fail);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_is_ge(const ArgVal &Fail,
                                     const ArgVal &LHS,
                                     const ArgVal &RHS) {
    Label fail = labels[Fail.getValue()];
    Label generic = a.newLabel(), next = a.newLabel();

    mov_arg(ARG2, RHS); /* May clobber ARG1 */
    mov_arg(ARG1, LHS);

    a.cmp(ARG1, ARG2);
    a.short_().je(next);

    /* Relative comparisons are overwhelmingly likely to be used on smalls,
     * so we'll specialize those and keep the rest in a shared fragment. */

    if (RHS.isImmed() && is_small(RHS.getValue())) {
        a.mov(RETd, ARG1d);
    } else if (LHS.isImmed() && is_small(LHS.getValue())) {
        a.mov(RETd, ARG2d);
    } else {
        a.mov(RETd, ARG1d);
        a.and_(RETd, ARG2d);
    }

    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().jne(generic);

    a.cmp(ARG1, ARG2);
    a.short_().jge(next);
    a.jmp(fail);

    a.bind(generic);
    {
        safe_fragment_call(ga->get_arith_compare_shared());
        a.jl(fail);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_bif_is_eq_ne_exact_immed(const ArgVal &Src,
                                                        const ArgVal &Immed,
                                                        const ArgVal &Dst,
                                                        Eterm fail_value,
                                                        Eterm succ_value) {
    cmp_arg(getArgRef(Src), Immed);
    mov_imm(RET, fail_value);
    mov_imm(ARG1, succ_value);
    a.cmove(RET, ARG1);
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_bif_is_eq_exact_immed(const ArgVal &Src,
                                                     const ArgVal &Immed,
                                                     const ArgVal &Dst) {
    emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_false, am_true);
}

void BeamModuleAssembler::emit_bif_is_ne_exact_immed(const ArgVal &Src,
                                                     const ArgVal &Immed,
                                                     const ArgVal &Dst) {
    emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_true, am_false);
}
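/* bif_is_eq_ne_exact_immed above computes the boolean without a branch:
 * RET is preloaded with the failure value, the success value is staged in
 * ARG1, and `cmove` overwrites RET only when the comparison set ZF. */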
void BeamModuleAssembler::emit_badmatch(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(BADMATCH);
}

void BeamModuleAssembler::emit_case_end(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(EXC_CASE_CLAUSE);
}

void BeamModuleAssembler::emit_system_limit_body() {
    emit_error(SYSTEM_LIMIT);
}

void BeamModuleAssembler::emit_if_end() {
    emit_error(EXC_IF_CLAUSE);
}

void BeamModuleAssembler::emit_catch(const ArgVal &Y, const ArgVal &Fail) {
    a.inc(x86::qword_ptr(c_p, offsetof(Process, catches)));

    Label patch_addr = a.newLabel();

    /*
     * Emit the following instruction:
     *
     *     b8 ff ff ff 7f    mov eax,0x7fffffff
     *        ^
     *        |
     *        |
     * offset to be patched
     * with the tagged catch
     */
    a.bind(patch_addr);
    a.mov(RETd, imm(0x7fffffff));

    mov_arg(Y, RET);

    /* Offset = 1 for `mov` payload */
    catches.push_back({{patch_addr, 0x1, 0}, labels[Fail.getValue()]});
}

void BeamGlobalAssembler::emit_catch_end_shared() {
    Label not_throw = a.newLabel(), not_error = a.newLabel(),
          after_gc = a.newLabel();

    /* Load thrown value / reason into ARG2 for add_stacktrace */
    a.mov(ARG2, getXRef(2));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), imm(NIL));

    a.cmp(getXRef(1), imm(am_throw));
    a.short_().jne(not_throw);

    /* Thrown value, return it in x0 */
    a.mov(getXRef(0), ARG2);

    a.ret();

    a.bind(not_throw);
    {
        a.cmp(getXRef(1), imm(am_error));
        /* NOTE: Short won't reach if JIT_HARD_DEBUG is defined. */
        a.jne(not_error);

        /* This is an error, attach a stacktrace to the reason. */
        emit_enter_runtime<Update::eStack | Update::eHeap>();

        a.mov(ARG1, c_p);
        /* ARG2 set above. */
        a.mov(ARG3, getXRef(3));
        runtime_call<3>(add_stacktrace);

        emit_leave_runtime<Update::eStack | Update::eHeap>();

        /* not_error assumes stacktrace/reason is in ARG2 */
        a.mov(ARG2, RET);
    }

    a.bind(not_error);
    {
        const int32_t bytes_needed = (3 + S_RESERVED) * sizeof(Eterm);

        a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
        a.cmp(ARG3, E);
        a.short_().jbe(after_gc);

        /* Preserve stacktrace / reason */
        a.mov(getXRef(0), ARG2);
        mov_imm(ARG4, 1);
        aligned_call(labels[garbage_collect]);
        a.mov(ARG2, getXRef(0));

        a.bind(after_gc);

        a.mov(x86::qword_ptr(HTOP), imm(make_arityval(2)));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), imm(am_EXIT));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 2), ARG2);

        a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
        a.add(HTOP, imm(3 * sizeof(Eterm)));

        a.mov(getXRef(0), RET);
    }

    a.ret();
}

void BeamModuleAssembler::emit_catch_end(const ArgVal &Y) {
    Label next = a.newLabel();

    emit_try_end(Y);

    a.cmp(getXRef(0), imm(THE_NON_VALUE));
    a.short_().jne(next);
    fragment_call(ga->get_catch_end_shared());
    a.bind(next);
}
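/* In catch_end above, x0 == THE_NON_VALUE signals that an exception was
 * caught, with the class in x1, the reason or thrown value in x2, and the
 * raw stacktrace in x3. A thrown value is returned as-is; anything else is
 * rewritten to the 2-tuple {'EXIT', Reason}, where for errors the
 * stacktrace is first attached to the reason by add_stacktrace. */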
void BeamModuleAssembler::emit_try_end(const ArgVal &Y) {
    a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
    emit_init(Y);
}

void BeamModuleAssembler::emit_try_case(const ArgVal &Y) {
    a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
    mov_imm(RET, NIL);
    mov_arg(Y, RET);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), RET);
    a.movups(x86::xmm0, x86::xmmword_ptr(registers, 1 * sizeof(Eterm)));
    a.mov(RET, getXRef(3));
    a.movups(x86::xmmword_ptr(registers, 0 * sizeof(Eterm)), x86::xmm0);
    a.mov(getXRef(2), RET);
}

void BeamModuleAssembler::emit_try_case_end(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(EXC_TRY_CLAUSE);
}

void BeamModuleAssembler::emit_raise(const ArgVal &Trace, const ArgVal &Value) {
    mov_arg(ARG3, Value);
    mov_arg(ARG2, Trace);

    /* This is an error, attach a stacktrace to the reason. */
    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), ARG3);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, ftrace)), ARG2);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(erts_sanitize_freason);

    emit_leave_runtime();

    emit_handle_error();
}

void BeamModuleAssembler::emit_build_stacktrace() {
    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    a.mov(ARG2, getXRef(0));
    runtime_call<2>(build_stacktrace);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.mov(getXRef(0), RET);
}

void BeamModuleAssembler::emit_raw_raise() {
    Label next = a.newLabel();

    emit_enter_runtime();

    a.mov(ARG1, getXRef(2));
    a.mov(ARG2, getXRef(0));
    a.mov(ARG3, getXRef(1));
    a.mov(ARG4, c_p);
    runtime_call<4>(raw_raise);

    emit_leave_runtime();

    a.test(RET, RET);
    a.short_().jne(next);
    emit_handle_error();
    a.bind(next);
    a.mov(getXRef(0), imm(am_badarg));
}

void BeamGlobalAssembler::emit_i_test_yield_shared() {
    int mfa_offset = -(int)sizeof(ErtsCodeMFA) - BEAM_ASM_FUNC_PROLOGUE_SIZE;

    /* Yield address is in ARG3. */
    a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG2);
    a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, arity)));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);

    emit_discard_cp();

    a.jmp(labels[context_switch_simplified]);
}

void BeamModuleAssembler::emit_i_test_yield() {
    Label next = a.newLabel(), entry = a.newLabel();

    /* When present, this is guaranteed to be the first instruction after the
     * breakpoint trampoline. */
    ASSERT(a.offset() % 8 == 0);

    a.bind(entry);
    a.dec(FCALLS);
    a.short_().jg(next);
    a.lea(ARG3, x86::qword_ptr(entry));
    a.call(funcYield);
    a.bind(next);
}

void BeamModuleAssembler::emit_i_yield() {
    a.mov(getXRef(0), imm(am_true));

#ifdef NATIVE_ERLANG_STACK
    fragment_call(ga->get_dispatch_return());
#else
    Label next = a.newLabel();

    a.lea(ARG3, x86::qword_ptr(next));
    abs_jmp(ga->get_dispatch_return());

    a.align(kAlignCode, 8);
    a.bind(next);
#endif
}

void BeamModuleAssembler::emit_i_perf_counter() {
    Label next = a.newLabel(), small = a.newLabel();

    emit_enter_runtime();

#ifdef WIN32
    /* Call the function pointer used by erts_sys_perf_counter */
    runtime_call<0>(erts_sys_time_data__.r.o.sys_hrtime);
#else
    runtime_call<0>(erts_sys_time_data__.r.o.perf_counter);
#endif

    emit_leave_runtime();

    a.mov(ARG1, RET);
    a.sar(ARG1, imm(SMALL_BITS - 1));
    a.add(ARG1, 1);
    a.cmp(ARG1, 1);
    a.jbe(small);

    {
        a.mov(TMP_MEM1q, RET);

        emit_gc_test(ArgVal(ArgVal::i, 0),
                     ArgVal(ArgVal::i, ERTS_MAX_UINT64_HEAP_SIZE),
                     ArgVal(ArgVal::i, 0));

        a.mov(ARG1, TMP_MEM1q);

        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 0),
              imm(make_pos_bignum_header(1)));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), ARG1);
        a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
        a.add(HTOP, imm(sizeof(Eterm) * 2));
        a.short_().jmp(next);
    }

    a.bind(small);
    {
        a.shl(RET, imm(_TAG_IMMED1_SIZE));
        a.or_(RET, imm(_TAG_IMMED1_SMALL));
    }

    a.bind(next);
    a.mov(getXRef(0), RET);
}
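/* The small-range test in i_perf_counter: shifting the counter
 * arithmetically right by SMALL_BITS - 1 yields 0 or -1 exactly when the
 * value fits in a tagged small, so after adding 1 an unsigned compare
 * against 1 selects the immediate path; anything else is boxed as a
 * one-digit bignum. */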