Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_common.cpp')
-rw-r--r-- erts/emulator/beam/jit/x86/instr_common.cpp | 1877 ++++++++++++++++
 1 file changed, 1877 insertions(+), 0 deletions(-)
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
new file mode 100644
index 0000000000..4ca7a10629
--- /dev/null
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -0,0 +1,1877 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2020-2020. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Some notes on how to minimize the code size.
+ *
+ * Instructions that use 32-bit registers (e.g. eax) are generally
+ * one byte shorter than instructions that use 64-bits registers
+ * (e.g. rax). This does not apply to registers r8-r15 because they'll
+ * always need a rex prefix. The `and`, `or`, and `cmp` instructions
+ * are even shorter when operating on the RETb (al) register. The
+ * `test` instruction with an immediate second operand is shorter
+ * when operating on an 8-bit register.
+ *
+ * On both Unix and Windows, instructions can be shortened by using
+ * RETd, ARG1d, or ARG2d instead of RET, ARG1, or ARG2, respectively.
+ * On Unix, but not on Windows, ARG3d and ARG4d will also result in
+ * shorter instructions.
+ *
+ * Here are some examples. If we know that the higher 32 bits of
+ * a register are uninteresting or should be zeroed, we can write:
+ *
+ * a.mov(RETd, ARG1d)
+ *
+ * (When writing to the lower 32 bits of a register, the high 32
+ * bits are zeroed.)
+ *
+ * Here is a tag test on the contents of ARG1:
+ *
+ * a.and_(ARG1d, 15)
+ * a.cmp(ARG1d, 15)
+ *
+ * The same tag test on RET can be even shorter if written like this:
+ *
+ * a.and_(RETb, 15)
+ * a.cmp(RETb, 15)
+ *
+ * An alignment test can be written like this (when unit <= 256):
+ *
+ * a.test(RETb, imm(unit - 1));
+ * a.test(ARG1.r8(), imm(unit - 1));
+ *
+ * ASMJIT will automatically encode backward jumps (jumps to bound
+ * labels) in the shortest form possible. However, forward jumps
+ * (jumps to unbound labels) will by default be encoded in the long
+ * form (using a 32-bit relative address).
+ *
+ * Within a single BEAM instruction, a `short_()` prefix can be used
+ * to emit short forward jumps (using a signed byte as an offset,
+ * limiting the distance to about 128 bytes).
+ *
+ * Example:
+ *
+ * a.short_().je(next);
+ * .
+ * .
+ * .
+ * a.bind(next);
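+ *
+ * As a rough illustration of the savings (illustrative Unix encodings;
+ * the exact bytes depend on the operands):
+ *
+ *    a.mov(RET, ARG1)     ; 48 89 f8  (3 bytes)
+ *    a.mov(RETd, ARG1d)   ;    89 f8  (2 bytes)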
+ */
+
+#include <algorithm>
+#include "beam_asm.hpp"
+
+extern "C"
+{
+#include "erl_bif_table.h"
+#include "big.h"
+#include "beam_catches.h"
+#include "beam_common.h"
+#include "code_ix.h"
+}
+
+using namespace asmjit;
+
+/* Helpers */
+
+void BeamModuleAssembler::emit_error(int reason) {
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(reason));
+ emit_handle_error();
+}
+
+void BeamModuleAssembler::emit_gc_test_preserve(const ArgVal &Need,
+ const ArgVal &Live,
+ x86::Gp term) {
+ const int32_t bytes_needed = (Need.getValue() + S_RESERVED) * sizeof(Eterm);
+ Label after_gc_check = a.newLabel();
+
+ ASSERT(term != ARG3);
+
+ a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
+ a.cmp(ARG3, E);
+ a.short_().jbe(after_gc_check);
+
+ a.mov(getXRef(Live.getValue()), term);
+ mov_imm(ARG4, Live.getValue() + 1);
+ fragment_call(ga->get_garbage_collect());
+ a.mov(term, getXRef(Live.getValue()));
+
+ a.bind(after_gc_check);
+}
+
+void BeamModuleAssembler::emit_gc_test(const ArgVal &Ns,
+ const ArgVal &Nh,
+ const ArgVal &Live) {
+ const int32_t bytes_needed =
+ (Ns.getValue() + Nh.getValue() + S_RESERVED) * sizeof(Eterm);
+ Label after_gc_check = a.newLabel();
+
+ a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
+ a.cmp(ARG3, E);
+ a.short_().jbe(after_gc_check);
+
+ mov_imm(ARG4, Live.getValue());
+
+ fragment_call(ga->get_garbage_collect());
+ a.bind(after_gc_check);
+}
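+
+/* Roughly, the sequence emitted by emit_gc_test above is:
+ *
+ *     lea ARG3, [HTOP + bytes_needed]
+ *     cmp ARG3, E              ; does the allocation fit below the stack?
+ *     jbe after_gc_check       ; it does, skip the collection
+ *     mov ARG4, Live
+ *     call garbage_collect
+ *   after_gc_check:
+ */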
+
+void BeamModuleAssembler::emit_validate(const ArgVal &arity) {
+#ifdef DEBUG
+ Label next = a.newLabel(), crash = a.newLabel();
+
+ /* Crash if the Erlang heap is not word-aligned */
+ a.test(HTOP, imm(sizeof(Eterm) - 1));
+ a.jne(crash);
+
+ /* Crash if the Erlang stack is not word-aligned */
+ a.test(E, imm(sizeof(Eterm) - 1));
+ a.jne(crash);
+
+ /* Crash if we've overrun the stack */
+ a.lea(ARG1, x86::qword_ptr(E, -(int32_t)(S_REDZONE * sizeof(Eterm))));
+ a.cmp(HTOP, ARG1);
+ a.ja(crash);
+
+ a.jmp(next);
+ a.bind(crash);
+ a.hlt();
+ a.bind(next);
+
+# ifdef JIT_HARD_DEBUG
+ emit_enter_runtime();
+
+ for (unsigned i = 0; i < arity.getValue(); i++) {
+ a.mov(ARG1, getXRef(i));
+ runtime_call<1>(beam_jit_validate_term);
+ }
+
+ emit_leave_runtime();
+# endif
+
+#endif
+}
+
+/* Instrs */
+
+void BeamModuleAssembler::emit_i_validate(const ArgVal &Arity) {
+ emit_validate(Arity);
+}
+
+void BeamModuleAssembler::emit_allocate_heap(const ArgVal &NeedStack,
+ const ArgVal &NeedHeap,
+ const ArgVal &Live) {
+ ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
+ ASSERT(NeedStack.getValue() <= MAX_REG);
+ ArgVal needed = NeedStack;
+
+#if !defined(NATIVE_ERLANG_STACK)
+ needed = needed + CP_SIZE;
+#endif
+
+ emit_gc_test(needed, NeedHeap, Live);
+
+ if (needed.getValue() > 0) {
+ a.sub(E, imm(needed.getValue() * sizeof(Eterm)));
+ }
+#if !defined(NATIVE_ERLANG_STACK)
+ a.mov(getCPRef(), imm(NIL));
+#endif
+}
+
+void BeamModuleAssembler::emit_allocate(const ArgVal &NeedStack,
+ const ArgVal &Live) {
+ emit_allocate_heap(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
+}
+
+void BeamModuleAssembler::emit_deallocate(const ArgVal &Deallocate) {
+ ASSERT(Deallocate.getType() == ArgVal::TYPE::u);
+ ASSERT(Deallocate.getValue() <= 1023);
+ ArgVal dealloc = Deallocate;
+
+#if !defined(NATIVE_ERLANG_STACK)
+ dealloc = dealloc + CP_SIZE;
+#endif
+
+ if (dealloc.getValue() > 0) {
+ a.add(E, imm(dealloc.getValue() * sizeof(Eterm)));
+ }
+}
+
+void BeamModuleAssembler::emit_test_heap(const ArgVal &Nh, const ArgVal &Live) {
+ emit_gc_test(ArgVal(ArgVal::u, 0), Nh, Live);
+}
+
+void BeamModuleAssembler::emit_normal_exit() {
+    /* This is implicitly global; it does not normally appear in modules and
+ * doesn't require size optimization. */
+
+ emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
+ emit_proc_lc_unrequire();
+
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(EXC_NORMAL));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0));
+ a.mov(ARG1, c_p);
+ mov_imm(ARG2, am_normal);
+ runtime_call<2>(erts_do_exit_process);
+
+ emit_proc_lc_require();
+ emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
+
+ abs_jmp(ga->get_do_schedule());
+}
+
+void BeamModuleAssembler::emit_continue_exit() {
+    /* This is implicitly global; it does not normally appear in modules and
+ * doesn't require size optimization. */
+
+ emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
+ emit_proc_lc_unrequire();
+
+ a.mov(ARG1, c_p);
+ runtime_call<1>(erts_continue_exit_process);
+
+ emit_proc_lc_require();
+ emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
+
+ abs_jmp(ga->get_do_schedule());
+}
+
+/* This is an alias for handle_error */
+void BeamModuleAssembler::emit_error_action_code() {
+ abs_jmp(ga->get_error_action_code());
+}
+
+/* Pseudo-instruction for signalling lambda load errors. Never actually runs. */
+void BeamModuleAssembler::emit_i_lambda_error(const ArgVal &Dummy) {
+ a.hlt();
+}
+
+void BeamModuleAssembler::emit_i_make_fun3(const ArgVal &Fun,
+ const ArgVal &Dst,
+ const ArgVal &NumFree,
+ const std::vector<ArgVal> &env) {
+ size_t num_free = env.size();
+ ASSERT(NumFree.getValue() == num_free);
+
+ mov_arg(ARG3, NumFree);
+
+ emit_enter_runtime<Update::eHeap>();
+
+ a.mov(ARG1, c_p);
+ make_move_patch(ARG2, lambdas[Fun.getValue()].patches);
+ runtime_call<3>(new_fun_thing);
+
+ emit_leave_runtime<Update::eHeap>();
+
+ comment("Move fun environment");
+ for (unsigned i = 0; i < num_free; i++) {
+ mov_arg(x86::qword_ptr(RET,
+ offsetof(ErlFunThing, env) + i * sizeof(Eterm)),
+ env[i]);
+ }
+
+ comment("Create boxed ptr");
+ a.or_(RETb, TAG_PRIMARY_BOXED);
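+    /* (new_fun_thing returns a word-aligned heap pointer in RET, so the
+     * low tag bits are known to be zero and an 8-bit `or` suffices.) */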
+ mov_arg(Dst, RET);
+}
+
+void BeamModuleAssembler::emit_get_list(const x86::Gp src,
+ const ArgVal &Hd,
+ const ArgVal &Tl) {
+ x86::Gp boxed_ptr = emit_ptr_val(src, src);
+
+ switch (ArgVal::register_relation(Hd, Tl)) {
+ case ArgVal::Relation::consecutive: {
+ comment("(moving head and tail together)");
+ x86::Mem dst_ptr = getArgRef(Hd, 16);
+ x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
+ a.movups(x86::xmm0, src_ptr);
+ a.movups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::reverse_consecutive: {
+ if (!hasCpuFeature(x86::Features::kAVX)) {
+ goto fallback;
+ }
+
+ comment("(moving and swapping head and tail together)");
+ x86::Mem dst_ptr = getArgRef(Tl, 16);
+ x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
+ a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
+ a.vmovups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::none:
+ fallback:
+ a.mov(ARG2, getCARRef(boxed_ptr));
+ a.mov(ARG3, getCDRRef(boxed_ptr));
+ mov_arg(Hd, ARG2);
+ mov_arg(Tl, ARG3);
+ break;
+ }
+}
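+
+/* A note on the AVX path above: `vpermilpd` with an immediate of 1 loads
+ * both words of the cons cell and swaps them in a single instruction, so
+ * the head and tail land directly in reverse-consecutive destinations. */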
+
+void BeamModuleAssembler::emit_get_list(const ArgVal &Src,
+ const ArgVal &Hd,
+ const ArgVal &Tl) {
+ mov_arg(ARG1, Src);
+ emit_get_list(ARG1, Hd, Tl);
+}
+
+void BeamModuleAssembler::emit_get_hd(const ArgVal &Src, const ArgVal &Hd) {
+ mov_arg(ARG1, Src);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+
+ a.mov(ARG2, getCARRef(boxed_ptr));
+
+ mov_arg(Hd, ARG2);
+}
+
+void BeamModuleAssembler::emit_get_tl(const ArgVal &Src, const ArgVal &Tl) {
+ mov_arg(ARG1, Src);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+
+ a.mov(ARG2, getCDRRef(boxed_ptr));
+
+ mov_arg(Tl, ARG2);
+}
+
+void BeamModuleAssembler::emit_is_nonempty_list_get_list(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Hd,
+ const ArgVal &Tl) {
+ mov_arg(RET, Src);
+ a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
+ a.jne(labels[Fail.getValue()]);
+ emit_get_list(RET, Hd, Tl);
+}
+
+void BeamModuleAssembler::emit_is_nonempty_list_get_hd(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Hd) {
+ mov_arg(RET, Src);
+ a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
+ a.jne(labels[Fail.getValue()]);
+
+ x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
+
+ a.mov(ARG2, getCARRef(boxed_ptr));
+
+ mov_arg(Hd, ARG2);
+}
+
+void BeamModuleAssembler::emit_is_nonempty_list_get_tl(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Tl) {
+ mov_arg(RET, Src);
+ a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
+ a.jne(labels[Fail.getValue()]);
+
+ x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
+
+ a.mov(ARG2, getCDRRef(boxed_ptr));
+
+ mov_arg(Tl, ARG2);
+}
+
+void BeamModuleAssembler::emit_i_get(const ArgVal &Src, const ArgVal &Dst) {
+ mov_arg(ARG2, Src);
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ runtime_call<2>(erts_pd_hash_get);
+
+ emit_leave_runtime();
+
+ mov_arg(Dst, RET);
+}
+
+void BeamModuleAssembler::emit_i_get_hash(const ArgVal &Src,
+ const ArgVal &Hash,
+ const ArgVal &Dst) {
+ mov_arg(ARG2, Hash);
+ mov_arg(ARG3, Src);
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_pd_hash_get_with_hx);
+
+ emit_leave_runtime();
+
+ mov_arg(Dst, RET);
+}
+
+/* Store the pointer to a tuple in ARG2. Remove any LITERAL_PTR tag. */
+void BeamModuleAssembler::emit_load_tuple_ptr(const ArgVal &Term) {
+ mov_arg(ARG2, Term);
+ (void)emit_ptr_val(ARG2, ARG2);
+}
+
+#ifdef DEBUG
+/* Emit an assertion to ensure that tuple_reg points into the same
+ * tuple as Src. */
+void BeamModuleAssembler::emit_tuple_assertion(const ArgVal &Src,
+ x86::Gp tuple_reg) {
+ Label ok = a.newLabel(), fatal = a.newLabel();
+ ASSERT(tuple_reg != RET);
+ mov_arg(RET, Src);
+ emit_is_boxed(fatal, RET, dShort);
+ (void)emit_ptr_val(RET, RET);
+ a.cmp(RET, tuple_reg);
+ a.short_().je(ok);
+
+ a.bind(fatal);
+ { a.ud2(); }
+ a.bind(ok);
+}
+#endif
+
+/* Fetch an element from the tuple pointed to by the boxed pointer
+ * in ARG2. */
+void BeamModuleAssembler::emit_i_get_tuple_element(const ArgVal &Src,
+ const ArgVal &Element,
+ const ArgVal &Dst) {
+#ifdef DEBUG
+ emit_tuple_assertion(Src, ARG2);
+#endif
+
+ a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
+ mov_arg(Dst, ARG1);
+}
+
+/* Fetch two consecutive tuple elements from the tuple pointed to by
+ * the boxed pointer in ARG2. */
+void BeamModuleAssembler::emit_get_two_tuple_elements(const ArgVal &Src,
+ const ArgVal &Element,
+ const ArgVal &Dst1,
+ const ArgVal &Dst2) {
+#ifdef DEBUG
+ emit_tuple_assertion(Src, ARG2);
+#endif
+
+ x86::Mem element_ptr =
+ emit_boxed_val(ARG2, Element.getValue(), 2 * sizeof(Eterm));
+
+ switch (ArgVal::register_relation(Dst1, Dst2)) {
+ case ArgVal::Relation::consecutive: {
+ x86::Mem dst_ptr = getArgRef(Dst1, 16);
+ a.movups(x86::xmm0, element_ptr);
+ a.movups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::reverse_consecutive: {
+ if (!hasCpuFeature(x86::Features::kAVX)) {
+ goto fallback;
+ } else {
+ x86::Mem dst_ptr = getArgRef(Dst2, 16);
+ a.vpermilpd(x86::xmm0, element_ptr, 1); /* Load and swap */
+ a.vmovups(dst_ptr, x86::xmm0);
+ break;
+ }
+ }
+ case ArgVal::Relation::none:
+ fallback:
+ a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
+ a.mov(ARG3, emit_boxed_val(ARG2, (Element + sizeof(Eterm)).getValue()));
+ mov_arg(Dst1, ARG1);
+ mov_arg(Dst2, ARG3);
+ break;
+ }
+}
+
+void BeamModuleAssembler::emit_init(const ArgVal &Y) {
+ mov_arg(Y, NIL);
+}
+
+void BeamModuleAssembler::emit_init_yregs(const ArgVal &Size,
+ const std::vector<ArgVal> &args) {
+ unsigned count = Size.getValue();
+ ASSERT(count == args.size());
+
+ if (count == 1) {
+ mov_arg(args.front(), NIL);
+ return;
+ }
+
+    /* There are at least two slots. */
+ unsigned i = 0;
+ int y_ptr = -1;
+
+ mov_imm(x86::rax, NIL);
+
+ while (i < count) {
+ unsigned slots = 1;
+ unsigned first_y = args.at(i).getValue();
+
+ while (i + slots < count) {
+ ArgVal current_y = args.at(i + slots);
+ if (first_y + slots != current_y.getValue()) {
+ break;
+ }
+ slots++;
+ }
+
+ /*
+ * Now first_y is the number of the first y register to be initialized
+ * and slots is the number of y registers to be initialized.
+ */
+
+ if (slots == 1) {
+ a.mov(getYRef(first_y), x86::rax);
+ } else {
+ /*
+ * There are at least two consecutive y registers to be initialized.
+ * Use `stosq` with or without `rep`.
+ */
+ if (first_y == 0) {
+#ifdef NATIVE_ERLANG_STACK
+ /* `mov` is two bytes shorter than `lea`. */
+ a.mov(x86::rdi, E);
+#else
+ /* y(0) is at E+8. Must use `lea` here. */
+ a.lea(x86::rdi, getYRef(0));
+#endif
+ y_ptr = 0;
+ } else if (y_ptr < 0) {
+ /* Initialize rdi for the first time. */
+ y_ptr = first_y;
+ a.lea(x86::rdi, getYRef(y_ptr));
+ } else {
+ /* Update rdi using `add`. This is one byte shorter than using
+ * `lea`. */
+ unsigned offset = (first_y - y_ptr) * sizeof(Eterm);
+ a.add(x86::rdi, imm(offset));
+ y_ptr = first_y;
+ }
+
+ if (slots <= 4) {
+ /* Slightly more compact than `rep stosq`. */
+ for (unsigned j = 0; j < slots; j++) {
+ a.stosq();
+ }
+ } else {
+ mov_imm(x86::rcx, slots);
+ a.rep().stosq();
+ }
+
+ /* Update y_ptr to account for the incrementing done by `stosq`. */
+ y_ptr += slots;
+ }
+
+ i += slots;
+ }
+}
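+
+/* For example, initializing five consecutive y registers starting at y(0)
+ * would emit roughly the following (a sketch; the exact form depends on
+ * NATIVE_ERLANG_STACK):
+ *
+ *     mov rax, NIL
+ *     mov rdi, E
+ *     mov rcx, 5
+ *     rep stosq        ; store NIL five times, advancing rdi
+ */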
+
+void BeamModuleAssembler::emit_i_trim(const ArgVal &Words) {
+ ASSERT(Words.getType() == ArgVal::TYPE::u);
+ ASSERT(Words.getValue() <= 1023);
+
+ if (Words.getValue() > 0) {
+ a.add(E, imm(Words.getValue() * sizeof(Eterm)));
+ }
+}
+
+void BeamModuleAssembler::emit_i_move(const ArgVal &Src, const ArgVal &Dst) {
+ mov_arg(Dst, Src);
+}
+
+/* Move two words at consecutive addresses to consecutive or reverse
+ * consecutive destinations. */
+void BeamModuleAssembler::emit_move_two_words(const ArgVal &Src1,
+ const ArgVal &Dst1,
+ const ArgVal &Src2,
+ const ArgVal &Dst2) {
+ x86::Mem src_ptr = getArgRef(Src1, 16);
+
+ ASSERT(ArgVal::register_relation(Src1, Src2) ==
+ ArgVal::Relation::consecutive);
+
+ switch (ArgVal::register_relation(Dst1, Dst2)) {
+ case ArgVal::Relation::consecutive: {
+ x86::Mem dst_ptr = getArgRef(Dst1, 16);
+ a.movups(x86::xmm0, src_ptr);
+ a.movups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::reverse_consecutive: {
+ x86::Mem dst_ptr = getArgRef(Dst2, 16);
+ comment("(moving and swapping)");
+ if (hasCpuFeature(x86::Features::kAVX)) {
+ a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
+ a.vmovups(dst_ptr, x86::xmm0);
+ } else {
+ mov_arg(ARG1, Src1);
+ mov_arg(ARG2, Src2);
+ mov_arg(Dst1, ARG1);
+ mov_arg(Dst2, ARG2);
+ }
+ break;
+ }
+ case ArgVal::Relation::none:
+ ASSERT(0);
+ break;
+ }
+}
+
+void BeamModuleAssembler::emit_swap(const ArgVal &R1, const ArgVal &R2) {
+ if (!hasCpuFeature(x86::Features::kAVX)) {
+ goto fallback;
+ }
+
+ switch (ArgVal::register_relation(R1, R2)) {
+ case ArgVal::Relation::consecutive: {
+ x86::Mem ptr = getArgRef(R1, 16);
+ comment("(swapping using AVX)");
+ a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
+ a.vmovups(ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::reverse_consecutive: {
+ x86::Mem ptr = getArgRef(R2, 16);
+ comment("(swapping using AVX)");
+ a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
+ a.vmovups(ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::none:
+ fallback:
+ mov_arg(ARG1, R1);
+ mov_arg(ARG2, R2);
+ mov_arg(R2, ARG1);
+ mov_arg(R1, ARG2);
+ break;
+ }
+}
+
+void BeamModuleAssembler::emit_node(const ArgVal &Dst) {
+ a.mov(ARG1, imm(&erts_this_node));
+ a.mov(ARG1, x86::qword_ptr(ARG1));
+ a.mov(ARG1, x86::qword_ptr(ARG1, offsetof(ErlNode, sysname)));
+ mov_arg(Dst, ARG1);
+}
+
+void BeamModuleAssembler::emit_put_cons(const ArgVal &Hd, const ArgVal &Tl) {
+ switch (ArgVal::register_relation(Hd, Tl)) {
+ case ArgVal::Relation::consecutive: {
+ x86::Mem src_ptr = getArgRef(Hd, 16);
+ x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
+ comment("(put head and tail together)");
+ a.movups(x86::xmm0, src_ptr);
+ a.movups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::reverse_consecutive: {
+ if (!hasCpuFeature(x86::Features::kAVX)) {
+ goto fallback;
+ }
+
+ x86::Mem src_ptr = getArgRef(Tl, 16);
+ x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
+ comment("(putting and swapping head and tail together)");
+ a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
+ a.vmovups(dst_ptr, x86::xmm0);
+ break;
+ }
+ case ArgVal::Relation::none:
+ fallback:
+ mov_arg(x86::qword_ptr(HTOP, 0), Hd);
+ mov_arg(x86::qword_ptr(HTOP, 1 * sizeof(Eterm)), Tl);
+ break;
+ }
+ a.lea(ARG2, x86::qword_ptr(HTOP, TAG_PRIMARY_LIST));
+}
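+
+/* Note that the `lea` above forms the pointer and applies the list tag in
+ * a single instruction. The tagged pointer is deliberately left in ARG2;
+ * emit_append_cons and emit_store_cons below rely on that. */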
+
+void BeamModuleAssembler::emit_append_cons(const ArgVal &index,
+ const ArgVal &Hd) {
+ size_t offset = 2 * index.getValue() * sizeof(Eterm);
+ mov_arg(x86::qword_ptr(HTOP, offset), Hd);
+ a.mov(x86::qword_ptr(HTOP, offset + sizeof(Eterm)), ARG2);
+ a.lea(ARG2, x86::qword_ptr(HTOP, offset + TAG_PRIMARY_LIST));
+}
+
+void BeamModuleAssembler::emit_store_cons(const ArgVal &len,
+ const ArgVal &Dst) {
+ a.add(HTOP, imm(len.getValue() * 2 * sizeof(Eterm)));
+ mov_arg(Dst, ARG2);
+}
+
+void BeamModuleAssembler::emit_put_tuple2(const ArgVal &Dst,
+ const ArgVal &Arity,
+ const std::vector<ArgVal> &args) {
+ size_t size = args.size();
+ ASSERT(arityval(Arity.getValue()) == size);
+
+ comment("Move arity word");
+ mov_arg(x86::qword_ptr(HTOP, 0), Arity);
+
+ comment("Move tuple data");
+ for (unsigned i = 0; i < size; i++) {
+ x86::Mem dst_ptr = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm));
+
+ if (i + 1 == size) {
+ mov_arg(dst_ptr, args[i]);
+ } else {
+ switch (ArgVal::register_relation(args[i], args[i + 1])) {
+ case ArgVal::consecutive: {
+ x86::Mem src_ptr = getArgRef(args[i], 16);
+
+ comment("(moving two elements at once)");
+ dst_ptr.setSize(16);
+ a.movups(x86::xmm0, src_ptr);
+ a.movups(dst_ptr, x86::xmm0);
+ i++;
+ break;
+ }
+ case ArgVal::reverse_consecutive: {
+ if (!hasCpuFeature(x86::Features::kAVX)) {
+ mov_arg(dst_ptr, args[i]);
+ } else {
+ x86::Mem src_ptr = getArgRef(args[i + 1], 16);
+
+ comment("(moving and swapping two elements at once)");
+ dst_ptr.setSize(16);
+ a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
+ a.vmovups(dst_ptr, x86::xmm0);
+ i++;
+ }
+ break;
+ }
+ case ArgVal::none:
+ mov_arg(dst_ptr, args[i]);
+ break;
+ }
+ }
+ }
+
+ comment("Create boxed ptr");
+ a.lea(ARG1, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
+ a.add(HTOP, imm((size + 1) * sizeof(Eterm)));
+
+ mov_arg(Dst, ARG1);
+}
+
+void BeamModuleAssembler::emit_self(const ArgVal &Dst) {
+ a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, common.id)));
+
+ mov_arg(Dst, ARG1);
+}
+
+void BeamModuleAssembler::emit_set_tuple_element(const ArgVal &Element,
+ const ArgVal &Tuple,
+ const ArgVal &Offset) {
+ mov_arg(ARG1, Tuple);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ mov_arg(emit_boxed_val(boxed_ptr, Offset.getValue()), Element, ARG2);
+}
+
+void BeamModuleAssembler::emit_is_nonempty_list(const ArgVal &Fail,
+ const ArgVal &Src) {
+ x86::Mem list_ptr = getArgRef(Src, 1);
+
+ a.test(list_ptr, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_jump(const ArgVal &Fail) {
+ a.jmp(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_atom(const ArgVal &Fail, const ArgVal &Src) {
+ mov_arg(RET, Src);
+ ERTS_CT_ASSERT(_TAG_IMMED2_MASK < 256);
+ a.and_(RETb, imm(_TAG_IMMED2_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED2_ATOM));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_boolean(const ArgVal &Fail,
+ const ArgVal &Src) {
+ /* Since am_true and am_false differ by a single bit, we can simplify the
+ * check by clearing said bit and comparing against the lesser one. */
+ ERTS_CT_ASSERT(am_false == make_atom(0));
+ ERTS_CT_ASSERT(am_true == make_atom(1));
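+    /* (The mask clears only the bit that distinguishes am_true from
+     * am_false, so am_true is mapped to am_false while any other term
+     * keeps at least one differing bit and fails the compare below.) */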
+
+ mov_arg(ARG1, Src);
+
+ a.and_(ARG1, imm(~(am_true & ~_TAG_IMMED1_MASK)));
+ a.cmp(ARG1, imm(am_false));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_binary(Label fail,
+ x86::Gp src,
+ Label next,
+ Label subbin) {
+ ASSERT(src != RET && src != ARG2);
+
+ emit_is_boxed(fail, src);
+
+ x86::Gp boxed_ptr = emit_ptr_val(src, src);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+
+ a.and_(RETb, imm(_TAG_HEADER_MASK));
+ a.cmp(RETb, imm(_TAG_HEADER_SUB_BIN));
+ a.short_().je(subbin);
+ ERTS_CT_ASSERT(_TAG_HEADER_REFC_BIN + 4 == _TAG_HEADER_HEAP_BIN);
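+    /* Clearing bit 2 turns the heap-bin tag into the refc-bin tag, letting
+     * the compare below accept both kinds of binaries. */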
+ a.and_(RETb, imm(~4));
+ a.cmp(RETb, imm(_TAG_HEADER_REFC_BIN));
+ a.short_().je(next);
+ a.jmp(fail);
+}
+
+void BeamModuleAssembler::emit_is_binary(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel(), subbin = a.newLabel();
+
+ mov_arg(ARG1, Src);
+
+ emit_is_binary(labels[Fail.getValue()], ARG1, next, subbin);
+
+ a.bind(subbin);
+ {
+ /* emit_is_binary has already removed the literal tag from Src, if
+ * applicable. */
+ a.cmp(emit_boxed_val(ARG1, offsetof(ErlSubBin, bitsize), sizeof(byte)),
+ imm(0));
+ a.jne(labels[Fail.getValue()]);
+ }
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_bitstring(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel();
+
+ mov_arg(ARG1, Src);
+
+ emit_is_binary(labels[Fail.getValue()], ARG1, next, next);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_float(const ArgVal &Fail, const ArgVal &Src) {
+ mov_arg(ARG1, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], ARG1);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_function(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel();
+
+ mov_arg(RET, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+    a.cmp(RETd, imm(HEADER_FUN));
+ a.short_().je(next);
+ ERTS_CT_ASSERT(HEADER_EXPORT < 256);
+ a.cmp(RETb, imm(HEADER_EXPORT));
+ a.jne(labels[Fail.getValue()]);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_function2(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Arity) {
+ if (Arity.getType() != ArgVal::i) {
+ /*
+ * Non-literal arity - extremely uncommon. Generate simple code.
+ */
+ mov_arg(ARG2, Src);
+ mov_arg(ARG3, Arity);
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erl_is_function);
+
+ emit_leave_runtime();
+
+ a.cmp(RET, imm(am_true));
+ a.jne(labels[Fail.getValue()]);
+ return;
+ }
+
+ unsigned arity = unsigned_val(Arity.getValue());
+ if (arity > MAX_ARG) {
+ /* Arity is negative or too large. */
+ a.jmp(labels[Fail.getValue()]);
+ return;
+ }
+
+ Label next = a.newLabel(), fun = a.newLabel();
+
+ mov_arg(ARG1, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], ARG1);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+ a.cmp(RETd, imm(HEADER_FUN));
+ a.short_().je(fun);
+ ERTS_CT_ASSERT(HEADER_EXPORT < 256);
+ a.cmp(RETb, imm(HEADER_EXPORT));
+ a.jne(labels[Fail.getValue()]);
+
+ comment("Check arity of export fun");
+ a.mov(ARG2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ a.cmp(x86::qword_ptr(ARG2, offsetof(Export, info.mfa.arity)), imm(arity));
+ a.jne(labels[Fail.getValue()]);
+ a.short_().jmp(next);
+
+ comment("Check arity of fun");
+ a.bind(fun);
+ {
+ a.cmp(emit_boxed_val(boxed_ptr, offsetof(ErlFunThing, arity)),
+ imm(arity));
+ a.jne(labels[Fail.getValue()]);
+ }
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_integer(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel();
+ Label fail = labels[Fail.getValue()];
+
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().je(next);
+
+ emit_is_boxed(fail, RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+
+ a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
+ a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
+ a.jne(fail);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_list(const ArgVal &Fail, const ArgVal &Src) {
+ Label next = a.newLabel();
+
+ mov_arg(RET, Src);
+
+ a.cmp(RET, imm(NIL));
+ a.short_().je(next);
+ a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
+ a.jne(labels[Fail.getValue()]);
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_map(const ArgVal &Fail, const ArgVal &Src) {
+ mov_arg(RET, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+ a.and_(RETb, imm(_TAG_HEADER_MASK));
+ a.cmp(RETb, imm(_TAG_HEADER_MAP));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_nil(const ArgVal &Fail, const ArgVal &Src) {
+ a.cmp(getArgRef(Src), imm(NIL));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_number(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel();
+ Label fail = labels[Fail.getValue()];
+
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().je(next);
+
+ emit_is_boxed(fail, RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.mov(ARG1, emit_boxed_val(boxed_ptr));
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
+ a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
+ a.short_().je(next);
+
+ a.cmp(ARG1d, imm(HEADER_FLONUM));
+ a.jne(fail);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_pid(const ArgVal &Fail, const ArgVal &Src) {
+ Label next = a.newLabel();
+
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_PID));
+ a.short_().je(next);
+
+ /* Reuse RET as the important bits are still available. */
+ emit_is_boxed(labels[Fail.getValue()], RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+    a.and_(RETb, imm(_TAG_HEADER_MASK));
+    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PID));
+ a.jne(labels[Fail.getValue()]);
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_port(const ArgVal &Fail, const ArgVal &Src) {
+ Label next = a.newLabel();
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_PORT));
+ a.short_().je(next);
+
+ /* Reuse RET as the important bits are still available. */
+ emit_is_boxed(labels[Fail.getValue()], RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+ a.and_(RETb, imm(_TAG_HEADER_MASK));
+ a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PORT));
+ a.jne(labels[Fail.getValue()]);
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_reference(const ArgVal &Fail,
+ const ArgVal &Src) {
+ Label next = a.newLabel();
+
+ mov_arg(RET, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], RET);
+
+ x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+ a.and_(RETb, imm(_TAG_HEADER_MASK));
+ a.cmp(RETb, imm(_TAG_HEADER_REF));
+ a.short_().je(next);
+ a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_REF));
+ a.jne(labels[Fail.getValue()]);
+
+ a.bind(next);
+}
+
+/* Note: This instruction leaves the pointer to the tuple in ARG2. */
+void BeamModuleAssembler::emit_i_is_tagged_tuple(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Arity,
+ const ArgVal &Tag) {
+ mov_arg(ARG2, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], ARG2);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG2, ARG2);
+ ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
+ a.cmp(emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)), imm(Arity.getValue()));
+ a.jne(labels[Fail.getValue()]);
+
+ a.cmp(emit_boxed_val(boxed_ptr, sizeof(Eterm)), imm(Tag.getValue()));
+ a.jne(labels[Fail.getValue()]);
+}
+
+/* Note: This instruction leaves the pointer to the tuple in ARG2. */
+void BeamModuleAssembler::emit_i_is_tagged_tuple_ff(const ArgVal &NotTuple,
+ const ArgVal &NotRecord,
+ const ArgVal &Src,
+ const ArgVal &Arity,
+ const ArgVal &Tag) {
+ mov_arg(ARG2, Src);
+ emit_is_boxed(labels[NotTuple.getValue()], ARG2);
+ (void)emit_ptr_val(ARG2, ARG2);
+ a.mov(ARG1, emit_boxed_val(ARG2));
+
+ ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
+ a.test(ARG1.r8(), imm(_TAG_HEADER_MASK));
+ a.jne(labels[NotTuple.getValue()]);
+
+ ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
+ a.cmp(ARG1d, imm(Arity.getValue()));
+ a.jne(labels[NotRecord.getValue()]);
+
+ a.cmp(emit_boxed_val(ARG2, sizeof(Eterm)), imm(Tag.getValue()));
+ a.jne(labels[NotRecord.getValue()]);
+}
+
+/* Note: This instruction leaves the pointer to the tuple in ARG2. */
+void BeamModuleAssembler::emit_i_is_tuple(const ArgVal &Fail,
+ const ArgVal &Src) {
+ mov_arg(ARG2, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], ARG2);
+
+ (void)emit_ptr_val(ARG2, ARG2);
+ ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
+ a.test(emit_boxed_val(ARG2, 0, sizeof(byte)), imm(_TAG_HEADER_MASK));
+
+ a.jne(labels[Fail.getValue()]);
+}
+
+/* Note: This instruction leaves the pointer to the tuple in ARG2. */
+void BeamModuleAssembler::emit_i_is_tuple_of_arity(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Arity) {
+ mov_arg(ARG2, Src);
+
+ emit_is_boxed(labels[Fail.getValue()], ARG2);
+
+ (void)emit_ptr_val(ARG2, ARG2);
+ ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
+ a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
+ a.jne(labels[Fail.getValue()]);
+}
+
+/* Note: This instruction leaves the pointer to the tuple in ARG2. */
+void BeamModuleAssembler::emit_i_test_arity(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Arity) {
+ mov_arg(ARG2, Src);
+
+ (void)emit_ptr_val(ARG2, ARG2);
+ ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
+ a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_i_is_eq_exact_immed(const ArgVal &Fail,
+ const ArgVal &X,
+ const ArgVal &Y) {
+ cmp_arg(getArgRef(X), Y);
+ a.jne(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_i_is_ne_exact_immed(const ArgVal &Fail,
+ const ArgVal &X,
+ const ArgVal &Y) {
+ cmp_arg(getArgRef(X), Y);
+ a.je(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_eq_exact(const ArgVal &Fail,
+ const ArgVal &X,
+ const ArgVal &Y) {
+ Label next = a.newLabel();
+
+ mov_arg(ARG2, Y); /* May clobber ARG1 */
+ mov_arg(ARG1, X);
+
+ a.cmp(ARG1, ARG2);
+#ifdef JIT_HARD_DEBUG
+ a.je(next);
+#else
+ a.short_().je(next);
+#endif
+
+ /* Fancy way of checking if both are immediates. */
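+    /* (ANDing the terms can only leave both low tag bits set when each
+     * term has them set, and TAG_PRIMARY_IMMED1 is the only primary tag
+     * with both bits set.) */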
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_PRIMARY_MASK));
+ a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
+ a.je(labels[Fail.getValue()]);
+
+ emit_enter_runtime();
+
+ runtime_call<2>(eq);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+ a.je(labels[Fail.getValue()]);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_i_is_eq_exact_literal(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Literal,
+ const ArgVal &tag_test) {
+ mov_arg(ARG2, Literal); /* May clobber ARG1 */
+ mov_arg(ARG1, Src);
+
+ /* Fail immediately unless Src is the same type of pointer as the literal.
+ */
+ a.test(ARG1.r8(), imm(tag_test.getValue()));
+ a.jne(labels[Fail.getValue()]);
+
+ emit_enter_runtime();
+
+ runtime_call<2>(eq);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+ a.jz(labels[Fail.getValue()]);
+}
+
+void BeamModuleAssembler::emit_is_ne_exact(const ArgVal &Fail,
+ const ArgVal &X,
+ const ArgVal &Y) {
+ Label next = a.newLabel();
+
+ mov_arg(ARG2, Y); /* May clobber ARG1 */
+ mov_arg(ARG1, X);
+
+ a.cmp(ARG1, ARG2);
+ a.je(labels[Fail.getValue()]);
+
+ /* Fancy way of checking if both are immediates. */
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_PRIMARY_MASK));
+ a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
+#ifdef JIT_HARD_DEBUG
+ a.je(next);
+#else
+ a.short_().je(next);
+#endif
+
+ emit_enter_runtime();
+
+ runtime_call<2>(eq);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+ a.jnz(labels[Fail.getValue()]);
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_i_is_ne_exact_literal(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Literal) {
+ Label next = a.newLabel();
+
+ mov_arg(ARG2, Literal); /* May clobber ARG1 */
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
+ a.short_().je(next);
+
+ emit_enter_runtime();
+
+ runtime_call<2>(eq);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+ a.jnz(labels[Fail.getValue()]);
+
+ a.bind(next);
+}
+
+void BeamGlobalAssembler::emit_arith_eq_shared() {
+ Label generic_compare = a.newLabel();
+
+ /* Are both floats? */
+ a.mov(ARG3d, ARG1d);
+ a.or_(ARG3d, ARG2d);
+ a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+ a.short_().jne(generic_compare);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
+ a.mov(ARG3, emit_boxed_val(boxed_ptr));
+ boxed_ptr = emit_ptr_val(ARG5, ARG2);
+ a.mov(ARG5, emit_boxed_val(boxed_ptr));
+ a.and_(ARG3d, imm(_TAG_HEADER_MASK));
+ a.and_(ARG5d, imm(_TAG_HEADER_MASK));
+ a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
+ a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
+ a.or_(ARG3d, ARG5d);
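+    /* The OR is zero only when both headers carried the float tag. */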
+ a.short_().jne(generic_compare);
+
+ boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ boxed_ptr = emit_ptr_val(ARG2, ARG2);
+ a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+
+ /* All float terms are finite so our caller only needs to check ZF. We don't
+ * need to check for errors (PF). */
+ a.comisd(x86::xmm0, x86::xmm1);
+
+ a.ret();
+
+ a.bind(generic_compare);
+ {
+ emit_enter_runtime();
+
+ /* Generic eq-only arithmetic comparison. */
+ comment("erts_cmp_compound(X, Y, 0, 1);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 1);
+ runtime_call<4>(erts_cmp_compound);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+
+ a.ret();
+ }
+}
+
+void BeamModuleAssembler::emit_is_eq(const ArgVal &Fail,
+ const ArgVal &A,
+ const ArgVal &B) {
+ Label fail = labels[Fail.getValue()], next = a.newLabel();
+
+ mov_arg(ARG2, B); /* May clobber ARG1 */
+ mov_arg(ARG1, A);
+
+ a.cmp(ARG1, ARG2);
+ a.short_().je(next);
+
+ /* We can skip deep comparisons when both args are immediates. */
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_PRIMARY_MASK));
+ a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
+ a.je(fail);
+
+ safe_fragment_call(ga->get_arith_eq_shared());
+ a.jne(fail);
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_ne(const ArgVal &Fail,
+ const ArgVal &A,
+ const ArgVal &B) {
+ Label fail = labels[Fail.getValue()], next = a.newLabel();
+
+ mov_arg(ARG2, B); /* May clobber ARG1 */
+ mov_arg(ARG1, A);
+
+ a.cmp(ARG1, ARG2);
+ a.je(fail);
+
+ /* We can skip deep comparisons when both args are immediates. */
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_PRIMARY_MASK));
+ a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
+ a.short_().je(next);
+
+ safe_fragment_call(ga->get_arith_eq_shared());
+ a.je(fail);
+ a.bind(next);
+}
+
+void BeamGlobalAssembler::emit_arith_compare_shared() {
+ Label atom_compare, generic_compare;
+
+ atom_compare = a.newLabel();
+ generic_compare = a.newLabel();
+
+ /* Are both floats?
+ *
+     * This is done first as relative comparisons on atoms don't make much
+ * sense. */
+ a.mov(ARG3d, ARG1d);
+ a.or_(ARG3d, ARG2d);
+ a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+ a.short_().jne(atom_compare);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
+ a.mov(ARG3, emit_boxed_val(boxed_ptr));
+ boxed_ptr = emit_ptr_val(ARG5, ARG2);
+ a.mov(ARG5, emit_boxed_val(boxed_ptr));
+ a.and_(ARG3d, imm(_TAG_HEADER_MASK));
+ a.and_(ARG5d, imm(_TAG_HEADER_MASK));
+ a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
+ a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
+ a.or_(ARG3d, ARG5d);
+
+ /* NOTE: Short won't reach if JIT_HARD_DEBUG is defined. */
+ a.jne(generic_compare);
+
+ boxed_ptr = emit_ptr_val(ARG1, ARG1);
+ a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ boxed_ptr = emit_ptr_val(ARG2, ARG2);
+ a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ a.comisd(x86::xmm0, x86::xmm1);
+
+ /* `comisd` doesn't set the flags the same way `test` and friends do, so
+ * they need to be converted for jl/jge to work. */
+ a.setae(x86::al);
+ a.dec(x86::al);
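+    /* CF=0 ("above or equal"): al = 1, decremented to 0 (ZF set, SF
+     * clear), satisfying jge. CF=1 ("below"): al = 0, decremented to -1
+     * (SF set), satisfying jl. */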
+
+ a.ret();
+
+ a.bind(atom_compare);
+ {
+ /* Are both atoms? */
+ a.mov(ARG3d, ARG1d);
+ a.mov(ARG5d, ARG2d);
+ a.and_(ARG3d, imm(_TAG_IMMED2_MASK));
+ a.and_(ARG5d, imm(_TAG_IMMED2_MASK));
+ a.sub(ARG3d, imm(_TAG_IMMED2_ATOM));
+ a.sub(ARG5d, imm(_TAG_IMMED2_ATOM));
+ a.or_(ARG3d, ARG5d);
+ a.jne(generic_compare);
+
+ emit_enter_runtime();
+
+ runtime_call<2>(erts_cmp_atoms);
+
+ emit_leave_runtime();
+
+ /* !! erts_cmp_atoms returns int, not Sint !! */
+ a.test(RETd, RETd);
+
+ a.ret();
+ }
+
+ a.bind(generic_compare);
+ {
+ emit_enter_runtime();
+
+ comment("erts_cmp_compound(X, Y, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+
+ a.ret();
+ }
+}
+
+void BeamModuleAssembler::emit_is_lt(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS) {
+ Label fail = labels[Fail.getValue()];
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, RHS); /* May clobber ARG1 */
+ mov_arg(ARG1, LHS);
+
+ a.cmp(ARG1, ARG2);
+ a.je(fail);
+
+ /* Relative comparisons are overwhelmingly likely to be used on smalls, so
+ * we'll specialize those and keep the rest in a shared fragment. */
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ a.mov(RETd, ARG1d);
+ } else if (LHS.isImmed() && is_small(LHS.getValue())) {
+ a.mov(RETd, ARG2d);
+ } else {
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ }
+
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+
+ a.cmp(ARG1, ARG2);
+ a.short_().jl(next);
+ a.jmp(fail);
+
+ a.bind(generic);
+ {
+ safe_fragment_call(ga->get_arith_compare_shared());
+ a.jge(fail);
+ }
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_is_ge(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS) {
+ Label fail = labels[Fail.getValue()];
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, RHS); /* May clobber ARG1 */
+ mov_arg(ARG1, LHS);
+
+ a.cmp(ARG1, ARG2);
+ a.short_().je(next);
+
+ /* Relative comparisons are overwhelmingly likely to be used on smalls, so
+ * we'll specialize those and keep the rest in a shared fragment. */
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ a.mov(RETd, ARG1d);
+ } else if (LHS.isImmed() && is_small(LHS.getValue())) {
+ a.mov(RETd, ARG2d);
+ } else {
+ a.mov(RETd, ARG1d);
+ a.and_(RETd, ARG2d);
+ }
+
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+
+ a.cmp(ARG1, ARG2);
+ a.short_().jge(next);
+ a.jmp(fail);
+
+ a.bind(generic);
+ {
+ safe_fragment_call(ga->get_arith_compare_shared());
+ a.jl(fail);
+ }
+
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_bif_is_eq_ne_exact_immed(const ArgVal &Src,
+ const ArgVal &Immed,
+ const ArgVal &Dst,
+ Eterm fail_value,
+ Eterm succ_value) {
+ cmp_arg(getArgRef(Src), Immed);
+ mov_imm(RET, fail_value);
+ mov_imm(ARG1, succ_value);
+ a.cmove(RET, ARG1);
+ mov_arg(Dst, RET);
+}
+
+void BeamModuleAssembler::emit_bif_is_eq_exact_immed(const ArgVal &Src,
+ const ArgVal &Immed,
+ const ArgVal &Dst) {
+ emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_false, am_true);
+}
+
+void BeamModuleAssembler::emit_bif_is_ne_exact_immed(const ArgVal &Src,
+ const ArgVal &Immed,
+ const ArgVal &Dst) {
+ emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_true, am_false);
+}
+
+void BeamModuleAssembler::emit_badmatch(const ArgVal &Src) {
+ mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
+ emit_error(BADMATCH);
+}
+
+void BeamModuleAssembler::emit_case_end(const ArgVal &Src) {
+ mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
+ emit_error(EXC_CASE_CLAUSE);
+}
+
+void BeamModuleAssembler::emit_system_limit_body() {
+ emit_error(SYSTEM_LIMIT);
+}
+
+void BeamModuleAssembler::emit_if_end() {
+ emit_error(EXC_IF_CLAUSE);
+}
+
+void BeamModuleAssembler::emit_catch(const ArgVal &Y, const ArgVal &Fail) {
+ a.inc(x86::qword_ptr(c_p, offsetof(Process, catches)));
+
+ Label patch_addr = a.newLabel();
+
+ /*
+ * Emit the following instruction:
+ *
+ * b8 ff ff ff 7f mov eax,0x7fffffff
+ * ^
+ * |
+ * |
+ * offset to be patched
+ * with the tagged catch
+ */
+ a.bind(patch_addr);
+ a.mov(RETd, imm(0x7fffffff));
+
+ mov_arg(Y, RET);
+
+ /* Offset = 1 for `mov` payload */
+ catches.push_back({{patch_addr, 0x1, 0}, labels[Fail.getValue()]});
+}
+
+void BeamGlobalAssembler::emit_catch_end_shared() {
+ Label not_throw = a.newLabel(), not_error = a.newLabel(),
+ after_gc = a.newLabel();
+
+ /* Load thrown value / reason into ARG2 for add_stacktrace */
+ a.mov(ARG2, getXRef(2));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), imm(NIL));
+
+ a.cmp(getXRef(1), imm(am_throw));
+ a.short_().jne(not_throw);
+
+ /* Thrown value, return it in x0 */
+ a.mov(getXRef(0), ARG2);
+
+ a.ret();
+
+ a.bind(not_throw);
+ {
+ a.cmp(getXRef(1), imm(am_error));
+ /* NOTE: Short won't reach if JIT_HARD_DEBUG is defined. */
+ a.jne(not_error);
+
+ /* This is an error, attach a stacktrace to the reason. */
+ emit_enter_runtime<Update::eStack | Update::eHeap>();
+
+ a.mov(ARG1, c_p);
+ /* ARG2 set above. */
+ a.mov(ARG3, getXRef(3));
+ runtime_call<3>(add_stacktrace);
+
+ emit_leave_runtime<Update::eStack | Update::eHeap>();
+
+ /* not_error assumes stacktrace/reason is in ARG2 */
+ a.mov(ARG2, RET);
+ }
+
+ a.bind(not_error);
+ {
+ const int32_t bytes_needed = (3 + S_RESERVED) * sizeof(Eterm);
+
+ a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
+ a.cmp(ARG3, E);
+ a.short_().jbe(after_gc);
+
+ /* Preserve stacktrace / reason */
+ a.mov(getXRef(0), ARG2);
+ mov_imm(ARG4, 1);
+ aligned_call(labels[garbage_collect]);
+ a.mov(ARG2, getXRef(0));
+
+ a.bind(after_gc);
+
+ a.mov(x86::qword_ptr(HTOP), imm(make_arityval(2)));
+ a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), imm(am_EXIT));
+ a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 2), ARG2);
+
+ a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
+ a.add(HTOP, imm(3 * sizeof(Eterm)));
+
+ a.mov(getXRef(0), RET);
+ }
+
+ a.ret();
+}
+
+void BeamModuleAssembler::emit_catch_end(const ArgVal &Y) {
+ Label next = a.newLabel();
+
+ emit_try_end(Y);
+
+ a.cmp(getXRef(0), imm(THE_NON_VALUE));
+ a.short_().jne(next);
+ fragment_call(ga->get_catch_end_shared());
+ a.bind(next);
+}
+
+void BeamModuleAssembler::emit_try_end(const ArgVal &Y) {
+ a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
+ emit_init(Y);
+}
+
+void BeamModuleAssembler::emit_try_case(const ArgVal &Y) {
+ a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
+ mov_imm(RET, NIL);
+ mov_arg(Y, RET);
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), RET);
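+    /* Shift x(1), x(2), and x(3) down to x(0), x(1), and x(2), making the
+     * class, reason, and raw stacktrace the first three x registers. */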
+ a.movups(x86::xmm0, x86::xmmword_ptr(registers, 1 * sizeof(Eterm)));
+ a.mov(RET, getXRef(3));
+ a.movups(x86::xmmword_ptr(registers, 0 * sizeof(Eterm)), x86::xmm0);
+ a.mov(getXRef(2), RET);
+}
+
+void BeamModuleAssembler::emit_try_case_end(const ArgVal &Src) {
+ mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
+ emit_error(EXC_TRY_CLAUSE);
+}
+
+void BeamModuleAssembler::emit_raise(const ArgVal &Trace, const ArgVal &Value) {
+ mov_arg(ARG3, Value);
+ mov_arg(ARG2, Trace);
+
+ /* This is an error, attach a stacktrace to the reason. */
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), ARG3);
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, ftrace)), ARG2);
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ runtime_call<2>(erts_sanitize_freason);
+
+ emit_leave_runtime();
+
+ emit_handle_error();
+}
+
+void BeamModuleAssembler::emit_build_stacktrace() {
+ emit_enter_runtime<Update::eStack | Update::eHeap>();
+
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, getXRef(0));
+ runtime_call<2>(build_stacktrace);
+
+ emit_leave_runtime<Update::eStack | Update::eHeap>();
+
+ a.mov(getXRef(0), RET);
+}
+
+void BeamModuleAssembler::emit_raw_raise() {
+ Label next = a.newLabel();
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, getXRef(2));
+ a.mov(ARG2, getXRef(0));
+ a.mov(ARG3, getXRef(1));
+ a.mov(ARG4, c_p);
+ runtime_call<4>(raw_raise);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+ a.short_().jne(next);
+ emit_handle_error();
+ a.bind(next);
+ a.mov(getXRef(0), imm(am_badarg));
+}
+
+void BeamGlobalAssembler::emit_i_test_yield_shared() {
+ int mfa_offset = -(int)sizeof(ErtsCodeMFA) - BEAM_ASM_FUNC_PROLOGUE_SIZE;
+
+ /* Yield address is in ARG3. */
+ a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG2);
+ a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, arity)));
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);
+
+ emit_discard_cp();
+
+ a.jmp(labels[context_switch_simplified]);
+}
+
+void BeamModuleAssembler::emit_i_test_yield() {
+ Label next = a.newLabel(), entry = a.newLabel();
+
+ /* When present, this is guaranteed to be the first instruction after the
+ * breakpoint trampoline. */
+
+ ASSERT(a.offset() % 8 == 0);
+ a.bind(entry);
+ a.dec(FCALLS);
+ a.short_().jg(next);
+ a.lea(ARG3, x86::qword_ptr(entry));
+ a.call(funcYield);
+ a.bind(next);
+}
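+
+/* A sketch of the sequence emitted above:
+ *
+ *   entry:
+ *     dec FCALLS
+ *     jg next             ; reductions remain, keep going
+ *     lea ARG3, [entry]   ; yield address for i_test_yield_shared
+ *     call funcYield
+ *   next:
+ */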
+
+void BeamModuleAssembler::emit_i_yield() {
+ a.mov(getXRef(0), imm(am_true));
+#ifdef NATIVE_ERLANG_STACK
+ fragment_call(ga->get_dispatch_return());
+#else
+ Label next = a.newLabel();
+
+ a.lea(ARG3, x86::qword_ptr(next));
+ abs_jmp(ga->get_dispatch_return());
+
+ a.align(kAlignCode, 8);
+ a.bind(next);
+#endif
+}
+
+void BeamModuleAssembler::emit_i_perf_counter() {
+ Label next = a.newLabel(), small = a.newLabel();
+
+ emit_enter_runtime();
+
+#ifdef WIN32
+ /* Call the function pointer used by erts_sys_perf_counter */
+ runtime_call<0>(erts_sys_time_data__.r.o.sys_hrtime);
+#else
+ runtime_call<0>(erts_sys_time_data__.r.o.perf_counter);
+#endif
+
+ emit_leave_runtime();
+
+ a.mov(ARG1, RET);
+ a.sar(ARG1, imm(SMALL_BITS - 1));
+ a.add(ARG1, 1);
+ a.cmp(ARG1, 1);
+ a.jbe(small);
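+    /* After the arithmetic shift, ARG1 is 0 or -1 exactly when RET fits in
+     * a small; adding 1 maps those two cases onto the unsigned range 0..1,
+     * which `jbe` accepts. Anything else needs a bignum. */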
+
+ {
+ a.mov(TMP_MEM1q, RET);
+
+ emit_gc_test(ArgVal(ArgVal::i, 0),
+ ArgVal(ArgVal::i, ERTS_MAX_UINT64_HEAP_SIZE),
+ ArgVal(ArgVal::i, 0));
+
+ a.mov(ARG1, TMP_MEM1q);
+
+ a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 0),
+ imm(make_pos_bignum_header(1)));
+ a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), ARG1);
+ a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
+ a.add(HTOP, imm(sizeof(Eterm) * 2));
+ a.short_().jmp(next);
+ }
+
+ a.bind(small);
+ {
+ a.shl(RET, imm(_TAG_IMMED1_SIZE));
+ a.or_(RET, imm(_TAG_IMMED1_SMALL));
+ }
+
+ a.bind(next);
+ a.mov(getXRef(0), RET);
+}