Diffstat (limited to 'erts/emulator/beam/jit/x86/instr_arith.cpp')
-rw-r--r--  erts/emulator/beam/jit/x86/instr_arith.cpp | 1251
 1 file changed, 1251 insertions(+), 0 deletions(-)
diff --git a/erts/emulator/beam/jit/x86/instr_arith.cpp b/erts/emulator/beam/jit/x86/instr_arith.cpp
new file mode 100644
index 0000000000..5615230551
--- /dev/null
+++ b/erts/emulator/beam/jit/x86/instr_arith.cpp
@@ -0,0 +1,1251 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2020-2020. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * See the comment in instr_common.cpp for some notes on how
+ * to minimize code size.
+ */
+
+#include "beam_asm.hpp"
+
+extern "C"
+{
+#include "erl_bif_table.h"
+}
+
+void BeamModuleAssembler::emit_bif_arg_error(std::vector<ArgVal> args,
+ const ErtsCodeMFA *mfa) {
+ comment("handle_error");
+ for (unsigned i = 0; i < args.size(); i++)
+ mov_arg(ArgVal(ArgVal::x, i), args[i]);
+ emit_handle_error(mfa);
+}
+
+void BeamModuleAssembler::emit_is_small(Label fail, x86::Gp Reg) {
+ ASSERT(ARG1 != Reg);
+
+ comment("is_small(X)");
+ a.mov(ARG1d, Reg.r32());
+ a.and_(ARG1d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG1d, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(fail);
+}
+
+void BeamModuleAssembler::emit_is_both_small(Label fail, x86::Gp A, x86::Gp B) {
+ ASSERT(ARG1 != A && ARG1 != B);
+
+ comment("is_both_small(X, Y)");
+ if (A != RET && B != RET) {
+ a.mov(RETd, A.r32());
+ a.and_(RETd, B.r32());
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ } else {
+ a.mov(ARG1d, A.r32());
+ a.and_(ARG1d, B.r32());
+ a.and_(ARG1d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG1d, imm(_TAG_IMMED1_SMALL));
+ }
+ a.short_().jne(fail);
+}
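+
+/* For reference, the combined check above corresponds to this C sketch
+ * (tag macros from erl_term.h). ANDing the two words first lets a single
+ * mask-and-compare test both tags, since a tag bit can only survive the
+ * AND when it is set in both operands:
+ *
+ *     int is_both_small(Eterm a, Eterm b) {
+ *         return ((a & b) & _TAG_IMMED1_MASK) == _TAG_IMMED1_SMALL;
+ *     }
+ */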
+
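+/* ARG2 = Src (tagged term), ARG3 = increment (untagged)
+ *
+ * Result is returned in RET. */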
+void BeamGlobalAssembler::emit_increment_body_shared() {
+ Label error = a.newLabel();
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ a.or_(ARG3, imm(_TAG_IMMED1_SMALL));
+ runtime_call<3>(erts_mixed_plus);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ mov_imm(ARG4, 0);
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamModuleAssembler::emit_i_increment(const ArgVal &Src,
+ const ArgVal &Val,
+ const ArgVal &Dst) {
+ Label mixed = a.newLabel(), next = a.newLabel();
+
+ /* Place the values in ARG2 and ARG3 to prepare for the mixed call. Note
+ * that ARG3 is untagged at this point */
+ mov_arg(ARG2, Src);
+ mov_imm(ARG3, Val.getValue() << _TAG_IMMED1_SIZE);
+ a.mov(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(mixed);
+
+ a.mov(RET, ARG2);
+ a.add(RET, ARG3);
+ a.short_().jno(next);
+
+ a.bind(mixed);
+ safe_fragment_call(ga->get_increment_body_shared());
+
+    /* All went well; store the result in Dst. */
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
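+
+/* A sketch of the fast path above, assuming the usual macros from
+ * erl_term.h. The small tag lives in the low bits, so adding the
+ * increment shifted up past the tag leaves the tag untouched:
+ *
+ *     Eterm x = make_small(a);
+ *     x += (Eterm)n << _TAG_IMMED1_SIZE;
+ *
+ * x now equals make_small(a + n); the `jno` above catches the case where
+ * the addition overflowed and a bignum is needed. */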
+
+void BeamGlobalAssembler::emit_plus_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_Plus, 2};
+
+ Label error = a.newLabel();
+
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG2);
+ a.mov(TMP_MEM2q, ARG3);
+
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_mixed_plus);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG2);
+
+ a.mov(ARG4, imm(&bif_mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamGlobalAssembler::emit_plus_guard_shared() {
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ /* ARG2 and ARG3 were set by the caller */
+ runtime_call<3>(erts_mixed_plus);
+
+ emit_leave_runtime();
+
+ /* Set ZF if the addition failed. */
+ emit_test_the_non_value(RET);
+ a.ret();
+}
+
+void BeamModuleAssembler::emit_i_plus(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ Label next = a.newLabel(), mixed = a.newLabel();
+
+ mov_arg(ARG2, LHS); /* Used by erts_mixed_plus in this slot */
+ mov_arg(ARG3, RHS); /* Used by erts_mixed_plus in this slot */
+ emit_is_both_small(mixed, ARG2, ARG3);
+
+ comment("add with overflow check");
+ a.mov(RET, ARG2);
+ a.mov(ARG4, ARG3);
+ a.and_(ARG4, imm(~_TAG_IMMED1_MASK));
+ a.add(RET, ARG4);
+ a.short_().jno(next);
+
+ /* Call mixed addition. */
+ a.bind(mixed);
+ {
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_plus_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_plus_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
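+
+/* A sketch of why the overflow check above is sound: both smalls carry
+ * their value shifted up by _TAG_IMMED1_SIZE, so the plain 64-bit `add`
+ * sets OF exactly when the untagged sum no longer fits in a small:
+ *
+ *     Eterm sum = make_small(a) + (make_small(b) & ~_TAG_IMMED1_MASK);
+ *
+ * Absent overflow, sum == make_small(a + b), already correctly tagged. */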
+
+void BeamGlobalAssembler::emit_minus_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_Minus, 2};
+
+ Label error = a.newLabel();
+
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG2);
+ a.mov(TMP_MEM2q, ARG3);
+
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_mixed_minus);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG2);
+
+ a.mov(ARG4, imm(&bif_mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamGlobalAssembler::emit_minus_guard_shared() {
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ /* ARG2 and ARG3 were set by the caller */
+ runtime_call<3>(erts_mixed_minus);
+
+ emit_leave_runtime();
+
+    /* Set ZF if the subtraction failed. */
+ emit_test_the_non_value(RET);
+ a.ret();
+}
+
+void BeamModuleAssembler::emit_i_minus(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ Label next = a.newLabel(), mixed = a.newLabel();
+
+    mov_arg(ARG2, LHS); /* Used by erts_mixed_minus in this slot */
+    mov_arg(ARG3, RHS); /* Used by erts_mixed_minus in this slot */
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ a.mov(RETd, ARG2d);
+ } else if (LHS.isImmed() && is_small(LHS.getValue())) {
+ a.mov(RETd, ARG3d);
+ } else {
+ a.mov(RETd, ARG2d);
+ a.and_(RETd, ARG3d);
+ }
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(mixed);
+
+ comment("sub with overflow check");
+ a.mov(RET, ARG2);
+ a.mov(ARG4, ARG3);
+ a.and_(ARG4, imm(~_TAG_IMMED1_MASK));
+ a.sub(RET, ARG4);
+ a.short_().jno(next);
+
+ a.bind(mixed);
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_minus_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_minus_body_shared());
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
+
+void BeamGlobalAssembler::emit_unary_minus_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_Minus, 1};
+
+ Label error = a.newLabel();
+
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG2);
+
+ a.mov(ARG1, c_p);
+ runtime_call<2>(erts_unary_minus);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ /* Place the original argument in x0. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(getXRef(0), ARG1);
+
+ a.mov(ARG4, imm(&bif_mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamGlobalAssembler::emit_unary_minus_guard_shared() {
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ /* ARG2 was set by the caller */
+ runtime_call<2>(erts_unary_minus);
+
+ emit_leave_runtime();
+
+ /* Set ZF if the negation failed. */
+ emit_test_the_non_value(RET);
+ a.ret();
+}
+
+void BeamModuleAssembler::emit_i_unary_minus(const ArgVal &Src,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ Label next = a.newLabel(), mixed = a.newLabel();
+
+ mov_arg(ARG2, Src);
+ a.mov(RETd, ARG2d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(mixed);
+
+ comment("negation with overflow test");
+ /* RETb is now equal to _TAG_IMMED1_SMALL. */
+ a.movzx(RET, RETb); /* Set RET to make_small(0). */
+ a.mov(ARG3, ARG2);
+ a.and_(ARG3, imm(~_TAG_IMMED1_MASK));
+ a.sub(RET, ARG3);
+ a.short_().jno(next);
+
+ a.bind(mixed);
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_unary_minus_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_unary_minus_body_shared());
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
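+
+/* A sketch of the negation above: make_small(0) is just the tag itself,
+ * and subtracting the untagged operand from it produces a correctly
+ * tagged small in one step:
+ *
+ *     Eterm r = make_small(0) - (make_small(a) & ~_TAG_IMMED1_MASK);
+ *
+ * Absent overflow (only negating MIN_SMALL overflows), r is equal to
+ * make_small(-a). */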
+
+/* ARG1 = LHS, ARG4 (!) = RHS
+ *
+ * We avoid using ARG2 and ARG3 because division clobbers RDX, which is
+ * ARG2 on Windows and ARG3 on SystemV.
+ *
+ * Quotient is returned in RAX, remainder in RDX. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_int_div_rem_guard_shared() {
+ Label exit = a.newLabel(), generic = a.newLabel();
+
+ a.cmp(ARG4, imm(SMALL_ZERO));
+ a.je(exit);
+
+ /* Are both smalls? */
+ a.mov(ARG2d, ARG1d);
+ a.and_(ARG2d, ARG4d);
+ a.and_(ARG2d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG2d, imm(_TAG_IMMED1_SMALL));
+ a.jne(generic);
+
+ a.mov(ARG5, ARG4);
+ a.sar(ARG5, imm(_TAG_IMMED1_SIZE));
+ a.mov(x86::rax, ARG1);
+ a.sar(x86::rax, imm(_TAG_IMMED1_SIZE));
+
+ /* Sign-extend and divide. The result is implicitly placed in RAX and the
+ * remainder in RDX (ARG3). */
+ a.cqo();
+ a.idiv(ARG5);
+
+ a.mov(ARG5, x86::rax);
+
+ /* Speculatively tag the result as overflow is very rare. */
+ a.sal(x86::rax, imm(_TAG_IMMED1_SIZE));
+ a.sal(x86::rdx, imm(_TAG_IMMED1_SIZE));
+ a.or_(x86::rax, imm(_TAG_IMMED1_SMALL));
+ a.or_(x86::rdx, imm(_TAG_IMMED1_SMALL));
+
+ /* MIN_SMALL divided by -1 will overflow, and we'll need to fall back to the
+ * generic handler in that case. */
+ a.sar(ARG5, imm(SMALL_BITS - 1));
+ a.cmp(ARG5, imm(1));
+ a.jl(exit);
+
+ /* Fall through to `generic` */
+
+ a.bind(generic);
+ {
+ emit_enter_runtime();
+
+ a.mov(ARG2, ARG1);
+ a.mov(ARG3, ARG4);
+ a.lea(ARG4, TMP_MEM1q);
+ a.lea(ARG5, TMP_MEM2q);
+ a.mov(ARG1, c_p);
+ runtime_call<5>(erts_int_div_rem);
+
+ emit_leave_runtime();
+
+ /* Place the result in RAX:RDX, mirroring the `idiv` instruction. */
+ a.mov(x86::rax, TMP_MEM1q);
+ a.mov(x86::rdx, TMP_MEM2q);
+
+        /* erts_int_div_rem returns a tagged value, so we know it's non-zero
+         * and can clear ZF by testing it against itself. */
+ a.test(RET, RET);
+
+ /* Fall through */
+ }
+
+ /* Return with a potential error in ZF. It will be set if we came here from
+ * the guard against SMALL_ZERO or if we're returning THE_NON_VALUE. */
+ a.bind(exit);
+ a.ret();
+}
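+
+/* For reference, the quotient range check above in C. A quotient still
+ * fits in a small when its top bits are all sign bits, which only fails
+ * for MIN_SMALL divided by -1:
+ *
+ *     int quotient_fits(Sint q) {
+ *         return (q >> (SMALL_BITS - 1)) < 1;
+ *     }
+ *
+ * The arithmetic shift leaves 0 for in-range positive quotients and -1
+ * for in-range negative ones. */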
+
+/* ARG1 = LHS, ARG4 (!) = RHS, ARG5 = error MFA
+ *
+ * We avoid using ARG2 and ARG3 because division clobbers RDX, which is
+ * ARG2 on Windows and ARG3 on SystemV.
+ *
+ * Quotient is returned in RAX, remainder in RDX. */
+void BeamGlobalAssembler::emit_int_div_rem_body_shared() {
+ Label div_zero = a.newLabel(), generic_div = a.newLabel(),
+ generic_error = a.newLabel(), error = a.newLabel();
+
+ a.cmp(ARG4, imm(SMALL_ZERO));
+ a.je(div_zero);
+
+ /* Are both smalls? */
+ a.mov(ARG2d, ARG1d);
+ a.and_(ARG2d, ARG4d);
+ a.and_(ARG2d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG2d, imm(_TAG_IMMED1_SMALL));
+ a.jne(generic_div);
+
+ a.mov(ARG6, ARG4);
+ a.sar(ARG6, imm(_TAG_IMMED1_SIZE));
+ a.mov(x86::rax, ARG1);
+ a.sar(x86::rax, imm(_TAG_IMMED1_SIZE));
+
+ /* Sign-extend and divide. The result is implicitly placed in RAX and the
+ * remainder in RDX (ARG3). */
+ a.cqo();
+ a.idiv(ARG6);
+
+ a.mov(ARG6, x86::rax);
+
+ /* Speculatively tag the result as overflow is very rare. */
+ a.sal(x86::rax, imm(_TAG_IMMED1_SIZE));
+ a.sal(x86::rdx, imm(_TAG_IMMED1_SIZE));
+ a.or_(x86::rax, imm(_TAG_IMMED1_SMALL));
+ a.or_(x86::rdx, imm(_TAG_IMMED1_SMALL));
+
+ /* MIN_SMALL divided by -1 will overflow, and we'll need to fall back to the
+ * generic handler in that case. */
+ a.sar(ARG6, imm(SMALL_BITS - 1));
+ a.cmp(ARG6, imm(1));
+ a.short_().jge(generic_div);
+
+ a.ret();
+
+ a.bind(generic_div);
+ {
+ emit_enter_runtime();
+
+ /* Save MFA and original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG1);
+ a.mov(TMP_MEM2q, ARG4);
+ a.mov(TMP_MEM3q, ARG5);
+
+ a.mov(ARG2, ARG1);
+ a.mov(ARG3, ARG4);
+ a.lea(ARG4, TMP_MEM4q);
+ a.lea(ARG5, TMP_MEM5q);
+ a.mov(ARG1, c_p);
+ runtime_call<5>(erts_int_div_rem);
+
+ emit_leave_runtime();
+
+ a.test(RET, RET);
+
+ /* Place the result in RAX:RDX, mirroring the `idiv` instruction. */
+ a.mov(x86::rax, TMP_MEM4q);
+ a.mov(x86::rdx, TMP_MEM5q);
+
+ a.short_().je(generic_error);
+ a.ret();
+ }
+
+ a.bind(div_zero);
+ {
+ /* Set up a badarith exception and place the original arguments in
+ * x-registers. */
+ a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)),
+ imm(EXC_BADARITH));
+
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG4);
+
+ a.mov(ARG4, ARG5);
+ a.short_().jmp(error);
+ }
+
+ a.bind(generic_error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG2);
+
+ /* Read saved MFA. */
+ a.mov(ARG4, TMP_MEM3q);
+
+ /* Fall through to `error` */
+ }
+
+ a.bind(error);
+ emit_handle_error_shared_prologue();
+}
+
+void BeamModuleAssembler::emit_div_rem(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ErtsCodeMFA *error_mfa) {
+ mov_arg(ARG4, RHS); /* Done first as mov_arg may clobber ARG1 */
+ mov_arg(ARG1, LHS);
+
+ /* TODO: Specialize division with immediates, either here or in the
+ * compiler. */
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_int_div_rem_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ a.mov(ARG5, imm(error_mfa));
+ safe_fragment_call(ga->get_int_div_rem_body_shared());
+ }
+}
+
+void BeamModuleAssembler::emit_i_rem_div(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Remainder,
+ const ArgVal &Quotient) {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_rem, 2};
+
+ emit_div_rem(Fail, LHS, RHS, &bif_mfa);
+
+ mov_arg(Remainder, x86::rdx);
+ mov_arg(Quotient, x86::rax);
+}
+
+void BeamModuleAssembler::emit_i_div_rem(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Quotient,
+ const ArgVal &Remainder) {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_div, 2};
+
+ emit_div_rem(Fail, LHS, RHS, &bif_mfa);
+
+ mov_arg(Quotient, x86::rax);
+ mov_arg(Remainder, x86::rdx);
+}
+
+void BeamModuleAssembler::emit_i_int_div(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Quotient) {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_div, 2};
+
+ emit_div_rem(Fail, LHS, RHS, &bif_mfa);
+
+ mov_arg(Quotient, x86::rax);
+}
+
+void BeamModuleAssembler::emit_i_rem(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Remainder) {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_rem, 2};
+
+ emit_div_rem(Fail, LHS, RHS, &bif_mfa);
+
+ mov_arg(Remainder, x86::rdx);
+}
+
+void BeamModuleAssembler::emit_i_m_div(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Dst) {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_Div, 2};
+
+ Label next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+ mov_arg(ARG3, RHS);
+
+ emit_enter_runtime();
+
+ /* Must be set last since mov_arg() may clobber ARG1 */
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_mixed_div);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+
+ if (Fail.getValue() != 0) {
+ a.je(labels[Fail.getValue()]);
+ } else {
+ a.short_().jne(next);
+ emit_bif_arg_error({LHS, RHS}, &bif_mfa);
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
+
+/* ARG1 = LHS, ARG4 (!) = RHS
+ *
+ * We avoid using ARG2 and ARG3 because multiplication clobbers RDX, which is
+ * ARG2 on Windows and ARG3 on SystemV.
+ *
+ * Result is returned in RET, error is indicated by ZF. */
+void BeamGlobalAssembler::emit_times_guard_shared() {
+ Label generic = a.newLabel();
+
+ /* Are both smalls? */
+ a.mov(ARG2d, ARG1d);
+ a.and_(ARG2d, ARG4d);
+ a.and_(ARG2d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG2d, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+
+ a.mov(RET, ARG1);
+ a.mov(ARG2, ARG4);
+ a.and_(RET, imm(~_TAG_IMMED1_MASK));
+ a.sar(ARG2, imm(_TAG_IMMED1_SIZE));
+ a.imul(RET, ARG2); /* Clobbers RDX */
+ a.short_().jo(generic);
+
+    a.or_(RET, imm(_TAG_IMMED1_SMALL)); /* Result is non-zero, so this always clears ZF */
+
+ a.ret();
+
+ a.bind(generic);
+ {
+ emit_enter_runtime();
+
+ a.mov(ARG2, ARG1);
+ a.mov(ARG3, ARG4);
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_mixed_times);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET); /* Sets ZF for use in caller */
+
+ a.ret();
+ }
+}
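+
+/* A sketch of the tag handling in the multiplication above: one operand
+ * keeps its value shifted up (tag cleared), the other is shifted all the
+ * way down, so the product comes out pre-shifted and only needs its tag
+ * OR:ed back in:
+ *
+ *     Sint p = (Sint)(make_small(a) & ~_TAG_IMMED1_MASK) * b;
+ *     p |= _TAG_IMMED1_SMALL;
+ *
+ * Absent overflow (caught by `jo`), p == (Sint)make_small(a * b). */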
+
+/* ARG1 = LHS, ARG4 (!) = RHS
+ *
+ * We avoid using ARG2 and ARG3 because multiplication clobbers RDX, which is
+ * ARG2 on Windows and ARG3 on SystemV.
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_times_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_Times, 2};
+
+ Label generic = a.newLabel(), error = a.newLabel();
+
+ /* Are both smalls? */
+ a.mov(ARG2d, ARG1d);
+ a.and_(ARG2d, ARG4d);
+ a.and_(ARG2d, imm(_TAG_IMMED1_MASK));
+    a.cmp(ARG2d, imm(_TAG_IMMED1_SMALL));
+ a.jne(generic);
+
+ a.mov(RET, ARG1);
+ a.mov(ARG2, ARG4);
+ a.and_(RET, imm(~_TAG_IMMED1_MASK));
+ a.sar(ARG2, imm(_TAG_IMMED1_SIZE));
+ a.imul(RET, ARG2); /* Clobbers RDX */
+ a.short_().jo(generic);
+
+ a.or_(RET, imm(_TAG_IMMED1_SMALL));
+
+ a.ret();
+
+ a.bind(generic);
+ {
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG1);
+ a.mov(TMP_MEM2q, ARG4);
+
+ a.mov(ARG2, ARG1);
+ a.mov(ARG3, ARG4);
+ a.mov(ARG1, c_p);
+ runtime_call<3>(erts_mixed_times);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+ }
+
+ a.bind(error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG2);
+
+ a.mov(ARG4, imm(&bif_mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamModuleAssembler::emit_i_times(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Dst) {
+ mov_arg(ARG4, RHS); /* Done first as mov_arg may clobber ARG1 */
+ mov_arg(ARG1, LHS);
+
+ /* TODO: Specialize multiplication with immediates, either here or in the
+ * compiler. */
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_times_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_times_body_shared());
+ }
+
+ mov_arg(Dst, RET);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+template<typename T>
+void BeamGlobalAssembler::emit_bitwise_fallback_guard(T(*func_ptr)) {
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ /* ARG2 is already set to LHS */
+ a.mov(ARG3, RET);
+ runtime_call<3>(func_ptr);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.ret();
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. */
+template<typename T>
+void BeamGlobalAssembler::emit_bitwise_fallback_body(T(*func_ptr),
+ const ErtsCodeMFA *mfa) {
+ Label error = a.newLabel();
+
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, ARG2);
+ a.mov(TMP_MEM2q, RET);
+
+ a.mov(ARG1, c_p);
+ /* ARG2 is already set to LHS */
+ a.mov(ARG3, RET);
+ runtime_call<3>(func_ptr);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+ a.mov(getXRef(0), ARG1);
+ a.mov(getXRef(1), ARG2);
+
+ a.mov(ARG4, imm(mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamGlobalAssembler::emit_i_band_guard_shared() {
+ emit_bitwise_fallback_guard(erts_band);
+}
+
+void BeamGlobalAssembler::emit_i_band_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_band, 2};
+ emit_bitwise_fallback_body(erts_band, &bif_mfa);
+}
+
+void BeamModuleAssembler::emit_i_band(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+ mov_arg(RET, RHS);
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ emit_is_small(generic, ARG2);
+ } else {
+ emit_is_both_small(generic, RET, ARG2);
+ }
+
+ /* TAG & TAG = TAG, so we don't need to tag it again. */
+ a.and_(RET, ARG2);
+ a.short_().jmp(next);
+
+ a.bind(generic);
+ {
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_band_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_band_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_i_bor_guard_shared() {
+ emit_bitwise_fallback_guard(erts_bor);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_i_bor_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_bor, 2};
+ emit_bitwise_fallback_body(erts_bor, &bif_mfa);
+}
+
+void BeamModuleAssembler::emit_i_bor(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Dst) {
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+ mov_arg(RET, RHS);
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ emit_is_small(generic, ARG2);
+ } else {
+ emit_is_both_small(generic, RET, ARG2);
+ }
+
+ /* TAG | TAG = TAG, so we don't need to tag it again. */
+ a.or_(RET, ARG2);
+ a.short_().jmp(next);
+
+ a.bind(generic);
+ {
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_bor_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_bor_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_i_bxor_guard_shared() {
+ emit_bitwise_fallback_guard(erts_bxor);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_i_bxor_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_bxor, 2};
+ emit_bitwise_fallback_body(erts_bxor, &bif_mfa);
+}
+
+void BeamModuleAssembler::emit_i_bxor(const ArgVal &Fail,
+ const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Dst) {
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+ mov_arg(RET, RHS);
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ emit_is_small(generic, ARG2);
+ } else {
+ emit_is_both_small(generic, RET, ARG2);
+ }
+
+ /* TAG ^ TAG = 0, so we need to tag it again. */
+ a.xor_(RET, ARG2);
+ a.or_(RET, imm(_TAG_IMMED1_SMALL));
+ a.short_().jmp(next);
+
+ a.bind(generic);
+ {
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_bxor_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_bxor_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
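+
+/* Why `band` and `bor` above need no retagging while `bxor` does, in one
+ * sketch: with both operands tagged, AND and OR reproduce the tag, but
+ * XOR cancels it:
+ *
+ *     (make_small(a) & make_small(b)) == make_small(a & b)
+ *     (make_small(a) | make_small(b)) == make_small(a | b)
+ *     (make_small(a) ^ make_small(b)) == (make_small(a ^ b) & ~_TAG_IMMED1_MASK)
+ */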
+
+/* RET (!) = Src
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_i_bnot_guard_shared() {
+ /* Undo the speculative inversion in module code */
+ a.xor_(RET, imm(~_TAG_IMMED1_MASK));
+
+ emit_enter_runtime();
+
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, RET);
+ runtime_call<2>(erts_bnot);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.ret();
+}
+
+/* RET (!) = Src
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_i_bnot_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_bnot, 1};
+
+ Label error = a.newLabel();
+
+ /* Undo the speculative inversion in module code */
+ a.xor_(RET, imm(~_TAG_IMMED1_MASK));
+
+ emit_enter_runtime();
+
+ /* Save original arguments for the error path. */
+ a.mov(TMP_MEM1q, RET);
+
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, RET);
+ runtime_call<2>(erts_bnot);
+
+ emit_leave_runtime();
+
+ emit_test_the_non_value(RET);
+ a.short_().je(error);
+
+ a.ret();
+
+ a.bind(error);
+ {
+ /* Place the original arguments in x-registers. */
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(getXRef(0), ARG1);
+
+ a.mov(ARG4, imm(&bif_mfa));
+ emit_handle_error_shared_prologue();
+ }
+}
+
+void BeamModuleAssembler::emit_i_bnot(const ArgVal &Fail,
+ const ArgVal &Src,
+ const ArgVal &Dst) {
+ Label next = a.newLabel();
+
+ mov_arg(RET, Src);
+
+ /* Invert everything except the tag so we don't have to tag it again. */
+ a.xor_(RET, imm(~_TAG_IMMED1_MASK));
+
+    /* Fall through to the generic path if the result is not a small; the
+     * shared fragments below will revert the inversion above. */
+ a.mov(ARG1d, RETd);
+ a.and_(ARG1d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG1d, imm(_TAG_IMMED1_SMALL));
+ a.short_().je(next);
+
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_bnot_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_bnot_body_shared());
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
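+
+/* A sketch of the speculative inversion above: flipping every bit except
+ * the tag is exactly `bnot` on the untagged value,
+ *
+ *     (make_small(a) ^ ~_TAG_IMMED1_MASK) == make_small(~a)
+ *
+ * and since XOR is its own inverse, the shared fragments recover the
+ * original term by applying the same XOR again before calling
+ * erts_bnot. */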
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_i_bsr_guard_shared() {
+ emit_bitwise_fallback_guard(erts_bsr);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_i_bsr_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_bsr, 2};
+ emit_bitwise_fallback_body(erts_bsr, &bif_mfa);
+}
+
+void BeamModuleAssembler::emit_i_bsr(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+
+ if (RHS.isImmed() && is_small(RHS.getValue())) {
+ Sint shift = signed_val(RHS.getValue());
+
+ if (shift >= 0 && shift < SMALL_BITS - 1) {
+ emit_is_small(generic, ARG2);
+
+ a.mov(RET, ARG2);
+
+ /* We don't need to clear the mask after shifting because
+ * _TAG_IMMED1_SMALL will set all the bits anyway. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_MASK == _TAG_IMMED1_SMALL);
+ a.sar(RET, imm(shift));
+ a.or_(RET, imm(_TAG_IMMED1_SMALL));
+
+ a.short_().jmp(next);
+ } else {
+            /* The constant shift is negative or too big for the `sar`
+             * instruction; fall back to the generic path. */
+ }
+ }
+
+ a.bind(generic);
+ {
+ mov_arg(RET, RHS);
+
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_bsr_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_bsr_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
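+
+/* A sketch of the constant-shift path above: `sar` drags value bits into
+ * the tag nibble, but because _TAG_IMMED1_SMALL is all ones the trailing
+ * OR repairs the tag unconditionally:
+ *
+ *     (((Sint)make_small(a) >> n) | _TAG_IMMED1_SMALL) == make_small(a >> n)
+ *
+ * for 0 <= n < SMALL_BITS - 1. */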
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. Error is indicated by ZF. */
+void BeamGlobalAssembler::emit_i_bsl_guard_shared() {
+ emit_bitwise_fallback_guard(erts_bsl);
+}
+
+/* ARG2 (!) = LHS, RET (!) = RHS
+ *
+ * Result is returned in RET. */
+void BeamGlobalAssembler::emit_i_bsl_body_shared() {
+ static const ErtsCodeMFA bif_mfa = {am_erlang, am_bsl, 2};
+ emit_bitwise_fallback_body(erts_bsl, &bif_mfa);
+}
+
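+/* Compile-time counterpart of the `lzcnt` instruction used below,
+ * including its convention of returning the operand width (64 here) for
+ * a zero input. */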
+static int count_leading_zeroes(UWord value) {
+ const int word_bits = sizeof(value) * CHAR_BIT;
+
+ if (value == 0) {
+ return word_bits;
+ }
+
+ UWord mask = UWORD_CONSTANT(1) << (word_bits - 1);
+ int count = 0;
+
+ while ((value & mask) == 0) {
+ mask >>= 1;
+ count++;
+ }
+
+ return count;
+}
+
+void BeamModuleAssembler::emit_i_bsl(const ArgVal &LHS,
+ const ArgVal &RHS,
+ const ArgVal &Fail,
+ const ArgVal &Dst) {
+ bool inline_shift = hasCpuFeature(x86::Features::kLZCNT);
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, LHS);
+ mov_arg(RET, RHS);
+
+ if (LHS.isImmed() && RHS.isImmed()) {
+ /* The compiler should've optimized this away, so we'll fall back to the
+ * generic path to simplify the inline implementation. */
+ inline_shift = false;
+ } else if (LHS.isLiteral() || RHS.isLiteral()) {
+ /* At least one argument is not a small. */
+ inline_shift = false;
+ } else if (LHS.isImmed() && !is_small(LHS.getValue())) {
+ /* Invalid constant. */
+ inline_shift = false;
+ } else if (RHS.isImmed() &&
+ (!is_small(RHS.getValue()) || signed_val(RHS.getValue()) < 0 ||
+ signed_val(RHS.getValue()) >= SMALL_BITS - 1)) {
+ /* Constant shift is invalid or always produces a bignum. */
+ inline_shift = false;
+ }
+
+ if (inline_shift) {
+ Operand shiftLimit, shiftCount;
+
+ ASSERT(!(LHS.isImmed() && RHS.isImmed()));
+
+ if (LHS.isMem()) {
+ a.mov(ARG1, ARG2);
+ a.mov(ARG3, ARG2);
+
+ /* If LHS is negative we need to invert the leading-zero count
+ * below. We leave the tag alone so we can test it later on. */
+ a.xor_(ARG3, imm(~_TAG_IMMED1_MASK));
+ a.cmovns(ARG1, ARG3);
+
+ /* Get number of leading zeroes in LHS so we can test whether it
+ * will overflow before shifting, as `sal` doesn't set the overflow
+ * flag on shifts greater than 1. */
+ a.lzcnt(ARG3, ARG1);
+
+ a.and_(ARG1d, imm(_TAG_IMMED1_MASK));
+ a.cmp(ARG1d, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+
+ shiftLimit = ARG3;
+ } else {
+ UWord value = LHS.getValue();
+
+ if (signed_val(value) < 0) {
+ value ^= ~(UWord)_TAG_IMMED1_MASK;
+ }
+
+ shiftLimit = imm(count_leading_zeroes(value));
+ }
+
+ if (RHS.isMem()) {
+ /* Move RHS to the counter register, as it's the only one that can
+ * be used for variable shifts. */
+ a.mov(x86::rcx, RET);
+
+ /* Negate the tag bits and then rotate them out, forcing the
+ * comparison below to fail for non-smalls. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+ a.xor_(x86::rcx, imm(_TAG_IMMED1_SMALL));
+ a.ror(x86::rcx, imm(_TAG_IMMED1_SIZE));
+
+ /* Fall back to generic path when the shift magnitude is negative or
+ * greater than the leading zero count.
+ *
+ * The raw emit form is used since `shiftLimit` may be a register
+ * or immediate, and the `cmp` helper doesn't accept untyped
+ * `Operand`s. */
+ a.emit(x86::Inst::kIdCmp, x86::rcx, shiftLimit);
+ a.short_().jae(generic);
+
+ shiftCount = x86::cl;
+ } else {
+ ASSERT(!shiftLimit.isImm());
+
+ shiftCount = imm(signed_val(RHS.getValue()));
+
+ a.emit(x86::Inst::kIdCmp, shiftLimit, shiftCount);
+ a.short_().jbe(generic);
+ }
+
+ a.and_(ARG2, imm(~_TAG_IMMED1_MASK));
+ a.emit(x86::Inst::kIdSal, ARG2, shiftCount);
+
+ a.lea(RET, x86::qword_ptr(ARG2, _TAG_IMMED1_SMALL));
+ a.short_().jmp(next);
+ }
+
+ a.bind(generic);
+ {
+ if (Fail.getValue() != 0) {
+ safe_fragment_call(ga->get_i_bsl_guard_shared());
+ a.je(labels[Fail.getValue()]);
+ } else {
+ safe_fragment_call(ga->get_i_bsl_body_shared());
+ }
+ }
+
+ a.bind(next);
+ mov_arg(Dst, RET);
+}
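+
+/* A sketch of the inline `bsl` range check above. The shifted value is
+ * known to remain a small when the count stays below the number of
+ * leading zeroes of the tagged word (negative values are bit-inverted
+ * above the tag first, turning leading ones into leading zeroes);
+ * borderline cases simply take the generic path:
+ *
+ *     UWord word = make_small(a);
+ *     if (signed_val(word) < 0)
+ *         word ^= ~(UWord)_TAG_IMMED1_MASK;
+ *     int safe = n < count_leading_zeroes(word);
+ *
+ * The `ror` trick on RCX additionally forces any non-small RHS to an
+ * enormous unsigned shift count, so the single unsigned compare doubles
+ * as the is-small test. */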