/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2023. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#include "beam_asm.hpp"

extern "C"
{
#include "bif.h"
#include "beam_common.h"
#include "code_ix.h"
#include "export.h"
#include "erl_threads.h"

#if defined(__APPLE__)
#    include <libkern/OSCacheControl.h>
#elif defined(WIN32)
#    include <windows.h>
#endif
}

#ifdef ERLANG_FRAME_POINTERS
ErtsFrameLayout ERTS_WRITE_UNLIKELY(erts_frame_layout);
#endif

/* Global configuration variables (under the `+J` prefix) */
#ifdef HAVE_LINUX_PERF_SUPPORT
enum beamasm_perf_flags erts_jit_perf_support;
#endif

/*
 * Special Beam instructions.
 */

ErtsCodePtr beam_run_process;
ErtsCodePtr beam_normal_exit;
ErtsCodePtr beam_exit;
ErtsCodePtr beam_export_trampoline;
ErtsCodePtr beam_bif_export_trap;
ErtsCodePtr beam_continue_exit;
ErtsCodePtr beam_save_calls;
ErtsCodePtr beam_unloaded_fun;

/* NOTE These should be the only variables containing trace instructions.
** Sometimes tests are for the instruction value, and sometimes
** for the variable reference (one of these), and rogue references
** will most likely cause chaos.
*/
ErtsCodePtr beam_return_to_trace;   /* OpCode(i_return_to_trace) */
ErtsCodePtr beam_return_trace;      /* OpCode(i_return_trace) */
ErtsCodePtr beam_exception_trace;   /* UGLY also OpCode(i_return_trace) */
ErtsCodePtr beam_return_time_trace; /* OpCode(i_return_time_trace) */

static JitAllocator *jit_allocator;

static BeamGlobalAssembler *bga;
static BeamModuleAssembler *bma;
static CpuInfo cpuinfo;

/*
 * Enter all BIFs into the export table.
 *
 * Note that they will all call the error_handler until their modules have
 * been loaded, which may prevent the system from booting if BIFs from
 * non-preloaded modules are apply/3'd while loading code. Ordinary BIF calls
 * will work fine, however, since they won't go through export entries.
 */
static void install_bifs(void) {
    typedef Eterm (*bif_func_type)(Process *, Eterm *, ErtsCodePtr);
    int i;

    ASSERT(beam_export_trampoline != NULL);
    ASSERT(beam_save_calls != NULL);

    for (i = 0; i < BIF_SIZE; i++) {
        BifEntry *entry;
        Export *ep;
        int j;

        entry = &bif_table[i];

        ep = erts_export_put(entry->module, entry->name, entry->arity);

        sys_memset(&ep->info.u, 0, sizeof(ep->info.u));
        ep->info.mfa.module = entry->module;
        ep->info.mfa.function = entry->name;
        ep->info.mfa.arity = entry->arity;
        ep->bif_number = i;

        for (j = 0; j < ERTS_NUM_CODE_IX; j++) {
            erts_activate_export_trampoline(ep, j);
        }

        /* Set up a hidden export entry so we can trap to this BIF without
         * it being seen when tracing.
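         *
         * (This hidden entry is what a BIF traps through when it yields to
         * itself, e.g. via ERTS_BIF_YIELD*; since it is distinct from the
         * visible export entry set up above, the re-entry stays invisible
         * to call tracing.)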
         */
        erts_init_trap_export(BIF_TRAP_EXPORT(i),
                              entry->module,
                              entry->name,
                              entry->arity,
                              (bif_func_type)entry->f);
    }
}

static JitAllocator *create_allocator(JitAllocator::CreateParams *params) {
    void *test_ro, *test_rw;
    Error err;

    auto *allocator = new JitAllocator(params);

    err = allocator->alloc(&test_ro, &test_rw, 1);
    allocator->release(test_ro);

    if (err == ErrorCode::kErrorOk) {
        return allocator;
    }

    delete allocator;
    return nullptr;
}

static JitAllocator *pick_allocator() {
    JitAllocator::CreateParams single_params;

    single_params.reset();

#if defined(HAVE_LINUX_PERF_SUPPORT)
    /* `perf` has a hard time showing symbols for dual-mapped memory, so we'll
     * use single-mapped memory when enabled. */
    if (erts_jit_perf_support & BEAMASM_PERF_ENABLED) {
        if (auto *alloc = create_allocator(&single_params)) {
            return alloc;
        }

        ERTS_INTERNAL_ERROR("jit: Failed to allocate executable+writable "
                            "memory. Either allow this or disable the "
                            "'+JPperf' option.");
    }
#endif

#if !defined(VALGRIND)
    /* Default to dual-mapped memory with separate executable and writable
     * regions of the same code. This is required for platforms that enforce
     * W^X, and we prefer it when available to catch errors sooner.
     *
     * `blockSize` is analogous to "carrier size," and we pick something much
     * larger than the default since dual-mapping implies having one file
     * descriptor per block on most platforms. The block sizes do grow over
     * time, but we don't want to waste half a dozen fds just to get to the
     * shell on platforms that are very fd-constrained. */
    JitAllocator::CreateParams dual_params;

    dual_params.reset();
    dual_params.options = JitAllocatorOptions::kUseDualMapping;
    dual_params.blockSize = 4 << 20;

    if (auto *alloc = create_allocator(&dual_params)) {
        return alloc;
    } else if (auto *alloc = create_allocator(&single_params)) {
        return alloc;
    }

    ERTS_INTERNAL_ERROR("jit: Cannot allocate executable memory. Use the "
                        "interpreter instead.");
#else
    if (auto *alloc = create_allocator(&single_params)) {
        return alloc;
    }

    ERTS_INTERNAL_ERROR("jit: the valgrind emulator requires the ability to "
                        "allocate executable+writable memory.");
#endif
}

void beamasm_init() {
    unsigned label = 1;

    ASSERT(bga == nullptr && bma == nullptr);

    struct operands {
        Eterm name;
        int arity;
        BeamInstr operand;
        ErtsCodePtr *target;
    };

    std::vector<struct operands> operands =
            {{am_run_process, 3, op_i_apply_only, &beam_run_process},
             {am_normal_exit, 0, op_normal_exit, &beam_normal_exit},
             {am_continue_exit, 0, op_continue_exit, &beam_continue_exit},
             {am_exception_trace, 0, op_return_trace, &beam_exception_trace},
             {am_return_trace, 0, op_return_trace, &beam_return_trace},
             {am_return_to_trace,
              0,
              op_i_return_to_trace,
              &beam_return_to_trace},
             {am_return_time_trace,
              0,
              op_i_return_time_trace,
              &beam_return_time_trace}};

    Eterm mod_name;
    ERTS_DECL_AM(erts_beamasm);
    mod_name = AM_erts_beamasm;

    /* erts_frame_layout is hardcoded to ERTS_FRAME_LAYOUT_RA when Erlang
     * frame pointers are disabled or unsupported. */
#if defined(ERLANG_FRAME_POINTERS)
#    ifdef HAVE_LINUX_PERF_SUPPORT
    if (erts_jit_perf_support & BEAMASM_PERF_FP) {
        erts_frame_layout = ERTS_FRAME_LAYOUT_FP_RA;
    } else {
        erts_frame_layout = ERTS_FRAME_LAYOUT_RA;
    }
#    else
    erts_frame_layout = ERTS_FRAME_LAYOUT_RA;
#    endif
#else
    ERTS_CT_ASSERT(erts_frame_layout == ERTS_FRAME_LAYOUT_RA);
#endif

    beamasm_metadata_early_init();

    /*
     * Ensure that commonly used fields in the PCB can be accessed with
     * short instructions. Before removing any of these assertions, please
     * consider the effect it will have on code size and/or performance.
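     *
     * (For instance, on x86_64 an offset that fits in a signed byte can be
     * encoded as a one-byte displacement instead of a four-byte one.)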
     */
    ERTS_CT_ASSERT(offsetof(Process, htop) < 128);
    ERTS_CT_ASSERT(offsetof(Process, stop) < 128);
    ERTS_CT_ASSERT(offsetof(Process, fcalls) < 128);
    ERTS_CT_ASSERT(offsetof(Process, freason) < 128);
    ERTS_CT_ASSERT(offsetof(Process, fvalue) < 128);

#ifdef ERLANG_FRAME_POINTERS
    ERTS_CT_ASSERT(offsetof(Process, frame_pointer) < 128);
#endif

    cpuinfo = CpuInfo::host();

    jit_allocator = pick_allocator();

    bga = new BeamGlobalAssembler(jit_allocator);

    bma = new BeamModuleAssembler(bga,
                                  mod_name,
                                  1 + operands.size() * 2,
                                  operands.size());

    std::vector<ArgVal> args;

    for (auto &op : operands) {
        unsigned func_label, entry_label;

        func_label = label++;
        entry_label = label++;

        args = {ArgVal(ArgVal::Label, func_label),
                ArgVal(ArgVal::Word, sizeof(UWord))};
        bma->emit(op_aligned_label_Lt, args);

        args = {ArgVal(ArgVal::Word, func_label),
                ArgVal(ArgVal::Immediate, mod_name),
                ArgVal(ArgVal::Immediate, op.name),
                ArgVal(ArgVal::Word, op.arity)};
        bma->emit(op_i_func_info_IaaI, args);

        args = {ArgVal(ArgVal::Label, entry_label),
                ArgVal(ArgVal::Word, sizeof(UWord))};
        bma->emit(op_aligned_label_Lt, args);

        args = {};
        bma->emit(op.operand, args);

        op.operand = entry_label;
    }

    args = {};
    bma->emit(op_int_code_end, args);

    {
        /* We have no need of the module pointers as we use `getCode(...)`
         * for everything, and the code will live as long as the emulator
         * itself. */
        const void *_ignored_exec;
        void *_ignored_rw;

        /* Register our global code with gdb/perf so it shows up nicely in
         * stack traces. */
        BeamCodeHeader *_ignored_code_hdr_rw = NULL;
        const BeamCodeHeader *code_hdr_ro = NULL;
        BeamCodeHeader load_header = {};

        load_header.num_functions = operands.size();

        bma->codegen(jit_allocator,
                     &_ignored_exec,
                     &_ignored_rw,
                     &load_header,
                     &code_hdr_ro,
                     &_ignored_code_hdr_rw);

        bma->register_metadata(code_hdr_ro);
    }

    for (auto op : operands) {
        if (op.target) {
            *op.target = bma->getCode(op.operand);
        }
    }

    /* These instructions rely on register contents, and can only be reached
     * from a `call_ext_*`-instruction or trapping from the emulator, hence
     * the lack of wrapper functions. */
    beam_save_calls = (ErtsCodePtr)bga->get_dispatch_save_calls();
    beam_export_trampoline = (ErtsCodePtr)bga->get_export_trampoline();

    /* Used when trapping to Erlang code from the emulator, setting up
     * registers in the same way as call_ext so that save_calls and tracing
     * work when trapping. */
    beam_bif_export_trap = (ErtsCodePtr)bga->get_bif_export_trap();

    beam_exit = (ErtsCodePtr)bga->get_process_exit();
    beam_unloaded_fun = (ErtsCodePtr)bga->get_unloaded_fun();

    beamasm_metadata_late_init();
}

bool BeamAssembler::hasCpuFeature(uint32_t featureId) {
    return cpuinfo.hasFeature(featureId);
}

void init_emulator(void) {
    install_bifs();
}

void process_main(ErtsSchedulerData *esdp) {
    typedef void (*pmain_type)(ErtsSchedulerData *);

    pmain_type pmain = (pmain_type)bga->get_process_main();
    pmain(esdp);
}

extern "C"
{
    int erts_beam_jump_table(void) {
#if defined(NO_JUMP_TABLE)
        return 0;
#else
        return 1;
#endif
    }

    void beamasm_flush_icache(const void *address, size_t size) {
#ifdef DEBUG
        erts_debug_require_code_barrier();
#endif

#if defined(__aarch64__) && defined(WIN32)
        /* Issues full memory/instruction barriers on all threads for us. */
        FlushInstructionCache(GetCurrentProcess(), address, size);
#elif defined(__aarch64__) && defined(__APPLE__)
        /* Issues full memory/instruction barriers on all threads for us.
         */
        sys_icache_invalidate((char *)address, size);
#elif defined(__aarch64__) && defined(__GNUC__) &&                            \
        defined(ETHR_HAVE_GCC_ASM_ARM_IC_IVAU_INSTRUCTION) &&                 \
        defined(ETHR_HAVE_GCC_ASM_ARM_DC_CVAU_INSTRUCTION) &&                 \
        defined(ERTS_THR_INSTRUCTION_BARRIER)
        /* Note that we do not issue any barriers here, whether instruction
         * or memory. This is on purpose as we must issue those on all
         * schedulers and not just the calling thread, and the chances of us
         * forgetting to do that are much higher if we issue them here. */
        UWord start = reinterpret_cast<UWord>(address);
        UWord end = start + size;

        ETHR_COMPILER_BARRIER;

        for (size_t i = start & ~ERTS_CACHE_LINE_MASK; i < end;
             i += ERTS_CACHE_LINE_SIZE) {
            __asm__ __volatile__("dc cvau, %0\n"
                                 "ic ivau, %0\n" ::"r"(i)
                                 :);
        }
#elif (defined(__x86_64__) || defined(_M_X64)) &&                             \
        defined(ERTS_THR_INSTRUCTION_BARRIER)
        /* We don't need to invalidate the cache on this platform, but since
         * we might be modifying code under a different linear address than
         * the one we execute from (dual-mapped memory), we still need to
         * issue an instruction barrier on all schedulers to ensure that the
         * change is visible. */
        (void)address;
        (void)size;
#else
#    error "Platform lacks implementation for clearing instruction cache. " \
           "Please report this bug."
#endif
    }

    void *beamasm_new_assembler(Eterm mod,
                                int num_labels,
                                int num_functions,
                                BeamFile *file) {
        return new BeamModuleAssembler(bga,
                                       mod,
                                       num_labels,
                                       num_functions,
                                       file);
    }

    int beamasm_emit(void *instance, unsigned specific_op, BeamOp *op) {
        /* The argument array must be safely convertible from `BeamOpArg*` to
         * `ArgVal*` for us to reuse it directly.
         *
         * The exact traits we need weren't introduced until C++20, but the
         * assertions below will catch just about everything that would break
         * this conversion.
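         *
         * (The C++20 trait alluded to is presumably
         * std::is_layout_compatible<BeamOpArg, ArgVal>.)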
         */
        static_assert(std::is_base_of<BeamOpArg, ArgVal>::value);
        static_assert(std::is_standard_layout<ArgVal>::value);

        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        const Span<ArgVal> args(static_cast<ArgVal *>(op->a), op->arity);

        return ba->emit(specific_op, args);
    }

    void beamasm_emit_call_nif(const ErtsCodeInfo *info,
                               void *normal_fptr,
                               void *lib,
                               void *dirty_fptr,
                               char *buff,
                               unsigned buff_len) {
        BeamModuleAssembler ba(bga, info->mfa.module, 3);
        std::vector<ArgVal> args;

        args = {ArgVal(ArgVal::Label, 1), ArgVal(ArgVal::Word, sizeof(UWord))};
        ba.emit(op_aligned_label_Lt, args);

        args = {ArgVal(ArgVal::Word, 1),
                ArgVal(ArgVal::Immediate, info->mfa.module),
                ArgVal(ArgVal::Immediate, info->mfa.function),
                ArgVal(ArgVal::Word, info->mfa.arity)};
        ba.emit(op_i_func_info_IaaI, args);

        args = {ArgVal(ArgVal::Label, 2), ArgVal(ArgVal::Word, sizeof(UWord))};
        ba.emit(op_aligned_label_Lt, args);

        args = {};
        ba.emit(op_i_breakpoint_trampoline, args);

        args = {ArgVal(ArgVal::Word, (BeamInstr)normal_fptr),
                ArgVal(ArgVal::Word, (BeamInstr)lib),
                ArgVal(ArgVal::Word, (BeamInstr)dirty_fptr)};
        ba.emit(op_call_nif_WWW, args);

        ba.codegen(buff, buff_len);
    }

    void beamasm_delete_assembler(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        delete ba;
    }

    void beamasm_purge_module(const void *native_module_exec,
                              void *native_module_rw) {
        jit_allocator->release(const_cast<void *>(native_module_exec));
    }

    ErtsCodePtr beamasm_get_code(void *instance, int label) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<ErtsCodePtr>(ba->getCode(label));
    }

    ErtsCodePtr beamasm_get_lambda(void *instance, int index) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<ErtsCodePtr>(ba->getLambda(index));
    }

    const byte *beamasm_get_rodata(void *instance, char *label) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<const byte *>(ba->getCode(label));
    }

    void beamasm_embed_rodata(void *instance,
                              const char *labelName,
                              const char *buff,
                              size_t size) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        if (size) {
            ba->embed_rodata(labelName, buff, size);
        }
    }

    void beamasm_embed_bss(void *instance, char *labelName, size_t size) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        if (size) {
            ba->embed_bss(labelName, size);
        }
    }

    void beamasm_codegen(void *instance,
                         const void **native_module_exec,
                         void **native_module_rw,
                         const BeamCodeHeader *in_hdr,
                         const BeamCodeHeader **out_exec_hdr,
                         BeamCodeHeader **out_rw_hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->codegen(jit_allocator,
                    native_module_exec,
                    native_module_rw,
                    in_hdr,
                    out_exec_hdr,
                    out_rw_hdr);
    }

    void beamasm_register_metadata(void *instance, const BeamCodeHeader *hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->register_metadata(hdr);
    }

    Uint beamasm_get_header(void *instance, const BeamCodeHeader **hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);

        *hdr = ba->getCodeHeader();

        return ba->getCodeSize();
    }

    char *beamasm_get_base(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return (char *)ba->getBaseAddress();
    }

    size_t beamasm_get_offset(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->getOffset();
    }

    const ErtsCodeInfo *beamasm_get_on_load(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->getOnLoad();
    }

    unsigned int beamasm_patch_catches(void *instance, char *rw_base) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->patchCatches(rw_base);
    }

    void beamasm_patch_import(void *instance,
                              char *rw_base,
                              int index,
                              BeamInstr import) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
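        /* Like the rest of the patch functions below, this writes through
         * `rw_base`, the writable alias of the module's code; with
         * dual-mapped memory the executable mapping itself is never written
         * to directly. */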
        ba->patchImport(rw_base, index, import);
    }

    void beamasm_patch_literal(void *instance,
                               char *rw_base,
                               int index,
                               Eterm lit) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchLiteral(rw_base, index, lit);
    }

    void beamasm_patch_lambda(void *instance,
                              char *rw_base,
                              int index,
                              BeamInstr fe) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchLambda(rw_base, index, fe);
    }

    void beamasm_patch_strings(void *instance,
                               char *rw_base,
                               const byte *string_table) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchStrings(rw_base, string_table);
    }
}
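
/*
 * For orientation: the loader (beam_load.c) drives the beamasm_* API above
 * roughly as sketched below. This is an illustrative outline only; names
 * such as `ba`, `exec` and `rw` are placeholders, and the authoritative
 * sequence lives in the loader itself.
 *
 *     void *ba = beamasm_new_assembler(mod, num_labels, num_functions, file);
 *     // ... beamasm_emit() once per transformed instruction ...
 *     beamasm_codegen(ba, &exec, &rw, hdr, &hdr_exec, &hdr_rw);
 *     // ... beamasm_patch_catches/import/literal/lambda/strings on the
 *     //     writable mapping ...
 *     beamasm_register_metadata(ba, hdr_exec);
 *     beamasm_delete_assembler(ba);
 */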