erts/emulator/beam/jit/beam_jit_main.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592

/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2023. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#include "beam_asm.hpp"

extern "C"
{
#include "bif.h"
#include "beam_common.h"
#include "code_ix.h"
#include "export.h"
#include "erl_threads.h"

#if defined(__APPLE__)
#    include <libkern/OSCacheControl.h>
#elif defined(WIN32)
#    include <windows.h>
#endif
}

#ifdef ERLANG_FRAME_POINTERS
ErtsFrameLayout ERTS_WRITE_UNLIKELY(erts_frame_layout);
#endif

/* Global configuration variables (under the `+J` prefix) */
#ifdef HAVE_LINUX_PERF_SUPPORT
enum beamasm_perf_flags erts_jit_perf_support;
#endif

/*
 * Special Beam instructions.
 */

ErtsCodePtr beam_run_process;
ErtsCodePtr beam_normal_exit;
ErtsCodePtr beam_exit;
ErtsCodePtr beam_export_trampoline;
ErtsCodePtr beam_bif_export_trap;
ErtsCodePtr beam_continue_exit;
ErtsCodePtr beam_save_calls;
ErtsCodePtr beam_unloaded_fun;

/* NOTE These should be the only variables containing trace instructions.
**      Sometimes tests are for the instruction value, and sometimes
**      for the variable reference (one of these), and rogue references
**      will most likely cause chaos.
*/
ErtsCodePtr beam_return_to_trace;   /* OpCode(i_return_to_trace) */
ErtsCodePtr beam_return_trace;      /* OpCode(i_return_trace) */
ErtsCodePtr beam_exception_trace;   /* UGLY also OpCode(i_return_trace) */
ErtsCodePtr beam_return_time_trace; /* OpCode(i_return_time_trace) */

static JitAllocator *jit_allocator;

static BeamGlobalAssembler *bga;
static BeamModuleAssembler *bma;
static CpuInfo cpuinfo;

/*
 * Enter all BIFs into the export table.
 *
 * Note that they will all call the error_handler until their modules have been
 * loaded, which may prevent the system from booting if BIFs from non-preloaded
 * modules are apply/3'd while loading code. Ordinary BIF calls will work fine
 * however since they won't go through export entries.
 */
static void install_bifs(void) {
    typedef Eterm (*bif_func_type)(Process *, Eterm *, ErtsCodePtr);
    int i;

    ASSERT(beam_export_trampoline != NULL);
    ASSERT(beam_save_calls != NULL);

    for (i = 0; i < BIF_SIZE; i++) {
        BifEntry *entry;
        Export *ep;
        int j;

        entry = &bif_table[i];

        ep = erts_export_put(entry->module, entry->name, entry->arity);

        sys_memset(&ep->info.u, 0, sizeof(ep->info.u));
        ep->info.mfa.module = entry->module;
        ep->info.mfa.function = entry->name;
        ep->info.mfa.arity = entry->arity;
        ep->bif_number = i;

        for (j = 0; j < ERTS_NUM_CODE_IX; j++) {
            erts_activate_export_trampoline(ep, j);
        }

        /* Set up a hidden export entry so we can trap to this BIF without
         * it being seen when tracing. */
        erts_init_trap_export(BIF_TRAP_EXPORT(i),
                              entry->module,
                              entry->name,
                              entry->arity,
                              (bif_func_type)entry->f);
    }
}

static JitAllocator *create_allocator(JitAllocator::CreateParams *params) {
    void *test_ro, *test_rw;
    Error err;

    auto *allocator = new JitAllocator(params);

    err = allocator->alloc(&test_ro, &test_rw, 1);
    allocator->release(test_ro);

    if (err == ErrorCode::kErrorOk) {
        return allocator;
    }

    delete allocator;
    return nullptr;
}

static JitAllocator *pick_allocator() {
    JitAllocator::CreateParams single_params;
    single_params.reset();

#if defined(HAVE_LINUX_PERF_SUPPORT)
    /* `perf` has a hard time showing symbols for dual-mapped memory, so we'll
     * use single-mapped memory when enabled. */
    if (erts_jit_perf_support & BEAMASM_PERF_ENABLED) {
        if (auto *alloc = create_allocator(&single_params)) {
            return alloc;
        }

        ERTS_INTERNAL_ERROR("jit: Failed to allocate executable+writable "
                            "memory. Either allow this or disable the "
                            "'+JPperf' option.");
    }
#endif

#if !defined(VALGRIND)
    /* Default to dual-mapped memory with separate executable and writable
     * regions of the same code. This is required for platforms that enforce
     * W^X, and we prefer it when available to catch errors sooner.
     *
     * `blockSize` is analogous to "carrier size," and we pick something
     * much larger than the default since dual-mapping implies having one
     * file descriptor per block on most platforms. The block sizes do grow
     * over time, but we don't want to waste half a dozen fds just to get to
     * the shell on platforms that are very fd-constrained. */
    JitAllocator::CreateParams dual_params;

    dual_params.reset();
    dual_params.options = JitAllocatorOptions::kUseDualMapping,
    dual_params.blockSize = 4 << 20;

    if (auto *alloc = create_allocator(&dual_params)) {
        return alloc;
    } else if (auto *alloc = create_allocator(&single_params)) {
        return alloc;
    }

    ERTS_INTERNAL_ERROR("jit: Cannot allocate executable memory. Use the "
                        "interpreter instead.");
#elif defined(VALGRIND)
    if (auto *alloc = create_allocator(&single_params)) {
        return alloc;
    }

    ERTS_INTERNAL_ERROR("jit: the valgrind emulator requires the ability to "
                        "allocate executable+writable memory.");
#endif
}

void beamasm_init() {
    unsigned label = 1;

    ASSERT(bga == nullptr && bma == nullptr);

    struct operands {
        Eterm name;
        int arity;
        BeamInstr operand;
        ErtsCodePtr *target;
    };

    std::vector<struct operands> operands = {
            {am_run_process, 3, op_i_apply_only, &beam_run_process},
            {am_normal_exit, 0, op_normal_exit, &beam_normal_exit},
            {am_continue_exit, 0, op_continue_exit, &beam_continue_exit},
            {am_exception_trace, 0, op_return_trace, &beam_exception_trace},
            {am_return_trace, 0, op_return_trace, &beam_return_trace},
            {am_return_to_trace,
             0,
             op_i_return_to_trace,
             &beam_return_to_trace},
            {am_return_time_trace,
             0,
             op_i_return_time_trace,
             &beam_return_time_trace}};

    Eterm mod_name;
    ERTS_DECL_AM(erts_beamasm);
    mod_name = AM_erts_beamasm;

    /* erts_frame_layout is hardcoded to ERTS_FRAME_LAYOUT_RA when Erlang
     * frame pointers are disabled or unsupported. */
#if defined(ERLANG_FRAME_POINTERS)
#    ifdef HAVE_LINUX_PERF_SUPPORT
    if (erts_jit_perf_support & BEAMASM_PERF_FP) {
        erts_frame_layout = ERTS_FRAME_LAYOUT_FP_RA;
    } else {
        erts_frame_layout = ERTS_FRAME_LAYOUT_RA;
    }
#    else
    erts_frame_layout = ERTS_FRAME_LAYOUT_RA;
#    endif
#else
    ERTS_CT_ASSERT(erts_frame_layout == ERTS_FRAME_LAYOUT_RA);
#endif

    beamasm_metadata_early_init();

    /*
     * Ensure that commonly used fields in the PCB can be accessed with
     * short instructions. Before removing any of these assertions, please
     * consider the effect it will have on code size and/or performance.
     */
    ERTS_CT_ASSERT(offsetof(Process, htop) < 128);
    ERTS_CT_ASSERT(offsetof(Process, stop) < 128);
    ERTS_CT_ASSERT(offsetof(Process, fcalls) < 128);
    ERTS_CT_ASSERT(offsetof(Process, freason) < 128);
    ERTS_CT_ASSERT(offsetof(Process, fvalue) < 128);

#ifdef ERLANG_FRAME_POINTERS
    ERTS_CT_ASSERT(offsetof(Process, frame_pointer) < 128);
#endif

    cpuinfo = CpuInfo::host();

    jit_allocator = pick_allocator();

    bga = new BeamGlobalAssembler(jit_allocator);

    bma = new BeamModuleAssembler(bga,
                                  mod_name,
                                  1 + operands.size() * 2,
                                  operands.size());

    std::vector<ArgVal> args;

    for (auto &op : operands) {
        unsigned func_label, entry_label;

        func_label = label++;
        entry_label = label++;

        args = {ArgVal(ArgVal::Label, func_label),
                ArgVal(ArgVal::Word, sizeof(UWord))};
        bma->emit(op_aligned_label_Lt, args);

        args = {ArgVal(ArgVal::Word, func_label),
                ArgVal(ArgVal::Immediate, mod_name),
                ArgVal(ArgVal::Immediate, op.name),
                ArgVal(ArgVal::Word, op.arity)};
        bma->emit(op_i_func_info_IaaI, args);

        args = {ArgVal(ArgVal::Label, entry_label),
                ArgVal(ArgVal::Word, sizeof(UWord))};
        bma->emit(op_aligned_label_Lt, args);

        args = {};
        bma->emit(op.operand, args);

        op.operand = entry_label;
    }

    args = {};
    bma->emit(op_int_code_end, args);

    {
        /* We have no need of the module pointers as we use `getCode(...)`
         * for everything, and the code will live as long as the emulator
         * itself. */
        const void *_ignored_exec;
        void *_ignored_rw;

        /* Register our global code with gdb/perf so it shows up nicely in
         * stack traces. */
        BeamCodeHeader *_ignored_code_hdr_rw = NULL;
        const BeamCodeHeader *code_hdr_ro = NULL;
        BeamCodeHeader load_header = {};

        load_header.num_functions = operands.size();

        bma->codegen(jit_allocator,
                     &_ignored_exec,
                     &_ignored_rw,
                     &load_header,
                     &code_hdr_ro,
                     &_ignored_code_hdr_rw);
        bma->register_metadata(code_hdr_ro);
    }

    for (auto op : operands) {
        if (op.target) {
            *op.target = bma->getCode(op.operand);
        }
    }

    /* These instructions rely on register contents, and can only be reached
     * from a `call_ext_*`-instruction or trapping from the emulator, hence the
     * lack of wrapper functions. */
    beam_save_calls = (ErtsCodePtr)bga->get_dispatch_save_calls();
    beam_export_trampoline = (ErtsCodePtr)bga->get_export_trampoline();

    /* Used when trappping to Erlang code from the emulator, setting up
     * registers in the same way as call_ext so that save_calls and tracing
     * works when trapping. */
    beam_bif_export_trap = (ErtsCodePtr)bga->get_bif_export_trap();

    beam_exit = (ErtsCodePtr)bga->get_process_exit();

    beam_unloaded_fun = (ErtsCodePtr)bga->get_unloaded_fun();

    beamasm_metadata_late_init();
}

bool BeamAssembler::hasCpuFeature(uint32_t featureId) {
    return cpuinfo.hasFeature(featureId);
}

void init_emulator(void) {
    install_bifs();
}

void process_main(ErtsSchedulerData *esdp) {
    typedef void (*pmain_type)(ErtsSchedulerData *);

    pmain_type pmain = (pmain_type)bga->get_process_main();
    pmain(esdp);
}

extern "C"
{
    int erts_beam_jump_table(void) {
#if defined(NO_JUMP_TABLE)
        return 0;
#else
        return 1;
#endif
    }

    void beamasm_flush_icache(const void *address, size_t size) {
#ifdef DEBUG
        erts_debug_require_code_barrier();
#endif

#if defined(__aarch64__) && defined(WIN32)
        /* Issues full memory/instruction barriers on all threads for us. */
        FlushInstructionCache(GetCurrentProcess(), address, size);
#elif defined(__aarch64__) && defined(__APPLE__)
        /* Issues full memory/instruction barriers on all threads for us. */
        sys_icache_invalidate((char *)address, size);
#elif defined(__aarch64__) && defined(__GNUC__) &&                             \
        defined(ETHR_HAVE_GCC_ASM_ARM_IC_IVAU_INSTRUCTION) &&                  \
        defined(ETHR_HAVE_GCC_ASM_ARM_DC_CVAU_INSTRUCTION) &&                  \
        defined(ERTS_THR_INSTRUCTION_BARRIER)
        /* Note that we do not issue any barriers here, whether instruction or
         * memory. This is on purpose as we must issue those on all schedulers
         * and not just the calling thread, and the chances of us forgetting to
         * do that is much higher if we issue them here. */
        UWord start = reinterpret_cast<UWord>(address);
        UWord end = start + size;

        ETHR_COMPILER_BARRIER;

        for (size_t i = start & ~ERTS_CACHE_LINE_MASK; i < end;
             i += ERTS_CACHE_LINE_SIZE) {
            __asm__ __volatile__("dc cvau, %0\n"
                                 "ic ivau, %0\n" ::"r"(i)
                                 :);
        }
#elif (defined(__x86_64__) || defined(_M_X64)) &&                              \
        defined(ERTS_THR_INSTRUCTION_BARRIER)
        /* We don't need to invalidate cache on this platform, but since we
         * might be modifying code with a different linear address than the one
         * we execute from (dual-mapped memory), we still need to issue an
         * instruction barrier on all schedulers to ensure that the change is
         * visible. */
        (void)address;
        (void)size;
#else
#    error "Platform lacks implementation for clearing instruction cache." \
                "Please report this bug."
#endif
    }

    void *beamasm_new_assembler(Eterm mod,
                                int num_labels,
                                int num_functions,
                                BeamFile *file) {
        return new BeamModuleAssembler(bga,
                                       mod,
                                       num_labels,
                                       num_functions,
                                       file);
    }

    int beamasm_emit(void *instance, unsigned specific_op, BeamOp *op) {
        /* The argument array must be safely convertible from `BeamOpArg*` to
         * `ArgVal*` for us to reuse it directly.
         *
         * The exact traits we need weren't introduced until C++20, but the
         * assertions below will catch just about everything that would break
         * this conversion. */
        static_assert(std::is_base_of<BeamOpArg, ArgVal>::value);
        static_assert(std::is_standard_layout<ArgVal>::value);

        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        const Span<ArgVal> args(static_cast<ArgVal *>(op->a), op->arity);
        return ba->emit(specific_op, args);
    }

    void beamasm_emit_call_nif(const ErtsCodeInfo *info,
                               void *normal_fptr,
                               void *lib,
                               void *dirty_fptr,
                               char *buff,
                               unsigned buff_len) {
        BeamModuleAssembler ba(bga, info->mfa.module, 3);
        std::vector<ArgVal> args;

        args = {ArgVal(ArgVal::Label, 1), ArgVal(ArgVal::Word, sizeof(UWord))};
        ba.emit(op_aligned_label_Lt, args);

        args = {ArgVal(ArgVal::Word, 1),
                ArgVal(ArgVal::Immediate, info->mfa.module),
                ArgVal(ArgVal::Immediate, info->mfa.function),
                ArgVal(ArgVal::Word, info->mfa.arity)};
        ba.emit(op_i_func_info_IaaI, args);

        args = {ArgVal(ArgVal::Label, 2), ArgVal(ArgVal::Word, sizeof(UWord))};
        ba.emit(op_aligned_label_Lt, args);

        args = {};
        ba.emit(op_i_breakpoint_trampoline, args);

        args = {ArgVal(ArgVal::Word, (BeamInstr)normal_fptr),
                ArgVal(ArgVal::Word, (BeamInstr)lib),
                ArgVal(ArgVal::Word, (BeamInstr)dirty_fptr)};
        ba.emit(op_call_nif_WWW, args);

        ba.codegen(buff, buff_len);
    }

    void beamasm_delete_assembler(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        delete ba;
    }

    void beamasm_purge_module(const void *native_module_exec,
                              void *native_module_rw) {
        jit_allocator->release(const_cast<void *>(native_module_exec));
    }

    ErtsCodePtr beamasm_get_code(void *instance, int label) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<ErtsCodePtr>(ba->getCode(label));
    }

    ErtsCodePtr beamasm_get_lambda(void *instance, int index) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<ErtsCodePtr>(ba->getLambda(index));
    }

    const byte *beamasm_get_rodata(void *instance, char *label) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return reinterpret_cast<const byte *>(ba->getCode(label));
    }

    void beamasm_embed_rodata(void *instance,
                              const char *labelName,
                              const char *buff,
                              size_t size) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        if (size) {
            ba->embed_rodata(labelName, buff, size);
        }
    }

    void beamasm_embed_bss(void *instance, char *labelName, size_t size) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        if (size) {
            ba->embed_bss(labelName, size);
        }
    }

    void beamasm_codegen(void *instance,
                         const void **native_module_exec,
                         void **native_module_rw,
                         const BeamCodeHeader *in_hdr,
                         const BeamCodeHeader **out_exec_hdr,
                         BeamCodeHeader **out_rw_hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);

        ba->codegen(jit_allocator,
                    native_module_exec,
                    native_module_rw,
                    in_hdr,
                    out_exec_hdr,
                    out_rw_hdr);
    }

    void beamasm_register_metadata(void *instance, const BeamCodeHeader *hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->register_metadata(hdr);
    }

    Uint beamasm_get_header(void *instance, const BeamCodeHeader **hdr) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);

        *hdr = ba->getCodeHeader();

        return ba->getCodeSize();
    }

    char *beamasm_get_base(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return (char *)ba->getBaseAddress();
    }

    size_t beamasm_get_offset(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->getOffset();
    }

    const ErtsCodeInfo *beamasm_get_on_load(void *instance) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->getOnLoad();
    }

    unsigned int beamasm_patch_catches(void *instance, char *rw_base) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        return ba->patchCatches(rw_base);
    }

    void beamasm_patch_import(void *instance,
                              char *rw_base,
                              int index,
                              BeamInstr import) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchImport(rw_base, index, import);
    }

    void beamasm_patch_literal(void *instance,
                               char *rw_base,
                               int index,
                               Eterm lit) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchLiteral(rw_base, index, lit);
    }

    void beamasm_patch_lambda(void *instance,
                              char *rw_base,
                              int index,
                              BeamInstr fe) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchLambda(rw_base, index, fe);
    }

    void beamasm_patch_strings(void *instance,
                               char *rw_base,
                               const byte *string_table) {
        BeamModuleAssembler *ba = static_cast<BeamModuleAssembler *>(instance);
        ba->patchStrings(rw_base, string_table);
    }
}