summaryrefslogtreecommitdiff
path: root/opcodes/riscv-dis.c
diff options
context:
space:
mode:
authorNelson Chu <nelson.chu@sifive.com>2021-07-13 03:09:38 -0700
committerNelson Chu <nelson.chu@sifive.com>2021-08-30 17:36:11 +0800
commit9b9b1092f0a8e6b7d240ea05a74968a883b8a05c (patch)
treedc5a9df30faf6439ec510b2462fc98e04beb1e3a /opcodes/riscv-dis.c
parent590d3faada8a12bf0937bbf68413956dc6a339a9 (diff)
downloadbinutils-gdb-9b9b1092f0a8e6b7d240ea05a74968a883b8a05c.tar.gz
RISC-V: PR27916, Support mapping symbols.
Similar to ARM/AARCH64, we add mapping symbols in the symbol table, to mark the start addresses of data and instructions. The $d means data, and the $x means instruction. Then the disassembler uses these symbols to decide whether we should dump data or instruction. Consider the mapping-04 test case, $ cat tmp.s .text .option norelax .option norvc .fill 2, 4, 0x1001 .byte 1 .word 0 .balign 8 add a0, a0, a0 .fill 5, 2, 0x2002 add a1, a1, a1 .data .word 0x1 # No need to add mapping symbols. .word 0x2 $ riscv64-unknown-elf-as tmp.s -o tmp.o $ riscv64-unknown-elf-objdump -d tmp.o Disassembly of section .text: 0000000000000000 <.text>: 0: 00001001 .word 0x00001001 # Marked $d, .fill directive. 4: 00001001 .word 0x00001001 8: 00000001 .word 0x00000001 # .byte + part of .word. c: 00 .byte 0x00 # remaining .word. d: 00 .byte 0x00 # Marked $d, odd byte of alignment. e: 0001 nop # Marked $x, nops for alignment. 10: 00a50533 add a0,a0,a0 14: 20022002 .word 0x20022002 # Marked $d, .fill directive. 18: 20022002 .word 0x20022002 1c: 2002 .short 0x2002 1e: 00b585b3 add a1,a1,a1 # Marked $x. 22: 0001 nop # Section tail alignment. 24: 00000013 nop * Use $d and $x to mark the distribution of data and instructions. Alignments of code are recognized as instructions, since we usually fill nops for them. * If the alignment has odd bytes, then we cannot just fill the nops into the spaces. We always fill an odd byte 0x00 at the start of the spaces. Therefore, add a $d mapping symbol for the odd byte, to tell the disassembler that it isn't an instruction. The behavior is the same as Arm and Aarch64. The elf/linux toolchain regressions all passed. Besides, I also disable the mapping symbols internally, but use the new objdump, the regressions passed, too. Therefore, the new objdump should dump the objects correctly, even if they don't have any mapping symbols. bfd/ pr 27916 * cpu-riscv.c (riscv_elf_is_mapping_symbols): Define mapping symbols. * cpu-riscv.h: extern riscv_elf_is_mapping_symbols. 
* elfnn-riscv.c (riscv_maybe_function_sym): Do not choose mapping symbols as a function name. (riscv_elf_is_target_special_symbol): Add mapping symbols. binutils/ pr 27916 * testsuite/binutils-all/readelf.s: Updated. * testsuite/binutils-all/readelf.s-64: Likewise. * testsuite/binutils-all/readelf.s-64-unused: Likewise. * testsuite/binutils-all/readelf.ss: Likewise. * testsuite/binutils-all/readelf.ss-64: Likewise. * testsuite/binutils-all/readelf.ss-64-unused: Likewise. gas/ pr 27916 * config/tc-riscv.c (make_mapping_symbol): Create a new mapping symbol. (riscv_mapping_state): Decide whether to create mapping symbol for frag_now. Only add the mapping symbols to text sections. (riscv_add_odd_padding_symbol): Add the mapping symbols for the riscv_handle_align, which have odd bytes spaces. (riscv_check_mapping_symbols): Remove any excess mapping symbols. (md_assemble): Marked as MAP_INSN. (riscv_frag_align_code): Marked as MAP_INSN. (riscv_init_frag): Add mapping symbols for frag, it usually called by frag_var. Marked as MAP_DATA for rs_align and rs_fill, and marked as MAP_INSN for rs_align_code. (s_riscv_insn): Marked as MAP_INSN. (riscv_adjust_symtab): Call riscv_check_mapping_symbols. * config/tc-riscv.h (md_cons_align): Defined to riscv_mapping_state with MAP_DATA. (TC_SEGMENT_INFO_TYPE): Record mapping state for each segment. (TC_FRAG_TYPE): Record the first and last mapping symbols for the fragments. The first mapping symbol must be placed at the start of the fragment. (TC_FRAG_INIT): Defined to riscv_init_frag. * testsuite/gas/riscv/mapping-01.s: New testcase. * testsuite/gas/riscv/mapping-01a.d: Likewise. * testsuite/gas/riscv/mapping-01b.d: Likewise. * testsuite/gas/riscv/mapping-02.s: Likewise. * testsuite/gas/riscv/mapping-02a.d: Likewise. * testsuite/gas/riscv/mapping-02b.d: Likewise. * testsuite/gas/riscv/mapping-03.s: Likewise. * testsuite/gas/riscv/mapping-03a.d: Likewise. * testsuite/gas/riscv/mapping-03b.d: Likewise. 
* testsuite/gas/riscv/mapping-04.s: Likewise. * testsuite/gas/riscv/mapping-04a.d: Likewise. * testsuite/gas/riscv/mapping-04b.d: Likewise. * testsuite/gas/riscv/mapping-norelax-04a.d: Likewise. * testsuite/gas/riscv/mapping-norelax-04b.d: Likewise. * testsuite/gas/riscv/no-relax-align.d: Updated. * testsuite/gas/riscv/no-relax-align-2.d: Likewise. include/ pr 27916 * opcode/riscv.h (enum riscv_seg_mstate): Added. opcodes/ pr 27916 * riscv-dis.c (last_map_symbol, last_stop_offset, last_map_state): Added to dump sections with mapping symbols. (riscv_get_map_state): Get the mapping state from the symbol. (riscv_search_mapping_symbol): Check the sorted symbol table, and then find the suitable mapping symbol. (riscv_data_length): Decide which data size we should print. (riscv_disassemble_data): Dump the data contents. (print_insn_riscv): Handle the mapping symbols. (riscv_symbol_is_valid): Marked mapping symbols as invalid.
Diffstat (limited to 'opcodes/riscv-dis.c')
-rw-r--r--opcodes/riscv-dis.c245
1 files changed, 233 insertions, 12 deletions
diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index fe8dfb88d90..30b42ef3aec 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -41,6 +41,11 @@ struct riscv_private_data
bfd_vma hi_addr[OP_MASK_RD + 1];
};
+/* Mapping-symbol state kept between calls: symtab index of the last mapping symbol found (-1 if none), info->stop_offset at that search, and the last mapping state. */
+static int last_map_symbol = -1;
+static bfd_vma last_stop_offset = 0;
+enum riscv_seg_mstate last_map_state;
+
static const char * const *riscv_gpr_names;
static const char * const *riscv_fpr_names;
@@ -556,13 +561,209 @@ riscv_disassemble_insn (bfd_vma memaddr, insn_t word, disassemble_info *info)
return insnlen;
}
+/* Return true and set *STATE (MAP_INSN for $x, MAP_DATA for $d) if symtab entry N is a
+ mapping symbol; otherwise, or if it lies outside INFO->section, return false with *STATE unchanged. */
+
+static bool
+riscv_get_map_state (int n,
+ enum riscv_seg_mstate *state,
+ struct disassemble_info *info)
+{
+ const char *name;
+
+ /* If the symbol is in a different section, ignore it. */
+ if (info->section != NULL
+ && info->section != info->symtab[n]->section)
+ return false;
+
+ name = bfd_asymbol_name(info->symtab[n]);
+ if (strcmp (name, "$x") == 0)
+ *state = MAP_INSN;
+ else if (strcmp (name, "$d") == 0)
+ *state = MAP_DATA;
+ else
+ return false;
+
+ return true;
+}
+
+/* Check the sorted symbol table (sorted by the symbol value), find the mapping symbol that
+ applies to MEMADDR and return its state, or a section-based default if there is none. */
+
+static enum riscv_seg_mstate
+riscv_search_mapping_symbol (bfd_vma memaddr,
+ struct disassemble_info *info)
+{
+ enum riscv_seg_mstate mstate;
+ bool from_last_map_symbol;
+ bool found = false;
+ int symbol = -1;
+ int n;
+
+ /* Decide whether to print the data or instruction by default, in case we cannot find the
+ corresponding mapping symbols: instructions for code sections or no section, data otherwise. */
+ mstate = MAP_DATA;
+ if ((info->section
+ && info->section->flags & SEC_CODE)
+ || !info->section)
+ mstate = MAP_INSN;
+
+ if (info->symtab_size == 0
+ || bfd_asymbol_flavour (*info->symtab) != bfd_target_elf_flavour)
+ return mstate;
+
+ /* Reset the last_map_symbol when MEMADDR is zero, which is taken as the start of a new section dump. */
+ if (memaddr <= 0)
+ last_map_symbol = -1;
+
+ /* If the last stop offset is different from the current one, then
+ don't use the last_map_symbol to search. We usually reset the
+ info->stop_offset when handling a new section. */
+ from_last_map_symbol = (last_map_symbol >= 0
+ && info->stop_offset == last_stop_offset);
+
+ /* Start scanning at the start of the function, or wherever
+ we finished last time. */
+ n = info->symtab_pos + 1;
+ if (from_last_map_symbol && n >= last_map_symbol)
+ n = last_map_symbol;
+
+ /* Find the suitable mapping symbol to dump. */
+ for (; n < info->symtab_size; n++)
+ {
+ bfd_vma addr = bfd_asymbol_value (info->symtab[n]);
+ /* We have searched all possible symbols in the range. */
+ if (addr > memaddr)
+ break;
+ if (riscv_get_map_state (n, &mstate, info))
+ {
+ symbol = n;
+ found = true;
+ /* Do not stop searching, in case there are some mapping
+ symbols that have the same value, but have different names.
+ Use the last one. */
+ }
+ }
+
+ /* We could not find a suitable mapping symbol above. Therefore, we
+ look backwards and try to find it again, but don't go past the start
+ of the section. Otherwise a data section without mapping symbols
+ can pick up a text mapping symbol of a preceding section. */
+ if (!found)
+ {
+ n = info->symtab_pos;
+ if (from_last_map_symbol && n >= last_map_symbol)
+ n = last_map_symbol;
+
+ for (; n >= 0; n--)
+ {
+ bfd_vma addr = bfd_asymbol_value (info->symtab[n]);
+ /* We have searched all possible symbols in the range. */
+ if (addr < (info->section ? info->section->vma : 0))
+ break;
+ /* Stop searching once we find the closest mapping symbol. */
+ if (riscv_get_map_state (n, &mstate, info))
+ {
+ symbol = n;
+ found = true;
+ break;
+ }
+ }
+ }
+
+ /* Save the information for next use. */
+ last_map_symbol = symbol;
+ last_stop_offset = info->stop_offset;
+
+ return mstate;
+}
+
+/* Decide which data size we should print at MEMADDR: at most 4 bytes, clipped at the next mapping symbol or the section end, and never 3. */
+
+static bfd_vma
+riscv_data_length (bfd_vma memaddr,
+ disassemble_info *info)
+{
+ bfd_vma length;
+ bool found = false;
+
+ length = 4;
+ if (info->symtab_size != 0
+ && bfd_asymbol_flavour (*info->symtab) == bfd_target_elf_flavour
+ && last_map_symbol >= 0)
+ {
+ int n;
+ enum riscv_seg_mstate m = MAP_NONE;
+ for (n = last_map_symbol + 1; n < info->symtab_size; n++)
+ {
+ bfd_vma addr = bfd_asymbol_value (info->symtab[n]);
+ if (addr > memaddr
+ && riscv_get_map_state (n, &m, info))
+ {
+ if (addr - memaddr < length)
+ length = addr - memaddr;
+ found = true;
+ break;
+ }
+ }
+ }
+ if (!found)
+ {
+ /* Do not set the length which exceeds the section size. NOTE(review): info->section is dereferenced unchecked here -- confirm it cannot be NULL on this path. */
+ bfd_vma offset = info->section->vma + info->section->size;
+ offset -= memaddr;
+ length = (offset < length) ? offset : length;
+ }
+ length = length == 3 ? 2 : length; /* riscv_disassemble_data has no 3-byte chunk case. */
+ return length;
+}
+
+/* Dump DATA as a .byte/.short/.word/.dword directive chosen by info->bytes_per_chunk, and return that chunk size. */
+
+static int
+riscv_disassemble_data (bfd_vma memaddr ATTRIBUTE_UNUSED,
+ insn_t data,
+ disassemble_info *info)
+{
+ info->display_endian = info->endian;
+
+ switch (info->bytes_per_chunk)
+ {
+ case 1:
+ info->bytes_per_line = 6;
+ (*info->fprintf_func) (info->stream, ".byte\t0x%02llx",
+ (unsigned long long) data);
+ break;
+ case 2:
+ info->bytes_per_line = 8;
+ (*info->fprintf_func) (info->stream, ".short\t0x%04llx",
+ (unsigned long long) data);
+ break;
+ case 4:
+ info->bytes_per_line = 8;
+ (*info->fprintf_func) (info->stream, ".word\t0x%08llx",
+ (unsigned long long) data);
+ break;
+ case 8:
+ info->bytes_per_line = 8;
+ (*info->fprintf_func) (info->stream, ".dword\t0x%016llx",
+ (unsigned long long) data);
+ break;
+ default:
+ abort ();
+ }
+ return info->bytes_per_chunk;
+}
+
int
print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
{
- bfd_byte packet[2];
+ bfd_byte packet[8];
insn_t insn = 0;
- bfd_vma n;
+ bfd_vma dump_size;
int status;
+ enum riscv_seg_mstate mstate;
+ int (*riscv_disassembler) (bfd_vma, insn_t, struct disassemble_info *);
if (info->disassembler_options != NULL)
{
@@ -573,23 +774,42 @@ print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
else if (riscv_gpr_names == NULL)
set_default_riscv_dis_options ();
- /* Instructions are a sequence of 2-byte packets in little-endian order. */
- for (n = 0; n < sizeof (insn) && n < riscv_insn_length (insn); n += 2)
+ mstate = riscv_search_mapping_symbol (memaddr, info);
+ /* Save the last mapping state. */
+ last_map_state = mstate;
+
+ /* Set the size to dump. */
+ if (mstate == MAP_DATA
+ && (info->flags & DISASSEMBLE_DATA) == 0)
+ {
+ dump_size = riscv_data_length (memaddr, info);
+ info->bytes_per_chunk = dump_size;
+ riscv_disassembler = riscv_disassemble_data;
+ }
+ else
{
- status = (*info->read_memory_func) (memaddr + n, packet, 2, info);
+ /* Get the first 2-bytes to check the length of instruction. */
+ status = (*info->read_memory_func) (memaddr, packet, 2, info);
if (status != 0)
{
- /* Don't fail just because we fell off the end. */
- if (n > 0)
- break;
(*info->memory_error_func) (status, memaddr, info);
- return status;
+ return 1;
}
+ insn = (insn_t) bfd_getl16 (packet);
+ dump_size = riscv_insn_length (insn);
+ riscv_disassembler = riscv_disassemble_insn;
+ }
- insn |= ((insn_t) bfd_getl16 (packet)) << (8 * n);
+ /* Fetch the instruction to dump. */
+ status = (*info->read_memory_func) (memaddr, packet, dump_size, info);
+ if (status != 0)
+ {
+ (*info->memory_error_func) (status, memaddr, info);
+ return 1;
}
+ insn = (insn_t) bfd_get_bits (packet, dump_size * 8, false);
- return riscv_disassemble_insn (memaddr, insn, info);
+ return (*riscv_disassembler) (memaddr, insn, info);
}
disassembler_ftype
@@ -631,7 +851,8 @@ riscv_symbol_is_valid (asymbol * sym,
name = bfd_asymbol_name (sym);
- return (strcmp (name, RISCV_FAKE_LABEL_NAME) != 0);
+ return (strcmp (name, RISCV_FAKE_LABEL_NAME) != 0
+ && !riscv_elf_is_mapping_symbols (name));
}
void