diff options
Diffstat (limited to 'kexec/arch/i386/crashdump-x86.c')
-rw-r--r-- | kexec/arch/i386/crashdump-x86.c | 337 |
1 files changed, 154 insertions, 183 deletions
diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c index e44fceb..77bdad5 100644 --- a/kexec/arch/i386/crashdump-x86.c +++ b/kexec/arch/i386/crashdump-x86.c @@ -34,6 +34,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <dirent.h> #include "../../kexec.h" #include "../../kexec-elf.h" #include "../../kexec-syscall.h" @@ -43,16 +44,11 @@ #include "crashdump-x86.h" #ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP #include <xenctrl.h> -#else -#define __XEN_TOOLS__ 1 -#include <xen/xen.h> -#include <xen/memory.h> -#include <xen/sys/privcmd.h> -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ #endif /* HAVE_LIBXENCTRL */ +#include "x86-linux-setup.h" + #include <x86/x86-linux.h> extern struct arch_options_t arch_options; @@ -105,6 +101,36 @@ static int get_kernel_paddr(struct kexec_info *UNUSED(info), return -1; } +/* Retrieve kernel _stext symbol virtual address from /proc/kallsyms */ +static unsigned long long get_kernel_stext_sym(void) +{ + const char *kallsyms = "/proc/kallsyms"; + const char *stext = "_stext"; + char sym[128]; + char line[128]; + FILE *fp; + unsigned long long vaddr; + char type; + + fp = fopen(kallsyms, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s\n", kallsyms); + return 0; + } + + while(fgets(line, sizeof(line), fp) != NULL) { + if (sscanf(line, "%Lx %c %s", &vaddr, &type, sym) != 3) + continue; + if (strcmp(sym, stext) == 0) { + dbgprintf("kernel symbol %s vaddr = %16llx\n", stext, vaddr); + return vaddr; + } + } + + fprintf(stderr, "Cannot get kernel %s symbol address\n", stext); + return 0; +} + /* Retrieve info regarding virtual address kernel has been compiled for and * size of the kernel from /proc/kcore. Current /proc/kcore parsing from * from kexec-tools fails because of malformed elf notes. A kernel patch has @@ -121,8 +147,9 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), struct mem_ehdr ehdr; struct mem_phdr *phdr, *end_phdr; int align; - unsigned long size; + off_t size; uint32_t elf_flags = 0; + uint64_t stext_sym; if (elf_info->machine != EM_X86_64) return 0; @@ -131,8 +158,7 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), return 0; align = getpagesize(); - size = KCORE_ELF_HEADERS_SIZE; - buf = slurp_file_len(kcore, size); + buf = slurp_file_len(kcore, KCORE_ELF_HEADERS_SIZE, &size); if (!buf) { fprintf(stderr, "Cannot read %s: %s\n", kcore, strerror(errno)); return -1; @@ -151,9 +177,36 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), return -1; } - /* Traverse through the Elf headers and find the region where - * kernel is mapped. */ end_phdr = &ehdr.e_phdr[ehdr.e_phnum]; + + /* Traverse through the Elf headers and find the region where + * _stext symbol is located in. That's where kernel is mapped */ + stext_sym = get_kernel_stext_sym(); + for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if (saddr < stext_sym && eaddr > stext_sym) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. */ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + /* If failed to retrieve kernel text mapping through + * /proc/kallsyms, Traverse through the Elf headers again and + * find the region where kernel is mapped using hard-coded + * kernel mapping boundries */ for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { if (phdr->p_type == PT_LOAD) { unsigned long long saddr = phdr->p_vaddr; @@ -175,6 +228,7 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), } } } + fprintf(stderr, "Can't find kernel text map area from kcore\n"); return -1; } @@ -247,6 +301,8 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges, type = RANGE_ACPI; } else if(memcmp(str,"ACPI Non-volatile Storage\n",26) == 0 ) { type = RANGE_ACPI_NVS; + } else if(memcmp(str,"reserved\n",9) == 0 ) { + type = RANGE_RESERVED; } else if (memcmp(str, "GART\n", 5) == 0) { gart_start = start; gart_end = end; @@ -300,34 +356,20 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges, } #ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP static int get_crash_memory_ranges_xen(struct memory_range **range, int *ranges, unsigned long lowmem_limit) { int j, rc, ret = -1; struct e820entry e820entries[CRASH_MAX_MEMORY_RANGES]; unsigned int i; -#ifdef XENCTRL_HAS_XC_INTERFACE xc_interface *xc; -#else - int xc; -#endif -#ifdef XENCTRL_HAS_XC_INTERFACE xc = xc_interface_open(NULL, NULL, 0); if (!xc) { fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; - } -#else - xc = xc_interface_open(); - - if (xc == -1) { - fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; + return -1; } -#endif rc = xc_get_machine_memory_map(xc, e820entries, CRASH_MAX_MEMORY_RANGES); @@ -364,95 +406,6 @@ err: static int get_crash_memory_ranges_xen(struct memory_range **range, int *ranges, unsigned long lowmem_limit) { - int fd, j, rc, ret = -1; - privcmd_hypercall_t hypercall; - struct e820entry *e820entries = NULL; - struct xen_memory_map *xen_memory_map = NULL; - unsigned int i; - - fd = open("/proc/xen/privcmd", O_RDWR); - - if (fd == -1) { - fprintf(stderr, "%s: open(/proc/xen/privcmd): %m\n", __func__); - goto err; - } - - rc = posix_memalign((void **)&e820entries, getpagesize(), - sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(e820entries): %s\n", __func__, strerror(rc)); - e820entries = NULL; - goto err; - } - - rc = posix_memalign((void **)&xen_memory_map, getpagesize(), - sizeof(struct xen_memory_map)); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(xen_memory_map): %s\n", __func__, strerror(rc)); - xen_memory_map = NULL; - goto err; - } - - if (mlock(e820entries, sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES) == -1) { - fprintf(stderr, "%s: mlock(e820entries): %m\n", __func__); - goto err; - } - - if (mlock(xen_memory_map, sizeof(struct xen_memory_map)) == -1) { - fprintf(stderr, "%s: mlock(xen_memory_map): %m\n", __func__); - goto err; - } - - xen_memory_map->nr_entries = CRASH_MAX_MEMORY_RANGES; - set_xen_guest_handle(xen_memory_map->buffer, e820entries); - - hypercall.op = __HYPERVISOR_memory_op; - hypercall.arg[0] = XENMEM_machine_memory_map; - hypercall.arg[1] = (__u64)xen_memory_map; - - rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hypercall); - - if (rc == -1) { - fprintf(stderr, "%s: ioctl(IOCTL_PRIVCMD_HYPERCALL): %m\n", __func__); - goto err; - } - - for (i = 0, j = 0; i < xen_memory_map->nr_entries && - j < CRASH_MAX_MEMORY_RANGES; ++i, ++j) { - crash_memory_range[j].start = e820entries[i].addr; - crash_memory_range[j].end = e820entries[i].addr + e820entries[i].size - 1; - crash_memory_range[j].type = xen_e820_to_kexec_type(e820entries[i].type); - segregate_lowmem_region(&j, lowmem_limit); - } - - *range = crash_memory_range; - *ranges = j; - - qsort(*range, *ranges, sizeof(struct memory_range), compare_ranges); - - for (i = 0; i < crash_reserved_mem_nr; i++) - if (exclude_region(ranges, crash_reserved_mem[i].start, - crash_reserved_mem[i].end) < 0) - goto err; - - ret = 0; - -err: - munlock(xen_memory_map, sizeof(struct xen_memory_map)); - munlock(e820entries, sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES); - free(xen_memory_map); - free(e820entries); - close(fd); - - return ret; -} -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ -#else -static int get_crash_memory_ranges_xen(struct memory_range **range, - int *ranges, unsigned long lowmem_limit) -{ return 0; } #endif /* HAVE_LIBXENCTRL */ @@ -526,14 +479,14 @@ static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end) /* Adds a segment from list of memory regions which new kernel can use to * boot. Segment start and end should be aligned to 1K boundary. */ -static int add_memmap(struct memory_range *memmap_p, unsigned long long addr, - size_t size) +static int add_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size, int type) { int i, j, nr_entries = 0, tidx = 0, align = 1024; unsigned long long mstart, mend; - /* Do alignment check. */ - if ((addr%align) || (size%align)) + /* Do alignment check if it's RANGE_RAM */ + if ((type == RANGE_RAM) && ((addr%align) || (size%align))) return -1; /* Make sure at least one entry in list is free. */ @@ -559,29 +512,23 @@ static int add_memmap(struct memory_range *memmap_p, unsigned long long addr, else if (addr > mend) tidx = i+1; } - /* Insert the memory region. */ - for (j = nr_entries-1; j >= tidx; j--) - memmap_p[j+1] = memmap_p[j]; - memmap_p[tidx].start = addr; - memmap_p[tidx].end = addr + size - 1; + /* Insert the memory region. */ + for (j = nr_entries-1; j >= tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx].start = addr; + memmap_p[tidx].end = addr + size - 1; + memmap_p[tidx].type = type; + *nr_memmap = nr_entries + 1; - dbgprintf("Memmap after adding segment\n"); - for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - mstart = memmap_p[i].start; - mend = memmap_p[i].end; - if (mstart == 0 && mend == 0) - break; - dbgprintf("%016llx - %016llx\n", - mstart, mend); - } + dbgprint_mem_range("Memmap after adding segment", memmap_p, *nr_memmap); return 0; } /* Removes a segment from list of memory regions which new kernel can use to * boot. Segment start and end should be aligned to 1K boundary. */ -static int delete_memmap(struct memory_range *memmap_p, unsigned long long addr, - size_t size) +static int delete_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size) { int i, j, nr_entries = 0, tidx = -1, operation = 0, align = 1024; unsigned long long mstart, mend; @@ -643,24 +590,17 @@ static int delete_memmap(struct memory_range *memmap_p, unsigned long long addr, for (j = nr_entries-1; j > tidx; j--) memmap_p[j+1] = memmap_p[j]; memmap_p[tidx+1] = temp_region; + *nr_memmap = nr_entries + 1; } if ((operation == -1) && tidx >=0) { /* Delete the exact match memory region. */ for (j = i+1; j < CRASH_MAX_MEMMAP_NR; j++) memmap_p[j-1] = memmap_p[j]; memmap_p[j-1].start = memmap_p[j-1].end = 0; + *nr_memmap = nr_entries - 1; } - dbgprintf("Memmap after deleting segment\n"); - for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - mstart = memmap_p[i].start; - mend = memmap_p[i].end; - if (mstart == 0 && mend == 0) { - break; - } - dbgprintf("%016llx - %016llx\n", - mstart, mend); - } + dbgprint_mem_range("Memmap after deleting segment", memmap_p, *nr_memmap); return 0; } @@ -728,18 +668,31 @@ static int cmdline_add_memmap(char *cmdline, struct memory_range *memmap_p) strcat(cmdline, str_mmap); for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - unsigned long startk, endk; - startk = (memmap_p[i].start/1024); - endk = ((memmap_p[i].end + 1)/1024); + unsigned long startk, endk, type; + + startk = memmap_p[i].start/1024; + endk = (memmap_p[i].end + 1)/1024; + type = memmap_p[i].type; + + /* Only adding memory regions of RAM and ACPI */ + if (type != RANGE_RAM && + type != RANGE_ACPI && + type != RANGE_ACPI_NVS) + continue; + + if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + endk = _ALIGN_UP(memmap_p[i].end + 1, 1024)/1024; + if (!startk && !endk) /* All regions traversed. */ break; - /* A region is not worth adding if region size < 100K. It eats - * up precious command line length. */ - if ((endk - startk) < min_sizek) + /* A RAM region is not worth adding if region size < 100K. + * It eats up precious command line length. */ + if (type == RANGE_RAM && (endk - startk) < min_sizek) continue; - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_RAM); + /* And do not add e820 reserved region either */ + cmdline_add_memmap_internal(cmdline, startk, endk, type); } dbgprintf("Command line after adding memmap\n"); @@ -828,24 +781,16 @@ static enum coretype get_core_type(struct crash_elf_info *elf_info, } } -/* Appends memmap=X#Y commandline for ACPI to command line*/ -static int cmdline_add_memmap_acpi(char *cmdline, unsigned long start, - unsigned long end) +static int sysfs_efi_runtime_map_exist(void) { - int align = 1024; - unsigned long startk, endk; + DIR *dir; - if (!(end - start)) + dir = opendir("/sys/firmware/efi/runtime-map"); + if (!dir) return 0; - startk = start/1024; - endk = (end + align - 1)/1024; - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_ACPI); - - dbgprintf("Command line after adding acpi memmap\n"); - dbgprintf("%s\n", cmdline); - - return 0; + closedir(dir); + return 1; } /* Appends 'acpi_rsdp=' commandline for efi boot crash dump */ @@ -915,7 +860,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, { void *tmp; unsigned long sz, bufsz, memsz, elfcorehdr; - int nr_ranges = 0, align = 1024, i; + int nr_ranges = 0, nr_memmap = 0, align = 1024, i; struct memory_range *mem_range, *memmap_p; struct crash_elf_info elf_info; unsigned kexec_arch; @@ -959,10 +904,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, get_backup_area(info, mem_range, nr_ranges); - dbgprintf("CRASH MEMORY RANGES\n"); - - for(i = 0; i < nr_ranges; ++i) - dbgprintf("%016Lx-%016Lx\n", mem_range[i].start, mem_range[i].end); + dbgprint_mem_range("CRASH MEMORY RANGES", mem_range, nr_ranges); /* * if the core type has not been set on command line, set it here @@ -991,10 +933,10 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, sz = (sizeof(struct memory_range) * CRASH_MAX_MEMMAP_NR); memmap_p = xmalloc(sz); memset(memmap_p, 0, sz); - add_memmap(memmap_p, info->backup_src_start, info->backup_src_size); + add_memmap(memmap_p, &nr_memmap, info->backup_src_start, info->backup_src_size, RANGE_RAM); for (i = 0; i < crash_reserved_mem_nr; i++) { sz = crash_reserved_mem[i].end - crash_reserved_mem[i].start +1; - if (add_memmap(memmap_p, crash_reserved_mem[i].start, sz) < 0) + if (add_memmap(memmap_p, &nr_memmap, crash_reserved_mem[i].start, sz, RANGE_RAM) < 0) return ENOCRASHKERNEL; } @@ -1007,7 +949,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, 0, max_addr, -1); dbgprintf("Created backup segment at 0x%lx\n", info->backup_start); - if (delete_memmap(memmap_p, info->backup_start, sz) < 0) + if (delete_memmap(memmap_p, &nr_memmap, info->backup_start, sz) < 0) return EFAILED; } @@ -1043,22 +985,35 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base, max_addr, -1); dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); - if (delete_memmap(memmap_p, elfcorehdr, memsz) < 0) + if (delete_memmap(memmap_p, &nr_memmap, elfcorehdr, memsz) < 0) return -1; - cmdline_add_memmap(mod_cmdline, memmap_p); - cmdline_add_efi(mod_cmdline); + if (!bzImage_support_efi_boot || arch_options.noefi || + !sysfs_efi_runtime_map_exist()) + cmdline_add_efi(mod_cmdline); cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); /* Inform second kernel about the presence of ACPI tables. */ for (i = 0; i < CRASH_MAX_MEMORY_RANGES; i++) { - unsigned long start, end; + unsigned long start, end, size, type; if ( !( mem_range[i].type == RANGE_ACPI - || mem_range[i].type == RANGE_ACPI_NVS) ) + || mem_range[i].type == RANGE_ACPI_NVS + || mem_range[i].type == RANGE_RESERVED)) continue; start = mem_range[i].start; end = mem_range[i].end; - cmdline_add_memmap_acpi(mod_cmdline, start, end); + type = mem_range[i].type; + size = end - start + 1; + add_memmap(memmap_p, &nr_memmap, start, size, type); } + + if (arch_options.pass_memmap_cmdline) + cmdline_add_memmap(mod_cmdline, memmap_p); + + /* Store 2nd kernel boot memory ranges for later reference in + * x86-setup-linux.c: setup_linux_system_parameters() */ + info->crash_range = memmap_p; + info->nr_crash_ranges = nr_memmap; + return 0; } @@ -1100,8 +1055,24 @@ static int crashkernel_mem_callback(void *UNUSED(data), int nr, int is_crashkernel_mem_reserved(void) { - crash_reserved_mem_nr = kexec_iomem_for_each_line("Crash kernel\n", - crashkernel_mem_callback, NULL); + int ret; + + if (xen_present()) { + uint64_t start, end; + + ret = xen_get_crashkernel_region(&start, &end); + if (ret < 0) + return 0; + + crash_reserved_mem[0].start = start; + crash_reserved_mem[0].end = end; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } else { + ret = kexec_iomem_for_each_line("Crash kernel\n", + crashkernel_mem_callback, NULL); + crash_reserved_mem_nr = ret; + } return !!crash_reserved_mem_nr; } |