diff options
78 files changed, 4296 insertions, 1432 deletions
diff --git a/Makefile.in b/Makefile.in index c1859d1..54c206f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -22,6 +22,7 @@ includedir = @includedir@ # The target architecture ARCH = @ARCH@ +SUBARCH = @SUBARCH@ OBJDIR = @OBJDIR@ target = @target@ host = @host@ diff --git a/config/config.guess b/config/config.guess index d622a44..c954d5e 100755 --- a/config/config.guess +++ b/config/config.guess @@ -4,7 +4,7 @@ # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. -timestamp='2012-02-10' +timestamp='2013-10-21' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -972,6 +972,9 @@ EOF ppc64:Linux:*:*) echo powerpc64-unknown-linux-gnu exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-gnu + exit ;; ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; diff --git a/configure.ac b/configure.ac index 7b61dbf..ebacc05 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ dnl dnl dnl ---Required -AC_INIT(kexec-tools, 2.0.4.git) +AC_INIT(kexec-tools, 2.0.8.git) AC_CONFIG_AUX_DIR(./config) AC_CONFIG_HEADERS([include/config.h]) AC_LANG(C) @@ -29,6 +29,14 @@ case $target_cpu in ;; powerpc64 ) ARCH="ppc64" + SUBARCH="BE" + ;; + powerpc64le ) + ARCH="ppc64" + SUBARCH="LE" + ;; + aarch64* ) + ARCH="arm64" ;; arm* ) ARCH="arm" @@ -45,7 +53,7 @@ case $target_cpu in cris|crisv32 ) ARCH="cris" ;; - ia64|x86_64|alpha ) + ia64|x86_64|alpha|m68k ) ARCH="$target_cpu" ;; * ) @@ -161,11 +169,8 @@ fi dnl find Xen control stack libraries if test "$with_xen" = yes ; then AC_CHECK_HEADER(xenctrl.h, - AC_CHECK_LIB(xenctrl, xc_version, , + AC_CHECK_LIB(xenctrl, xc_kexec_load, , AC_MSG_NOTICE([Xen support disabled]))) - if test "$ac_cv_lib_xenctrl_xc_version" = yes ; then - AC_CHECK_FUNCS(xc_get_machine_memory_map) - fi fi dnl ---Sanity checks @@ -197,6 +202,7 @@ AC_SUBST([TARGET_CFLAGS]) AC_SUBST([ASFLAGS]) AC_SUBST([ARCH]) 
+AC_SUBST([SUBARCH]) AC_SUBST([OBJDIR]) AC_SUBST([INSTALL]) diff --git a/include/elf.h b/include/elf.h index 1a2eb5a..5db637b 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1984,6 +1984,10 @@ typedef Elf32_Addr Elf32_Conflict; #define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */ #define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */ #define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */ +#define R_PPC64_REL16 249 /* half16 (sym+add-.) */ +#define R_PPC64_REL16_LO 250 /* half16 (sym+add-.)@l */ +#define R_PPC64_REL16_HI 251 /* half16 (sym+add-.)@h */ +#define R_PPC64_REL16_HA 252 /* half16 (sym+add-.)@ha */ /* Keep this the last entry. */ #define R_PPC64_NUM 107 diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h index 0949dc2..50c7324 100644 --- a/include/x86/x86-linux.h +++ b/include/x86/x86-linux.h @@ -12,8 +12,6 @@ #ifndef ASSEMBLY -#define PACKED __attribute__((packed)) - #ifndef E820_RAM struct e820entry { uint64_t addr; /* start of memory segment */ @@ -23,7 +21,7 @@ struct e820entry { #define E820_RESERVED 2 #define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ #define E820_NVS 4 -} PACKED; +} __attribute__((packed)); #endif /* FIXME expand on drive_info_)struct... 
*/ @@ -115,7 +113,8 @@ struct x86_linux_param_header { uint32_t ext_ramdisk_image; /* 0xc0 */ uint32_t ext_ramdisk_size; /* 0xc4 */ uint32_t ext_cmd_line_ptr; /* 0xc8 */ - uint8_t reserved4_1[0x1e0 - 0xcc]; /* 0xcc */ + uint8_t reserved4_1[0x1c0 - 0xcc]; /* 0xe4 */ + uint8_t efi_info[32]; /* 0x1c0 */ uint32_t alt_mem_k; /* 0x1e0 */ uint8_t reserved5[4]; /* 0x1e4 */ uint8_t e820_map_nr; /* 0x1e8 */ @@ -255,7 +254,7 @@ struct x86_linux_header { uint64_t pref_address; /* 0x258 */ uint32_t init_size; /* 0x260 */ uint32_t handover_offset; /* 0x264 */ -} PACKED; +} __attribute__((packed)); #endif /* ASSEMBLY */ diff --git a/kexec/Makefile b/kexec/Makefile index 8a6138d..fac6680 100644 --- a/kexec/Makefile +++ b/kexec/Makefile @@ -25,10 +25,11 @@ KEXEC_SRCS_base += kexec/phys_arch.c KEXEC_SRCS_base += kexec/kernel_version.c KEXEC_SRCS_base += kexec/lzma.c KEXEC_SRCS_base += kexec/zlib.c +KEXEC_SRCS_base += kexec/kexec-xen.c KEXEC_GENERATED_SRCS += $(PURGATORY_HEX_C) -dist += kexec/Makefile $(KEXEC_GENERATED_SRCS) \ +dist += kexec/Makefile \ $(KEXEC_SRCS_base) kexec/crashdump-elf.c \ kexec/crashdump.h kexec/firmware_memmap.h \ kexec/kexec-elf-boot.h \ @@ -68,10 +69,16 @@ dist += kexec/fs2dt.c kexec/fs2dt.h $(ARCH)_FS2DT = KEXEC_SRCS += $($(ARCH)_FS2DT) +dist += kexec/dt-ops.c kexec/dt-ops.h +$(ARCH)_DT_OPS = +KEXEC_SRCS += $($(ARCH)_DT_OPS) + include $(srcdir)/kexec/arch/alpha/Makefile include $(srcdir)/kexec/arch/arm/Makefile +include $(srcdir)/kexec/arch/arm64/Makefile include $(srcdir)/kexec/arch/i386/Makefile include $(srcdir)/kexec/arch/ia64/Makefile +include $(srcdir)/kexec/arch/m68k/Makefile include $(srcdir)/kexec/arch/mips/Makefile include $(srcdir)/kexec/arch/cris/Makefile include $(srcdir)/kexec/arch/ppc/Makefile diff --git a/kexec/arch/arm/crashdump-arm.c b/kexec/arch/arm/crashdump-arm.c index 01973b1..b523e5f 100644 --- a/kexec/arch/arm/crashdump-arm.c +++ b/kexec/arch/arm/crashdump-arm.c @@ -20,6 +20,7 @@ * along with this program; if not, write to the Free 
Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <limits.h> #include <elf.h> #include <errno.h> #include <stdio.h> @@ -31,6 +32,13 @@ #include "../../crashdump.h" #include "crashdump-arm.h" +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ELFDATANATIVE ELFDATA2MSB +#else +#error "Unknown machine endian" +#endif /* * Used to save various memory ranges/regions needed for the captured @@ -47,12 +55,71 @@ static struct memory_range crash_reserved_mem; static struct crash_elf_info elf_info = { .class = ELFCLASS32, - .data = ELFDATA2LSB, + .data = ELFDATANATIVE, .machine = EM_ARM, - .page_offset = PAGE_OFFSET, + .page_offset = DEFAULT_PAGE_OFFSET, }; unsigned long phys_offset; +extern unsigned long long user_page_offset; + +/* Retrieve kernel _stext symbol virtual address from /proc/kallsyms */ +static unsigned long long get_kernel_stext_sym(void) +{ + const char *kallsyms = "/proc/kallsyms"; + const char *stext = "_stext"; + char sym[128]; + char line[128]; + FILE *fp; + unsigned long long vaddr; + char type; + + fp = fopen(kallsyms, "r"); if (!fp) { + fprintf(stderr, "Cannot open %s\n", kallsyms); + return 0; + } + + while(fgets(line, sizeof(line), fp) != NULL) { + if (sscanf(line, "%Lx %c %s", &vaddr, &type, sym) != 3) + continue; + if (strcmp(sym, stext) == 0) { + dbgprintf("kernel symbol %s vaddr = %16llx\n", stext, vaddr); + return vaddr; + } + } + + fprintf(stderr, "Cannot get kernel %s symbol address\n", stext); + return 0; +} + +static int get_kernel_page_offset(struct kexec_info *info, + struct crash_elf_info *elf_info) +{ + unsigned long long stext_sym_addr = get_kernel_stext_sym(); + if (stext_sym_addr == 0) { + if (user_page_offset != (-1ULL)) { + elf_info->page_offset = user_page_offset; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use user provided vaule: %llx\n", + elf_info->page_offset); + return 0; + } + elf_info->page_offset = 
(unsigned long long)DEFAULT_PAGE_OFFSET; + dbgprintf("Unable to get _stext symbol from /proc/kallsyms, " + "use default: %llx\n", + elf_info->page_offset); + return 0; + } else if ((user_page_offset != (-1ULL)) && + (user_page_offset != (stext_sym_addr & (~KVBASE_MASK)))) { /* compare with the derived PAGE_OFFSET, not the raw _stext address */ + fprintf(stderr, "PAGE_OFFSET is set to %llx " + "instead of user provided value %llx\n", + stext_sym_addr & (~KVBASE_MASK), + user_page_offset); + } + elf_info->page_offset = stext_sym_addr & (~KVBASE_MASK); + dbgprintf("page_offset is set to %llx\n", elf_info->page_offset); + return 0; +} /** * crash_range_callback() - callback called for each iomem region @@ -68,8 +135,8 @@ unsigned long phys_offset; * regions is placed in @crash_memory_nr_ranges. */ static int crash_range_callback(void *UNUSED(data), int UNUSED(nr), - char *str, unsigned long base, - unsigned long length) + char *str, unsigned long long base, + unsigned long long length) { struct memory_range *range; @@ -269,6 +336,7 @@ int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline) unsigned long bufsz; void *buf; int err; + int last_ranges; /* * First fetch all the memory (RAM) ranges that we are going to pass to @@ -285,10 +353,28 @@ int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline) phys_offset = usablemem_rgns.ranges->start; dbgprintf("phys_offset: %#lx\n", phys_offset); - err = crash_create_elf32_headers(info, &elf_info, + if (get_kernel_page_offset(info, &elf_info)) + return -1; + + last_ranges = usablemem_rgns.size - 1; + if (last_ranges < 0) + last_ranges = 0; + + if (crash_memory_ranges[last_ranges].end > ULONG_MAX) { + + /* for support LPAE enabled kernel*/ + elf_info.class = ELFCLASS64; + + err = crash_create_elf64_headers(info, &elf_info, + usablemem_rgns.ranges, + usablemem_rgns.size, &buf, &bufsz, + ELF_CORE_HEADER_ALIGN); + } else { + err = crash_create_elf32_headers(info, &elf_info, usablemem_rgns.ranges, usablemem_rgns.size, &buf, &bufsz, ELF_CORE_HEADER_ALIGN); + } if (err) return err; diff --git 
a/kexec/arch/arm/crashdump-arm.h b/kexec/arch/arm/crashdump-arm.h index a342922..2dbde04 100644 --- a/kexec/arch/arm/crashdump-arm.h +++ b/kexec/arch/arm/crashdump-arm.h @@ -6,9 +6,11 @@ extern "C" { #endif #define COMMAND_LINE_SIZE 1024 -#define PAGE_OFFSET 0xc0000000 +#define DEFAULT_PAGE_OFFSET (0xc0000000) +#define KVBASE_MASK (0x1ffffff) #define CRASH_MAX_MEMORY_RANGES 32 + extern struct memory_ranges usablemem_rgns; struct kexec_info; diff --git a/kexec/arch/arm/include/arch/options.h b/kexec/arch/arm/include/arch/options.h index b355c26..6437c7d 100644 --- a/kexec/arch/arm/include/arch/options.h +++ b/kexec/arch/arm/include/arch/options.h @@ -5,8 +5,10 @@ #define OPT_APPEND 'a' #define OPT_RAMDISK 'r' -#define OPT_DTB (OPT_ARCH_MAX+0) -#define OPT_ATAGS (OPT_ARCH_MAX+1) +#define OPT_DTB (OPT_ARCH_MAX+0) +#define OPT_ATAGS (OPT_ARCH_MAX+1) +#define OPT_IMAGE_SIZE (OPT_ARCH_MAX+2) +#define OPT_PAGE_OFFSET (OPT_ARCH_MAX+3) /* Options relevant to the architecture (excluding loader-specific ones), * in this case none: @@ -37,8 +39,12 @@ { "initrd", 1, 0, OPT_RAMDISK }, \ { "ramdisk", 1, 0, OPT_RAMDISK }, \ { "dtb", 1, 0, OPT_DTB }, \ - { "atags", 0, 0, OPT_ATAGS }, + { "atags", 0, 0, OPT_ATAGS }, \ + { "image-size", 1, 0, OPT_IMAGE_SIZE }, \ + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, -#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "a:r:" +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR "a:r:s:" + +extern unsigned int kexec_arm_image_size; #endif /* KEXEC_ARCH_ARM_OPTIONS_H */ diff --git a/kexec/arch/arm/kexec-arm.c b/kexec/arch/arm/kexec-arm.c index 8646833..6e8e320 100644 --- a/kexec/arch/arm/kexec-arm.c +++ b/kexec/arch/arm/kexec-arm.c @@ -81,9 +81,15 @@ struct file_type file_type[] = { }; int file_types = sizeof(file_type) / sizeof(file_type[0]); - void arch_usage(void) { + printf(" --image-size=<size>\n" + " Specify the assumed total image size of\n" + " the kernel that is about to be loaded,\n" + " including the .bss section, as reported\n" + " by 
'arm-linux-size vmlinux'. If not\n" + " specified, this value is implicitly set\n" + " to the compressed images size * 4.\n"); } int arch_process_options(int argc, char **argv) diff --git a/kexec/arch/arm/kexec-zImage-arm.c b/kexec/arch/arm/kexec-zImage-arm.c index bb1b002..ff4e38d 100644 --- a/kexec/arch/arm/kexec-zImage-arm.c +++ b/kexec/arch/arm/kexec-zImage-arm.c @@ -24,6 +24,8 @@ #define BOOT_PARAMS_SIZE 1536 off_t initrd_base = 0, initrd_size = 0; +unsigned int kexec_arm_image_size = 0; +unsigned long long user_page_offset = (-1ULL); struct tag_header { uint32_t size; @@ -103,6 +105,8 @@ void zImage_arm_usage(void) " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" " --dtb=FILE Use FILE as the fdt blob.\n" " --atags Use ATAGs instead of device-tree.\n" + " --page-offset=PAGE_OFFSET\n" + " Set PAGE_OFFSET of crash dump vmcore\n" ); } @@ -215,12 +219,73 @@ int atag_arm_load(struct kexec_info *info, unsigned long base, return 0; } +static int setup_dtb_prop(char **bufp, off_t *sizep, const char *node_name, + const char *prop_name, const void *val, int len) +{ + char *dtb_buf; + off_t dtb_size; + int off; + int prop_len = 0; + const struct fdt_property *prop; + + if ((bufp == NULL) || (sizep == NULL) || (*bufp == NULL)) + die("Internal error\n"); + + dtb_buf = *bufp; + dtb_size = *sizep; + + /* check if the subnode has already exist */ + off = fdt_path_offset(dtb_buf, node_name); + if (off == -FDT_ERR_NOTFOUND) { + dtb_size += fdt_node_len(node_name); + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + if (dtb_buf == NULL) + die("xrealloc failed\n"); + off = fdt_add_subnode(dtb_buf, off, node_name); + } + + if (off < 0) { + fprintf(stderr, "FDT: Error adding %s node.\n", node_name); + return -1; + } + + prop = fdt_get_property(dtb_buf, off, prop_name, &prop_len); + if ((prop == NULL) && (prop_len != -FDT_ERR_NOTFOUND)) { + die("FDT: fdt_get_property"); + } else if (prop == NULL) { + /* prop_len == -FDT_ERR_NOTFOUND */ + 
/* prop doesn't exist */ + dtb_size += fdt_prop_len(prop_name, len); + } else { + if (prop_len < len) + dtb_size += FDT_TAGALIGN(len - prop_len); + } + + if (fdt_totalsize(dtb_buf) < dtb_size) { + fdt_set_totalsize(dtb_buf, dtb_size); + dtb_buf = xrealloc(dtb_buf, dtb_size); + if (dtb_buf == NULL) + die("xrealloc failed\n"); + } + + if (fdt_setprop(dtb_buf, off, prop_name, + val, len) != 0) { + fprintf(stderr, "FDT: Error setting %s/%s property.\n", + node_name, prop_name); + return -1; + } + *bufp = dtb_buf; + *sizep = dtb_size; + return 0; +} + int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, struct kexec_info *info) { unsigned long base; unsigned int atag_offset = 0x1000; /* 4k offset from memory start */ - unsigned int offset = 0x8000; /* 32k offset from memory start */ + unsigned int extra_size = 0x8000; /* TEXT_OFFSET */ const char *command_line; char *modified_cmdline = NULL; off_t command_line_len; @@ -232,6 +297,7 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, off_t dtb_length; char *dtb_file; off_t dtb_offset; + char *end; /* See options.h -- add any more there, too. 
*/ static const struct option options[] = { @@ -242,6 +308,8 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, { "ramdisk", 1, 0, OPT_RAMDISK }, { "dtb", 1, 0, OPT_DTB }, { "atags", 0, 0, OPT_ATAGS }, + { "image-size", 1, 0, OPT_IMAGE_SIZE }, + { "page-offset", 1, 0, OPT_PAGE_OFFSET }, { 0, 0, 0, 0 }, }; static const char short_options[] = KEXEC_ARCH_OPT_STR "a:r:"; @@ -275,6 +343,12 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, case OPT_ATAGS: use_atags = 1; break; + case OPT_IMAGE_SIZE: + kexec_arm_image_size = strtoul(optarg, &end, 0); + break; + case OPT_PAGE_OFFSET: + user_page_offset = strtoull(optarg, &end, 0); + break; } } @@ -289,9 +363,11 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, if (command_line_len > COMMAND_LINE_SIZE) command_line_len = COMMAND_LINE_SIZE; } - if (ramdisk) { + if (ramdisk) ramdisk_buf = slurp_file(ramdisk, &initrd_size); - } + + if (dtb_file) + dtb_buf = slurp_file(dtb_file, &dtb_length); /* * If we are loading a dump capture kernel, we need to update kernel @@ -304,6 +380,8 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, if (!modified_cmdline) return -1; + memset(modified_cmdline, '\0', COMMAND_LINE_SIZE); + if (command_line) { (void) strncpy(modified_cmdline, command_line, COMMAND_LINE_SIZE); @@ -331,16 +409,23 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, } base = start; } else { - base = locate_hole(info,len+offset,0,0,ULONG_MAX,INT_MAX); + base = locate_hole(info, len + extra_size, 0, 0, + ULONG_MAX, INT_MAX); } if (base == ULONG_MAX) return -1; - /* assume the maximum kernel compression ratio is 4, - * and just to be safe, place ramdisk after that - */ - initrd_base = base + len * 4; + if (kexec_arm_image_size) { + /* If the image size was passed as command line argument, + * use that value for determining the address for initrd, + * atags and dtb images. 
page-align the given length.*/ + initrd_base = base + _ALIGN(kexec_arm_image_size, getpagesize()); + } else { + /* Otherwise, assume the maximum kernel compression ratio + * is 4, and just to be safe, place ramdisk after that */ + initrd_base = base + _ALIGN(len * 4, getpagesize()); + } if (use_atags) { /* @@ -355,40 +440,20 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, * Read a user-specified DTB file. */ if (dtb_file) { - dtb_buf = slurp_file(dtb_file, &dtb_length); - if (fdt_check_header(dtb_buf) != 0) { fprintf(stderr, "Invalid FDT buffer.\n"); return -1; } if (command_line) { - const char *node_name = "/chosen"; - const char *prop_name = "bootargs"; - int off; - - dtb_length = fdt_totalsize(dtb_buf) + 1024 + - strlen(command_line); - dtb_buf = xrealloc(dtb_buf, dtb_length); - fdt_set_totalsize(dtb_buf, dtb_length); - - /* check if a /choosen subnode already exists */ - off = fdt_path_offset(dtb_buf, node_name); - - if (off == -FDT_ERR_NOTFOUND) - off = fdt_add_subnode(dtb_buf, off, node_name); - - if (off < 0) { - fprintf(stderr, "FDT: Error adding %s node.\n", node_name); - return -1; - } - - if (fdt_setprop(dtb_buf, off, prop_name, - command_line, strlen(command_line) + 1) != 0) { - fprintf(stderr, "FDT: Error setting %s/%s property.\n", - node_name, prop_name); + /* + * Error should have been reported so + * directly return -1 + */ + if (setup_dtb_prop(&dtb_buf, &dtb_length, "/chosen", + "bootargs", command_line, + strlen(command_line) + 1)) return -1; - } } } else { /* @@ -397,14 +462,48 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, create_flatten_tree(&dtb_buf, &dtb_length, command_line); } - if (base + atag_offset + dtb_length > base + offset) { - fprintf(stderr, "DTB too large!\n"); - return -1; + /* + * Search in memory to make sure there is enough memory + * to hold initrd and dtb. + * + * Even if no initrd is used, this check is still + * required for dtb. 
+ * + * Crash kernel use fixed address, no check is ok. + */ + if ((info->kexec_flags & KEXEC_ON_CRASH) == 0) { + unsigned long page_size = getpagesize(); + /* + * DTB size may be increase a little + * when setup initrd size. Add a full page + * for it is enough. + */ + unsigned long hole_size = _ALIGN_UP(initrd_size, page_size) + + _ALIGN(dtb_length + page_size, page_size); + unsigned long initrd_base_new = locate_hole(info, + hole_size, page_size, + initrd_base, ULONG_MAX, INT_MAX); + if (initrd_base_new == ULONG_MAX) /* test the hole just searched for; 'base' was already validated above */ + return -1; + initrd_base = initrd_base_new; } if (ramdisk) { add_segment(info, ramdisk_buf, initrd_size, initrd_base, initrd_size); + + unsigned long start, end; + start = cpu_to_be32((unsigned long)(initrd_base)); + end = cpu_to_be32((unsigned long)(initrd_base + initrd_size)); + + if (setup_dtb_prop(&dtb_buf, &dtb_length, "/chosen", + "linux,initrd-start", &start, + sizeof(start))) + return -1; + if (setup_dtb_prop(&dtb_buf, &dtb_length, "/chosen", + "linux,initrd-end", &end, + sizeof(end))) + return -1; } /* Stick the dtb at the end of the initrd and page @@ -417,9 +516,9 @@ int zImage_arm_load(int argc, char **argv, const char *buf, off_t len, dtb_offset, dtb_length); } - add_segment(info, buf, len, base + offset, len); + add_segment(info, buf, len, base + extra_size, len); - info->entry = (void*)base + offset; + info->entry = (void*)base + extra_size; return 0; } diff --git a/kexec/arch/arm64/Makefile b/kexec/arch/arm64/Makefile new file mode 100644 index 0000000..37414dc --- /dev/null +++ b/kexec/arch/arm64/Makefile @@ -0,0 +1,40 @@ + +arm64_FS2DT += kexec/fs2dt.c +arm64_FS2DT_INCLUDE += -include $(srcdir)/kexec/arch/arm64/kexec-arm64.h \ + -include $(srcdir)/kexec/arch/arm64/crashdump-arm64.h + +arm64_DT_OPS += kexec/dt-ops.c + +arm64_CPPFLAGS += -I $(srcdir)/kexec/ + +arm64_KEXEC_SRCS += \ + kexec/arch/arm64/kexec-arm64.c \ + kexec/arch/arm64/kexec-image-arm64.c \ + kexec/arch/arm64/kexec-elf-arm64.c \ + kexec/arch/arm64/crashdump-arm64.c + 
+arm64_ARCH_REUSE_INITRD = +arm64_ADD_SEGMENT = +arm64_VIRT_TO_PHYS = +arm64_PHYS_TO_VIRT = + +dist += $(arm64_KEXEC_SRCS) \ + kexec/arch/arm64/Makefile \ + kexec/arch/arm64/kexec-arm64.h \ + kexec/arch/arm64/crashdump-arm64.h + +ifdef HAVE_LIBFDT + +LIBS += -lfdt + +else + +include $(srcdir)/kexec/libfdt/Makefile.libfdt + +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%) + +arm64_CPPFLAGS += -I$(srcdir)/kexec/libfdt + +arm64_KEXEC_SRCS += $(libfdt_SRCS) + +endif diff --git a/kexec/arch/arm64/crashdump-arm64.c b/kexec/arch/arm64/crashdump-arm64.c new file mode 100644 index 0000000..d2272c8 --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.c @@ -0,0 +1,21 @@ +/* + * ARM64 crashdump. + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/elf.h> + +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "kexec-arm64.h" +#include "kexec-elf.h" + +struct memory_ranges usablemem_rgns = {}; + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} diff --git a/kexec/arch/arm64/crashdump-arm64.h b/kexec/arch/arm64/crashdump-arm64.h new file mode 100644 index 0000000..f33c7a2 --- /dev/null +++ b/kexec/arch/arm64/crashdump-arm64.h @@ -0,0 +1,12 @@ +/* + * ARM64 crashdump. + */ + +#if !defined(CRASHDUMP_ARM64_H) +#define CRASHDUMP_ARM64_H + +#include "kexec.h" + +extern struct memory_ranges usablemem_rgns; + +#endif diff --git a/kexec/arch/arm64/image-header.h b/kexec/arch/arm64/image-header.h new file mode 100644 index 0000000..acb839a --- /dev/null +++ b/kexec/arch/arm64/image-header.h @@ -0,0 +1,95 @@ +/* + * ARM64 binary image support. + * Copyright (C) 2014 Linaro. + */ + +#if !defined(__ARM64_IMAGE_HEADER_H) +#define __ARM64_IMAGE_HEADER_H + +#if !defined(__KERNEL__) +#include <stdint.h> +#endif + +#if !defined(__ASSEMBLY__) + +/** + * struct arm64_image_header - arm64 kernel image header. + * + * @pe_sig: Optional PE format 'MZ' signature. + * @branch_code: Reserved for instructions to branch to stext. 
+ * @text_offset: The image load offset in LSB byte order. + * @image_size: An estimated size of the memory image size in LSB byte order. + * @flags: Bit flags: + * Bit 7.0: Image byte order, 1=MSB. + * @reserved_1: Reserved. + * @magic: Magic number, "ARM\x64". + * @pe_header: Optional offset to a PE format header. + **/ + +struct arm64_image_header { + uint8_t pe_sig[2]; + uint16_t branch_code[3]; + uint64_t text_offset; + uint64_t image_size; + uint8_t flags[8]; + uint64_t reserved_1[3]; + uint8_t magic[4]; + uint32_t pe_header; +}; + +static const uint8_t arm64_image_magic[4] = {'A', 'R', 'M', 0x64U}; +static const uint8_t arm64_image_pe_sig[2] = {'M', 'Z'}; +static const uint64_t arm64_image_flag_7_be = 0x01U; + +/** + * arm64_header_check_magic - Helper to check the arm64 image header. + * + * Returns non-zero if header is OK. + */ + +static inline int arm64_header_check_magic(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + if (!h->text_offset) + return 0; + + return (h->magic[0] == arm64_image_magic[0] + && h->magic[1] == arm64_image_magic[1] + && h->magic[2] == arm64_image_magic[2] + && h->magic[3] == arm64_image_magic[3]); +} + +/** + * arm64_header_check_pe_sig - Helper to check the arm64 image header. + * + * Returns non-zero if 'MZ' signature is found. + */ + +static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return (h->pe_sig[0] == arm64_image_pe_sig[0] + && h->pe_sig[1] == arm64_image_pe_sig[1]); +} + +/** + * arm64_header_check_msb - Helper to check the arm64 image header. + * + * Returns non-zero if the image was built as big endian. 
+ */ + +static inline int arm64_header_check_msb(const struct arm64_image_header *h) +{ + if (!h) + return 0; + + return !!(h->flags[7] & arm64_image_flag_7_be); +} + +#endif /* !defined(__ASSEMBLY__) */ + +#endif diff --git a/kexec/arch/arm64/include/arch/options.h b/kexec/arch/arm64/include/arch/options.h new file mode 100644 index 0000000..d2a3c5c --- /dev/null +++ b/kexec/arch/arm64/include/arch/options.h @@ -0,0 +1,44 @@ +#if !defined(KEXEC_ARCH_ARM64_OPTIONS_H) +#define KEXEC_ARCH_ARM64_OPTIONS_H + +#define OPT_APPEND ((OPT_MAX)+0) +#define OPT_DTB ((OPT_MAX)+1) +#define OPT_INITRD ((OPT_MAX)+2) +#define OPT_LITE ((OPT_MAX)+3) +#define OPT_PORT ((OPT_MAX)+4) +#define OPT_ARCH_MAX ((OPT_MAX)+5) + +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + { "append", 1, NULL, OPT_APPEND }, \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "dtb", 1, NULL, OPT_DTB }, \ + { "initrd", 1, NULL, OPT_INITRD }, \ + { "lite", 0, NULL, OPT_LITE }, \ + { "port", 1, NULL, OPT_PORT }, \ + { "ramdisk", 1, NULL, OPT_INITRD }, \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR /* Only accept long arch options. 
*/ +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +static const char *arm64_opts_usage __attribute__ ((unused)) = +" --append=STRING Set the kernel command line to STRING.\n" +" --command-line=STRING Set the kernel command line to STRING.\n" +" --dtb=FILE Use FILE as the device tree blob.\n" +" --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --lite Non-purgatory boot.\n" +" --port=ADDRESS Purgatory output to port ADDRESS.\n" +" --ramdisk=FILE Use FILE as the kernel initial ramdisk.\n"; + +struct arm64_opts { + const char *command_line; + const char *dtb; + const char *initrd; + uint64_t port; + int lite; +}; + +extern struct arm64_opts arm64_opts; + +#endif diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c new file mode 100644 index 0000000..bf6aba0 --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.c @@ -0,0 +1,940 @@ +/* + * ARM64 kexec. + */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <getopt.h> +#include <inttypes.h> +#include <libfdt.h> +#include <limits.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include <sys/stat.h> + +#include <linux/elf.h> + +#include "dt-ops.h" +#include "kexec.h" +#include "crashdump.h" +#include "crashdump-arm64.h" +#include "kexec-arm64.h" +#include "fs2dt.h" +#include "kexec-syscall.h" +#include "arch/options.h" + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 +#define R_AARCH64_ABS64 257 +#endif + +/* Global varables the core kexec routines expect. */ + +unsigned char reuse_initrd; + +off_t initrd_base; +off_t initrd_size; + +const struct arch_map_entry arches[] = { + { "aarch64", KEXEC_ARCH_ARM64 }, + { "aarch64_be", KEXEC_ARCH_ARM64 }, + { NULL, 0 }, +}; + +/* arm64 global varables. 
*/ + +struct arm64_opts arm64_opts; +struct arm64_mem arm64_mem; + +void arch_usage(void) +{ + dbgprintf("Build time: %s : %s\n", __DATE__, __TIME__); + printf("%s", arm64_opts_usage); /* never pass a non-literal string as the format */ +} + +int arch_process_options(int argc, char **argv) +{ + static const char short_options[] = KEXEC_OPT_STR ""; + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { 0 } + }; + int opt; + + for (opt = 0; opt != -1; ) { + opt = getopt_long(argc, argv, short_options, options, 0); + + switch (opt) { + case OPT_APPEND: + arm64_opts.command_line = optarg; + break; + case OPT_DTB: + arm64_opts.dtb = optarg; + break; + case OPT_INITRD: + arm64_opts.initrd = optarg; + break; + case OPT_LITE: + arm64_opts.lite = 1; + break; + case OPT_PORT: + arm64_opts.port = strtoull(optarg, NULL, 0); + break; + default: + break; /* Ignore core and unknown options. */ + } + } + + kexec_debug = 1; // FIXME: for debugging only. + + dbgprintf("%s:%d: command_line: %s\n", __func__, __LINE__, + arm64_opts.command_line); + dbgprintf("%s:%d: initrd: %s\n", __func__, __LINE__, + arm64_opts.initrd); + dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, arm64_opts.dtb); + dbgprintf("%s:%d: lite: %d\n", __func__, __LINE__, arm64_opts.lite); + dbgprintf("%s:%d: port: 0x%" PRIx64 "\n", __func__, __LINE__, + arm64_opts.port); + + return 0; +} + +struct dtb { + char *buf; + off_t size; + const char *name; +}; + +static void dump_reservemap(const struct dtb *dtb) +{ + int i; + + for (i = 0; ; i++) { + uint64_t address; + uint64_t size; + + if (fdt_get_mem_rsv(dtb->buf, i, &address, &size)) + break; /* lookup failed: address and size were not written */ + if (!size) + break; + + dbgprintf("%s: %s {%" PRIx64 ", %" PRIx64 "}\n", __func__, + dtb->name, address, size); + } +} + +enum cpu_enable_method { + cpu_enable_method_unknown, + cpu_enable_method_psci, + cpu_enable_method_spin_table, +}; + +/** + * struct cpu_properties - Various properties from a device tree cpu node. + * + * These properties will be valid over a dtb re-size. 
+ */ + +struct cpu_properties { + uint64_t hwid; + uint64_t cpu_release_addr; + char node_path[128]; + char enable_method[128]; + enum cpu_enable_method type; +}; + +/** + * read_cpu_properties - Helper to read the device tree cpu properties. + */ + +static int read_cpu_properties(struct cpu_properties *cp, + const struct dtb *dtb, int node_offset) +{ + int result; + const void *data; + + memset(cp, 0, sizeof(*cp)); + + data = fdt_getprop(dtb->buf, node_offset, "reg", &result); + + if (!data) { + fprintf(stderr, "kexec: %s:%d: read hwid failed: %s\n", + __func__, __LINE__, fdt_strerror(result)); + return result; + } + + cp->hwid = fdt64_to_cpu(*(uint64_t *)data); + + result = fdt_get_path(dtb->buf, node_offset, cp->node_path, + sizeof(cp->node_path)); + + if (result < 0) { + dbgprintf("%s:%d: hwid-%" PRIx64 ": fdt_get_path failed %s.\n", + __func__, __LINE__, cp->hwid, + fdt_strerror(result)); + return result; + } + + data = fdt_getprop(dtb->buf, node_offset, "enable-method", &result); + + if (!data) { + fprintf(stderr, + "kexec: %s:%d: read enable_method failed: %s\n", + __func__, __LINE__, fdt_strerror(result)); + return result; + } + + strncpy(cp->enable_method, data, sizeof(cp->enable_method)); + cp->enable_method[sizeof(cp->enable_method) - 1] = 0; + + if (!strcmp(cp->enable_method, "psci")) { + cp->type = cpu_enable_method_psci; + return 0; + } + + if (strcmp(cp->enable_method, "spin-table")) { + cp->type = cpu_enable_method_unknown; + return -1; + } + + cp->type = cpu_enable_method_spin_table; + + data = fdt_getprop(dtb->buf, node_offset, "cpu-release-addr", &result); + + if (!data) { + fprintf(stderr, "kexec: %s:%d: " + "read cpu-release-addr failed: %s\n", + __func__, __LINE__, fdt_strerror(result)); + return result; + } + + cp->cpu_release_addr = fdt64_to_cpu(*(uint64_t *)data); + + return 0; +} + +static int check_cpu_properties(const struct cpu_properties *cp_1, + const struct cpu_properties *cp_2) +{ + assert(cp_1->hwid == cp_2->hwid); + + if 
(cp_1->type != cp_2->type) { + fprintf(stderr, + "%s:%d: hwid-%" PRIx64 ": " + "Error: Different enable methods: %s -> %s\n", + __func__, __LINE__, cp_1->hwid, cp_1->enable_method, + cp_2->enable_method); + return -EINVAL; + } + + if (cp_1->type != cpu_enable_method_psci + && cp_1->type != cpu_enable_method_spin_table) { + fprintf(stderr, + "%s:%d: hwid-%" PRIx64 ": " + "Warning: Unknown enable method: %s.\n", + __func__, __LINE__, cp_1->hwid, + cp_1->enable_method); + } + + if (cp_1->type == cpu_enable_method_spin_table) { + if (cp_1->cpu_release_addr != cp_2->cpu_release_addr) { + fprintf(stderr, "%s:%d: hwid-%" PRIx64 ": " + "Error: Different cpu-release-addr: " + "%" PRIx64 " -> %" PRIx64 ".\n", + __func__, __LINE__, + cp_1->hwid, + cp_2->cpu_release_addr, + cp_1->cpu_release_addr); + return -EINVAL; + } + } + + dbgprintf("%s: hwid-%" PRIx64 ": OK\n", __func__, cp_1->hwid); + + return 0; +} + +struct cpu_info { + unsigned int cpu_count; + struct cpu_properties *cp; +}; + +static int read_cpu_info(struct cpu_info *info, const struct dtb *dtb) +{ + int i; + int offset; + int result; + int depth; + + offset = fdt_subnode_offset(dtb->buf, 0, "cpus"); + + if (offset < 0) { + fprintf(stderr, "kexec: %s:%d: read cpus node failed: %s\n", + __func__, __LINE__, fdt_strerror(offset)); + return offset; + } + + for (i = 0, depth = 0; ; i++) { + + offset = fdt_next_node(dtb->buf, offset, &depth); + + if (offset < 0) { + fprintf(stderr, "kexec: %s:%d: " + "read cpu node failed: %s\n", __func__, + __LINE__, fdt_strerror(offset)); + result = offset; + goto on_error; + } + + if (depth != 1) + break; + + info->cp = realloc(info->cp, (i + 1) * sizeof(*info->cp)); + + if (!info->cp) { + fprintf(stderr, "kexec: %s:%d: malloc failed: %s\n", + __func__, __LINE__, fdt_strerror(offset)); + result = -ENOMEM; + goto on_error; + } + + result = read_cpu_properties(&info->cp[i], dtb, offset); + + if (result) + goto on_error; + + if (info->cp[i].type == cpu_enable_method_psci) + 
dbgprintf("%s: %s cpu-%d (%s): hwid-%" PRIx64 ", '%s'\n", + __func__, dtb->name, i, info->cp[i].node_path, + info->cp[i].hwid, + info->cp[i].enable_method); + else + dbgprintf("%s: %s cpu-%d (%s): hwid-%" PRIx64 ", '%s', " + "cpu-release-addr %" PRIx64 "\n", + __func__, dtb->name, i, info->cp[i].node_path, + info->cp[i].hwid, + info->cp[i].enable_method, + info->cp[i].cpu_release_addr); + } + + info->cpu_count = i; + return 0; + +on_error: + free(info->cp); + info->cp = NULL; + return result; +} + +static int check_cpu_nodes(const struct dtb *dtb_1, const struct dtb *dtb_2) +{ + int result; + unsigned int cpu_1; + struct cpu_info info_1; + struct cpu_info info_2; + unsigned int to_process; + + memset(&info_1, 0, sizeof(info_1)); + memset(&info_2, 0, sizeof(info_2)); + + result = read_cpu_info(&info_1, dtb_1); + + if (result) + goto on_exit; + + result = read_cpu_info(&info_2, dtb_2); + + if (result) + goto on_exit; + + to_process = info_1.cpu_count < info_2.cpu_count + ? info_1.cpu_count : info_2.cpu_count; + + for (cpu_1 = 0; cpu_1 < info_1.cpu_count; cpu_1++) { + struct cpu_properties *cp_1 = &info_1.cp[cpu_1]; + unsigned int cpu_2; + + for (cpu_2 = 0; cpu_2 < info_2.cpu_count; cpu_2++) { + struct cpu_properties *cp_2 = &info_2.cp[cpu_2]; + + if (cp_1->hwid != cp_2->hwid) + continue; + + to_process--; + + result = check_cpu_properties(cp_1, cp_2); + + if (result) + goto on_exit; + } + } + + if (to_process) { + fprintf(stderr, "kexec: %s:%d: Warning: " + "Failed to process %u CPUs.\n", + __func__, __LINE__, to_process); + result = -EINVAL; + goto on_exit; + } + +on_exit: + free(info_1.cp); + free(info_2.cp); + return result; +} + +static int read_1st_dtb(struct dtb *dtb, const char *command_line) +{ + int result; + struct stat s; + + result = stat("/proc/device-tree/", &s); + + if (result) { + dbgprintf("%s: %s\n", __func__, strerror(errno)); + return -1; + } + + create_flatten_tree((char **)&dtb->buf, &dtb->size, + command_line[0] ? 
command_line : NULL); + + return 0; +} + +static int setup_2nd_dtb(char *command_line, const struct dtb *dtb_1, + struct dtb *dtb_2) +{ + int result; + + result = fdt_check_header(dtb_2->buf); + + if (result) { + fprintf(stderr, "kexec: Invalid 2nd device tree.\n"); + return -EINVAL; + } + + if (command_line[0]) { + result = dtb_set_bootargs((char **)&dtb_2->buf, &dtb_2->size, + command_line); + + if (result) { + fprintf(stderr, + "kexec: Set device tree bootargs failed.\n"); + return result; + } + } + + dump_reservemap(dtb_2); + + return 0; +} + +static uint64_t read_sink(const char *command_line) +{ + uint64_t v; + const char *p; + + if (arm64_opts.port) + return arm64_opts.port; + +#if defined(ARM64_DEBUG_PORT) + return (uint64_t)(ARM64_DEBUG_PORT); +#endif + if (!command_line) + return 0; + + p = strstr(command_line, "earlyprintk="); + + if (!p) + return 0; + + while (*p != ',') + p++; + + p++; + + while (isspace(*p)) + p++; + + if (*p == 0) + return 0; + + errno = 0; + + v = strtoull(p, NULL, 0); + + if (errno) + return 0; + + return v; +} + +/** + * arm64_load_other_segments - Prepare the dtb, initrd and purgatory segments. 
+ */ + +int arm64_load_other_segments(struct kexec_info *info, + unsigned long kernel_entry) +{ + int result; + struct mem_ehdr ehdr; + unsigned long dtb_max; + unsigned long dtb_base; + char *initrd_buf = NULL; + uint64_t purgatory_sink; + unsigned long purgatory_base; + struct dtb dtb_1 = {.name = "dtb_1"}; + struct dtb dtb_2 = {.name = "dtb_2"}; + char command_line[COMMAND_LINE_SIZE] = ""; + + if (arm64_opts.command_line) { + strncpy(command_line, arm64_opts.command_line, + sizeof(command_line)); + command_line[sizeof(command_line) - 1] = 0; + } + + purgatory_sink = read_sink(command_line); + + dbgprintf("%s:%d: purgatory sink: 0x%" PRIx64 "\n", __func__, __LINE__, + purgatory_sink); + + if (arm64_opts.dtb) { + dtb_2.buf = slurp_file(arm64_opts.dtb, &dtb_2.size); + assert(dtb_2.buf); + } + + result = read_1st_dtb(&dtb_1, command_line); + + if (result && !arm64_opts.dtb) { + fprintf(stderr, "kexec: Error: No device tree available.\n"); + return result; + } + + if (result && arm64_opts.dtb) + dtb_1 = dtb_2; + else if (!result && !arm64_opts.dtb) + dtb_2 = dtb_1; + + result = setup_2nd_dtb(command_line, &dtb_1, &dtb_2); + + if (result) + return result; + + result = check_cpu_nodes(&dtb_1, &dtb_2); + + if (result) + return result; + + /* + * Put the DTB after the kernel with an alignment of 128 KiB, giving + * a max supported DTB size of 128 KiB (worst case). Also add 2 KiB + * to the DTB size for any DTB growth. 
+ */ + + dtb_max = dtb_2.size + 2 * 1024; + + dtb_base = locate_hole(info, dtb_max, 128UL * 1024, + arm64_mem.memstart + arm64_mem.text_offset + + arm64_mem.image_size, + _ALIGN_UP(arm64_mem.memstart + arm64_mem.text_offset, + 512UL * 1024 * 1024), + 1); + + dbgprintf("dtb: base %lx, size %lxh (%ld)\n", dtb_base, dtb_2.size, + dtb_2.size); + + if (dtb_base == ULONG_MAX) + return -ENOMEM; + + purgatory_base = dtb_base + dtb_2.size; + initrd_base = 0; + initrd_size = 0; + + if (arm64_opts.initrd) { + initrd_buf = slurp_file(arm64_opts.initrd, &initrd_size); + + if (!initrd_buf) + fprintf(stderr, "kexec: Empty ramdisk file.\n"); + else { + /* Put the initrd after the DTB with an alignment of + * page size. */ + + initrd_base = locate_hole(info, initrd_size, 0, + dtb_base + dtb_max, -1, 1); + + dbgprintf("initrd: base %lx, size %lxh (%ld)\n", + initrd_base, initrd_size, initrd_size); + + if (initrd_base == ULONG_MAX) + return -ENOMEM; + + result = dtb_set_initrd((char **)&dtb_2.buf, + &dtb_2.size, initrd_base, + initrd_base + initrd_size); + + if (result) + return result; + + purgatory_base = initrd_base + initrd_size; + } + } + + if (dtb_2.size > dtb_max) { + fprintf(stderr, "%s: Error: Too many DTB mods.\n", __func__); + return -EINVAL; + } + + add_segment_phys_virt(info, dtb_2.buf, dtb_2.size, dtb_base, + dtb_2.size, 0); + + if (arm64_opts.initrd) + add_segment_phys_virt(info, initrd_buf, initrd_size, + initrd_base, initrd_size, 0); + + if (arm64_opts.lite) + info->entry = (void *)kernel_entry; + else { + result = build_elf_rel_info(purgatory, purgatory_size, &ehdr, + 0); + + if (result < 0) { + fprintf(stderr, "%s: Error: " + "build_elf_rel_info failed.\n", __func__); + return -EBADF; + } + + elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size, + purgatory_base, ULONG_MAX, 1, 0); + + info->entry = (void *)elf_rel_get_addr(&info->rhdr, + "purgatory_start"); + + elf_rel_set_symbol(&info->rhdr, "arm64_sink", &purgatory_sink, + sizeof(purgatory_sink)); + + 
elf_rel_set_symbol(&info->rhdr, "arm64_kernel_entry", + &kernel_entry, sizeof(kernel_entry)); + + elf_rel_set_symbol(&info->rhdr, "arm64_dtb_addr", &dtb_base, + sizeof(dtb_base)); + } + + return 0; +} + +unsigned long virt_to_phys(unsigned long v) +{ + unsigned long p; + + assert(arm64_mem.page_offset); + assert(arm64_mem.memstart); + + p = v - arm64_mem.page_offset + arm64_mem.memstart; + + dbgprintf("%s: %016lx -> %016lx\n", __func__, v, p); + return p; +} + +unsigned long phys_to_virt(struct crash_elf_info *UNUSED(elf_info), + unsigned long p) +{ + unsigned long v; + + assert(arm64_mem.page_offset); + assert(arm64_mem.memstart); + + v = p - arm64_mem.memstart + arm64_mem.page_offset; + + dbgprintf("%s: %016lx -> %016lx\n", __func__, p, v); + return p; +} + +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} + +int arm64_process_image_header(const struct arm64_image_header *h) +{ +#if !defined(KERNEL_IMAGE_SIZE) +# define KERNEL_IMAGE_SIZE (768 * 1024) +#endif + + if (!arm64_header_check_magic(h)) + return -EINVAL; + + if (h->image_size) { + arm64_mem.text_offset = le64_to_cpu(h->text_offset); + arm64_mem.image_size = le64_to_cpu(h->image_size); + } else { + /* For 3.16 and older kernels. 
*/ + arm64_mem.text_offset = 0x80000; + arm64_mem.image_size = KERNEL_IMAGE_SIZE; + } + + return 0; +} + +static int get_memory_ranges_dt(struct memory_range *array, unsigned int *count) +{ + struct region {uint64_t base; uint64_t size;}; + char *dtb; + off_t size; + int offset; + int result; + + *count = 0; + + create_flatten_tree(&dtb, &size, NULL); + + result = fdt_check_header(dtb); + + if (result) { + dbgprintf("%s:%d: fdt_check_header failed:%s\n", __func__, + __LINE__, fdt_strerror(result)); + goto on_error; + } + + for (offset = 0; ; ) { + const struct region *region; + const struct region *end; + int len; + + offset = fdt_subnode_offset(dtb, offset, "memory"); + + if (offset == -FDT_ERR_NOTFOUND) + break; + + if (offset <= 0) { + dbgprintf("%s:%d: fdt_subnode_offset failed: %d %s\n", + __func__, __LINE__, offset, + fdt_strerror(offset)); + goto on_error; + } + + dbgprintf("%s:%d: node_%d %s\n", __func__, __LINE__, offset, + fdt_get_name(dtb, offset, NULL)); + + region = fdt_getprop(dtb, offset, "reg", &len); + + if (region <= 0) { + dbgprintf("%s:%d: fdt_getprop failed: %d %s\n", + __func__, __LINE__, offset, + fdt_strerror(offset)); + goto on_error; + } + + for (end = region + len / sizeof(*region); + region < end && *count < KEXEC_SEGMENT_MAX; + region++) { + struct memory_range r; + + r.type = RANGE_RAM; + r.start = fdt64_to_cpu(region->base); + r.end = r.start + fdt64_to_cpu(region->size); + + if (!region->size) { + dbgprintf("%s:%d: SKIP: %016llx - %016llx\n", + __func__, __LINE__, r.start, r.end); + continue; + } + + dbgprintf("%s:%d: RAM: %016llx - %016llx\n", __func__, + __LINE__, r.start, r.end); + + array[(*count)++] = r; + + if (!arm64_mem.memstart || r.start < arm64_mem.memstart) + arm64_mem.memstart = r.start; + } + } + + if (!*count) { + dbgprintf("%s:%d: failed: No RAM found.\n", __func__, __LINE__); + goto on_error; + } + + dbgprintf("%s:%d: Success\n", __func__, __LINE__); + result = 0; + goto on_exit; + +on_error: + fprintf(stderr, 
"%s:%d: Invalid /proc/device-tree.\n", __func__, + __LINE__); + result = -1; + +on_exit: + free(dtb); + return result; +} + +static int get_memory_ranges_iomem(struct memory_range *array, + unsigned int *count) +{ + const char *iomem; + char line[MAX_LINE]; + FILE *fp; + + *count = 0; + + iomem = proc_iomem(); + fp = fopen(iomem, "r"); + + if (!fp) { + fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno)); + return -1; + } + + while(fgets(line, sizeof(line), fp) != 0) { + struct memory_range r; + char *str; + int consumed; + + if (*count >= KEXEC_SEGMENT_MAX) + break; + + if (sscanf(line, "%Lx-%Lx : %n", &r.start, &r.end, &consumed) + != 2) + continue; + + str = line + consumed; + r.end++; + + if (memcmp(str, "System RAM\n", 11)) { + dbgprintf("%s:%d: SKIP: %016Lx - %016Lx : %s", __func__, + __LINE__, r.start, r.end, str); + continue; + } + + r.type = RANGE_RAM; + + dbgprintf("%s:%d: RAM: %016llx - %016llx\n : %s", __func__, + __LINE__, r.start, r.end, str); + + array[(*count)++] = r; + + if (!arm64_mem.memstart || r.start < arm64_mem.memstart) + arm64_mem.memstart = r.start; + } + + fclose(fp); + + if (!*count) { + dbgprintf("%s:%d: failed: No RAM found.\n", __func__, __LINE__); + return -1; + } + + dbgprintf("%s:%d: Success\n", __func__, __LINE__); + return 0; +} + +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + static struct memory_range array[KEXEC_SEGMENT_MAX]; + unsigned int count; + int result; + + result = get_memory_ranges_dt(array, &count); + + if (result) + result = get_memory_ranges_iomem(array, &count); + + *range = result ? NULL : array; + *ranges = result ? 
0 : count; + + return result; +} + +struct file_type file_type[] = { + {"elf-arm64", elf_arm64_probe, elf_arm64_load, elf_arm64_usage}, + {"image-arm64", image_arm64_probe, image_arm64_load, image_arm64_usage}, +}; + +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +int arch_compat_trampoline(struct kexec_info *info) +{ + return 0; +} + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + return (ehdr->e_machine == EM_AARCH64); +} + +void machine_apply_elf_rel(struct mem_ehdr *ehdr, unsigned long r_type, + void *ptr, unsigned long address, unsigned long value) +{ +#if !defined(R_AARCH64_ABS64) +# define R_AARCH64_ABS64 257 +#endif + +#if !defined(R_AARCH64_LD_PREL_LO19) +# define R_AARCH64_LD_PREL_LO19 273 +#endif + +#if !defined(R_AARCH64_ADR_PREL_LO21) +# define R_AARCH64_ADR_PREL_LO21 274 +#endif + +#if !defined(R_AARCH64_JUMP26) +# define R_AARCH64_JUMP26 282 +#endif + +#if !defined(R_AARCH64_CALL26) +# define R_AARCH64_CALL26 283 +#endif + + uint32_t *location = (uint32_t *)ptr; + uint32_t data = *location; + const char *type = NULL; + + switch(r_type) { + case R_AARCH64_ABS64: + type = "ABS64"; + *location += value; + break; + case R_AARCH64_LD_PREL_LO19: + type = "LD_PREL_LO19"; + *location += ((value - address) << 3) & 0xffffe0; + break; + case R_AARCH64_ADR_PREL_LO21: + if (value & 3) + die("%s: ERROR Unaligned value: %lx\n", __func__, + value); + type = "ADR_PREL_LO21"; + *location += ((value - address) << 3) & 0xffffe0; + break; + case R_AARCH64_JUMP26: + type = "JUMP26"; + *location += ((value - address) >> 2) & 0x3ffffff; + break; + case R_AARCH64_CALL26: + type = "CALL26"; + *location += ((value - address) >> 2) & 0x3ffffff; + break; + default: + die("%s: ERROR Unknown type: %lu\n", __func__, r_type); + break; + } + + dbgprintf("%s: %s %x->%x\n", __func__, type, data, *location); +} + +void arch_reuse_initrd(void) +{ + reuse_initrd = 1; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} diff --git 
a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h new file mode 100644 index 0000000..057acf3 --- /dev/null +++ b/kexec/arch/arm64/kexec-arm64.h @@ -0,0 +1,51 @@ +/* + * ARM64 kexec. + */ + +#if !defined(KEXEC_ARM64_H) +#define KEXEC_ARM64_H + +#include <stdbool.h> +#include <sys/types.h> + +#include "image-header.h" +#include "kexec.h" + +#define KEXEC_SEGMENT_MAX 16 + +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 16 +#define COMMAND_LINE_SIZE 512 + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size); +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void elf_arm64_usage(void); + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size); +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); +void image_arm64_usage(void); + +struct memory_ranges usablemem_rgns; +off_t initrd_base; +off_t initrd_size; + +/** + * struct arm64_mem - Memory layout info. + */ + +struct arm64_mem { + uint64_t text_offset; + uint64_t image_size; + uint64_t page_offset; + uint64_t memstart; +}; + +extern struct arm64_mem arm64_mem; + +int arm64_process_image_header(const struct arm64_image_header *h); +int arm64_load_other_segments(struct kexec_info *info, + unsigned long kernel_entry); + +#endif diff --git a/kexec/arch/arm64/kexec-elf-arm64.c b/kexec/arch/arm64/kexec-elf-arm64.c new file mode 100644 index 0000000..5d8e124 --- /dev/null +++ b/kexec/arch/arm64/kexec-elf-arm64.c @@ -0,0 +1,129 @@ +/* + * ARM64 kexec elf support. 
+ */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <libfdt.h> + +#include <linux/elf.h> + +#include "dt-ops.h" +#include "crashdump-arm64.h" +#include "kexec-arm64.h" +#include "fs2dt.h" +#include "kexec-syscall.h" +#include "arch/options.h" + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 +#define R_AARCH64_ABS64 257 +#endif + + +int elf_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + int result; + struct mem_ehdr ehdr; + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: Not an ELF executable.\n", __func__); + goto on_exit; + } + + if (ehdr.e_machine != EM_AARCH64) { + dbgprintf("%s: Not an AARCH64 ELF executable.\n", __func__); + result = -EINVAL; + goto on_exit; + } + + result = 0; + +on_exit: + free_elf_info(&ehdr); + return result; +} + +int elf_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + int result; + struct mem_ehdr ehdr; + bool found_header; + int i; + + if (info->kexec_flags & KEXEC_ON_CRASH) { + fprintf(stderr, "kexec: kdump not yet supported on arm64\n"); + return -EINVAL; + } + + result = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0); + + if (result < 0) { + dbgprintf("%s: build_elf_exec_info failed\n", __func__); + goto exit; + } + + /* Find and process the arm64 image header. */ + + for (i = 0, found_header = false; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr = &ehdr.e_phdr[i]; + const struct arm64_image_header *h; + + if (phdr->p_type != PT_LOAD) + continue; + + h = (const struct arm64_image_header *)(kernel_buf + + phdr->p_offset); + + if (arm64_process_image_header(h)) + continue; + + found_header = true; + + arm64_mem.page_offset = phdr->p_vaddr - arm64_mem.text_offset; + + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(h) ? 
"yes" : "no")); + dbgprintf("p_vaddr: %016llx\n", phdr->p_vaddr); + + break; + } + + if (!found_header) { + fprintf(stderr, "kexec: Bad arm64 image header.\n"); + result = -EINVAL; + goto exit; + } + + result = elf_exec_load(&ehdr, info); + + if (result) { + fprintf(stderr, "kexec: Elf load failed.\n"); + goto exit; + } + + dbgprintf("%s: text_offset: %016lx\n", __func__, arm64_mem.text_offset); + dbgprintf("%s: image_size: %016lx\n", __func__, arm64_mem.image_size); + dbgprintf("%s: page_offset: %016lx\n", __func__, arm64_mem.page_offset); + dbgprintf("%s: memstart: %016lx\n", __func__, arm64_mem.memstart); + dbgprintf("%s: e_entry: %016llx -> %016lx\n", __func__, + ehdr.e_entry, virt_to_phys(ehdr.e_entry)); + + result = arm64_load_other_segments(info, virt_to_phys(ehdr.e_entry)); +exit: + free_elf_info(&ehdr); + return result; +} + +void elf_arm64_usage(void) +{ + printf( +" An arm64 ELF file, big or little endian.\n" +" Typically vmlinux or a stripped version of vmlinux.\n\n"); +} diff --git a/kexec/arch/arm64/kexec-image-arm64.c b/kexec/arch/arm64/kexec-image-arm64.c new file mode 100644 index 0000000..b025dc6 --- /dev/null +++ b/kexec/arch/arm64/kexec-image-arm64.c @@ -0,0 +1,50 @@ +/* + * ARM64 kexec binary image support. + */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <libfdt.h> + +#include "dt-ops.h" +#include "image-header.h" +#include "kexec-arm64.h" +#include "fs2dt.h" +#include "kexec-syscall.h" +#include "arch/options.h" + +int image_arm64_probe(const char *kernel_buf, off_t kernel_size) +{ + const struct arm64_image_header *h; + + if (kernel_size < sizeof(struct arm64_image_header)) + return -EINVAL; + + h = (const struct arm64_image_header *)(kernel_buf); + + if (!arm64_header_check_magic(h)) + return -1; + + dbgprintf("%s: PE format: %s\n", __func__, + (arm64_header_check_pe_sig(h) ? 
"yes" : "no")); + + fprintf(stderr, "kexec: arm64 binary Image files are currently NOT SUPPORTED.\n"); + + return -1; +} + +int image_arm64_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info) +{ + return -ENOSYS; +} + +void image_arm64_usage(void) +{ + printf( +" An arm64 binary Image file, big or little endian.\n" +" This file type is currently NOT SUPPORTED.\n\n"); +} diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c index e44fceb..77bdad5 100644 --- a/kexec/arch/i386/crashdump-x86.c +++ b/kexec/arch/i386/crashdump-x86.c @@ -34,6 +34,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <dirent.h> #include "../../kexec.h" #include "../../kexec-elf.h" #include "../../kexec-syscall.h" @@ -43,16 +44,11 @@ #include "crashdump-x86.h" #ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP #include <xenctrl.h> -#else -#define __XEN_TOOLS__ 1 -#include <xen/xen.h> -#include <xen/memory.h> -#include <xen/sys/privcmd.h> -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ #endif /* HAVE_LIBXENCTRL */ +#include "x86-linux-setup.h" + #include <x86/x86-linux.h> extern struct arch_options_t arch_options; @@ -105,6 +101,36 @@ static int get_kernel_paddr(struct kexec_info *UNUSED(info), return -1; } +/* Retrieve kernel _stext symbol virtual address from /proc/kallsyms */ +static unsigned long long get_kernel_stext_sym(void) +{ + const char *kallsyms = "/proc/kallsyms"; + const char *stext = "_stext"; + char sym[128]; + char line[128]; + FILE *fp; + unsigned long long vaddr; + char type; + + fp = fopen(kallsyms, "r"); + if (!fp) { + fprintf(stderr, "Cannot open %s\n", kallsyms); + return 0; + } + + while(fgets(line, sizeof(line), fp) != NULL) { + if (sscanf(line, "%Lx %c %s", &vaddr, &type, sym) != 3) + continue; + if (strcmp(sym, stext) == 0) { + dbgprintf("kernel symbol %s vaddr = %16llx\n", stext, vaddr); + return vaddr; + } + } + + fprintf(stderr, "Cannot get kernel %s symbol 
address\n", stext); + return 0; +} + /* Retrieve info regarding virtual address kernel has been compiled for and * size of the kernel from /proc/kcore. Current /proc/kcore parsing from * from kexec-tools fails because of malformed elf notes. A kernel patch has @@ -121,8 +147,9 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), struct mem_ehdr ehdr; struct mem_phdr *phdr, *end_phdr; int align; - unsigned long size; + off_t size; uint32_t elf_flags = 0; + uint64_t stext_sym; if (elf_info->machine != EM_X86_64) return 0; @@ -131,8 +158,7 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), return 0; align = getpagesize(); - size = KCORE_ELF_HEADERS_SIZE; - buf = slurp_file_len(kcore, size); + buf = slurp_file_len(kcore, KCORE_ELF_HEADERS_SIZE, &size); if (!buf) { fprintf(stderr, "Cannot read %s: %s\n", kcore, strerror(errno)); return -1; @@ -151,9 +177,36 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), return -1; } - /* Traverse through the Elf headers and find the region where - * kernel is mapped. */ end_phdr = &ehdr.e_phdr[ehdr.e_phnum]; + + /* Traverse through the Elf headers and find the region where + * _stext symbol is located in. That's where kernel is mapped */ + stext_sym = get_kernel_stext_sym(); + for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) { + if (phdr->p_type == PT_LOAD) { + unsigned long long saddr = phdr->p_vaddr; + unsigned long long eaddr = phdr->p_vaddr + phdr->p_memsz; + unsigned long long size; + + /* Look for kernel text mapping header. */ + if (saddr < stext_sym && eaddr > stext_sym) { + saddr = _ALIGN_DOWN(saddr, X86_64_KERN_VADDR_ALIGN); + elf_info->kern_vaddr_start = saddr; + size = eaddr - saddr; + /* Align size to page size boundary. 
*/ + size = _ALIGN(size, align); + elf_info->kern_size = size; + dbgprintf("kernel vaddr = 0x%llx size = 0x%llx\n", + saddr, size); + return 0; + } + } + } + + /* If failed to retrieve kernel text mapping through + * /proc/kallsyms, Traverse through the Elf headers again and + * find the region where kernel is mapped using hard-coded + * kernel mapping boundries */ for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) { if (phdr->p_type == PT_LOAD) { unsigned long long saddr = phdr->p_vaddr; @@ -175,6 +228,7 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info), } } } + fprintf(stderr, "Can't find kernel text map area from kcore\n"); return -1; } @@ -247,6 +301,8 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges, type = RANGE_ACPI; } else if(memcmp(str,"ACPI Non-volatile Storage\n",26) == 0 ) { type = RANGE_ACPI_NVS; + } else if(memcmp(str,"reserved\n",9) == 0 ) { + type = RANGE_RESERVED; } else if (memcmp(str, "GART\n", 5) == 0) { gart_start = start; gart_end = end; @@ -300,34 +356,20 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges, } #ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP static int get_crash_memory_ranges_xen(struct memory_range **range, int *ranges, unsigned long lowmem_limit) { int j, rc, ret = -1; struct e820entry e820entries[CRASH_MAX_MEMORY_RANGES]; unsigned int i; -#ifdef XENCTRL_HAS_XC_INTERFACE xc_interface *xc; -#else - int xc; -#endif -#ifdef XENCTRL_HAS_XC_INTERFACE xc = xc_interface_open(NULL, NULL, 0); if (!xc) { fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; - } -#else - xc = xc_interface_open(); - - if (xc == -1) { - fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; + return -1; } -#endif rc = xc_get_machine_memory_map(xc, e820entries, CRASH_MAX_MEMORY_RANGES); @@ -364,95 +406,6 @@ err: static int get_crash_memory_ranges_xen(struct memory_range **range, int *ranges, unsigned long 
lowmem_limit) { - int fd, j, rc, ret = -1; - privcmd_hypercall_t hypercall; - struct e820entry *e820entries = NULL; - struct xen_memory_map *xen_memory_map = NULL; - unsigned int i; - - fd = open("/proc/xen/privcmd", O_RDWR); - - if (fd == -1) { - fprintf(stderr, "%s: open(/proc/xen/privcmd): %m\n", __func__); - goto err; - } - - rc = posix_memalign((void **)&e820entries, getpagesize(), - sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(e820entries): %s\n", __func__, strerror(rc)); - e820entries = NULL; - goto err; - } - - rc = posix_memalign((void **)&xen_memory_map, getpagesize(), - sizeof(struct xen_memory_map)); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(xen_memory_map): %s\n", __func__, strerror(rc)); - xen_memory_map = NULL; - goto err; - } - - if (mlock(e820entries, sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES) == -1) { - fprintf(stderr, "%s: mlock(e820entries): %m\n", __func__); - goto err; - } - - if (mlock(xen_memory_map, sizeof(struct xen_memory_map)) == -1) { - fprintf(stderr, "%s: mlock(xen_memory_map): %m\n", __func__); - goto err; - } - - xen_memory_map->nr_entries = CRASH_MAX_MEMORY_RANGES; - set_xen_guest_handle(xen_memory_map->buffer, e820entries); - - hypercall.op = __HYPERVISOR_memory_op; - hypercall.arg[0] = XENMEM_machine_memory_map; - hypercall.arg[1] = (__u64)xen_memory_map; - - rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hypercall); - - if (rc == -1) { - fprintf(stderr, "%s: ioctl(IOCTL_PRIVCMD_HYPERCALL): %m\n", __func__); - goto err; - } - - for (i = 0, j = 0; i < xen_memory_map->nr_entries && - j < CRASH_MAX_MEMORY_RANGES; ++i, ++j) { - crash_memory_range[j].start = e820entries[i].addr; - crash_memory_range[j].end = e820entries[i].addr + e820entries[i].size - 1; - crash_memory_range[j].type = xen_e820_to_kexec_type(e820entries[i].type); - segregate_lowmem_region(&j, lowmem_limit); - } - - *range = crash_memory_range; - *ranges = j; - - qsort(*range, *ranges, 
sizeof(struct memory_range), compare_ranges); - - for (i = 0; i < crash_reserved_mem_nr; i++) - if (exclude_region(ranges, crash_reserved_mem[i].start, - crash_reserved_mem[i].end) < 0) - goto err; - - ret = 0; - -err: - munlock(xen_memory_map, sizeof(struct xen_memory_map)); - munlock(e820entries, sizeof(struct e820entry) * CRASH_MAX_MEMORY_RANGES); - free(xen_memory_map); - free(e820entries); - close(fd); - - return ret; -} -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ -#else -static int get_crash_memory_ranges_xen(struct memory_range **range, - int *ranges, unsigned long lowmem_limit) -{ return 0; } #endif /* HAVE_LIBXENCTRL */ @@ -526,14 +479,14 @@ static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end) /* Adds a segment from list of memory regions which new kernel can use to * boot. Segment start and end should be aligned to 1K boundary. */ -static int add_memmap(struct memory_range *memmap_p, unsigned long long addr, - size_t size) +static int add_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size, int type) { int i, j, nr_entries = 0, tidx = 0, align = 1024; unsigned long long mstart, mend; - /* Do alignment check. */ - if ((addr%align) || (size%align)) + /* Do alignment check if it's RANGE_RAM */ + if ((type == RANGE_RAM) && ((addr%align) || (size%align))) return -1; /* Make sure at least one entry in list is free. */ @@ -559,29 +512,23 @@ static int add_memmap(struct memory_range *memmap_p, unsigned long long addr, else if (addr > mend) tidx = i+1; } - /* Insert the memory region. */ - for (j = nr_entries-1; j >= tidx; j--) - memmap_p[j+1] = memmap_p[j]; - memmap_p[tidx].start = addr; - memmap_p[tidx].end = addr + size - 1; + /* Insert the memory region. 
*/ + for (j = nr_entries-1; j >= tidx; j--) + memmap_p[j+1] = memmap_p[j]; + memmap_p[tidx].start = addr; + memmap_p[tidx].end = addr + size - 1; + memmap_p[tidx].type = type; + *nr_memmap = nr_entries + 1; - dbgprintf("Memmap after adding segment\n"); - for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - mstart = memmap_p[i].start; - mend = memmap_p[i].end; - if (mstart == 0 && mend == 0) - break; - dbgprintf("%016llx - %016llx\n", - mstart, mend); - } + dbgprint_mem_range("Memmap after adding segment", memmap_p, *nr_memmap); return 0; } /* Removes a segment from list of memory regions which new kernel can use to * boot. Segment start and end should be aligned to 1K boundary. */ -static int delete_memmap(struct memory_range *memmap_p, unsigned long long addr, - size_t size) +static int delete_memmap(struct memory_range *memmap_p, int *nr_memmap, + unsigned long long addr, size_t size) { int i, j, nr_entries = 0, tidx = -1, operation = 0, align = 1024; unsigned long long mstart, mend; @@ -643,24 +590,17 @@ static int delete_memmap(struct memory_range *memmap_p, unsigned long long addr, for (j = nr_entries-1; j > tidx; j--) memmap_p[j+1] = memmap_p[j]; memmap_p[tidx+1] = temp_region; + *nr_memmap = nr_entries + 1; } if ((operation == -1) && tidx >=0) { /* Delete the exact match memory region. 
*/ for (j = i+1; j < CRASH_MAX_MEMMAP_NR; j++) memmap_p[j-1] = memmap_p[j]; memmap_p[j-1].start = memmap_p[j-1].end = 0; + *nr_memmap = nr_entries - 1; } - dbgprintf("Memmap after deleting segment\n"); - for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - mstart = memmap_p[i].start; - mend = memmap_p[i].end; - if (mstart == 0 && mend == 0) { - break; - } - dbgprintf("%016llx - %016llx\n", - mstart, mend); - } + dbgprint_mem_range("Memmap after deleting segment", memmap_p, *nr_memmap); return 0; } @@ -728,18 +668,31 @@ static int cmdline_add_memmap(char *cmdline, struct memory_range *memmap_p) strcat(cmdline, str_mmap); for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { - unsigned long startk, endk; - startk = (memmap_p[i].start/1024); - endk = ((memmap_p[i].end + 1)/1024); + unsigned long startk, endk, type; + + startk = memmap_p[i].start/1024; + endk = (memmap_p[i].end + 1)/1024; + type = memmap_p[i].type; + + /* Only adding memory regions of RAM and ACPI */ + if (type != RANGE_RAM && + type != RANGE_ACPI && + type != RANGE_ACPI_NVS) + continue; + + if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) + endk = _ALIGN_UP(memmap_p[i].end + 1, 1024)/1024; + if (!startk && !endk) /* All regions traversed. */ break; - /* A region is not worth adding if region size < 100K. It eats - * up precious command line length. */ - if ((endk - startk) < min_sizek) + /* A RAM region is not worth adding if region size < 100K. + * It eats up precious command line length. 
*/ + if (type == RANGE_RAM && (endk - startk) < min_sizek) continue; - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_RAM); + /* And do not add e820 reserved region either */ + cmdline_add_memmap_internal(cmdline, startk, endk, type); } dbgprintf("Command line after adding memmap\n"); @@ -828,24 +781,16 @@ static enum coretype get_core_type(struct crash_elf_info *elf_info, } } -/* Appends memmap=X#Y commandline for ACPI to command line*/ -static int cmdline_add_memmap_acpi(char *cmdline, unsigned long start, - unsigned long end) +static int sysfs_efi_runtime_map_exist(void) { - int align = 1024; - unsigned long startk, endk; + DIR *dir; - if (!(end - start)) + dir = opendir("/sys/firmware/efi/runtime-map"); + if (!dir) return 0; - startk = start/1024; - endk = (end + align - 1)/1024; - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_ACPI); - - dbgprintf("Command line after adding acpi memmap\n"); - dbgprintf("%s\n", cmdline); - - return 0; + closedir(dir); + return 1; } /* Appends 'acpi_rsdp=' commandline for efi boot crash dump */ @@ -915,7 +860,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, { void *tmp; unsigned long sz, bufsz, memsz, elfcorehdr; - int nr_ranges = 0, align = 1024, i; + int nr_ranges = 0, nr_memmap = 0, align = 1024, i; struct memory_range *mem_range, *memmap_p; struct crash_elf_info elf_info; unsigned kexec_arch; @@ -959,10 +904,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, get_backup_area(info, mem_range, nr_ranges); - dbgprintf("CRASH MEMORY RANGES\n"); - - for(i = 0; i < nr_ranges; ++i) - dbgprintf("%016Lx-%016Lx\n", mem_range[i].start, mem_range[i].end); + dbgprint_mem_range("CRASH MEMORY RANGES", mem_range, nr_ranges); /* * if the core type has not been set on command line, set it here @@ -991,10 +933,10 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, sz = (sizeof(struct memory_range) * CRASH_MAX_MEMMAP_NR); memmap_p = xmalloc(sz); 
memset(memmap_p, 0, sz); - add_memmap(memmap_p, info->backup_src_start, info->backup_src_size); + add_memmap(memmap_p, &nr_memmap, info->backup_src_start, info->backup_src_size, RANGE_RAM); for (i = 0; i < crash_reserved_mem_nr; i++) { sz = crash_reserved_mem[i].end - crash_reserved_mem[i].start +1; - if (add_memmap(memmap_p, crash_reserved_mem[i].start, sz) < 0) + if (add_memmap(memmap_p, &nr_memmap, crash_reserved_mem[i].start, sz, RANGE_RAM) < 0) return ENOCRASHKERNEL; } @@ -1007,7 +949,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, 0, max_addr, -1); dbgprintf("Created backup segment at 0x%lx\n", info->backup_start); - if (delete_memmap(memmap_p, info->backup_start, sz) < 0) + if (delete_memmap(memmap_p, &nr_memmap, info->backup_start, sz) < 0) return EFAILED; } @@ -1043,22 +985,35 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, elfcorehdr = add_buffer(info, tmp, bufsz, memsz, align, min_base, max_addr, -1); dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); - if (delete_memmap(memmap_p, elfcorehdr, memsz) < 0) + if (delete_memmap(memmap_p, &nr_memmap, elfcorehdr, memsz) < 0) return -1; - cmdline_add_memmap(mod_cmdline, memmap_p); - cmdline_add_efi(mod_cmdline); + if (!bzImage_support_efi_boot || arch_options.noefi || + !sysfs_efi_runtime_map_exist()) + cmdline_add_efi(mod_cmdline); cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); /* Inform second kernel about the presence of ACPI tables. 
*/ for (i = 0; i < CRASH_MAX_MEMORY_RANGES; i++) { - unsigned long start, end; + unsigned long start, end, size, type; if ( !( mem_range[i].type == RANGE_ACPI - || mem_range[i].type == RANGE_ACPI_NVS) ) + || mem_range[i].type == RANGE_ACPI_NVS + || mem_range[i].type == RANGE_RESERVED)) continue; start = mem_range[i].start; end = mem_range[i].end; - cmdline_add_memmap_acpi(mod_cmdline, start, end); + type = mem_range[i].type; + size = end - start + 1; + add_memmap(memmap_p, &nr_memmap, start, size, type); } + + if (arch_options.pass_memmap_cmdline) + cmdline_add_memmap(mod_cmdline, memmap_p); + + /* Store 2nd kernel boot memory ranges for later reference in + * x86-setup-linux.c: setup_linux_system_parameters() */ + info->crash_range = memmap_p; + info->nr_crash_ranges = nr_memmap; + return 0; } @@ -1100,8 +1055,24 @@ static int crashkernel_mem_callback(void *UNUSED(data), int nr, int is_crashkernel_mem_reserved(void) { - crash_reserved_mem_nr = kexec_iomem_for_each_line("Crash kernel\n", - crashkernel_mem_callback, NULL); + int ret; + + if (xen_present()) { + uint64_t start, end; + + ret = xen_get_crashkernel_region(&start, &end); + if (ret < 0) + return 0; + + crash_reserved_mem[0].start = start; + crash_reserved_mem[0].end = end; + crash_reserved_mem[0].type = RANGE_RAM; + crash_reserved_mem_nr = 1; + } else { + ret = kexec_iomem_for_each_line("Crash kernel\n", + crashkernel_mem_callback, NULL); + crash_reserved_mem_nr = ret; + } return !!crash_reserved_mem_nr; } diff --git a/kexec/arch/i386/crashdump-x86.h b/kexec/arch/i386/crashdump-x86.h index b61cf0a..ddee19f 100644 --- a/kexec/arch/i386/crashdump-x86.h +++ b/kexec/arch/i386/crashdump-x86.h @@ -20,7 +20,7 @@ int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, /* Kernel text size */ #define X86_64_KERNEL_TEXT_SIZE (512UL*1024*1024) -#define CRASH_MAX_MEMMAP_NR (KEXEC_MAX_SEGMENTS + 1) +#define CRASH_MAX_MEMMAP_NR 1024 #define CRASH_MAX_MEMORY_RANGES (MAX_MEMORY_RANGES + 2) /* Backup Region, 
First 640K of System RAM. */ diff --git a/kexec/arch/i386/include/arch/options.h b/kexec/arch/i386/include/arch/options.h index aaac731..c113a83 100644 --- a/kexec/arch/i386/include/arch/options.h +++ b/kexec/arch/i386/include/arch/options.h @@ -30,6 +30,8 @@ #define OPT_VGA (OPT_ARCH_MAX+8) #define OPT_REAL_MODE (OPT_ARCH_MAX+9) #define OPT_ENTRY_32BIT (OPT_ARCH_MAX+10) +#define OPT_PASS_MEMMAP_CMDLINE (OPT_ARCH_MAX+11) +#define OPT_NOEFI (OPT_ARCH_MAX+12) /* Options relevant to the architecture (excluding loader-specific ones): */ #define KEXEC_ARCH_OPTIONS \ @@ -41,6 +43,8 @@ { "console-serial", 0, 0, OPT_CONSOLE_SERIAL }, \ { "elf32-core-headers", 0, 0, OPT_ELF32_CORE }, \ { "elf64-core-headers", 0, 0, OPT_ELF64_CORE }, \ + { "pass-memmap-cmdline", 0, 0, OPT_PASS_MEMMAP_CMDLINE }, \ + { "noefi", 0, 0, OPT_NOEFI}, \ #define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c index fc1a54f..e7bc1d6 100644 --- a/kexec/arch/i386/kexec-bzImage.c +++ b/kexec/arch/i386/kexec-bzImage.c @@ -40,6 +40,7 @@ #include <arch/options.h> static const int probe_debug = 0; +int bzImage_support_efi_boot = 0; int bzImage_probe(const char *buf, off_t len) { diff --git a/kexec/arch/i386/kexec-multiboot-x86.c b/kexec/arch/i386/kexec-multiboot-x86.c index 2f59d7b..fce7f05 100644 --- a/kexec/arch/i386/kexec-multiboot-x86.c +++ b/kexec/arch/i386/kexec-multiboot-x86.c @@ -258,10 +258,18 @@ int multiboot_x86_load(int argc, char **argv, const char *buf, off_t len, mmap[i].length_high = length >> 32; if (range[i].type == RANGE_RAM) { mmap[i].Type = 1; /* RAM */ - /* Is this the "low" memory? */ - if ((range[i].start == 0) - && (range[i].end > mem_lower)) + /* + * Is this the "low" memory? Can't just test + * against zero, because Linux protects (and + * hides) the first few pages of physical + * memory. 
+ */ + + if ((range[i].start <= 64*1024) + && (range[i].end > mem_lower)) { + range[i].start = 0; mem_lower = range[i].end; + } /* Is this the "high" memory? */ if ((range[i].start <= 0x100000) && (range[i].end > mem_upper + 0x100000)) diff --git a/kexec/arch/i386/kexec-x86-common.c b/kexec/arch/i386/kexec-x86-common.c index ed6c950..bc622e9 100644 --- a/kexec/arch/i386/kexec-x86-common.c +++ b/kexec/arch/i386/kexec-x86-common.c @@ -40,15 +40,7 @@ #include "kexec-x86.h" #ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP #include <xenctrl.h> -#else -#define __XEN_TOOLS__ 1 -#include <x86/x86-linux.h> -#include <xen/xen.h> -#include <xen/memory.h> -#include <xen/sys/privcmd.h> -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ #endif /* HAVE_LIBXENCTRL */ static struct memory_range memory_range[MAX_MEMORY_RANGES]; @@ -87,7 +79,6 @@ static int get_memory_ranges_proc_iomem(struct memory_range **range, int *ranges if (count != 2) continue; str = line + consumed; - end = end + 1; dbgprintf("%016Lx-%016Lx : %s", start, end, str); @@ -173,33 +164,19 @@ unsigned xen_e820_to_kexec_type(uint32_t type) * * @return 0 on success, any other value on failure. 
*/ -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP static int get_memory_ranges_xen(struct memory_range **range, int *ranges) { int rc, ret = -1; struct e820entry e820entries[MAX_MEMORY_RANGES]; unsigned int i; -#ifdef XENCTRL_HAS_XC_INTERFACE xc_interface *xc; -#else - int xc; -#endif -#ifdef XENCTRL_HAS_XC_INTERFACE xc = xc_interface_open(NULL, NULL, 0); if (!xc) { fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; - } -#else - xc = xc_interface_open(); - - if (xc == -1) { - fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; + return -1; } -#endif rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES); @@ -210,7 +187,7 @@ static int get_memory_ranges_xen(struct memory_range **range, int *ranges) for (i = 0; i < rc; ++i) { memory_range[i].start = e820entries[i].addr; - memory_range[i].end = e820entries[i].addr + e820entries[i].size; + memory_range[i].end = e820entries[i].addr + e820entries[i].size - 1; memory_range[i].type = xen_e820_to_kexec_type(e820entries[i].type); } @@ -229,87 +206,6 @@ err: #else static int get_memory_ranges_xen(struct memory_range **range, int *ranges) { - int fd, rc, ret = -1; - privcmd_hypercall_t hypercall; - struct e820entry *e820entries = NULL; - struct xen_memory_map *xen_memory_map = NULL; - unsigned int i; - - fd = open("/proc/xen/privcmd", O_RDWR); - - if (fd == -1) { - fprintf(stderr, "%s: open(/proc/xen/privcmd): %m\n", __func__); - goto err; - } - - rc = posix_memalign((void **)&e820entries, sysconf(_SC_PAGESIZE), - sizeof(struct e820entry) * MAX_MEMORY_RANGES); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(e820entries): %s\n", __func__, strerror(rc)); - e820entries = NULL; - goto err; - } - - rc = posix_memalign((void **)&xen_memory_map, sysconf(_SC_PAGESIZE), - sizeof(struct xen_memory_map)); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(xen_memory_map): %s\n", __func__, strerror(rc)); - xen_memory_map = NULL; - goto err; - } - - if 
(mlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES) == -1) { - fprintf(stderr, "%s: mlock(e820entries): %m\n", __func__); - goto err; - } - - if (mlock(xen_memory_map, sizeof(struct xen_memory_map)) == -1) { - fprintf(stderr, "%s: mlock(xen_memory_map): %m\n", __func__); - goto err; - } - - xen_memory_map->nr_entries = MAX_MEMORY_RANGES; - set_xen_guest_handle(xen_memory_map->buffer, e820entries); - - hypercall.op = __HYPERVISOR_memory_op; - hypercall.arg[0] = XENMEM_machine_memory_map; - hypercall.arg[1] = (__u64)xen_memory_map; - - rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hypercall); - - if (rc == -1) { - fprintf(stderr, "%s: ioctl(IOCTL_PRIVCMD_HYPERCALL): %m\n", __func__); - goto err; - } - - for (i = 0; i < xen_memory_map->nr_entries; ++i) { - memory_range[i].start = e820entries[i].addr; - memory_range[i].end = e820entries[i].addr + e820entries[i].size; - memory_range[i].type = xen_e820_to_kexec_type(e820entries[i].type); - } - - qsort(memory_range, xen_memory_map->nr_entries, sizeof(struct memory_range), compare_ranges); - - *range = memory_range; - *ranges = xen_memory_map->nr_entries; - - ret = 0; - -err: - munlock(xen_memory_map, sizeof(struct xen_memory_map)); - munlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES); - free(xen_memory_map); - free(e820entries); - close(fd); - - return ret; -} -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ -#else -static int get_memory_ranges_xen(struct memory_range **range, int *ranges) -{ return 0; } #endif /* HAVE_LIBXENCTRL */ @@ -477,11 +373,7 @@ int get_memory_ranges(struct memory_range **range, int *ranges, mem_max = end; } - dbgprintf("MEMORY RANGES\n"); - for (i = 0; i < *ranges; i++) { - dbgprintf("%016Lx-%016Lx (%d)\n", (*range)[i].start, - (*range)[i].end, (*range)[i].type); - } + dbgprint_mem_range("MEMORY RANGES", *range, *ranges); return ret; } diff --git a/kexec/arch/i386/kexec-x86.c b/kexec/arch/i386/kexec-x86.c index 014ecd5..fb0e6f9 100644 --- a/kexec/arch/i386/kexec-x86.c +++ 
b/kexec/arch/i386/kexec-x86.c @@ -54,6 +54,8 @@ void arch_usage(void) " --console-serial Enable the serial console\n" " --elf32-core-headers Prepare core headers in ELF32 format\n" " --elf64-core-headers Prepare core headers in ELF64 format\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" ); } @@ -64,6 +66,8 @@ struct arch_options_t arch_options = { .console_vga = 0, .console_serial = 0, .core_header_type = CORE_TYPE_UNDEF, + .pass_memmap_cmdline = 0, + .noefi = 0, }; int arch_process_options(int argc, char **argv) @@ -133,6 +137,12 @@ int arch_process_options(int argc, char **argv) case OPT_ELF64_CORE: arch_options.core_header_type = CORE_TYPE_ELF64; break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; } } /* Reset getopt for the next pass; called in other source modules */ diff --git a/kexec/arch/i386/kexec-x86.h b/kexec/arch/i386/kexec-x86.h index 5aa2a46..33df352 100644 --- a/kexec/arch/i386/kexec-x86.h +++ b/kexec/arch/i386/kexec-x86.h @@ -50,6 +50,8 @@ struct arch_options_t { uint8_t console_vga; uint8_t console_serial; enum coretype core_header_type; + uint8_t pass_memmap_cmdline; + uint8_t noefi; }; int multiboot_x86_probe(const char *buf, off_t len); diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c index 454fad6..9271c6c 100644 --- a/kexec/arch/i386/x86-linux-setup.c +++ b/kexec/arch/i386/x86-linux-setup.c @@ -436,39 +436,394 @@ char *find_mnt_by_fsname(char *fsname) return mntdir; } -void setup_subarch(struct x86_linux_param_header *real_mode) +static int get_bootparam(void *buf, off_t offset, size_t size) { int data_file; - const off_t offset = offsetof(typeof(*real_mode), hardware_subarch); - char *debugfs_mnt; + char *debugfs_mnt, *sysfs_mnt; char filename[PATH_MAX]; + int err, has_sysfs_params = 0; + + sysfs_mnt = find_mnt_by_fsname("sysfs"); + if (sysfs_mnt) 
{ + snprintf(filename, PATH_MAX, "%s/%s", sysfs_mnt, + "kernel/boot_params/data"); + free(sysfs_mnt); + err = access(filename, F_OK); + if (!err) + has_sysfs_params = 1; + } - debugfs_mnt = find_mnt_by_fsname("debugfs"); - if (!debugfs_mnt) - return; - snprintf(filename, PATH_MAX, "%s/%s", debugfs_mnt, "boot_params/data"); - filename[PATH_MAX-1] = 0; - free(debugfs_mnt); + if (!has_sysfs_params) { + debugfs_mnt = find_mnt_by_fsname("debugfs"); + if (!debugfs_mnt) + return 1; + snprintf(filename, PATH_MAX, "%s/%s", debugfs_mnt, + "boot_params/data"); + free(debugfs_mnt); + } data_file = open(filename, O_RDONLY); if (data_file < 0) - return; + return 1; if (lseek(data_file, offset, SEEK_SET) < 0) goto close; - read(data_file, &real_mode->hardware_subarch, sizeof(uint32_t)); + read(data_file, buf, size); close: close(data_file); + return 0; } -void setup_linux_system_parameters(struct kexec_info *info, - struct x86_linux_param_header *real_mode) +void setup_subarch(struct x86_linux_param_header *real_mode) +{ + off_t offset = offsetof(typeof(*real_mode), hardware_subarch); + + get_bootparam(&real_mode->hardware_subarch, offset, sizeof(uint32_t)); +} + +struct efi_mem_descriptor { + uint32_t type; + uint32_t pad; + uint64_t phys_addr; + uint64_t virt_addr; + uint64_t num_pages; + uint64_t attribute; +}; + +struct efi_setup_data { + uint64_t fw_vendor; + uint64_t runtime; + uint64_t tables; + uint64_t smbios; + uint64_t reserved[8]; +}; + +struct setup_data { + uint64_t next; + uint32_t type; +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 + uint32_t len; + uint8_t data[0]; +} __attribute__((packed)); + +static int get_efi_value(const char *filename, + const char *pattern, uint64_t *val) +{ + FILE *fp; + char line[1024], *s, *end; + + fp = fopen(filename, "r"); + if (!fp) + return 1; + + while (fgets(line, sizeof(line), fp) != 0) { + s = strstr(line, pattern); + if (!s) + continue; + *val = strtoull(s + 
strlen(pattern), &end, 16); + if (*val == ULLONG_MAX) { + fclose(fp); + return 2; + } + break; + } + + fclose(fp); + return 0; +} + +static int get_efi_values(struct efi_setup_data *esd) +{ + int ret = 0; + + ret = get_efi_value("/sys/firmware/efi/systab", "SMBIOS=0x", + &esd->smbios); + ret |= get_efi_value("/sys/firmware/efi/fw_vendor", "0x", + &esd->fw_vendor); + ret |= get_efi_value("/sys/firmware/efi/runtime", "0x", + &esd->runtime); + ret |= get_efi_value("/sys/firmware/efi/config_table", "0x", + &esd->tables); + return ret; +} + +static int get_efi_runtime_map(struct efi_mem_descriptor **map) +{ + DIR *dirp; + struct dirent *entry; + char filename[1024]; + struct efi_mem_descriptor md, *p = NULL; + int nr_maps = 0; + + dirp = opendir("/sys/firmware/efi/runtime-map"); + if (!dirp) + return 0; + while ((entry = readdir(dirp)) != NULL) { + sprintf(filename, + "/sys/firmware/efi/runtime-map/%s", + (char *)entry->d_name); + if (*entry->d_name == '.') + continue; + file_scanf(filename, "type", "0x%x", (unsigned int *)&md.type); + file_scanf(filename, "phys_addr", "0x%llx", + (unsigned long long *)&md.phys_addr); + file_scanf(filename, "virt_addr", "0x%llx", + (unsigned long long *)&md.virt_addr); + file_scanf(filename, "num_pages", "0x%llx", + (unsigned long long *)&md.num_pages); + file_scanf(filename, "attribute", "0x%llx", + (unsigned long long *)&md.attribute); + p = realloc(p, (nr_maps + 1) * sizeof(md)); + if (!p) + goto err_out; + + *(p + nr_maps) = md; + *map = p; + nr_maps++; + } + + closedir(dirp); + return nr_maps; +err_out: + if (map) + free(map); + closedir(dirp); + return 0; +} + +struct efi_info { + uint32_t efi_loader_signature; + uint32_t efi_systab; + uint32_t efi_memdesc_size; + uint32_t efi_memdesc_version; + uint32_t efi_memmap; + uint32_t efi_memmap_size; + uint32_t efi_systab_hi; + uint32_t efi_memmap_hi; +}; + +/* + * Add another instance to single linked list of struct setup_data. 
+ * Please refer to kernel Documentation/x86/boot.txt for more details + * about setup_data structure. + */ +static void add_setup_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode, + struct setup_data *sd) +{ + int sdsize = sizeof(struct setup_data) + sd->len; + + sd->next = real_mode->setup_data; + real_mode->setup_data = add_buffer(info, sd, sdsize, sdsize, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); +} + +/* + * setup_efi_data will collect below data and pass them to 2nd kernel. + * 1) SMBIOS, fw_vendor, runtime, config_table, they are passed via x86 + * setup_data. + * 2) runtime memory regions, set the memmap related fields in efi_info. + */ +static int setup_efi_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int64_t memmap_paddr; + struct setup_data *sd; + struct efi_setup_data *esd; + struct efi_mem_descriptor *maps; + int nr_maps, size, ret = 0; + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + ret = access("/sys/firmware/efi/systab", F_OK); + if (ret < 0) + goto out; + + esd = malloc(sizeof(struct efi_setup_data)); + if (!esd) { + ret = 1; + goto out; + } + memset(esd, 0, sizeof(struct efi_setup_data)); + ret = get_efi_values(esd); + if (ret) + goto free_esd; + nr_maps = get_efi_runtime_map(&maps); + if (!nr_maps) { + ret = 2; + goto free_esd; + } + sd = malloc(sizeof(struct setup_data) + sizeof(*esd)); + if (!sd) { + ret = 3; + goto free_maps; + } + + memset(sd, 0, sizeof(struct setup_data) + sizeof(*esd)); + sd->next = 0; + sd->type = SETUP_EFI; + sd->len = sizeof(*esd); + memcpy(sd->data, esd, sizeof(*esd)); + free(esd); + + add_setup_data(info, real_mode, sd); + + size = nr_maps * sizeof(struct efi_mem_descriptor); + memmap_paddr = add_buffer(info, maps, size, size, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); + ei->efi_memmap = memmap_paddr & 0xffffffff; + ei->efi_memmap_hi = memmap_paddr >> 32; + ei->efi_memmap_size = size; + ei->efi_memdesc_size = sizeof(struct 
efi_mem_descriptor); + + return 0; +free_maps: + free(maps); +free_esd: + free(esd); +out: + return ret; +} + +static void add_e820_map_from_mr(struct x86_linux_param_header *real_mode, + struct e820entry *e820, struct memory_range *range, int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + e820[i].addr = range[i].start; + e820[i].size = range[i].end - range[i].start + 1; + switch (range[i].type) { + case RANGE_RAM: + e820[i].type = E820_RAM; + break; + case RANGE_ACPI: + e820[i].type = E820_ACPI; + break; + case RANGE_ACPI_NVS: + e820[i].type = E820_NVS; + break; + default: + case RANGE_RESERVED: + e820[i].type = E820_RESERVED; + break; + } + dbgprintf("%016lx-%016lx (%d)\n", + e820[i].addr, + e820[i].addr + e820[i].size - 1, + e820[i].type); + + if (range[i].type != RANGE_RAM) + continue; + if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { + unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); + real_mode->ext_mem_k = mem_k; + real_mode->alt_mem_k = mem_k; + if (mem_k > 0xfc00) { + real_mode->ext_mem_k = 0xfc00; /* 64M */ + } + if (mem_k > 0xffffffff) { + real_mode->alt_mem_k = 0xffffffff; + } + } + } +} + +static void setup_e820_ext(struct kexec_info *info, struct x86_linux_param_header *real_mode, + struct memory_range *range, int nr_range) +{ + struct setup_data *sd; + struct e820entry *e820; + int nr_range_ext; + + nr_range_ext = nr_range - E820MAX; + sd = xmalloc(sizeof(struct setup_data) + nr_range_ext * sizeof(struct e820entry)); + sd->next = 0; + sd->len = nr_range_ext * sizeof(struct e820entry); + sd->type = SETUP_E820_EXT; + + e820 = (struct e820entry *) sd->data; + dbgprintf("Extended E820 via setup_data:\n"); + add_e820_map_from_mr(real_mode, e820, range + E820MAX, nr_range_ext); + add_setup_data(info, real_mode, sd); +} + +static void setup_e820(struct kexec_info *info, struct x86_linux_param_header *real_mode) { - /* Fill in information the BIOS would usually provide */ struct memory_range *range; - int i, 
ranges; + int nr_range, nr_range_saved; + + if (info->kexec_flags & KEXEC_ON_CRASH && !arch_options.pass_memmap_cmdline) { + range = info->crash_range; + nr_range = info->nr_crash_ranges; + } else { + range = info->memory_range; + nr_range = info->memory_ranges; + } + + nr_range_saved = nr_range; + if (nr_range > E820MAX) { + nr_range = E820MAX; + } + + real_mode->e820_map_nr = nr_range; + dbgprintf("E820 memmap:\n"); + add_e820_map_from_mr(real_mode, real_mode->e820_map, range, nr_range); + + if (nr_range_saved > E820MAX) { + dbgprintf("extra E820 memmap are passed via setup_data\n"); + setup_e820_ext(info, real_mode, range, nr_range_saved); + } +} + +static int +get_efi_mem_desc_version(struct x86_linux_param_header *real_mode) +{ + struct efi_info *ei = (struct efi_info *)real_mode->efi_info; + + return ei->efi_memdesc_version; +} + +static void setup_efi_info(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ + int ret, desc_version; + off_t offset = offsetof(typeof(*real_mode), efi_info); + + ret = get_bootparam(&real_mode->efi_info, offset, 32); + if (ret) + return; + if (((struct efi_info *)real_mode->efi_info)->efi_memmap_size == 0) + /* zero filled efi_info */ + goto out; + desc_version = get_efi_mem_desc_version(real_mode); + if (desc_version != 1) { + fprintf(stderr, + "efi memory descriptor version %d is not supported!\n", + desc_version); + goto out; + } + ret = setup_efi_data(info, real_mode); + if (ret) + goto out; + + return; + +out: + memset(&real_mode->efi_info, 0, 32); + return; +} + +void setup_linux_system_parameters(struct kexec_info *info, + struct x86_linux_param_header *real_mode) +{ /* get subarch from running kernel */ setup_subarch(real_mode); + if (bzImage_support_efi_boot && !arch_options.noefi) + setup_efi_info(info, real_mode); /* Default screen size */ real_mode->orig_x = 0; @@ -505,51 +860,7 @@ void setup_linux_system_parameters(struct kexec_info *info, /* another safe default */ real_mode->aux_device_info = 0; 
- range = info->memory_range; - ranges = info->memory_ranges; - if (ranges > E820MAX) { - if (!(info->kexec_flags & KEXEC_ON_CRASH)) - /* - * this e820 not used for capture kernel, see - * do_bzImage_load() - */ - fprintf(stderr, - "Too many memory ranges, truncating...\n"); - ranges = E820MAX; - } - real_mode->e820_map_nr = ranges; - for(i = 0; i < ranges; i++) { - real_mode->e820_map[i].addr = range[i].start; - real_mode->e820_map[i].size = range[i].end - range[i].start; - switch (range[i].type) { - case RANGE_RAM: - real_mode->e820_map[i].type = E820_RAM; - break; - case RANGE_ACPI: - real_mode->e820_map[i].type = E820_ACPI; - break; - case RANGE_ACPI_NVS: - real_mode->e820_map[i].type = E820_NVS; - break; - default: - case RANGE_RESERVED: - real_mode->e820_map[i].type = E820_RESERVED; - break; - } - if (range[i].type != RANGE_RAM) - continue; - if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { - unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); - real_mode->ext_mem_k = mem_k; - real_mode->alt_mem_k = mem_k; - if (mem_k > 0xfc00) { - real_mode->ext_mem_k = 0xfc00; /* 64M */ - } - if (mem_k > 0xffffffff) { - real_mode->alt_mem_k = 0xffffffff; - } - } - } + setup_e820(info, real_mode); /* fill the EDD information */ setup_edd_info(real_mode); diff --git a/kexec/arch/i386/x86-linux-setup.h b/kexec/arch/i386/x86-linux-setup.h index 09aed4d..f5d23d3 100644 --- a/kexec/arch/i386/x86-linux-setup.h +++ b/kexec/arch/i386/x86-linux-setup.h @@ -1,5 +1,6 @@ #ifndef X86_LINUX_SETUP_H #define X86_LINUX_SETUP_H +#include <x86/x86-linux.h> void init_linux_parameters(struct x86_linux_param_header *real_mode); void setup_linux_bootloader_parameters_high( @@ -28,5 +29,7 @@ void setup_linux_system_parameters(struct kexec_info *info, /* command line parameter may be appended by purgatory */ #define PURGATORY_CMDLINE_SIZE 64 +extern int bzImage_support_efi_boot; +extern struct arch_options_t arch_options; #endif /* X86_LINUX_SETUP_H */ diff --git 
a/kexec/arch/m68k/Makefile b/kexec/arch/m68k/Makefile new file mode 100644 index 0000000..eeaacbd --- /dev/null +++ b/kexec/arch/m68k/Makefile @@ -0,0 +1,15 @@ +# +# kexec m68k (linux booting linux) +# +m68k_KEXEC_SRCS = kexec/arch/m68k/kexec-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/kexec-elf-rel-m68k.c +m68k_KEXEC_SRCS += kexec/arch/m68k/bootinfo.c + +m68k_ADD_SEGMENT = +m68k_VIRT_TO_PHYS = + +dist += kexec/arch/m68k/Makefile $(m68k_KEXEC_SRCS) \ + kexec/arch/m68k/bootinfo.h \ + kexec/arch/m68k/kexec-m68k.h \ + kexec/arch/m68k/include/arch/options.h diff --git a/kexec/arch/m68k/bootinfo.c b/kexec/arch/m68k/bootinfo.c new file mode 100644 index 0000000..18bf226 --- /dev/null +++ b/kexec/arch/m68k/bootinfo.c @@ -0,0 +1,262 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../../kexec.h" + +#include "bootinfo.h" + +const char *bootinfo_file = DEFAULT_BOOTINFO_FILE; +static struct bi_rec *bootinfo; +static off_t bootinfo_size; + +static unsigned int num_memchunks; + +static struct bi_rec *bi_next(struct bi_rec *bi, uint16_t size) +{ + return (void *)((unsigned long)bi + size); +} + +static struct bi_rec *bi_find(struct bi_rec *prev, uint16_t tag) +{ + struct bi_rec *bi = prev ? bi_next(prev, prev->size) : bootinfo; + + for (bi = prev ? 
bi_next(prev, prev->size) : bootinfo; + bi->tag != BI_LAST; bi = bi_next(bi, bi->size)) + if (bi->tag == tag) + return bi; + return NULL; +} + +static void bi_remove(uint16_t tag) +{ + struct bi_rec *bi; + off_t rem; + uint16_t size; + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (bi->tag == BI_LAST) + break; + + size = bi->size; + if (bi->tag == tag) { + memmove(bi, bi_next(bi, size), rem - size); + bootinfo_size -= size; + rem -= size; + continue; + } + + bi = bi_next(bi, size); + rem -= size; + } +} + +static struct bi_rec *bi_add(uint16_t tag, uint16_t size) +{ + struct bi_rec *bi; + + /* Add 4-byte header and round up to multiple of 4 bytes */ + size = _ALIGN_UP(4 + size, 4); + + bootinfo = xrealloc(bootinfo, bootinfo_size + size); + + /* Replace old sentinel by new record */ + bi = bi_next(bootinfo, bootinfo_size - 2); + bootinfo_size += size; + memset(bi, 0, size); + bi->tag = tag; + bi->size = size; + + /* Re-add sentinel */ + bi_next(bi, size)->tag = BI_LAST; + + return bi; +} + +void bootinfo_load(void) +{ + struct bi_rec *bi; + off_t rem; + uint16_t tag, size; + + dbgprintf("Loading bootinfo from %s\n", bootinfo_file); + bootinfo = (void *)slurp_file_len(bootinfo_file, MAX_BOOTINFO_SIZE, + &bootinfo_size); + if (!bootinfo) + die("No bootinfo\n"); + + bi = bootinfo; + rem = bootinfo_size; + while (1) { + if (rem < 2) + die("Unexpected end of bootinfo\n"); + + tag = bi->tag; + if (tag == BI_LAST) { + rem -= 2; + break; + } + + if (rem < 4) + die("Unexpected end of bootinfo\n"); + + size = bi->size; + if (size < 4 || size % 4) + die("Invalid tag size\n"); + if (rem < size) + die("Unexpected end of bootinfo\n"); + + if (tag == BI_MEMCHUNK) + num_memchunks++; + + bi = bi_next(bi, size); + rem -= size; + } + + if (rem) + die("Trailing data at end of bootinfo\n"); +} + +void bootinfo_print(void) +{ + struct bi_rec *bi = bootinfo; + uint16_t tag, size; + + while (1) { + tag = bi->tag; + if (tag == BI_LAST) { + puts("BI_LAST"); + break; + } + + size 
= bi->size; + switch (tag) { + case BI_MACHTYPE: + printf("BI_MACHTYPE: 0x%08x\n", bi->machtype); + break; + + case BI_MEMCHUNK: + printf("BI_MEMCHUNK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_RAMDISK: + printf("BI_RAMDISK: 0x%08x bytes at 0x%08x\n", + bi->mem_info.size, bi->mem_info.addr); + break; + + case BI_COMMAND_LINE: + printf("BI_COMMAND_LINE: %s\n", bi->string); + break; + + default: + printf("BI tag 0x%04x size %u\n", tag, size); + break; + } + bi = bi_next(bi, size); + } +} + +int bootinfo_get_memory_ranges(struct memory_range **range) +{ + struct memory_range *ranges; + unsigned int i; + struct bi_rec *bi; + + ranges = xmalloc(num_memchunks * sizeof(struct memory_range)); + for (i = 0, bi = NULL; + i < num_memchunks && (bi = bi_find(bi, BI_MEMCHUNK)); i++) { + ranges[i].start = bi->mem_info.addr; + ranges[i].end = bi->mem_info.addr + bi->mem_info.size - 1; + ranges[i].type = RANGE_RAM; + } + + *range = ranges; + return i; +} + +void bootinfo_set_cmdline(const char *cmdline) +{ + struct bi_rec *bi; + uint16_t size; + + /* Remove existing command line records */ + bi_remove(BI_COMMAND_LINE); + + if (!cmdline) + return; + + /* Add new command line record */ + size = strlen(cmdline) + 1; + bi = bi_add(BI_COMMAND_LINE, size); + memcpy(bi->string, cmdline, size); +} + +void bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size) +{ + struct bi_rec *bi; + + /* Remove existing ramdisk records */ + bi_remove(BI_RAMDISK); + + if (!ramdisk_size) + return; + + /* Add new ramdisk record */ + bi = bi_add(BI_RAMDISK, sizeof(bi->mem_info)); + bi->mem_info.addr = ramdisk_addr; + bi->mem_info.size = ramdisk_size; +} + + + /* + * Check the bootinfo version in the kernel image + * All failures are non-fatal, as kexec may be used to load + * non-Linux images + */ + +void bootinfo_check_bootversion(const struct kexec_info *info) +{ + struct bi_rec *bi; + const struct bootversion *bv; + uint16_t major, 
minor; + unsigned int i; + + bv = info->segment[0].buf; + if (bv->magic != BOOTINFOV_MAGIC) { + printf("WARNING: No bootversion in kernel image\n"); + return; + } + + bi = bi_find(NULL, BI_MACHTYPE); + if (!bi) { + printf("WARNING: No machtype in bootinfo\n"); + return; + } + + for (i = 0; bv->machversions[i].machtype != bi->machtype; i++) + if (!bv->machversions[i].machtype) { + printf("WARNING: Machtype 0x%08x not in kernel bootversion\n", + bi->machtype); + return; + } + + major = BI_VERSION_MAJOR(bv->machversions[i].version); + minor = BI_VERSION_MINOR(bv->machversions[i].version); + dbgprintf("Kernel uses bootversion %u.%u\n", major, minor); + if (major != SUPPORTED_BOOTINFO_VERSION) + printf("WARNING: Kernel bootversion %u.%u is too %s for this kexec (expected %u.x)\n", + major, minor, + major < SUPPORTED_BOOTINFO_VERSION ? "old" : "new", + SUPPORTED_BOOTINFO_VERSION); +} + +void add_bootinfo(struct kexec_info *info, unsigned long addr) +{ + add_buffer(info, bootinfo, bootinfo_size, bootinfo_size, + sizeof(void *), addr, 0x0fffffff, 1); +} diff --git a/kexec/arch/m68k/bootinfo.h b/kexec/arch/m68k/bootinfo.h new file mode 100644 index 0000000..b6f453d --- /dev/null +++ b/kexec/arch/m68k/bootinfo.h @@ -0,0 +1,43 @@ +#include <asm/bootinfo.h> + +#define DEFAULT_BOOTINFO_FILE "/proc/bootinfo" +#define MAX_BOOTINFO_SIZE 1536 + + + /* + * Convenience overlay of several struct bi_record variants + */ + +struct bi_rec { + __be16 tag; + __be16 size; + union { + __be32 data[0]; + /* shorthands for the types we use */ + __be32 machtype; + struct { + __be32 addr; + __be32 size; + } mem_info; + char string[0]; + }; +}; + + + /* + * We only support the "new" tagged bootinfo (v2) + */ + +#define SUPPORTED_BOOTINFO_VERSION 2 + + +extern const char *bootinfo_file; + +extern void bootinfo_load(void); +extern void bootinfo_print(void); +extern int bootinfo_get_memory_ranges(struct memory_range **range); +extern void bootinfo_set_cmdline(const char *cmdline); +extern void 
bootinfo_set_ramdisk(unsigned long ramdisk_addr, + unsigned long ramdisk_size); +extern void bootinfo_check_bootversion(const struct kexec_info *info); +extern void add_bootinfo(struct kexec_info *info, unsigned long addr); diff --git a/kexec/arch/m68k/include/arch/options.h b/kexec/arch/m68k/include/arch/options.h new file mode 100644 index 0000000..f279d54 --- /dev/null +++ b/kexec/arch/m68k/include/arch/options.h @@ -0,0 +1,45 @@ +#ifndef KEXEC_ARCH_M68K_OPTIONS_H +#define KEXEC_ARCH_M68K_OPTIONS_H + +#define OPT_ARCH_MAX (OPT_MAX+0) + +/* All 'local' loader options: */ +#define OPT_APPEND (OPT_ARCH_MAX+0) +#define OPT_REUSE_CMDLINE (OPT_ARCH_MAX+1) +#define OPT_RAMDISK (OPT_ARCH_MAX+2) +#define OPT_BOOTINFO (OPT_ARCH_MAX+3) + +/* Options relevant to the architecture (excluding loader-specific ones), + * in this case none: + */ +#define KEXEC_ARCH_OPTIONS \ + KEXEC_OPTIONS \ + +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR "" + +/* The following two #defines list ALL of the options added by all of the + * architecture's loaders. + * o main() uses this complete list to scan for its options, ignoring + * arch-specific/loader-specific ones. + * o Then, arch_process_options() uses this complete list to scan for its + * options, ignoring general/loader-specific ones. + * o Then, the file_type[n].load re-scans for options, using + * KEXEC_ARCH_OPTIONS plus its loader-specific options subset. + * Any unrecognised options cause an error here. + * + * This is done so that main()'s/arch_process_options()'s getopt_long() calls + * don't choose a kernel filename from random arguments to options they don't + * recognise -- as they now recognise (if not act upon) all possible options. 
+ */ +#define KEXEC_ALL_OPTIONS \ + KEXEC_ARCH_OPTIONS \ + { "command-line", 1, NULL, OPT_APPEND }, \ + { "append", 1, NULL, OPT_APPEND }, \ + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ + { "ramdisk", 1, NULL, OPT_RAMDISK }, \ + { "initrd", 1, NULL, OPT_RAMDISK }, \ + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR + +#endif /* KEXEC_ARCH_M68K_OPTIONS_H */ diff --git a/kexec/arch/m68k/kexec-elf-m68k.c b/kexec/arch/m68k/kexec-elf-m68k.c new file mode 100644 index 0000000..8d00eb9 --- /dev/null +++ b/kexec/arch/m68k/kexec-elf-m68k.c @@ -0,0 +1,182 @@ +/* + * kexec-elf-m68k.c - kexec Elf loader for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. +*/ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <elf.h> +#include <boot/elf_boot.h> +#include <ip_checksum.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../../kexec-syscall.h" +#include "kexec-m68k.h" +#include "bootinfo.h" +#include <arch/options.h> + +#define KiB * 1024 +#define MiB * 1024 KiB + +#define PAGE_SIZE 4 KiB + + +int elf_m68k_probe(const char *buf, off_t len) +{ + struct mem_ehdr ehdr; + int result; + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + goto out; + + /* Verify the architecuture specific bits */ + if (ehdr.e_machine != EM_68K) { + /* for a different architecture */ + fprintf(stderr, "Not for this architecture.\n"); + result = -1; + goto out; + } + result = 0; + out: + free_elf_info(&ehdr); + return result; +} + +void elf_m68k_usage(void) +{ + printf(" --command-line=STRING Set the kernel command line to STRING\n" + " --append=STRING Set the kernel command line to STRING\n" + " --reuse-cmdline Use kernel 
command line from running system.\n" + " --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n" + " --initrd=FILE Use FILE as the kernel's initial ramdisk.\n" + " --bootinfo=FILE Use FILE as the kernel's bootinfo\n" + ); +} + +static unsigned long segment_end(const struct kexec_info *info, int i) +{ + return (unsigned long)info->segment[i].mem + info->segment[i].memsz - 1; +} + +int elf_m68k_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info) +{ + struct mem_ehdr ehdr; + const char *cmdline = NULL, *ramdisk_file = NULL; + int opt, result, i; + unsigned long bootinfo_addr, ramdisk_addr = 0; + off_t ramdisk_size = 0; + + /* See options.h if adding any more options. */ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, NULL, OPT_APPEND }, + { "append", 1, NULL, OPT_APPEND }, + { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, + { "ramdisk", 1, NULL, OPT_RAMDISK }, + { "initrd", 1, NULL, OPT_RAMDISK }, + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + { 0, 0, NULL, 0 }, + }; + + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + cmdline = optarg; + break; + case OPT_REUSE_CMDLINE: + cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk_file = optarg; + break; + case OPT_BOOTINFO: + break; + } + } + + result = build_elf_exec_info(buf, len, &ehdr, 0); + if (result < 0) + die("ELF exec parse failed\n"); + + /* Fixup PT_LOAD segments that include the ELF header (offset zero) */ + for (i = 0; i < ehdr.e_phnum; i++) { + struct mem_phdr *phdr; + phdr = &ehdr.e_phdr[i]; + if (phdr->p_type != PT_LOAD || phdr->p_offset) + continue; + + dbgprintf("Removing ELF header from segment %d\n", i); + phdr->p_paddr += PAGE_SIZE; + phdr->p_vaddr += PAGE_SIZE; + phdr->p_filesz -= PAGE_SIZE; + phdr->p_memsz -= 
PAGE_SIZE; + phdr->p_offset += PAGE_SIZE; + phdr->p_data += PAGE_SIZE; + } + + /* Load the ELF data */ + result = elf_exec_load(&ehdr, info); + if (result < 0) + die("ELF exec load failed\n"); + + info->entry = (void *)virt_to_phys(ehdr.e_entry); + + /* Bootinfo must be stored right after the kernel */ + bootinfo_addr = segment_end(info, info->nr_segments - 1) + 1; + + /* Load ramdisk */ + if (ramdisk_file) { + void *ramdisk = slurp_decompress_file(ramdisk_file, + &ramdisk_size); + /* Store ramdisk at top of first memory chunk */ + ramdisk_addr = _ALIGN_DOWN(info->memory_range[0].end - + ramdisk_size + 1, + PAGE_SIZE); + if (!buf) + die("Ramdisk load failed\n"); + add_buffer(info, ramdisk, ramdisk_size, ramdisk_size, + PAGE_SIZE, ramdisk_addr, info->memory_range[0].end, + 1); + } + + /* Update and add bootinfo */ + bootinfo_set_cmdline(cmdline); + bootinfo_set_ramdisk(ramdisk_addr, ramdisk_size); + if (kexec_debug) + bootinfo_print(); + add_bootinfo(info, bootinfo_addr); + + /* + * Check if the kernel (and bootinfo) exceed 4 MiB, as current kernels + * don't support that. + * As the segments are still unsorted, the bootinfo is located in the + * last segment. + */ + if (segment_end(info, info->nr_segments - 1) >= virt_to_phys(4 MiB - 1)) + printf("WARNING: Kernel is larger than 4 MiB\n"); + + /* Check struct bootversion at start of kernel */ + bootinfo_check_bootversion(info); + + return 0; +} diff --git a/kexec/arch/m68k/kexec-elf-rel-m68k.c b/kexec/arch/m68k/kexec-elf-rel-m68k.c new file mode 100644 index 0000000..fa12a16 --- /dev/null +++ b/kexec/arch/m68k/kexec-elf-rel-m68k.c @@ -0,0 +1,37 @@ +/* + * kexec-elf-rel-m68k.c - kexec Elf relocation routines + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+*/ + +#include <stdio.h> +#include <elf.h> +#include "../../kexec.h" +#include "../../kexec-elf.h" + +int machine_verify_elf_rel(struct mem_ehdr *ehdr) +{ + if (ehdr->ei_data != ELFDATA2MSB) + return 0; + if (ehdr->ei_class != ELFCLASS32) + return 0; + if (ehdr->e_machine != EM_68K) + return 0; + return 1; +} + +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr), unsigned long r_type, + void *UNUSED(location), + unsigned long UNUSED(address), + unsigned long UNUSED(value)) +{ + switch (r_type) { + default: + die("Unknown rela relocation: %lu\n", r_type); + break; + } + return; +} diff --git a/kexec/arch/m68k/kexec-m68k.c b/kexec/arch/m68k/kexec-m68k.c new file mode 100644 index 0000000..372aa37 --- /dev/null +++ b/kexec/arch/m68k/kexec-m68k.c @@ -0,0 +1,104 @@ +/* + * kexec-m68k.c - kexec for m68k + * + * Copyright (C) 2013 Geert Uytterhoeven + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include "../../kexec.h" +#include "../../kexec-syscall.h" +#include "kexec-m68k.h" +#include "bootinfo.h" +#include <arch/options.h> + + +static unsigned long m68k_memoffset; + + +/* Return a sorted list of memory ranges. 
*/ +int get_memory_ranges(struct memory_range **range, int *ranges, + unsigned long kexec_flags) +{ + bootinfo_load(); + *ranges = bootinfo_get_memory_ranges(range); + m68k_memoffset = (*range)[0].start; + return 0; +} + + +struct file_type file_type[] = { + {"elf-m68k", elf_m68k_probe, elf_m68k_load, elf_m68k_usage}, +}; +int file_types = sizeof(file_type) / sizeof(file_type[0]); + +void arch_usage(void) +{ +} + +int arch_process_options(int argc, char **argv) +{ + static const struct option options[] = { + KEXEC_ALL_OPTIONS + { "bootinfo", 1, NULL, OPT_BOOTINFO }, + { 0, 0, NULL, 0 }, + }; + static const char short_options[] = KEXEC_ALL_OPT_STR; + int opt; + + opterr = 0; /* Don't complain about unrecognized options here */ + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != + -1) { + switch (opt) { + default: + break; + case OPT_BOOTINFO: + bootinfo_file = optarg; + break; + } + } + /* Reset getopt for the next pass; called in other source modules */ + opterr = 1; + optind = 1; + return 0; +} + +const struct arch_map_entry arches[] = { + { "m68k", KEXEC_ARCH_68K }, + { NULL, 0 }, +}; + +int arch_compat_trampoline(struct kexec_info *UNUSED(info)) +{ + return 0; +} + +void arch_update_purgatory(struct kexec_info *UNUSED(info)) +{ +} + +int is_crashkernel_mem_reserved(void) +{ + return 0; +} + +unsigned long virt_to_phys(unsigned long addr) +{ + return addr + m68k_memoffset; +} + +/* + * add_segment() should convert base to a physical address on m68k, + * while the default is just to work with base as is */ +void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, + unsigned long base, size_t memsz) +{ + add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); +} diff --git a/kexec/arch/m68k/kexec-m68k.h b/kexec/arch/m68k/kexec-m68k.h new file mode 100644 index 0000000..99482c4 --- /dev/null +++ b/kexec/arch/m68k/kexec-m68k.h @@ -0,0 +1,9 @@ +#ifndef KEXEC_M68K_H +#define KEXEC_M68K_H + +int elf_m68k_probe(const char *buf, off_t 
len); +int elf_m68k_load(int argc, char **argv, const char *buf, off_t len, + struct kexec_info *info); +void elf_m68k_usage(void); + +#endif /* KEXEC_M68K_H */ diff --git a/kexec/arch/ppc/crashdump-powerpc.c b/kexec/arch/ppc/crashdump-powerpc.c index c06d310..3dc35eb 100644 --- a/kexec/arch/ppc/crashdump-powerpc.c +++ b/kexec/arch/ppc/crashdump-powerpc.c @@ -41,6 +41,7 @@ lowmem_limit: MAXMEM, * A separate program header is created for backup region */ static struct memory_range *crash_memory_range; +static int crash_nr_memory_ranges; /* Define a variable to replace the CRASH_MAX_MEMORY_RANGES macro */ static int crash_max_memory_ranges; @@ -51,6 +52,29 @@ static int crash_max_memory_ranges; */ mem_rgns_t usablemem_rgns = {0, NULL}; +/* Append a segment to crash_memory_range, splitting it into two if + * it contains both lowmem and highmem */ +static void add_crash_memory_range(unsigned long long start, + unsigned long long end) +{ +#ifndef CONFIG_PPC64 + if (start < elf_info32.lowmem_limit && end > elf_info32.lowmem_limit) { + add_crash_memory_range(start, elf_info32.lowmem_limit); + add_crash_memory_range(elf_info32.lowmem_limit, end); + return; + } +#endif + + if (crash_nr_memory_ranges < crash_max_memory_ranges) { + crash_memory_range[crash_nr_memory_ranges].start = start; + crash_memory_range[crash_nr_memory_ranges].end = end; + crash_memory_range[crash_nr_memory_ranges].type = RANGE_RAM; + } + + crash_nr_memory_ranges++; +} + + /* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to * create Elf headers. Keeping it separate from get_memory_ranges() as * requirements are different in the case of normal kexec and crashdumps. 
@@ -65,13 +89,12 @@ mem_rgns_t usablemem_rgns = {0, NULL}; static int get_crash_memory_ranges(struct memory_range **range, int *ranges) { - int memory_ranges = 0; char device_tree[256] = "/proc/device-tree/"; char fname[256]; DIR *dir, *dmem; int fd; struct dirent *dentry, *mentry; - int i, n, crash_rng_len = 0; + int n, crash_rng_len = 0; unsigned long long start, end, cstart, cend; crash_max_memory_ranges = max_memory_ranges + 6; @@ -83,13 +106,11 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges) return -1; } memset(crash_memory_range, 0, crash_rng_len); + crash_nr_memory_ranges = 0; #ifndef CONFIG_BOOKE /* create a separate program header for the backup region */ - crash_memory_range[0].start = BACKUP_SRC_START; - crash_memory_range[0].end = BACKUP_SRC_END + 1; - crash_memory_range[0].type = RANGE_RAM; - memory_ranges++; + add_crash_memory_range(BACKUP_SRC_START, BACKUP_SRC_END + 1); #endif dir = opendir(device_tree); @@ -128,70 +149,37 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges) closedir(dir); goto err; } - if (memory_ranges >= (max_memory_ranges + 1)) { - /* No space to insert another element. */ - fprintf(stderr, - "Error: Number of crash memory ranges" - " excedeed the max limit\n"); - goto err; - } #ifndef CONFIG_BOOKE if (start == 0 && end >= (BACKUP_SRC_END + 1)) start = BACKUP_SRC_END + 1; #endif - cstart = crash_base; - cend = crash_base + crash_size; /* * Exclude the region that lies within crashkernel. * If memory limit is set then exclude memory region * above it. 
*/ + if (memory_limit) { if (start >= memory_limit) continue; if (end > memory_limit) end = memory_limit; } - if (cstart < end && cend > start) { - if (start < cstart && end > cend) { - crash_memory_range[memory_ranges].start - = start; - crash_memory_range[memory_ranges].end - = cstart; - crash_memory_range[memory_ranges].type - = RANGE_RAM; - memory_ranges++; - crash_memory_range[memory_ranges].start - = cend; - crash_memory_range[memory_ranges].end - = end; - crash_memory_range[memory_ranges].type - = RANGE_RAM; - memory_ranges++; - } else if (start < cstart) { - crash_memory_range[memory_ranges].start - = start; - crash_memory_range[memory_ranges].end - = cstart; - crash_memory_range[memory_ranges].type - = RANGE_RAM; - memory_ranges++; - } else if (end > cend) { - crash_memory_range[memory_ranges].start - = cend; - crash_memory_range[memory_ranges].end - = end; - crash_memory_range[memory_ranges].type - = RANGE_RAM; - memory_ranges++; - } - } else { - crash_memory_range[memory_ranges].start = start; - crash_memory_range[memory_ranges].end = end; - crash_memory_range[memory_ranges].type - = RANGE_RAM; - memory_ranges++; + + /* + * Exclure region used by crash kernel + */ + cstart = crash_base; + cend = crash_base + crash_size; + + if (cstart >= end || cend <= start) + add_crash_memory_range(start, end); + else { + if (start < cstart) + add_crash_memory_range(start, cstart); + if (cend < end) + add_crash_memory_range(cend, end); } } closedir(dmem); @@ -210,12 +198,18 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges) cstart = crash_base; if (cend > crash_base + crash_size) cend = crash_base + crash_size; - crash_memory_range[memory_ranges].start = cstart; - crash_memory_range[memory_ranges++].end = cend; + add_crash_memory_range(cstart, cend); + } + + if (crash_nr_memory_ranges >= crash_max_memory_ranges) { + fprintf(stderr, + "Error: Number of crash memory ranges" + " excedeed the max limit\n"); + goto err; } *range = 
crash_memory_range; - *ranges = memory_ranges; + *ranges = crash_nr_memory_ranges; int j; dbgprintf("CRASH MEMORY RANGES\n"); diff --git a/kexec/arch/ppc/crashdump-powerpc.h b/kexec/arch/ppc/crashdump-powerpc.h index efdc7e3..9b9b01e 100644 --- a/kexec/arch/ppc/crashdump-powerpc.h +++ b/kexec/arch/ppc/crashdump-powerpc.h @@ -40,6 +40,8 @@ extern unsigned long long crash_base; extern unsigned long long crash_size; extern unsigned int rtas_base; extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; extern uint64_t memory_limit; #endif /* CRASHDUMP_POWERPC_H */ diff --git a/kexec/arch/ppc64/Makefile b/kexec/arch/ppc64/Makefile index 7ed0aa9..9a6e475 100644 --- a/kexec/arch/ppc64/Makefile +++ b/kexec/arch/ppc64/Makefile @@ -3,13 +3,16 @@ # ppc64_KEXEC_SRCS = kexec/arch/ppc64/kexec-elf-rel-ppc64.c ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-zImage-ppc64.c -ppc64_KEXEC_SRCS += kexec/arch/ppc64/fs2dt.c ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-elf-ppc64.c ppc64_KEXEC_SRCS += kexec/arch/ppc64/kexec-ppc64.c ppc64_KEXEC_SRCS += kexec/arch/ppc64/crashdump-ppc64.c ppc64_ARCH_REUSE_INITRD = +ppc64_FS2DT = kexec/fs2dt.c +ppc64_FS2DT_INCLUDE = -include $(srcdir)/kexec/arch/ppc64/crashdump-ppc64.h \ + -include $(srcdir)/kexec/arch/ppc64/kexec-ppc64.h + dist += kexec/arch/ppc64/Makefile $(ppc64_KEXEC_SRCS) \ kexec/arch/ppc64/kexec-ppc64.h kexec/arch/ppc64/crashdump-ppc64.h \ kexec/arch/ppc64/include/arch/options.h diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c index e31dd6d..6214b83 100644 --- a/kexec/arch/ppc64/crashdump-ppc64.c +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -38,7 +38,11 @@ static struct crash_elf_info elf_info64 = { class: ELFCLASS64, +#if BYTE_ORDER == LITTLE_ENDIAN + data: ELFDATA2LSB, +#else data: ELFDATA2MSB, +#endif machine: EM_PPC64, page_offset: PAGE_OFFSET, lowmem_limit: MAXMEM, @@ -146,12 +150,12 @@ static int get_dyn_reconf_crash_memory_ranges(void) return -1; } - start = ((uint64_t 
*)buf)[DRCONF_ADDR]; + start = be64_to_cpu(((uint64_t *)buf)[DRCONF_ADDR]); end = start + lmb_size; if (start == 0 && end >= (BACKUP_SRC_END + 1)) start = BACKUP_SRC_END + 1; - flags = (*((uint32_t *)&buf[DRCONF_FLAGS])); + flags = be32_to_cpu((*((uint32_t *)&buf[DRCONF_FLAGS]))); /* skip this block if the reserved bit is set in flags (0x80) or if the block is not assigned to this partition (0x8) */ if ((flags & 0x80) || !(flags & 0x8)) @@ -252,8 +256,9 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges) goto err; } - start = ((unsigned long long *)buf)[0]; - end = start + ((unsigned long long *)buf)[1]; + start = be64_to_cpu(((unsigned long long *)buf)[0]); + end = start + + be64_to_cpu(((unsigned long long *)buf)[1]); if (start == 0 && end >= (BACKUP_SRC_END + 1)) start = BACKUP_SRC_END + 1; @@ -293,6 +298,34 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges) crash_memory_range[memory_ranges++].end = cend; } + /* + * If OPAL region is overlapped with crashkernel, need to create ELF + * Program header for the overlapped memory. + */ + if (crash_base < opal_base + opal_size && + opal_base < crash_base + crash_size) { + page_size = getpagesize(); + cstart = opal_base; + cend = opal_base + opal_size; + if (cstart < crash_base) + cstart = crash_base; + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + /* + * The opal section created here is formed by reading opal-base + * and opal-size from /proc/device-tree/ibm,opal. Unfortunately + * opal-size is not required to be a multiple of PAGE_SIZE + * The remainder of the page it ends on is just garbage, and is + * safe to read, its just not accounted in opal-size. Since + * we're creating an elf section here though, lets round it up + * to the next page size boundary though, so makedumpfile can + * read it safely without going south on us. 
+ */ + cend = _ALIGN(cend, page_size); + + crash_memory_range[memory_ranges].start = cstart; + crash_memory_range[memory_ranges++].end = cend; + } *range = crash_memory_range; *ranges = memory_ranges; diff --git a/kexec/arch/ppc64/crashdump-ppc64.h b/kexec/arch/ppc64/crashdump-ppc64.h index e9d28c9..d654c6b 100644 --- a/kexec/arch/ppc64/crashdump-ppc64.h +++ b/kexec/arch/ppc64/crashdump-ppc64.h @@ -1,6 +1,9 @@ #ifndef CRASHDUMP_PPC64_H #define CRASHDUMP_PPC64_H +#include <stdint.h> +#include <sys/types.h> + struct kexec_info; int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, uint64_t max_addr, unsigned long min_base); @@ -28,6 +31,8 @@ extern uint64_t crash_size; extern uint64_t memory_limit; extern unsigned int rtas_base; extern unsigned int rtas_size; +extern uint64_t opal_base; +extern uint64_t opal_size; uint64_t lmb_size; unsigned int num_of_lmbs; diff --git a/kexec/arch/ppc64/fs2dt.c b/kexec/arch/ppc64/fs2dt.c deleted file mode 100644 index 924c7ff..0000000 --- a/kexec/arch/ppc64/fs2dt.c +++ /dev/null @@ -1,738 +0,0 @@ -/* - * fs2dt: creates a flattened device-tree - * - * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation - * Copyright (C) 2005 R Sharada (sharada@in.ibm.com), IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation (version 2 of the License). - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define _GNU_SOURCE -#include <sys/types.h> -#include <sys/stat.h> - -#include <fcntl.h> -#include <dirent.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <stdio.h> -#include "../../kexec.h" -#include "kexec-ppc64.h" -#include "crashdump-ppc64.h" - -#define MAXPATH 1024 /* max path name length */ -#define NAMESPACE 16384 /* max bytes for property names */ -#define INIT_TREE_WORDS 65536 /* Initial num words for prop values */ -#define MEMRESERVE 256 /* max number of reserved memory blocks */ -#define MEM_RANGE_CHUNK_SZ 2048 /* Initial num dwords for mem ranges */ - -static char pathname[MAXPATH], *pathstart; -static char propnames[NAMESPACE] = { 0 }; -static unsigned *dt_base, *dt; -static unsigned int dt_cur_size; -static unsigned long long mem_rsrv[2*MEMRESERVE] = { 0, 0 }; - -static int crash_param = 0; -static char local_cmdline[COMMAND_LINE_SIZE] = { "" }; -extern mem_rgns_t usablemem_rgns; -static struct bootblock bb[1]; -extern int my_debug; - -/* Before we add something to the dt, reserve N words using this. - * If there isn't enough room, it's realloced -- and you don't overflow and - * splat bits of your heap. 
- */ -void dt_reserve(unsigned **dt_ptr, unsigned words) -{ - unsigned int sz = INIT_TREE_WORDS; - - if (sz < words) - sz = words; - - if (((*dt_ptr - dt_base) + words) >= dt_cur_size) { - int offset; - unsigned int new_size = dt_cur_size + sz; - unsigned *new_dt = realloc(dt_base, new_size*4); - - if (!new_dt) - die("unrecoverable error: Can't realloc %d bytes for " - "device tree\n", new_size*4); - offset = *dt_ptr - dt_base; - dt_base = new_dt; - dt_cur_size = new_size; - *dt_ptr = dt_base + offset; - memset(*dt_ptr, 0, (new_size - offset)*4); - } -} - -void reserve(unsigned long long where, unsigned long long length) -{ - size_t offset; - - for (offset = 0; mem_rsrv[offset + 1]; offset += 2) - ; - - if (offset + 4 >= 2 * MEMRESERVE) - die("unrecoverable error: exhasuted reservation meta data\n"); - - mem_rsrv[offset] = where; - mem_rsrv[offset + 1] = length; - mem_rsrv[offset + 3] = 0; /* N.B: don't care about offset + 2 */ -} - -/* look for properties we need to reserve memory space for */ -static void checkprop(char *name, unsigned *data, int len) -{ - static unsigned long long base, size, end; - - if ((data == NULL) && (base || size || end)) - die("unrecoverable error: no property data"); - else if (!strcmp(name, "linux,rtas-base")) - base = *data; - else if (!strcmp(name, "linux,tce-base")) - base = *(unsigned long long *) data; - else if (!strcmp(name, "rtas-size") || - !strcmp(name, "linux,tce-size")) - size = *data; - else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) - if (len == 8) - base = *(unsigned long long *) data; - else - base = *data; - else if (reuse_initrd && !strcmp(name, "linux,initrd-end")) - end = *(unsigned long long *) data; - - if (size && end) - die("unrecoverable error: size and end set at same time\n"); - if (base && size) { - reserve(base, size); - base = size = 0; - } - if (base && end) { - reserve(base, end-base); - base = end = 0; - } -} - -/* - * return the property index for a property name, creating a new one - * 
if needed. - */ -static unsigned propnum(const char *name) -{ - unsigned offset = 0; - - while(propnames[offset]) - if (strcmp(name, propnames+offset)) - offset += strlen(propnames+offset)+1; - else - return offset; - - if (NAMESPACE - offset < strlen(name) + 1) - die("unrecoverable error: propnames overrun\n"); - - strcpy(propnames+offset, name); - - return offset; -} - -static void add_dyn_reconf_usable_mem_property(int fd) -{ - char fname[MAXPATH], *bname; - uint64_t buf[32]; - uint64_t *ranges; - int ranges_size = MEM_RANGE_CHUNK_SZ; - uint64_t base, end, loc_base, loc_end; - size_t i, rngs_cnt, range; - int rlen = 0; - int tmp_indx; - - strcpy(fname, pathname); - bname = strrchr(fname, '/'); - bname[0] = '\0'; - bname = strrchr(fname, '/'); - if (strncmp(bname, "/ibm,dynamic-reconfiguration-memory", 36)) - return; - - if (lseek(fd, 4, SEEK_SET) < 0) - die("unrecoverable error: error seeking in \"%s\": %s\n", - pathname, strerror(errno)); - - ranges = malloc(ranges_size*8); - if (!ranges) - die("unrecoverable error: can't alloc %d bytes for ranges.\n", - ranges_size*8); - - rlen = 0; - for (i = 0; i < num_of_lmbs; i++) { - if (read(fd, buf, 24) < 0) - die("unrecoverable error: error reading \"%s\": %s\n", - pathname, strerror(errno)); - - base = (uint64_t) buf[0]; - end = base + lmb_size; - if (~0ULL - base < end) - die("unrecoverable error: mem property overflow\n"); - - tmp_indx = rlen++; - - rngs_cnt = 0; - for (range = 0; range < usablemem_rgns.size; range++) { - int add = 0; - loc_base = usablemem_rgns.ranges[range].start; - loc_end = usablemem_rgns.ranges[range].end; - if (loc_base >= base && loc_end <= end) { - add = 1; - } else if (base < loc_end && end > loc_base) { - if (loc_base < base) - loc_base = base; - if (loc_end > end) - loc_end = end; - add = 1; - } - - if (add) { - if (rlen >= (ranges_size-2)) { - ranges_size += MEM_RANGE_CHUNK_SZ; - ranges = realloc(ranges, ranges_size*8); - if (!ranges) - die("unrecoverable error: can't" - " realloc %d 
bytes for" - " ranges.\n", - ranges_size*8); - } - ranges[rlen++] = loc_base; - ranges[rlen++] = loc_end - loc_base; - rngs_cnt++; - } - } - if (rngs_cnt == 0) { - /* We still need to add a counter for every LMB because - * the kernel parsing code is dumb. We just have - * a zero in this case, with no following base/len. - */ - ranges[tmp_indx] = 0; - /* rlen is already just tmp_indx+1 as we didn't write - * anything. Check array size here, as we'll probably - * go on for a while writing zeros now. - */ - if (rlen >= (ranges_size-1)) { - ranges_size += MEM_RANGE_CHUNK_SZ; - ranges = realloc(ranges, ranges_size*8); - if (!ranges) - die("unrecoverable error: can't" - " realloc %d bytes for" - " ranges.\n", - ranges_size*8); - } - } else { - /* Store the count of (base, size) duple */ - ranges[tmp_indx] = rngs_cnt; - } - } - - rlen = rlen * sizeof(uint64_t); - /* - * Add linux,drconf-usable-memory property. - */ - dt_reserve(&dt, 4+((rlen + 3)/4)); - *dt++ = 3; - *dt++ = rlen; - *dt++ = propnum("linux,drconf-usable-memory"); - if ((rlen >= 8) && ((unsigned long)dt & 0x4)) - dt++; - memcpy(dt, ranges, rlen); - free(ranges); - dt += (rlen + 3)/4; -} - -static void add_usable_mem_property(int fd, size_t len) -{ - char fname[MAXPATH], *bname; - uint64_t buf[2]; - uint64_t *ranges; - int ranges_size = MEM_RANGE_CHUNK_SZ; - uint64_t base, end, loc_base, loc_end; - size_t range; - int rlen = 0; - ssize_t slen; - - strcpy(fname, pathname); - bname = strrchr(fname,'/'); - bname[0] = '\0'; - bname = strrchr(fname,'/'); - if (strncmp(bname, "/memory@", 8)) - return; - - if (len < 2 * sizeof(uint64_t)) - die("unrecoverable error: not enough data for mem property\n"); - slen = 2 * sizeof(uint64_t); - - if (lseek(fd, 0, SEEK_SET) < 0) - die("unrecoverable error: error seeking in \"%s\": %s\n", - pathname, strerror(errno)); - if (read(fd, buf, slen) != slen) - die("unrecoverable error: error reading \"%s\": %s\n", - pathname, strerror(errno)); - - if (~0ULL - buf[0] < buf[1]) - 
die("unrecoverable error: mem property overflow\n"); - base = buf[0]; - end = base + buf[1]; - - ranges = malloc(ranges_size*8); - if (!ranges) - die("unrecoverable error: can't alloc %d bytes for ranges.\n", - ranges_size*8); - - for (range = 0; range < usablemem_rgns.size; range++) { - int add = 0; - loc_base = usablemem_rgns.ranges[range].start; - loc_end = usablemem_rgns.ranges[range].end; - if (loc_base >= base && loc_end <= end) { - add = 1; - } else if (base < loc_end && end > loc_base) { - if (loc_base < base) - loc_base = base; - if (loc_end > end) - loc_end = end; - add = 1; - } - if (add) { - if (rlen >= (ranges_size-2)) { - ranges_size += MEM_RANGE_CHUNK_SZ; - ranges = realloc(ranges, ranges_size*8); - if (!ranges) - die("unrecoverable error: can't realloc" - "%d bytes for ranges.\n", - ranges_size*8); - } - ranges[rlen++] = loc_base; - ranges[rlen++] = loc_end - loc_base; - } - } - - if (!rlen) { - /* - * User did not pass any ranges for thsi region. Hence, write - * (0,0) duple in linux,usable-memory property such that - * this region will be ignored. - */ - ranges[rlen++] = 0; - ranges[rlen++] = 0; - } - - rlen = rlen * sizeof(uint64_t); - /* - * No add linux,usable-memory property. 
- */ - dt_reserve(&dt, 4+((rlen + 3)/4)); - *dt++ = 3; - *dt++ = rlen; - *dt++ = propnum("linux,usable-memory"); - if ((rlen >= 8) && ((unsigned long)dt & 0x4)) - dt++; - memcpy(dt, ranges, rlen); - free(ranges); - dt += (rlen + 3)/4; -} - -/* put all properties (files) in the property structure */ -static void putprops(char *fn, struct dirent **nlist, int numlist) -{ - struct dirent *dp; - int i = 0, fd; - size_t len; - ssize_t slen; - struct stat statbuf; - - for (i = 0; i < numlist; i++) { - dp = nlist[i]; - strcpy(fn, dp->d_name); - - if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) - continue; - - if (lstat(pathname, &statbuf)) - die("unrecoverable error: could not stat \"%s\": %s\n", - pathname, strerror(errno)); - - if (!crash_param && !strcmp(fn,"linux,crashkernel-base")) - continue; - - if (!crash_param && !strcmp(fn,"linux,crashkernel-size")) - continue; - - /* - * This property will be created for each node during kexec - * boot. So, ignore it. - */ - if (!strcmp(dp->d_name, "linux,pci-domain") || - !strcmp(dp->d_name, "linux,htab-base") || - !strcmp(dp->d_name, "linux,htab-size") || - !strcmp(dp->d_name, "linux,kernel-end")) - continue; - - /* This property will be created/modified later in putnode() - * So ignore it, unless we are reusing the initrd. - */ - if ((!strcmp(dp->d_name, "linux,initrd-start") || - !strcmp(dp->d_name, "linux,initrd-end")) && - !reuse_initrd) - continue; - - /* This property will be created later in putnode() So - * ignore it now. - */ - if (!strcmp(dp->d_name, "bootargs")) - continue; - - if (! 
S_ISREG(statbuf.st_mode)) - continue; - - len = statbuf.st_size; - - dt_reserve(&dt, 4+((len + 3)/4)); - *dt++ = 3; - *dt++ = len; - *dt++ = propnum(fn); - - if ((len >= 8) && ((unsigned long)dt & 0x4)) - dt++; - - fd = open(pathname, O_RDONLY); - if (fd == -1) - die("unrecoverable error: could not open \"%s\": %s\n", - pathname, strerror(errno)); - - slen = read(fd, dt, len); - if (slen < 0) - die("unrecoverable error: could not read \"%s\": %s\n", - pathname, strerror(errno)); - if ((size_t)slen != len) - die("unrecoverable error: short read from\"%s\"\n", - pathname); - - checkprop(fn, dt, len); - - dt += (len + 3)/4; - if (!strcmp(dp->d_name, "reg") && usablemem_rgns.size) - add_usable_mem_property(fd, len); - if (!strcmp(dp->d_name, "ibm,dynamic-memory") && - usablemem_rgns.size) - add_dyn_reconf_usable_mem_property(fd); - - close(fd); - } - - fn[0] = '\0'; - checkprop(pathname, NULL, 0); -} - -/* - * Compare function used to sort the device-tree directories - * This function will be passed to scandir. - */ -static int comparefunc(const struct dirent **dentry1, - const struct dirent **dentry2) -{ - char *str1 = (*(struct dirent **)dentry1)->d_name; - char *str2 = (*(struct dirent **)dentry2)->d_name; - - /* - * strcmp scans from left to right and fails to idetify for some - * strings such as memory@10000000 and memory@f000000. - * Therefore, we get the wrong sorted order like memory@10000000 and - * memory@f000000. - */ - if (strchr(str1, '@') && strchr(str2, '@') && - (strlen(str1) > strlen(str2))) - return 1; - - return strcmp(str1, str2); -} - -/* - * put a node (directory) in the property structure. first properties - * then children. - */ -static void putnode(void) -{ - char *dn; - struct dirent *dp; - char *basename; - struct dirent **namelist; - int numlist, i; - struct stat statbuf; - int plen; - - plen = *pathstart ? strlen(pathstart) : 1; - /* Reserve space for string packed to words; e.g. 
string length 10 - * occupies 3 words, length 12 occupies 4 (for terminating \0s). - * So round up & include the \0: - */ - dt_reserve(&dt, 1+((plen + 4)/4)); - *dt++ = 1; - strcpy((void *)dt, *pathstart ? pathstart : "/"); - dt += ((plen + 4)/4); - - numlist = scandir(pathname, &namelist, 0, comparefunc); - if (numlist < 0) - die("unrecoverable error: could not scan \"%s\": %s\n", - pathname, strerror(errno)); - if (numlist == 0) - die("unrecoverable error: no directory entries in \"%s\"", - pathname); - - basename = strrchr(pathname,'/'); - - strncat(pathname, "/", MAXPATH - strlen(pathname) - 1); - dn = pathname + strlen(pathname); - - putprops(dn, namelist, numlist); - - /* Add initrd entries to the second kernel */ - if (initrd_base && !strcmp(basename,"/chosen/")) { - int len = 8; - unsigned long long initrd_end; - - dt_reserve(&dt, 12); /* both props, of 6 words ea. */ - *dt++ = 3; - *dt++ = len; - *dt++ = propnum("linux,initrd-start"); - - if ((len >= 8) && ((unsigned long)dt & 0x4)) - dt++; - - memcpy(dt,&initrd_base,len); - dt += (len + 3)/4; - - len = 8; - *dt++ = 3; - *dt++ = len; - *dt++ = propnum("linux,initrd-end"); - - initrd_end = initrd_base + initrd_size; - if ((len >= 8) && ((unsigned long)dt & 0x4)) - dt++; - - memcpy(dt,&initrd_end,len); - dt += (len + 3)/4; - - reserve(initrd_base, initrd_size); - } - - /* Add cmdline to the second kernel. Check to see if the new - * cmdline has a root=. If not, use the old root= cmdline. */ - if (!strcmp(basename,"/chosen/")) { - size_t cmd_len = 0; - char *param = NULL; - char filename[MAXPATH]; - char *buff; - int fd; - - cmd_len = strlen(local_cmdline); - if (cmd_len != 0) { - param = strstr(local_cmdline, "crashkernel="); - if (param) - crash_param = 1; - /* does the new cmdline have a root= ? ... */ - param = strstr(local_cmdline, "root="); - } - - /* ... 
if not, grab root= from the old command line */ - if (!param) { - FILE *fp; - char *last_cmdline = NULL; - char *old_param; - - strcpy(filename, pathname); - strncat(filename, "bootargs", MAXPATH - strlen(filename) - 1); - fp = fopen(filename, "r"); - if (fp) { - if (getline(&last_cmdline, &cmd_len, fp) == -1) - die("unable to read %s\n", filename); - - param = strstr(last_cmdline, "root="); - if (param) { - old_param = strtok(param, " "); - if (cmd_len != 0) - strcat(local_cmdline, " "); - strcat(local_cmdline, old_param); - } - } - if (last_cmdline) - free(last_cmdline); - } - strcat(local_cmdline, " "); - cmd_len = strlen(local_cmdline); - cmd_len = cmd_len + 1; - - /* add new bootargs */ - dt_reserve(&dt, 4+((cmd_len+3)/4)); - *dt++ = 3; - *dt++ = cmd_len; - *dt++ = propnum("bootargs"); - if ((cmd_len >= 8) && ((unsigned long)dt & 0x4)) - dt++; - memcpy(dt, local_cmdline,cmd_len); - dt += (cmd_len + 3)/4; - - fprintf(stderr, "Modified cmdline:%s\n", local_cmdline); - - /* - * Determine the platform type/stdout type, so that purgatory - * code can print 'I'm in purgatory' message. Currently only - * pseries/hvcterminal is supported. 
- */ - strcpy(filename, pathname); - strncat(filename, "linux,stdout-path", MAXPATH - strlen(filename) - 1); - fd = open(filename, O_RDONLY); - if (fd == -1) { - printf("Unable to find %s, printing from purgatory is disabled\n", filename); - goto no_debug; - } - if (fstat(fd, &statbuf)) { - printf("Unable to stat %s, printing from purgatory is disabled\n", filename); - close(fd); - goto no_debug; - - } - - buff = malloc(statbuf.st_size); - if (!buff) { - printf("Can not allocate memory for buff\n"); - close(fd); - goto no_debug; - } - read(fd, buff, statbuf.st_size); - close(fd); - strncpy(filename, "/proc/device-tree/", MAXPATH); - strncat(filename, buff, MAXPATH - strlen(filename) - 1); - strncat(filename, "/compatible", MAXPATH - strlen(filename) - 1); - fd = open(filename, O_RDONLY); - if (fd == -1) { - printf("Unable to find %s printing from purgatory is disabled\n", filename); - goto no_debug; - } - if (fstat(fd, &statbuf)) { - printf("Unable to stat %s printing from purgatory is disabled\n", filename); - close(fd); - goto no_debug; - } - buff = realloc(buff, statbuf.st_size); - if (!buff) { - printf("Can not allocate memory for buff\n"); - close(fd); - goto no_debug; - } - read(fd, buff, statbuf.st_size); - if (!strcmp(buff, "hvterm1") || !strcmp(buff, "hvterm-protocol")) - my_debug = 1; - close(fd); - free(buff); - } - -no_debug: - for (i=0; i < numlist; i++) { - dp = namelist[i]; - strcpy(dn, dp->d_name); - free(namelist[i]); - - if (!strcmp(dn, ".") || !strcmp(dn, "..")) - continue; - - if (lstat(pathname, &statbuf)) - die("unrecoverable error: could not stat \"%s\": %s\n", - pathname, strerror(errno)); - - if (S_ISDIR(statbuf.st_mode)) - putnode(); - } - - dt_reserve(&dt, 1); - *dt++ = 2; - dn[-1] = '\0'; - free(namelist); -} - -int create_flatten_tree(char **bufp, off_t *sizep, char *cmdline) -{ - unsigned long len; - unsigned long tlen; - char *buf; - unsigned long me; - - me = 0; - - strcpy(pathname, "/proc/device-tree/"); - - pathstart = pathname + 
strlen(pathname); - - dt_cur_size = INIT_TREE_WORDS; - dt_base = malloc(dt_cur_size*4); - if (!dt_base) { - die("Can't malloc %d bytes for dt struct!\n", dt_cur_size*4); - } - memset(dt_base, 0, dt_cur_size*4); - - dt = dt_base; - - if (cmdline) - strcpy(local_cmdline, cmdline); - - putnode(); - dt_reserve(&dt, 1); - *dt++ = 9; - - len = _ALIGN(sizeof(bb[0]), 8); - - bb->off_mem_rsvmap = len; - - for (len = 1; mem_rsrv[len]; len += 2) - ; - len+= 3; - len *= sizeof(mem_rsrv[0]); - - bb->off_dt_struct = bb->off_mem_rsvmap + len; - - len = dt - dt_base; - len *= sizeof(unsigned); - bb->off_dt_strings = bb->off_dt_struct + len; - - len = _ALIGN(propnum(""), 4); - bb->totalsize = bb->off_dt_strings + len; - - bb->magic = 0xd00dfeed; - bb->version = 2; - bb->last_comp_version = 2; - - reserve(me, bb->totalsize); /* patched later in kexec_load */ - - buf = malloc(bb->totalsize); - *bufp = buf; - memcpy(buf, bb, bb->off_mem_rsvmap); - tlen = bb->off_mem_rsvmap; - memcpy(buf+tlen, mem_rsrv, bb->off_dt_struct - bb->off_mem_rsvmap); - tlen = tlen + (bb->off_dt_struct - bb->off_mem_rsvmap); - memcpy(buf+tlen, dt_base, bb->off_dt_strings - bb->off_dt_struct); - tlen = tlen + (bb->off_dt_strings - bb->off_dt_struct); - memcpy(buf+tlen, propnames, bb->totalsize - bb->off_dt_strings); - tlen = tlen + bb->totalsize - bb->off_dt_strings; - *sizep = tlen; - free(dt_base); - return 0; -} diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c index cc0531d..ce10367 100644 --- a/kexec/arch/ppc64/kexec-elf-ppc64.c +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -35,6 +35,7 @@ #include "../../kexec-elf.h" #include "../../kexec-syscall.h" #include "kexec-ppc64.h" +#include "../../fs2dt.h" #include "crashdump-ppc64.h" #include <arch/options.h> @@ -70,6 +71,26 @@ void arch_reuse_initrd(void) reuse_initrd = 1; } +static int read_prop(char *name, void *value, size_t len) +{ + int fd; + size_t rlen; + + fd = open(name, O_RDONLY); + if (fd == -1) + return -1; + + rlen 
= read(fd, value, len); + if (rlen < 0) + fprintf(stderr, "Warning : Can't read %s : %s", + name, strerror(errno)); + else if (rlen != len) + fprintf(stderr, "Warning : short read from %s", name); + + close(fd); + return 0; +} + int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, struct kexec_info *info) { @@ -82,11 +103,14 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, off_t seg_size = 0; struct mem_phdr *phdr; size_t size; +#ifdef NEED_RESERVE_DTB uint64_t *rsvmap_ptr; struct bootblock *bb_ptr; +#endif int i; int result, opt; uint64_t my_kernel, my_dt_offset; + uint64_t my_opal_base = 0, my_opal_entry = 0; unsigned int my_panic_kernel; uint64_t my_stack, my_backup_start; uint64_t toc_addr; @@ -228,18 +252,34 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size, 0, 0, max_addr, -1); +#ifdef NEED_RESERVE_DTB /* patch reserve map address for flattened device-tree * find last entry (both 0) in the reserve mem list. 
Assume DT * entry is before this one */ bb_ptr = (struct bootblock *)(seg_buf); - rsvmap_ptr = (uint64_t *)(seg_buf + bb_ptr->off_mem_rsvmap); + rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap)); while (*rsvmap_ptr || *(rsvmap_ptr+1)) rsvmap_ptr += 2; rsvmap_ptr -= 2; - *rsvmap_ptr = my_dt_offset; + *rsvmap_ptr = cpu_to_be64(my_dt_offset); rsvmap_ptr++; - *rsvmap_ptr = bb_ptr->totalsize; + *rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize)); +#endif + + if (read_prop("/proc/device-tree/ibm,opal/opal-base-address", + &my_opal_base, sizeof(my_opal_base)) == 0) { + my_opal_base = be64_to_cpu(my_opal_base); + elf_rel_set_symbol(&info->rhdr, "opal_base", + &my_opal_base, sizeof(my_opal_base)); + } + + if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address", + &my_opal_entry, sizeof(my_opal_entry)) == 0) { + my_opal_entry = be64_to_cpu(my_opal_entry); + elf_rel_set_symbol(&info->rhdr, "opal_entry", + &my_opal_entry, sizeof(my_opal_entry)); + } /* Set kernel */ elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); @@ -301,7 +341,13 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, toc_addr = 0; my_run_at_load = 0; my_debug = 0; + my_opal_base = 0; + my_opal_entry = 0; + elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base, + sizeof(my_opal_base)); + elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry, + sizeof(my_opal_entry)); elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel)); elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset, sizeof(my_dt_offset)); @@ -328,6 +374,8 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr); dbgprintf("purgatory size is %zu\n", purgatory_size); dbgprintf("debug is %d\n", my_debug); + dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base); + dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry); for (i = 
0; i < info->nr_segments; i++) fprintf(stderr, "segment[%d].mem:%p memsz:%zu\n", i, diff --git a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c index 54d506a..9b191d0 100644 --- a/kexec/arch/ppc64/kexec-elf-rel-ppc64.c +++ b/kexec/arch/ppc64/kexec-elf-rel-ppc64.c @@ -7,9 +7,6 @@ int machine_verify_elf_rel(struct mem_ehdr *ehdr) { - if (ehdr->ei_data != ELFDATA2MSB) { - return 0; - } if (ehdr->ei_class != ELFCLASS64) { return 0; } @@ -147,6 +144,33 @@ void machine_apply_elf_rel(struct mem_ehdr *ehdr, unsigned long r_type, *(uint16_t *)location = (((uint64_t)value >> 48) & 0xffff); break; + /* R_PPC64_REL16_HA and R_PPC64_REL16_LO are handled to support + * ABIv2 r2 assignment based on r12 for PIC executable. + * Here address is know so replace + * 0: addis 2,12,.TOC.-0b@ha + * addi 2,2,.TOC.-0b@l + * by + * lis 2,.TOC.@ha + * addi 2,2,.TOC.@l + */ + case R_PPC64_REL16_HA: + /* check that we are dealing with the addis 2,12 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x3c4c0000) + die("Unexpected instruction for R_PPC64_REL16_HA"); + value += my_r2(ehdr); + /* replacing by lis 2 */ + *(uint32_t *)location = 0x3c400000 + ((value >> 16) & 0xffff); + break; + + case R_PPC64_REL16_LO: + /* check that we are dealing with the addi 2,2 instruction */ + if (((*(uint32_t*)location) & 0xffff0000) != 0x38420000) + die("Unexpected instruction for R_PPC64_REL16_LO"); + + value += my_r2(ehdr) - 4; + *(uint16_t *)location = value & 0xffff; + break; + default: die("Unknown rela relocation: %lu\n", r_type); break; diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c index 21bd0a8..6e79f52 100644 --- a/kexec/arch/ppc64/kexec-ppc64.c +++ b/kexec/arch/ppc64/kexec-ppc64.c @@ -31,6 +31,7 @@ #include "../../kexec.h" #include "../../kexec-syscall.h" #include "kexec-ppc64.h" +#include "../../fs2dt.h" #include "crashdump-ppc64.h" #include <arch/options.h> @@ -43,6 +44,7 @@ uint64_t memory_limit; static int 
nr_memory_ranges, nr_exclude_ranges; uint64_t crash_base, crash_size; unsigned int rtas_base, rtas_size; +uint64_t opal_base, opal_size; int max_memory_ranges; static void cleanup_memory_ranges(void) @@ -167,7 +169,7 @@ static int get_dyn_reconf_base_ranges(void) * lmb_size, num_of_lmbs(global variables) are * initialized once here. */ - lmb_size = ((uint64_t *)buf)[0]; + lmb_size = be64_to_cpu(((uint64_t *)buf)[0]); fclose(file); strcpy(fname, "/proc/device-tree/"); @@ -183,7 +185,7 @@ static int get_dyn_reconf_base_ranges(void) fclose(file); return -1; } - num_of_lmbs = ((unsigned int *)buf)[0]; + num_of_lmbs = be32_to_cpu(((unsigned int *)buf)[0]); for (i = 0; i < num_of_lmbs; i++) { if ((n = fread(buf, 1, 24, file)) < 0) { @@ -194,7 +196,7 @@ static int get_dyn_reconf_base_ranges(void) if (nr_memory_ranges >= max_memory_ranges) return -1; - start = ((uint64_t *)buf)[0]; + start = be64_to_cpu(((uint64_t *)buf)[0]); end = start + lmb_size; add_base_memory_range(start, end); } @@ -278,8 +280,8 @@ static int get_base_ranges(void) if (realloc_memory_ranges() < 0) break; } - start = ((uint64_t *)buf)[0]; - end = start + ((uint64_t *)buf)[1]; + start = be64_to_cpu(((uint64_t *)buf)[0]); + end = start + be64_to_cpu(((uint64_t *)buf)[1]); add_base_memory_range(start, end); fclose(file); } @@ -313,6 +315,47 @@ static int sort_ranges(void) return 0; } +void scan_reserved_ranges(unsigned long kexec_flags, int *range_index) +{ + char fname[256], buf[16]; + FILE *file; + int i = *range_index; + + strcpy(fname, "/proc/device-tree/reserved-ranges"); + + file = fopen(fname, "r"); + if (file == NULL) { + if (errno != ENOENT) { + perror(fname); + return; + } + errno = 0; + /* File not present. Non PowerKVM system. */ + return; + } + + /* + * Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. 
+ */ + while (fread(buf, sizeof(uint64_t) * 2, 1, file) == 1) { + uint64_t base, size; + + base = be64_to_cpu(((uint64_t *)buf)[0]); + size = be64_to_cpu(((uint64_t *)buf)[1]); + + exclude_range[i].start = base; + exclude_range[i].end = base + size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + + reserve(base, size); + } + fclose(file); + *range_index = i; +} + /* Get devtree details and create exclude_range array * Also create usablemem_ranges for KEXEC_ON_CRASH */ @@ -338,12 +381,15 @@ static int get_devtree_details(unsigned long kexec_flags) return -1; } + scan_reserved_ranges(kexec_flags, &i); + while ((dentry = readdir(dir)) != NULL) { if (strncmp(dentry->d_name, "chosen", 6) && strncmp(dentry->d_name, "memory@", 7) && strcmp(dentry->d_name, "memory") && strncmp(dentry->d_name, "pci@", 4) && - strncmp(dentry->d_name, "rtas", 4)) + strncmp(dentry->d_name, "rtas", 4) && + strncmp(dentry->d_name, "ibm,opal", 8)) continue; strcpy(fname, device_tree); strcat(fname, dentry->d_name); @@ -363,6 +409,7 @@ static int get_devtree_details(unsigned long kexec_flags) goto error_openfile; } fclose(file); + kernel_end = be64_to_cpu(kernel_end); /* Add kernel memory to exclude_range */ exclude_range[i].start = 0x0UL; @@ -386,6 +433,7 @@ static int get_devtree_details(unsigned long kexec_flags) goto error_openfile; } fclose(file); + crash_base = be64_to_cpu(crash_base); memset(fname, 0, sizeof(fname)); strcpy(fname, device_tree); @@ -400,6 +448,8 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + crash_size = be64_to_cpu(crash_size); if (crash_base > mem_min) mem_min = crash_base; @@ -430,10 +480,14 @@ static int get_devtree_details(unsigned long kexec_flags) * fall through. On older kernel this file * is not present. 
*/ - } else if (fread(&memory_limit, sizeof(uint64_t), 1, - file) != 1) { - perror(fname); - goto error_openfile; + } else { + if (fread(&memory_limit, sizeof(uint64_t), 1, + file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + memory_limit = be64_to_cpu(memory_limit); } memset(fname, 0, sizeof(fname)); @@ -454,6 +508,9 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + htab_base = be64_to_cpu(htab_base); + memset(fname, 0, sizeof(fname)); strcpy(fname, device_tree); strcat(fname, dentry->d_name); @@ -466,6 +523,9 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + htab_size = be64_to_cpu(htab_size); + /* Add htab address to exclude_range - NON-LPAR only */ exclude_range[i].start = htab_base; exclude_range[i].end = htab_base + htab_size; @@ -492,6 +552,7 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + initrd_start = be64_to_cpu(initrd_start); fclose(file); memset(fname, 0, sizeof(fname)); @@ -511,6 +572,7 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + initrd_end = be64_to_cpu(initrd_end); fclose(file); /* Add initrd address to exclude_range */ @@ -532,6 +594,8 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + rtas_base = be32_to_cpu(rtas_base); memset(fname, 0, sizeof(fname)); strcpy(fname, device_tree); strcat(fname, dentry->d_name); @@ -544,7 +608,9 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); closedir(cdir); + rtas_size = be32_to_cpu(rtas_size); /* Add rtas to exclude_range */ exclude_range[i].start = rtas_base; exclude_range[i].end = rtas_base + rtas_size; @@ -555,6 +621,46 @@ static int get_devtree_details(unsigned long kexec_flags) add_usable_mem_rgns(rtas_base, 
rtas_size); } /* rtas */ + if (strncmp(dentry->d_name, "ibm,opal", 8) == 0) { + strcat(fname, "/opal-base-address"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_base, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + opal_base = be64_to_cpu(opal_base); + fclose(file); + + memset(fname, 0, sizeof(fname)); + strcpy(fname, device_tree); + strcat(fname, dentry->d_name); + strcat(fname, "/opal-runtime-size"); + file = fopen(fname, "r"); + if (file == NULL) { + perror(fname); + goto error_opencdir; + } + if (fread(&opal_size, sizeof(uint64_t), 1, file) != 1) { + perror(fname); + goto error_openfile; + } + fclose(file); + closedir(cdir); + opal_size = be64_to_cpu(opal_size); + /* Add OPAL to exclude_range */ + exclude_range[i].start = opal_base; + exclude_range[i].end = opal_base + opal_size; + i++; + if (i >= max_memory_ranges) + realloc_memory_ranges(); + if (kexec_flags & KEXEC_ON_CRASH) + add_usable_mem_rgns(opal_base, opal_size); + } /* ibm,opal */ + if (!strncmp(dentry->d_name, "memory@", 7) || !strcmp(dentry->d_name, "memory")) { strcat(fname, "/reg"); @@ -566,8 +672,8 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } - rmo_base = ((uint64_t *)buf)[0]; - rmo_top = rmo_base + ((uint64_t *)buf)[1]; + rmo_base = be64_to_cpu(((uint64_t *)buf)[0]); + rmo_top = rmo_base + be64_to_cpu(((uint64_t *)buf)[1]); if (rmo_top > 0x30000000UL) rmo_top = 0x30000000UL; @@ -591,6 +697,8 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + tce_base = be64_to_cpu(tce_base); memset(fname, 0, sizeof(fname)); strcpy(fname, device_tree); strcat(fname, dentry->d_name); @@ -603,6 +711,8 @@ static int get_devtree_details(unsigned long kexec_flags) perror(fname); goto error_openfile; } + fclose(file); + tce_size = be32_to_cpu(tce_size); /* Add tce to exclude_range - NON-LPAR only */ 
exclude_range[i].start = tce_base; exclude_range[i].end = tce_base + tce_size; @@ -798,6 +908,7 @@ const struct arch_map_entry arches[] = { * So pass KEXEC_ARCH_PPC64 here */ { "ppc64", KEXEC_ARCH_PPC64 }, + { "ppc64le", KEXEC_ARCH_PPC64 }, { NULL, 0 }, }; diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h index 920ac46..89ee942 100644 --- a/kexec/arch/ppc64/kexec-ppc64.h +++ b/kexec/arch/ppc64/kexec-ppc64.h @@ -6,33 +6,28 @@ #define CORE_TYPE_ELF32 1 #define CORE_TYPE_ELF64 2 +#define BOOT_BLOCK_VERSION 17 +#define BOOT_BLOCK_LAST_COMP_VERSION 17 +#if (BOOT_BLOCK_VERSION < 16) +# define NEED_STRUCTURE_BLOCK_EXTRA_PAD +#endif +#define HAVE_DYNAMIC_MEMORY +#define NEED_RESERVE_DTB + int setup_memory_ranges(unsigned long kexec_flags); int elf_ppc64_probe(const char *buf, off_t len); int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, struct kexec_info *info); void elf_ppc64_usage(void); -void reserve(unsigned long long where, unsigned long long length); -int create_flatten_tree(char **, off_t *, char *); +struct mem_ehdr; unsigned long my_r2(const struct mem_ehdr *ehdr); extern uint64_t initrd_base, initrd_size; extern int max_memory_ranges; extern unsigned char reuse_initrd; -/* boot block version 2 as defined by the linux kernel */ -struct bootblock { - unsigned magic, - totalsize, - off_dt_struct, - off_dt_strings, - off_mem_rsvmap, - version, - last_comp_version, - boot_physid; -}; - struct arch_options_t { int core_header_type; }; diff --git a/kexec/arch/ppc64/kexec-zImage-ppc64.c b/kexec/arch/ppc64/kexec-zImage-ppc64.c index 24a87c6..d084ee5 100644 --- a/kexec/arch/ppc64/kexec-zImage-ppc64.c +++ b/kexec/arch/ppc64/kexec-zImage-ppc64.c @@ -36,6 +36,7 @@ int zImage_ppc64_probe(FILE *file) { Elf32_Ehdr elf; + int valid; if (fseek(file, 0, SEEK_SET) < 0) { fprintf(stderr, "seek error: %s\n", @@ -53,7 +54,7 @@ int zImage_ppc64_probe(FILE *file) return -1; } - return (elf.e_ident[EI_MAG0] == ELFMAG0 && + valid = 
(elf.e_ident[EI_MAG0] == ELFMAG0 && elf.e_ident[EI_MAG1] == ELFMAG1 && elf.e_ident[EI_MAG2] == ELFMAG2 && elf.e_ident[EI_MAG3] == ELFMAG3 && @@ -61,6 +62,8 @@ int zImage_ppc64_probe(FILE *file) elf.e_ident[EI_DATA] == ELFDATA2MSB && elf.e_type == ET_EXEC && elf.e_machine == EM_PPC); + + return valid ? 0 : -1; } int zImage_ppc64_load(FILE *file, int UNUSED(argc), char **UNUSED(argv), diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c index a7b9f15..8edb3e4 100644 --- a/kexec/arch/x86_64/kexec-bzImage64.c +++ b/kexec/arch/x86_64/kexec-bzImage64.c @@ -42,6 +42,7 @@ #include <arch/options.h> static const int probe_debug = 0; +int bzImage_support_efi_boot; int bzImage64_probe(const char *buf, off_t len) { @@ -82,6 +83,11 @@ int bzImage64_probe(const char *buf, off_t len) /* Must be KERNEL_64 and CAN_BE_LOADED_ABOVE_4G */ return -1; } + +#define XLF_EFI_KEXEC (1 << 4) + if ((header->xloadflags & XLF_EFI_KEXEC) == XLF_EFI_KEXEC) + bzImage_support_efi_boot = 1; + /* I've got a relocatable bzImage64 */ if (probe_debug) fprintf(stderr, "It's a relocatable bzImage64\n"); @@ -229,6 +235,89 @@ static int do_bzImage64_load(struct kexec_info *info, return 0; } +/* This assumes file is being loaded using file based kexec syscall */ +int bzImage64_load_file(int argc, char **argv, struct kexec_info *info) +{ + int ret = 0; + char *command_line = NULL, *tmp_cmdline = NULL; + const char *ramdisk = NULL, *append = NULL; + int entry_16bit = 0, entry_32bit = 0; + int opt; + int command_line_len; + + /* See options.h -- add any more there, too. 
*/ + static const struct option options[] = { + KEXEC_ARCH_OPTIONS + { "command-line", 1, 0, OPT_APPEND }, + { "append", 1, 0, OPT_APPEND }, + { "reuse-cmdline", 0, 0, OPT_REUSE_CMDLINE }, + { "initrd", 1, 0, OPT_RAMDISK }, + { "ramdisk", 1, 0, OPT_RAMDISK }, + { "real-mode", 0, 0, OPT_REAL_MODE }, + { "entry-32bit", 0, 0, OPT_ENTRY_32BIT }, + { 0, 0, 0, 0 }, + }; + static const char short_options[] = KEXEC_ARCH_OPT_STR "d"; + + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { + switch (opt) { + default: + /* Ignore core options */ + if (opt < OPT_ARCH_MAX) + break; + case OPT_APPEND: + append = optarg; + break; + case OPT_REUSE_CMDLINE: + tmp_cmdline = get_command_line(); + break; + case OPT_RAMDISK: + ramdisk = optarg; + break; + case OPT_REAL_MODE: + entry_16bit = 1; + break; + case OPT_ENTRY_32BIT: + entry_32bit = 1; + break; + } + } + command_line = concat_cmdline(tmp_cmdline, append); + if (tmp_cmdline) + free(tmp_cmdline); + command_line_len = 0; + if (command_line) { + command_line_len = strlen(command_line) + 1; + } else { + command_line = strdup("\0"); + command_line_len = 1; + } + + if (entry_16bit || entry_32bit) { + fprintf(stderr, "Kexec2 syscall does not support 16bit" + " or 32bit entry yet\n"); + ret = -1; + goto out; + } + + if (ramdisk) { + info->initrd_fd = open(ramdisk, O_RDONLY); + if (info->initrd_fd == -1) { + fprintf(stderr, "Could not open initrd file %s:%s\n", + ramdisk, strerror(errno)); + ret = -1; + goto out; + } + } + + info->command_line = command_line; + info->command_line_len = command_line_len; + return ret; +out: + free(command_line); + return ret; +} + int bzImage64_load(int argc, char **argv, const char *buf, off_t len, struct kexec_info *info) { @@ -241,6 +330,9 @@ int bzImage64_load(int argc, char **argv, const char *buf, off_t len, int opt; int result; + if (info->file_mode) + return bzImage64_load_file(argc, argv, info); + /* See options.h -- add any more there, too. 
*/ static const struct option options[] = { KEXEC_ARCH_OPTIONS diff --git a/kexec/arch/x86_64/kexec-x86_64.c b/kexec/arch/x86_64/kexec-x86_64.c index 5c23e01..041b007 100644 --- a/kexec/arch/x86_64/kexec-x86_64.c +++ b/kexec/arch/x86_64/kexec-x86_64.c @@ -53,6 +53,8 @@ void arch_usage(void) " --serial-baud=<baud_rate> Specify the serial port baud rate\n" " --console-vga Enable the vga console\n" " --console-serial Enable the serial console\n" + " --pass-memmap-cmdline Pass memory map via command line in kexec on panic case\n" + " --noefi Disable efi support\n" ); } @@ -63,6 +65,8 @@ struct arch_options_t arch_options = { .console_vga = 0, .console_serial = 0, .core_header_type = CORE_TYPE_ELF64, + .pass_memmap_cmdline = 0, + .noefi = 0, }; int arch_process_options(int argc, char **argv) @@ -126,6 +130,12 @@ int arch_process_options(int argc, char **argv) } arch_options.serial_baud = value; break; + case OPT_PASS_MEMMAP_CMDLINE: + arch_options.pass_memmap_cmdline = 1; + break; + case OPT_NOEFI: + arch_options.noefi = 1; + break; } } /* Reset getopt for the next pass; called in other source modules */ diff --git a/kexec/crashdump-elf.c b/kexec/crashdump-elf.c index 2baa357..c869347 100644 --- a/kexec/crashdump-elf.c +++ b/kexec/crashdump-elf.c @@ -41,6 +41,7 @@ int FUNC(struct kexec_info *info, uint64_t vmcoreinfo_addr, vmcoreinfo_len; int has_vmcoreinfo = 0; int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len); + long int count_cpu; if (xen_present()) nr_cpus = xen_get_nr_phys_cpus(); @@ -138,11 +139,13 @@ int FUNC(struct kexec_info *info, /* PT_NOTE program headers. One per cpu */ - for (i = 0; i < nr_cpus; i++) { + count_cpu = nr_cpus; + for (i = 0; count_cpu > 0; i++) { if (get_note_info(i, ¬es_addr, ¬es_len) < 0) { /* This cpu is not present. Skip it. 
*/ continue; } + count_cpu--; phdr = (PHDR *) bufp; bufp += sizeof(PHDR); diff --git a/kexec/crashdump-xen.c b/kexec/crashdump-xen.c index d8bd0f4..60594f6 100644 --- a/kexec/crashdump-xen.c +++ b/kexec/crashdump-xen.c @@ -131,30 +131,18 @@ unsigned long xen_architecture(struct crash_elf_info *elf_info) #ifdef HAVE_LIBXENCTRL int rc; xen_capabilities_info_t capabilities; -#ifdef XENCTRL_HAS_XC_INTERFACE xc_interface *xc; -#else - int xc; -#endif if (!xen_present()) goto out; memset(capabilities, '0', XEN_CAPABILITIES_INFO_LEN); -#ifdef XENCTRL_HAS_XC_INTERFACE xc = xc_interface_open(NULL, NULL, 0); if ( !xc ) { fprintf(stderr, "failed to open xen control interface.\n"); goto out; } -#else - xc = xc_interface_open(); - if ( xc == -1 ) { - fprintf(stderr, "failed to open xen control interface.\n"); - goto out; - } -#endif rc = xc_version(xc, XENVER_capabilities, &capabilities[0]); if ( rc == -1 ) { @@ -175,42 +163,80 @@ unsigned long xen_architecture(struct crash_elf_info *elf_info) return machine; } -static int xen_crash_note_callback(void *UNUSED(data), int nr, - char *UNUSED(str), - unsigned long base, - unsigned long length) +#ifdef HAVE_LIBXENCTRL +int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len) { - struct crash_note_info *note = xen_phys_notes + nr; + xc_interface *xc; + int ret = 0; + + xc = xc_interface_open(NULL, NULL, 0); + if (!xc) { + fprintf(stderr, "failed to open xen control interface.\n"); + return -1; + } - note->base = base; - note->length = length; + ret = xc_kexec_get_range(xc, KEXEC_RANGE_MA_VMCOREINFO, 0, len, addr); + xc_interface_close(xc); + + if (ret < 0) + return -1; return 0; } int xen_get_nr_phys_cpus(void) { - char *match = "Crash note\n"; - int cpus, n; + xc_interface *xc; + int max_cpus; + int cpu = -1; if (xen_phys_cpus) return xen_phys_cpus; - if ((cpus = kexec_iomem_for_each_line(match, NULL, NULL))) { - n = sizeof(struct crash_note_info) * cpus; - xen_phys_notes = malloc(n); - if (!xen_phys_notes) { - fprintf(stderr, "failed 
to allocate xen_phys_notes.\n"); - return -1; - } - memset(xen_phys_notes, 0, n); - kexec_iomem_for_each_line(match, - xen_crash_note_callback, NULL); - xen_phys_cpus = cpus; + xc = xc_interface_open(NULL, NULL, 0); + if (!xc) { + fprintf(stderr, "failed to open xen control interface.\n"); + return -1; + } + + max_cpus = xc_get_max_cpus(xc); + if (max_cpus <= 0) + goto out; + + xen_phys_notes = calloc(max_cpus, sizeof(*xen_phys_notes)); + if (xen_phys_notes == NULL) + goto out; + + for (cpu = 0; cpu < max_cpus; cpu++) { + uint64_t size, start; + int ret; + + ret = xc_kexec_get_range(xc, KEXEC_RANGE_MA_CPU, cpu, &size, &start); + if (ret < 0) + break; + + xen_phys_notes[cpu].base = start; + xen_phys_notes[cpu].length = size; } - return cpus; + xen_phys_cpus = cpu; + +out: + xc_interface_close(xc); + return cpu; +} +#else +int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len) +{ + return -1; +} + +int xen_get_nr_phys_cpus(void) +{ + return -1; } +#endif + int xen_get_note(int cpu, uint64_t *addr, uint64_t *len) { @@ -226,3 +252,37 @@ int xen_get_note(int cpu, uint64_t *addr, uint64_t *len) return 0; } + +#ifdef HAVE_LIBXENCTRL +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end) +{ + uint64_t size; + xc_interface *xc; + int rc = -1; + + xc = xc_interface_open(NULL, NULL, 0); + if (!xc) { + fprintf(stderr, "failed to open xen control interface.\n"); + goto out; + } + + rc = xc_kexec_get_range(xc, KEXEC_RANGE_MA_CRASH, 0, &size, start); + if (rc < 0) { + fprintf(stderr, "failed to get crash region from hypervisor.\n"); + goto out_close; + } + + *end = *start + size - 1; + +out_close: + xc_interface_close(xc); + +out: + return rc; +} +#else +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end) +{ + return -1; +} +#endif diff --git a/kexec/crashdump.c b/kexec/crashdump.c index 8d88fdf..15c1105 100644 --- a/kexec/crashdump.c +++ b/kexec/crashdump.c @@ -84,7 +84,7 @@ int get_crash_notes_per_cpu(int cpu, uint64_t *addr, uint64_t *len) if (fopen_errno != 
ENOENT) die("Could not open \"%s\": %s\n", crash_notes, strerror(fopen_errno)); - if (!stat("/sys/devices", &cpu_stat)) { + if (stat("/sys/devices", &cpu_stat)) { stat_errno = errno; if (stat_errno == ENOENT) die("\"/sys/devices\" does not exist. " @@ -157,8 +157,3 @@ int get_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len) { return get_vmcoreinfo("/sys/kernel/vmcoreinfo", addr, len); } - -int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len) -{ - return get_vmcoreinfo("/sys/hypervisor/vmcoreinfo", addr, len); -} diff --git a/kexec/crashdump.h b/kexec/crashdump.h index 0f7c2ea..95f1f0c 100644 --- a/kexec/crashdump.h +++ b/kexec/crashdump.h @@ -1,6 +1,7 @@ #ifndef CRASHDUMP_H #define CRASHDUMP_H +int get_crashkernel_region(uint64_t *start, uint64_t *end); extern int get_crash_notes_per_cpu(int cpu, uint64_t *addr, uint64_t *len); extern int get_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len); extern int get_xen_vmcoreinfo(uint64_t *addr, uint64_t *len); @@ -56,9 +57,9 @@ unsigned long crash_architecture(struct crash_elf_info *elf_info); unsigned long phys_to_virt(struct crash_elf_info *elf_info, unsigned long paddr); -int xen_present(void); unsigned long xen_architecture(struct crash_elf_info *elf_info); int xen_get_nr_phys_cpus(void); int xen_get_note(int cpu, uint64_t *addr, uint64_t *len); +int xen_get_crashkernel_region(uint64_t *start, uint64_t *end); #endif /* CRASHDUMP_H */ diff --git a/kexec/dt-ops.c b/kexec/dt-ops.c new file mode 100644 index 0000000..bbba986 --- /dev/null +++ b/kexec/dt-ops.c @@ -0,0 +1,136 @@ +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <libfdt.h> +#include <stdio.h> +#include <stdlib.h> + +#include "kexec.h" +#include "dt-ops.h" + +static const char n_chosen[] = "/chosen"; + +static const char p_bootargs[] = "bootargs"; +static const char p_initrd_start[] = "linux,initrd-start"; +static const char p_initrd_end[] = "linux,initrd-end"; + +int dtb_set_initrd(char **dtb, off_t *dtb_size, off_t start, off_t end) 
+{ + int result; + uint64_t value; + + dbgprintf("%s: start %jd, end %jd, size %jd (%jd KiB)\n", + __func__, (intmax_t)start, (intmax_t)end, + (intmax_t)(end - start), + (intmax_t)(end - start) / 1024); + + value = cpu_to_fdt64(start); + + result = dtb_set_property(dtb, dtb_size, n_chosen, p_initrd_start, + &value, sizeof(value)); + + if (result) + return result; + + value = cpu_to_fdt64(end); + + result = dtb_set_property(dtb, dtb_size, n_chosen, p_initrd_end, + &value, sizeof(value)); + + if (result) { + dtb_delete_property(*dtb, n_chosen, p_initrd_start); + return result; + } + + return 0; +} + +int dtb_set_bootargs(char **dtb, off_t *dtb_size, const char *command_line) +{ + return dtb_set_property(dtb, dtb_size, n_chosen, p_bootargs, + command_line, strlen(command_line) + 1); +} + +int dtb_set_property(char **dtb, off_t *dtb_size, const char *node, + const char *prop, const void *value, int value_len) +{ + int result; + int nodeoffset; + void *new_dtb; + int new_size; + + value_len = FDT_TAGALIGN(value_len); + + new_size = FDT_TAGALIGN(*dtb_size + fdt_node_len(node) + + fdt_prop_len(prop, value_len)); + + new_dtb = malloc(new_size); + + if (!new_dtb) { + dbgprintf("%s: malloc failed\n", __func__); + return -ENOMEM; + } + + result = fdt_open_into(*dtb, new_dtb, new_size); + + if (result) { + dbgprintf("%s: fdt_open_into failed: %s\n", __func__, + fdt_strerror(result)); + goto on_error; + } + + nodeoffset = fdt_path_offset(new_dtb, node); + + if (nodeoffset == -FDT_ERR_NOTFOUND) { + result = fdt_add_subnode(new_dtb, nodeoffset, node); + + if (result) { + dbgprintf("%s: fdt_add_subnode failed: %s\n", __func__, + fdt_strerror(result)); + goto on_error; + } + } else if (nodeoffset < 0) { + dbgprintf("%s: fdt_path_offset failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + goto on_error; + } + + result = fdt_setprop(new_dtb, nodeoffset, prop, value, value_len); + + if (result) { + dbgprintf("%s: fdt_setprop failed: %s\n", __func__, + fdt_strerror(result)); + goto 
on_error; + } + + free(*dtb); + + *dtb = new_dtb; + *dtb_size = new_size; + + return 0; + +on_error: + free(new_dtb); + return result; +} + +int dtb_delete_property(char *dtb, const char *node, const char *prop) +{ + int result; + int nodeoffset = fdt_path_offset(dtb, node); + + if (nodeoffset < 0) { + dbgprintf("%s: fdt_path_offset failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + return nodeoffset; + } + + result = fdt_delprop(dtb, nodeoffset, prop); + + if (result) + dbgprintf("%s: fdt_delprop failed: %s\n", __func__, + fdt_strerror(nodeoffset)); + + return result; +} diff --git a/kexec/dt-ops.h b/kexec/dt-ops.h new file mode 100644 index 0000000..e70d15d --- /dev/null +++ b/kexec/dt-ops.h @@ -0,0 +1,13 @@ +#if !defined(KEXEC_DT_OPS_H) +#define KEXEC_DT_OPS_H + +#include <sys/types.h> + +int dtb_set_initrd(char **dtb, off_t *dtb_size, off_t start, off_t end); +int dtb_set_bootargs(char **dtb, off_t *dtb_size, const char *command_line); +int dtb_set_property(char **dtb, off_t *dtb_size, const char *node, + const char *prop, const void *value, int value_len); + +int dtb_delete_property(char *dtb, const char *node, const char *prop); + +#endif diff --git a/kexec/firmware_memmap.c b/kexec/firmware_memmap.c index 9598933..6be3c7c 100644 --- a/kexec/firmware_memmap.c +++ b/kexec/firmware_memmap.c @@ -145,7 +145,6 @@ static int parse_memmap_entry(const char *entry, struct memory_range *range) range->end = parse_numeric_sysfs(filename); if (range->end == ULLONG_MAX) return -1; - range->end++; /* inclusive vs. 
exclusive ranges */ /* * entry/type diff --git a/kexec/fs2dt.c b/kexec/fs2dt.c index 1f5b0cf..304f6cb 100644 --- a/kexec/fs2dt.c +++ b/kexec/fs2dt.c @@ -84,7 +84,7 @@ static void dt_reserve(unsigned **dt_ptr, unsigned words) offset = *dt_ptr - dt_base; dt_base = new_dt; dt_cur_size = new_size; - *dt_ptr = cpu_to_be32((unsigned)dt_base + offset); + *dt_ptr = dt_base + offset; memset(*dt_ptr, 0, (new_size - offset)*4); } } @@ -112,19 +112,26 @@ static void checkprop(char *name, unsigned *data, int len) if ((data == NULL) && (base || size || end)) die("unrecoverable error: no property data"); else if (!strcmp(name, "linux,rtas-base")) - base = *data; + base = be32_to_cpu(*data); + else if (!strcmp(name, "opal-base-address")) + base = be64_to_cpu(*(unsigned long long *)data); + else if (!strcmp(name, "opal-runtime-size")) + size = be64_to_cpu(*(unsigned long long *)data); else if (!strcmp(name, "linux,tce-base")) - base = *(unsigned long long *) data; + base = be64_to_cpu(*(unsigned long long *) data); else if (!strcmp(name, "rtas-size") || !strcmp(name, "linux,tce-size")) - size = *data; + size = be32_to_cpu(*data); else if (reuse_initrd && !strcmp(name, "linux,initrd-start")) if (len == 8) - base = *(unsigned long long *) data; + base = be64_to_cpu(*(unsigned long long *) data); else - base = *data; + base = be32_to_cpu(*data); else if (reuse_initrd && !strcmp(name, "linux,initrd-end")) - end = *(unsigned long long *) data; + if (len == 8) + end = be64_to_cpu(*(unsigned long long *) data); + else + end = be32_to_cpu(*data); if (size && end) die("unrecoverable error: size and end set at same time\n"); @@ -194,7 +201,7 @@ static void add_dyn_reconf_usable_mem_property__(int fd) die("unrecoverable error: error reading \"%s\": %s\n", pathname, strerror(errno)); - base = (uint64_t) buf[0]; + base = be64_to_cpu((uint64_t) buf[0]); end = base + lmb_size; if (~0ULL - base < end) die("unrecoverable error: mem property overflow\n"); @@ -226,8 +233,8 @@ static void 
add_dyn_reconf_usable_mem_property__(int fd) " ranges.\n", ranges_size*8); } - ranges[rlen++] = loc_base; - ranges[rlen++] = loc_end - loc_base; + ranges[rlen++] = cpu_to_be64(loc_base); + ranges[rlen++] = cpu_to_be64(loc_end - loc_base); rngs_cnt++; } } @@ -252,7 +259,7 @@ static void add_dyn_reconf_usable_mem_property__(int fd) } } else { /* Store the count of (base, size) duple */ - ranges[tmp_indx] = rngs_cnt; + ranges[tmp_indx] = cpu_to_be64((uint64_t) rngs_cnt); } } @@ -267,7 +274,7 @@ static void add_dyn_reconf_usable_mem_property__(int fd) pad_structure_block(rlen); memcpy(dt, ranges, rlen); free(ranges); - dt += cpu_to_be32((rlen + 3)/4); + dt += (rlen + 3)/4; } static void add_dyn_reconf_usable_mem_property(struct dirent *dp, int fd) @@ -282,8 +289,8 @@ static void add_dyn_reconf_usable_mem_property(struct dirent *dp, int fd) {} static void add_usable_mem_property(int fd, size_t len) { char fname[MAXPATH], *bname; - uint32_t buf[2]; - uint32_t *ranges; + uint64_t buf[2]; + uint64_t *ranges; int ranges_size = MEM_RANGE_CHUNK_SZ; uint64_t base, end, loc_base, loc_end; size_t range; @@ -306,10 +313,11 @@ static void add_usable_mem_property(int fd, size_t len) die("unrecoverable error: error reading \"%s\": %s\n", pathname, strerror(errno)); - if (~0ULL - buf[0] < buf[1]) + base = be64_to_cpu(buf[0]); + end = be64_to_cpu(buf[1]); + if (~0ULL - base < end) die("unrecoverable error: mem property overflow\n"); - base = be32_to_cpu(buf[0]); - end = base + be32_to_cpu(buf[1]); + end += base; ranges = malloc(ranges_size * sizeof(*ranges)); if (!ranges) @@ -339,8 +347,8 @@ static void add_usable_mem_property(int fd, size_t len) "%d bytes for ranges.\n", ranges_size*sizeof(*ranges)); } - ranges[rlen++] = loc_base; - ranges[rlen++] = loc_end - loc_base; + ranges[rlen++] = cpu_to_be64(loc_base); + ranges[rlen++] = cpu_to_be64(loc_end - loc_base); } } @@ -471,6 +479,8 @@ static int comparefunc(const struct dirent **dentry1, { char *str1 = (*(struct dirent 
**)dentry1)->d_name; char *str2 = (*(struct dirent **)dentry2)->d_name; + char *sep1 = strchr(str1, '@'); + char *sep2 = strchr(str2, '@'); /* * strcmp scans from left to right and fails to idetify for some @@ -478,9 +488,20 @@ static int comparefunc(const struct dirent **dentry1, * Therefore, we get the wrong sorted order like memory@10000000 and * memory@f000000. */ - if (strchr(str1, '@') && strchr(str2, '@') && - (strlen(str1) > strlen(str2))) - return 1; + if (sep1 && sep2) { + int baselen1 = sep1 - str1; + int baselen2 = sep2 - str2; + int len1 = strlen(str1); + int len2 = strlen(str2); + + /* + * Check the base name matches, and the properties are + * different lengths. + */ + if ((baselen1 == baselen2) && (len1 != len2) && + !strncmp(str1, str2, baselen2)) + return (len1 > len2) - (len1 < len2); + } return strcmp(str1, str2); } @@ -527,7 +548,7 @@ static void putnode(void) /* Add initrd entries to the second kernel */ if (initrd_base && initrd_size && !strcmp(basename,"chosen/")) { int len = 8; - unsigned long long initrd_end; + uint64_t bevalue; dt_reserve(&dt, 12); /* both props, of 6 words ea. */ *dt++ = cpu_to_be32(3); @@ -535,7 +556,8 @@ static void putnode(void) *dt++ = cpu_to_be32(propnum("linux,initrd-start")); pad_structure_block(len); - memcpy(dt,&initrd_base,len); + bevalue = cpu_to_be64(initrd_base); + memcpy(dt, &bevalue, len); dt += (len + 3)/4; len = 8; @@ -543,10 +565,10 @@ static void putnode(void) *dt++ = cpu_to_be32(len); *dt++ = cpu_to_be32(propnum("linux,initrd-end")); - initrd_end = initrd_base + initrd_size; + bevalue = cpu_to_be64(initrd_base + initrd_size); pad_structure_block(len); - memcpy(dt,&initrd_end,len); + memcpy(dt, &bevalue, len); dt += (len + 3)/4; reserve(initrd_base, initrd_size); @@ -555,6 +577,7 @@ static void putnode(void) /* Add cmdline to the second kernel. Check to see if the new * cmdline has a root=. If not, use the old root= cmdline. 
*/ if (!strcmp(basename,"chosen/")) { + size_t result; size_t cmd_len = 0; char *param = NULL; char filename[MAXPATH]; @@ -614,8 +637,7 @@ static void putnode(void) * code can print 'I'm in purgatory' message. Currently only * pseries/hvcterminal is supported. */ - strcpy(filename, pathname); - strncat(filename, "linux,stdout-path", MAXPATH); + snprintf(filename, MAXPATH, "%slinux,stdout-path", pathname); fd = open(filename, O_RDONLY); if (fd == -1) { printf("Unable to find %s, printing from purgatory is diabled\n", @@ -636,11 +658,14 @@ static void putnode(void) close(fd); goto no_debug; } - read(fd, buff, statbuf.st_size); + result = read(fd, buff, statbuf.st_size); close(fd); - strncpy(filename, "/proc/device-tree/", MAXPATH); - strncat(filename, buff, MAXPATH); - strncat(filename, "/compatible", MAXPATH); + if (result <= 0) { + printf("Unable to read %s, printing from purgatory is diabled\n", + filename); + goto no_debug; + } + snprintf(filename, MAXPATH, "/proc/device-tree/%s/compatible", buff); fd = open(filename, O_RDONLY); if (fd == -1) { printf("Unable to find %s printing from purgatory is diabled\n", @@ -659,8 +684,9 @@ static void putnode(void) close(fd); goto no_debug; } - read(fd, buff, statbuf.st_size); - if (!strcmp(buff, "hvterm1") || !strcmp(buff, "hvterm-protocol")) + result = read(fd, buff, statbuf.st_size); + if (result && (!strcmp(buff, "hvterm1") + || !strcmp(buff, "hvterm-protocol"))) my_debug = 1; close(fd); free(buff); diff --git a/kexec/kexec-elf-rel.c b/kexec/kexec-elf-rel.c index 020cab0..c625f30 100644 --- a/kexec/kexec-elf-rel.c +++ b/kexec/kexec-elf-rel.c @@ -379,12 +379,12 @@ int elf_rel_load(struct mem_ehdr *ehdr, struct kexec_info *info, * Relocation Entries: If the index is STN_UNDEF, * the undefined symbol index, the relocation uses 0 * as the "symbol value". - * So, is this really an error condition to flag die? + * TOC symbols appear as undefined but should be + * resolved as well. 
Their type is STT_NOTYPE so allow + * such symbols to be processed. */ - /* - die("Undefined symbol: %s\n", name); - */ - continue; + if (ELF32_ST_TYPE(sym.st_info) != STT_NOTYPE) + die("Undefined symbol: %s\n", name); } sec_base = 0; if (sym.st_shndx == SHN_COMMON) { diff --git a/kexec/kexec-iomem.c b/kexec/kexec-iomem.c index 0396713..485a2e8 100644 --- a/kexec/kexec-iomem.c +++ b/kexec/kexec-iomem.c @@ -26,8 +26,8 @@ int kexec_iomem_for_each_line(char *match, int (*callback)(void *data, int nr, char *str, - unsigned long base, - unsigned long length), + unsigned long long base, + unsigned long long length), void *data) { const char *iomem = proc_iomem(); @@ -65,8 +65,8 @@ int kexec_iomem_for_each_line(char *match, static int kexec_iomem_single_callback(void *data, int nr, char *UNUSED(str), - unsigned long base, - unsigned long length) + unsigned long long base, + unsigned long long length) { struct memory_range *range = data; diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h index b56cb00..267b75b 100644 --- a/kexec/kexec-syscall.h +++ b/kexec/kexec-syscall.h @@ -39,17 +39,33 @@ #ifdef __s390__ #define __NR_kexec_load 277 #endif -#ifdef __arm__ -#define __NR_kexec_load __NR_SYSCALL_BASE + 347 +#if defined(__arm__) || defined(__arm64__) +#define __NR_kexec_load __NR_SYSCALL_BASE + 347 #endif #if defined(__mips__) #define __NR_kexec_load 4311 #endif +#ifdef __m68k__ +#define __NR_kexec_load 313 +#endif #ifndef __NR_kexec_load #error Unknown processor architecture. Needs a kexec_load syscall number. 
#endif #endif /*ifndef __NR_kexec_load*/ +#ifndef __NR_kexec_file_load + +#ifdef __x86_64__ +#define __NR_kexec_file_load 320 +#endif + +#ifndef __NR_kexec_file_load +/* system call not available for the arch */ +#define __NR_kexec_file_load 0xffffffff /* system call not available */ +#endif + +#endif /*ifndef __NR_kexec_file_load*/ + struct kexec_segment; static inline long kexec_load(void *entry, unsigned long nr_segments, @@ -58,20 +74,42 @@ static inline long kexec_load(void *entry, unsigned long nr_segments, return (long) syscall(__NR_kexec_load, entry, nr_segments, segments, flags); } +static inline int is_kexec_file_load_implemented(void) { + if (__NR_kexec_file_load != 0xffffffff) + return 1; + return 0; +} + +static inline long kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char *cmdline_ptr, + unsigned long flags) +{ + return (long) syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, + cmdline_len, cmdline_ptr, flags); +} + #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_PRESERVE_CONTEXT 0x00000002 #define KEXEC_ARCH_MASK 0xffff0000 +/* Flags for kexec file based system call */ +#define KEXEC_FILE_UNLOAD 0x00000001 +#define KEXEC_FILE_ON_CRASH 0x00000002 +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. 
*/ #define KEXEC_ARCH_DEFAULT ( 0 << 16) #define KEXEC_ARCH_386 ( 3 << 16) +#define KEXEC_ARCH_68K ( 4 << 16) #define KEXEC_ARCH_X86_64 (62 << 16) #define KEXEC_ARCH_PPC (20 << 16) #define KEXEC_ARCH_PPC64 (21 << 16) #define KEXEC_ARCH_IA_64 (50 << 16) #define KEXEC_ARCH_ARM (40 << 16) +#define KEXEC_ARCH_ARM64 (183 << 16) +/* #define KEXEC_ARCH_AARCH64 (183 << 16) */ #define KEXEC_ARCH_S390 (22 << 16) #define KEXEC_ARCH_SH (42 << 16) #define KEXEC_ARCH_MIPS_LE (10 << 16) @@ -114,5 +152,11 @@ static inline long kexec_load(void *entry, unsigned long nr_segments, #if defined(__mips__) #define KEXEC_ARCH_NATIVE KEXEC_ARCH_MIPS #endif +#ifdef __m68k__ +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_68K +#endif +#if defined(__arm64__) +#define KEXEC_ARCH_NATIVE KEXEC_ARCH_ARM64 +#endif #endif /* KEXEC_SYSCALL_H */ diff --git a/kexec/kexec-xen.c b/kexec/kexec-xen.c new file mode 100644 index 0000000..24a4191 --- /dev/null +++ b/kexec/kexec-xen.c @@ -0,0 +1,137 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <elf.h> +#include "kexec.h" +#include "kexec-syscall.h" +#include "crashdump.h" + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL +#include <xenctrl.h> + +#include "crashdump.h" + +int xen_kexec_load(struct kexec_info *info) +{ + uint32_t nr_segments = info->nr_segments; + struct kexec_segment *segments = info->segment; + xc_interface *xch; + xc_hypercall_buffer_array_t *array = NULL; + uint8_t type; + uint8_t arch; + xen_kexec_segment_t *xen_segs; + int s; + int ret = -1; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + xen_segs = calloc(nr_segments + 1, sizeof(*xen_segs)); + if (!xen_segs) + goto out; + + array = xc_hypercall_buffer_array_create(xch, nr_segments); + if (array == NULL) + goto out; + + for (s = 0; s < nr_segments; s++) { + DECLARE_HYPERCALL_BUFFER(void, seg_buf); + + seg_buf = xc_hypercall_buffer_array_alloc(xch, array, s, + seg_buf, segments[s].bufsz); + if (seg_buf == NULL) + goto out; + 
memcpy(seg_buf, segments[s].buf, segments[s].bufsz); + + set_xen_guest_handle(xen_segs[s].buf.h, seg_buf); + xen_segs[s].buf_size = segments[s].bufsz; + xen_segs[s].dest_maddr = (uint64_t)segments[s].mem; + xen_segs[s].dest_size = segments[s].memsz; + } + + /* + * Ensure 0 - 1 MiB is mapped and accessible by the image. + * + * This allows access to the VGA memory and the region + * purgatory copies in the crash case. + */ + set_xen_guest_handle(xen_segs[s].buf.h, HYPERCALL_BUFFER_NULL); + xen_segs[s].buf_size = 0; + xen_segs[s].dest_maddr = 0; + xen_segs[s].dest_size = 1 * 1024 * 1024; + nr_segments++; + + type = (info->kexec_flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH + : KEXEC_TYPE_DEFAULT; + + arch = (info->kexec_flags & KEXEC_ARCH_MASK) >> 16; +#if defined(__i386__) || defined(__x86_64__) + if (!arch) + arch = EM_386; +#endif + + ret = xc_kexec_load(xch, type, arch, (uint64_t)info->entry, + nr_segments, xen_segs); + +out: + xc_hypercall_buffer_array_destroy(xch, array); + free(xen_segs); + xc_interface_close(xch); + + return ret; +} + +int xen_kexec_unload(uint64_t kexec_flags) +{ + xc_interface *xch; + uint8_t type; + int ret; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return -1; + + type = (kexec_flags & KEXEC_ON_CRASH) ? KEXEC_TYPE_CRASH + : KEXEC_TYPE_DEFAULT; + + ret = xc_kexec_unload(xch, type); + + xc_interface_close(xch); + + return ret; +} + +void xen_kexec_exec(void) +{ + xc_interface *xch; + + xch = xc_interface_open(NULL, NULL, 0); + if (!xch) + return; + + xc_kexec_exec(xch, KEXEC_TYPE_DEFAULT); + + xc_interface_close(xch); +} + +#else /* ! 
HAVE_LIBXENCTRL */ + +int xen_kexec_load(struct kexec_info *UNUSED(info)) +{ + return -1; +} + +int xen_kexec_unload(uint64_t kexec_flags) +{ + return -1; +} + +void xen_kexec_exec(void) +{ +} + +#endif diff --git a/kexec/kexec.c b/kexec/kexec.c index 2ce570f..a5e78c6 100644 --- a/kexec/kexec.c +++ b/kexec/kexec.c @@ -51,8 +51,20 @@ unsigned long long mem_min = 0; unsigned long long mem_max = ULONG_MAX; static unsigned long kexec_flags = 0; +/* Flags for kexec file (fd) based syscall */ +static unsigned long kexec_file_flags = 0; int kexec_debug = 0; +void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr) +{ + int i; + dbgprintf("%s\n", prefix); + for (i = 0; i < nr_mr; i++) { + dbgprintf("%016llx-%016llx (%d)\n", mr[i].start, + mr[i].end, mr[i].type); + } +} + void die(const char *fmt, ...) { va_list args; @@ -275,7 +287,7 @@ unsigned long locate_hole(struct kexec_info *info, hole_base = start; break; } else { - hole_base = _ALIGN_DOWN(end - hole_size, + hole_base = _ALIGN_DOWN(end - hole_size + 1, hole_align); } } @@ -469,14 +481,46 @@ static int add_backup_segments(struct kexec_info *info, return 0; } +static char *slurp_fd(int fd, const char *filename, off_t size, off_t *nread) +{ + char *buf; + off_t progress; + ssize_t result; + + buf = xmalloc(size); + progress = 0; + while (progress < size) { + result = read(fd, buf + progress, size - progress); + if (result < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + fprintf(stderr, "Read on %s failed: %s\n", filename, + strerror(errno)); + free(buf); + close(fd); + return NULL; + } + if (result == 0) + /* EOF */ + break; + progress += result; + } + result = close(fd); + if (result < 0) + die("Close of %s failed: %s\n", filename, strerror(errno)); + + if (nread) + *nread = progress; + return buf; +} + char *slurp_file(const char *filename, off_t *r_size) { int fd; char *buf; - off_t size, progress, err; + off_t size, err, nread; ssize_t result; struct stat stats; - if 
(!filename) { *r_size = 0; @@ -512,37 +556,23 @@ char *slurp_file(const char *filename, off_t *r_size) size = stats.st_size; } + buf = slurp_fd(fd, filename, size, &nread); + if (!buf) + die("Cannot read %s", filename); + + if (nread != size) + die("Read on %s ended before stat said it should\n", filename); + *r_size = size; - buf = xmalloc(size); - progress = 0; - while(progress < size) { - result = read(fd, buf + progress, size - progress); - if (result < 0) { - if ((errno == EINTR) || (errno == EAGAIN)) - continue; - die("read on %s of %ld bytes failed: %s\n", filename, - (size - progress)+ 0UL, strerror(errno)); - } - if (result == 0) - die("read on %s ended before stat said it should\n", filename); - progress += result; - } - result = close(fd); - if (result < 0) { - die("Close of %s failed: %s\n", filename, strerror(errno)); - } return buf; } /* This functions reads either specified number of bytes from the file or lesser if EOF is met. */ -char *slurp_file_len(const char *filename, off_t size) +char *slurp_file_len(const char *filename, off_t size, off_t *nread) { int fd; - char *buf; - off_t progress; - ssize_t result; if (!filename) return 0; @@ -552,30 +582,8 @@ char *slurp_file_len(const char *filename, off_t size) strerror(errno)); return 0; } - buf = xmalloc(size); - progress = 0; - while(progress < size) { - result = read(fd, buf + progress, size - progress); - if (result < 0) { - if ((errno == EINTR) || (errno == EAGAIN)) - continue; - fprintf(stderr, "read on %s of %ld bytes failed: %s\n", - filename, (size - progress)+ 0UL, - strerror(errno)); - free(buf); - return 0; - } - if (result == 0) - /* EOF */ - break; - progress += result; - } - result = close(fd); - if (result < 0) { - die("Close of %s failed: %s\n", - filename, strerror(errno)); - } - return buf; + + return slurp_fd(fd, filename, size, nread); } char *slurp_decompress_file(const char *filename, off_t *r_size) @@ -695,7 +703,7 @@ static int my_load(const char *type, int fileind, int 
argc, char **argv, } if (!type || guess_only) { for (i = 0; i < file_types; i++) { - if (file_type[i].probe(kernel_buf, kernel_size) >= 0) + if (file_type[i].probe(kernel_buf, kernel_size) == 0) break; } if (i == file_types) { @@ -764,8 +772,12 @@ static int my_load(const char *type, int fileind, int argc, char **argv, if (kexec_debug) print_segments(stderr, &info); - result = kexec_load( - info.entry, info.nr_segments, info.segment, info.kexec_flags); + if (xen_present()) + result = xen_kexec_load(&info); + else + result = kexec_load(info.entry, + info.nr_segments, info.segment, + info.kexec_flags); if (result != 0) { /* The load failed, print some debugging information */ fprintf(stderr, "kexec_load failed: %s\n", @@ -777,6 +789,19 @@ static int my_load(const char *type, int fileind, int argc, char **argv, return result; } +static int kexec_file_unload(unsigned long kexec_file_flags) +{ + int ret = 0; + + ret = kexec_file_load(-1, -1, 0, NULL, kexec_file_flags); + if (ret != 0) { + /* The unload failed, print some debugging information */ + fprintf(stderr, "kexec_file_load(unload) failed\n: %s\n", + strerror(errno)); + } + return ret; +} + static int k_unload (unsigned long kexec_flags) { int result; @@ -789,10 +814,13 @@ static int k_unload (unsigned long kexec_flags) } kexec_flags |= native_arch; - result = kexec_load(NULL, 0, NULL, kexec_flags); + if (xen_present()) + result = xen_kexec_unload(kexec_flags); + else + result = kexec_load(NULL, 0, NULL, kexec_flags); if (result != 0) { /* The unload failed, print some debugging information */ - fprintf(stderr, "kexec_load (0 segments) failed: %s\n", + fprintf(stderr, "kexec unload failed: %s\n", strerror(errno)); } return result; @@ -823,7 +851,10 @@ static int my_shutdown(void) */ static int my_exec(void) { - reboot(LINUX_REBOOT_CMD_KEXEC); + if (xen_present()) + xen_kexec_exec(); + else + reboot(LINUX_REBOOT_CMD_KEXEC); /* I have failed if I make it here */ fprintf(stderr, "kexec failed: %s\n", 
strerror(errno)); @@ -909,6 +940,7 @@ void usage(void) " (0 means it's not jump back or\n" " preserve context)\n" " to original kernel.\n" + " -s, --kexec-file-syscall Use file based syscall for kexec operation\n" " -d, --debug Enable debugging to help spot a failure.\n" "\n" "Supported kernel file types and options: \n"); @@ -928,6 +960,10 @@ static int kexec_loaded(void) char *p; char line[3]; + /* No way to tell if an image is loaded under Xen, assume it is. */ + if (xen_present()) + return 1; + fp = fopen("/sys/kernel/kexec_loaded", "r"); if (fp == NULL) return -1; @@ -1052,6 +1088,82 @@ char *concat_cmdline(const char *base, const char *append) return cmdline; } +/* New file based kexec system call related code */ +static int do_kexec_file_load(int fileind, int argc, char **argv, + unsigned long flags) { + + char *kernel; + int kernel_fd, i; + struct kexec_info info; + int ret = 0; + char *kernel_buf; + off_t kernel_size; + + memset(&info, 0, sizeof(info)); + info.segment = NULL; + info.nr_segments = 0; + info.entry = NULL; + info.backup_start = 0; + info.kexec_flags = flags; + + info.file_mode = 1; + info.initrd_fd = -1; + + if (!is_kexec_file_load_implemented()) { + fprintf(stderr, "syscall kexec_file_load not available.\n"); + return -1; + } + + if (argc - fileind <= 0) { + fprintf(stderr, "No kernel specified\n"); + usage(); + return -1; + } + + kernel = argv[fileind]; + + kernel_fd = open(kernel, O_RDONLY); + if (kernel_fd == -1) { + fprintf(stderr, "Failed to open file %s:%s\n", kernel, + strerror(errno)); + return -1; + } + + /* slurp in the input kernel */ + kernel_buf = slurp_decompress_file(kernel, &kernel_size); + + for (i = 0; i < file_types; i++) { + if (file_type[i].probe(kernel_buf, kernel_size) >= 0) + break; + } + + if (i == file_types) { + fprintf(stderr, "Cannot determine the file type " "of %s\n", + kernel); + return -1; + } + + ret = file_type[i].load(argc, argv, kernel_buf, kernel_size, &info); + if (ret < 0) { + fprintf(stderr, "Cannot 
load %s\n", kernel); + return ret; + } + + /* + * If there is no initramfs, set KEXEC_FILE_NO_INITRAMFS flag so that + * kernel does not return error with negative initrd_fd. + */ + if (info.initrd_fd == -1) + info.kexec_flags |= KEXEC_FILE_NO_INITRAMFS; + + ret = kexec_file_load(kernel_fd, info.initrd_fd, info.command_line_len, + info.command_line, info.kexec_flags); + if (ret != 0) + fprintf(stderr, "kexec_file_load failed: %s\n", + strerror(errno)); + return ret; +} + int main(int argc, char *argv[]) { @@ -1063,6 +1175,7 @@ int main(int argc, char *argv[]) int do_ifdown = 0; int do_unload = 0; int do_reuse_initrd = 0; + int do_kexec_file_syscall = 0; void *entry = 0; char *type = 0; char *endptr; @@ -1075,10 +1188,29 @@ int main(int argc, char *argv[]) }; static const char short_options[] = KEXEC_ALL_OPT_STR; + /* + * First check if --use-kexec-file-syscall is set. That changes lot of + * things + */ + while ((opt = getopt_long(argc, argv, short_options, + options, 0)) != -1) { + switch(opt) { + case OPT_KEXEC_FILE_SYSCALL: + do_kexec_file_syscall = 1; + break; + } + } + + /* Reset getopt for the next pass. */ + opterr = 1; + optind = 1; + while ((opt = getopt_long(argc, argv, short_options, options, 0)) != -1) { switch(opt) { case '?': + usage(); + return 1; case OPT_HELP: usage(); return 0; @@ -1107,6 +1239,8 @@ int main(int argc, char *argv[]) do_shutdown = 0; do_sync = 0; do_unload = 1; + if (do_kexec_file_syscall) + kexec_file_flags |= KEXEC_FILE_UNLOAD; break; case OPT_EXEC: do_load = 0; @@ -1149,7 +1283,10 @@ int main(int argc, char *argv[]) do_exec = 0; do_shutdown = 0; do_sync = 0; - kexec_flags = KEXEC_ON_CRASH; + if (do_kexec_file_syscall) + kexec_file_flags |= KEXEC_FILE_ON_CRASH; + else + kexec_flags = KEXEC_ON_CRASH; break; case OPT_MEM_MIN: mem_min = strtoul(optarg, &endptr, 0); @@ -1174,6 +1311,9 @@ int main(int argc, char *argv[]) case OPT_REUSE_INITRD: do_reuse_initrd = 1; break; + case OPT_KEXEC_FILE_SYSCALL: + /* We already parsed it. 
Nothing to do. */ + break; default: break; } @@ -1218,10 +1358,18 @@ int main(int argc, char *argv[]) } if (do_unload) { - result = k_unload(kexec_flags); + if (do_kexec_file_syscall) + result = kexec_file_unload(kexec_file_flags); + else + result = k_unload(kexec_flags); } if (do_load && (result == 0)) { - result = my_load(type, fileind, argc, argv, kexec_flags, entry); + if (do_kexec_file_syscall) + result = do_kexec_file_load(fileind, argc, argv, + kexec_file_flags); + else + result = my_load(type, fileind, argc, argv, + kexec_flags, entry); } /* Don't shutdown unless there is something to reboot to! */ if ((result == 0) && (do_shutdown || do_exec) && !kexec_loaded()) { diff --git a/kexec/kexec.h b/kexec/kexec.h index 2904e03..4be2b2f 100644 --- a/kexec/kexec.h +++ b/kexec/kexec.h @@ -148,12 +148,21 @@ struct kexec_info { int nr_segments; struct memory_range *memory_range; int memory_ranges; + struct memory_range *crash_range; + int nr_crash_ranges; void *entry; struct mem_ehdr rhdr; unsigned long backup_start; unsigned long kexec_flags; unsigned long backup_src_start; unsigned long backup_src_size; + /* Set to 1 if we are using kexec file syscall */ + unsigned long file_mode :1; + + /* Filled by kernel image processing code */ + int initrd_fd; + char *command_line; + int command_line_len; }; struct arch_map_entry { @@ -205,6 +214,7 @@ extern int file_types; #define OPT_UNLOAD 'u' #define OPT_TYPE 't' #define OPT_PANIC 'p' +#define OPT_KEXEC_FILE_SYSCALL 's' #define OPT_MEM_MIN 256 #define OPT_MEM_MAX 257 #define OPT_REUSE_INITRD 258 @@ -228,16 +238,18 @@ extern int file_types; { "mem-min", 1, 0, OPT_MEM_MIN }, \ { "mem-max", 1, 0, OPT_MEM_MAX }, \ { "reuseinitrd", 0, 0, OPT_REUSE_INITRD }, \ + { "kexec-file-syscall", 0, 0, OPT_KEXEC_FILE_SYSCALL }, \ { "debug", 0, 0, OPT_DEBUG }, \ -#define KEXEC_OPT_STR "h?vdfxluet:p" +#define KEXEC_OPT_STR "h?vdfxluet:ps" +extern void dbgprint_mem_range(const char *prefix, struct memory_range *mr, int nr_mr); extern void 
die(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); extern void *xmalloc(size_t size); extern void *xrealloc(void *ptr, size_t size); extern char *slurp_file(const char *filename, off_t *r_size); -extern char *slurp_file_len(const char *filename, off_t size); +extern char *slurp_file_len(const char *filename, off_t size, off_t *nread); extern char *slurp_decompress_file(const char *filename, off_t *r_size); extern unsigned long virt_to_phys(unsigned long addr); extern void add_segment(struct kexec_info *info, @@ -279,8 +291,8 @@ int kexec_iomem_for_each_line(char *match, int (*callback)(void *data, int nr, char *str, - unsigned long base, - unsigned long length), + unsigned long long base, + unsigned long long length), void *data); int parse_iomem_single(char *str, uint64_t *start, uint64_t *end); const char * proc_iomem(void); @@ -289,4 +301,9 @@ const char * proc_iomem(void); char *concat_cmdline(const char *base, const char *append); +int xen_present(void); +int xen_kexec_load(struct kexec_info *info); +int xen_kexec_unload(uint64_t kexec_flags); +void xen_kexec_exec(void); + #endif /* KEXEC_H */ diff --git a/kexec/libfdt/libfdt.h b/kexec/libfdt/libfdt.h index ce80e4f..87a24ab 100644 --- a/kexec/libfdt/libfdt.h +++ b/kexec/libfdt/libfdt.h @@ -1073,4 +1073,28 @@ int fdt_del_node(void *fdt, int nodeoffset); const char *fdt_strerror(int errval); +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +/* + * if add a new subnode: + * see: fdt_add_subnode -> fdt_add_subnode_namelen + */ +static inline int fdt_node_len(const char* node_name) +{ + return sizeof(struct fdt_node_header) + + FDT_TAGALIGN(strlen(node_name) + 1) + FDT_TAGSIZE; +} + +/* + * if add a new prop: (assume prop_name not exist in strtab) + * see: fdt_setprop -> _fdt_add_property + */ +static inline int fdt_prop_len(const char* prop_name, int len) +{ + return (strlen(prop_name) + 1) + + sizeof(struct fdt_property) + + 
FDT_TAGALIGN(len); +} + #endif /* _LIBFDT_H */ diff --git a/kexec/libfdt/libfdt_internal.h b/kexec/libfdt/libfdt_internal.h index 46eb93e..7e6c4c8 100644 --- a/kexec/libfdt/libfdt_internal.h +++ b/kexec/libfdt/libfdt_internal.h @@ -52,9 +52,6 @@ */ #include <fdt.h> -#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) -#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) - #define FDT_CHECK_HEADER(fdt) \ { \ int err; \ diff --git a/purgatory/Makefile b/purgatory/Makefile index 1945702..0c85da6 100644 --- a/purgatory/Makefile +++ b/purgatory/Makefile @@ -18,6 +18,7 @@ dist += purgatory/Makefile $(PURGATORY_SRCS) \ include $(srcdir)/purgatory/arch/alpha/Makefile include $(srcdir)/purgatory/arch/arm/Makefile +include $(srcdir)/purgatory/arch/arm64/Makefile include $(srcdir)/purgatory/arch/i386/Makefile include $(srcdir)/purgatory/arch/ia64/Makefile include $(srcdir)/purgatory/arch/mips/Makefile diff --git a/purgatory/arch/arm64/Makefile b/purgatory/arch/arm64/Makefile new file mode 100644 index 0000000..4749715 --- /dev/null +++ b/purgatory/arch/arm64/Makefile @@ -0,0 +1,18 @@ + +arm64_PURGATORY_EXTRA_CFLAGS = \ + -mcmodel=large \ + -Wl,-Map=purgatory/purgatory.map \ + -fno-stack-protector \ + -Wundef \ + -Werror-implicit-function-declaration \ + -Wdeclaration-after-statement \ + -Werror=implicit-int \ + -Werror=strict-prototypes + +arm64_PURGATORY_SRCS += \ + purgatory/arch/arm64/entry.S \ + purgatory/arch/arm64/purgatory-arm64.c + +dist += \ + $(arm64_PURGATORY_SRCS) \ + purgatory/arch/arm64/Makefile diff --git a/purgatory/arch/arm64/entry.S b/purgatory/arch/arm64/entry.S new file mode 100644 index 0000000..140e91d --- /dev/null +++ b/purgatory/arch/arm64/entry.S @@ -0,0 +1,54 @@ +/* + * ARM64 purgatory. + */ + +.macro debug_brk + mov x0, #0x18; /* angel_SWIreason_ReportException */ + mov x1, #0x20000; + add x1, x1, #0x20; /* ADP_Stopped_BreakPoint */ + hlt #0xf000 /* A64 semihosting */ +.endm + +.macro size, sym:req + .size \sym, . 
- \sym +.endm + +.text + +.globl purgatory_start +purgatory_start: + + adr x19, .Lstack + mov sp, x19 + + bl purgatory + +1: debug_brk + b 1b + +size purgatory_start + +.align 4 + .rept 256 + .quad 0 + .endr +.Lstack: + +.data + +.align 3 + +.globl arm64_sink +arm64_sink: + .quad 0 +size arm64_sink + +.globl arm64_kernel_entry +arm64_kernel_entry: + .quad 0 +size arm64_kernel_entry + +.globl arm64_dtb_addr +arm64_dtb_addr: + .quad 0 +size arm64_dtb_addr diff --git a/purgatory/arch/arm64/purgatory-arm64.c b/purgatory/arch/arm64/purgatory-arm64.c new file mode 100644 index 0000000..25960c3 --- /dev/null +++ b/purgatory/arch/arm64/purgatory-arm64.c @@ -0,0 +1,35 @@ +/* + * ARM64 purgatory. + */ + +#include <stdint.h> +#include <purgatory.h> + +/* Symbols set by kexec. */ + +extern uint32_t *arm64_sink; +extern void (*arm64_kernel_entry)(uint64_t); +extern uint64_t arm64_dtb_addr; + +void putchar(int ch) +{ + if (!arm64_sink) + return; + + *arm64_sink = ch; + + if (ch == '\n') + *arm64_sink = '\r'; +} + +void setup_arch(void) +{ + printf("purgatory: kernel_entry: %lx\n", + (unsigned long)arm64_kernel_entry); + printf("purgatory: dtb: %lx\n", arm64_dtb_addr); +} + +void post_verification_setup_arch(void) +{ + arm64_kernel_entry(arm64_dtb_addr); +} diff --git a/purgatory/arch/ppc64/Makefile b/purgatory/arch/ppc64/Makefile index 67f41ae..6c58fa2 100644 --- a/purgatory/arch/ppc64/Makefile +++ b/purgatory/arch/ppc64/Makefile @@ -9,10 +9,20 @@ ppc64_PURGATORY_SRCS += purgatory/arch/ppc64/console-ppc64.c ppc64_PURGATORY_SRCS += purgatory/arch/ppc64/crashdump_backup.c ppc64_PURGATORY_SRCS += purgatory/arch/ppc64/misc.S -ppc64_PURGATORY_EXTRA_CFLAGS += -m64 -mcall-aixdesc -msoft-float -ppc64_PURGATORY_EXTRA_ASFLAGS += -m64 -mcall-aixdesc -ppc64_PURGATORY_EXTRA_LDFLAGS += -melf64ppc +ppc64_PURGATORY_EXTRA_CFLAGS += -m64 -msoft-float -fno-stack-protector \ + -fno-exceptions +ppc64_PURGATORY_EXTRA_ASFLAGS += -m64 +ifeq ($(SUBARCH),BE) + ppc64_PURGATORY_EXTRA_LDFLAGS += -melf64ppc 
+ ppc64_PURGATORY_EXTRA_CFLAGS += -mcall-aixdesc + ppc64_PURGATORY_EXTRA_ASFLAGS += -mcall-aixdesc +else + ppc64_PURGATORY_EXTRA_LDFLAGS += -melf64lppc + ppc64_PURGATORY_EXTRA_CFLAGS += -mlittle-endian + ppc64_PURGATORY_EXTRA_ASFLAGS += -mlittle-endian +endif dist += purgatory/arch/ppc64/Makefile $(ppc64_PURGATORY_SRCS) \ purgatory/arch/ppc64/hvCall.h \ + purgatory/arch/ppc64/ppc64_asm.h \ purgatory/arch/ppc64/purgatory-ppc64.h diff --git a/purgatory/arch/ppc64/console-ppc64.c b/purgatory/arch/ppc64/console-ppc64.c index 78a233b..b80183c 100644 --- a/purgatory/arch/ppc64/console-ppc64.c +++ b/purgatory/arch/ppc64/console-ppc64.c @@ -21,6 +21,9 @@ #include <purgatory.h> #include "hvCall.h" +#include <byteswap.h> +#include <endian.h> +#include <asm/byteorder.h> extern int debug; @@ -36,6 +39,7 @@ void putchar(int c) putchar('\r'); buff[0] = c; - plpar_hcall_norets(H_PUT_TERM_CHAR, 0, 1, lbuf[0], lbuf[1]); + plpar_hcall_norets(H_PUT_TERM_CHAR, 0, 1, + __cpu_to_be64(lbuf[0]), __cpu_to_be64(lbuf[1])); return; } diff --git a/purgatory/arch/ppc64/hvCall.S b/purgatory/arch/ppc64/hvCall.S index bdc4cb0..a96c489 100644 --- a/purgatory/arch/ppc64/hvCall.S +++ b/purgatory/arch/ppc64/hvCall.S @@ -9,12 +9,13 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include "ppc64_asm.h" #define HVSC .long 0x44000022 .text .machine ppc64 -.globl .plpar_hcall_norets -.plpar_hcall_norets: +.globl DOTSYM(plpar_hcall_norets) +DOTSYM(plpar_hcall_norets): or 6,6,6 # medium low priority mfcr 0 stw 0,8(1) diff --git a/purgatory/arch/ppc64/ppc64_asm.h b/purgatory/arch/ppc64/ppc64_asm.h new file mode 100644 index 0000000..b8746fd --- /dev/null +++ b/purgatory/arch/ppc64/ppc64_asm.h @@ -0,0 +1,16 @@ +/* + * ppc64_asm.h - common defines for PPC64 assembly parts + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +/* + * ABIv1 requires dot symbol while ABIv2 does not. + */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define DOTSYM(a) a +#else +#define GLUE(a,b) a##b +#define DOTSYM(a) GLUE(.,a) +#endif diff --git a/purgatory/arch/ppc64/purgatory-ppc64.c b/purgatory/arch/ppc64/purgatory-ppc64.c index 0b6d326..7248ac8 100644 --- a/purgatory/arch/ppc64/purgatory-ppc64.c +++ b/purgatory/arch/ppc64/purgatory-ppc64.c @@ -29,6 +29,8 @@ unsigned long dt_offset = 0; unsigned long my_toc = 0; unsigned long kernel = 0; unsigned int debug = 0; +unsigned long opal_base = 0; +unsigned long opal_entry = 0; void setup_arch(void) { diff --git a/purgatory/arch/ppc64/v2wrap.S b/purgatory/arch/ppc64/v2wrap.S index 3f78620..dc5034f 100644 --- a/purgatory/arch/ppc64/v2wrap.S +++ b/purgatory/arch/ppc64/v2wrap.S @@ -18,6 +18,8 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # +#include "ppc64_asm.h" + # v2wrap.S # a wrapper to call purgatory code to backup first # 32kB of first kernel into the backup region @@ -46,6 +48,7 @@ ori rn,rn,name##@l .machine ppc64 + .align 8 .globl purgatory_start purgatory_start: b master .org purgatory_start + 0x5c # ABI: possible run_at_load flag at 0x5c @@ -76,7 +79,7 @@ master: ld 1,0(6) #setup stack subi 1,1,112 - bl .purgatory + bl DOTSYM(purgatory) nop or 3,3,3 # ok now to high priority, lets boot @@ -87,11 +90,25 @@ master: LOADADDR(16, dt_offset) ld 3,0(16) # load device-tree address mr 16,3 # save dt address in reg16 +#ifdef __BIG_ENDIAN__ lwz 6,20(3) # fetch version number +#else + li 4,20 + lwbrx 6,3,4 # fetch BE version number +#endif cmpwi 0,6,2 # v2 ? 
blt 80f +#ifdef __BIG_ENDIAN__ stw 17,28(3) # save my cpu number as boot_cpu_phys +#else + li 4,28 + stwbrx 17,3,4 # Store my cpu as BE value +#endif 80: + LOADADDR(6,opal_base) # For OPAL early debug + ld 8,0(6) # load the OPAL base address in r8 + LOADADDR(6,opal_entry) # For OPAL early debug + ld 9,0(6) # load the OPAL entry address in r9 LOADADDR(6,kernel) ld 4,0(6) # load the kernel address LOADADDR(6,run_at_load) # the load flag diff --git a/vmcore-dmesg/vmcore-dmesg.c b/vmcore-dmesg/vmcore-dmesg.c index 0f477c0..f47ee11 100644 --- a/vmcore-dmesg/vmcore-dmesg.c +++ b/vmcore-dmesg/vmcore-dmesg.c @@ -16,6 +16,7 @@ #include <elf.h> #include <stdbool.h> #include <inttypes.h> +#include <ctype.h> /* The 32bit and 64bit note headers make it clear we don't care */ typedef Elf32_Nhdr Elf_Nhdr; @@ -34,10 +35,10 @@ static loff_t logged_chars_vaddr; static loff_t log_first_idx_vaddr; static loff_t log_next_idx_vaddr; -/* struct log size */ +/* struct printk_log (or older log) size */ static uint64_t log_sz; -/* struct log field offsets */ +/* struct printk_log (or older log) field offsets */ static uint64_t log_offset_ts_nsec = UINT64_MAX; static uint16_t log_offset_len = UINT16_MAX; static uint16_t log_offset_text_len = UINT16_MAX; @@ -254,6 +255,7 @@ static void scan_vmcoreinfo(char *start, size_t size) char *pos, *eol; char temp_buf[1024]; bool last_line = false; + char *str; #define SYMBOL(sym) { \ .str = "SYMBOL(" #sym ")=", \ @@ -324,19 +326,41 @@ static void scan_vmcoreinfo(char *start, size_t size) *symbol[i].vaddr = vaddr; } - /* Check for "SIZE(log)=" */ - if (memcmp("SIZE(log)=", pos, 10) == 0) - log_sz = strtoull(pos + 10, NULL, 10); - - /* Check for struct log field offsets */ - if (memcmp("OFFSET(log.ts_nsec)=", pos, 20) == 0) - log_offset_ts_nsec = strtoull(pos + 20, NULL, 10); - - if (memcmp("OFFSET(log.len)=", pos, 16) == 0) - log_offset_len = strtoul(pos + 16, NULL, 10); - - if (memcmp("OFFSET(log.text_len)=", pos, 21) == 0) - log_offset_text_len = 
strtoul(pos + 21, NULL, 10); + /* Check for "SIZE(printk_log)" or older "SIZE(log)=" */ + str = "SIZE(log)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_sz = strtoull(pos + strlen(str), NULL, 10); + + str = "SIZE(printk_log)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_sz = strtoull(pos + strlen(str), NULL, 10); + + /* Check for struct printk_log (or older log) field offsets */ + str = "OFFSET(log.ts_nsec)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_ts_nsec = strtoull(pos + strlen(str), NULL, + 10); + str = "OFFSET(printk_log.ts_nsec)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_ts_nsec = strtoull(pos + strlen(str), NULL, + 10); + + str = "OFFSET(log.len)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_len = strtoul(pos + strlen(str), NULL, 10); + + str = "OFFSET(printk_log.len)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_len = strtoul(pos + strlen(str), NULL, 10); + + str = "OFFSET(log.text_len)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_text_len = strtoul(pos + strlen(str), NULL, + 10); + str = "OFFSET(printk_log.text_len)="; + if (memcmp(str, pos, strlen(str)) == 0) + log_offset_text_len = strtoul(pos + strlen(str), NULL, + 10); if (last_line) break; @@ -529,7 +553,7 @@ static inline uint32_t struct_val_u32(char *ptr, unsigned int offset) return(file32_to_cpu(*(uint32_t *)(ptr + offset))); } -static inline uint32_t struct_val_u64(char *ptr, unsigned int offset) +static inline uint64_t struct_val_u64(char *ptr, unsigned int offset) { return(file64_to_cpu(*(uint64_t *)(ptr + offset))); } @@ -668,12 +692,12 @@ static void dump_dmesg_structured(int fd) for (i = 0; i < text_len; i++) { unsigned char c = log_text(msg)[i]; - if (c < ' ' || c >= 128) + if (!isprint(c) && !isspace(c)) len += sprintf(out_buf + len, "\\x%02x", c); else out_buf[len++] = c; - if (len >= OUT_BUF_SIZE - 16) { + if (len >= OUT_BUF_SIZE - 64) { write_to_stdout(out_buf, len); len = 0; } |