diff options
Diffstat (limited to 'arch/x86/kernel/e820.c')
-rw-r--r-- | arch/x86/kernel/e820.c | 1054 |
1 files changed, 529 insertions, 525 deletions
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b2bbad6ebe4d..6e9b26fa6d05 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1,49 +1,55 @@ /* - * Handle the memory map. - * The functions here do the job until bootmem takes over. + * Low level x86 E820 memory map handling functions. * - * Getting sanitize_e820_map() in sync with i386 version by applying change: - * - Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <xela@slit.de>, December 2002. - * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * The firmware and bootloader passes us the "E820 table", which is the primary + * physical memory layout description available about x86 systems. * + * The kernel takes the E820 memory layout and optionally modifies it with + * quirks and other tweaks, and feeds that into the generic Linux memory + * allocation code routines via a platform independent interface (memblock, etc.). */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> #include <linux/crash_dump.h> -#include <linux/export.h> #include <linux/bootmem.h> -#include <linux/pfn.h> #include <linux/suspend.h> #include <linux/acpi.h> #include <linux/firmware-map.h> #include <linux/memblock.h> #include <linux/sort.h> -#include <asm/e820.h> -#include <asm/proto.h> +#include <asm/e820/api.h> #include <asm/setup.h> -#include <asm/cpufeature.h> /* - * The e820 map is the map that gets modified e.g. with command line parameters - * and that is also registered with modifications in the kernel resource tree - * with the iomem_resource as parent. + * We organize the E820 table into two main data structures: * - * The e820_saved is directly saved after the BIOS-provided memory map is - * copied. It doesn't get modified afterwards. It's registered for the - * /sys/firmware/memmap interface. + * - 'e820_table_firmware': the original firmware version passed to us by the + * bootloader - not modified by the kernel. 
We use this to: * - * That memory map is not modified and is used as base for kexec. The kexec'd - * kernel should get the same memory map as the firmware provides. Then the - * user can e.g. boot the original kernel with mem=1G while still booting the - * next kernel with full memory. + * - inform the user about the firmware's notion of memory layout + * via /sys/firmware/memmap + * + * - the hibernation code uses it to generate a kernel-independent MD5 + * fingerprint of the physical memory layout of a system. + * + * - kexec, which is a bootloader in disguise, uses the original E820 + * layout to pass to the kexec-ed kernel. This way the original kernel + * can have a restricted E820 map while the kexec()-ed kexec-kernel + * can have access to full memory - etc. + * + * - 'e820_table': this is the main E820 table that is massaged by the + * low level x86 platform code, or modified by boot parameters, before + * passed on to higher level MM layers. + * + * Once the E820 map has been converted to the standard Linux memory layout + * information its role stops - modifying it has no effect and does not get + * re-propagated. So itsmain role is a temporary bootstrap storage of firmware + * specific memory layout data during early bootup. */ -static struct e820map initial_e820 __initdata; -static struct e820map initial_e820_saved __initdata; -struct e820map *e820 __refdata = &initial_e820; -struct e820map *e820_saved __refdata = &initial_e820_saved; +static struct e820_table e820_table_init __initdata; +static struct e820_table e820_table_firmware_init __initdata; + +struct e820_table *e820_table __refdata = &e820_table_init; +struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0xaeedbabe; @@ -55,51 +61,53 @@ EXPORT_SYMBOL(pci_mem_start); * This function checks if any part of the range <start,end> is mapped * with type. 
*/ -int -e820_any_mapped(u64 start, u64 end, unsigned type) +bool e820__mapped_any(u64 start, u64 end, enum e820_type type) { int i; - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; - if (type && ei->type != type) + if (type && entry->type != type) continue; - if (ei->addr >= end || ei->addr + ei->size <= start) + if (entry->addr >= end || entry->addr + entry->size <= start) continue; return 1; } return 0; } -EXPORT_SYMBOL_GPL(e820_any_mapped); +EXPORT_SYMBOL_GPL(e820__mapped_any); /* - * This function checks if the entire range <start,end> is mapped with type. + * This function checks if the entire <start,end> range is mapped with 'type'. * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case + * Note: this function only works correctly once the E820 table is sorted and + * not-overlapping (at least for the range specified), which is the case normally. */ -int __init e820_all_mapped(u64 start, u64 end, unsigned type) +bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type) { int i; - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; - if (type && ei->type != type) + if (type && entry->type != type) continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) + + /* Is the region (part) in overlap with the current region? 
*/ + if (entry->addr >= end || entry->addr + entry->size <= start) continue; - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there + /* + * If the region is at the beginning of <start,end> we move + * 'start' to the end of the region since it's ok until there */ - if (ei->addr <= start) - start = ei->addr + ei->size; + if (entry->addr <= start) + start = entry->addr + entry->size; + /* - * if start is now at or beyond end, we're done, full - * coverage + * If 'start' is now at or beyond 'end', we're done, full + * coverage of the desired range exists: */ if (start >= end) return 1; @@ -108,94 +116,77 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) } /* - * Add a memory region to the kernel e820 map. + * Add a memory region to the kernel E820 map. */ -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, - int type) +static void __init __e820__range_add(struct e820_table *table, u64 start, u64 size, enum e820_type type) { - int x = e820x->nr_map; + int x = table->nr_entries; - if (x >= ARRAY_SIZE(e820x->map)) { - printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", - (unsigned long long) start, - (unsigned long long) (start + size - 1)); + if (x >= ARRAY_SIZE(table->entries)) { + pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1); return; } - e820x->map[x].addr = start; - e820x->map[x].size = size; - e820x->map[x].type = type; - e820x->nr_map++; + table->entries[x].addr = start; + table->entries[x].size = size; + table->entries[x].type = type; + table->nr_entries++; } -void __init e820_add_region(u64 start, u64 size, int type) +void __init e820__range_add(u64 start, u64 size, enum e820_type type) { - __e820_add_region(e820, start, size, type); + __e820__range_add(e820_table, start, size, type); } -static void __init e820_print_type(u32 type) +static void __init e820_print_type(enum 
e820_type type) { switch (type) { - case E820_RAM: - case E820_RESERVED_KERN: - printk(KERN_CONT "usable"); - break; - case E820_RESERVED: - printk(KERN_CONT "reserved"); - break; - case E820_ACPI: - printk(KERN_CONT "ACPI data"); - break; - case E820_NVS: - printk(KERN_CONT "ACPI NVS"); - break; - case E820_UNUSABLE: - printk(KERN_CONT "unusable"); - break; - case E820_PMEM: - case E820_PRAM: - printk(KERN_CONT "persistent (type %u)", type); - break; - default: - printk(KERN_CONT "type %u", type); - break; + case E820_TYPE_RAM: /* Fall through: */ + case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; + case E820_TYPE_RESERVED: pr_cont("reserved"); break; + case E820_TYPE_ACPI: pr_cont("ACPI data"); break; + case E820_TYPE_NVS: pr_cont("ACPI NVS"); break; + case E820_TYPE_UNUSABLE: pr_cont("unusable"); break; + case E820_TYPE_PMEM: /* Fall through: */ + case E820_TYPE_PRAM: pr_cont("persistent (type %u)", type); break; + default: pr_cont("type %u", type); break; } } -void __init e820_print_map(char *who) +void __init e820__print_table(char *who) { int i; - for (i = 0; i < e820->nr_map; i++) { - printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, - (unsigned long long) e820->map[i].addr, - (unsigned long long) - (e820->map[i].addr + e820->map[i].size - 1)); - e820_print_type(e820->map[i].type); - printk(KERN_CONT "\n"); + for (i = 0; i < e820_table->nr_entries; i++) { + pr_info("%s: [mem %#018Lx-%#018Lx] ", who, + e820_table->entries[i].addr, + e820_table->entries[i].addr + e820_table->entries[i].size - 1); + + e820_print_type(e820_table->entries[i].type); + pr_cont("\n"); } } /* - * Sanitize the BIOS e820 map. + * Sanitize an E820 map. * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps, + * Some E820 layouts include overlapping entries. 
The following + * replaces the original E820 map with a new one, removing overlaps, * and resolving conflicting memory types in favor of highest * numbered type. * - * The input parameter biosmap points to an array of 'struct - * e820entry' which on entry has elements in the range [0, *pnr_map) - * valid, and which has space for up to max_nr_map entries. - * On return, the resulting sanitized e820 map entries will be in - * overwritten in the same location, starting at biosmap. + * The input parameter 'entries' points to an array of 'struct + * e820_entry' which on entry has elements in the range [0, *nr_entries) + * valid, and which has space for up to max_nr_entries entries. + * On return, the resulting sanitized E820 map entries will be in + * overwritten in the same location, starting at 'entries'. * - * The integer pointed to by pnr_map must be valid on entry (the - * current number of valid entries located at biosmap). If the - * sanitizing succeeds the *pnr_map will be updated with the new - * number of valid entries (something no more than max_nr_map). + * The integer pointed to by nr_entries must be valid on entry (the + * current number of valid entries located at 'entries'). If the + * sanitizing succeeds the *nr_entries will be updated with the new + * number of valid entries (something no more than max_nr_entries). 
* - * The return value from sanitize_e820_map() is zero if it + * The return value from e820__update_table() is zero if it * successfully 'sanitized' the map entries passed in, and is -1 * if it did nothing, which can happen if either of (1) it was * only passed one map entry, or (2) any of the input map entries @@ -238,10 +229,17 @@ void __init e820_print_map(char *who) * ______________________4_ */ struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ + /* Pointer to the original entry: */ + struct e820_entry *entry; + /* Address for this change point: */ + unsigned long long addr; }; +static struct change_member change_point_list[2*E820_MAX_ENTRIES] __initdata; +static struct change_member *change_point[2*E820_MAX_ENTRIES] __initdata; +static struct e820_entry *overlap_list[E820_MAX_ENTRIES] __initdata; +static struct e820_entry new_entries[E820_MAX_ENTRIES] __initdata; + static int __init cpcompare(const void *a, const void *b) { struct change_member * const *app = a, * const *bpp = b; @@ -249,164 +247,141 @@ static int __init cpcompare(const void *a, const void *b) /* * Inputs are pointers to two elements of change_point[]. If their - * addresses are unequal, their difference dominates. If the addresses + * addresses are not equal, their difference dominates. If the addresses * are equal, then consider one that represents the end of its region * to be greater than one that does not. */ if (ap->addr != bp->addr) return ap->addr > bp->addr ? 
1 : -1; - return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); + return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr); } -int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - u32 *pnr_map) +int __init e820__update_table(struct e820_table *table) { - static struct change_member change_point_list[2*E820_X_MAX] __initdata; - static struct change_member *change_point[2*E820_X_MAX] __initdata; - static struct e820entry *overlap_list[E820_X_MAX] __initdata; - static struct e820entry new_bios[E820_X_MAX] __initdata; - unsigned long current_type, last_type; + struct e820_entry *entries = table->entries; + u32 max_nr_entries = ARRAY_SIZE(table->entries); + enum e820_type current_type, last_type; unsigned long long last_addr; - int chgidx; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; + u32 new_nr_entries, overlap_entries; + u32 i, chg_idx, chg_nr; - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) + /* If there's only one memory region, don't bother: */ + if (table->nr_entries < 2) return -1; - old_nr = *pnr_map; - BUG_ON(old_nr > max_nr_map); + table->nr_entries = table->nr_entries; + BUG_ON(table->nr_entries > max_nr_entries); - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) + /* Bail out if we find any unreasonable addresses in the map: */ + for (i = 0; i < table->nr_entries; i++) { + if (entries[i].addr + entries[i].size < entries[i].addr) return -1; + } - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) + /* Create pointers for initial change-point information (for sorting): */ + for (i = 0; i < 2 * table->nr_entries; i++) change_point[i] = &change_point_list[i]; - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - 
chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; + /* + * Record all known change-points (starting and ending addresses), + * omitting empty memory regions: + */ + chg_idx = 0; + for (i = 0; i < table->nr_entries; i++) { + if (entries[i].size != 0) { + change_point[chg_idx]->addr = entries[i].addr; + change_point[chg_idx++]->entry = &entries[i]; + change_point[chg_idx]->addr = entries[i].addr + entries[i].size; + change_point[chg_idx++]->entry = &entries[i]; } } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; + chg_nr = chg_idx; + + /* Sort change-point list by memory addresses (low -> high): */ + sort(change_point, chg_nr, sizeof(*change_point), cpcompare, NULL); + + /* Create a new memory map, removing overlaps: */ + overlap_entries = 0; /* Number of entries in the overlap table */ + new_nr_entries = 0; /* Index for creating new map entries */ + last_type = 0; /* Start with undefined memory type */ + last_addr = 0; /* Start with 0 
as last starting address */ + + /* Loop through change-points, determining effect on the new map: */ + for (chg_idx = 0; chg_idx < chg_nr; chg_idx++) { + /* Keep track of all overlapping entries */ + if (change_point[chg_idx]->addr == change_point[chg_idx]->entry->addr) { + /* Add map entry to overlap list (> 1 entry implies an overlap) */ + overlap_list[overlap_entries++] = change_point[chg_idx]->entry; } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ + /* Remove entry from list (order independent, so swap with last): */ for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; + if (overlap_list[i] == change_point[chg_idx]->entry) + overlap_list[i] = overlap_list[overlap_entries-1]; } overlap_entries--; } /* - * if there are overlapping entries, decide which + * If there are overlapping entries, decide which * "type" to use (larger value takes precedence -- * 1=usable, 2,3,4,4+=unusable) */ current_type = 0; - for (i = 0; i < overlap_entries; i++) + for (i = 0; i < overlap_entries; i++) { if (overlap_list[i]->type > current_type) current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type || current_type == E820_PRAM) { + } + + /* Continue building up new map based on this information: */ + if (current_type != last_type || current_type == E820_TYPE_PRAM) { if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? 
- */ - if (++new_bios_entry >= max_nr_map) + new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr; + /* Move forward only if the new size was non-zero: */ + if (new_entries[new_nr_entries].size != 0) + /* No more space left for new entries? */ + if (++new_nr_entries >= max_nr_entries) break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; + new_entries[new_nr_entries].addr = change_point[chg_idx]->addr; + new_entries[new_nr_entries].type = current_type; + last_addr = change_point[chg_idx]->addr; } last_type = current_type; } } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; + /* Copy the new entries into the original location: */ + memcpy(entries, new_entries, new_nr_entries*sizeof(*entries)); + table->nr_entries = new_nr_entries; return 0; } -static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) +static int __init __append_e820_table(struct boot_e820_entry *entries, u32 nr_entries) { - while (nr_map) { - u64 start = biosmap->addr; - u64 size = biosmap->size; + struct boot_e820_entry *entry = entries; + + while (nr_entries) { + u64 start = entry->addr; + u64 size = entry->size; u64 end = start + size - 1; - u32 type = biosmap->type; + u32 type = entry->type; - /* Overflow in 64 bits? Ignore the memory map. */ + /* Ignore the entry on 64-bit overflow: */ if (start > end && likely(size)) return -1; - e820_add_region(start, size, type); + e820__range_add(start, size, type); - biosmap++; - nr_map--; + entry++; + nr_entries--; } return 0; } /* - * Copy the BIOS e820 map into a safe place. + * Copy the BIOS E820 map into a safe place. * * Sanity-check it while we're at it.. 
* @@ -414,18 +389,17 @@ static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. */ -static int __init append_e820_map(struct e820entry *biosmap, int nr_map) +static int __init append_e820_table(struct boot_e820_entry *entries, u32 nr_entries) { /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) + if (nr_entries < 2) return -1; - return __append_e820_map(biosmap, nr_map); + return __append_e820_table(entries, nr_entries); } -static u64 __init __e820_update_range(struct e820map *e820x, u64 start, - u64 size, unsigned old_type, - unsigned new_type) +static u64 __init +__e820__range_update(struct e820_table *table, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) { u64 end; unsigned int i; @@ -437,77 +411,73 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", - (unsigned long long) start, (unsigned long long) (end - 1)); + printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", start, end - 1); e820_print_type(old_type); - printk(KERN_CONT " ==> "); + pr_cont(" ==> "); e820_print_type(new_type); - printk(KERN_CONT "\n"); + pr_cont("\n"); - for (i = 0; i < e820x->nr_map; i++) { - struct e820entry *ei = &e820x->map[i]; + for (i = 0; i < table->nr_entries; i++) { + struct e820_entry *entry = &table->entries[i]; u64 final_start, final_end; - u64 ei_end; + u64 entry_end; - if (ei->type != old_type) + if (entry->type != old_type) continue; - ei_end = ei->addr + ei->size; - /* totally covered by new range? */ - if (ei->addr >= start && ei_end <= end) { - ei->type = new_type; - real_updated_size += ei->size; + entry_end = entry->addr + entry->size; + + /* Completely covered by new range? 
*/ + if (entry->addr >= start && entry_end <= end) { + entry->type = new_type; + real_updated_size += entry->size; continue; } - /* new range is totally covered? */ - if (ei->addr < start && ei_end > end) { - __e820_add_region(e820x, start, size, new_type); - __e820_add_region(e820x, end, ei_end - end, ei->type); - ei->size = start - ei->addr; + /* New range is completely covered? */ + if (entry->addr < start && entry_end > end) { + __e820__range_add(table, start, size, new_type); + __e820__range_add(table, end, entry_end - end, entry->type); + entry->size = start - entry->addr; real_updated_size += size; continue; } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(end, ei_end); + /* Partially covered: */ + final_start = max(start, entry->addr); + final_end = min(end, entry_end); if (final_start >= final_end) continue; - __e820_add_region(e820x, final_start, final_end - final_start, - new_type); + __e820__range_add(table, final_start, final_end - final_start, new_type); real_updated_size += final_end - final_start; /* - * left range could be head or tail, so need to update - * size at first. 
+ * Left range could be head or tail, so need to update + * its size first: */ - ei->size -= final_end - final_start; - if (ei->addr < final_start) + entry->size -= final_end - final_start; + if (entry->addr < final_start) continue; - ei->addr = final_end; + + entry->addr = final_end; } return real_updated_size; } -u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) +u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) { - return __e820_update_range(e820, start, size, old_type, new_type); + return __e820__range_update(e820_table, start, size, old_type, new_type); } -static u64 __init e820_update_range_saved(u64 start, u64 size, - unsigned old_type, unsigned new_type) +static u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) { - return __e820_update_range(e820_saved, start, size, old_type, - new_type); + return __e820__range_update(e820_table_firmware, start, size, old_type, new_type); } -/* make e820 not cover the range */ -u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype) +/* Remove a range of memory from the E820 table: */ +u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type) { int i; u64 end; @@ -517,85 +487,89 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, size = ULLONG_MAX - start; end = start + size; - printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", - (unsigned long long) start, (unsigned long long) (end - 1)); - if (checktype) + printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", start, end - 1); + if (check_type) e820_print_type(old_type); - printk(KERN_CONT "\n"); + pr_cont("\n"); - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; u64 final_start, final_end; - u64 ei_end; 
+ u64 entry_end; - if (checktype && ei->type != old_type) + if (check_type && entry->type != old_type) continue; - ei_end = ei->addr + ei->size; - /* totally covered? */ - if (ei->addr >= start && ei_end <= end) { - real_removed_size += ei->size; - memset(ei, 0, sizeof(struct e820entry)); + entry_end = entry->addr + entry->size; + + /* Completely covered? */ + if (entry->addr >= start && entry_end <= end) { + real_removed_size += entry->size; + memset(entry, 0, sizeof(*entry)); continue; } - /* new range is totally covered? */ - if (ei->addr < start && ei_end > end) { - e820_add_region(end, ei_end - end, ei->type); - ei->size = start - ei->addr; + /* Is the new range completely covered? */ + if (entry->addr < start && entry_end > end) { + e820__range_add(end, entry_end - end, entry->type); + entry->size = start - entry->addr; real_removed_size += size; continue; } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(end, ei_end); + /* Partially covered: */ + final_start = max(start, entry->addr); + final_end = min(end, entry_end); if (final_start >= final_end) continue; + real_removed_size += final_end - final_start; /* - * left range could be head or tail, so need to update - * size at first. 
+ * Left range could be head or tail, so need to update + * the size first: */ - ei->size -= final_end - final_start; - if (ei->addr < final_start) + entry->size -= final_end - final_start; + if (entry->addr < final_start) continue; - ei->addr = final_end; + + entry->addr = final_end; } return real_removed_size; } -void __init update_e820(void) +void __init e820__update_table_print(void) { - if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map)) + if (e820__update_table(e820_table)) return; - printk(KERN_INFO "e820: modified physical RAM map:\n"); - e820_print_map("modified"); + + pr_info("e820: modified physical RAM map:\n"); + e820__print_table("modified"); } -static void __init update_e820_saved(void) + +static void __init e820__update_table_firmware(void) { - sanitize_e820_map(e820_saved->map, ARRAY_SIZE(e820_saved->map), - &e820_saved->nr_map); + e820__update_table(e820_table_firmware); } + #define MAX_GAP_END 0x100000000ull + /* - * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. + * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB). */ -static int __init e820_search_gap(unsigned long *gapstart, - unsigned long *gapsize) +static int __init e820_search_gap(unsigned long *gapstart, unsigned long *gapsize) { unsigned long long last = MAX_GAP_END; - int i = e820->nr_map; + int i = e820_table->nr_entries; int found = 0; while (--i >= 0) { - unsigned long long start = e820->map[i].addr; - unsigned long long end = start + e820->map[i].size; + unsigned long long start = e820_table->entries[i].addr; + unsigned long long end = start + e820_table->entries[i].size; /* * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true + * fit in 32 bits if this condition is true: */ if (last > end) { unsigned long gap = last - end; @@ -613,12 +587,14 @@ static int __init e820_search_gap(unsigned long *gapstart, } /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. 
We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. + * Search for the biggest gap in the low 32 bits of the E820 + * memory space. We pass this space to the PCI subsystem, so + * that it can assign MMIO resources for hotplug or + * unconfigured devices in. + * * Hopefully the BIOS let enough space left. */ -__init void e820_setup_gap(void) +__init void e820__setup_pci_gap(void) { unsigned long gapstart, gapsize; int found; @@ -629,138 +605,143 @@ __init void e820_setup_gap(void) if (!found) { #ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR - "e820: cannot find a gap in the 32bit address range\n" - "e820: PCI devices with unassigned 32bit BARs may break!\n"); + pr_err( + "e820: Cannot find an available gap in the 32-bit address range\n" + "e820: PCI devices with unassigned 32-bit BARs may not work!\n"); #else gapstart = 0x10000000; #endif } /* - * e820_reserve_resources_late protect stolen RAM already + * e820__reserve_resources_late() protects stolen RAM already: */ pci_mem_start = gapstart; - printk(KERN_INFO - "e820: [mem %#010lx-%#010lx] available for PCI devices\n", - gapstart, gapstart + gapsize - 1); + pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1); } /* * Called late during init, in free_initmem(). * - * Initial e820 and e820_saved are largish __initdata arrays. - * Copy them to (usually much smaller) dynamically allocated area. - * This is done after all tweaks we ever do to them: - * all functions which modify them are __init functions, - * they won't exist after this point. + * Initial e820_table and e820_table_firmware are largish __initdata arrays. + * + * Copy them to a (usually much smaller) dynamically allocated area that is + * sized precisely after the number of e820 entries. + * + * This is done after we've performed all the fixes and tweaks to the tables. 
+ * All functions which modify them are __init functions, which won't exist + * after free_initmem(). */ -__init void e820_reallocate_tables(void) +__init void e820__reallocate_tables(void) { - struct e820map *n; + struct e820_table *n; int size; - size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820->nr_map; + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries; n = kmalloc(size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820, size); - e820 = n; + memcpy(n, e820_table, size); + e820_table = n; - size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820_saved->nr_map; + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; n = kmalloc(size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820_saved, size); - e820_saved = n; + memcpy(n, e820_table_firmware, size); + e820_table_firmware = n; } -/** - * Because of the size limitation of struct boot_params, only first - * 128 E820 memory entries are passed to kernel via - * boot_params.e820_map, others are passed via SETUP_E820_EXT node of - * linked list of struct setup_data, which is parsed here. +/* + * Because of the small fixed size of struct boot_params, only the first + * 128 E820 memory entries are passed to the kernel via boot_params.e820_table, + * the remaining (if any) entries are passed via the SETUP_E820_EXT node of + * struct setup_data, which is parsed here. 
*/ -void __init parse_e820_ext(u64 phys_addr, u32 data_len) +void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) { int entries; - struct e820entry *extmap; + struct boot_e820_entry *extmap; struct setup_data *sdata; sdata = early_memremap(phys_addr, data_len); - entries = sdata->len / sizeof(struct e820entry); - extmap = (struct e820entry *)(sdata->data); - __append_e820_map(extmap, entries); - sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); + entries = sdata->len / sizeof(*extmap); + extmap = (struct boot_e820_entry *)(sdata->data); + + __append_e820_table(extmap, entries); + e820__update_table(e820_table); + early_memunmap(sdata, data_len); - printk(KERN_INFO "e820: extended physical RAM map:\n"); - e820_print_map("extended"); + pr_info("e820: extended physical RAM map:\n"); + e820__print_table("extended"); } -#if defined(CONFIG_X86_64) || \ - (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) -/** +/* * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for - * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * E820 RAM areas and register the corresponding pages as 'nosave' for + * hibernation (32-bit) or software suspend and suspend to RAM (64-bit). * - * This function requires the e820 map to be sorted and without any + * This function requires the E820 map to be sorted and without any * overlapping entries. 
*/ -void __init e820_mark_nosave_regions(unsigned long limit_pfn) +void __init e820__register_nosave_regions(unsigned long limit_pfn) { int i; unsigned long pfn = 0; - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); + if (pfn < PFN_UP(entry->addr)) + register_nosave_region(pfn, PFN_UP(entry->addr)); - pfn = PFN_DOWN(ei->addr + ei->size); + pfn = PFN_DOWN(entry->addr + entry->size); - if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) - register_nosave_region(PFN_UP(ei->addr), pfn); + if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) + register_nosave_region(PFN_UP(entry->addr), pfn); if (pfn >= limit_pfn) break; } } -#endif #ifdef CONFIG_ACPI -/** - * Mark ACPI NVS memory region, so that we can save/restore it during - * hibernation and the subsequent resume. +/* + * Register ACPI NVS memory regions, so that we can save/restore them during + * hibernation and the subsequent resume: */ -static int __init e820_mark_nvs_memory(void) +static int __init e820__register_nvs_regions(void) { int i; - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; - if (ei->type == E820_NVS) - acpi_nvs_register(ei->addr, ei->size); + if (entry->type == E820_TYPE_NVS) + acpi_nvs_register(entry->addr, entry->size); } return 0; } -core_initcall(e820_mark_nvs_memory); +core_initcall(e820__register_nvs_regions); #endif /* - * pre allocated 4k and reserved it in memblock and e820_saved + * Allocate the requested number of bytes with the requested alignment + * and return (the physical address) to the caller. Also register this + * range in the 'firmware' E820 table as a reserved range. 
+ * + * This allows kexec to fake a new mptable, as if it came from the real + * system. */ -u64 __init early_reserve_e820(u64 size, u64 align) +u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) { u64 addr; addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); if (addr) { - e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); - printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n"); - update_e820_saved(); + e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + pr_info("e820: update e820_table_firmware for e820__memblock_alloc_reserved()\n"); + e820__update_table_firmware(); } return addr; @@ -779,22 +760,22 @@ u64 __init early_reserve_e820(u64 size, u64 align) /* * Find the highest page frame number we have available */ -static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) +static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type) { int i; unsigned long last_pfn = 0; unsigned long max_arch_pfn = MAX_ARCH_PFN; - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; unsigned long start_pfn; unsigned long end_pfn; - if (ei->type != type) + if (entry->type != type) continue; - start_pfn = ei->addr >> PAGE_SHIFT; - end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; + start_pfn = entry->addr >> PAGE_SHIFT; + end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT; if (start_pfn >= limit_pfn) continue; @@ -809,18 +790,19 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) if (last_pfn > max_arch_pfn) last_pfn = max_arch_pfn; - printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n", + pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n", last_pfn, max_arch_pfn); return last_pfn; } -unsigned long __init e820_end_of_ram_pfn(void) + +unsigned long __init e820__end_of_ram_pfn(void) { - 
return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); + return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM); } -unsigned long __init e820_end_of_low_ram_pfn(void) +unsigned long __init e820__end_of_low_ram_pfn(void) { - return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM); + return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM); } static void __init early_panic(char *msg) @@ -831,7 +813,7 @@ static void __init early_panic(char *msg) static int userdef __initdata; -/* "mem=nopentium" disables the 4MB page tables. */ +/* The "mem=nopentium" boot option disables 4MB page tables on 32-bit kernels: */ static int __init parse_memopt(char *p) { u64 mem_size; @@ -844,17 +826,19 @@ static int __init parse_memopt(char *p) setup_clear_cpu_cap(X86_FEATURE_PSE); return 0; #else - printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n"); + pr_warn("mem=nopentium ignored! (only supported on x86_32)\n"); return -EINVAL; #endif } userdef = 1; mem_size = memparse(p, &p); - /* don't remove all of memory when handling "mem={invalid}" param */ + + /* Don't remove all memory when getting "mem={invalid}" parameter: */ if (mem_size == 0) return -EINVAL; - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + + e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); return 0; } @@ -872,12 +856,12 @@ static int __init parse_memmap_one(char *p) #ifdef CONFIG_CRASH_DUMP /* * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is + * the real memory size before the original memory map is * reset. 
*/ - saved_max_pfn = e820_end_of_ram_pfn(); + saved_max_pfn = e820__end_of_ram_pfn(); #endif - e820->nr_map = 0; + e820_table->nr_entries = 0; userdef = 1; return 0; } @@ -890,21 +874,23 @@ static int __init parse_memmap_one(char *p) userdef = 1; if (*p == '@') { start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RAM); + e820__range_add(start_at, mem_size, E820_TYPE_RAM); } else if (*p == '#') { start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_ACPI); + e820__range_add(start_at, mem_size, E820_TYPE_ACPI); } else if (*p == '$') { start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RESERVED); + e820__range_add(start_at, mem_size, E820_TYPE_RESERVED); } else if (*p == '!') { start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_PRAM); - } else - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + e820__range_add(start_at, mem_size, E820_TYPE_PRAM); + } else { + e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); + } return *p == '\0' ? 
0 : -EINVAL; } + static int __init parse_memmap_opt(char *str) { while (str) { @@ -921,68 +907,97 @@ static int __init parse_memmap_opt(char *str) } early_param("memmap", parse_memmap_opt); -void __init finish_e820_parsing(void) +/* + * Reserve all entries from the bootloader's extensible data nodes list, + * because if present we are going to use it later on to fetch e820 + * entries from it: + */ +void __init e820__reserve_setup_data(void) +{ + struct setup_data *data; + u64 pa_data; + + pa_data = boot_params.hdr.setup_data; + if (!pa_data) + return; + + while (pa_data) { + data = early_memremap(pa_data, sizeof(*data)); + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + pa_data = data->next; + early_memunmap(data, sizeof(*data)); + } + + e820__update_table(e820_table); + + memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + + pr_info("extended physical RAM map:\n"); + e820__print_table("reserve setup_data"); +} + +/* + * Called after parse_early_param(), after early parameters (such as mem=) + * have been processed, in which case we already have an E820 table filled in + * via the parameter callback function(s), but it's not sorted and printed yet: + */ +void __init e820__finish_early_params(void) { if (userdef) { - if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), - &e820->nr_map) < 0) + if (e820__update_table(e820_table) < 0) early_panic("Invalid user supplied memory map"); - printk(KERN_INFO "e820: user-defined physical RAM map:\n"); - e820_print_map("user"); + pr_info("e820: user-defined physical RAM map:\n"); + e820__print_table("user"); } } -static const char *__init e820_type_to_string(int e820_type) +static const char *__init e820_type_to_string(struct e820_entry *entry) { - switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: return "System RAM"; - case E820_ACPI: return "ACPI Tables"; - case E820_NVS: return "ACPI Non-volatile Storage"; - case E820_UNUSABLE: return 
"Unusable memory"; - case E820_PRAM: return "Persistent Memory (legacy)"; - case E820_PMEM: return "Persistent Memory"; - default: return "reserved"; + switch (entry->type) { + case E820_TYPE_RESERVED_KERN: /* Fall-through: */ + case E820_TYPE_RAM: return "System RAM"; + case E820_TYPE_ACPI: return "ACPI Tables"; + case E820_TYPE_NVS: return "ACPI Non-volatile Storage"; + case E820_TYPE_UNUSABLE: return "Unusable memory"; + case E820_TYPE_PRAM: return "Persistent Memory (legacy)"; + case E820_TYPE_PMEM: return "Persistent Memory"; + case E820_TYPE_RESERVED: return "Reserved"; + default: return "Unknown E820 type"; } } -static unsigned long __init e820_type_to_iomem_type(int e820_type) +static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry) { - switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: - return IORESOURCE_SYSTEM_RAM; - case E820_ACPI: - case E820_NVS: - case E820_UNUSABLE: - case E820_PRAM: - case E820_PMEM: - default: - return IORESOURCE_MEM; + switch (entry->type) { + case E820_TYPE_RESERVED_KERN: /* Fall-through: */ + case E820_TYPE_RAM: return IORESOURCE_SYSTEM_RAM; + case E820_TYPE_ACPI: /* Fall-through: */ + case E820_TYPE_NVS: /* Fall-through: */ + case E820_TYPE_UNUSABLE: /* Fall-through: */ + case E820_TYPE_PRAM: /* Fall-through: */ + case E820_TYPE_PMEM: /* Fall-through: */ + case E820_TYPE_RESERVED: /* Fall-through: */ + default: return IORESOURCE_MEM; } } -static unsigned long __init e820_type_to_iores_desc(int e820_type) +static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) { - switch (e820_type) { - case E820_ACPI: - return IORES_DESC_ACPI_TABLES; - case E820_NVS: - return IORES_DESC_ACPI_NV_STORAGE; - case E820_PMEM: - return IORES_DESC_PERSISTENT_MEMORY; - case E820_PRAM: - return IORES_DESC_PERSISTENT_MEMORY_LEGACY; - case E820_RESERVED_KERN: - case E820_RAM: - case E820_UNUSABLE: - default: - return IORES_DESC_NONE; + switch (entry->type) { + case E820_TYPE_ACPI: return 
IORES_DESC_ACPI_TABLES; + case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE; + case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; + case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_TYPE_RESERVED_KERN: /* Fall-through: */ + case E820_TYPE_RAM: /* Fall-through: */ + case E820_TYPE_UNUSABLE: /* Fall-through: */ + case E820_TYPE_RESERVED: /* Fall-through: */ + default: return IORES_DESC_NONE; } } -static bool __init do_mark_busy(u32 type, struct resource *res) +static bool __init do_mark_busy(enum e820_type type, struct resource *res) { /* this is the legacy bios/dos rom-shadow + mmio region */ if (res->start < (1ULL<<20)) @@ -993,61 +1008,71 @@ static bool __init do_mark_busy(u32 type, struct resource *res) * for exclusive use of a driver */ switch (type) { - case E820_RESERVED: - case E820_PRAM: - case E820_PMEM: + case E820_TYPE_RESERVED: + case E820_TYPE_PRAM: + case E820_TYPE_PMEM: return false; + case E820_TYPE_RESERVED_KERN: + case E820_TYPE_RAM: + case E820_TYPE_ACPI: + case E820_TYPE_NVS: + case E820_TYPE_UNUSABLE: default: return true; } } /* - * Mark e820 reserved areas as busy for the resource manager. 
+ * Mark E820 reserved areas as busy for the resource manager: */ + static struct resource __initdata *e820_res; -void __init e820_reserve_resources(void) + +void __init e820__reserve_resources(void) { int i; struct resource *res; u64 end; - res = alloc_bootmem(sizeof(struct resource) * e820->nr_map); + res = alloc_bootmem(sizeof(*res) * e820_table->nr_entries); e820_res = res; - for (i = 0; i < e820->nr_map; i++) { - end = e820->map[i].addr + e820->map[i].size - 1; + + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = e820_table->entries + i; + + end = entry->addr + entry->size - 1; if (end != (resource_size_t)end) { res++; continue; } - res->name = e820_type_to_string(e820->map[i].type); - res->start = e820->map[i].addr; - res->end = end; - - res->flags = e820_type_to_iomem_type(e820->map[i].type); - res->desc = e820_type_to_iores_desc(e820->map[i].type); + res->start = entry->addr; + res->end = end; + res->name = e820_type_to_string(entry); + res->flags = e820_type_to_iomem_type(entry); + res->desc = e820_type_to_iores_desc(entry); /* - * don't register the region that could be conflicted with - * pci device BAR resource and insert them later in - * pcibios_resource_survey() + * Don't register the region that could be conflicted with + * PCI device BAR resources and insert them later in + * pcibios_resource_survey(): */ - if (do_mark_busy(e820->map[i].type, res)) { + if (do_mark_busy(entry->type, res)) { res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); } res++; } - for (i = 0; i < e820_saved->nr_map; i++) { - struct e820entry *entry = &e820_saved->map[i]; - firmware_map_add_early(entry->addr, - entry->addr + entry->size, - e820_type_to_string(entry->type)); + for (i = 0; i < e820_table_firmware->nr_entries; i++) { + struct e820_entry *entry = e820_table_firmware->entries + i; + + firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry)); } } -/* How much should we pad RAM ending 
depending on where it is? */ +/* + * How much should we pad the end of RAM, depending on where it is? + */ static unsigned long __init ram_alignment(resource_size_t pos) { unsigned long mb = pos >> 20; @@ -1066,64 +1091,59 @@ static unsigned long __init ram_alignment(resource_size_t pos) #define MAX_RESOURCE_SIZE ((resource_size_t)-1) -void __init e820_reserve_resources_late(void) +void __init e820__reserve_resources_late(void) { int i; struct resource *res; res = e820_res; - for (i = 0; i < e820->nr_map; i++) { + for (i = 0; i < e820_table->nr_entries; i++) { if (!res->parent && res->end) insert_resource_expand_to_fit(&iomem_resource, res); res++; } /* - * Try to bump up RAM regions to reasonable boundaries to + * Try to bump up RAM regions to reasonable boundaries, to * avoid stolen RAM: */ - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *entry = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; u64 start, end; - if (entry->type != E820_RAM) + if (entry->type != E820_TYPE_RAM) continue; + start = entry->addr + entry->size; end = round_up(start, ram_alignment(start)) - 1; if (end > MAX_RESOURCE_SIZE) end = MAX_RESOURCE_SIZE; if (start >= end) continue; - printk(KERN_DEBUG - "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", - start, end); - reserve_region_with_split(&iomem_resource, start, end, - "RAM buffer"); + + printk(KERN_DEBUG "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", start, end); + reserve_region_with_split(&iomem_resource, start, end, "RAM buffer"); } } -char *__init default_machine_specific_memory_setup(void) +/* + * Pass the firmware (bootloader) E820 map to the kernel and process it: + */ +char *__init e820__memory_setup_default(void) { char *who = "BIOS-e820"; - u32 new_nr; + /* * Try to copy the BIOS-supplied E820-map. 
* * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - new_nr = boot_params.e820_entries; - sanitize_e820_map(boot_params.e820_map, - ARRAY_SIZE(boot_params.e820_map), - &new_nr); - boot_params.e820_entries = new_nr; - if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { + if (append_e820_table(boot_params.e820_table, boot_params.e820_entries) < 0) { u64 mem_size; - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k - < boot_params.screen_info.ext_mem_k) { + /* Compare results from other methods and take the one that gives more RAM: */ + if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { mem_size = boot_params.screen_info.ext_mem_k; who = "BIOS-88"; } else { @@ -1131,84 +1151,68 @@ char *__init default_machine_specific_memory_setup(void) who = "BIOS-e801"; } - e820->nr_map = 0; - e820_add_region(0, LOWMEMSIZE(), E820_RAM); - e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + e820_table->nr_entries = 0; + e820__range_add(0, LOWMEMSIZE(), E820_TYPE_RAM); + e820__range_add(HIGH_MEMORY, mem_size << 10, E820_TYPE_RAM); } - /* In case someone cares... 
*/ + /* We just appended a lot of ranges, sanitize the table: */ + e820__update_table(e820_table); + return who; } -void __init setup_memory_map(void) +/* + * Calls e820__memory_setup_default() in essence to pick up the firmware/bootloader + * E820 map - with an optional platform quirk available for virtual platforms + * to override this method of boot environment processing: + */ +void __init e820__memory_setup(void) { char *who; + /* This is a firmware interface ABI - make sure we don't break it: */ + BUILD_BUG_ON(sizeof(struct boot_e820_entry) != 20); + who = x86_init.resources.memory_setup(); - memcpy(e820_saved, e820, sizeof(struct e820map)); - printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); - e820_print_map(who); + + memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + + pr_info("e820: BIOS-provided physical RAM map:\n"); + e820__print_table(who); } -void __init memblock_x86_fill(void) +void __init e820__memblock_setup(void) { int i; u64 end; /* - * EFI may have more than 128 entries - * We are safe to enable resizing, beause memblock_x86_fill() - * is rather later for x86 + * The bootstrap memblock region count maximum is 128 entries + * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries + * than that - so allow memblock resizing. + * + * This is safe, because this call happens pretty late during x86 setup, + * so we know about reserved memory regions already. (This is important + * so that memblock resizing does not stomp over reserved areas.) 
*/ memblock_allow_resize(); - for (i = 0; i < e820->nr_map; i++) { - struct e820entry *ei = &e820->map[i]; + for (i = 0; i < e820_table->nr_entries; i++) { + struct e820_entry *entry = &e820_table->entries[i]; - end = ei->addr + ei->size; + end = entry->addr + entry->size; if (end != (resource_size_t)end) continue; - if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) + if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) continue; - memblock_add(ei->addr, ei->size); + memblock_add(entry->addr, entry->size); } - /* throw away partial pages */ + /* Throw away partial pages: */ memblock_trim_memory(PAGE_SIZE); memblock_dump_all(); } - -void __init memblock_find_dma_reserve(void) -{ -#ifdef CONFIG_X86_64 - u64 nr_pages = 0, nr_free_pages = 0; - unsigned long start_pfn, end_pfn; - phys_addr_t start, end; - int i; - u64 u; - - /* - * need to find out used area below MAX_DMA_PFN - * need to use memblock to get free size in [0, MAX_DMA_PFN] - * at first, and assume boot_mem will not take below MAX_DMA_PFN - */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - start_pfn = min(start_pfn, MAX_DMA_PFN); - end_pfn = min(end_pfn, MAX_DMA_PFN); - nr_pages += end_pfn - start_pfn; - } - - for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, - NULL) { - start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); - end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); - if (start_pfn < end_pfn) - nr_free_pages += end_pfn - start_pfn; - } - - set_dma_reserve(nr_pages - nr_free_pages); -#endif -} |