Diffstat (limited to 'arch')
35 files changed, 184 insertions, 86 deletions
diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
index 9b4cf5ebe6d5..c62aff908ab4 100644
--- a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
@@ -63,7 +63,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "bmc";
-		spi-max-frequency = <100000000>; /* 100 MHz */
+		spi-max-frequency = <50000000>; /* 50 MHz */
 #include "openbmc-flash-layout.dtsi"
 	};
 };
diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
index ff4c07c69af1..4554abf0c7cd 100644
--- a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
@@ -31,7 +31,7 @@
 	};
 
 	system-fault {
-		gpios = <&gpio ASPEED_GPIO(Z, 2) GPIO_ACTIVE_LOW>;
+		gpios = <&gpio ASPEED_GPIO(Z, 2) GPIO_ACTIVE_HIGH>;
 		panic-indicator;
 	};
 };
@@ -51,7 +51,7 @@
 		status = "okay";
 		m25p,fast-read;
 		label = "bmc";
-		spi-max-frequency = <100000000>; /* 100 MHz */
+		spi-max-frequency = <50000000>; /* 50 MHz */
 #include "openbmc-flash-layout-64.dtsi"
 	};
 };
diff --git a/arch/arm/boot/dts/exynos4412-itop-elite.dts b/arch/arm/boot/dts/exynos4412-itop-elite.dts
index b596e997e451..6260da187e92 100644
--- a/arch/arm/boot/dts/exynos4412-itop-elite.dts
+++ b/arch/arm/boot/dts/exynos4412-itop-elite.dts
@@ -182,7 +182,7 @@
 		compatible = "wlf,wm8960";
 		reg = <0x1a>;
 		clocks = <&pmu_system_controller 0>;
-		clock-names = "MCLK1";
+		clock-names = "mclk";
 		wlf,shared-lrclk;
 		#sound-dai-cells = <0>;
 	};
diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi
index 12e90a1cc6a1..1a9e4a96b2ff 100644
--- a/arch/arm/boot/dts/s5pv210.dtsi
+++ b/arch/arm/boot/dts/s5pv210.dtsi
@@ -566,7 +566,7 @@
 			interrupts = <29>;
 			clocks = <&clocks CLK_CSIS>,
 				 <&clocks SCLK_CSIS>;
-			clock-names = "clk_csis",
+			clock-names = "csis",
 				      "sclk_csis";
 			bus-width = <4>;
 			status = "disabled";
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6b752fe89745..c87445dde674 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -14,7 +14,7 @@
 #include <asm/virt.h>
 
 .text
-.pushsection    .idmap.text, "awx"
+.pushsection    .idmap.text, "a"
 
 /*
  * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b98970907226..e92caebff46a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -150,8 +150,8 @@ CPU_BE(	tbz	x19, #SCTLR_ELx_EE_SHIFT, 1f	)
 	pre_disable_mmu_workaround
 	msr	sctlr_el2, x19
 	b	3f
-	pre_disable_mmu_workaround
-2:	msr	sctlr_el1, x19
+2:	pre_disable_mmu_workaround
+	msr	sctlr_el1, x19
 3:	isb
 	mov	x19, xzr
 	ret
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 2ae7cff1953a..2aa5129d8253 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -97,7 +97,7 @@ SYM_FUNC_START(__cpu_suspend_enter)
 	ret
 SYM_FUNC_END(__cpu_suspend_enter)
 
-	.pushsection ".idmap.text", "awx"
+	.pushsection ".idmap.text", "a"
 SYM_CODE_START(cpu_resume)
 	mov	x0, xzr
 	bl	init_kernel_el
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 91410f488090..c2cb437821ca 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -167,7 +167,7 @@ alternative_else_nop_endif
 SYM_FUNC_END(cpu_do_resume)
 #endif
 
-	.pushsection ".idmap.text", "awx"
+	.pushsection ".idmap.text", "a"
 
 .macro	__idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
 	adrp	\tmp1, reserved_pg_dir
@@ -201,7 +201,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
 
 #define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
 
-	.pushsection ".idmap.text", "awx"
+	.pushsection ".idmap.text", "a"
 
 .macro	kpti_mk_tbl_ng, type, num_entries
 	add	end_\type\()p, cur_\type\()p, #\num_entries * 8
@@ -400,7 +400,7 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
  * Output:
  *	Return in x0 the value of the SCTLR_EL1 register.
  */
-	.pushsection ".idmap.text", "awx"
+	.pushsection ".idmap.text", "a"
SYM_FUNC_START(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index e2950f5db7c9..e715df5385d6 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -413,12 +413,12 @@ extern void paging_init (void);
  * For the 64bit version, the offset is extended by 32bit.
  */
 #define __swp_type(x)			((x).val & 0x1f)
-#define __swp_offset(x)			( (((x).val >> 6) & 0x7) | \
-					  (((x).val >> 8) & ~0x7) )
+#define __swp_offset(x)			( (((x).val >> 5) & 0x7) | \
+					  (((x).val >> 10) << 3) )
 #define __swp_entry(type, offset)	((swp_entry_t) { \
 					((type) & 0x1f) | \
-					((offset & 0x7) << 6) | \
-					((offset & ~0x7) << 8) })
+					((offset & 0x7) << 5) | \
+					((offset >> 3) << 10) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
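The new parisc layout keeps the five type bits in bits 0-4 but packs the offset into bits 5-7 and 10 upward, so bits 8 and 9 are never touched by a swap entry. A minimal user-space sketch of the round trip (the macro bodies are copied from the hunk above; the test harness itself is illustrative, not part of the patch):

#include <assert.h>
#include <stdio.h>

/* Swap-entry packing as in the patched arch/parisc/include/asm/pgtable.h,
 * rewritten to take a plain value instead of a swp_entry_t. */
#define swp_type(val)		((val) & 0x1f)
#define swp_offset(val)		((((val) >> 5) & 0x7) | (((val) >> 10) << 3))
#define swp_entry(type, off)	(((type) & 0x1f) | \
				 (((off) & 0x7) << 5) | \
				 (((off) >> 3) << 10))

int main(void)
{
	unsigned long type = 0x11, off = 0x12345;
	unsigned long val = swp_entry(type, off);

	/* Bits 8 and 9, which the encoding deliberately skips, stay clear. */
	assert(!(val & 0x300));
	/* Both fields survive the round trip unchanged. */
	assert(swp_type(val) == type && swp_offset(val) == off);
	printf("entry=%#lx type=%#lx offset=%#lx\n", val, type, off);
	return 0;
}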
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 4cf303a779ab..8d02b9d05738 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -9,6 +9,7 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
 endif
 CFLAGS_syscall_table.o	+= $(call cc-option,-Wno-override-init,)
+CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
 
 ifdef CONFIG_KEXEC
 AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax)
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 86c56616e5de..ea3d61de065b 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -217,18 +217,26 @@ bool kernel_page_present(struct page *page)
 	pgd = pgd_offset_k(addr);
 	if (!pgd_present(*pgd))
 		return false;
+	if (pgd_leaf(*pgd))
+		return true;
 
 	p4d = p4d_offset(pgd, addr);
 	if (!p4d_present(*p4d))
 		return false;
+	if (p4d_leaf(*p4d))
+		return true;
 
 	pud = pud_offset(p4d, addr);
 	if (!pud_present(*pud))
 		return false;
+	if (pud_leaf(*pud))
+		return true;
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
 		return false;
+	if (pmd_leaf(*pmd))
+		return true;
 
 	pte = pte_offset_kernel(pmd, addr);
 	return pte_present(*pte);
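The leaf checks matter because the kernel's linear map may use huge leaf mappings at any level of the table; without them the walk would descend into a "next-level table" that does not exist for such an entry. A hedged, generic sketch of the same early-exit pattern (the types and helpers here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative page-table level: absent, a leaf (a huge mapping that ends
 * the walk), or a link to the next, lower level. The final level (the PTE)
 * is modelled as a leaf. */
struct level {
	bool present;
	bool leaf;
	struct level *next;
};

/* Mirrors the shape of the patched kernel_page_present(): report "mapped"
 * as soon as a present leaf is seen, instead of blindly descending. */
static bool walk_present(struct level *l)
{
	for (; l; l = l->next) {
		if (!l->present)
			return false;	/* hole: not mapped */
		if (l->leaf)
			return true;	/* huge mapping or final PTE */
	}
	return false;
}

int main(void)
{
	struct level pmd = { .present = true, .leaf = true };	/* 2M-style leaf */
	struct level pud = { .present = true, .next = &pmd };

	printf("%d\n", walk_present(&pud));	/* 1: stops at the PMD leaf */
	return 0;
}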
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 4d1d0d8e99cb..a354d8bc1f0f 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -10,6 +10,10 @@
 #include "decompressor.h"
 #include "boot.h"
 
+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+#endif
+
 #define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
 #define swapper_pg_dir		vmlinux.swapper_pg_dir_off
 #define invalid_pg_dir		vmlinux.invalid_pg_dir_off
@@ -29,7 +33,7 @@ unsigned long __bootdata(pgalloc_low);
 
 enum populate_mode {
 	POPULATE_NONE,
-	POPULATE_ONE2ONE,
+	POPULATE_DIRECT,
 	POPULATE_ABS_LOWCORE,
 };
 
@@ -102,7 +106,7 @@ static unsigned long _pa(unsigned long addr, enum populate_mode mode)
 	switch (mode) {
 	case POPULATE_NONE:
 		return -1;
-	case POPULATE_ONE2ONE:
+	case POPULATE_DIRECT:
 		return addr;
 	case POPULATE_ABS_LOWCORE:
 		return __abs_lowcore_pa(addr);
@@ -126,7 +130,7 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
 static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
 				 enum populate_mode mode)
 {
-	unsigned long next;
+	unsigned long pages = 0;
 	pte_t *pte, entry;
 
 	pte = pte_offset_kernel(pmd, addr);
@@ -135,14 +139,17 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
 			entry = __pte(_pa(addr, mode));
 			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
 			set_pte(pte, entry);
+			pages++;
 		}
 	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_4K, pages);
 }
 
 static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
 				 enum populate_mode mode)
 {
-	unsigned long next;
+	unsigned long next, pages = 0;
 	pmd_t *pmd, entry;
 	pte_t *pte;
 
@@ -154,6 +161,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
 			entry = __pmd(_pa(addr, mode));
 			entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
 			set_pmd(pmd, entry);
+			pages++;
 			continue;
 		}
 		pte = boot_pte_alloc();
@@ -163,12 +171,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
 		}
 		pgtable_pte_populate(pmd, addr, next, mode);
 	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_1M, pages);
 }
 
 static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
 				 enum populate_mode mode)
 {
-	unsigned long next;
+	unsigned long next, pages = 0;
 	pud_t *pud, entry;
 	pmd_t *pmd;
 
@@ -180,6 +190,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
 			entry = __pud(_pa(addr, mode));
 			entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
 			set_pud(pud, entry);
+			pages++;
 			continue;
 		}
 		pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
@@ -189,6 +200,8 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
 		}
 		pgtable_pmd_populate(pud, addr, next, mode);
 	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_2G, pages);
 }
 
 static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
@@ -251,9 +264,9 @@ void setup_vmem(unsigned long asce_limit)
 	 * the lowcore and create the identity mapping only afterwards.
 	 */
 	pgtable_populate_init();
-	pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
+	pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
 	for_each_mem_detect_usable_block(i, &start, &end)
-		pgtable_populate(start, end, POPULATE_ONE2ONE);
+		pgtable_populate(start, end, POPULATE_DIRECT);
 	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
 			 POPULATE_ABS_LOWCORE);
 	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2c70b4d1263d..acbe1ac2d571 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -34,7 +34,7 @@ enum {
 	PG_DIRECT_MAP_MAX
 };
 
-extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
 
 static inline void update_page_count(int level, long count)
 {
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 9f18a4af9c13..cb2ee06df286 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -192,21 +192,10 @@ static int expected_page_refs(struct page *page)
 	return res;
 }
 
-static int make_secure_pte(pte_t *ptep, unsigned long addr,
-			   struct page *exp_page, struct uv_cb_header *uvcb)
+static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
 {
-	pte_t entry = READ_ONCE(*ptep);
-	struct page *page;
 	int expected, cc = 0;
 
-	if (!pte_present(entry))
-		return -ENXIO;
-	if (pte_val(entry) & _PAGE_INVALID)
-		return -ENXIO;
-
-	page = pte_page(entry);
-	if (page != exp_page)
-		return -ENXIO;
 	if (PageWriteback(page))
 		return -EAGAIN;
 	expected = expected_page_refs(page);
@@ -304,17 +293,18 @@ again:
 		goto out;
 
 	rc = -ENXIO;
-	page = follow_page(vma, uaddr, FOLL_WRITE);
-	if (IS_ERR_OR_NULL(page))
-		goto out;
-
-	lock_page(page);
 	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
-	if (should_export_before_import(uvcb, gmap->mm))
-		uv_convert_from_secure(page_to_phys(page));
-	rc = make_secure_pte(ptep, uaddr, page, uvcb);
+	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
+		page = pte_page(*ptep);
+		rc = -EAGAIN;
+		if (trylock_page(page)) {
+			if (should_export_before_import(uvcb, gmap->mm))
+				uv_convert_from_secure(page_to_phys(page));
+			rc = make_page_secure(page, uvcb);
+			unlock_page(page);
+		}
+	}
 	pte_unmap_unlock(ptep, ptelock);
 
 out:
 	mmap_read_unlock(gmap->mm);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index e032ebbf51b9..3ce5f4351156 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -314,6 +314,11 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
 	 */
 	if (kvm->arch.pv.set_aside)
 		return -EINVAL;
+
+	/* Guest with segment type ASCE, refuse to destroy asynchronously */
+	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..2267cf9819b2 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2833,6 +2833,9 @@ EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
+ * If the ASCE is a SEGMENT type then this function will return -EINVAL,
+ * otherwise the pointers in the host_to_guest radix tree will keep pointing
+ * to the wrong pages, causing use-after-free and memory corruption.
+ *
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
@@ -2847,6 +2850,10 @@ int s390_replace_asce(struct gmap *gmap)
 
 	s390_unlist_old_asce(gmap);
 
+	/* Replacing segment type ASCEs would cause serious issues */
+	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
 	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 85195c18b2e8..7be699b4974a 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -41,7 +41,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 }
 
 #ifdef CONFIG_PROC_FS
-atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
 
 void arch_report_meminfo(struct seq_file *m)
 {
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 10290e5c1f43..c449e7c1b20f 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -15,7 +15,7 @@ config SH_STANDARD_BIOS
 
 config STACK_DEBUG
 	bool "Check for stack overflows"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && PRINTK
 	help
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit. Saying Y here will add overhead to
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index 4adbd4ade319..b603b7968b38 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -64,7 +64,7 @@ ENTRY(_stext)
 	ldc	r0, r6_bank
 #endif
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 	mov	r4, r12		! Store device tree blob pointer in r12
 #endif
 
@@ -315,7 +315,7 @@ ENTRY(_stext)
 10:
 #endif
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 	mov.l	8f, r0		! Make flat device tree available early.
 	jsr	@r0
 	 mov	r12, r4
@@ -346,7 +346,7 @@ ENTRY(stack_start)
5:	.long	start_kernel
6:	.long	cpu_init
7:	.long	init_thread_union
-#if defined(CONFIG_OF_FLATTREE)
+#if defined(CONFIG_OF_EARLY_FLATTREE)
8:	.long	sh_fdt_init
 #endif
diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c
index 11777867c6f5..a212b645b4cf 100644
--- a/arch/sh/kernel/nmi_debug.c
+++ b/arch/sh/kernel/nmi_debug.c
@@ -49,7 +49,7 @@ static int __init nmi_debug_setup(char *str)
 	register_die_notifier(&nmi_debug_nb);
 
 	if (*str != '=')
-		return 0;
+		return 1;
 
 	for (p = str + 1; *p; p = sep + 1) {
 		sep = strchr(p, ',');
@@ -70,6 +70,6 @@ static int __init nmi_debug_setup(char *str)
 			break;
 	}
 
-	return 0;
+	return 1;
 }
 __setup("nmi_debug", nmi_debug_setup);
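Returning 1 from a `__setup()` handler tells the early parameter parser the option was consumed; returning 0 makes the kernel fall back to passing `nmi_debug=...` to init as a command-line argument or environment variable. A minimal sketch of the convention the fix relies on (the handler name and body are illustrative, not from the patch):

#include <linux/init.h>
#include <linux/kernel.h>

static bool demo_enabled;

/* For __setup("demo_opt", ...), str points just past "demo_opt", i.e. at
 * '=' when a value is given. Returning 1 means "handled": the option is
 * not forwarded to init. Returning 0 would hand it through. */
static int __init demo_opt_setup(char *str)
{
	demo_enabled = true;
	if (*str == '=')
		pr_info("demo_opt: value \"%s\"\n", str + 1);
	return 1;
}
__setup("demo_opt", demo_opt_setup);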
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 1fcb6659822a..af977ec4ca5e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -244,7 +244,7 @@ void __init __weak plat_early_device_setup(void)
 {
 }
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 void __ref sh_fdt_init(phys_addr_t dt_phys)
 {
 	static int done = 0;
@@ -326,7 +326,7 @@ void __init setup_arch(char **cmdline_p)
 	/* Let earlyprintk output early console messages */
 	sh_early_platform_driver_probe("earlyprintk", 1, 1);
 
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
 #ifdef CONFIG_USE_BUILTIN_DTB
 	unflatten_and_copy_device_tree();
 #else
diff --git a/arch/sh/math-emu/sfp-util.h b/arch/sh/math-emu/sfp-util.h
index 784f541344f3..bda50762b3d3 100644
--- a/arch/sh/math-emu/sfp-util.h
+++ b/arch/sh/math-emu/sfp-util.h
@@ -67,7 +67,3 @@
   } while (0)
 
 #define abort()	return 0
-
-#define __BYTE_ORDER __LITTLE_ENDIAN
-
-
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..9d248703cbdd 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1703,10 +1703,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
-		if (has_branch_stack(event)) {
-			data.br_stack = &cpuc->lbr_stack;
-			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
-		}
+		if (has_branch_stack(event))
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4266b64631a4..7e331e8f3692 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -36,6 +36,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
 #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
+#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
 
 /* Protect the PCI config register pairs used for SMN. */
 static DEFINE_MUTEX(smn_mutex);
@@ -79,6 +80,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
 	{}
 };
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 4c91f626c058..e50d353b5c1c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
 
 #include <linux/kvm_host.h>
 
-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR0_GUEST_BITS	(X86_CR0_TS | X86_CR0_WP)
 #define KVM_POSSIBLE_CR4_GUEST_BITS				  \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
 	 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 168c46fd8dd1..0f38b78ab04b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 				u64 fault_address, char *insn, int insn_len);
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+					struct kvm_mmu *mmu);
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
 					  vcpu->arch.mmu->root_role.level);
 }
 
+static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+						    struct kvm_mmu *mmu)
+{
+	/*
+	 * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
+	 * @mmu's snapshot of CR0.WP and thus all related paging metadata may
+	 * be stale.  Refresh CR0.WP and the metadata on-demand when checking
+	 * for permission faults.  Exempt nested MMUs, i.e. MMUs for shadowing
+	 * nEPT and nNPT, as CR0.WP is ignored in both cases.  Note, KVM does
+	 * need to refresh nested_mmu, a.k.a. the walker used to translate L2
+	 * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
+	 */
+	if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
+		return;
+
+	__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+}
+
 /*
  * Check if a given access (described through the I/D, W/R and U/S bits of a
  * page fault error code pfec) causes a permission fault with the given PTE
@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
 	bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
 	int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
-	bool fault = (mmu->permissions[index] >> pte_access) & 1;
 	u32 errcode = PFERR_PRESENT_MASK;
+	bool fault;
+
+	kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+
+	fault = (mmu->permissions[index] >> pte_access) & 1;
 
 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
 	if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c8ebe542c565..d3812de54b02 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -242,6 +242,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
 	return regs;
 }
 
+static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr3(vcpu);
+}
+
+static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
+						  struct kvm_mmu *mmu)
+{
+	if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
+		return kvm_read_cr3(vcpu);
+
+	return mmu->get_guest_pgd(vcpu);
+}
+
 static inline bool kvm_available_flush_tlb_with_range(void)
 {
 	return kvm_x86_ops.tlb_remote_flush_with_range;
@@ -3731,7 +3745,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	int quadrant, i, r;
 	hpa_t root;
 
-	root_pgd = mmu->get_guest_pgd(vcpu);
+	root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
 	root_gfn = root_pgd >> PAGE_SHIFT;
 
 	if (mmu_check_root(vcpu, root_gfn))
@@ -4181,7 +4195,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	arch.token = alloc_apf_token(vcpu);
 	arch.gfn = gfn;
 	arch.direct_map = vcpu->arch.mmu->root_role.direct;
-	arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
+	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
 
 	return kvm_setup_async_pf(vcpu, cr2_or_gpa,
 				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
@@ -4200,7 +4214,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 		return;
 
 	if (!vcpu->arch.mmu->root_role.direct &&
-	      work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
+	      work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
 		return;
 
 	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
@@ -4604,11 +4618,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
-{
-	return kvm_read_cr3(vcpu);
-}
-
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 			   unsigned int access)
 {
@@ -5112,6 +5121,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
 	return role;
 }
 
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+					struct kvm_mmu *mmu)
+{
+	const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP);
+
+	BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
+	BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
+
+	if (is_cr0_wp(mmu) == cr0_wp)
+		return;
+
+	mmu->cpu_role.base.cr0_wp = cr0_wp;
+	reset_guest_paging_metadata(vcpu, mmu);
+}
+
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
 	/* tdp_root_level is architecture forced level, use it if nonzero */
@@ -5159,7 +5183,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = NULL;
-	context->get_guest_pgd = get_cr3;
+	context->get_guest_pgd = get_guest_cr3;
 	context->get_pdptr = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
 
@@ -5309,7 +5333,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
 
 	kvm_init_shadow_mmu(vcpu, cpu_role);
 
-	context->get_guest_pgd = get_cr3;
+	context->get_guest_pgd = get_guest_cr3;
 	context->get_pdptr = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
 }
@@ -5323,7 +5347,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
 		return;
 
 	g_context->cpu_role.as_u64 = new_mode.as_u64;
-	g_context->get_guest_pgd = get_cr3;
+	g_context->get_guest_pgd = get_guest_cr3;
 	g_context->get_pdptr = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
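`kvm_mmu_get_guest_pgd()` avoids a retpoline-penalized indirect call by comparing the stored function pointer against the overwhelmingly common target and calling it directly when they match. A hedged, generic sketch of that devirtualization pattern outside KVM (all names here are illustrative):

#include <stdio.h>

struct ctx;
typedef unsigned long (*pgd_fn)(struct ctx *);

struct ctx {
	pgd_fn get_pgd;
	unsigned long cr3;
};

static unsigned long get_cr3(struct ctx *c)
{
	return c->cr3;
}

/* If the pointer matches the common case, make a direct call the compiler
 * can inline; otherwise fall back to the indirect call. With retpolines
 * enabled, the indirect path is the expensive one being avoided. */
static unsigned long get_pgd_fast(struct ctx *c)
{
	if (c->get_pgd == get_cr3)
		return get_cr3(c);	/* direct, inlinable */
	return c->get_pgd(c);		/* indirect, retpolined */
}

int main(void)
{
	struct ctx c = { .get_pgd = get_cr3, .cr3 = 0x1000 };

	printf("pgd=%#lx\n", get_pgd_fast(&c));
	return 0;
}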
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 57f0b75c80f9..2ea2861bbb3c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
 	walker->level = mmu->cpu_role.base.level;
-	pte           = mmu->get_guest_pgd(vcpu);
+	pte           = kvm_mmu_get_guest_pgd(vcpu, mmu);
 	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 612e6c70ce2e..f4aa170b5b97 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -540,9 +540,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 	if (!pmc)
 		return 1;
 
-	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
+	if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) &&
 	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
-	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
+	    (kvm_read_cr0_bits(vcpu, X86_CR0_PE)))
 		return 1;
 
 	*data = pmc_read_counter(pmc) & mask;
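The condition mirrors the architectural RDPMC rule: the instruction faults when CR4.PCE is clear, the CPL is above 0, and the CPU is in protected mode; switching to `kvm_read_cr{0,4}_bits()` lets KVM read just the bits it needs from the register caches. A hedged stand-alone restatement of the predicate (the harness is illustrative; the bit positions are the architectural ones):

#include <stdbool.h>
#include <stdio.h>

#define X86_CR0_PE	(1UL << 0)	/* protected mode enable */
#define X86_CR4_PCE	(1UL << 8)	/* user-mode RDPMC enable */

/* True when a guest RDPMC should be refused, matching the check in
 * kvm_pmu_rdpmc(): no user RDPMC permission, not ring 0, protected mode. */
static bool rdpmc_should_fault(unsigned long cr0, unsigned long cr4, int cpl)
{
	return !(cr4 & X86_CR4_PCE) && cpl != 0 && (cr0 & X86_CR0_PE);
}

int main(void)
{
	/* Ring-3 guest in protected mode with CR4.PCE clear: refused. */
	printf("%d\n", rdpmc_should_fault(X86_CR0_PE, 0, 3));		/* 1 */
	/* Same guest after the OS sets CR4.PCE: allowed. */
	printf("%d\n", rdpmc_should_fault(X86_CR0_PE, X86_CR4_PCE, 3));	/* 0 */
	return 0;
}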
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 768487611db7..89fa35fba3d8 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4483,7 +4483,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
 	 * (KVM doesn't change it);
 	 */
-	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
 	vmx_set_cr0(vcpu, vmcs12->host_cr0);
 
 	/* Same as above - no reason to call set_cr4_guest_host_mask(). */
@@ -4634,7 +4634,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
 	 */
 	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
 
-	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
 	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
 
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index dd92361f41b3..8ead0916e252 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4773,7 +4773,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 	/* 22.2.1, 20.8.1 */
 	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
 
-	vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+	vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
 
 	set_cr4_guest_host_mask(vmx);
@@ -5500,7 +5500,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		break;
 	case 3: /* lmsw */
 		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
-		trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+		trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
 		kvm_lmsw(vcpu, val);
 
 		return kvm_skip_emulated_instruction(vcpu);
@@ -7558,7 +7558,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
 		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
 
-	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+	if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
 		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
 			cache = MTRR_TYPE_WRBACK;
 		else
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 2acdc54bc34b..423e9d3c9c40 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
 				(1 << VCPU_EXREG_EXIT_INFO_1) | \
 				(1 << VCPU_EXREG_EXIT_INFO_2))
 
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
+{
+	unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+	/*
+	 * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+	 * in order to construct shadow PTEs with the correct protections.
+	 * Note!  CR0.WP technically can be passed through to the guest if
+	 * paging is disabled, but checking CR0.PG would generate a cyclical
+	 * dependency of sorts due to forcing the caller to ensure CR0 holds
+	 * the correct value prior to determining which CR0 bits can be owned
+	 * by L1.  Keep it simple and limit the optimization to EPT.
+	 */
+	if (!enable_ept)
+		bits &= ~X86_CR0_WP;
+	return bits;
+}
+
 static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
 {
 	return container_of(kvm, struct kvm_vmx, kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d852ce84920..999b2db0737b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -906,6 +906,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
 
 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
 {
+	/*
+	 * CR0.WP is incorporated into the MMU role, but only for non-nested,
+	 * indirect shadow MMUs.  If TDP is enabled, the MMU's metadata needs
+	 * to be updated, e.g. so that emulating guest translations does the
+	 * right thing, but there's no need to unload the root as CR0.WP
+	 * doesn't affect SPTEs.
+	 */
+	if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) {
+		kvm_init_mmu(vcpu);
+		return;
+	}
+
 	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
 		kvm_clear_async_pf_completion_queue(vcpu);
 		kvm_async_pf_hash_reset(vcpu);
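`cr0 ^ old_cr0` yields exactly the set of CR0 bits that changed, so comparing the XOR result against `X86_CR0_WP` (rather than masking with it, as the CR0.PG check below does) takes the fast path only when WP was the sole bit toggled. A small hedged illustration of the bit arithmetic (the sample CR0 value is illustrative):

#include <stdio.h>

#define X86_CR0_WP	(1UL << 16)
#define X86_CR0_PG	(1UL << 31)

int main(void)
{
	unsigned long old_cr0 = 0x80050033;	/* PG, WP, PE, ... set */
	unsigned long only_wp = old_cr0 ^ X86_CR0_WP;
	unsigned long wp_and_pg = old_cr0 ^ X86_CR0_WP ^ X86_CR0_PG;

	/* XOR isolates the changed bits. */
	printf("%d\n", (only_wp ^ old_cr0) == X86_CR0_WP);	/* 1: WP alone */
	printf("%d\n", (wp_and_pg ^ old_cr0) == X86_CR0_WP);	/* 0: WP + PG */
	return 0;
}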
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index ecbfb4dd3b01..faa4cdc747a3 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -142,8 +142,8 @@ SYM_FUNC_START(clear_user_rep_good)
 	and $7, %edx
 	jz .Lrep_good_exit
 
-.Lrep_good_bytes:
 	mov %edx, %ecx
+.Lrep_good_bytes:
 	rep stosb
 
 .Lrep_good_exit:
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 5f61c65322be..22fc313c6500 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -144,8 +144,8 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  */
 	.align 64
 	.skip 63, 0xcc
-SYM_FUNC_START_NOALIGN(zen_untrain_ret);
-
+SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+	ANNOTATE_NOENDBR
 	/*
 	 * As executed from zen_untrain_ret, this is:
 	 * |