#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpu.h" /* * Pre-canned values for overriding the CPUID features * and extended features masks. * * Currently supported processors: * * "fam_0f_rev_c" * "fam_0f_rev_d" * "fam_0f_rev_e" * "fam_0f_rev_f" * "fam_0f_rev_g" * "fam_10_rev_b" * "fam_10_rev_c" * "fam_11_rev_b" */ static char __initdata opt_famrev[14]; string_param("cpuid_mask_cpu", opt_famrev); static unsigned int __initdata opt_cpuid_mask_l7s0_eax = ~0u; integer_param("cpuid_mask_l7s0_eax", opt_cpuid_mask_l7s0_eax); static unsigned int __initdata opt_cpuid_mask_l7s0_ebx = ~0u; integer_param("cpuid_mask_l7s0_ebx", opt_cpuid_mask_l7s0_ebx); static unsigned int __initdata opt_cpuid_mask_thermal_ecx = ~0u; integer_param("cpuid_mask_thermal_ecx", opt_cpuid_mask_thermal_ecx); /* 1 = allow, 0 = don't allow guest creation, -1 = don't allow boot */ s8 __read_mostly opt_allow_unsafe; boolean_param("allow_unsafe", opt_allow_unsafe); /* Signal whether the ACPI C1E quirk is required. */ bool __read_mostly amd_acpi_c1e_quirk; bool __ro_after_init amd_legacy_ssbd; bool __initdata amd_virt_spec_ctrl; static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, unsigned int *hi) { int err; asm volatile("1: rdmsr\n2:\n" ".section .fixup,\"ax\"\n" "3: movl %6,%2\n" " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) : "=a" (*lo), "=d" (*hi), "=r" (err) : "c" (msr), "D" (0x9c5a203a), "2" (0), "i" (-EFAULT)); return err; } static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo, unsigned int hi) { int err; asm volatile("1: wrmsr\n2:\n" ".section .fixup,\"ax\"\n" "3: movl %6,%0\n" " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r" (err) : "c" (msr), "a" (lo), "d" (hi), "D" (0x9c5a203a), "0" (0), "i" (-EFAULT)); return err; } static void wrmsr_amd(unsigned int msr, uint64_t val) { asm volatile("wrmsr" :: "c" (msr), "a" ((uint32_t)val), "d" (val >> 32), "D" (0x9c5a203a)); } static const struct cpuidmask { uint16_t fam; char rev[2]; unsigned int ecx, edx, ext_ecx, ext_edx; } pre_canned[] __initconst = { #define CAN(fam, id, rev) { \ fam, #rev, \ AMD_FEATURES_##id##_REV_##rev##_ECX, \ AMD_FEATURES_##id##_REV_##rev##_EDX, \ AMD_EXTFEATURES_##id##_REV_##rev##_ECX, \ AMD_EXTFEATURES_##id##_REV_##rev##_EDX \ } #define CAN_FAM(fam, rev) CAN(0x##fam, FAM##fam##h, rev) #define CAN_K8(rev) CAN(0x0f, K8, rev) CAN_FAM(11, B), CAN_FAM(10, C), CAN_FAM(10, B), CAN_K8(G), CAN_K8(F), CAN_K8(E), CAN_K8(D), CAN_K8(C) #undef CAN }; static const struct cpuidmask *__init noinline get_cpuidmask(const char *opt) { unsigned long fam; char rev; unsigned int i; if (strncmp(opt, "fam_", 4)) return NULL; fam = simple_strtoul(opt + 4, &opt, 16); if (strncmp(opt, "_rev_", 5) || !opt[5] || opt[6]) return NULL; rev = toupper(opt[5]); for (i = 0; i < ARRAY_SIZE(pre_canned); ++i) if (fam == pre_canned[i].fam && rev == *pre_canned[i].rev) return &pre_canned[i]; return NULL; } /* * Sets caps in expected_levelling_cap, probes for the specified mask MSR, and * set caps in levelling_caps if it is found. Processors prior to Fam 10h * required a 32-bit password for masking MSRs. Returns the default value. */ static uint64_t __init _probe_mask_msr(unsigned int msr, uint64_t caps) { unsigned int hi, lo; expected_levelling_cap |= caps; if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) && (wrmsr_amd_safe(msr, lo, hi) == 0)) levelling_caps |= caps; return ((uint64_t)hi << 32) | lo; } /* * Probe for the existance of the expected masking MSRs. They might easily * not be available if Xen is running virtualised. */ static void __init noinline probe_masking_msrs(void) { const struct cpuinfo_x86 *c = &boot_cpu_data; /* * First, work out which masking MSRs we should have, based on * revision and cpuid. */ /* Fam11 doesn't support masking at all. */ if (c->x86 == 0x11) return; cpuidmask_defaults._1cd = _probe_mask_msr(MSR_K8_FEATURE_MASK, LCAP_1cd); cpuidmask_defaults.e1cd = _probe_mask_msr(MSR_K8_EXT_FEATURE_MASK, LCAP_e1cd); if (c->cpuid_level >= 7) cpuidmask_defaults._7ab0 = _probe_mask_msr(MSR_AMD_L7S0_FEATURE_MASK, LCAP_7ab0); if (c->x86 == 0x15 && c->cpuid_level >= 6 && cpuid_ecx(6)) cpuidmask_defaults._6c = _probe_mask_msr(MSR_AMD_THRM_FEATURE_MASK, LCAP_6c); /* * Don't bother warning about a mismatch if virtualised. These MSRs * are not architectural and almost never virtualised. */ if ((expected_levelling_cap == levelling_caps) || cpu_has_hypervisor) return; printk(XENLOG_WARNING "Mismatch between expected (%#x) " "and real (%#x) levelling caps: missing %#x\n", expected_levelling_cap, levelling_caps, (expected_levelling_cap ^ levelling_caps) & levelling_caps); printk(XENLOG_WARNING "Fam %#x, model %#x level %#x\n", c->x86, c->x86_model, c->cpuid_level); printk(XENLOG_WARNING "If not running virtualised, please report a bug\n"); } /* * Context switch CPUID masking state to the next domain. Only called if * CPUID Faulting isn't available, but masking MSRs have been detected. A * parameter of NULL is used to context switch to the default host state (by * the cpu bringup-code, crash path, etc). */ static void cf_check amd_ctxt_switch_masking(const struct vcpu *next) { struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); const struct domain *nextd = next ? next->domain : NULL; const struct cpuidmasks *masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv.cpuidmasks) ? nextd->arch.pv.cpuidmasks : &cpuidmask_defaults; if ((levelling_caps & LCAP_1cd) == LCAP_1cd) { uint64_t val = masks->_1cd; /* * OSXSAVE defaults to 1, which causes fast-forwarding of * Xen's real setting. Clobber it if disabled by the guest * kernel. */ if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) && !(next->arch.pv.ctrlreg[4] & X86_CR4_OSXSAVE)) val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32); if (unlikely(these_masks->_1cd != val)) { wrmsr_amd(MSR_K8_FEATURE_MASK, val); these_masks->_1cd = val; } } #define LAZY(cap, msr, field) \ ({ \ if (unlikely(these_masks->field != masks->field) && \ ((levelling_caps & cap) == cap)) \ { \ wrmsr_amd(msr, masks->field); \ these_masks->field = masks->field; \ } \ }) LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK, e1cd); LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0); LAZY(LCAP_6c, MSR_AMD_THRM_FEATURE_MASK, _6c); #undef LAZY } /* * Mask the features and extended features returned by CPUID. Parameters are * set from the boot line via two methods: * * 1) Specific processor revision string * 2) User-defined masks * * The user-defined masks take precedence. * * AMD "masking msrs" are actually overrides, making it possible to advertise * features which are not supported by the hardware. Care must be taken to * avoid this, as the accidentally-advertised features will not actually * function. */ static void __init noinline amd_init_levelling(void) { const struct cpuidmask *m = NULL; /* * If there's support for CpuidUserDis or CPUID faulting then * we can skip levelling because CPUID accesses are trapped anyway. * * CPUID faulting is an Intel feature analogous to CpuidUserDis, so * that can only be present when Xen is itself virtualized (because * it can be emulated). * * Note that probing for the Intel feature _first_ isn't a mistake, * but a means to ensure MSR_INTEL_PLATFORM_INFO is read and added * to the raw CPU policy if present. */ if ((cpu_has_hypervisor && probe_cpuid_faulting()) || boot_cpu_has(X86_FEATURE_CPUID_USER_DIS)) { expected_levelling_cap |= LCAP_faulting; levelling_caps |= LCAP_faulting; return; } probe_masking_msrs(); if (*opt_famrev != '\0') { m = get_cpuidmask(opt_famrev); if (!m) printk("Invalid processor string: %s\n", opt_famrev); } if ((levelling_caps & LCAP_1cd) == LCAP_1cd) { uint32_t ecx, edx, tmp; cpuid(0x00000001, &tmp, &tmp, &ecx, &edx); if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx)) { ecx &= opt_cpuid_mask_ecx; edx &= opt_cpuid_mask_edx; } else if (m) { ecx &= m->ecx; edx &= m->edx; } /* Fast-forward bits - Must be set. */ if (ecx & cpufeat_mask(X86_FEATURE_XSAVE)) ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE); edx |= cpufeat_mask(X86_FEATURE_APIC); cpuidmask_defaults._1cd = ((uint64_t)ecx << 32) | edx; } if ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) { uint32_t ecx, edx, tmp; cpuid(0x80000001, &tmp, &tmp, &ecx, &edx); if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) { ecx &= opt_cpuid_mask_ext_ecx; edx &= opt_cpuid_mask_ext_edx; } else if (m) { ecx &= m->ext_ecx; edx &= m->ext_edx; } /* Fast-forward bits - Must be set. */ edx |= cpufeat_mask(X86_FEATURE_APIC); cpuidmask_defaults.e1cd = ((uint64_t)ecx << 32) | edx; } if ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) { uint32_t eax, ebx, tmp; cpuid(0x00000007, &eax, &ebx, &tmp, &tmp); if (~(opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx)) { eax &= opt_cpuid_mask_l7s0_eax; ebx &= opt_cpuid_mask_l7s0_ebx; } cpuidmask_defaults._7ab0 &= ((uint64_t)eax << 32) | ebx; } if ((levelling_caps & LCAP_6c) == LCAP_6c) { uint32_t ecx = cpuid_ecx(6); if (~opt_cpuid_mask_thermal_ecx) ecx &= opt_cpuid_mask_thermal_ecx; cpuidmask_defaults._6c &= (~0ULL << 32) | ecx; } if (opt_cpu_info) { printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps); printk(XENLOG_INFO "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, " "e1c 0x%08x, 7a0 0x%08x, 7b0 0x%08x, 6c 0x%08x\n", (uint32_t)cpuidmask_defaults._1cd, (uint32_t)(cpuidmask_defaults._1cd >> 32), (uint32_t)cpuidmask_defaults.e1cd, (uint32_t)(cpuidmask_defaults.e1cd >> 32), (uint32_t)(cpuidmask_defaults._7ab0 >> 32), (uint32_t)cpuidmask_defaults._7ab0, (uint32_t)cpuidmask_defaults._6c); } if (levelling_caps) ctxt_switch_masking = amd_ctxt_switch_masking; } void amd_set_cpuid_user_dis(bool enable) { const uint64_t bit = K8_HWCR_CPUID_USER_DIS; uint64_t val; rdmsrl(MSR_K8_HWCR, val); if (!!(val & bit) == enable) return; val ^= bit; wrmsrl(MSR_K8_HWCR, val); } /* * Check for the presence of an AMD erratum. Arguments are defined in amd.h * for each known erratum. Return 1 if erratum is found. */ int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw_id, ...) { va_list ap; u32 range; u32 ms; if (cpu->x86_vendor != X86_VENDOR_AMD) return 0; if (osvw_id >= 0 && cpu_has(cpu, X86_FEATURE_OSVW)) { u64 osvw_len; rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len); if (osvw_id < osvw_len) { u64 osvw_bits; rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), osvw_bits); return (osvw_bits >> (osvw_id & 0x3f)) & 1; } } /* OSVW unavailable or ID unknown, match family-model-stepping range */ va_start(ap, osvw_id); ms = (cpu->x86_model << 4) | cpu->x86_mask; while ((range = va_arg(ap, int))) { if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && (ms <= AMD_MODEL_RANGE_END(range))) { va_end(ap); return 1; } } va_end(ap); return 0; } /* * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation * cores only. Assume BIOS has setup all Northbridges equivalently. */ static void disable_c1_ramping(void) { u8 pmm7; int node, nr_nodes; /* Read the number of nodes from the first Northbridge. */ nr_nodes = ((pci_conf_read32(PCI_SBDF(0, 0, 0x18, 0), 0x60) >> 4) & 0x07) + 1; for (node = 0; node < nr_nodes; node++) { /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */ pmm7 = pci_conf_read8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87); /* Invalid read means we've updated every Northbridge. */ if (pmm7 == 0xFF) break; pmm7 &= 0xFC; /* clear pmm7[1:0] */ pci_conf_write8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87, pmm7); printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node); } } static void cf_check disable_c1e(void *unused) { uint64_t msr_content; /* * Disable C1E mode, as the APIC timer stops in that mode. * The MSR does not exist in all FamilyF CPUs (only Rev F and above), * but we safely catch the #GP in that case. */ if ((rdmsr_safe(MSR_K8_ENABLE_C1E, msr_content) == 0) && (msr_content & (3ULL << 27)) && (wrmsr_safe(MSR_K8_ENABLE_C1E, msr_content & ~(3ULL << 27)) != 0)) printk(KERN_ERR "Failed to disable C1E on CPU#%u (%16"PRIx64")\n", smp_processor_id(), msr_content); } void amd_check_disable_c1e(unsigned int port, u8 value) { /* C1E is sometimes enabled during entry to ACPI mode. */ if ((port == acpi_smi_cmd) && (value == acpi_enable_value)) on_each_cpu(disable_c1e, NULL, 1); } /* * BIOS is expected to clear MtrrFixDramModEn bit. According to AMD BKDG : * "The MtrrFixDramModEn bit should be set to 1 during BIOS initalization of * the fixed MTRRs, then cleared to 0 for operation." */ static void check_syscfg_dram_mod_en(void) { uint64_t syscfg; static bool_t printed = 0; if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0x0f))) return; rdmsrl(MSR_K8_SYSCFG, syscfg); if (!(syscfg & SYSCFG_MTRR_FIX_DRAM_MOD_EN)) return; if (!test_and_set_bool(printed)) printk(KERN_ERR "MTRR: SYSCFG[MtrrFixDramModEn] not " "cleared by BIOS, clearing this bit\n"); syscfg &= ~SYSCFG_MTRR_FIX_DRAM_MOD_EN; wrmsrl(MSR_K8_SYSCFG, syscfg); } static void amd_get_topology(struct cpuinfo_x86 *c) { int cpu; unsigned bits; if (c->x86_max_cores <= 1) return; /* * On a AMD multi core setup the lower bits of the APIC id * distingush the cores. */ cpu = smp_processor_id(); bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; if (bits == 0) { while ((1 << bits) < c->x86_max_cores) bits++; } /* Low order bits define the core id */ c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; /* Collect compute unit ID if available */ if (cpu_has(c, X86_FEATURE_TOPOEXT)) { u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); c->x86_num_siblings = ((ebx >> 8) & 0xff) + 1; if (c->x86 < 0x17) c->compute_unit_id = ebx & 0xFF; else { c->cpu_core_id = ebx & 0xFF; c->x86_max_cores /= c->x86_num_siblings; } /* * In case leaf B is available, use it to derive * topology information. */ if (detect_extended_topology(c)) return; } if (opt_cpu_info) printk("CPU %d(%d) -> Processor %d, %s %d\n", cpu, c->x86_max_cores, c->phys_proc_id, c->compute_unit_id != INVALID_CUID ? "Compute Unit" : "Core", c->compute_unit_id != INVALID_CUID ? c->compute_unit_id : c->cpu_core_id); } void amd_log_freq(const struct cpuinfo_x86 *c) { unsigned int idx = 0, h; uint64_t hi, lo, val; if (c->x86 < 0x10 || c->x86 > 0x19 || (c != &boot_cpu_data && (!opt_cpu_info || (c->apicid & (c->x86_num_siblings - 1))))) return; if (c->x86 < 0x17) { unsigned int node = 0; uint64_t nbcfg; /* * Make an attempt at determining the node ID, but assume * symmetric setup (using node 0) if this fails. */ if (c->extended_cpuid_level >= 0x8000001e && cpu_has(c, X86_FEATURE_TOPOEXT)) { node = cpuid_ecx(0x8000001e) & 0xff; if (node > 7) node = 0; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { rdmsrl(0xC001100C, val); node = val & 7; } /* * Enable (and use) Extended Config Space accesses, as we * can't be certain that MCFG is available here during boot. */ rdmsrl(MSR_AMD64_NB_CFG, nbcfg); wrmsrl(MSR_AMD64_NB_CFG, nbcfg | (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT)); #define PCI_ECS_ADDRESS(sbdf, reg) \ (0x80000000 | ((sbdf).bdf << 8) | ((reg) & 0xfc) | (((reg) & 0xf00) << 16)) for ( ; ; ) { pci_sbdf_t sbdf = PCI_SBDF(0, 0, 0x18 | node, 4); switch (pci_conf_read32(sbdf, PCI_VENDOR_ID)) { case 0x00000000: case 0xffffffff: /* No device at this SBDF. */ if (!node) break; node = 0; continue; default: /* * Core Performance Boost Control, family * dependent up to 3 bits starting at bit 2. * * Note that boost states operate at a frequency * above the base one, and thus need to be * accounted for in order to correctly fetch the * nominal frequency of the processor. */ switch (c->x86) { case 0x10: idx = 1; break; case 0x12: idx = 7; break; case 0x14: idx = 7; break; case 0x15: idx = 7; break; case 0x16: idx = 7; break; } idx &= pci_conf_read(PCI_ECS_ADDRESS(sbdf, 0x15c), 0, 4) >> 2; break; } break; } #undef PCI_ECS_ADDRESS wrmsrl(MSR_AMD64_NB_CFG, nbcfg); } lo = 0; /* gcc may not recognize the loop having at least 5 iterations */ for (h = c->x86 == 0x10 ? 5 : 8; h--; ) if (!rdmsr_safe(0xC0010064 + h, lo) && (lo >> 63)) break; if (!(lo >> 63)) return; #define FREQ(v) (c->x86 < 0x17 ? ((((v) & 0x3f) + 0x10) * 100) >> (((v) >> 6) & 7) \ : (((v) & 0xff) * 25 * 8) / (((v) >> 8) & 0x3f)) if (idx && idx < h && !rdmsr_safe(0xC0010064 + idx, val) && (val >> 63) && !rdmsr_safe(0xC0010064, hi) && (hi >> 63)) printk("CPU%u: %lu (%lu ... %lu) MHz\n", smp_processor_id(), FREQ(val), FREQ(lo), FREQ(hi)); else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63)) printk("CPU%u: %lu ... %lu MHz\n", smp_processor_id(), FREQ(lo), FREQ(hi)); else printk("CPU%u: %lu MHz\n", smp_processor_id(), FREQ(lo)); #undef FREQ } void cf_check early_init_amd(struct cpuinfo_x86 *c) { if (c == &boot_cpu_data) amd_init_levelling(); ctxt_switch_levelling(NULL); } void amd_init_lfence(struct cpuinfo_x86 *c) { uint64_t value; /* * Some hardware has LFENCE dispatch serialising always enabled, * nothing to do on that case. */ if (test_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability)) return; /* * Attempt to set lfence to be Dispatch Serialising. This MSR almost * certainly isn't virtualised (and Xen at least will leak the real * value in but silently discard writes), as well as being per-core * rather than per-thread, so do a full safe read/write/readback cycle * in the worst case. */ if (rdmsr_safe(MSR_AMD64_DE_CFG, value)) /* Unable to read. Assume the safer default. */ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); else if (value & AMD64_DE_CFG_LFENCE_SERIALISE) /* Already dispatch serialising. */ __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); else if (wrmsr_safe(MSR_AMD64_DE_CFG, value | AMD64_DE_CFG_LFENCE_SERIALISE) || rdmsr_safe(MSR_AMD64_DE_CFG, value) || !(value & AMD64_DE_CFG_LFENCE_SERIALISE)) /* Attempt to set failed. Assume the safer default. */ __clear_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); else /* Successfully enabled! */ __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); } /* * Refer to the AMD Speculative Store Bypass whitepaper: * https://developer.amd.com/wp-content/resources/124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf */ static bool set_legacy_ssbd(const struct cpuinfo_x86 *c, bool enable) { int bit = -1; switch (c->x86) { case 0x15: bit = 54; break; case 0x16: bit = 33; break; case 0x17: case 0x18: bit = 10; break; } if (bit >= 0) { uint64_t val, mask = 1ull << bit; if (rdmsr_safe(MSR_AMD64_LS_CFG, val) || ({ val &= ~mask; if (enable) val |= mask; false; }) || wrmsr_safe(MSR_AMD64_LS_CFG, val) || ({ rdmsrl(MSR_AMD64_LS_CFG, val); (val & mask) != (enable * mask); })) bit = -1; } return bit >= 0; } void amd_init_ssbd(const struct cpuinfo_x86 *c) { if (cpu_has_ssb_no) return; if (cpu_has_amd_ssbd) { /* Handled by common MSR_SPEC_CTRL logic */ return; } if (cpu_has_virt_ssbd) { wrmsrl(MSR_VIRT_SPEC_CTRL, opt_ssbd ? SPEC_CTRL_SSBD : 0); return; } if (!set_legacy_ssbd(c, opt_ssbd)) { printk_once(XENLOG_ERR "No SSBD controls available\n"); if (amd_legacy_ssbd) panic("CPU feature mismatch: no legacy SSBD\n"); } else if (c == &boot_cpu_data) amd_legacy_ssbd = true; } static struct ssbd_ls_cfg { spinlock_t lock; unsigned int count; } __cacheline_aligned *ssbd_ls_cfg; static unsigned int __ro_after_init ssbd_max_cores; #define AMD_FAM17H_MAX_SOCKETS 2 bool __init amd_setup_legacy_ssbd(void) { unsigned int i; if ((boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18) || boot_cpu_data.x86_num_siblings <= 1 || opt_ssbd) return true; /* * One could be forgiven for thinking that c->x86_max_cores is the * correct value to use here. * * However, that value is derived from the current configuration, and * c->cpu_core_id is sparse on all but the top end CPUs. Derive * max_cpus from ApicIdCoreIdSize which will cover any sparseness. */ if (boot_cpu_data.extended_cpuid_level >= 0x80000008) { ssbd_max_cores = 1u << MASK_EXTR(cpuid_ecx(0x80000008), 0xf000); ssbd_max_cores /= boot_cpu_data.x86_num_siblings; } if (!ssbd_max_cores) return false; ssbd_ls_cfg = xzalloc_array(struct ssbd_ls_cfg, ssbd_max_cores * AMD_FAM17H_MAX_SOCKETS); if (!ssbd_ls_cfg) return false; for (i = 0; i < ssbd_max_cores * AMD_FAM17H_MAX_SOCKETS; i++) spin_lock_init(&ssbd_ls_cfg[i].lock); return true; } /* * legacy_ssbd is always initialized to false because when SSBD is set * from the command line guest attempts to change it are a no-op (see * amd_set_legacy_ssbd()), whereas when SSBD is inactive hardware will * be forced into that mode (see amd_init_ssbd()). */ static DEFINE_PER_CPU(bool, legacy_ssbd); /* Must be called only when the SSBD setting needs toggling. */ static void core_set_legacy_ssbd(bool enable) { const struct cpuinfo_x86 *c = ¤t_cpu_data; struct ssbd_ls_cfg *status; unsigned long flags; BUG_ON(this_cpu(legacy_ssbd) == enable); if ((c->x86 != 0x17 && c->x86 != 0x18) || c->x86_num_siblings <= 1) { BUG_ON(!set_legacy_ssbd(c, enable)); return; } BUG_ON(c->phys_proc_id >= AMD_FAM17H_MAX_SOCKETS); BUG_ON(c->cpu_core_id >= ssbd_max_cores); status = &ssbd_ls_cfg[c->phys_proc_id * ssbd_max_cores + c->cpu_core_id]; spin_lock_irqsave(&status->lock, flags); status->count += enable ? 1 : -1; ASSERT(status->count <= c->x86_num_siblings); if (enable ? status->count == 1 : !status->count) BUG_ON(!set_legacy_ssbd(c, enable)); spin_unlock_irqrestore(&status->lock, flags); } void amd_set_legacy_ssbd(bool enable) { if (opt_ssbd) /* * Ignore attempts to turn off SSBD, it's hardcoded on the * command line. */ return; if (this_cpu(legacy_ssbd) == enable) return; if (cpu_has_virt_ssbd) wrmsr(MSR_VIRT_SPEC_CTRL, enable ? SPEC_CTRL_SSBD : 0, 0); else if (amd_legacy_ssbd) core_set_legacy_ssbd(enable); else ASSERT_UNREACHABLE(); this_cpu(legacy_ssbd) = enable; } /* * On Zen2 we offer this chicken (bit) on the altar of Speculation. * * Refer to the AMD Branch Type Confusion whitepaper: * https://www.amd.com/system/files/documents/technical-guidance-for-mitigating-branch-type-confusion.pdf * * Setting this unnamed bit supposedly causes prediction information on * non-branch instructions to be ignored. It is to be set unilaterally in * newer microcode. * * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a * simple model number comparison, so use STIBP as a heuristic to separate the * two uarches in Fam17h(AMD)/18h(Hygon). */ void amd_init_spectral_chicken(void) { uint64_t val, chickenbit = 1 << 1; if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) return; if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) wrmsr_safe(MSR_AMD64_DE_CFG2, val | chickenbit); } void __init detect_zen2_null_seg_behaviour(void) { uint64_t base; wrmsrl(MSR_FS_BASE, 1); asm volatile ( "mov %0, %%fs" :: "r" (0) ); rdmsrl(MSR_FS_BASE, base); if (base == 0) setup_force_cpu_cap(X86_FEATURE_NSCB); } static void cf_check init_amd(struct cpuinfo_x86 *c) { u32 l, h; unsigned long long value; /* Disable TLB flush filter by setting HWCR.FFDIS on K8 * bit 6 of msr C001_0015 * * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ if (c->x86 == 15) { rdmsrl(MSR_K8_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K8_HWCR, value); } /* * Some AMD CPUs duplicate the 3DNow bit in base and extended CPUID * leaves. Unfortunately, this aliases PBE on Intel CPUs. Clobber the * alias, leaving 3DNow in the extended leaf. */ __clear_bit(X86_FEATURE_PBE, c->x86_capability); if (c->x86 == 0xf && c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { /* * Some BIOSes incorrectly force this feature, but only K8 * revision D (model = 0x14) and later actually support it. * (AMD Erratum #110, docId: 25759). */ __clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability); if (!rdmsr_amd_safe(0xc001100d, &l, &h)) wrmsr_amd_safe(0xc001100d, l, h & ~1); } /* * Older AMD CPUs don't save/load FOP/FIP/FDP unless an FPU exception * is pending. Xen works around this at (F)XRSTOR time. */ if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS)) setup_force_cpu_cap(X86_BUG_FPU_PTRS); if (c->x86 == 0x0f || c->x86 == 0x11) /* Always dispatch serialising on this hardare. */ __set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability); else /* Implicily "== 0x10 || >= 0x12" by being 64bit. */ amd_init_lfence(c); amd_init_ssbd(c); if (c->x86 == 0x17) amd_init_spectral_chicken(); /* Probe for NSCB on Zen2 CPUs when not virtualised */ if (!cpu_has_hypervisor && !cpu_has_nscb && c == &boot_cpu_data && c->x86 == 0x17) detect_zen2_null_seg_behaviour(); /* * AMD CPUs before Zen2 don't clear segment bases/limits when loading * a NULL selector. */ if (c == &boot_cpu_data && !cpu_has_nscb) setup_force_cpu_cap(X86_BUG_NULL_SEG); /* MFENCE stops RDTSC speculation */ if (!cpu_has_lfence_dispatch) __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); /* * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with * everything, including reads and writes to address, and * LFENCE/SFENCE instructions. */ if (c == &boot_cpu_data && !cpu_has_clflushopt) setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); switch(c->x86) { case 0xf ... 0x11: disable_c1e(NULL); if (acpi_smi_cmd && (acpi_enable_value | acpi_disable_value)) amd_acpi_c1e_quirk = true; break; case 0x15: case 0x16: /* * There are some Fam15/Fam16 systems where upon resume from S3 * firmware fails to re-setup properly functioning RDRAND. * By the time we can spot the problem, it is too late to take * action, and there is nothing Xen can do to repair the problem. * Clear the feature unless force-enabled on the command line. */ if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_RDRAND) && !is_forced_cpu_cap(X86_FEATURE_RDRAND)) { static const char __initconst text[] = "RDRAND may cease to work on this hardware upon resume from S3.\n" "Please choose an explicit cpuid={no-}rdrand setting.\n"; setup_clear_cpu_cap(X86_FEATURE_RDRAND); warning_add(text); } break; case 0x19: /* * Zen3 (Fam19h model < 0x10) parts are not susceptible to * Branch Type Confusion, but predate the allocation of the * BTC_NO bit. Fill it back in if we're not virtualised. */ if (!cpu_has_hypervisor && !cpu_has(c, X86_FEATURE_BTC_NO)) __set_bit(X86_FEATURE_BTC_NO, c->x86_capability); break; } display_cacheinfo(c); if (c->extended_cpuid_level >= 0x80000008) { c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; } if (c->extended_cpuid_level >= 0x80000007) { if (cpu_has(c, X86_FEATURE_ITSC)) { __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); __set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability); if (c->x86 != 0x11) __set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability); } } /* re-enable TopologyExtensions if switched off by BIOS */ if ((c->x86 == 0x15) && (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && !cpu_has(c, X86_FEATURE_TOPOEXT) && !rdmsr_safe(MSR_K8_EXT_FEATURE_MASK, value)) { value |= 1ULL << 54; wrmsr_safe(MSR_K8_EXT_FEATURE_MASK, value); rdmsrl(MSR_K8_EXT_FEATURE_MASK, value); if (value & (1ULL << 54)) { __set_bit(X86_FEATURE_TOPOEXT, c->x86_capability); printk(KERN_INFO "CPU: Re-enabling disabled " "Topology Extensions Support\n"); } } /* * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. */ if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 && !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e) wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e); amd_get_topology(c); /* Pointless to use MWAIT on Family10 as it does not deep sleep. */ if (c->x86 == 0x10) __clear_bit(X86_FEATURE_MONITOR, c->x86_capability); if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121)) opt_allow_unsafe = 1; else if (opt_allow_unsafe < 0) panic("Xen will not boot on this CPU for security reasons" "Pass \"allow_unsafe\" if you're trusting all your" " (PV) guest kernels.\n"); else if (!opt_allow_unsafe && c == &boot_cpu_data) printk(KERN_WARNING "*** Xen will not allow creation of DomU-s on" " this CPU for security reasons. ***\n" KERN_WARNING "*** Pass \"allow_unsafe\" if you're trusting" " all your (PV) guest kernels. ***\n"); if (c->x86 == 0x16 && c->x86_model <= 0xf) { if (c == &boot_cpu_data) { l = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x58); h = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x5c); if ((l & 0x1f) | (h & 0x1)) printk(KERN_WARNING "Applying workaround for erratum 792: %s%s%s\n", (l & 0x1f) ? "clearing D18F3x58[4:0]" : "", ((l & 0x1f) && (h & 0x1)) ? " and " : "", (h & 0x1) ? "clearing D18F3x5C[0]" : ""); if (l & 0x1f) pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x58, l & ~0x1f); if (h & 0x1) pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x5c, h & ~0x1); } rdmsrl(MSR_AMD64_LS_CFG, value); if (!(value & (1 << 15))) { static bool_t warned; if (c == &boot_cpu_data || opt_cpu_info || !test_and_set_bool(warned)) printk(KERN_WARNING "CPU%u: Applying workaround for erratum 793\n", smp_processor_id()); wrmsrl(MSR_AMD64_LS_CFG, value | (1 << 15)); } } else if (c->x86 == 0x12) { rdmsrl(MSR_AMD64_DE_CFG, value); if (!(value & (1U << 31))) { static bool warned; if (c == &boot_cpu_data || opt_cpu_info || !test_and_set_bool(warned)) printk(KERN_WARNING "CPU%u: Applying workaround for erratum 665\n", smp_processor_id()); wrmsrl(MSR_AMD64_DE_CFG, value | (1U << 31)); } } /* AMD CPUs do not support SYSENTER outside of legacy mode. */ __clear_bit(X86_FEATURE_SEP, c->x86_capability); if (c->x86 == 0x10) { /* do this for boot cpu */ if (c == &boot_cpu_data) check_enable_amd_mmconf_dmi(); fam10h_check_enable_mmcfg(); /* * On family 10h BIOS may not have properly enabled WC+ * support, causing it to be converted to CD memtype. This may * result in performance degradation for certain nested-paging * guests. Prevent this conversion by clearing bit 24 in * MSR_F10_BU_CFG2. */ rdmsrl(MSR_F10_BU_CFG2, value); value &= ~(1ULL << 24); wrmsrl(MSR_F10_BU_CFG2, value); } /* * Family 0x12 and above processors have APIC timer * running in deep C states. */ if ( opt_arat && c->x86 > 0x11 ) __set_bit(X86_FEATURE_ARAT, c->x86_capability); /* * Prior to Family 0x14, perf counters are not reset during warm reboot. * We have to reset them manually. */ if (nmi_watchdog != NMI_LOCAL_APIC && c->x86 < 0x14) { wrmsrl(MSR_K7_PERFCTR0, 0); wrmsrl(MSR_K7_PERFCTR1, 0); wrmsrl(MSR_K7_PERFCTR2, 0); wrmsrl(MSR_K7_PERFCTR3, 0); } if (cpu_has(c, X86_FEATURE_EFRO)) { rdmsr(MSR_K8_HWCR, l, h); l |= (1 << 27); /* Enable read-only APERF/MPERF bit */ wrmsr(MSR_K8_HWCR, l, h); } /* Prevent TSC drift in non single-processor, single-core platforms. */ if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC)) disable_c1_ramping(); check_syscfg_dram_mod_en(); amd_log_freq(c); } const struct cpu_dev amd_cpu_dev = { .c_early_init = early_init_amd, .c_init = init_amd, };