author     Linus Torvalds <torvalds@linux-foundation.org>   2016-03-17 20:03:47 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-03-17 20:03:47 -0700
commit     588ab3f9afdfa1a6b1e5761c858b2c4ab6098285 (patch)
tree       c9aa4c4f8a63d25c3cf05330c68948dceec79cc2 /arch/arm64/kernel
parent     3d15cfdb1b77536c205d8e49c0312219ddf162ec (diff)
parent     2776e0e8ef683a42fe3e9a5facf576b73579700e (diff)
download   linux-588ab3f9afdfa1a6b1e5761c858b2c4ab6098285.tar.gz
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
"Here are the main arm64 updates for 4.6. There are some relatively
intrusive changes to support KASLR, the reworking of the kernel
virtual memory layout and initial page table creation.
Summary:
- Initial page table creation reworked to avoid breaking large block
mappings (huge pages) into smaller ones. The ARM architecture
requires break-before-make in such cases to avoid TLB conflicts but
that's not always possible on live page tables
- Kernel virtual memory layout: the kernel image is no longer linked
to the bottom of the linear mapping (PAGE_OFFSET) but at the bottom
of the vmalloc space, allowing the kernel to be loaded (nearly)
anywhere in physical RAM
- Kernel ASLR: position independent kernel Image and modules
randomly mapped in the vmalloc space, with the randomness provided
by UEFI (efi_get_random_bytes() patches merged via the arm64 tree,
acked by Matt Fleming); see the offset sketch after the diffstat
- Implement relative exception tables for arm64, required by KASLR
(initial code for ARCH_HAS_RELATIVE_EXTABLE added to lib/extable.c,
but the actual x86 conversion is deferred to 4.7 because of the
merge dependencies); a sketch of the entry layout follows the
quoted log
- Support for the User Access Override feature of ARMv8.2: this
allows uaccess functions (get_user etc.) to be implemented using
LDTR/STTR instructions. Such instructions, when run by the kernel,
perform unprivileged accesses, adding an extra level of protection.
The set_fs() macro is used to "upgrade" such instructions to
privileged accesses via the UAO bit (see the sketch after the
commit list)
- Half-precision floating point support (part of ARMv8.2)
- Optimisations for CPUs with or without a hardware prefetcher (using
run-time code patching)
- copy_page performance improvement to deal with 128 bytes at a time
- Sanity checks on the CPU capabilities (via CPUID) to prevent
incompatible secondary CPUs from being brought up (e.g. weird
big.LITTLE configurations)
- valid_user_regs() reworked for better sanity check of the
sigcontext information (restored pstate information)
- ACPI parking protocol implementation
- CONFIG_DEBUG_RODATA enabled by default
- VDSO code marked as read-only
- DEBUG_PAGEALLOC support
- ARCH_HAS_UBSAN_SANITIZE_ALL enabled
- Erratum workaround for the Cavium ThunderX SoC
- set_pte_at() fix for PROT_NONE mappings
- Code clean-ups"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (99 commits)
arm64: kasan: Fix zero shadow mapping overriding kernel image shadow
arm64: kasan: Use actual memory node when populating the kernel image shadow
arm64: Update PTE_RDONLY in set_pte_at() for PROT_NONE permission
arm64: Fix misspellings in comments.
arm64: efi: add missing frame pointer assignment
arm64: make mrs_s prefixing implicit in read_cpuid
arm64: enable CONFIG_DEBUG_RODATA by default
arm64: Rework valid_user_regs
arm64: mm: check at build time that PAGE_OFFSET divides the VA space evenly
arm64: KVM: Move kvm_call_hyp back to its original localtion
arm64: mm: treat memstart_addr as a signed quantity
arm64: mm: list kernel sections in order
arm64: lse: deal with clobbered IP registers after branch via PLT
arm64: mm: dump: Use VA_START directly instead of private LOWEST_ADDR
arm64: kconfig: add submenu for 8.2 architectural features
arm64: kernel: acpi: fix ioremap in ACPI parking protocol cpu_postboot
arm64: Add support for Half precision floating point
arm64: Remove fixmap include fragility
arm64: Add workaround for Cavium erratum 27456
arm64: mm: Mark .rodata as RO
...
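
As a rough illustration of the User Access Override item in the quoted log: the uaccess helpers are built from the unprivileged LDTR/STTR instruction forms, so a kernel-mode access through a user pointer is permission-checked as if it had been made from EL0, and set_fs(KERNEL_DS) sets PSTATE.UAO so the very same instructions behave as ordinary privileged accesses. A hypothetical, stripped-down helper (not the kernel's real __get_user, and with the exception-table fixup for the faulting case omitted):

/*
 * Illustration only: load a 64-bit value from a user pointer using an
 * unprivileged load.  With PSTATE.UAO clear this is checked against EL0
 * permissions; with UAO set it acts as a normal privileged load.
 */
static inline unsigned long load_user_unprivileged(const unsigned long *uptr)
{
	unsigned long val;

	asm volatile("ldtr %0, [%1]" : "=r" (val) : "r" (uptr));
	return val;
}
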
Diffstat (limited to 'arch/arm64/kernel')
27 files changed, 1202 insertions, 220 deletions
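
The offset arithmetic behind the Kernel ASLR item is visible in the new kaslr.c further down: the seed read (and wiped) from the "kaslr-seed" DT property is masked down to a 2 MB aligned offset within the lower half of the vmalloc area, and the top 16 bits are reused to randomize the linear region. A self-contained sketch of that arithmetic, using made-up example inputs for VA_BITS and the seed:

#include <stdint.h>
#include <stdio.h>

#define VA_BITS	48ULL			/* example configuration */
#define SZ_2M	0x200000ULL

int main(void)
{
	uint64_t seed = 0x1122334455667788ULL;	/* example kaslr-seed value */

	/* 2 MB aligned offset in the lower half of the vmalloc area */
	uint64_t mask = ((1ULL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
	uint64_t offset = seed & mask;

	/* the top 16 bits randomize the linear region instead */
	uint16_t memstart_offset_seed = seed >> 48;

	printf("image offset = 0x%llx, memstart seed = 0x%x\n",
	       (unsigned long long)offset, memstart_offset_seed);
	return 0;
}
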
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 83cd7e68e83b..3793003e16a2 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o @@ -41,7 +42,9 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o +arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c new file mode 100644 index 000000000000..a32b4011d711 --- /dev/null +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -0,0 +1,141 @@ +/* + * ARM64 ACPI Parking Protocol implementation + * + * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <linux/acpi.h> +#include <linux/types.h> + +#include <asm/cpu_ops.h> + +struct parking_protocol_mailbox { + __le32 cpu_id; + __le32 reserved; + __le64 entry_point; +}; + +struct cpu_mailbox_entry { + struct parking_protocol_mailbox __iomem *mailbox; + phys_addr_t mailbox_addr; + u8 version; + u8 gic_cpu_id; +}; + +static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS]; + +void __init acpi_set_mailbox_entry(int cpu, + struct acpi_madt_generic_interrupt *p) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + cpu_entry->mailbox_addr = p->parked_address; + cpu_entry->version = p->parking_version; + cpu_entry->gic_cpu_id = p->cpu_interface_number; +} + +bool acpi_parking_protocol_valid(int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + return cpu_entry->mailbox_addr && cpu_entry->version; +} + +static int acpi_parking_protocol_cpu_init(unsigned int cpu) +{ + pr_debug("%s: ACPI parked addr=%llx\n", __func__, + cpu_mailbox_entries[cpu].mailbox_addr); + + return 0; +} + +static int acpi_parking_protocol_cpu_prepare(unsigned int cpu) +{ + return 0; +} + +static int acpi_parking_protocol_cpu_boot(unsigned int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le32 cpu_id; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). + */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return -EIO; + + cpu_id = readl_relaxed(&mailbox->cpu_id); + /* + * Check if firmware has set-up the mailbox entry properly + * before kickstarting the respective cpu. + */ + if (cpu_id != ~0U) { + iounmap(mailbox); + return -ENXIO; + } + + /* + * stash the mailbox address mapping to use it for further FW + * checks in the postboot method + */ + cpu_entry->mailbox = mailbox; + + /* + * We write the entry point and cpu id as LE regardless of the + * native endianness of the kernel. Therefore, any boot-loaders + * that read this address need to convert this address to the + * Boot-Loader's endianness before jumping. + */ + writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point); + writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); + + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + + return 0; +} + +static void acpi_parking_protocol_cpu_postboot(void) +{ + int cpu = smp_processor_id(); + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; + __le64 entry_point; + + entry_point = readl_relaxed(&mailbox->entry_point); + /* + * Check if firmware has cleared the entry_point as expected + * by the protocol specification. 
+ */ + WARN_ON(entry_point); +} + +const struct cpu_operations acpi_parking_protocol_ops = { + .name = "parking-protocol", + .cpu_init = acpi_parking_protocol_cpu_init, + .cpu_prepare = acpi_parking_protocol_cpu_prepare, + .cpu_boot = acpi_parking_protocol_cpu_boot, + .cpu_postboot = acpi_parking_protocol_cpu_postboot +}; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 3e01207917b1..c37202c0c838 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 0b, 4b\n" \ - " .quad 1b, 4b\n" \ - " .popsection\n" \ + _ASM_EXTABLE(0b, 4b) \ + _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b0ab4e93db0d..3ae6b310ac9b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -104,6 +104,8 @@ int main(void) DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); BLANK(); + DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index feb6b4efa641..06afd04e02c0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -21,24 +21,12 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> -#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) - -#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ - MIDR_ARCHITECTURE_MASK) - static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry) { - u32 midr = read_cpuid_id(); - - if ((midr & CPU_MODEL_MASK) != entry->midr_model) - return false; - - midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; - - return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); + return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, + entry->midr_range_min, + entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ @@ -100,6 +88,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_27456 + { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 1), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index b6bd7d447768..c7cfb8fe06f9 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -25,19 +25,30 @@ #include <asm/smp_plat.h> extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; -static const struct cpu_operations *supported_cpu_ops[] __initconst = { 
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; +static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + &acpi_parking_protocol_ops, +#endif + &cpu_psci_ops, + NULL, +}; + static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops = supported_cpu_ops; + const struct cpu_operations **ops; + + ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; while (*ops) { if (!strcmp(name, (*ops)->name)) @@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu) } } else { enable_method = acpi_get_enable_method(cpu); - if (!enable_method) - pr_err("Unsupported ACPI enable-method\n"); + if (!enable_method) { + /* + * In ACPI systems the boot CPU does not require + * checking the enable method since for some + * boot protocol (ie parking protocol) it need not + * be initialized. Don't warn spuriously. + */ + if (cpu != 0) + pr_err("Unsupported ACPI enable-method\n"); + } } return enable_method; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ba745199297e..943f5140e0f3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> +#include <asm/mmu_context.h> #include <asm/processor.h> #include <asm/sysreg.h> #include <asm/virt.h> @@ -55,19 +56,23 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); .safe_val = SAFE_VAL, \ } -/* Define a feature with signed values */ +/* Define a feature with unsigned values */ #define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) - -/* Define a feature with unsigned value */ -#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) +/* Define a feature with a signed value */ +#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + #define ARM64_FTR_END \ { \ .width = 0, \ } +/* meta feature for alternatives */ +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), @@ -85,8 +90,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), /* Linux doesn't care about the EL3 */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), @@ -97,8 +102,8 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 
4, ID_AA64MMFR0_TGRAN4_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ @@ -109,7 +114,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ - U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -124,29 +129,34 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static struct arm64_ftr_bits ftr_ctr[] = { - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. 
Userspace JITs will * make use of *minLine */ - U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; static struct arm64_ftr_bits ftr_id_mmfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */ + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */ + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ ARM64_FTR_END, @@ -154,12 +164,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), - U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, }; @@ -205,6 +215,18 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_dfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -250,7 +272,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), @@ -285,6 +307,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 7 */ 
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), @@ -409,6 +432,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); @@ -518,6 +542,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); /* * EL3 is not our concern. @@ -593,7 +619,7 @@ u64 read_system_reg(u32 id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos); + int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); return val >= entry->min_field_value; } @@ -622,6 +648,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + u32 rv_min, rv_max; + + /* Cavium ThunderX pass 1.x and 2.x */ + rv_min = 0; + rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK; + + return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); +} + static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) { return is_kernel_in_hyp_mode(); @@ -634,6 +672,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PAN @@ -643,6 +682,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 1, .enable = cpu_enable_pan, }, @@ -654,10 +694,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .sign = FTR_UNSIGNED, .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ { + .desc = "Software prefetching using PRFM", + .capability = ARM64_HAS_NO_HW_PREFETCH, + .matches = has_no_hw_prefetch, + }, +#ifdef CONFIG_ARM64_UAO + { + .desc = "User Access Override", + .capability = ARM64_HAS_UAO, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .field_pos = ID_AA64MMFR2_UAO_SHIFT, + .min_field_value = 1, + .enable = cpu_enable_uao, + }, +#endif /* CONFIG_ARM64_UAO */ +#ifdef CONFIG_ARM64_PAN + { + .capability = ARM64_ALT_PAN_NOT_UAO, + .matches = cpufeature_pan_not_uao, + }, +#endif /* CONFIG_ARM64_PAN */ + { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, .matches = runs_at_el2, @@ -665,32 +728,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -#define HWCAP_CAP(reg, 
field, min_value, type, cap) \ +#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ { \ .desc = #cap, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .sign = s, \ .min_field_value = min_value, \ .hwcap_type = type, \ .hwcap = cap, \ } static const struct arm64_cpu_capabilities arm64_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), #ifdef CONFIG_COMPAT - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; @@ -745,7 +811,7 @@ static void __init setup_cpu_hwcaps(void) int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - for (i = 0; hwcaps[i].desc; i++) + for (i = 0; hwcaps[i].matches; i++) if (hwcaps[i].matches(&hwcaps[i])) cap_set_hwcap(&hwcaps[i]); } @@ -755,11 +821,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, { int i; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!caps[i].matches(&caps[i])) continue; - if 
(!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) && caps[i].desc) pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } @@ -774,13 +840,11 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; - for (i = 0; caps[i].desc; i++) + for (i = 0; caps[i].matches; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) on_each_cpu(caps[i].enable, NULL, true); } -#ifdef CONFIG_HOTPLUG_CPU - /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -802,35 +866,36 @@ static inline void set_sys_caps_initialised(void) static u64 __raw_read_system_reg(u32 sys_id) { switch (sys_id) { - case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); - - case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); - - case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case 
SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); default: BUG(); return 0; @@ -838,25 +903,12 @@ static u64 __raw_read_system_reg(u32 sys_id) } /* - * Park the CPU which doesn't have the capability as advertised - * by the system. + * Check for CPU features that are used in early boot + * based on the Boot CPU value. */ -static void fail_incapable_cpu(char *cap_type, - const struct arm64_cpu_capabilities *cap) +static void check_early_cpu_features(void) { - int cpu = smp_processor_id(); - - pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc); - /* Mark this CPU absent */ - set_cpu_present(cpu, 0); - - /* Check if we can park ourselves */ - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) - cpu_ops[cpu]->cpu_die(cpu); - asm( - "1: wfe\n" - " wfi\n" - " b 1b"); + verify_cpu_asid_bits(); } /* @@ -872,6 +924,8 @@ void verify_local_cpu_capabilities(void) int i; const struct arm64_cpu_capabilities *caps; + check_early_cpu_features(); + /* * If we haven't computed the system capabilities, there is nothing * to verify. @@ -880,35 +934,33 @@ void verify_local_cpu_capabilities(void) return; caps = arm64_features; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) continue; /* * If the new CPU misses an advertised feature, we cannot proceed * further, park the cpu. 
*/ - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) - fail_incapable_cpu("arm64_features", &caps[i]); + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { + pr_crit("CPU%d: missing feature: %s\n", + smp_processor_id(), caps[i].desc); + cpu_die_early(); + } if (caps[i].enable) caps[i].enable(NULL); } - for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { if (!cpus_have_hwcap(&caps[i])) continue; - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) - fail_incapable_cpu("arm64_hwcaps", &caps[i]); + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { + pr_crit("CPU%d: missing HWCAP: %s\n", + smp_processor_id(), caps[i].desc); + cpu_die_early(); + } } } -#else /* !CONFIG_HOTPLUG_CPU */ - -static inline void set_sys_caps_initialised(void) -{ -} - -#endif /* CONFIG_HOTPLUG_CPU */ - static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); @@ -939,3 +991,9 @@ void __init setup_cpu_features(void) pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", L1_CACHE_BYTES, cls); } + +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +{ + return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 212ae6361d8b..84c8684431c7 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -59,6 +59,8 @@ static const char *const hwcap_str[] = { "sha2", "crc32", "atomics", + "fphp", + "asimdhp", NULL }; @@ -210,6 +212,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c536c9e307b9..c45f2968bc8c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -34,7 +34,7 @@ /* Determine debug architecture. 
*/ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } @@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); -static DEFINE_RWLOCK(step_hook_lock); +static DEFINE_SPINLOCK(step_hook_lock); void register_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_add(&hook->node, &step_hook); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_add_rcu(&hook->node, &step_hook); + spin_unlock(&step_hook_lock); } void unregister_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_del(&hook->node); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_del_rcu(&hook->node); + spin_unlock(&step_hook_lock); + synchronize_rcu(); } /* @@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) struct step_hook *hook; int retval = DBG_HOOK_ERROR; - read_lock(&step_hook_lock); + rcu_read_lock(); - list_for_each_entry(hook, &step_hook, node) { + list_for_each_entry_rcu(hook, &step_hook, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; } - read_unlock(&step_hook_lock); + rcu_read_unlock(); return retval; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index a773db92908b..cae3112f7791 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -35,6 +35,7 @@ ENTRY(entry) * for image_addr variable passed to efi_entry(). */ stp x29, x30, [sp, #-32]! + mov x29, sp /* * Call efi_entry to do the real work. @@ -61,7 +62,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - ldr x21, =stext_offset + movz x21, #:abs_g0:stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index acc1afd5c749..975b274ee7b5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -45,7 +45,7 @@ * been used to perform kernel mode NEON in the meantime. * * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to - * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * the id of the current CPU every time the state is loaded onto a CPU. For (b), * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the * address of the userland FPSIMD state of the task that was loaded onto the CPU * the most recently, or NULL if kernel mode NEON has been performed after that. diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6f2f37743d3b..6ebd204da16a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,12 +29,14 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/elf.h> #include <asm/kernel-pgtable.h> #include <asm/kvm_arm.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/smp.h> #include <asm/sysreg.h> #include <asm/thread_info.h> #include <asm/virt.h> @@ -68,12 +70,11 @@ * in the entry routines. */ __HEAD - +_head: /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ #ifdef CONFIG_EFI -efi_head: /* * This add instruction has no meaningful effect except that * its opcode forms the magic "MZ" signature required by UEFI. 
@@ -84,9 +85,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad _kernel_offset_le // Image load offset from start of RAM, little-endian - .quad _kernel_size_le // Effective size of kernel image, little-endian - .quad _kernel_flags_le // Informative flags, little-endian + le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + le64sym _kernel_size_le // Effective size of kernel image, little-endian + le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved @@ -95,14 +96,14 @@ efi_head: .byte 0x4d .byte 0x64 #ifdef CONFIG_EFI - .long pe_header - efi_head // Offset to the PE header. + .long pe_header - _head // Offset to the PE header. #else .word 0 // reserved #endif #ifdef CONFIG_EFI .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - efi_head + .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -125,7 +126,7 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_entry - _head // AddressOfEntryPoint .long __efistub_stext_offset // BaseOfCode extra_header_fields: @@ -140,7 +141,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _end - efi_head // SizeOfImage + .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header .long __efistub_stext_offset // SizeOfHeaders @@ -211,6 +212,7 @@ section_table: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode + mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 @@ -220,11 +222,13 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, =__mmap_switched // address to jump to after + ldr x27, 0f // address to jump to after // MMU has been enabled adr_l lr, __enable_mmu // return (PIC) address b __cpu_setup // initialise processor ENDPROC(stext) + .align 3 +0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -312,7 +316,7 @@ ENDPROC(preserve_boot_args) __create_page_tables: adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir - mov x27, lr + mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential @@ -390,9 +394,11 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). 
*/ mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET + ldr x5, =KIMAGE_VADDR + add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END // __va(KERNEL_END) + ldr w6, kernel_img_size + add x6, x6, x5 mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -406,9 +412,11 @@ __create_page_tables: dmb sy bl __inval_cache_range - mov lr, x27 - ret + ret x28 ENDPROC(__create_page_tables) + +kernel_img_size: + .long _end - (_head - TEXT_OFFSET) .ltorg /* @@ -416,23 +424,81 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: + mov x28, lr // preserve LR + adr_l x8, vectors // load VBAR_EL1 with virtual + msr vbar_el1, x8 // vector table address + isb + // Clear BSS adr_l x0, __bss_start mov x1, xzr adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + dsb ishst // Make zero page visible to PTW + +#ifdef CONFIG_RELOCATABLE + + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + adr_l x8, __dynsym_start // start of symbol table + adr_l x9, __reloc_start // start of reloc table + adr_l x10, __reloc_end // end of reloc table + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr + dc cvac, x8 // value visible to secondaries + dsb sy // with MMU off +#endif adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - str_l x24, memstart_addr, x6 // Save PHYS_OFFSET + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x24 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif +#ifdef CONFIG_RANDOMIZE_BASE + cbnz x23, 0f // already running randomized? + mov x0, x21 // pass FDT address in x0 + bl kaslr_early_init // parse FDT for KASLR options + cbz x0, 0f // KASLR disabled? just proceed + mov x23, x0 // record KASLR offset + ret x28 // we must enable KASLR, return + // to __enable_mmu() +0: +#endif b start_kernel ENDPROC(__mmap_switched) @@ -441,6 +507,10 @@ ENDPROC(__mmap_switched) * hotplug and needs to have the same protections as the text region */ .section ".text","ax" + +ENTRY(kimage_vaddr) + .quad _text - TEXT_OFFSET + /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. 
@@ -631,13 +701,20 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x21, =secondary_data - ldr x27, =__secondary_switched // address to jump to after enabling the MMU + ldr x8, kimage_vaddr + ldr w9, 0f + sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) +0: .long (_text - TEXT_OFFSET) - __secondary_switched ENTRY(__secondary_switched) - ldr x0, [x21] // get secondary_data.stack + adr_l x5, vectors + msr vbar_el1, x5 + isb + + adr_l x0, secondary_data + ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack mov sp, x0 and x0, x0, #~(THREAD_SIZE - 1) msr sp_el0, x0 // save thread_info @@ -646,6 +723,29 @@ ENTRY(__secondary_switched) ENDPROC(__secondary_switched) /* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + * + * update_early_cpu_boot_status tmp, status + * - Corrupts tmp1, tmp2 + * - Writes 'status' to __early_cpu_boot_status and makes sure + * it is committed to memory. + */ + + .macro update_early_cpu_boot_status status, tmp1, tmp2 + mov \tmp2, #\status + str_l \tmp2, __early_cpu_boot_status, \tmp1 + dmb sy + dc ivac, \tmp1 // Invalidate potentially stale cache line + .endm + + .pushsection .data..cacheline_aligned + .align L1_CACHE_SHIFT +ENTRY(__early_cpu_boot_status) + .long 0 + .popsection + +/* * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. @@ -658,12 +758,12 @@ ENDPROC(__secondary_switched) */ .section ".idmap.text", "ax" __enable_mmu: + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - ldr x5, =vectors - msr vbar_el1, x5 + update_early_cpu_boot_status 0, x1, x2 msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb @@ -677,10 +777,33 @@ __enable_mmu: ic iallu dsb nsh isb +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + blr x27 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x18 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic ialluis // flush instructions fetched + isb // via old mapping + add x27, x27, x23 // relocated __mmap_switched +#endif br x27 ENDPROC(__enable_mmu) __no_granule_support: + /* Indicate that this CPU can't boot and is stuck in the kernel */ + update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2 +1: wfe - b __no_granule_support + wfi + b 1b ENDPROC(__no_granule_support) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 352f7abd91c9..5e360ce88f10 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -26,31 +26,40 @@ * There aren't any ELF relocations we can use to endian-swap values known only * at link time (e.g. the subtraction of two symbol addresses), so we must get * the linker to endian-swap certain values before emitting them. + * + * Note that, in order for this to work when building the ELF64 PIE executable + * (for KASLR), these values should not be referenced via R_AARCH64_ABS64 + * relocations, since these are fixed up at runtime rather than at build time + * when PIE is in effect. So we need to split them up in 32-bit high and low + * words. 
*/ #ifdef CONFIG_CPU_BIG_ENDIAN -#define DATA_LE64(data) \ - ((((data) & 0x00000000000000ff) << 56) | \ - (((data) & 0x000000000000ff00) << 40) | \ - (((data) & 0x0000000000ff0000) << 24) | \ - (((data) & 0x00000000ff000000) << 8) | \ - (((data) & 0x000000ff00000000) >> 8) | \ - (((data) & 0x0000ff0000000000) >> 24) | \ - (((data) & 0x00ff000000000000) >> 40) | \ - (((data) & 0xff00000000000000) >> 56)) +#define DATA_LE32(data) \ + ((((data) & 0x000000ff) << 24) | \ + (((data) & 0x0000ff00) << 8) | \ + (((data) & 0x00ff0000) >> 8) | \ + (((data) & 0xff000000) >> 24)) #else -#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#define DATA_LE32(data) ((data) & 0xffffffff) #endif +#define DEFINE_IMAGE_LE64(sym, data) \ + sym##_lo32 = DATA_LE32((data) & 0xffffffff); \ + sym##_hi32 = DATA_LE32((data) >> 32) + #ifdef CONFIG_CPU_BIG_ENDIAN -#define __HEAD_FLAG_BE 1 +#define __HEAD_FLAG_BE 1 #else -#define __HEAD_FLAG_BE 0 +#define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) + +#define __HEAD_FLAG_PHYS_BASE 1 -#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ - (__HEAD_FLAG_PAGE_SIZE << 1)) +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1) | \ + (__HEAD_FLAG_PHYS_BASE << 3)) /* * These will output as part of the Image header, which should be little-endian @@ -58,9 +67,9 @@ * endian swapped in head.S, all are done here for consistency. */ #define HEAD_SYMBOLS \ - _kernel_size_le = DATA_LE64(_end - _text); \ - _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ - _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ + DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ + DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #ifdef CONFIG_EFI diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c new file mode 100644 index 000000000000..582983920054 --- /dev/null +++ b/arch/arm64/kernel/kaslr.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/mm_types.h> +#include <linux/sched.h> +#include <linux/types.h> + +#include <asm/fixmap.h> +#include <asm/kernel-pgtable.h> +#include <asm/memory.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/sections.h> + +u64 __read_mostly module_alloc_base; +u16 __initdata memstart_offset_seed; + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + u64 *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init const u8 *get_cmdline(void *fdt) +{ + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + return prop; + } +out: + return default_cmdline; +} + +extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, + pgprot_t prot); + +/* + * This routine will be executed with the kernel mapped at its default virtual + * address, and if it returns successfully, the kernel will be remapped, and + * start_kernel() will be executed from a randomized virtual offset. The + * relocation will result in all absolute references (e.g., static variables + * containing function pointers) to be reinitialized, and zero-initialized + * .bss variables will be reset to 0. + */ +u64 __init kaslr_early_init(u64 dt_phys) +{ + void *fdt; + u64 seed, offset, mask, module_range; + const u8 *cmdline, *str; + int size; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; + + /* + * Try to map the FDT early. If this fails, we simply bail, + * and proceed with KASLR disabled. We will make another + * attempt at mapping the FDT in setup_machine() + */ + early_fixmap_init(); + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + if (!fdt) + return 0; + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(fdt); + if (!seed) + return 0; + + /* + * Check if 'nokaslr' appears on the command line, and + * return 0 if that is the case. + */ + cmdline = get_cmdline(fdt); + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + return 0; + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * lower half of the VMALLOC area (VA_BITS - 2). + * Even if we could randomize at page granularity for 16k and 64k pages, + * let's always round to 2 MB so we don't interfere with the ability to + * map using contiguous PTEs + */ + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); + offset = seed & mask; + + /* use the top 16 bits to randomize the linear region */ + memstart_offset_seed = seed >> 48; + + /* + * The kernel Image should not extend across a 1GB/32MB/512MB alignment + * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this + * happens, increase the KASLR offset by the size of the kernel image. 
+ */ + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = (offset + (u64)(_end - _text)) & mask; + + if (IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN does not expect the module region to intersect the + * vmalloc region, since shadow memory is allocated for each + * module at load time, whereas the vmalloc region is shadowed + * by KASAN zero pages. So keep modules out of the vmalloc + * region if KASAN is enabled. + */ + return offset; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region independently from the core + * kernel. This prevents modules from leaking any information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; + module_alloc_base = VMALLOC_START; + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return offset; +} diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index bcac81e600b9..b67531a13136 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -292,8 +292,8 @@ static struct notifier_block kgdb_notifier = { }; /* - * kgdb_arch_init - Perform any architecture specific initalization. - * This function will handle the initalization of any architecture + * kgdb_arch_init - Perform any architecture specific initialization. + * This function will handle the initialization of any architecture * specific callbacks. */ int kgdb_arch_init(void) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c new file mode 100644 index 000000000000..1ce90d8450ae --- /dev/null +++ b/arch/arm64/kernel/module-plts.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sort.h> + +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... 
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+			  Elf64_Sym *sym)
+{
+	struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+	int i = mod->arch.plt_num_entries;
+	u64 val = sym->st_value + rela->r_addend;
+
+	/*
+	 * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+	 * which are listed in the ELF symtab section, but without a type
+	 * or a size.
+	 * So, similar to how the module loader uses the Elf64_Sym::st_value
+	 * field to store the resolved addresses of undefined symbols, let's
+	 * borrow the Elf64_Sym::st_size field (whose value is never used by
+	 * the module loader, even for symbols that are defined) to record
+	 * the address of a symbol's associated PLT entry as we emit it for a
+	 * zero addend relocation (which is the only kind we have to deal with
+	 * in practice). This allows us to find duplicates without having to
+	 * go through the table every time.
+	 */
+	if (rela->r_addend == 0 && sym->st_size != 0) {
+		BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+		return sym->st_size;
+	}
+
+	mod->arch.plt_num_entries++;
+	BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd  := 0x10 (x16)
+	 * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf  := 1 (64-bit variant)
+	 */
+	plt[i] = (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+
+	if (rela->r_addend == 0)
+		sym->st_size = (u64)&plt[i];
+
+	return (u64)&plt[i];
+}
+
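The opcode table above compresses a lot of detail; the standalone sketch below (not part of the patch) reproduces the same MOVN/MOVK/MOVK/BR encoding in plain C. The key trick is that MOVN writes the bitwise NOT of its immediate, so after the two MOVKs the top 16 bits of x16 are all ones, which is exactly what a kernel virtual address needs. The target address in main() is invented.

#include <stdint.h>
#include <stdio.h>

static void encode_plt_words(uint64_t val, uint32_t insn[4])
{
	/* MOVN x16, #~val[15:0]          : opc=00, hw=00 */
	insn[0] = 0x92800010u | (uint32_t)((~val & 0xffff) << 5);
	/* MOVK x16, #val[31:16], lsl #16 : opc=11, hw=01 */
	insn[1] = 0xf2a00010u | (uint32_t)(((val >> 16) & 0xffff) << 5);
	/* MOVK x16, #val[47:32], lsl #32 : opc=11, hw=10 */
	insn[2] = 0xf2c00010u | (uint32_t)(((val >> 32) & 0xffff) << 5);
	/* BR x16 */
	insn[3] = 0xd61f0200u;
}

int main(void)
{
	uint32_t insn[4];
	int i;

	encode_plt_words(0xffffff80081a2c40ULL, insn);	/* made-up target address */
	for (i = 0; i < 4; i++)
		printf("0x%08x\n", insn[i]);
	return 0;
}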
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+	const Elf64_Rela *x = a, *y = b;
+	int i;
+
+	/* sort by type, symbol index and addend */
+	i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+	if (i == 0)
+		i = cmp_3way(x->r_addend, y->r_addend);
+	return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+	/*
+	 * Entries are sorted by type, symbol index and addend. That means
+	 * that, if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+	unsigned int ret = 0;
+	Elf64_Sym *s;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_AARCH64_JUMP26:
+		case R_AARCH64_CALL26:
+			/*
+			 * We only have to consider branch targets that resolve
+			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within 128 MB
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF64_R_SYM(rela[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero: this allows us to record the PLT
+			 * entry address in the symbol table itself, rather than
+			 * having to search the list for duplicates each time we
+			 * emit one.
+			 */
+			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned long plt_max_entries = 0;
+	Elf64_Sym *syms = NULL;
+	int i;
+
+	/*
+	 * Find the empty .plt section so we can expand it to store the PLT
+	 * entries. Record the symtab address as well.
+	 */
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+			mod->arch.plt = sechdrs + i;
+		else if (sechdrs[i].sh_type == SHT_SYMTAB)
+			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+	}
+
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+		int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+		Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* sort by type, symbol index and addend */
+		sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+		plt_max_entries += count_plts(syms, rels, numrels);
+	}
+
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+	mod->arch.plt_num_entries = 0;
+	mod->arch.plt_max_entries = plt_max_entries;
+	return 0;
+}
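The sort-then-scan strategy used by cmp_rela(), duplicate_rel() and count_plts() can be exercised in isolation. The userspace sketch below (not part of the patch, with an invented struct and sample data) shows how sorting by (type, symbol, addend) makes duplicates adjacent, so the worst-case PLT count only grows per unique branch target.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct rela { uint32_t type; uint32_t sym; int64_t addend; };

#define cmp_3way(a, b)	((a) < (b) ? -1 : (a) > (b))

static int cmp_rela(const void *a, const void *b)
{
	const struct rela *x = a, *y = b;
	int i;

	i = cmp_3way(x->type, y->type);
	if (i == 0)
		i = cmp_3way(x->sym, y->sym);
	if (i == 0)
		i = cmp_3way(x->addend, y->addend);
	return i;
}

int main(void)
{
	/* sample relocations: two of them target the same symbol with the same type */
	struct rela r[] = {
		{ 283, 5, 0 }, { 282, 5, 0 }, { 283, 5, 0 }, { 283, 7, 0 },
	};
	int n = sizeof(r) / sizeof(r[0]), i, plts = 0;

	qsort(r, n, sizeof(r[0]), cmp_rela);
	for (i = 0; i < n; i++)
		if (i == 0 || cmp_rela(&r[i], &r[i - 1]) != 0)
			plts++;		/* one worst-case PLT slot per unique triple */

	printf("%d PLT entries reserved for %d relocations\n", plts, n);
	return 0;
}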
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 93e970231ca9..7f316982ce00 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -34,10 +34,26 @@ void *module_alloc(unsigned long size)
 {
 	void *p;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+				module_alloc_base + MODULES_VSIZE,
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
+	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+	    !IS_ENABLED(CONFIG_KASAN))
+		/*
+		 * KASAN can only deal with module allocations being served
+		 * from the reserved module region, since the remainder of
+		 * the vmalloc region is already backed by zero shadow pages,
+		 * and punching holes into it is non-trivial. Since the module
+		 * region is not randomized when KASAN is enabled, it is even
+		 * less likely that the module region gets exhausted, so we
+		 * can simply omit this fallback in that case.
+		 */
+		p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+				VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
 		return NULL;
@@ -361,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
 					     AARCH64_INSN_IMM_26);
+
+			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+			    ovf == -ERANGE) {
+				val = module_emit_plt_entry(me, &rel[i], sym);
+				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+						     26, AARCH64_INSN_IMM_26);
+			}
 			break;
 
 		default:
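The fallback added to apply_relocate_add() only triggers when reloc_insn_imm() reports -ERANGE, i.e. when the branch displacement no longer fits the signed 26-bit word offset of a B/BL instruction. The standalone sketch below (not part of the patch, addresses made up) shows the +/-128 MB window that limit implies, which is what makes a PLT veneer necessary once modules can land anywhere in the vmalloc space.

#include <stdint.h>
#include <stdio.h>

static int branch26_in_range(uint64_t place, uint64_t target)
{
	int64_t disp = (int64_t)(target - place);

	/* 26-bit signed immediate counts 4-byte words: roughly +/-128 MB in bytes */
	return disp >= -(1LL << 27) && disp < (1LL << 27);
}

int main(void)
{
	uint64_t site = 0xffffff8000000000ULL;		/* made-up call site */
	uint64_t near = site + (64ULL << 20);		/* +64 MB */
	uint64_t far  = site + (512ULL << 20);		/* +512 MB */

	printf("near: %s, far: %s\n",
	       branch26_in_range(site, near) ? "direct branch" : "needs PLT",
	       branch26_in_range(site, far)  ? "direct branch" : "needs PLT");
	return 0;
}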
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644
index 000000000000..8949f6c6f729
--- /dev/null
+++ b/arch/arm64/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+	.plt (NOLOAD) : { BYTE(0) }
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 88d742ba19d5..80624829db61 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,6 +46,7 @@
 #include <linux/notifier.h>
 #include <trace/events/power.h>
 
+#include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
 #include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
+		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+		    cpus_have_cap(ARM64_HAS_UAO))
+			childregs->pstate |= PSR_UAO_BIT;
 		p->thread.cpu_context.x19 = stack_start;
 		p->thread.cpu_context.x20 = stk_sz;
 	}
@@ -308,6 +312,17 @@ static void tls_thread_switch(struct task_struct *next)
 	: : "r" (tpidr), "r" (tpidrro));
 }
 
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+	if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+		if (task_thread_info(next)->addr_limit == KERNEL_DS)
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+		else
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+	}
+}
+
 /*
  * Thread switching.
  */
@@ -320,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	tls_thread_switch(next);
 	hw_breakpoint_thread_switch(next);
 	contextidr_thread_switch(next);
+	uao_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index ff7f13239515..3f6cd5c5234f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -500,7 +500,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 	if (ret)
 		return ret;
 
-	if (!valid_user_regs(&newregs))
+	if (!valid_user_regs(&newregs, target))
 		return -EINVAL;
 
 	task_pt_regs(target)->user_regs = newregs;
@@ -770,7 +770,7 @@ static int compat_gpr_set(struct task_struct *target,
 
 	}
 
-	if (valid_user_regs(&newregs.user_regs))
+	if (valid_user_regs(&newregs.user_regs, target))
 		*task_pt_regs(target) = newregs;
 	else
 		ret = -EINVAL;
@@ -1272,3 +1272,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
 }
+
+/*
+ * Bits which are always architecturally RES0 per ARM DDI 0487A.h
+ * Userspace cannot use these until they have an architectural meaning.
+ * We also reserve IL for the kernel; SS is handled dynamically.
+ */
+#define SPSR_EL1_AARCH64_RES0_BITS \
+	(GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \
+	 GENMASK_ULL(5, 5))
+#define SPSR_EL1_AARCH32_RES0_BITS \
+	(GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20))
+
+static int valid_compat_regs(struct user_pt_regs *regs)
+{
+	regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS;
+
+	if (!system_supports_mixed_endian_el0()) {
+		if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+			regs->pstate |= COMPAT_PSR_E_BIT;
+		else
+			regs->pstate &= ~COMPAT_PSR_E_BIT;
+	}
+
+	if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) &&
+	    (regs->pstate & COMPAT_PSR_A_BIT) == 0 &&
+	    (regs->pstate & COMPAT_PSR_I_BIT) == 0 &&
+	    (regs->pstate & COMPAT_PSR_F_BIT) == 0) {
+		return 1;
+	}
+
+	/*
+	 * Force PSR to a valid 32-bit EL0t, preserving the same bits as
+	 * arch/arm.
+	 */
+	regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT |
+			COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT |
+			COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK |
+			COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT |
+			COMPAT_PSR_T_BIT;
+	regs->pstate |= PSR_MODE32_BIT;
+
+	return 0;
+}
+
+static int valid_native_regs(struct user_pt_regs *regs)
+{
+	regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS;
+
+	if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) &&
+	    (regs->pstate & PSR_D_BIT) == 0 &&
+	    (regs->pstate & PSR_A_BIT) == 0 &&
+	    (regs->pstate & PSR_I_BIT) == 0 &&
+	    (regs->pstate & PSR_F_BIT) == 0) {
+		return 1;
+	}
+
+	/* Force PSR to a valid 64-bit EL0t */
+	regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT;
+
+	return 0;
+}
+
+/*
+ * Are the current registers suitable for user mode? (used to maintain
+ * security in signal handlers)
+ */
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
+{
+	if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
+		regs->pstate &= ~DBG_SPSR_SS;
+
+	if (is_compat_thread(task_thread_info(task)))
+		return valid_compat_regs(regs);
+	else
+		return valid_native_regs(regs);
+}
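As a rough userspace model (not kernel code) of the policy valid_native_regs() implements above: clear the AArch64 RES0 bits, accept the pstate only if it describes a 64-bit EL0t state with DAIF clear, and otherwise force it back to EL0t keeping only the NZCV flags. The constants follow the SPSR_EL1 bit layout; the sample value is arbitrary.

#include <stdint.h>
#include <stdio.h>

#define SPSR64_RES0	0xffffffff0fdffc20ULL	/* bits 63:32, 27:22, 20:10, 5 */
#define PSR_MODE_EL0t	0x0ULL
#define PSR_MODE_MASK	0xfULL
#define PSR_MODE32_BIT	(1ULL << 4)
#define PSR_F_BIT	(1ULL << 6)
#define PSR_I_BIT	(1ULL << 7)
#define PSR_A_BIT	(1ULL << 8)
#define PSR_D_BIT	(1ULL << 9)
#define PSR_NZCV	(0xfULL << 28)

static uint64_t sanitize_pstate(uint64_t pstate)
{
	pstate &= ~SPSR64_RES0;		/* reserved bits must read as zero */

	if ((pstate & PSR_MODE_MASK) == PSR_MODE_EL0t &&
	    !(pstate & (PSR_MODE32_BIT | PSR_D_BIT | PSR_A_BIT |
			PSR_I_BIT | PSR_F_BIT)))
		return pstate;		/* acceptable as-is */

	/* forged or privileged state: force a safe 64-bit EL0t value */
	return (pstate & PSR_NZCV) | PSR_MODE_EL0t;
}

int main(void)
{
	/* EL1h with DAIF masked gets demoted to plain EL0t with flags kept */
	printf("0x%llx\n", (unsigned long long)sanitize_pstate(0x600003c5ULL));
	return 0;
}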
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 450987d99b9b..9dc67769b6a4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/memblock.h>
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
 
 phys_addr_t __fdt_pointer __initdata;
 
@@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	local_async_enable();
 
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_uninstall_idmap();
+
 	efi_init();
 	arm64_memblock_init();
 
@@ -381,3 +388,32 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+			      void *p)
+{
+	u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+		pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+			 kaslr_offset, KIMAGE_VADDR);
+	} else {
+		pr_emerg("Kernel Offset: disabled\n");
+	}
+	return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_offset_notifier);
+	return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e18c48cb6db1..a8eafdbc7cb8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs,
 	 */
 	regs->syscallno = ~0UL;
 
-	err |= !valid_user_regs(&regs->user_regs);
+	err |= !valid_user_regs(&regs->user_regs, current);
 
 	if (err == 0) {
 		struct fpsimd_context *fpsimd_ctx =
@@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	/*
 	 * Check that the resulting registers are actually sane.
 	 */
-	ret |= !valid_user_regs(&regs->user_regs);
+	ret |= !valid_user_regs(&regs->user_regs, current);
 
 	/*
 	 * Fast forward the stepping logic so we step into the signal
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 71ef6dc89ae5..b7063de792f7 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -166,7 +166,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 #ifdef BUS_MCEERR_AO
 		/*
 		 * Other callers might not initialize the si_lsb field,
-		 * so check explicitely for the right codes here.
+		 * so check explicitly for the right codes here.
 		 */
 		if (from->si_signo == SIGBUS &&
 		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
@@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
 	 */
 	regs->syscallno = ~0UL;
 
-	err |= !valid_user_regs(&regs->user_regs);
+	err |= !valid_user_regs(&regs->user_regs, current);
 
 	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
 	if (err == 0)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 460765799c64..b2d5f4ee9a1c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -63,6 +63,8 @@
  * where to place its SVC stack
  */
 struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
 
 enum ipi_msg_type {
 	IPI_RESCHEDULE,
@@ -70,8 +72,19 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 	IPI_TIMER,
 	IPI_IRQ_WORK,
+	IPI_WAKEUP
 };
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+	return -ENOSYS;
+}
+#endif
+
+
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
  * This also gives us the initial stack to use for this CPU.
@@ -89,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running);
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
+	long status;
 
 	/*
 	 * We need to tell the secondary core where to find its stack and the
	 * page tables.
	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+	update_cpu_boot_status(CPU_MMU_OFF);
 	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 
 	/*
@@ -118,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	}
 
 	secondary_data.stack = NULL;
+	status = READ_ONCE(secondary_data.status);
+	if (ret && status) {
+
+		if (status == CPU_MMU_OFF)
+			status = READ_ONCE(__early_cpu_boot_status);
+
+		switch (status) {
+		default:
+			pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+					cpu, status);
+			break;
+		case CPU_KILL_ME:
+			if (!op_cpu_kill(cpu)) {
+				pr_crit("CPU%u: died during early boot\n", cpu);
+				break;
+			}
+			/* Fall through */
+			pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+		case CPU_STUCK_IN_KERNEL:
+			pr_crit("CPU%u: is stuck in kernel\n", cpu);
+			cpus_stuck_in_kernel++;
+			break;
+		case CPU_PANIC_KERNEL:
+			panic("CPU%u detected unsupported configuration\n", cpu);
+		}
+	}
 
 	return ret;
 }
@@ -149,9 +190,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	cpu_uninstall_idmap();
 
 	preempt_disable();
 	trace_hardirqs_off();
@@ -185,6 +224,9 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	pr_info("CPU%u: Booted secondary processor [%08x]\n",
 					 cpu, read_cpuid_id());
+	update_cpu_boot_status(CPU_BOOT_SUCCESS);
+	/* Make sure the status update is visible before we complete */
+	smp_wmb();
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
@@ -313,6 +355,30 @@ void cpu_die(void)
 }
 #endif
 
+/*
+ * Kill the calling secondary CPU, early in bringup before it is turned
+ * online.
+ */
+void cpu_die_early(void)
+{
+	int cpu = smp_processor_id();
+
+	pr_crit("CPU%d: will not boot\n", cpu);
+
+	/* Mark this CPU absent */
+	set_cpu_present(cpu, 0);
+
+#ifdef CONFIG_HOTPLUG_CPU
+	update_cpu_boot_status(CPU_KILL_ME);
+	/* Check if we can park ourselves */
+	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+		cpu_ops[cpu]->cpu_die(cpu);
+#endif
+	update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
+
+	cpu_park_loop();
+}
+
 static void __init hyp_mode_check(void)
 {
 	if (is_hyp_mode_available())
@@ -445,6 +511,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	/* map the logical cpu id to cpu MPIDR */
 	cpu_logical_map(cpu_count) = hwid;
 
+	/*
+	 * Set-up the ACPI parking protocol cpu entries
+	 * while initializing the cpu_logical_map to
+	 * avoid parsing MADT entries multiple times for
+	 * nothing (ie a valid cpu_logical_map entry should
+	 * contain a valid parking protocol data set to
+	 * initialize the cpu if the parking protocol is
+	 * the only available enable method).
+	 */
+	acpi_set_mailbox_entry(cpu_count, processor);
+
 	cpu_count++;
 }
 
@@ -627,6 +704,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
 	S(IPI_TIMER, "Timer broadcast interrupts"),
 	S(IPI_IRQ_WORK, "IRQ work interrupts"),
+	S(IPI_WAKEUP, "CPU wake-up interrupts"),
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -670,6 +748,13 @@ void arch_send_call_function_single_ipi(int cpu)
 	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
@@ -747,6 +832,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		break;
 #endif
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+	case IPI_WAKEUP:
+		WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+			  "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+			  cpu);
+		break;
+#endif
+
 	default:
 		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
 		break;
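The ordering in secondary_start_kernel() — publish the boot status, smp_wmb(), then mark the CPU online — is what lets __cpu_up() trust whatever status it reads afterwards. The portable C analogy below (not part of the patch, names invented) shows the same publish-then-signal pattern with C11 atomics; build with -pthread.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { STATUS_MMU_OFF, STATUS_KILL_ME, STATUS_STUCK, STATUS_SUCCESS };

static atomic_int boot_status = STATUS_MMU_OFF;
static atomic_int cpu_online;

static void *secondary(void *unused)
{
	(void)unused;
	/* early bring-up work would happen here */
	atomic_store_explicit(&boot_status, STATUS_SUCCESS, memory_order_relaxed);
	/* the release store plays the role of the smp_wmb() above */
	atomic_store_explicit(&cpu_online, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, secondary, NULL);

	/* "boot CPU": wait until the secondary reports in */
	while (!atomic_load_explicit(&cpu_online, memory_order_acquire))
		;

	/* the status word is guaranteed to be up to date at this point */
	printf("secondary status: %d\n",
	       atomic_load_explicit(&boot_status, memory_order_relaxed));

	pthread_join(t, NULL);
	return 0;
}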
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1095aa483a1c..66055392f445 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-	struct mm_struct *mm = current->active_mm;
 	int ret;
 	unsigned long flags;
 
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	ret = __cpu_suspend_enter(arg, fn);
 	if (ret == 0) {
 		/*
-		 * We are resuming from reset with TTBR0_EL1 set to the
-		 * idmap to enable the MMU; set the TTBR0 to the reserved
-		 * page tables to prevent speculative TLB allocations, flush
-		 * the local tlb and set the default tcr_el1.t0sz so that
-		 * the TTBR0 address space set-up is properly restored.
-		 * If the current active_mm != &init_mm we entered cpu_suspend
-		 * with mappings in TTBR0 that must be restored, so we switch
-		 * them back to complete the address space configuration
-		 * restoration before returning.
+		 * We are resuming from reset with the idmap active in TTBR0_EL1.
+		 * We must uninstall the idmap and restore the expected MMU
+		 * state before we can possibly return to userspace.
 		 */
-		cpu_set_reserved_ttbr0();
-		local_flush_tlb_all();
-		cpu_set_default_tcr_t0sz();
-
-		if (mm != &init_mm)
-			cpu_switch_mm(mm->pgd, mm);
+		cpu_uninstall_idmap();
 
 		/*
 		 * Restore per-cpu offset before any kernel
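cpu_uninstall_idmap() itself is defined in asm/mmu_context.h rather than in this hunk. Judging from the open-coded sequences it replaces here, in secondary_start_kernel() and in setup_arch(), the helper is expected to bundle roughly the following steps; this is a kernel-context sketch reconstructed from the removed lines, not the verbatim implementation.

static inline void cpu_uninstall_idmap(void)	/* reconstructed sketch */
{
	struct mm_struct *mm = current->active_mm;

	cpu_set_reserved_ttbr0();	/* point TTBR0 at the reserved tables */
	local_flush_tlb_all();		/* drop stale idmap TLB entries */
	cpu_set_default_tcr_t0sz();	/* restore the default TTBR0 VA size */

	if (mm != &init_mm)		/* reinstall the interrupted user tables */
		cpu_switch_mm(mm->pgd, mm);
}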
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index 60c1db54b41a..82379a70ef03 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -21,9 +21,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-	__PAGE_ALIGNED_DATA
-
 	.globl vdso_start, vdso_end
+	.section .rodata
 	.balign PAGE_SIZE
 vdso_start:
 	.incbin "arch/arm64/kernel/vdso/vdso.so"
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e3928f578891..4c56e7a0621b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -87,15 +87,16 @@ SECTIONS
 		EXIT_CALL
 		*(.discard)
 		*(.discard.*)
+		*(.interp .dynamic)
 	}
 
-	. = PAGE_OFFSET + TEXT_OFFSET;
+	. = KIMAGE_VADDR + TEXT_OFFSET;
 
 	.head.text : {
 		_text = .;
 		HEAD_TEXT
 	}
-	ALIGN_DEBUG_RO
+	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 		__exception_text_start = .;
@@ -113,13 +114,13 @@ SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
-	RO_DATA(PAGE_SIZE)
-	EXCEPTION_TABLE(8)
+	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	RO_DATA(PAGE_SIZE)		/* everything from this point to */
+	EXCEPTION_TABLE(8)		/* _etext will be marked RO NX   */
 	NOTES
 
-	ALIGN_DEBUG_RO
-	_etext = .;			/* End of text and rodata section */
 	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	_etext = .;			/* End of text and rodata section */
 	__init_begin = .;
 
 	INIT_TEXT_SECTION(8)
@@ -150,6 +151,21 @@ SECTIONS
 	.altinstr_replacement : {
 		*(.altinstr_replacement)
 	}
+	.rela : ALIGN(8) {
+		__reloc_start = .;
+		*(.rela .rela*)
+		__reloc_end = .;
+	}
+	.dynsym : ALIGN(8) {
+		__dynsym_start = .;
+		*(.dynsym)
+	}
+	.dynstr : {
+		*(.dynstr)
+	}
+	.hash : {
+		*(.hash)
+	}
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
@@ -187,4 +203,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
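The .rela output section collected above is what makes the KASLR relocation pass possible: every R_AARCH64_RELATIVE entry between __reloc_start and __reloc_end names a 64-bit word that must be shifted by the random offset once it is known. The C rendering below is illustration only; the kernel performs the equivalent fixup in early assembly, before the normal C environment is up, so this is not the actual implementation.

#include <stdint.h>

#define R_AARCH64_RELATIVE	1027	/* assumed standard ELF relocation number */

struct elf64_rela {
	uint64_t r_offset;	/* link-time address of the word to patch */
	uint64_t r_info;	/* relocation type (low 32 bits) and symbol index */
	int64_t  r_addend;	/* link-time value the word should hold */
};

/* provided by the linker script section added above */
extern struct elf64_rela __reloc_start[], __reloc_end[];

static void apply_relative_relocs(uint64_t kaslr_offset)
{
	struct elf64_rela *rela;

	for (rela = __reloc_start; rela < __reloc_end; rela++) {
		if ((uint32_t)rela->r_info != R_AARCH64_RELATIVE)
			continue;
		/* shift both the patched location and its value by the offset */
		*(uint64_t *)(rela->r_offset + kaslr_offset) =
			rela->r_addend + kaslr_offset;
	}
}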