From 247055aa21ffef1c49dd64710d5e94c2aee19b58 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 16:03:21 +0100 Subject: ARM: 6384/1: Remove the domain switching on ARMv6k/v7 CPUs This patch removes the domain switching functionality via the set_fs and __switch_to functions on cores that have a TLS register. Currently, the ioremap and vmalloc areas share the same level 1 page tables and therefore have the same domain (DOMAIN_KERNEL). When the kernel domain is modified from Client to Manager (via the __set_fs or in the __switch_to function), the XN (eXecute Never) bit is overridden and newer CPUs can speculatively prefetch the ioremap'ed memory. Linux performs the kernel domain switching to allow user-specific functions (copy_to/from_user, get/put_user etc.) to access kernel memory. In order for these functions to work with the kernel domain set to Client, the patch modifies the LDRT/STRT and related instructions to the LDR/STR ones. The user pages access rights are also modified for kernel read-only access rather than read/write so that the copy-on-write mechanism still works. CPU_USE_DOMAINS gets disabled only if the hardware has a TLS register (CPU_32v6K is defined) since writing the TLS value to the high vectors page isn't possible. The user addresses passed to the kernel are checked by the access_ok() function so that they do not point to the kernel space. Tested-by: Anton Vorontsov Cc: Tony Lindgren Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 4 ++-- arch/arm/kernel/fiq.c | 5 +++++ arch/arm/kernel/traps.c | 14 ++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5de..35f3f20d6731 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -735,7 +735,7 @@ ENTRY(__switch_to) THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif set_tls r3, r4, r5 @@ -744,7 +744,7 @@ ENTRY(__switch_to) ldr r8, =__stack_chk_guard ldr r7, [r7, #TSK_STACK_CANARY] #endif -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif mov r5, r0 diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 6ff7919613d7..d601ef297eb6 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -45,6 +45,7 @@ #include #include #include +#include static unsigned long no_fiq_insn; @@ -77,7 +78,11 @@ int show_fiq_list(struct seq_file *p, void *v) void set_fiq_handler(void *start, unsigned int length) { +#if defined(CONFIG_CPU_USE_DOMAINS) memcpy((void *)0xffff001c, start, length); +#else + memcpy(vectors_page + 0x1c, start, length); +#endif flush_icache_range(0xffff001c, 0xffff001c + length); if (!vectors_high()) flush_icache_range(0x1c, 0x1c + length); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index cda78d59aa31..87abca018054 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -37,6 +37,8 @@ static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; +void *vectors_page; + #ifdef CONFIG_DEBUG_USER unsigned int user_debug; @@ -759,7 +761,11 @@ static void __init kuser_get_tls_init(unsigned long vectors) void __init early_trap_init(void) { +#if defined(CONFIG_CPU_USE_DOMAINS) unsigned long vectors = CONFIG_VECTORS_BASE; +#else + unsigned long vectors = (unsigned long)vectors_page; +#endif extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; extern char __kuser_helper_start[], __kuser_helper_end[]; @@ -783,10 +789,10 @@ void __init early_trap_init(void) * Copy signal return handlers into the vector page, and * set sigreturn to be a pointer to these. */ - memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes, - sizeof(sigreturn_codes)); - memcpy((void *)KERN_RESTART_CODE, syscall_restart_code, - sizeof(syscall_restart_code)); + memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), + sigreturn_codes, sizeof(sigreturn_codes)); + memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE), + syscall_restart_code, sizeof(syscall_restart_code)); flush_icache_range(vectors, vectors + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); -- cgit v1.2.1 From 64d2dc384e41e2b7acead6804593ddaaf8aad8e1 Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Thu, 16 Sep 2010 18:00:47 +0100 Subject: ARM: 6396/1: Add SWP/SWPB emulation for ARMv7 processors The SWP instruction was deprecated in the ARMv6 architecture, superseded by the LDREX/STREX family of instructions for load-linked/store-conditional operations. The ARMv7 multiprocessing extensions mandate that SWP/SWPB instructions are treated as undefined from reset, with the ability to enable them through the System Control Register SW bit. This patch adds the alternative solution to emulate the SWP and SWPB instructions using LDREX/STREX sequences, and log statistics to /proc/cpu/swp_emulation. To correctly deal with copy-on-write, it also modifies cpu_v7_set_pte_ext to change the mappings to priviliged RO when user RO. Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Acked-by: Kirill A. Shutemov Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/swp_emulate.c | 267 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 arch/arm/kernel/swp_emulate.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..119a6bb59bde 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c new file mode 100644 index 000000000000..7a5760922914 --- /dev/null +++ b/arch/arm/kernel/swp_emulate.c @@ -0,0 +1,267 @@ +/* + * linux/arch/arm/kernel/swp_emulate.c + * + * Copyright (C) 2009 ARM Limited + * __user_* functions adapted from include/asm/uaccess.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Implements emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive for processors that have them disabled (or future ones that + * might not implement them). + * + * Syntax of SWP{B} instruction: SWP{B} , , [] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Error-checking SWP macros implemented using ldrex{b}/strex{b} + */ +#define __user_swpX_asm(data, addr, res, temp, B) \ + __asm__ __volatile__( \ + " mov %2, %1\n" \ + "0: ldrex"B" %1, [%3]\n" \ + "1: strex"B" %0, %2, [%3]\n" \ + " cmp %0, #0\n" \ + " movne %0, %4\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, %5\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 0b, 3b\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "=&r" (res), "+r" (data), "=&r" (temp) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "cc", "memory") + +#define __user_swp_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "") +#define __user_swpb_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "b") + +/* + * Macros/defines for extracting register numbers from instruction. + */ +#define EXTRACT_REG_NUM(instruction, offset) \ + (((instruction) & (0xf << (offset))) >> (offset)) +#define RN_OFFSET 16 +#define RT_OFFSET 12 +#define RT2_OFFSET 0 +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +static unsigned long swpcounter; +static unsigned long swpbcounter; +static unsigned long abtcounter; +static pid_t previous_pid; + +#ifdef CONFIG_PROC_FS +static int proc_read_status(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + char *p = page; + int len; + + p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter); + p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter); + p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter); + if (previous_pid != 0) + p += sprintf(p, "Last process:\t\t%d\n", previous_pid); + + len = (p - page) - off; + if (len < 0) + len = 0; + + *eof = (len <= count) ? 1 : 0; + *start = page + off; + + return len; +} +#endif + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +static void set_segfault(struct pt_regs *regs, unsigned long addr) +{ + siginfo_t info; + + if (find_vma(current->mm, addr) == NULL) + info.si_code = SEGV_MAPERR; + else + info.si_code = SEGV_ACCERR; + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_addr = (void *) instruction_pointer(regs); + + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm_notify_die("Illegal memory access", regs, &info, 0, 0); + + abtcounter++; +} + +static int emulate_swpX(unsigned int address, unsigned int *data, + unsigned int type) +{ + unsigned int res = 0; + + if ((type != TYPE_SWPB) && (address & 0x3)) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } + + while (1) { + unsigned long temp; + + /* + * Barrier required between accessing protected resource and + * releasing a lock for it. Legacy code might not have done + * this, and we cannot determine that this is not the case + * being emulated, so insert always. + */ + smp_mb(); + + if (type == TYPE_SWPB) + __user_swpb_asm(*data, address, res, temp); + else + __user_swp_asm(*data, address, res, temp); + + if (likely(res != -EAGAIN) || signal_pending(current)) + break; + + cond_resched(); + } + + if (res == 0) { + /* + * Barrier also required between aquiring a lock for a + * protected resource and accessing the resource. Inserted for + * same reason as above. + */ + smp_mb(); + + if (type == TYPE_SWPB) + swpbcounter++; + else + swpcounter++; + } + + return res; +} + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, unsigned int instr) +{ + unsigned int address, destreg, data, type; + unsigned int res = 0; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); + + if (current->pid != previous_pid) { + pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", + current->comm, (unsigned long)current->pid); + previous_pid = current->pid; + } + + address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)]; + data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); + + type = instr & TYPE_SWPB; + + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", + EXTRACT_REG_NUM(instr, RN_OFFSET), address, + destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); + + /* Check access in reasonable access range for both SWP and SWPB */ + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to %p not allowed!\n", + (void *)address); + res = -EFAULT; + } else { + res = emulate_swpX(address, &data, type); + } + + if (res == 0) { + /* + * On successful emulation, revert the adjustment to the PC + * made in kernel/traps.c in order to resume execution at the + * instruction following the SWP{B}. + */ + regs->ARM_pc += 4; + regs->uregs[destreg] = data; + } else if (res == -EFAULT) { + /* + * Memory errors do not mean emulation failed. + * Set up signal info to return SEGV, then return OK + */ + set_segfault(regs, address); + } + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. + */ +static struct undef_hook swp_hook = { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT, + .cpsr_val = USR_MODE, + .fn = swp_handler +}; + +/* + * Register handler and create status file in /proc/cpu + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init swp_emulation_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *res; + + res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL); + + if (!res) + return -ENOMEM; + + res->read_proc = proc_read_status; +#endif /* CONFIG_PROC_FS */ + + printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n"); + register_undef_hook(&swp_hook); + + return 0; +} + +late_initcall(swp_emulation_init); -- cgit v1.2.1 From 61b5cb1c3bff8875d2fd289c7b6ac344f95261fa Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 20:51:58 +0530 Subject: ARM: place C irq handlers in IRQ_ENTRY for ftrace When FUNCTION_GRAPH_TRACER is enabled, place do_IRQ() and friends in the IRQ_ENTRY section so that the irq-related features of the function graph tracer work. Signed-off-by: Rabin Vincent --- arch/arm/kernel/irq.c | 4 +++- arch/arm/kernel/smp.c | 5 +++-- arch/arm/kernel/vmlinux.lds.S | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..6d616333340f 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -105,7 +106,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..bbca89872c18 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -544,7 +545,7 @@ static void ipi_cpu_stop(unsigned int cpu) * * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46b..897c1a8f1694 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT -- cgit v1.2.1 From d3b9dc9dd2b994f396741f7086ffe7a48bacb165 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 17:39:47 +0530 Subject: ARM: ftrace: use gas macros to avoid code duplication Use assembler macros to avoid copy/pasting code between the implementations of the two variants of the mcount call. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 146 ++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 66 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd2..fe1d5862b19f 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -141,98 +141,112 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + mcount_exit -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - .global ftrace_call -ftrace_call: + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif -ENTRY(__gnu_mcount_nc) +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} - -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit ENTRY(ftrace_stub) .Lftrace_stub: -- cgit v1.2.1 From 376cfa8730c08c0394d0aa1d4a80fd8c9971f323 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Sat, 9 Oct 2010 22:24:38 +0530 Subject: ARM: ftrace: function graph tracer support Cc: Tim Bird [rabin@rab.in: rebase on top of latest code, keep code in ftrace.c instead of separate file, check for ftrace_graph_entry also] Signed-off-by: Rabin Vincent --- arch/arm/kernel/Makefile | 3 ++- arch/arm/kernel/entry-common.S | 46 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/ftrace.c | 34 +++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..679851a9f589 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,6 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index fe1d5862b19f..9f1766211668 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -148,6 +148,20 @@ ENDPROC(ret_from_fork) adr r0, .Lftrace_stub cmp r0, r2 bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif + mcount_exit 1: mcount_get_lr r1 @ lr of instrumented func @@ -172,6 +186,15 @@ ftrace_call\suffix: mcount_exit .endm +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) + mov r1, lr @ instrumented routine (func) + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm + #ifdef CONFIG_OLD_MCOUNT /* * mcount @@ -206,6 +229,12 @@ ENTRY(ftrace_caller_old) ENDPROC(ftrace_caller_old) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit @@ -244,10 +273,27 @@ ENTRY(ftrace_caller) ENDPROC(ftrace_caller) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif + ENTRY(ftrace_stub) .Lftrace_stub: mov pc, lr diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea7..7a702a502871 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -193,3 +194,36 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.1 From dd686eb13959e49a1112fd608c124ab711050582 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 6 Nov 2010 23:03:21 +0530 Subject: ARM: ftrace: graph tracer + dynamic ftrace Support the graph tracer + dynamic ftrace combination on ARM. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 12 ++++++++ arch/arm/kernel/ftrace.c | 69 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 9f1766211668..aae802ee12f8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -183,12 +183,24 @@ ENDPROC(ret_from_fork) ftrace_call\suffix: bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + mcount_exit .endm .macro __ftrace_graph_caller sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr mov r1, lr @ instrumented routine (func) +#endif sub r1, r1, #MCOUNT_INSN_SIZE mov r2, fp @ frame pointer bl prepare_ftrace_return diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 7a702a502871..c0062ad1e847 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -60,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -84,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -104,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -226,4 +238,51 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *parent = old; } } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.1 From 84fee97a026ca085f08381054513f9e24689a303 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:13:56 +0000 Subject: ARM: perf: consolidate common PMU behaviour The functions for mapping PMU events (perf, cache and raw) are common between all PMU types and differ only in the data on which they operate. This patch implements common definitions of these mapping functions and changes the arm_pmu struct to hold pointers to the data which they require. This is in anticipation of separating out the PMU-specific code into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 131 +++++++++++++------------------------------ 1 file changed, 38 insertions(+), 93 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..c49e1701a2f6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -84,14 +84,17 @@ struct arm_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +139,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +156,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -165,6 +164,19 @@ armpmu_map_cache_event(u64 config) return ret; } +static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, @@ -458,11 +470,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -1121,30 +1133,6 @@ armv6pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) @@ -1240,13 +1228,14 @@ static const struct arm_pmu armv6pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -1263,13 +1252,14 @@ static const struct arm_pmu armv6mpcore_pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2093,27 +2083,6 @@ static void armv7pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { @@ -2144,12 +2113,12 @@ static struct arm_pmu armv7pmu = { .handle_irq = armv7pmu_handle_irq, .enable = armv7pmu_enable_event, .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, .read_counter = armv7pmu_read_counter, .write_counter = armv7pmu_write_counter, .get_event_idx = armv7pmu_get_event_idx, .start = armv7pmu_start, .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, .max_period = (1LLU << 32) - 1, }; @@ -2318,21 +2287,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) #define XSCALE_PMU_CNT64 0x008 -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - #define XSCALE1_OVERFLOWED_MASK 0x700 #define XSCALE1_CCOUNT_OVERFLOW 0x400 #define XSCALE1_COUNT0_OVERFLOW 0x100 @@ -2598,13 +2552,14 @@ static const struct arm_pmu xscale1pmu = { .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale1pmu_read_counter, .write_counter = xscale1pmu_write_counter, .get_event_idx = xscale1pmu_get_event_idx, .start = xscale1pmu_start, .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2953,13 +2908,14 @@ static const struct arm_pmu xscale2pmu = { .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale2pmu_read_counter, .write_counter = xscale2pmu_write_counter, .get_event_idx = xscale2pmu_get_event_idx, .start = xscale2pmu_start, .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 5, .max_period = (1LLU << 32) - 1, }; @@ -2978,20 +2934,14 @@ init_hw_perf_events(void) case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3000,9 +2950,8 @@ init_hw_perf_events(void) break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3016,13 +2965,9 @@ init_hw_perf_events(void) switch (part_number) { case 1: armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; case 2: armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; } } -- cgit v1.2.1 From 59a98a1e56edea4d7d9c5f4ce9d50e271a04993c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:18:36 +0000 Subject: ARM: perf: avoid exposing internal stop function for v6 PMU Unlike other pmu functions, armv6pmu_pmu_stop is not declared static. This patch adds the missing keyword. Acked-by: Jamie Iles Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c49e1701a2f6..35319b8e4d4b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1121,7 +1121,7 @@ armv6pmu_start(void) spin_unlock_irqrestore(&pmu_lock, flags); } -void +static void armv6pmu_stop(void) { unsigned long flags, val; -- cgit v1.2.1 From 3cb314bae2191b432a7e898abf865db880f6d07d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:37:46 +0000 Subject: ARM: perf: add _init() functions to PMUs In preparation for separating the PMU-specific code, this patch adds self-contained init functions to each PMU, therefore removing any PMU-specific knowledge from the PMU-agnostic init_hw_perf_events function. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 65 ++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 35319b8e4d4b..acc4e91dd300 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1240,6 +1240,11 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + /* * ARMv6mpcore is almost identical to single core ARMv6 with the exception * that some of the events have different enumerations and that there is no @@ -1264,6 +1269,11 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} + /* * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. * @@ -2136,6 +2146,25 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + + /* * ARMv5 [xscale] Performance counter handling code. * @@ -2564,6 +2593,11 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + #define XSCALE2_OVERFLOWED_MASK 0x01f #define XSCALE2_CCOUNT_OVERFLOW 0x001 #define XSCALE2_COUNT0_OVERFLOW 0x002 @@ -2920,6 +2954,11 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} + static int __init init_hw_perf_events(void) { @@ -2933,30 +2972,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -2964,10 +2989,10 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; + armpmu = xscale2pmu_init(); break; } } -- cgit v1.2.1 From 629948310e4270e9b32c37b4a65a8cd5d6ebf38a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 18:45:27 +0000 Subject: ARM: perf: encode PMU name in arm_pmu structure Currently, perf uses the PMU ID as an index into a string table to look up the name of a given PMU. This patch encodes the name of a PMU directly into the arm_pmu structure so that PMU-specific code can be factored out into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index acc4e91dd300..ac4e9a1ed80b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -69,18 +69,9 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); @@ -1225,6 +1216,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, static const struct arm_pmu armv6pmu = { .id = ARM_PERF_PMU_ID_V6, + .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, @@ -1254,6 +1246,7 @@ const struct arm_pmu *__init armv6pmu_init(void) */ static const struct arm_pmu armv6mpcore_pmu = { .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, @@ -2149,6 +2142,7 @@ static u32 __init armv7_reset_read_pmnc(void) const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.cache_map = &armv7_a8_perf_cache_map; armv7pmu.event_map = &armv7_a8_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2158,6 +2152,7 @@ const struct arm_pmu *__init armv7_a8_pmu_init(void) const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.cache_map = &armv7_a9_perf_cache_map; armv7pmu.event_map = &armv7_a9_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2578,6 +2573,7 @@ xscale1pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale1pmu = { .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, @@ -2939,6 +2935,7 @@ xscale2pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale2pmu = { .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, @@ -2999,7 +2996,7 @@ init_hw_perf_events(void) if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } -- cgit v1.2.1 From 43eab87828fee65f89f4088736b2b7a187390a2f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 19:04:32 +0000 Subject: ARM: perf: separate PMU backends into multiple files The ARM perf_event.c file contains all PMU backends and, as new PMUs are introduced, will continue to grow. This patch follows the example of x86 and splits the PMU implementations into separate files which are then #included back into the main file. Compile-time guards are added to each PMU file to avoid compiling in code that is not relevant for the version of the architecture which we are targetting. Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2357 +---------------------------------- arch/arm/kernel/perf_event_v6.c | 672 ++++++++++ arch/arm/kernel/perf_event_v7.c | 906 ++++++++++++++ arch/arm/kernel/perf_event_xscale.c | 807 ++++++++++++ 4 files changed, 2390 insertions(+), 2352 deletions(-) create mode 100644 arch/arm/kernel/perf_event_v6.c create mode 100644 arch/arm/kernel/perf_event_v7.c create mode 100644 arch/arm/kernel/perf_event_xscale.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ac4e9a1ed80b..421a4bb88fed 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -606,2355 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6_perf_cache_map, - .event_map = &armv6_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6pmu_init(void) -{ - return &armv6pmu; -} - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6mpcore_perf_cache_map, - .event_map = &armv6mpcore_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6mpcore_pmu_init(void) -{ - return &armv6mpcore_pmu; -} - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .raw_event_mask = 0xFF, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -const struct arm_pmu *__init armv7_a8_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - -const struct arm_pmu *__init armv7_a9_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - - -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .name = "xscale1", - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale1pmu_init(void) -{ - return &xscale1pmu; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale2pmu_init(void) -{ - return &xscale2pmu; -} +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" static int __init init_hw_perf_events(void) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 000000000000..7aeb07da9076 --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,672 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#ifdef CONFIG_CPU_V6 +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .name = "v6", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} +#else +const struct arm_pmu *__init armv6pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 000000000000..4d0423969df9 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,906 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). + */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} +#else +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 000000000000..4e9592789d40 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,807 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} +#else +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_XSCALE */ -- cgit v1.2.1 From 283a1b92e25fe3a62c766a042f96dad2eefa7d17 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 8 Nov 2010 10:11:19 +0000 Subject: ARM: always build swp_emulate as ARMv7 swp_emulate is only used on ARMv7+, and includes ARMv7+ assembly instructions. Allow the assembler to accept ARMv7 instructions, but leave the compiler's code generation options alone. Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 119a6bb59bde..4036a5926465 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o +CFLAGS_swp_emulate.o := -Wa,-march=armv7-a obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o -- cgit v1.2.1 From 69529c0eb76469168f1dd5851f363dbab17ce8fd Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 16 Nov 2010 00:19:55 +0000 Subject: ARM: pgtable: directly pass pgd/pmd/pte to their error functions Rather than passing the pte value to __pte_error, pass the raw pte_t cookie instead. Do the same for pmd and pgd functions. Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 446aee97436f..354cd4ce400a 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -708,19 +708,19 @@ void __readwrite_bug(const char *fn) } EXPORT_SYMBOL(__readwrite_bug); -void __pte_error(const char *file, int line, unsigned long val) +void __pte_error(const char *file, int line, pte_t pte) { - printk("%s:%d: bad pte %08lx.\n", file, line, val); + printk("%s:%d: bad pte %08lx.\n", file, line, pte_val(pte)); } -void __pmd_error(const char *file, int line, unsigned long val) +void __pmd_error(const char *file, int line, pmd_t pmd) { - printk("%s:%d: bad pmd %08lx.\n", file, line, val); + printk("%s:%d: bad pmd %08lx.\n", file, line, pmd_val(pmd)); } -void __pgd_error(const char *file, int line, unsigned long val) +void __pgd_error(const char *file, int line, pgd_t pgd) { - printk("%s:%d: bad pgd %08lx.\n", file, line, val); + printk("%s:%d: bad pgd %08lx.\n", file, line, pgd_val(pgd)); } asmlinkage void __div0(void) -- cgit v1.2.1 From 8931360eb9c6ec8bd30efef579cef81917a2fcf3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 12 Nov 2010 13:02:46 +0000 Subject: ARM: module: clean up handling of ELF unwind tables There's no need to keep pointers to the ELF sections available while the module is loaded - we only need the section pointers while we're finding and registering the unwind tables, which can all be done during the finalize stage of loading. Signed-off-by: Russell King --- arch/arm/kernel/module.c | 106 +++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 55 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index d9bd786ce23d..f9937c4b7d1a 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -67,35 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, char *secstrings, struct module *mod) { -#ifdef CONFIG_ARM_UNWIND - Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; - struct arm_unwind_mapping *maps = mod->arch.map; - - for (s = sechdrs; s < sechdrs_end; s++) { - char const *secname = secstrings + s->sh_name; - - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].sec_text = s; - else if (strcmp(".devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].sec_text = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].sec_text = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].sec_text = s; - else if (strcmp(".devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].sec_text = s; - } -#endif return 0; } @@ -300,41 +271,66 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, return -ENOEXEC; } -#ifdef CONFIG_ARM_UNWIND -static void register_unwind_tables(struct module *mod) +struct mod_unwind_map { + const Elf_Shdr *unw_sec; + const Elf_Shdr *txt_sec; +}; + +int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { +#ifdef CONFIG_ARM_UNWIND + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; + struct mod_unwind_map maps[ARM_SEC_MAX]; int i; - for (i = 0; i < ARM_SEC_MAX; ++i) { - struct arm_unwind_mapping *map = &mod->arch.map[i]; - if (map->unw_sec && map->sec_text) - map->unwind = unwind_table_add(map->unw_sec->sh_addr, - map->unw_sec->sh_size, - map->sec_text->sh_addr, - map->sec_text->sh_size); + + memset(maps, 0, sizeof(maps)); + + for (s = sechdrs; s < sechdrs_end; s++) { + const char *secname = secstrs + s->sh_name; + + if (strcmp(".ARM.exidx.init.text", secname) == 0) + maps[ARM_SEC_INIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].unw_sec = s; + else if (strcmp(".ARM.exidx", secname) == 0) + maps[ARM_SEC_CORE].unw_sec = s; + else if (strcmp(".ARM.exidx.exit.text", secname) == 0) + maps[ARM_SEC_EXIT].unw_sec = s; + else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].unw_sec = s; + else if (strcmp(".init.text", secname) == 0) + maps[ARM_SEC_INIT].txt_sec = s; + else if (strcmp(".devinit.text", secname) == 0) + maps[ARM_SEC_DEVINIT].txt_sec = s; + else if (strcmp(".text", secname) == 0) + maps[ARM_SEC_CORE].txt_sec = s; + else if (strcmp(".exit.text", secname) == 0) + maps[ARM_SEC_EXIT].txt_sec = s; + else if (strcmp(".devexit.text", secname) == 0) + maps[ARM_SEC_DEVEXIT].txt_sec = s; } -} -static void unregister_unwind_tables(struct module *mod) -{ - int i = ARM_SEC_MAX; - while (--i >= 0) - unwind_table_del(mod->arch.map[i].unwind); -} -#else -static inline void register_unwind_tables(struct module *mod) { } -static inline void unregister_unwind_tables(struct module *mod) { } + for (i = 0; i < ARM_SEC_MAX; i++) + if (maps[i].unw_sec && maps[i].txt_sec) + mod->arch.unwind[i] = + unwind_table_add(maps[i].unw_sec->sh_addr, + maps[i].unw_sec->sh_size, + maps[i].txt_sec->sh_addr, + maps[i].txt_sec->sh_size); #endif - -int -module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *module) -{ - register_unwind_tables(module); return 0; } void module_arch_cleanup(struct module *mod) { - unregister_unwind_tables(mod); +#ifdef CONFIG_ARM_UNWIND + int i; + + for (i = 0; i < ARM_SEC_MAX; i++) + if (mod->arch.unwind[i]) + unwind_table_del(mod->arch.unwind[i]); +#endif } -- cgit v1.2.1 From 50005a8deb38e5e6456ebd94e57adb321d4589de Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 12 Nov 2010 13:04:16 +0000 Subject: ARM: module: ignore unwind for sections not marked SHF_ALLOC If a section is not marked with SHF_ALLOC, it will be discarded by the module code. Therefore, it is not correct to register the unwind tables. Signed-off-by: Russell King --- arch/arm/kernel/module.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index f9937c4b7d1a..0c1bb68ff4a8 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -290,6 +290,9 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, for (s = sechdrs; s < sechdrs_end; s++) { const char *secname = secstrs + s->sh_name; + if (!(s->sh_flags & SHF_ALLOC)) + continue; + if (strcmp(".ARM.exidx.init.text", secname) == 0) maps[ARM_SEC_INIT].unw_sec = s; else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) -- cgit v1.2.1 From ad3b6993b9c5482e8a2ec5aed181538c921fdcbd Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 09:42:08 +0000 Subject: ARM: SMP: pass an ipi number to smp_cross_call() This allows us to use smp_cross_call() to trigger a number of different software generated interrupts, rather than combining them all on one SGI. Recover the SGI number via do_IPI. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 2 +- arch/arm/kernel/smp.c | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5de..955cf5f539ed 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -48,7 +48,7 @@ */ ALT_SMP(test_for_ipi r0, r6, r5, lr) ALT_UP_B(9997f) - movne r0, sp + movne r1, sp adrne lr, BSYM(1b) bne do_IPI diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..7a236db03fb5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -404,7 +404,7 @@ static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) /* * Call the platform specific cross-CPU call function. */ - smp_cross_call(mask); + smp_cross_call(mask, 1); local_irq_restore(flags); } @@ -537,14 +537,8 @@ static void ipi_cpu_stop(unsigned int cpu) /* * Main handler for inter-processor interrupts - * - * For ARM, the ipimask now only identifies a single - * category of IPI (Bit 1 IPIs have been replaced by a - * different mechanism): - * - * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); -- cgit v1.2.1 From 24480d980e9063b3ebd0dfdf2f396c305956c356 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 09:54:18 +0000 Subject: ARM: SMP: avoid using bitmasks and locks for IPIs, use hardware instead Avoid using bitmasks and locks in the percpu area for IPIs, and instead use individual software generated interrupts to identify the reason for the IPI. This avoids the problems of having spinlocks in the percpu area. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 87 +++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 61 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7a236db03fb5..78d55c681a4f 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -48,20 +48,15 @@ struct secondary_data secondary_data; /* * structures for inter-processor calls - * - A collection of single bit ipi messages. */ struct ipi_data { - spinlock_t lock; unsigned long ipi_count; - unsigned long bits; }; -static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { - .lock = SPIN_LOCK_UNLOCKED, -}; +static DEFINE_PER_CPU(struct ipi_data, ipi_data); enum ipi_msg_type { - IPI_TIMER, + IPI_TIMER = 2, IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, @@ -389,22 +384,13 @@ void __init smp_prepare_boot_cpu(void) static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) { unsigned long flags; - unsigned int cpu; local_irq_save(flags); - for_each_cpu(cpu, mask) { - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); - - spin_lock(&ipi->lock); - ipi->bits |= 1 << msg; - spin_unlock(&ipi->lock); - } - /* * Call the platform specific cross-CPU call function. */ - smp_cross_call(mask, 1); + smp_cross_call(mask, msg); local_irq_restore(flags); } @@ -546,56 +532,35 @@ asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) ipi->ipi_count++; - for (;;) { - unsigned long msgs; - - spin_lock(&ipi->lock); - msgs = ipi->bits; - ipi->bits = 0; - spin_unlock(&ipi->lock); - - if (!msgs) - break; - - do { - unsigned nextmsg; - - nextmsg = msgs & -msgs; - msgs &= ~nextmsg; - nextmsg = ffz(~nextmsg); - - switch (nextmsg) { - case IPI_TIMER: - ipi_timer(); - break; + switch (ipinr) { + case IPI_TIMER: + ipi_timer(); + break; - case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ - break; + case IPI_RESCHEDULE: + /* + * nothing more to do - eveything is + * done on the interrupt return path + */ + break; - case IPI_CALL_FUNC: - generic_smp_call_function_interrupt(); - break; + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; - case IPI_CPU_STOP: - ipi_cpu_stop(cpu); - break; + case IPI_CPU_STOP: + ipi_cpu_stop(cpu); + break; - default: - printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", - cpu, nextmsg); - break; - } - } while (msgs); + default: + printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", + cpu, ipinr); + break; } - set_irq_regs(old_regs); } -- cgit v1.2.1 From 0df7095205cbf6ea1cdfe6254e0d6a3b823caa3b Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 2 Dec 2010 19:16:56 +0000 Subject: ARM: SMP: remove IRQ-disabling for smp_cross_call() As we've now removed the spinlock and bitmask, we have nothing left which requires interrupts to be disabled when sending an IPI. All current IPI-sending implementations use the GIC, which also does not require interrupts disabled when calling gic_raise_softirq(). Remove the now unnecessary IRQ disable. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 78d55c681a4f..4878e51561f9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -383,16 +383,10 @@ void __init smp_prepare_boot_cpu(void) static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) { - unsigned long flags; - - local_irq_save(flags); - /* * Call the platform specific cross-CPU call function. */ smp_cross_call(mask, msg); - - local_irq_restore(flags); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) -- cgit v1.2.1 From b23065313297e750edd57ab6edfd36224826724e Mon Sep 17 00:00:00 2001 From: Per Fransson Date: Fri, 3 Dec 2010 10:53:38 +0100 Subject: ARM: 6522/1: kexec: Add call to non-crashing cores through IPI When kexec is used to start a crash kernel the other cores are notified. These non-crashing cores will save their state in the crash notes and then do nothing. Signed-off-by: Per Fransson Signed-off-by: Russell King --- arch/arm/kernel/machine_kexec.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 3a8fd5140d7a..30ead135ff5f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +static atomic_t waiting_for_crash_ipi; + /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -37,9 +39,37 @@ void machine_kexec_cleanup(struct kimage *image) { } +void machine_crash_nonpanic_core(void *unused) +{ + struct pt_regs regs; + + crash_setup_regs(®s, NULL); + printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", + smp_processor_id()); + crash_save_cpu(®s, smp_processor_id()); + flush_cache_all(); + + atomic_dec(&waiting_for_crash_ipi); + while (1) + cpu_relax(); +} + void machine_crash_shutdown(struct pt_regs *regs) { + unsigned long msecs; + local_irq_disable(); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n"); + crash_save_cpu(regs, smp_processor_id()); printk(KERN_INFO "Loading crashdump kernel...\n"); -- cgit v1.2.1 From 4d6b7a779be34e1df296abc1dc555134a8cf34af Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Nov 2010 18:15:53 +0100 Subject: ARM: 6512/1: perf: fix warnings generated by sparse Russell reported a number of warnings coming from sparse when checking the ARM perf_event.c files: | perf_event.c seems to also have problems too: | | CHECK arch/arm/kernel/perf_event.c | arch/arm/kernel/perf_event.c:37:1: warning: symbol 'pmu_lock' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:70:1: warning: symbol 'cpu_hw_events' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1006:1: warning: symbol 'armv6pmu_enable_event' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1113:1: warning: symbol 'armv6pmu_stop' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:1956:6: warning: symbol 'armv7pmu_enable_event' was not declared. Should it be static? | arch/arm/kernel/perf_event.c:3072:14: warning: incorrect type in argument 1 (different address spaces) | arch/arm/kernel/perf_event.c:3072:14: expected void const volatile [noderef] * | arch/arm/kernel/perf_event.c:3072:14: got struct frame_tail *tail | arch/arm/kernel/perf_event.c:3074:49: warning: incorrect type in argument 2 (different address spaces) | arch/arm/kernel/perf_event.c:3074:49: expected void const [noderef] *from | arch/arm/kernel/perf_event.c:3074:49: got struct frame_tail *tail This patch resolves these issues so we can live in silence again. Reported-by: Russell King Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 18 +++++++++--------- arch/arm/kernel/perf_event_v6.c | 10 +++++----- arch/arm/kernel/perf_event_v7.c | 10 +++++----- arch/arm/kernel/perf_event_xscale.c | 8 ++++---- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 421a4bb88fed..50c197bfac0f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -32,7 +32,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -DEFINE_SPINLOCK(pmu_lock); +static DEFINE_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add @@ -65,7 +65,7 @@ struct cpu_hw_events { */ unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); struct arm_pmu { enum arm_perf_pmu_ids id; @@ -673,17 +673,17 @@ arch_initcall(init_hw_perf_events); * This code has been adapted from the ARM OProfile support. */ struct frame_tail { - struct frame_tail *fp; - unsigned long sp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long sp; + unsigned long lr; } __attribute__((packed)); /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. */ -static struct frame_tail * -user_backtrace(struct frame_tail *tail, +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; @@ -709,10 +709,10 @@ user_backtrace(struct frame_tail *tail, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail *tail; + struct frame_tail __user *tail; - tail = (struct frame_tail *)regs->ARM_fp - 1; + tail = (struct frame_tail __user *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 7aeb07da9076..3f427aae211d 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -400,7 +400,7 @@ armv6pmu_write_counter(int counter, WARN_ONCE(1, "invalid counter number (%d)\n", counter); } -void +static void armv6pmu_enable_event(struct hw_perf_event *hwc, int idx) { @@ -625,7 +625,7 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return &armv6pmu; } @@ -655,17 +655,17 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return &armv6mpcore_pmu; } #else -const struct arm_pmu *__init armv6pmu_init(void) +static const struct arm_pmu *__init armv6pmu_init(void) { return NULL; } -const struct arm_pmu *__init armv6mpcore_pmu_init(void) +static const struct arm_pmu *__init armv6mpcore_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4d0423969df9..a68ff1c10dec 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -681,7 +681,7 @@ static void armv7_pmnc_dump_regs(void) } #endif -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) { unsigned long flags; @@ -874,7 +874,7 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } -const struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; @@ -884,7 +884,7 @@ const struct arm_pmu *__init armv7_a8_pmu_init(void) return &armv7pmu; } -const struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; @@ -894,12 +894,12 @@ const struct arm_pmu *__init armv7_a9_pmu_init(void) return &armv7pmu; } #else -const struct arm_pmu *__init armv7_a8_pmu_init(void) +static const struct arm_pmu *__init armv7_a8_pmu_init(void) { return NULL; } -const struct arm_pmu *__init armv7_a9_pmu_init(void) +static const struct arm_pmu *__init armv7_a9_pmu_init(void) { return NULL; } diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 4e9592789d40..f14fbb6c345b 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -428,7 +428,7 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return &xscale1pmu; } @@ -790,17 +790,17 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; -const struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return &xscale2pmu; } #else -const struct arm_pmu *__init xscale1pmu_init(void) +static const struct arm_pmu *__init xscale1pmu_init(void) { return NULL; } -const struct arm_pmu *__init xscale2pmu_init(void) +static const struct arm_pmu *__init xscale2pmu_init(void) { return NULL; } -- cgit v1.2.1 From 961ec6daa7b14f376c30d447a830fa4783a2112c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Dec 2010 18:01:49 +0100 Subject: ARM: 6521/1: perf: use raw_spinlock_t for pmu_lock For kernels built with PREEMPT_RT, critical sections protected by standard spinlocks are preemptible. This is not acceptable on perf as (a) we may be scheduled onto a different CPU whilst reading/writing banked PMU registers and (b) the latency when reading the PMU registers becomes unpredictable. This patch upgrades the pmu_lock spinlock to a raw_spinlock instead. Reported-by: Jamie Iles Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 2 +- arch/arm/kernel/perf_event_v6.c | 20 ++++++++++---------- arch/arm/kernel/perf_event_v7.c | 16 ++++++++-------- arch/arm/kernel/perf_event_xscale.c | 32 ++++++++++++++++---------------- 4 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 50c197bfac0f..624e2a5de2b3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -32,7 +32,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -static DEFINE_SPINLOCK(pmu_lock); +static DEFINE_RAW_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 3f427aae211d..c058bfc8532b 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -426,12 +426,12 @@ armv6pmu_enable_event(struct hw_perf_event *hwc, * Mask out the current event and set the counter to count the event * that we're interested in. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t @@ -500,11 +500,11 @@ armv6pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val |= ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -512,11 +512,11 @@ armv6pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~ARMV6_PMCR_ENABLE; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -570,12 +570,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, * of ETM bus signal assertion cycles. The external reporting should * be disabled and so this should never increment. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -599,12 +599,12 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, * Unlike UP ARMv6, we don't have a way of stopping the counters. We * simply disable the interrupt reporting. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = armv6_pmcr_read(); val &= ~mask; val |= evt; armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static const struct arm_pmu armv6pmu = { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index a68ff1c10dec..2e1402556fa0 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -689,7 +689,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -713,7 +713,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_enable_counter(idx); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) @@ -723,7 +723,7 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) /* * Disable counter and interrupt */ - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* * Disable counter @@ -735,7 +735,7 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) */ armv7_pmnc_disable_intens(idx); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) @@ -805,20 +805,20 @@ static void armv7pmu_start(void) { unsigned long flags; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Enable all counters */ armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void armv7pmu_stop(void) { unsigned long flags; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); /* Disable all counters */ armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index f14fbb6c345b..28cd3b025bc3 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -291,12 +291,12 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -322,12 +322,12 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~mask; val |= evt; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -355,11 +355,11 @@ xscale1pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val |= XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -367,11 +367,11 @@ xscale1pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale1pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 @@ -635,10 +635,10 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -678,10 +678,10 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) return; } - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); xscale2pmu_write_event_select(evtsel); xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static int @@ -705,11 +705,11 @@ xscale2pmu_start(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; val |= XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static void @@ -717,11 +717,11 @@ xscale2pmu_stop(void) { unsigned long flags, val; - spin_lock_irqsave(&pmu_lock, flags); + raw_spin_lock_irqsave(&pmu_lock, flags); val = xscale2pmu_read_pmnc(); val &= ~XSCALE_PMU_ENABLE; xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); + raw_spin_unlock_irqrestore(&pmu_lock, flags); } static inline u32 -- cgit v1.2.1 From daf8741675562197d4fb4c4e9d773f53494203a5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Dec 2010 17:08:32 +0000 Subject: ARM: implement support for read-mostly sections As our SMP implementation uses MESI protocols. Grouping together data which is mostly only read together means that we avoid unnecessary cache line bouncing when this code shares a cache line with other data. In other words, cache lines associated with read-mostly data are expected to spend most of their time in shared state. Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46b..1581f6d18cca 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -167,6 +167,7 @@ SECTIONS NOSAVE_DATA CACHELINE_ALIGNED_DATA(32) + READ_MOSTLY_DATA(32) /* * The exception fixup table (might need resorting at runtime) -- cgit v1.2.1 From 0385ebc0c9b16fc2d8262c082843165313f7b1e4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Dec 2010 17:45:55 +0000 Subject: ARM: move high-usage mostly read variables in setup.c to __read_mostly Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 336f14e0e5c2..8075e592f902 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -75,9 +75,9 @@ extern void reboot_setup(char *str); unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int __machine_arch_type; +unsigned int __machine_arch_type __read_mostly; EXPORT_SYMBOL(__machine_arch_type); -unsigned int cacheid; +unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; @@ -91,24 +91,24 @@ EXPORT_SYMBOL(system_serial_low); unsigned int system_serial_high; EXPORT_SYMBOL(system_serial_high); -unsigned int elf_hwcap; +unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL(elf_hwcap); #ifdef MULTI_CPU -struct processor processor; +struct processor processor __read_mostly; #endif #ifdef MULTI_TLB -struct cpu_tlb_fns cpu_tlb; +struct cpu_tlb_fns cpu_tlb __read_mostly; #endif #ifdef MULTI_USER -struct cpu_user_fns cpu_user; +struct cpu_user_fns cpu_user __read_mostly; #endif #ifdef MULTI_CACHE -struct cpu_cache_fns cpu_cache; +struct cpu_cache_fns cpu_cache __read_mostly; #endif #ifdef CONFIG_OUTER_CACHE -struct outer_cache_fns outer_cache; +struct outer_cache_fns outer_cache __read_mostly; EXPORT_SYMBOL(outer_cache); #endif -- cgit v1.2.1 From ac88e07122fc0eb5cbad403be97ef02c317a06b7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Nov 2010 16:51:17 +0000 Subject: ARM: hw_breakpoint: ensure OS lock is clear before writing to debug registers ARMv7 architects a system for saving and restoring the debug registers across low-power modes. At the heart of this system is a lock register which, when set, forbids writes to the debug registers. While locked, writes to debug registers via the co-processor interface will result in undefined instruction traps. Linux currently doesn't make use of this feature because we update the debug registers on context switch anyway, however the status of the lock is IMPLEMENTATION DEFINED on reset. This patch ensures that the lock is cleared during boot so that we can write to the debug registers safely. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 21e3a4ab3b8c..793959ec8982 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -768,6 +768,23 @@ static void __init reset_ctrl_regs(void *unused) { int i; + /* + * v7 debug contains save and restore registers so that debug state + * can be maintained across low-power modes without leaving + * the debug logic powered up. It is IMPLEMENTATION DEFINED whether + * we can write to the debug registers out of reset, so we must + * unlock the OS Lock Access Register to avoid taking undefined + * instruction exceptions later on. + */ + if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { + /* + * Unconditionally clear the lock by writing a value + * other than 0xC5ACCE55 to the access register. + */ + asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + isb(); + } + if (enable_monitor_mode()) return; @@ -810,17 +827,17 @@ static int __init arch_hw_breakpoint_init(void) pr_warning("halting debug mode enabled. Assuming maximum " "watchpoint size of 4 bytes."); } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); - /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ smp_call_function(reset_ctrl_regs, NULL, 1); reset_ctrl_regs(NULL); + + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); } /* Register debug fault handler. */ -- cgit v1.2.1 From 7d99331e4793b52d488e911876ef11d843c6c8c9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Nov 2010 17:45:49 +0000 Subject: ARM: hw_breakpoint: reset control registers in hotplug path The ARMv7 debug architecture doesn't make any guarantees about the contents of debug control registers following a debug logic reset. This patch ensures that we reset the control registers when a cpu comes ONLINE (for example, with hotplug) so that when we enable monitor mode while inserting a breakpoint we won't exhibit random behaviour. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 793959ec8982..515a3c44c118 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -764,7 +764,7 @@ out: /* * One-time initialisation. */ -static void __init reset_ctrl_regs(void *unused) +static void reset_ctrl_regs(void *unused) { int i; @@ -799,6 +799,18 @@ static void __init reset_ctrl_regs(void *unused) } } +static int __cpuinit dbg_reset_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + if (action == CPU_ONLINE) + smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata dbg_reset_nb = { + .notifier_call = dbg_reset_notify, +}; + static int __init arch_hw_breakpoint_init(void) { int ret = 0; @@ -846,6 +858,8 @@ static int __init arch_hw_breakpoint_init(void) hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, "breakpoint debug exception"); + /* Register hotplug notifier. */ + register_cpu_notifier(&dbg_reset_nb); out: return ret; } -- cgit v1.2.1 From 6ee33c2712fcdff2568d9bbadb25c8e5a7c36212 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Nov 2010 12:01:54 +0000 Subject: ARM: hw_breakpoint: correct and simplify alignment fixup code The current hw_breakpoint code tries to fix up the alignment of breakpoints so that we can make use of sparse byte-address-select bits in the control register and give the illusion that we can set breakpoints on unaligned addresses. Although this works on v6 cores, v7 forbids this behaviour, instead requiring breakpoints to be set on aligned addresses and have contiguous byte-address-select ranges depending on the instruction set in use. For ARM the only supported size is 4 bytes, whilst Thumb-2 also permits 2 byte breakpoints (watchpoints can be of 1, 2, 4 or 8 bytes long). This patch simplifies the alignment fixup code so that we require addresses to be aligned to the size of the corresponding breakpoint. This allows us to handle the common case of breaking on a half-word aligned Thumb-2 instruction and also allows us to set byte watchpoints on arbitrary addresses. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 57 ++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 515a3c44c118..d37ed3501e57 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -537,6 +537,17 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } + /* + * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes. + * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported + * by the hardware and must be aligned to the appropriate number of + * bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && + info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + /* Address */ info->address = bp->attr.bp_addr; @@ -561,7 +572,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; - u32 bytelen, max_len, offset, alignment_mask = 0x3; + u32 offset, alignment_mask = 0x3; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); @@ -571,32 +582,27 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Check address alignment. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - if (info->address & alignment_mask) { - /* - * Try to fix the alignment. This may result in a length - * that is too large, so we must check for that. - */ - bytelen = get_hbp_len(info->ctrl.len); - max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : - max_watchpoint_len; - - if (max_len >= 8) - offset = info->address & 0x7; - else - offset = info->address & 0x3; - - if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { - ret = -EFBIG; - goto out; - } - - info->ctrl.len <<= offset; - info->address &= ~offset; - - pr_debug("breakpoint alignment fixup: length = 0x%x, " - "address = 0x%x\n", info->ctrl.len, info->address); + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + case 2: + /* Allow halfword watchpoints and breakpoints. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + default: + ret = -EINVAL; + goto out; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. @@ -607,7 +613,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), "overflow handler required but none found")) { ret = -EINVAL; - goto out; } out: return ret; -- cgit v1.2.1 From 7e20269647169e7ea08a62bdc4979a3ba32e615c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 28 Nov 2010 14:57:24 +0000 Subject: ARM: hw_breakpoint: disable preemption during debug exception handling On ARM, debug exceptions occur in the form of data or prefetch aborts. One difference is that debug exceptions require access to per-cpu banked registers and data structures which are not saved in the low-level exception code. For kernels built with CONFIG_PREEMPT, there is an unlikely scenario that the debug handler ends up running on a different CPU from the one that originally signalled the event, resulting in random data being read from the wrong registers. This patch adds a debug_entry macro to the low-level exception handling code which checks whether the taken exception is a debug exception. If it is, the preempt count for the faulting process is incremented. After the debug handler has finished, the count is decremented. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/kernel/entry-armv.S | 4 ++++ arch/arm/kernel/entry-header.S | 19 +++++++++++++++++++ arch/arm/kernel/hw_breakpoint.c | 18 +++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5de..34bbef0d2e7e 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -198,6 +198,7 @@ __dabt_svc: @ @ set desired IRQ state, then call main handler @ + debug_entry r1 msr cpsr_c, r9 mov r2, sp bl do_DataAbort @@ -324,6 +325,7 @@ __pabt_svc: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 msr cpsr_c, r9 @ Maybe enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -439,6 +441,7 @@ __dabt_usr: @ @ IRQs on, then call the main handler @ + debug_entry r1 enable_irq mov r2, sp adr lr, BSYM(ret_from_exception) @@ -703,6 +706,7 @@ __pabt_usr: #else bl CPU_PABORT_HANDLER #endif + debug_entry r1 enable_irq @ Enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d93f976fb389..ae9464900168 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -165,6 +165,25 @@ .endm #endif /* !CONFIG_THUMB2_KERNEL */ + @ + @ Debug exceptions are taken as prefetch or data aborts. + @ We must disable preemption during the handler so that + @ we can access the debug registers safely. + @ + .macro debug_entry, fsr +#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT) + ldr r4, =0x40f @ mask out fsr.fs + and r5, r4, \fsr + cmp r5, #2 @ debug exception + bne 1f + get_thread_info r10 + ldr r6, [r10, #TI_PREEMPT] @ get preempt count + add r11, r6, #1 @ increment it + str r11, [r10, #TI_PREEMPT] +1: +#endif + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index d37ed3501e57..eeba38008032 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt #include +#include #include #include #include @@ -736,14 +737,17 @@ unlock: /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint]. + * Prefetch Abort Handler [breakpoint] with preemption disabled. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int ret = 1; /* Unhandled fault. */ + int ret = 0; u32 dscr; + /* We must be called with preemption disabled. */ + WARN_ON(preemptible()); + /* We only handle watchpoints and hardware breakpoints. */ ARM_DBG_READ(c1, 0, dscr); @@ -758,11 +762,15 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, watchpoint_handler(addr, regs); break; default: - goto out; + ret = 1; /* Unhandled fault. */ } - ret = 0; -out: + /* + * Re-enable preemption after it was disabled in the + * low-level exception handling code. + */ + preempt_enable(); + return ret; } -- cgit v1.2.1 From 0017ff42ac37ff6aeb87d0b006c5d32b9a39f5fc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 28 Nov 2010 15:09:36 +0000 Subject: ARM: hw_breakpoint: don't advertise reserved breakpoints To permit handling of watchpoint exceptions without signalling a debugger, it is necessary to reserve breakpoint registers for in-kernel use only. This patch ensures that we record and subtract the number of reserved breakpoints from the number of usable breakpoint registers that we advertise to userspace via the ptrace API. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 206 +++++++++++++++++++++++----------------- 1 file changed, 117 insertions(+), 89 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eeba38008032..b16c4568cb01 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -45,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); /* Number of BRP/WRP registers on this CPU. */ static int core_num_brps; +static int core_num_reserved_brps; static int core_num_wrps; /* Debug architecture version. */ @@ -53,87 +54,6 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -/* Determine number of BRP registers available. */ -static int get_num_brps(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 24) & 0xf) + 1; -} - -/* Determine number of WRP registers available. */ -static int get_num_wrps(void) -{ - /* - * FIXME: When a watchpoint fires, the only way to work out which - * watchpoint it was is by disassembling the faulting instruction - * and working out the address of the memory access. - * - * Furthermore, we can only do this if the watchpoint was precise - * since imprecise watchpoints prevent us from calculating register - * based addresses. - * - * For the time being, we only report 1 watchpoint register so we - * always know which watchpoint fired. In the future we can either - * add a disassembler and address generation emulator, or we can - * insert a check to see if the DFAR is set on watchpoint exception - * entry [the ARM ARM states that the DFAR is UNKNOWN, but - * experience shows that it is set on some implementations]. - */ - -#if 0 - u32 didr, wrps; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 28) & 0xf) + 1; -#endif - - return 1; -} - -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - -/* Determine debug architecture. */ -static u8 get_debug_arch(void) -{ - u32 didr; - - /* Do we implement the extended CPUID interface? */ - if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); - return ARM_DEBUG_ARCH_V6; - } - - ARM_DBG_READ(c0, 0, didr); - return (didr >> 16) & 0xf; -} - -/* Does this core support mismatch breakpoints? */ -static int core_has_mismatch_bps(void) -{ - return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; -} - -u8 arch_get_debug_arch(void) -{ - return debug_arch; -} - #define READ_WB_REG_CASE(OP2, M, VAL) \ case ((OP2 << 4) + M): \ ARM_DBG_READ(c ## M, OP2, VAL); \ @@ -211,6 +131,111 @@ static void write_wb_reg(int n, u32 val) isb(); } +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warning("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, 0, didr); + return (didr >> 16) & 0xf; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + +/* Determine number of BRP register available. */ +static int get_num_brp_resources(void) +{ + u32 didr; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_brps(void) +{ + return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 && + get_num_brp_resources() > 1); +} + +/* Determine number of usable WRPs available. */ +static int get_num_wrps(void) +{ + /* + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * Providing we have more than 1 breakpoint register, we only report + * a single watchpoint register for the time being. This way, we always + * know which watchpoint fired. In the future we can either add a + * disassembler and address generation emulator, or we can insert a + * check to see if the DFAR is set on watchpoint exception entry + * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows + * that it is set on some implementations]. + */ + +#if 0 + int wrps; + u32 didr; + ARM_DBG_READ(c0, 0, didr); + wrps = ((didr >> 28) & 0xf) + 1; +#endif + int wrps = 1; + + if (core_has_mismatch_brps() && wrps >= get_num_brp_resources()) + wrps = get_num_brp_resources() - 1; + + return wrps; +} + +/* We reserve one breakpoint for each watchpoint. */ +static int get_num_reserved_brps(void) +{ + if (core_has_mismatch_brps()) + return get_num_wrps(); + return 0; +} + +/* Determine number of usable BRPs available. */ +static int get_num_brps(void) +{ + int brps = get_num_brp_resources(); + if (core_has_mismatch_brps()) + brps -= get_num_reserved_brps(); + return brps; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -326,7 +351,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { info->ctrl.mismatch = 1; @@ -377,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; + max_slots = core_num_brps; if (bp_is_single_step(bp)) { i = max_slots; @@ -611,7 +636,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * we can use the mismatch feature as a poor-man's hardware single-step. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), "overflow handler required but none found")) { ret = -EINVAL; } @@ -698,7 +723,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps; ++i) { + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -801,7 +826,8 @@ static void reset_ctrl_regs(void *unused) if (enable_monitor_mode()) return; - for (i = 0; i < core_num_brps; ++i) { + /* We must also reset any reserved registers. */ + for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -839,13 +865,15 @@ static int __init arch_hw_breakpoint_init(void) /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); + core_num_reserved_brps = get_num_reserved_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", - core_num_brps, core_num_wrps); + core_num_brps + core_num_reserved_brps, core_num_wrps); - if (core_has_mismatch_bps()) - pr_info("1 breakpoint reserved for watchpoint single-step.\n"); + if (core_num_reserved_brps) + pr_info("%d breakpoint(s) reserved for watchpoint " + "single-step.\n", core_num_reserved_brps); ARM_DBG_READ(c1, 0, dscr); if (dscr & ARM_DSCR_HDBGEN) { -- cgit v1.2.1 From 93a04a3416da12647c47840ebe2bb812fcb801d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Nov 2010 16:56:01 +0000 Subject: ARM: hw_breakpoint: do not allocate new breakpoints with preemption disabled The watchpoint single-stepping code calls register_user_hw_breakpoint to register a mismatch breakpoint for stepping over the watchpoint. This is performed with preemption disabled, which is unsafe as we may end up scheduling whilst in_atomic(). Furthermore, using the perf API is rather overkill since we are already in the hw-breakpoint backend and only require access to reserved breakpoints anyway. This patch reworks the watchpoint stepping code so that we don't require another perf_event for the mismatch breakpoint. Instead, we hold a separate arch_hw_breakpoint_ctrl struct inside the watchpoint which is used exclusively for stepping. We can check whether or not stepping is enabled when installing or uninstalling the watchpoint and operate on the breakpoint accordingly. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 134 +++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 57 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b16c4568cb01..81b63e94dfe0 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -315,23 +315,6 @@ u8 arch_get_max_wp_len(void) return max_watchpoint_len; } -/* - * Handler for reactivating a suspended watchpoint when the single - * step `mismatch' breakpoint is triggered. - */ -static void wp_single_step_handler(struct perf_event *bp, int unused, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - perf_event_enable(counter_arch_bp(bp)->suspended_wp); - unregister_hw_breakpoint(bp); -} - -static int bp_is_single_step(struct perf_event *bp) -{ - return bp->overflow_handler == wp_single_step_handler; -} - /* * Install a perf counter breakpoint. */ @@ -340,29 +323,35 @@ int arch_install_hw_breakpoint(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; int i, max_slots, ctrl_base, val_base, ret = 0; + u32 addr, ctrl; /* Ensure that we are in monitor mode and halting mode is disabled. */ ret = enable_monitor_mode(); if (ret) goto out; + addr = info->address; + ctrl = encode_ctrl_reg(info->ctrl) | 0x1; + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - info->ctrl.mismatch = 1; - i = max_slots; - slots[i] = bp; - goto setup; - } } else { /* Watchpoint */ - ctrl_base = ARM_BASE_WCR; - val_base = ARM_BASE_WVR; + if (info->step_ctrl.enabled) { + /* Install into the reserved breakpoint region. */ + ctrl_base = ARM_BASE_BCR + core_num_brps; + val_base = ARM_BASE_BVR + core_num_brps; + /* Override the watchpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } else { + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + } slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -381,12 +370,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) goto out; } -setup: /* Setup the address register. */ - write_wb_reg(val_base + i, info->address); + write_wb_reg(val_base + i, addr); /* Setup the control register. */ - write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); + write_wb_reg(ctrl_base + i, ctrl); out: return ret; @@ -403,15 +391,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; - - if (bp_is_single_step(bp)) { - i = max_slots; - slots[i] = NULL; - goto reset; - } } else { /* Watchpoint */ - base = ARM_BASE_WCR; + if (info->step_ctrl.enabled) + base = ARM_BASE_BCR + core_num_brps; + else + base = ARM_BASE_WCR; slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -429,7 +414,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) return; -reset: /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -579,7 +563,7 @@ static int arch_build_bp_info(struct perf_event *bp) /* Privilege */ info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) + if (arch_check_bp_in_kernelspace(bp)) info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ @@ -664,22 +648,18 @@ static void update_mismatch_flag(int idx, int flag) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); struct arch_hw_breakpoint *info; - struct perf_event_attr attr; /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); - hw_breakpoint_init(&attr); - attr.bp_addr = regs->ARM_pc & ~0x3; - attr.bp_len = HW_BREAKPOINT_LEN_4; - attr.bp_type = HW_BREAKPOINT_X; - for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); - if (slots[i] == NULL) { + wp = slots[i]; + + if (wp == NULL) { rcu_read_unlock(); continue; } @@ -689,24 +669,60 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * single watchpoint, we can set the trigger to the lowest * possible faulting address. */ - info = counter_arch_bp(slots[i]); - info->trigger = slots[i]->attr.bp_addr; + info = counter_arch_bp(wp); + info->trigger = wp->attr.bp_addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); - perf_bp_event(slots[i], regs); + perf_bp_event(wp, regs); /* * If no overflow handler is present, insert a temporary * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!slots[i]->overflow_handler) { - bp = register_user_hw_breakpoint(&attr, - wp_single_step_handler, - current); - counter_arch_bp(bp)->suspended_wp = slots[i]; - perf_event_disable(slots[i]); + if (!wp->overflow_handler) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = regs->ARM_pc; + arch_install_hw_breakpoint(wp); + } + + rcu_read_unlock(); + } +} + +static void watchpoint_single_step_handler(unsigned long pc) +{ + int i; + struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct arch_hw_breakpoint *info; + + for (i = 0; i < core_num_reserved_brps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + if (!info->step_ctrl.enabled) + goto unlock; + + /* + * Restore the original watchpoint if we've completed the + * single-step. + */ + if (info->trigger != pc) { + arch_uninstall_hw_breakpoint(wp); + info->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(wp); } +unlock: rcu_read_unlock(); } } @@ -723,7 +739,8 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { + /* Check the currently installed breakpoints first. */ + for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; @@ -750,7 +767,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) } unlock: - if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { + if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); } @@ -758,6 +775,9 @@ unlock: update_mismatch_flag(i, mismatch); rcu_read_unlock(); } + + /* Handle any pending watchpoint single-step breakpoints. */ + watchpoint_single_step_handler(addr); } /* -- cgit v1.2.1 From 9ebb3cbcc39d4e61ae6751167086acfb5c201e6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 14:12:13 +0000 Subject: ARM: hw_breakpoint: unify single-stepping code for watchpoints and breakpoints The single-stepping code is currently different depending on whether we are stepping over a breakpoint or a watchpoint. There is no good reason for this, so let's sort it out. This patch adds functions for enabling/disabling single-step for a particular hw_breakpoint and integrates this with the exception handling code. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 81 ++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 81b63e94dfe0..36cd7680d3d2 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -339,6 +339,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) val_base = ARM_BASE_BVR; slots = __get_cpu_var(bp_on_reg); max_slots = core_num_brps; + if (info->step_ctrl.enabled) { + /* Override the breakpoint data with the step data. */ + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + } } else { /* Watchpoint */ if (info->step_ctrl.enabled) { @@ -628,21 +633,28 @@ out: return ret; } -static void update_mismatch_flag(int idx, int flag) +/* + * Enable/disable single-stepping over the breakpoint bp at address addr. + */ +static void enable_single_step(struct perf_event *bp, u32 addr) { - struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); - struct arch_hw_breakpoint *info; - - if (bp == NULL) - return; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - info = counter_arch_bp(bp); + arch_uninstall_hw_breakpoint(bp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = addr; + arch_install_hw_breakpoint(bp); +} - /* Update the mismatch field to enter/exit `single-step' mode */ - if (!bp->overflow_handler && info->ctrl.mismatch != flag) { - info->ctrl.mismatch = flag; - write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); - } +static void disable_single_step(struct perf_event *bp) +{ + arch_uninstall_hw_breakpoint(bp); + counter_arch_bp(bp)->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(bp); } static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) @@ -679,16 +691,8 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.mismatch = 1; - info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; - info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; - info->step_ctrl.privilege = info->ctrl.privilege; - info->step_ctrl.enabled = 1; - info->trigger = regs->ARM_pc; - arch_install_hw_breakpoint(wp); - } + if (!wp->overflow_handler) + enable_single_step(wp, instruction_pointer(regs)); rcu_read_unlock(); } @@ -716,11 +720,8 @@ static void watchpoint_single_step_handler(unsigned long pc) * Restore the original watchpoint if we've completed the * single-step. */ - if (info->trigger != pc) { - arch_uninstall_hw_breakpoint(wp); - info->step_ctrl.enabled = 0; - arch_install_hw_breakpoint(wp); - } + if (info->trigger != pc) + disable_single_step(wp); unlock: rcu_read_unlock(); @@ -730,7 +731,6 @@ unlock: static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - int mismatch; u32 ctrl_reg, val, addr; struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); struct arch_hw_breakpoint *info; @@ -745,34 +745,33 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) bp = slots[i]; - if (bp == NULL) { - rcu_read_unlock(); - continue; - } + if (bp == NULL) + goto unlock; - mismatch = 0; + info = counter_arch_bp(bp); /* Check if the breakpoint value matches. */ val = read_wb_reg(ARM_BASE_BVR + i); if (val != (addr & ~0x3)) - goto unlock; + goto mismatch; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); decode_ctrl_reg(ctrl_reg, &ctrl); if ((1 << (addr & 0x3)) & ctrl.len) { - mismatch = 1; - info = counter_arch_bp(bp); info->trigger = addr; - } - -unlock: - if (mismatch && !info->ctrl.mismatch) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); + if (!bp->overflow_handler) + enable_single_step(bp, addr); + goto unlock; } - update_mismatch_flag(i, mismatch); +mismatch: + /* If we're stepping a breakpoint, it can now be restored. */ + if (info->step_ctrl.enabled) + disable_single_step(bp); +unlock: rcu_read_unlock(); } -- cgit v1.2.1 From 3ce70b2e24cd35cc9f2df8cf5205b8ab4e6178e1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 17:05:24 +0000 Subject: ARM: hw_breakpoint: disallow per-cpu breakpoints without overflow handler Single-stepping a breakpoint requires us to disable it temporarily so that we don't get stuck in a recursive debug trap. With per-cpu breakpoints this presents a problem where an interrupt can be taken before the single-step has completed and a new task is eventually scheduled. This new task will not hit the breakpoint because it will have been disabled during the previous handling code. This patch disallows per-cpu breakpoints on ARM when an overflow handler is not present. A similar effect can be created by placing breakpoints on a shell and then running applications there. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 36cd7680d3d2..eef1b1e235a7 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -622,10 +622,12 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware single-step. + * we can use the mismatch feature as a poor-man's hardware + * single-step, but this only works for per-task breakpoints. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps() + || !bp->hw.bp_target), "overflow handler required but none found")) { ret = -EINVAL; } -- cgit v1.2.1 From ce9b1b09520789223f72a9fefd5f0e329f8d89d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Nov 2010 12:59:31 +0000 Subject: ARM: ptrace: fix style issue with hw_breakpoint interface This patch fixes a trivial style issue in ptrace.c. Signed-off-by: Will Deacon --- arch/arm/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 3e97483abcf0..19c6816db61e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num, goto out; if ((gen_type & implied_type) != gen_type) { - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } attr.bp_len = gen_len; -- cgit v1.2.1 From 4a55c18e2023096c8684fae5fa1cfa96a03172ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Nov 2010 17:06:53 +0000 Subject: ARM: hw_breakpoint: fix warnings generated by sparse sparse doesn't like per-cpu accesses such as: static DEFINE_PER_CPU(struct perf_event *, foo[MAXLEN]); struct perf_event **bar = __get_cpu_var(foo); and shouts quite loudly about it: | warning: incorrect type in assignment (different modifiers) | expected struct perf_event **slots | got struct perf_event *[noderef] * This patch adds casts to these sorts of assignments in hw_breakpoint.c in order to silence the warnings. Reported-by: Russell King Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index eef1b1e235a7..56ed9a62013b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -337,7 +337,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; if (info->step_ctrl.enabled) { /* Override the breakpoint data with the step data. */ @@ -357,7 +357,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) ctrl_base = ARM_BASE_WCR; val_base = ARM_BASE_WVR; } - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -394,7 +394,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = __get_cpu_var(bp_on_reg); + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ @@ -402,7 +402,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) base = ARM_BASE_BCR + core_num_brps; else base = ARM_BASE_WCR; - slots = __get_cpu_var(wp_on_reg); + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -662,9 +662,11 @@ static void disable_single_step(struct perf_event *bp) static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + /* Without a disassembler, we can only handle 1 watchpoint. */ BUG_ON(core_num_wrps > 1); @@ -703,9 +705,11 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) static void watchpoint_single_step_handler(unsigned long pc) { int i; - struct perf_event *wp, **slots = __get_cpu_var(wp_on_reg); + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + for (i = 0; i < core_num_reserved_brps; ++i) { rcu_read_lock(); @@ -734,10 +738,12 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; u32 ctrl_reg, val, addr; - struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); + struct perf_event *bp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; -- cgit v1.2.1 From ac61d143ffe2a6db4d4bcf47c21a5159d6a1b644 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 6 Dec 2010 10:38:14 +0000 Subject: ARM: GIC: move enablement of PPI interrupts to gic.c Avoid adding nasty genirq-specific code to local timers to enable PPI interrupts. Instead, provide a gic function to do this. Signed-off-by: Russell King --- arch/arm/kernel/smp_twd.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 35882fbf37f9..67f933ec4177 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -127,8 +127,6 @@ static void __cpuinit twd_calibrate_rate(void) */ void __cpuinit twd_timer_setup(struct clock_event_device *clk) { - unsigned long flags; - twd_calibrate_rate(); clk->name = "local_timer"; @@ -143,10 +141,7 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->min_delta_ns = clockevent_delta2ns(0xf, clk); /* Make sure our local interrupt controller has this enabled */ - local_irq_save(flags); - irq_to_desc(clk->irq)->status |= IRQ_NOPROBE; - get_irq_chip(clk->irq)->unmask(clk->irq); - local_irq_restore(flags); + gic_enable_ppi(clk->irq); clockevents_register_device(clk); } -- cgit v1.2.1 From 8fbf397c3389c1dedfa9ee412715046ab28fd82d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2010 17:37:45 +0000 Subject: ARM: hw_breakpoint: do not fail initcall if monitor mode is disabled The debug registers can only be manipulated from software if monitor debug mode is enabled. On some cores, this can never be enabled (i.e. the corresponding bit in the DSCR is RAZ/WI). This patch ensures we can handle this hardware configuration and fail gracefully, rather than blow up the kernel during boot. Reported-by: Cyril Chemparathy Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 54 +++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 29 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 56ed9a62013b..c9f3f0467570 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -219,23 +219,6 @@ static int get_num_brps(void) return brps; } -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -256,8 +239,12 @@ static int enable_monitor_mode(void) goto out; } + /* If monitor mode is already enabled, just return. */ + if (dscr & ARM_DSCR_MDBGEN) + goto out; + /* Write to the corresponding DSCR. */ - switch (debug_arch) { + switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); @@ -272,15 +259,30 @@ static int enable_monitor_mode(void) /* Check that the write made it through. */ ARM_DBG_READ(c1, 0, dscr); - if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), - "failed to enable monitor mode.")) { + if (!(dscr & ARM_DSCR_MDBGEN)) ret = -EPERM; - } out: return ret; } +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + /* * Check if 8-bit byte-address select is available. * This clobbers WRP 0. @@ -294,9 +296,6 @@ static u8 get_max_wp_len(void) if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) goto out; - if (enable_monitor_mode()) - goto out; - memset(&ctrl, 0, sizeof(ctrl)); ctrl.len = ARM_BREAKPOINT_LEN_8; ctrl_reg = encode_ctrl_reg(ctrl); @@ -879,15 +878,13 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { - int ret = 0; u32 dscr; debug_arch = get_debug_arch(); if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { pr_info("debug architecture 0x%x unsupported.\n", debug_arch); - ret = -ENODEV; - goto out; + return 0; } /* Determine how many BRPs/WRPs are available. */ @@ -928,8 +925,7 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&dbg_reset_nb); -out: - return ret; + return 0; } arch_initcall(arch_hw_breakpoint_init); -- cgit v1.2.1 From e3fbb087650df130788d8e3ac29875ee56819249 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 20 Dec 2010 14:47:19 +0000 Subject: ARM: SMP: remove send_ipi_message() send_ipi_message() does nothing except call smp_cross_call(). As this is a static function, nothing external to this file calls it, so we can easily clean up this now unnecessary indirection. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 4878e51561f9..3772cfc6953a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -381,22 +381,14 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; } -static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) -{ - /* - * Call the platform specific cross-CPU call function. - */ - smp_cross_call(mask, msg); -} - void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - send_ipi_message(mask, IPI_CALL_FUNC); + smp_cross_call(mask, IPI_CALL_FUNC); } void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } void show_ipi_list(struct seq_file *p) @@ -454,7 +446,7 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void smp_timer_broadcast(const struct cpumask *mask) { - send_ipi_message(mask, IPI_TIMER); + smp_cross_call(mask, IPI_TIMER); } #else #define smp_timer_broadcast NULL @@ -560,7 +552,7 @@ asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) void smp_send_reschedule(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); + smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } void smp_send_stop(void) @@ -568,7 +560,7 @@ void smp_send_stop(void) cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_ipi_message(&mask, IPI_CPU_STOP); + smp_cross_call(&mask, IPI_CPU_STOP); } /* -- cgit v1.2.1 From ec405ea9fe5fdeb40824edba7082803b3e98f176 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 13:38:06 +0000 Subject: ARM: include local timer irq stats only when local timers configured Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 2 ++ arch/arm/kernel/smp.c | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..ea29721ba348 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -93,6 +93,8 @@ unlock: #endif #ifdef CONFIG_SMP show_ipi_list(p); +#endif +#ifdef CONFIG_LOCAL_TIMERS show_local_irqs(p); #endif seq_printf(p, "Err: %10lu\n", irq_err_count); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3772cfc6953a..36d4b9140dcf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -403,18 +403,6 @@ void show_ipi_list(struct seq_file *p) seq_putc(p, '\n'); } -void show_local_irqs(struct seq_file *p) -{ - unsigned int cpu; - - seq_printf(p, "LOC: "); - - for_each_present_cpu(cpu) - seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); - - seq_putc(p, '\n'); -} - /* * Timer (local or broadcast) support */ @@ -441,6 +429,18 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs) set_irq_regs(old_regs); } + +void show_local_irqs(struct seq_file *p) +{ + unsigned int cpu; + + seq_printf(p, "LOC: "); + + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); + + seq_putc(p, '\n'); +} #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -- cgit v1.2.1 From 46c48f222f568decb881a552caa1c8f9c96c521e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 14:15:03 +0000 Subject: ARM: SMP: provide accessors for irq_stat data Provide __inc_irq_stat() and __get_irq_stat() to increment and read the irq stat counters. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 36d4b9140dcf..24131264ec2c 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -423,7 +423,7 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs) int cpu = smp_processor_id(); if (local_timer_ack()) { - irq_stat[cpu].local_timer_irqs++; + __inc_irq_stat(cpu, local_timer_irqs); ipi_timer(); } @@ -437,7 +437,7 @@ void show_local_irqs(struct seq_file *p) seq_printf(p, "LOC: "); for_each_present_cpu(cpu) - seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); + seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs)); seq_putc(p, '\n'); } -- cgit v1.2.1 From cab8c6f3053c1b147bba825844c8e208f8b3b9f4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 14:20:41 +0000 Subject: ARM: SMP: move ipi_count into irq_stat structure Move the ipi_count into irq_stat, which allows the ipi_data structure to be entirely removed. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 24131264ec2c..65b5ba867805 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -46,15 +46,6 @@ */ struct secondary_data secondary_data; -/* - * structures for inter-processor calls - */ -struct ipi_data { - unsigned long ipi_count; -}; - -static DEFINE_PER_CPU(struct ipi_data, ipi_data); - enum ipi_msg_type { IPI_TIMER = 2, IPI_RESCHEDULE, @@ -398,7 +389,7 @@ void show_ipi_list(struct seq_file *p) seq_puts(p, "IPI:"); for_each_present_cpu(cpu) - seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count); + seq_printf(p, " %10u", __get_irq_stat(cpu, ipi_irqs)); seq_putc(p, '\n'); } @@ -513,10 +504,9 @@ static void ipi_cpu_stop(unsigned int cpu) asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); struct pt_regs *old_regs = set_irq_regs(regs); - ipi->ipi_count++; + __inc_irq_stat(cpu, ipi_irqs); switch (ipinr) { case IPI_TIMER: -- cgit v1.2.1 From f13cd4170ee789f63b3c9585c1ae34e028bd549d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 14:33:51 +0000 Subject: ARM: fix /proc/interrupts formatting As per x86, align the initial column according to how many IRQs we have. Also, provide an english explaination for the 'LOC:' and 'IPI:' lines. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/fiq.c | 5 +++-- arch/arm/kernel/irq.c | 16 ++++++++++------ arch/arm/kernel/smp.c | 14 +++++++------- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 6ff7919613d7..47837b85c07c 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -67,10 +67,11 @@ static struct fiq_handler default_owner = { static struct fiq_handler *current_fiq = &default_owner; -int show_fiq_list(struct seq_file *p, void *v) +int show_fiq_list(struct seq_file *p, int prec) { if (current_fiq != &default_owner) - seq_printf(p, "FIQ: %s\n", current_fiq->name); + seq_printf(p, "%*s: %s\n", prec, "FIQ", + current_fiq->name); return 0; } diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ea29721ba348..4e7a7d272212 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -57,11 +57,15 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc; struct irqaction * action; unsigned long flags; + int prec, n; + + for (prec = 3, n = 1000; prec < 10 && n <= nr_irqs; prec++) + n *= 10; if (i == 0) { char cpuname[12]; - seq_printf(p, " "); + seq_printf(p, "%*s ", prec, ""); for_each_present_cpu(cpu) { sprintf(cpuname, "CPU%d", cpu); seq_printf(p, " %10s", cpuname); @@ -76,7 +80,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto unlock; - seq_printf(p, "%3d: ", i); + seq_printf(p, "%*d: ", prec, i); for_each_present_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %10s", desc->chip->name ? : "-"); @@ -89,15 +93,15 @@ unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { #ifdef CONFIG_FIQ - show_fiq_list(p, v); + show_fiq_list(p, prec); #endif #ifdef CONFIG_SMP - show_ipi_list(p); + show_ipi_list(p, prec); #endif #ifdef CONFIG_LOCAL_TIMERS - show_local_irqs(p); + show_local_irqs(p, prec); #endif - seq_printf(p, "Err: %10lu\n", irq_err_count); + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); } return 0; } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 65b5ba867805..269237ed76a0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -382,16 +382,16 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } -void show_ipi_list(struct seq_file *p) +void show_ipi_list(struct seq_file *p, int prec) { unsigned int cpu; - seq_puts(p, "IPI:"); + seq_printf(p, "%*s: ", prec, "IPI"); for_each_present_cpu(cpu) - seq_printf(p, " %10u", __get_irq_stat(cpu, ipi_irqs)); + seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs)); - seq_putc(p, '\n'); + seq_printf(p, " Inter-processor interrupts\n"); } /* @@ -421,16 +421,16 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs) set_irq_regs(old_regs); } -void show_local_irqs(struct seq_file *p) +void show_local_irqs(struct seq_file *p, int prec) { unsigned int cpu; - seq_printf(p, "LOC: "); + seq_printf(p, "%*s: ", prec, "LOC"); for_each_present_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs)); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); } #endif -- cgit v1.2.1 From ef6c84454f8567d4968c210d7d194fb711ed3739 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 24 Nov 2010 11:54:25 +0800 Subject: ARM: pxa: add iwmmx support for PJ4 iwmmxt is used in XScale, XScale3, Mohawk and PJ4 core. But the instructions of accessing CP0 and CP1 is changed in PJ4. Append more files to support iwmmxt in PJ4 core. Signed-off-by: Zhou Zhu Signed-off-by: Haojian Zhuang Acked-by: Nicolas Pitre Signed-off-by: Eric Miao --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/iwmmxt.S | 55 ++++++++++++++++++++------- arch/arm/kernel/pj4-cp0.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 13 deletions(-) create mode 100644 arch/arm/kernel/pj4-cp0.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..b0f11fa7a4b4 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -50,6 +50,7 @@ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o +obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_CPU_HAS_PMU) += pmu.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index b63b528f22a6..7fa3bb0d2397 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -19,6 +19,14 @@ #include #include +#if defined(CONFIG_CPU_PJ4) +#define PJ4(code...) code +#define XSC(code...) +#else +#define PJ4(code...) +#define XSC(code...) code +#endif + #define MMX_WR0 (0x00) #define MMX_WR1 (0x08) #define MMX_WR2 (0x10) @@ -58,11 +66,17 @@ ENTRY(iwmmxt_task_enable) - mrc p15, 0, r2, c15, c1, 0 - tst r2, #0x3 @ CP0 and CP1 accessible? + XSC(mrc p15, 0, r2, c15, c1, 0) + PJ4(mrc p15, 0, r2, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r2, #0x3) + PJ4(tst r2, #0xf) movne pc, lr @ if so no business here - orr r2, r2, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r2, c15, c1, 0 + @ enable access to CP0 and CP1 + XSC(orr r2, r2, #0x3) + XSC(mcr p15, 0, r2, c15, c1, 0) + PJ4(orr r2, r2, #0xf) + PJ4(mcr p15, 0, r2, c1, c0, 2) ldr r3, =concan_owner add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area @@ -179,17 +193,26 @@ ENTRY(iwmmxt_task_disable) teqne r1, r2 @ or specified one? bne 1f @ no: quit - mrc p15, 0, r4, c15, c1, 0 - orr r4, r4, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ enable access to CP0 and CP1 + XSC(mrc p15, 0, r4, c15, c1, 0) + XSC(orr r4, r4, #0xf) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(mrc p15, 0, r4, c1, c0, 2) + PJ4(orr r4, r4, #0x3) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mov r0, #0 @ nothing to load str r0, [r3] @ no more current owner mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait bl concan_save - bic r4, r4, #0x3 @ disable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ disable access to CP0 and CP1 + XSC(bic r4, r4, #0x3) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(bic r4, r4, #0xf) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait @@ -277,8 +300,11 @@ ENTRY(iwmmxt_task_restore) */ ENTRY(iwmmxt_task_switch) - mrc p15, 0, r1, c15, c1, 0 - tst r1, #0x3 @ CP0 and CP1 accessible? + XSC(mrc p15, 0, r1, c15, c1, 0) + PJ4(mrc p15, 0, r1, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r1, #0x3) + PJ4(tst r1, #0xf) bne 1f @ yes: block them for next task ldr r2, =concan_owner @@ -287,8 +313,11 @@ ENTRY(iwmmxt_task_switch) teq r2, r3 @ next task owns it? movne pc, lr @ no: leave Concan disabled -1: eor r1, r1, #3 @ flip Concan access - mcr p15, 0, r1, c15, c1, 0 +1: @ flip Conan access + XSC(eor r1, r1, #0x3) + XSC(mcr p15, 0, r1, c15, c1, 0) + PJ4(eor r1, r1, #0xf) + PJ4(mcr p15, 0, r1, c1, c0, 2) mrc p15, 0, r1, c2, c0, 0 sub pc, lr, r1, lsr #32 @ cpwait and return diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c new file mode 100644 index 000000000000..a4b1b0748fd3 --- /dev/null +++ b/arch/arm/kernel/pj4-cp0.c @@ -0,0 +1,94 @@ +/* + * linux/arch/arm/kernel/pj4-cp0.c + * + * PJ4 iWMMXt coprocessor context switching and handling + * + * Copyright (c) 2010 Marvell International Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t) +{ + struct thread_info *thread = t; + + switch (cmd) { + case THREAD_NOTIFY_FLUSH: + /* + * flush_thread() zeroes thread->fpstate, so no need + * to do anything here. + * + * FALLTHROUGH: Ensure we don't try to overwrite our newly + * initialised state information on the first fault. + */ + + case THREAD_NOTIFY_EXIT: + iwmmxt_task_release(thread); + break; + + case THREAD_NOTIFY_SWITCH: + iwmmxt_task_switch(thread); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block iwmmxt_notifier_block = { + .notifier_call = iwmmxt_do, +}; + + +static u32 __init pj4_cp_access_read(void) +{ + u32 value; + + __asm__ __volatile__ ( + "mrc p15, 0, %0, c1, c0, 2\n\t" + : "=r" (value)); + return value; +} + +static void __init pj4_cp_access_write(u32 value) +{ + u32 temp; + + __asm__ __volatile__ ( + "mcr p15, 0, %1, c1, c0, 2\n\t" + "mrc p15, 0, %0, c1, c0, 2\n\t" + "mov %0, %0\n\t" + "sub pc, pc, #4\n\t" + : "=r" (temp) : "r" (value)); +} + + +/* + * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy + * switch code handle iWMMXt context switching. + */ +static int __init pj4_cp0_init(void) +{ + u32 cp_access; + + cp_access = pj4_cp_access_read() & ~0xf; + pj4_cp_access_write(cp_access); + + printk(KERN_INFO "PJ4 iWMMXt coprocessor enabled.\n"); + elf_hwcap |= HWCAP_IWMMXT; + thread_register_notifier(&iwmmxt_notifier_block); + + return 0; +} + +late_initcall(pj4_cp0_init); -- cgit v1.2.1 From 4a88abd7b48e8ec8084b1252d0f5ebdab43c2508 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 14:40:29 +0000 Subject: ARM: SMP: provide individual IPI interrupt statistics This separates out the individual IPI interrupt counts from the total IPI count, which allows better visibility of what IPIs are being used for. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 5 +++++ arch/arm/kernel/smp.c | 25 +++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 4e7a7d272212..6276f01df9e4 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -62,6 +62,11 @@ int show_interrupts(struct seq_file *p, void *v) for (prec = 3, n = 1000; prec < 10 && n <= nr_irqs; prec++) n *= 10; +#ifdef CONFIG_SMP + if (prec < 4) + prec = 4; +#endif + if (i == 0) { char cpuname[12]; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 269237ed76a0..fa0c5f6e1587 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -382,16 +382,28 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +static const char *ipi_types[NR_IPI] = { +#define S(x,s) [x - IPI_TIMER] = s + S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_RESCHEDULE, "Rescheduling interrupts"), + S(IPI_CALL_FUNC, "Function call interrupts"), + S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), + S(IPI_CPU_STOP, "CPU stop interrupts"), +}; + void show_ipi_list(struct seq_file *p, int prec) { - unsigned int cpu; + unsigned int cpu, i; - seq_printf(p, "%*s: ", prec, "IPI"); + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); - for_each_present_cpu(cpu) - seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs)); + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", + __get_irq_stat(cpu, ipi_irqs[i])); - seq_printf(p, " Inter-processor interrupts\n"); + seq_printf(p, " %s\n", ipi_types[i]); + } } /* @@ -506,7 +518,8 @@ asmlinkage void __exception do_IPI(int ipinr, struct pt_regs *regs) unsigned int cpu = smp_processor_id(); struct pt_regs *old_regs = set_irq_regs(regs); - __inc_irq_stat(cpu, ipi_irqs); + if (ipinr >= IPI_TIMER && ipinr < IPI_TIMER + NR_IPI) + __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_TIMER]); switch (ipinr) { case IPI_TIMER: -- cgit v1.2.1 From b54992fe1b4bad7b7488d58b8696e4e8974fdab0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Nov 2010 14:46:46 +0000 Subject: ARM: SMP: collect IPI and local timer IRQs for /proc/stat The IPI and local timer interrupts weren't being properly accounted for in /proc/stat. Collect them from the irq_stat structure, and return their sum. Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index fa0c5f6e1587..1de3e13a42a1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -406,6 +406,21 @@ void show_ipi_list(struct seq_file *p, int prec) } } +u64 smp_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = 0; + int i; + + for (i = 0; i < NR_IPI; i++) + sum += __get_irq_stat(cpu, ipi_irqs[i]); + +#ifdef CONFIG_LOCAL_TIMERS + sum += __get_irq_stat(cpu, local_timer_irqs); +#endif + + return sum; +} + /* * Timer (local or broadcast) support */ -- cgit v1.2.1 From 0eb0511d176534674600a1986c3c766756288908 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 22 Nov 2010 12:06:28 +0000 Subject: ARM: SMP: use more sane register allocation for __fixup_smp_on_up Use r0,r3-r6 rather than r0,r3,r4,r6,r7, which makes it easier to understand which registers can be modified. Also document which registers hold values which must be preserved. Signed-off-by: Russell King --- arch/arm/kernel/head.S | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index dd6b369ac69c..fd94e4e82fc9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -89,6 +89,11 @@ ENTRY(stext) bl __lookup_machine_type @ r5=machinfo movs r8, r5 @ invalid machine (r5=0)? beq __error_a @ yes, error 'a' + + /* + * r1 = machine no, r2 = atags, + * r8 = machinfo, r9 = cpuid, r10 = procinfo + */ bl __vet_atags #ifdef CONFIG_SMP_ON_UP bl __fixup_smp @@ -381,19 +386,19 @@ ENDPROC(__turn_mmu_on) #ifdef CONFIG_SMP_ON_UP __fixup_smp: - mov r7, #0x00070000 - orr r6, r7, #0xff000000 @ mask 0xff070000 - orr r7, r7, #0x41000000 @ val 0x41070000 - and r0, r9, r6 - teq r0, r7 @ ARM CPU and ARMv6/v7? + mov r4, #0x00070000 + orr r3, r4, #0xff000000 @ mask 0xff070000 + orr r4, r4, #0x41000000 @ val 0x41070000 + and r0, r9, r3 + teq r0, r4 @ ARM CPU and ARMv6/v7? bne __fixup_smp_on_up @ no, assume UP - orr r6, r6, #0x0000ff00 - orr r6, r6, #0x000000f0 @ mask 0xff07fff0 - orr r7, r7, #0x0000b000 - orr r7, r7, #0x00000020 @ val 0x4107b020 - and r0, r9, r6 - teq r0, r7 @ ARM 11MPCore? + orr r3, r3, #0x0000ff00 + orr r3, r3, #0x000000f0 @ mask 0xff07fff0 + orr r4, r4, #0x0000b000 + orr r4, r4, #0x00000020 @ val 0x4107b020 + and r0, r9, r3 + teq r0, r4 @ ARM 11MPCore? moveq pc, lr @ yes, assume SMP mrc p15, 0, r0, c0, c0, 5 @ read MPIDR @@ -402,13 +407,13 @@ __fixup_smp: __fixup_smp_on_up: adr r0, 1f - ldmia r0, {r3, r6, r7} + ldmia r0, {r3 - r5} sub r3, r0, r3 - add r6, r6, r3 - add r7, r7, r3 -2: cmp r6, r7 - ldmia r6!, {r0, r4} - strlo r4, [r0, r3] + add r4, r4, r3 + add r5, r5, r3 +2: cmp r4, r5 + ldmia r4!, {r0, r6} + strlo r6, [r0, r3] blo 2b mov pc, lr ENDPROC(__fixup_smp) -- cgit v1.2.1 From 28e18293cf0f8d23a0950d7b1d2212d11af494dc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 2 Dec 2010 09:53:54 +0000 Subject: ARM: SMP: ensure smp_send_stop() waits for CPUs to stop Wait for CPUs to indicate that they've stopped, after sending the stop IPI, rather than blindly continuing on and hoping that they've stopped in time. Print a warning if we fail to stop the other CPUs. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1de3e13a42a1..64f2d198c764 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -575,10 +575,22 @@ void smp_send_reschedule(int cpu) void smp_send_stop(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) + unsigned long timeout; + + if (num_online_cpus() > 1) { + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + smp_cross_call(&mask, IPI_CPU_STOP); + } + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && timeout--) + udelay(1); + + if (num_online_cpus() > 1) + pr_warning("SMP: failed to stop secondary CPUs\n"); } /* -- cgit v1.2.1 From 05c74a6cbcfb416286a947668ba32f63d99fe74a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Dec 2010 11:09:48 +0000 Subject: ARM: SMP: consolidate the common parts of smp_prepare_cpus() There is a certain amount of smp_prepare_cpus() which doesn't belong in the platform support code - that is, code which is invariant to the SMP implementation. Move this code into arch/arm/kernel/smp.c, and add a platform_ prefix to the original function. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 64f2d198c764..c66f2d3f65d3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -281,6 +281,17 @@ void __ref cpu_die(void) } #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); + + cpu_info->loops_per_jiffy = loops_per_jiffy; +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -339,17 +350,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_idle(); } -/* - * Called by both boot and secondaries to move global data into - * per-processor storage. - */ -void __cpuinit smp_store_cpu_info(unsigned int cpuid) -{ - struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); - - cpu_info->loops_per_jiffy = loops_per_jiffy; -} - void __init smp_cpus_done(unsigned int max_cpus) { int cpu; @@ -372,6 +372,33 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; } +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int ncores = num_possible_cpus(); + + smp_store_cpu_info(smp_processor_id()); + + /* + * are we trying to boot more cores than exist? + */ + if (max_cpus > ncores) + max_cpus = ncores; + + if (max_cpus > 1) { + /* + * Enable the local timer or broadcast device for the + * boot CPU, but only if we have more than one CPU. + */ + percpu_timer_setup(); + + /* + * Initialise the SCU if there are more than one CPU + * and let them know where to start. + */ + platform_smp_prepare_cpus(max_cpus); + } +} + void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_cross_call(mask, IPI_CALL_FUNC); -- cgit v1.2.1 From 2c0136dba4e43b0916ccc9ecc7f11e6d6b73f046 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Dec 2010 15:00:49 +0000 Subject: ARM: SMP: consolidate trace_hardirqs_off() into common SMP code All platforms call trace_hardirqs_off() in their secondary startup code, so move this into the core SMP code - it doesn't need to be in the per-platform code. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c66f2d3f65d3..a30c4094db3a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -317,6 +317,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_init(); preempt_disable(); + trace_hardirqs_off(); /* * Give the platform a chance to do its own initialisation. -- cgit v1.2.1 From 3c030beabf937b1d3b4ecaedfd1fb2f1e2aa0c70 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 30 Nov 2010 11:07:35 +0000 Subject: ARM: CPU hotplug: move cpu_killed completion to core code We always need to wait for the dying CPU to reach a safe state before taking it down, irrespective of the requirements of the platform. Move the completion code into the ARM SMP hotplug code rather than having each platform re-implement this. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index a30c4094db3a..8c81ff9b3732 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -238,12 +239,20 @@ int __cpu_disable(void) return 0; } +static DECLARE_COMPLETION(cpu_died); + /* * called on the thread which is asking for a CPU to be shutdown - * waits until shutdown has completed, or it is timed out. */ void __cpu_die(unsigned int cpu) { + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_err("CPU%u: cpu didn't die\n", cpu); + return; + } + printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); + if (!platform_cpu_kill(cpu)) printk("CPU%u: unable to kill\n", cpu); } @@ -263,9 +272,12 @@ void __ref cpu_die(void) local_irq_disable(); idle_task_exit(); + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + /* * actual CPU shutdown procedure is at least platform (if not - * CPU) specific + * CPU) specific. */ platform_cpu_die(cpu); -- cgit v1.2.1 From f36d340122ae8744e64af0a92a6f77b97542c0a4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 30 Nov 2010 12:21:30 +0000 Subject: ARM: CPU hotplug: ensure correct ordering of unplug Don't call idle_task_exit() with interrupts disabled, and ensure that we have a memory barrier after interrupts are disabled but before signalling that this CPU has shut down. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c81ff9b3732..1a1c5e2b3ef9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -269,9 +269,11 @@ void __ref cpu_die(void) { unsigned int cpu = smp_processor_id(); - local_irq_disable(); idle_task_exit(); + local_irq_disable(); + mb(); + /* Tell __cpu_die() that this CPU is now safe to dispose of */ complete(&cpu_died); -- cgit v1.2.1 From ed3768a8d9dc2d345d4f27eb44ee1e4825056c08 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 1 Dec 2010 15:39:23 +0100 Subject: ARM: 6516/1: Allow SMP_ON_UP to work with Thumb-2 kernels. * __fixup_smp_on_up has been modified with support for the THUMB2_KERNEL case. For THUMB2_KERNEL only, fixups are split into halfwords in case of misalignment, since we can't rely on unaligned accesses working before turning the MMU on. No attempt is made to optimise the aligned case, since the number of fixups is typically small, and it seems best to keep the code as simple as possible. * Add a rotate in the fixup_smp code in order to support CPU_BIG_ENDIAN, as suggested by Nicolas Pitre. * Add an assembly-time sanity-check to ALT_UP() to ensure that the content really is the right size (4 bytes). (No check is done for ALT_SMP(). Possibly, this could be fixed by splitting the two uses ot ALT_SMP() (ALT_SMP...SMP_UP versus ALT_SMP...SMP_UP_B) into two macros. In the first case, ALT_SMP needs to expand to >= 4 bytes, not == 4.) * smp_mpidr.h (which implements ALT_SMP()/ALT_UP() manually due to macro limitations) has not been modified: the affected instruction (mov) has no 16-bit encoding, so the correct instruction size is satisfied in this case. * A "mode" parameter has been added to smp_dmb: smp_dmb arm @ assumes 4-byte instructions (for ARM code, e.g. kuser) smp_dmb @ uses W() to ensure 4-byte instructions for ALT_SMP() This avoids assembly failures due to use of W() inside smp_dmb, when assembling pure-ARM code in the vectors page. There might be a better way to achieve this. * Kconfig: make SMP_ON_UP depend on (!THUMB2_KERNEL || !BIG_ENDIAN) i.e., THUMB2_KERNEL is now supported, but only if !BIG_ENDIAN (The fixup code for Thumb-2 currently assumes little-endian order.) Tested using a single generic realview kernel on: ARM RealView PB-A8 (CONFIG_THUMB2_KERNEL={n,y}) ARM RealView PBX-A9 (SMP) Signed-off-by: Dave Martin Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 4 ++-- arch/arm/kernel/head.S | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 955cf5f539ed..7f22a11a5105 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -842,7 +842,7 @@ __kuser_helper_start: */ __kuser_memory_barrier: @ 0xffff0fa0 - smp_dmb + smp_dmb arm usr_ret lr .align 5 @@ -959,7 +959,7 @@ kuser_cmpxchg_fixup: #else - smp_dmb + smp_dmb arm 1: ldrex r3, [r2] subs r3, r3, r0 strexeq r3, r1, [r2] diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index fd94e4e82fc9..359e54e83bd5 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -412,10 +412,17 @@ __fixup_smp_on_up: add r4, r4, r3 add r5, r5, r3 2: cmp r4, r5 + movhs pc, lr ldmia r4!, {r0, r6} - strlo r6, [r0, r3] - blo 2b - mov pc, lr + ARM( str r6, [r0, r3] ) + THUMB( add r0, r0, r3 ) +#ifdef __ARMEB__ + THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. +#endif + THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( strh r6, [r0] ) + b 2b ENDPROC(__fixup_smp) 1: .word . -- cgit v1.2.1 From 58613cd1d4f8c2d5f25b6c57ad7fbed80e75a67b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 18 Dec 2010 12:34:39 +0000 Subject: ARM: smp: improve CPU bringup failure diagnostics We used to print a bland error message which gave no clue as to the failure when we failed to bring up a secondary CPU. Resolve this by separating the two failure cases. If boot_secondary() fails, we print a message indicating the returned error code from boot_secondary(): "CPU%u: failed to boot: %d\n", cpu, ret. However, if boot_secondary() succeeded, but the CPU did not appear to mark itself online within the timeout, indicate that it failed to come online: "CPU%u: failed to come online\n", cpu Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1a1c5e2b3ef9..6afaf6f73069 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -164,8 +164,12 @@ int __cpuinit __cpu_up(unsigned int cpu) barrier(); } - if (!cpu_online(cpu)) + if (!cpu_online(cpu)) { + pr_crit("CPU%u: failed to come online\n", cpu); ret = -EIO; + } + } else { + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } secondary_data.stack = NULL; @@ -181,14 +185,6 @@ int __cpuinit __cpu_up(unsigned int cpu) pgd_free(&init_mm, pgd); - if (ret) { - printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu); - - /* - * FIXME: We need to clean up the new idle thread. --rmk - */ - } - return ret; } -- cgit v1.2.1 From 10034aabca9032246762daaca3152f3e79380ea0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 20 Dec 2010 14:28:02 +0000 Subject: ARM: localtimer: clean up local timer on hot unplug When a CPU is hot unplugged, the generic tick code cleans up the clock event device, but fails to call down to the device's set_mode function to actually shut the device down. To work around this, we've historically had a local_timer_stop() callback out of the hotplug code. However, this adds needless complexity when we have the clock event device itself available. Explicitly call the clock event device's set_mode function with CLOCK_EVT_MODE_UNUSED, so that the hardware can be cleanly shutdown without any special external callbacks. When/if the generic code is fixed, percpu_timer_stop() can be killed off. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 19 ++++++++++++++++++- arch/arm/kernel/smp_twd.c | 10 ---------- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6afaf6f73069..4dc864ef9cdf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -189,6 +189,8 @@ int __cpuinit __cpu_up(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU +static void percpu_timer_stop(void); + /* * __cpu_disable runs on the processor to be shutdown. */ @@ -216,7 +218,7 @@ int __cpu_disable(void) /* * Stop the local timer for this CPU. */ - local_timer_stop(); + percpu_timer_stop(); /* * Flush user cache and TLB mappings, and then remove this CPU @@ -539,6 +541,21 @@ void __cpuinit percpu_timer_setup(void) local_timer_setup(evt); } +#ifdef CONFIG_HOTPLUG_CPU +/* + * The generic clock events code purposely does not stop the local timer + * on CPU_DEAD/CPU_DEAD_FROZEN hotplug events, so we have to do it + * manually here. + */ +static void percpu_timer_stop(void) +{ + unsigned int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu); + + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); +} +#endif + static DEFINE_SPINLOCK(stop_lock); /* diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 35882fbf37f9..24585d97c104 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -150,13 +150,3 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clockevents_register_device(clk); } - -#ifdef CONFIG_HOTPLUG_CPU -/* - * take a local timer down - */ -void twd_timer_stop(void) -{ - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); -} -#endif -- cgit v1.2.1 From 03b505eae6a276b8c38b6222694afb6cea10b1cc Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 20 Dec 2010 14:44:32 +0000 Subject: ARM: SMP: split out software TLB maintainence broadcasting smp.c is becoming too large, so split out the TLB maintainence broadcasting into a separate smp_tlb.c file. Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/smp.c | 126 ----------------------------------------- arch/arm/kernel/smp_tlb.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 127 deletions(-) create mode 100644 arch/arm/kernel/smp_tlb.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..659937b0a896 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o smp_tlb.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 4dc864ef9cdf..6d0c66bc434d 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -38,7 +38,6 @@ #include #include #include -#include /* * as from 2.5, kernels no longer have an init_tasks structure @@ -655,128 +654,3 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } - -static void -on_each_cpu_mask(void (*func)(void *), void *info, int wait, - const struct cpumask *mask) -{ - preempt_disable(); - - smp_call_function_many(mask, func, info, wait); - if (cpumask_test_cpu(smp_processor_id(), mask)) - func(info); - - preempt_enable(); -} - -/**********************************************************************/ - -/* - * TLB operations - */ -struct tlb_args { - struct vm_area_struct *ta_vma; - unsigned long ta_start; - unsigned long ta_end; -}; - -static inline void ipi_flush_tlb_all(void *ignored) -{ - local_flush_tlb_all(); -} - -static inline void ipi_flush_tlb_mm(void *arg) -{ - struct mm_struct *mm = (struct mm_struct *)arg; - - local_flush_tlb_mm(mm); -} - -static inline void ipi_flush_tlb_page(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_page(ta->ta_vma, ta->ta_start); -} - -static inline void ipi_flush_tlb_kernel_page(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_kernel_page(ta->ta_start); -} - -static inline void ipi_flush_tlb_range(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); -} - -static inline void ipi_flush_tlb_kernel_range(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); -} - -void flush_tlb_all(void) -{ - if (tlb_ops_need_broadcast()) - on_each_cpu(ipi_flush_tlb_all, NULL, 1); - else - local_flush_tlb_all(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - if (tlb_ops_need_broadcast()) - on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm)); - else - local_flush_tlb_mm(mm); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_vma = vma; - ta.ta_start = uaddr; - on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm)); - } else - local_flush_tlb_page(vma, uaddr); -} - -void flush_tlb_kernel_page(unsigned long kaddr) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_start = kaddr; - on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); - } else - local_flush_tlb_kernel_page(kaddr); -} - -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_vma = vma; - ta.ta_start = start; - ta.ta_end = end; - on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm)); - } else - local_flush_tlb_range(vma, start, end); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_start = start; - ta.ta_end = end; - on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); - } else - local_flush_tlb_kernel_range(start, end); -} diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c new file mode 100644 index 000000000000..7dcb35285be7 --- /dev/null +++ b/arch/arm/kernel/smp_tlb.c @@ -0,0 +1,139 @@ +/* + * linux/arch/arm/kernel/smp_tlb.c + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +#include +#include + +static void on_each_cpu_mask(void (*func)(void *), void *info, int wait, + const struct cpumask *mask) +{ + preempt_disable(); + + smp_call_function_many(mask, func, info, wait); + if (cpumask_test_cpu(smp_processor_id(), mask)) + func(info); + + preempt_enable(); +} + +/**********************************************************************/ + +/* + * TLB operations + */ +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_all(void *ignored) +{ + local_flush_tlb_all(); +} + +static inline void ipi_flush_tlb_mm(void *arg) +{ + struct mm_struct *mm = (struct mm_struct *)arg; + + local_flush_tlb_mm(mm); +} + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_kernel_page(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_page(ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu(ipi_flush_tlb_all, NULL, 1); + else + local_flush_tlb_all(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm)); + else + local_flush_tlb_mm(mm); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_vma = vma; + ta.ta_start = uaddr; + on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm)); + } else + local_flush_tlb_page(vma, uaddr); +} + +void flush_tlb_kernel_page(unsigned long kaddr) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_start = kaddr; + on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); + } else + local_flush_tlb_kernel_page(kaddr); +} + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_vma = vma; + ta.ta_start = start; + ta.ta_end = end; + on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm)); + } else + local_flush_tlb_range(vma, start, end); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_start = start; + ta.ta_end = end; + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); + } else + local_flush_tlb_kernel_range(start, end); +} + -- cgit v1.2.1 From faabfa0816916b0a7cfc93f6a9be382830658c80 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 20 Dec 2010 16:58:19 +0000 Subject: ARM: SMP: ensure frame pointer is reinitialized for soft-CPU hotplug When we soft-CPU hotplug a CPU, we reset the stack pointer and jump back to start_secondary(). This allows us to restart as if the CPU was actually reset. However, we weren't resetting the frame pointer, which could cause problems with backtracing. Reset the frame pointer to zero (which means no parent frame) just like the early assembly code also does. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6d0c66bc434d..5341b0b19701 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -286,6 +286,7 @@ void __ref cpu_die(void) * to be repeated to undo the effects of taking the CPU offline. */ __asm__("mov sp, %0\n" + " mov fp, #0\n" " b secondary_start_kernel" : : "r" (task_stack_page(current) + THREAD_SIZE - 8)); -- cgit v1.2.1 From 26bbf0b57a0848932f725076bcb1245ca696e8d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 21 Nov 2010 11:30:36 +0000 Subject: ARM: pgtable: remove L2 cache flushes for SMP page table bring-up The MMU is always configured to read page tables from the L2 cache so there's little point flushing them out of the L2 cache back to RAM. Remove these flushes. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..46313805f430 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -85,7 +85,6 @@ static inline void identity_mapping_add(pgd_t *pgd, unsigned long start, pmd[1] = __pmd(addr | prot); addr += SECTION_SIZE; flush_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); } } @@ -100,7 +99,6 @@ static inline void identity_mapping_del(pgd_t *pgd, unsigned long start, pmd[0] = __pmd(0); pmd[1] = __pmd(0); clean_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); } } -- cgit v1.2.1 From 614dd0585f376a25c638abbed9c5fbd21d7baece Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 21 Nov 2010 11:41:57 +0000 Subject: ARM: pgtable: collect up identity mapping functions We have two places where we create identity mappings - one when we bring secondary CPUs online, and one where we setup some mappings for soft- reboot. Combine these two into a single implementation. Also collect the identity mapping deletion function. Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 46313805f430..73cef403352a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -68,40 +68,6 @@ enum ipi_msg_type { IPI_CPU_STOP, }; -static inline void identity_mapping_add(pgd_t *pgd, unsigned long start, - unsigned long end) -{ - unsigned long addr, prot; - pmd_t *pmd; - - prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) - prot |= PMD_BIT4; - - for (addr = start & PGDIR_MASK; addr < end;) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(addr | prot); - addr += SECTION_SIZE; - pmd[1] = __pmd(addr | prot); - addr += SECTION_SIZE; - flush_pmd_entry(pmd); - } -} - -static inline void identity_mapping_del(pgd_t *pgd, unsigned long start, - unsigned long end) -{ - unsigned long addr; - pmd_t *pmd; - - for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(0); - pmd[1] = __pmd(0); - clean_pmd_entry(pmd); - } -} - int __cpuinit __cpu_up(unsigned int cpu) { struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); -- cgit v1.2.1 From 112f38a4a31668eb6a7d91d128296a26afdf7c4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 19:23:07 +0000 Subject: ARM: sched_clock: provide common infrastructure for sched_clock() Provide common sched_clock() infrastructure for platforms to use to create a 64-bit ns based sched_clock() implementation from a counter running at a non-variable clock rate. This implementation is based upon maintaining an epoch for the counter and an epoch for the nanosecond time. When we desire a sched_clock() time, we calculate the number of counter ticks since the last epoch update, convert this to nanoseconds and add to the epoch nanoseconds. We regularly refresh these epochs within the counter wrap interval. We perform a similar calculation as above, and store the new epochs. We read and write the epochs in such a way that sched_clock() can easily (and locklessly) detect when an update is in progress, and repeat the loading of these constants when they're known not to be stable. The one caveat is that sched_clock() is not called in the middle of an update. We achieve that by disabling IRQs. Finally, if the clock rate is known at compile time, the counter to ns conversion factors can be specified, allowing sched_clock() to be tightly optimized. We ensure that these factors are correct by providing an initialization function which performs a run-time check. Acked-by: Peter Zijlstra Tested-by: Santosh Shilimkar Tested-by: Will Deacon Tested-by: Mikael Pettersson Tested-by: Eric Miao Tested-by: Olof Johansson Tested-by: Jamie Iles Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/sched_clock.c | 69 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 arch/arm/kernel/sched_clock.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 679851a9f589..fd3ec49bfba6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c new file mode 100644 index 000000000000..2cdcc9287c74 --- /dev/null +++ b/arch/arm/kernel/sched_clock.c @@ -0,0 +1,69 @@ +/* + * sched_clock.c: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include + +static void sched_clock_poll(unsigned long wrap_ticks); +static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static void (*sched_clock_update_fn)(void); + +static void sched_clock_poll(unsigned long wrap_ticks) +{ + mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); + sched_clock_update_fn(); +} + +void __init init_sched_clock(struct clock_data *cd, void (*update)(void), + unsigned int clock_bits, unsigned long rate) +{ + unsigned long r, w; + u64 res, wrap; + char r_unit; + + sched_clock_update_fn = update; + + /* calculate the mult/shift to convert counter ticks to ns. */ + clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60); + + r = rate; + if (r >= 4000000) { + r /= 1000000; + r_unit = 'M'; + } else { + r /= 1000; + r_unit = 'k'; + } + + /* calculate how many ns until we wrap */ + wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift); + do_div(wrap, NSEC_PER_MSEC); + w = wrap; + + /* calculate the ns resolution of this counter */ + res = cyc_to_ns(1ULL, cd->mult, cd->shift); + pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", + clock_bits, r, r_unit, res, w); + + /* + * Start the timer to keep sched_clock() properly updated and + * sets the initial epoch. + */ + sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); + sched_clock_poll(sched_clock_timer.data); + + /* + * Ensure that sched_clock() starts off at 0ns + */ + cd->epoch_ns = 0; +} -- cgit v1.2.1 From 521086412ee423fbdfc7da81f257239c43f707b4 Mon Sep 17 00:00:00 2001 From: eric miao Date: Mon, 13 Dec 2010 09:42:34 +0100 Subject: ARM: 6532/1: Allow machine to specify it's own IRQ handlers at run-time Normally different ARM platform has different way to decode the IRQ hardware status and demultiplex to the corresponding IRQ handler. This is highly optimized by macro irq_handler in entry-armv.S, and each machine defines their own macro to decode the IRQ number. However, this prevents multiple machine classes to be built into a single kernel. By allowing each machine to specify thier own handler, and making function pointer 'handle_arch_irq' to point to it at run time, this can be solved. And introduce CONFIG_MULTI_IRQ_HANDLER to allow both solutions to work. Comparing with the highly optimized macro of irq_handler, the new function must be written with care not to lose too much performance. And the IPI stuff on SMP is expected to move to the provided arch IRQ handler as well. The assembly code to invoke handle_arch_irq is optimized by Russell King. Signed-off-by: Eric Miao Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 17 +++++++++++++++-- arch/arm/kernel/setup.c | 3 +++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 35f3f20d6731..caa6c396ec78 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -30,6 +30,14 @@ * Interrupt handling. Preserves r7, r8, r9 */ .macro irq_handler +#ifdef CONFIG_MULTI_IRQ_HANDLER + ldr r5, =handle_arch_irq + mov r0, sp + ldr r5, [r5] + adr lr, BSYM(9997f) + teq r5, #0 + movne pc, r5 +#endif get_irqnr_preamble r5, lr 1: get_irqnr_and_base r0, r6, r5, lr movne r1, sp @@ -58,9 +66,8 @@ adrne lr, BSYM(1b) bne do_local_timer #endif -9997: #endif - +9997: .endm #ifdef CONFIG_KPROBES @@ -1245,3 +1252,9 @@ cr_alignment: .space 4 cr_no_alignment: .space 4 + +#ifdef CONFIG_MULTI_IRQ_HANDLER + .globl handle_arch_irq +handle_arch_irq: + .space 4 +#endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8075e592f902..0826f36330c4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -875,6 +875,9 @@ void __init setup_arch(char **cmdline_p) init_arch_irq = mdesc->init_irq; system_timer = mdesc->timer; init_machine = mdesc->init_machine; +#ifdef CONFIG_MULTI_IRQ_HANDLER + handle_arch_irq = mdesc->handle_irq; +#endif #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) -- cgit v1.2.1 From cd544ce754ac2432ffcc0626ea802d2b30876b50 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Dec 2010 13:20:08 +0100 Subject: ARM: 6538/1: Subarch IRQ handler macros V3 Per subarch interrupt handler macros V3. This patch breaks out code from the irq_handler macro into arch_irq_handler and arch_irq_handler_default. The macros are put in the header file "entry-macro-multi.S" The arch_irq_handler_default macro is designed to be used by irq_handler in entry-armv.S while arch_irq_handler is suitable for per-subarch use. Signed-off-by: Magnus Damm Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index caa6c396ec78..a866dce611d0 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -25,6 +25,7 @@ #include #include "entry-header.S" +#include /* * Interrupt handling. Preserves r7, r8, r9 @@ -38,35 +39,7 @@ teq r5, #0 movne pc, r5 #endif - get_irqnr_preamble r5, lr -1: get_irqnr_and_base r0, r6, r5, lr - movne r1, sp - @ - @ routine called with r0 = irq number, r1 = struct pt_regs * - @ - adrne lr, BSYM(1b) - bne asm_do_IRQ - -#ifdef CONFIG_SMP - /* - * XXX - * - * this macro assumes that irqstat (r6) and base (r5) are - * preserved from get_irqnr_and_base above - */ - ALT_SMP(test_for_ipi r0, r6, r5, lr) - ALT_UP_B(9997f) - movne r0, sp - adrne lr, BSYM(1b) - bne do_IPI - -#ifdef CONFIG_LOCAL_TIMERS - test_for_ltirq r0, r6, r5, lr - movne r0, sp - adrne lr, BSYM(1b) - bne do_local_timer -#endif -#endif + arch_irq_handler_default 9997: .endm -- cgit v1.2.1 From 8ff1443c5439ecee7472b80cf12ecfc337e6ee98 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 20 Dec 2010 10:18:36 +0000 Subject: ARM: simplify early machine init hooks Rather than storing each machine init hook separately, store a pointer to the machine description record and dereference this instead. This pointer is only available while the init sections are present, which is not a problem as we only use it from init code. Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 7 +++---- arch/arm/kernel/setup.c | 15 ++++----------- arch/arm/kernel/time.c | 4 +++- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..a591971dbf7b 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -47,8 +48,6 @@ #define irq_finish(irq) do { } while (0) #endif -unsigned int arch_nr_irqs; -void (*init_arch_irq)(void) __initdata = NULL; unsigned long irq_err_count; int show_interrupts(struct seq_file *p, void *v) @@ -154,13 +153,13 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) void __init init_IRQ(void) { - init_arch_irq(); + machine_desc->init_irq(); } #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { - nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; + nr_irqs = machine_desc->nr_irqs ? machine_desc->nr_irqs : NR_IRQS; return nr_irqs; } #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 0826f36330c4..e53132eee27a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -126,6 +126,7 @@ EXPORT_SYMBOL(elf_platform); static const char *cpu_name; static const char *machine_name; static char __initdata cmd_line[COMMAND_LINE_SIZE]; +struct machine_desc *machine_desc __initdata; static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } }; @@ -708,13 +709,11 @@ static struct init_tags { { 0, ATAG_NONE } }; -static void (*init_machine)(void) __initdata; - static int __init customize_machine(void) { /* customizes platform devices, or adds new ones */ - if (init_machine) - init_machine(); + if (machine_desc->init_machine) + machine_desc->init_machine(); return 0; } arch_initcall(customize_machine); @@ -809,6 +808,7 @@ void __init setup_arch(char **cmdline_p) setup_processor(); mdesc = setup_machine(machine_arch_type); + machine_desc = mdesc; machine_name = mdesc->name; if (mdesc->soft_reboot) @@ -868,13 +868,6 @@ void __init setup_arch(char **cmdline_p) cpu_init(); tcm_init(); - /* - * Set up various architecture-specific pointers - */ - arch_nr_irqs = mdesc->nr_irqs; - init_arch_irq = mdesc->init_irq; - system_timer = mdesc->timer; - init_machine = mdesc->init_machine; #ifdef CONFIG_MULTI_IRQ_HANDLER handle_arch_irq = mdesc->handle_irq; #endif diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 38c261f9951c..f1e2eb19a67d 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -30,12 +30,13 @@ #include #include #include +#include #include /* * Our system timer. */ -struct sys_timer *system_timer; +static struct sys_timer *system_timer; #if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) /* this needs a better home */ @@ -160,6 +161,7 @@ device_initcall(timer_init_sysfs); void __init time_init(void) { + system_timer = machine_desc->timer; system_timer->init(); } -- cgit v1.2.1 From dec12e62c03d26bbc7a142f067215a3a43cce7d0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 16 Dec 2010 13:49:34 +0000 Subject: ARM: provide an early platform initialization hook This allows platforms to hook into the initialization early to setup things like scheduler clocks, etc. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e53132eee27a..3455ad33de4c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -880,6 +880,9 @@ void __init setup_arch(char **cmdline_p) #endif #endif early_trap_init(); + + if (mdesc->init_early) + mdesc->init_early(); } -- cgit v1.2.1