diff options
author | Rob Barnes <robbarnes@google.com> | 2022-11-15 01:46:49 +0000 |
---|---|---|
committer | Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-05-01 21:29:22 +0000 |
commit | f2e7fd5a61404ff60652fe6dfc9a4e0d1cdad5a9 (patch) | |
tree | e1790e86d150b2234e5ec4bbc2f72f84c87d6f34 | |
parent | 3567580eb767809e3c0791fb0aba9346752b55ae (diff) | |
download | chrome-ec-f2e7fd5a61404ff60652fe6dfc9a4e0d1cdad5a9.tar.gz |
system: Implement system safe mode
Basic implementation of system safe mode recovery.
System safe mode is a recovery mode that may be started after
a fault/panic. It allows the AP to collect info about the fault
and system state before the system resets.
This CL only includes support for legacy CrOS EC.
BUG=b:249128225
BRANCH=None
TEST=Manually tested on octopus
Orig-Change-Id: I15139bb082011485b54e4ca7813839940bf5401a
Orig-Signed-off-by: Rob Barnes <robbarnes@google.com>
Orig-Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/4029604
Orig-Reviewed-by: Daisuke Nojiri <dnojiri@chromium.org>
Change-Id: I8abd563b82b611dafbc9fe1fda05ac6ade2b7c91
Signed-off-by: Rob Barnes <robbarnes@google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/4320966
Reviewed-by: Boris Mittelberg <bmbm@google.com>
-rw-r--r-- | common/build.mk | 1 | ||||
-rw-r--r-- | common/host_command.c | 6 | ||||
-rw-r--r-- | common/system_safe_mode.c | 127 | ||||
-rw-r--r-- | core/cortex-m/cpu.c | 38 | ||||
-rw-r--r-- | core/cortex-m/cpu.h | 5 | ||||
-rw-r--r-- | core/cortex-m/panic.c | 32 | ||||
-rw-r--r-- | include/config.h | 7 | ||||
-rw-r--r-- | include/panic.h | 4 | ||||
-rw-r--r-- | include/system_safe_mode.h | 68 |
9 files changed, 288 insertions, 0 deletions
diff --git a/common/build.mk b/common/build.mk
index 32e61123cb..f91d020f54 100644
--- a/common/build.mk
+++ b/common/build.mk
@@ -151,6 +151,7 @@ common-$(HAS_TASK_KEYSCAN)+=keyboard_scan.o
 common-$(HAS_TASK_LIGHTBAR)+=lb_common.o lightbar.o
 common-$(HAS_TASK_MOTIONSENSE)+=motion_sense.o
 common-$(HAS_TASK_TPM)+=tpm_registers.o
+common-$(CONFIG_SYSTEM_SAFE_MODE)+=system_safe_mode.o
 
 ifneq ($(CONFIG_COMMON_RUNTIME),)
 common-$(CONFIG_MALLOC)+=shmalloc.o
diff --git a/common/host_command.c b/common/host_command.c
index cad9fd94a9..38113f021e 100644
--- a/common/host_command.c
+++ b/common/host_command.c
@@ -14,6 +14,7 @@
 #include "lpc.h"
 #include "shared_mem.h"
 #include "system.h"
+#include "system_safe_mode.h"
 #include "task.h"
 #include "timer.h"
 #include "util.h"
@@ -403,6 +404,11 @@ static const struct host_command *find_host_command(int command)
 #else
 	const struct host_command *cmd;
 
+	if (IS_ENABLED(CONFIG_SYSTEM_SAFE_MODE) && system_is_in_safe_mode()) {
+		if (!command_is_allowed_in_safe_mode(command))
+			return NULL;
+	}
+
 	for (cmd = __hcmds; cmd < __hcmds_end; cmd++) {
 		if (command == cmd->command)
 			return cmd;
diff --git a/common/system_safe_mode.c b/common/system_safe_mode.c
new file mode 100644
index 0000000000..dbad287463
--- /dev/null
+++ b/common/system_safe_mode.c
@@ -0,0 +1,127 @@
+/* Copyright 2022 The ChromiumOS Authors
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "common.h"
+#include "console.h"
+#include "cpu.h"
+#include "ec_commands.h"
+#include "hooks.h"
+#include "panic.h"
+#include "stdbool.h"
+#include "stddef.h"
+#include "system.h"
+#include "system_safe_mode.h"
+#include "task.h"
+#include "timer.h"
+#include "watchdog.h"
+
+static bool in_safe_mode;
+
+static const int safe_mode_allowed_hostcmds[] = {
+	EC_CMD_GET_PROTOCOL_INFO,
+	EC_CMD_GET_VERSION, EC_CMD_CONSOLE_SNAPSHOT,
+	EC_CMD_CONSOLE_READ, EC_CMD_GET_NEXT_EVENT,
+	EC_CMD_GET_UPTIME_INFO
+};
+
+#ifndef CONFIG_ZEPHYR
+
+/* TODO: This function can be generalized for zephyr and legacy EC by
+ * improving ec_tasks support in zephyr.
+ */
+static bool task_is_safe_mode_critical(task_id_t task_id)
+{
+	const task_id_t safe_mode_critical_tasks[] = {
+		TASK_ID_HOOKS,
+		TASK_ID_IDLE,
+		TASK_ID_HOSTCMD,
+	};
+	for (int i = 0; i < ARRAY_SIZE(safe_mode_critical_tasks); i++)
+		if (safe_mode_critical_tasks[i] == task_id)
+			return true;
+	return false;
+}
+
+bool current_task_is_safe_mode_critical(void)
+{
+	return task_is_safe_mode_critical(task_get_current());
+}
+
+int disable_non_safe_mode_critical_tasks(void)
+{
+	for (task_id_t task_id = 0; task_id < TASK_ID_COUNT; task_id++) {
+		if (!task_is_safe_mode_critical(task_id)) {
+			task_disable_task(task_id);
+		}
+	}
+	return EC_SUCCESS;
+}
+
+#endif /* CONFIG_ZEPHYR */
+
+void handle_system_safe_mode_timeout(void)
+{
+	panic_printf("Safe mode timeout after %d msec\n",
+		     CONFIG_SYSTEM_SAFE_MODE_TIMEOUT_MSEC);
+	panic_reboot();
+}
+DECLARE_DEFERRED(handle_system_safe_mode_timeout);
+
+__overridable int schedule_system_safe_mode_timeout(void)
+{
+	hook_call_deferred(&handle_system_safe_mode_timeout_data,
+			   CONFIG_SYSTEM_SAFE_MODE_TIMEOUT_MSEC * MSEC);
+	return EC_SUCCESS;
+}
+
+bool system_is_in_safe_mode(void)
+{
+	return !!in_safe_mode;
+}
+
+bool command_is_allowed_in_safe_mode(int command)
+{
+	for (int i = 0; i < ARRAY_SIZE(safe_mode_allowed_hostcmds); i++)
+		if (command == safe_mode_allowed_hostcmds[i])
+			return true;
+	return false;
+}
+
+int start_system_safe_mode(void)
+{
+	if (!system_is_in_rw()) {
+		panic_printf("Can only enter safe mode from RW image\n");
+		return EC_ERROR_INVAL;
+	}
+
+	if (system_is_in_safe_mode()) {
+		panic_printf("Already in system safe mode\n");
+		return EC_ERROR_INVAL;
+	}
+
+	if (current_task_is_safe_mode_critical()) {
+		/* TODO: Restart critical tasks */
+		panic_printf(
+			"Fault in critical task, cannot enter system safe mode\n");
+		return EC_ERROR_INVAL;
+	}
+
+	disable_non_safe_mode_critical_tasks();
+
+	schedule_system_safe_mode_timeout();
+
+	in_safe_mode = true;
+
+	panic_printf("\nStarting system safe mode\n");
+
+	return EC_SUCCESS;
+}
+
+#ifdef TEST_BUILD
+void set_system_safe_mode(bool mode)
+{
+	in_safe_mode = mode;
+}
+#endif
diff --git a/core/cortex-m/cpu.c b/core/cortex-m/cpu.c
index 4ed61f44e0..1bc8d75ebc 100644
--- a/core/cortex-m/cpu.c
+++ b/core/cortex-m/cpu.c
@@ -9,6 +9,10 @@
 #include "cpu.h"
 #include "hooks.h"
 
+#define STACK_IDX_REG_LR 5
+#define STACK_IDX_REG_PC 6
+#define STACK_IDX_REG_PSR 7
+
 void cpu_init(void)
 {
 	/* Catch divide by 0 and unaligned access */
@@ -19,6 +23,40 @@ void cpu_init(void)
 		CPU_NVIC_SHCSR_BUSFAULTENA | CPU_NVIC_SHCSR_USGFAULTENA;
 }
 
+void cpu_return_from_exception_msp(void (*func)(void))
+{
+	uint32_t *msp;
+
+	__asm__ volatile("mrs %0, msp" : "=r"(msp));
+
+	msp[STACK_IDX_REG_LR] = 0; /* Will never return */
+	msp[STACK_IDX_REG_PC] = (uint32_t)func; /* Return to this function */
+	msp[STACK_IDX_REG_PSR] = (1 << 24); /* Just set thumb mode */
+
+	/* Return from exception using main stack */
+	__asm__ volatile("bx %0" : : "r"(0xFFFFFFF9));
+
+	/* should not reach here */
+	__builtin_unreachable();
+}
+
+void cpu_return_from_exception_psp(void (*func)(void))
+{
+	uint32_t *psp;
+
+	__asm__ volatile("mrs %0, psp" : "=r"(psp));
+
+	psp[STACK_IDX_REG_LR] = 0; /* Will never return */
+	psp[STACK_IDX_REG_PC] = (uint32_t)func; /* Return to this function */
+	psp[STACK_IDX_REG_PSR] = (1 << 24); /* Just set thumb mode */
+
+	/* Return from exception using process stack */
+	__asm__ volatile("bx %0" : : "r"(0xFFFFFFFD));
+
+	/* should not reach here */
+	__builtin_unreachable();
+}
+
 #ifdef CONFIG_ARMV7M_CACHE
 static void cpu_invalidate_icache(void)
 {
diff --git a/core/cortex-m/cpu.h b/core/cortex-m/cpu.h
index a6029e2e7e..4ff788dbf0 100644
--- a/core/cortex-m/cpu.h
+++ b/core/cortex-m/cpu.h
@@ -71,4 +71,9 @@ void cpu_invalidate_dcache(void);
 /* Clean and Invalidate the D-cache to the Point of Coherency */
 void cpu_clean_invalidate_dcache(void);
 
+/* Return to specified function from exception handler using main stack. */
+void cpu_return_from_exception_msp(void (*func)(void));
+/* Return to specified function from exception handler using process stack. */
+void cpu_return_from_exception_psp(void (*func)(void));
+
 #endif /* __CROS_EC_CPU_H */
diff --git a/core/cortex-m/panic.c b/core/cortex-m/panic.c
index 9820214ae7..5722e48910 100644
--- a/core/cortex-m/panic.c
+++ b/core/cortex-m/panic.c
@@ -11,6 +11,7 @@
 #include "panic-internal.h"
 #include "printf.h"
 #include "system.h"
+#include "system_safe_mode.h"
 #include "task.h"
 #include "timer.h"
 #include "uart.h"
@@ -303,6 +304,16 @@ void panic_data_print(const struct panic_data *pdata)
 #endif
 }
 
+/* This is just a placeholder function for returning from exception.
+ * It's not expected to actually be executed.
+ */
+static void exception_return_placeholder(void)
+{
+	panic_printf("Unexpected return from exception\n");
+	panic_reboot();
+	__builtin_unreachable();
+}
+
 void __keep report_panic(void)
 {
 	/*
@@ -352,6 +363,27 @@ void __keep report_panic(void)
 	 * exception happened in a handler's context.
 	 */
 #endif
+
+	/* Start safe mode if possible */
+	if (IS_ENABLED(CONFIG_SYSTEM_SAFE_MODE)) {
+		/* TODO: check for nested exceptions */
+		if (start_system_safe_mode() == EC_SUCCESS) {
+			/* Return from exception on process stack.
+			 * We should not actually land in
+			 * exception_return_placeholder function. Instead the
+			 * scheduler should interrupt and schedule
+			 * a different task since the current task has
+			 * been disabled.
+			 */
+			pdata->flags |= PANIC_DATA_FLAG_SAFE_MODE_STARTED;
+			cpu_return_from_exception_psp(
+				exception_return_placeholder);
+
+			__builtin_unreachable();
+		}
+		pdata->flags |= PANIC_DATA_FLAG_SAFE_MODE_FAIL_PRECONDITIONS;
+	}
+
 	panic_reboot();
 }
 
diff --git a/include/config.h b/include/config.h
index f153145e30..753a97e6d5 100644
--- a/include/config.h
+++ b/include/config.h
@@ -1159,6 +1159,13 @@
 #undef CONFIG_CHIP_PANIC_BACKUP
 
 /*
+ * When defined, it enables system safe mode. System safe mode allows the AP to
+ * capture the EC state after a panic.
+ */
+#undef CONFIG_SYSTEM_SAFE_MODE
+#define CONFIG_SYSTEM_SAFE_MODE_TIMEOUT_MSEC 2000
+
+/*
  * Provide the default GPIO abstraction layer.
  * You want this unless you are doing a really tiny firmware.
  */
diff --git a/include/panic.h b/include/panic.h
index 80f69a85cc..0d3a83a783 100644
--- a/include/panic.h
+++ b/include/panic.h
@@ -82,6 +82,10 @@ enum panic_arch {
 #define PANIC_DATA_FLAG_OLD_HOSTEVENT (1 << 3)
 /* The data was truncated to fit panic info host cmd */
 #define PANIC_DATA_FLAG_TRUNCATED (1 << 4)
+/* System safe mode was started after a panic */
+#define PANIC_DATA_FLAG_SAFE_MODE_STARTED (1 << 5)
+/* System safe mode failed to start */
+#define PANIC_DATA_FLAG_SAFE_MODE_FAIL_PRECONDITIONS (1 << 6)
 
 /**
  * Write a string to the panic reporting device
diff --git a/include/system_safe_mode.h b/include/system_safe_mode.h
new file mode 100644
index 0000000000..b7b31d9707
--- /dev/null
+++ b/include/system_safe_mode.h
@@ -0,0 +1,68 @@
+/* Copyright 2022 The ChromiumOS Authors
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef __CROS_EC_SYSTEM_SAFE_MODE_H
+#define __CROS_EC_SYSTEM_SAFE_MODE_H
+
+#include "stdbool.h"
+
+/**
+ * Checks if running in system safe mode
+ *
+ * @return True if system is running in system safe mode
+ */
+bool system_is_in_safe_mode(void);
+
+/**
+ * Checks if command is allowed in system safe mode
+ *
+ * @return True if command is allowed in system safe mode
+ */
+bool command_is_allowed_in_safe_mode(int command);
+
+/**
+ * Checks if the current task is critical for system safe mode
+ *
+ * @return True if the current task is safe mode critical
+ */
+bool current_task_is_safe_mode_critical(void);
+
+/**
+ * Disables tasks that are not critical for safe mode
+ *
+ * @return EC_SUCCESS or EC_xxx on error
+ */
+int disable_non_safe_mode_critical_tasks(void);
+
+/**
+ * Start system safe mode.
+ *
+ * System safe mode can only be started after a panic in RW image.
+ * It will only run briefly so the AP can capture EC state.
+ *
+ * @return EC_SUCCESS or EC_xxx on error
+ */
+int start_system_safe_mode(void);
+
+/**
+ * Schedules safe mode timeout.
+ *
+ * @return EC_SUCCESS or EC_xxx on error
+ */
+int schedule_system_safe_mode_timeout(void);
+
+/**
+ * This handler is called when safe mode times out.
+ */
+void handle_system_safe_mode_timeout(void);
+
+#ifdef TEST_BUILD
+/**
+ * Directly set safe mode flag. Only used in tests.
+ */
+void set_system_safe_mode(bool mode);
+#endif
+
+#endif /* __CROS_EC_SYSTEM_SAFE_MODE_H */