diff options
author | Anita Zhang <the.anitazha@gmail.com> | 2021-03-26 03:01:38 -0700 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2021-03-30 14:44:09 +0200 |
commit | 37a7e15968b5dc0d2e3c2160586aeb6b4cd6c90b (patch) | |
tree | eb85353ed5076b3edfde09b048180e75cc40fa10 | |
parent | b240c08d0970b0e90f204d54eec653f3c379fd60 (diff) | |
download | systemd-37a7e15968b5dc0d2e3c2160586aeb6b4cd6c90b.tar.gz |
oomd: make it more clear when a kill happens
Improve the logging so that a notice is printed only if systemd-oomd actually
killed something, and include which cgroup was targeted in that message.
Demote the general "swap used above limit" and "memory pressure above limit" messages to debug level.
[zjs: fix some issuelets found in review]
-rw-r--r-- | src/oom/oomd-manager.c | 38 | ||||
-rw-r--r-- | src/oom/oomd-util.c | 56 | ||||
-rw-r--r-- | src/oom/oomd-util.h | 7 |
3 files changed, 77 insertions, 24 deletions
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c index 345f8a77cf..c3e84aadde 100644 --- a/src/oom/oomd-manager.c +++ b/src/oom/oomd-manager.c @@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo OomdCGroupContext *t; SET_FOREACH(t, targets) { - log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity", - t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC); - - r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run); + _cleanup_free_ char *selected = NULL; + char ts[FORMAT_TIMESPAN_MAX]; + + log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity", + t->path, + LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), + LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), + format_timespan(ts, sizeof ts, + m->default_mem_pressure_duration_usec, + USEC_PER_SEC)); + + r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected); if (r == -ENOMEM) return log_oom(); if (r < 0) @@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo else { /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */ m->post_action_delay_start = usec_now; + if (selected) + log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%" + " for > %s with reclaim activity", + selected, t->path, + LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10), + LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit), + format_timespan(ts, sizeof ts, + m->default_mem_pressure_duration_usec, + USEC_PER_SEC)); return 0; } } @@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo if 
(oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) { _cleanup_hashmap_free_ Hashmap *candidates = NULL; + _cleanup_free_ char *selected = NULL; - log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, - m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); + log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR, + m->system_context.swap_used, m->system_context.swap_total, + PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates); if (r == -ENOMEM) @@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo if (r < 0) log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m"); - r = oomd_kill_by_swap_usage(candidates, m->dry_run); + r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected); if (r == -ENOMEM) return log_oom(); if (r < 0) log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m"); else { m->post_action_delay_start = usec_now; + if (selected) + log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than " + PERMYRIAD_AS_PERCENT_FORMAT_STR, + selected, m->system_context.swap_used, m->system_context.swap_total, + PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad)); return 0; } } diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c index 7860f2154d..da994848ae 100644 --- a/src/oom/oomd-util.c +++ b/src/oom/oomd-util.c @@ -208,35 +208,50 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { return set_size(pids_killed) != 0; } -int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) { +int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) { _cleanup_free_ 
OomdCGroupContext **sorted = NULL; - int r; + int r, ret = 0; assert(h); + assert(ret_selected); r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted); if (r < 0) return r; for (int i = 0; i < r; i++) { - /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */ - /* Don't break since there might be "avoid" cgroups at the end. */ + /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. + * Continue since there might be "avoid" cgroups at the end. */ if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) continue; r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) - break; + if (r == 0) + continue; /* We didn't find anything to kill */ + if (r == -ENOMEM) + return r; /* Treat oom as a hard error */ + if (r < 0) { + if (ret == 0) + ret = r; + continue; /* Try to find something else to kill */ + } + + char *selected = strdup(sorted[i]->path); + if (!selected) + return -ENOMEM; + *ret_selected = selected; + return 1; } - return r; + return ret; } -int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { +int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) { _cleanup_free_ OomdCGroupContext **sorted = NULL; - int r; + int r, ret = 0; assert(h); + assert(ret_selected); r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted); if (r < 0) @@ -245,17 +260,30 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { /* Try to kill cgroups with non-zero swap usage until we either succeed in * killing or we get to a cgroup with no swap usage. */ for (int i = 0; i < r; i++) { - /* Skip over cgroups with no resource usage. Don't break since there might be "avoid" - * cgroups at the end. */ + /* Skip over cgroups with no resource usage. + * Continue break since there might be "avoid" cgroups at the end. 
*/ if (sorted[i]->swap_usage == 0) continue; r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) - break; + if (r == 0) + continue; /* We didn't find anything to kill */ + if (r == -ENOMEM) + return r; /* Treat oom as a hard error */ + if (r < 0) { + if (ret == 0) + ret = r; + continue; /* Try to find something else to kill */ + } + + char *selected = strdup(sorted[i]->path); + if (!selected) + return -ENOMEM; + *ret_selected = selected; + return 1; } - return r; + return ret; } int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h index 560697a4f4..51423130d1 100644 --- a/src/oom/oomd-util.h +++ b/src/oom/oomd-util.h @@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run); /* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */ /* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise, - * everything in `h` is a candidate. */ -int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run); -int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run); + * everything in `h` is a candidate. + * Returns the killed cgroup in ret_selected. */ +int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected); +int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected); int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret); int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret); |