summary refs log tree commit diff
diff options
context:
space:
mode:
author Anita Zhang <the.anitazha@gmail.com> 2021-03-26 03:01:38 -0700
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2021-03-30 14:44:09 +0200
commit 37a7e15968b5dc0d2e3c2160586aeb6b4cd6c90b (patch)
tree eb85353ed5076b3edfde09b048180e75cc40fa10
parent b240c08d0970b0e90f204d54eec653f3c379fd60 (diff)
download systemd-37a7e15968b5dc0d2e3c2160586aeb6b4cd6c90b.tar.gz
oomd: make it more clear when a kill happens
Improve the logging so that a notice is printed only when systemd-oomd actually killed something, and include which cgroup was targeted. Demote the general "swap above limit" and "pressure above limit" messages to debug level. [zjs: fix some issuelets found in review]
-rw-r--r-- src/oom/oomd-manager.c 38
-rw-r--r-- src/oom/oomd-util.c 56
-rw-r--r-- src/oom/oomd-util.h 7
3 files changed, 77 insertions, 24 deletions
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
index 345f8a77cf..c3e84aadde 100644
--- a/src/oom/oomd-manager.c
+++ b/src/oom/oomd-manager.c
@@ -378,10 +378,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
OomdCGroupContext *t;
SET_FOREACH(t, targets) {
- log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
- t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC);
-
- r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run);
+ _cleanup_free_ char *selected = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Memory pressure for %s is %lu.%02lu%% > %lu.%02lu%% for > %s with reclaim activity",
+ t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
+
+ r = oomd_kill_by_pgscan_rate(m->monitored_mem_pressure_cgroup_contexts_candidates, t->path, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
@@ -389,6 +397,15 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
else {
/* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
m->post_action_delay_start = usec_now;
+ if (selected)
+ log_notice("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
+ " for > %s with reclaim activity",
+ selected, t->path,
+ LOAD_INT(t->memory_pressure.avg10), LOAD_FRAC(t->memory_pressure.avg10),
+ LOAD_INT(t->mem_pressure_limit), LOAD_FRAC(t->mem_pressure_limit),
+ format_timespan(ts, sizeof ts,
+ m->default_mem_pressure_duration_usec,
+ USEC_PER_SEC));
return 0;
}
}
@@ -397,9 +414,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
+ _cleanup_free_ char *selected = NULL;
- log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
- m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
+ log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
+ m->system_context.swap_used, m->system_context.swap_total,
+ PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
if (r == -ENOMEM)
@@ -407,13 +426,18 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
if (r < 0)
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
- r = oomd_kill_by_swap_usage(candidates, m->dry_run);
+ r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
log_notice_errno(r, "Failed to kill any cgroup(s) based on swap: %m");
else {
m->post_action_delay_start = usec_now;
+ if (selected)
+ log_notice("Killed %s due to swap used (%"PRIu64") / total (%"PRIu64") being more than "
+ PERMYRIAD_AS_PERCENT_FORMAT_STR,
+ selected, m->system_context.swap_used, m->system_context.swap_total,
+ PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
return 0;
}
}
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
index 7860f2154d..da994848ae 100644
--- a/src/oom/oomd-util.c
+++ b/src/oom/oomd-util.c
@@ -208,35 +208,50 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
return set_size(pids_killed) != 0;
}
-int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run) {
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
- int r;
+ int r, ret = 0;
assert(h);
+ assert(ret_selected);
r = oomd_sort_cgroup_contexts(h, compare_pgscan_rate_and_memory_usage, prefix, &sorted);
if (r < 0)
return r;
for (int i = 0; i < r; i++) {
- /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */
- /* Don't break since there might be "avoid" cgroups at the end. */
+ /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure.
+ * Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
- if (r > 0 || r == -ENOMEM)
- break;
+ if (r == 0)
+ continue; /* We didn't find anything to kill */
+ if (r == -ENOMEM)
+ return r; /* Treat oom as a hard error */
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+ continue; /* Try to find something else to kill */
+ }
+
+ char *selected = strdup(sorted[i]->path);
+ if (!selected)
+ return -ENOMEM;
+ *ret_selected = selected;
+ return 1;
}
- return r;
+ return ret;
}
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
- int r;
+ int r, ret = 0;
assert(h);
+ assert(ret_selected);
r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
if (r < 0)
@@ -245,17 +260,30 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
/* Try to kill cgroups with non-zero swap usage until we either succeed in
* killing or we get to a cgroup with no swap usage. */
for (int i = 0; i < r; i++) {
- /* Skip over cgroups with no resource usage. Don't break since there might be "avoid"
- * cgroups at the end. */
+ /* Skip over cgroups with no resource usage.
+ * Continue since there might be "avoid" cgroups at the end. */
if (sorted[i]->swap_usage == 0)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
- if (r > 0 || r == -ENOMEM)
- break;
+ if (r == 0)
+ continue; /* We didn't find anything to kill */
+ if (r == -ENOMEM)
+ return r; /* Treat oom as a hard error */
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+ continue; /* Try to find something else to kill */
+ }
+
+ char *selected = strdup(sorted[i]->path);
+ if (!selected)
+ return -ENOMEM;
+ *ret_selected = selected;
+ return 1;
}
- return r;
+ return ret;
}
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h
index 560697a4f4..51423130d1 100644
--- a/src/oom/oomd-util.h
+++ b/src/oom/oomd-util.h
@@ -122,9 +122,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
/* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
- * everything in `h` is a candidate. */
-int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run);
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
+ * everything in `h` is a candidate.
+ * When a cgroup is killed (return value 1), a newly allocated copy of its path is stored in *ret_selected; the caller must free it. */
+int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);