summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Anita Zhang <the.anitazha@gmail.com> 2021-03-26 02:37:01 -0700
committer: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2021-05-07 12:08:28 +0200
commit: af5d0d737c70b4d7a7f7bd95a918494240d9e652 (patch)
tree: d355f5a0ccdca7bfb4054057243852679cab0364
parent: f64e1846ee51a1969f0abadddca66ab2fc1235c8 (diff)
download: systemd-af5d0d737c70b4d7a7f7bd95a918494240d9e652.tar.gz
oomd: threshold swap kill candidates to usages of more than 5%
In some instances, particularly with swap on zram, swap used will be high while there is still a lot of memory available. FB OOMD handles this by thresholding kills to X% of total swap usage. Let's do the same thing here.

Anecdotally, with these thresholds and my laptop, which is exclusively swap on zram, I can sit at 0K / 4G swap free with most of memory free and systemd-oomd doesn't kill anything.

Partially addresses aggressive kill behavior from https://bugzilla.redhat.com/show_bug.cgi?id=1941170

(cherry picked from commit 685b0985f0faeb349d1449f8e9d87a9c87e1e24f)
-rw-r--r-- man/oomd.conf.xml | 6
-rw-r--r-- src/oom/oomd-manager.c | 4
-rw-r--r-- src/oom/oomd-manager.h | 3
-rw-r--r-- src/oom/oomd-util.c | 12
-rw-r--r-- src/oom/oomd-util.h | 2
5 files changed, 16 insertions, 11 deletions
diff --git a/man/oomd.conf.xml b/man/oomd.conf.xml
index 0ae7e109b1..27914e0699 100644
--- a/man/oomd.conf.xml
+++ b/man/oomd.conf.xml
@@ -52,9 +52,9 @@
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command>
will take action. If the fraction of swap used on the system is more than what is defined here,
- <command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
- ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and
- what action gets taken depends on what the unit has configured for
+ <command>systemd-oomd</command> will act on eligible descendant control groups with swap usage greater
+ than 5% of total swap, starting from the ones with the highest swap usage. Which
+ control groups are monitored and what action gets taken depends on what the unit has configured for
<varname>ManagedOOMSwap=</varname>. Takes a value specified in percent (when suffixed with "%"),
permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
</varlistentry>
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
index 07a5d52d1e..49dc5eb26e 100644
--- a/src/oom/oomd-manager.c
+++ b/src/oom/oomd-manager.c
@@ -345,6 +345,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
_cleanup_free_ char *selected = NULL;
+ uint64_t threshold;
log_debug("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
m->system_context.swap_used, m->system_context.swap_total,
@@ -356,7 +357,8 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
if (r < 0)
log_debug_errno(r, "Failed to get monitored swap cgroup candidates, ignoring: %m");
- r = oomd_kill_by_swap_usage(candidates, m->dry_run, &selected);
+ threshold = m->system_context.swap_total * THRESHOLD_SWAP_USED_PERCENT / 100;
+ r = oomd_kill_by_swap_usage(candidates, threshold, m->dry_run, &selected);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h
index e08ad74701..dc170f2bda 100644
--- a/src/oom/oomd-manager.h
+++ b/src/oom/oomd-manager.h
@@ -19,6 +19,9 @@
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90
+/* Only tackle candidates with large swap usage. */
+#define THRESHOLD_SWAP_USED_PERCENT 5
+
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
index a1813bfd81..5bf81479c9 100644
--- a/src/oom/oomd-util.c
+++ b/src/oom/oomd-util.c
@@ -233,7 +233,7 @@ int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char
return ret;
}
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
+int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
int n, r, ret = 0;
@@ -244,12 +244,12 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected) {
if (n < 0)
return n;
- /* Try to kill cgroups with non-zero swap usage until we either succeed in
- * killing or we get to a cgroup with no swap usage. */
+ /* Try to kill cgroups from the sorted candidate list until we succeed in killing one. Only cgroups
+ * whose swap usage is above threshold_usage are eligible; the rest are skipped. */
for (int i = 0; i < n; i++) {
- /* Skip over cgroups with no resource usage.
- * Continue break since there might be "avoid" cgroups at the end. */
- if (sorted[i]->swap_usage == 0)
+ /* Skip over cgroups with not enough swap usage. Don't break since there might be "avoid"
+ * cgroups at the end. */
+ if (sorted[i]->swap_usage <= threshold_usage)
continue;
r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h
index a8fa090755..81fdc5e088 100644
--- a/src/oom/oomd-util.h
+++ b/src/oom/oomd-util.h
@@ -111,7 +111,7 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
* everything in `h` is a candidate.
* Returns the killed cgroup in ret_selected. */
int oomd_kill_by_pgscan_rate(Hashmap *h, const char *prefix, bool dry_run, char **ret_selected);
-int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run, char **ret_selected);
+int oomd_kill_by_swap_usage(Hashmap *h, uint64_t threshold_usage, bool dry_run, char **ret_selected);
int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);