summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2019-11-05 13:50:28 +0100
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2019-11-05 20:53:47 +0100
commitf7d2991539635b25ab1005887e1502dd8b98f18d (patch)
tree4014a8d334efa21fb6068887e8d8aec1eb1587f6
parentc631c3d6a340c13adf05dc9b06bd7bd566bd6c9e (diff)
downloadsystemd-f7d2991539635b25ab1005887e1502dd8b98f18d.tar.gz
core: make TasksMax a partially dynamic property
TasksMax= and DefaultTasksMax= can be specified as percentages. We don't actually document of what the percentage is relative to, but the implementation uses the smallest of /proc/sys/kernel/pid_max, /proc/sys/kernel/threads-max, and /sys/fs/cgroup/pids.max (when present). When the value is a percentage, we immediately convert it to an absolute value. If the limit later changes (which can happen e.g. when systemd-sysctl runs), the absolute value becomes outdated. So let's store either the percentage or absolute value, whatever was specified, and only convert to an absolute value when the value is used. For example, when starting a unit, the absolute value will be calculated when the cgroup for the unit is created. Fixes #13419.
-rw-r--r--src/basic/cgroup-util.h1
-rw-r--r--src/basic/limits-util.c1
-rw-r--r--src/core/cgroup.c24
-rw-r--r--src/core/cgroup.h15
-rw-r--r--src/core/dbus-cgroup.c73
-rw-r--r--src/core/dbus-cgroup.h2
-rw-r--r--src/core/dbus-manager.c3
-rw-r--r--src/core/load-fragment.c24
-rw-r--r--src/core/main.c6
-rw-r--r--src/core/manager.c2
-rw-r--r--src/core/manager.h3
-rw-r--r--src/test/test-cgroup-mask.c2
12 files changed, 126 insertions, 30 deletions
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 0a64715af5..6b4d6e1006 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -130,7 +130,6 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
}
/* Default resource limits */
-#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */
#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
typedef enum CGroupUnified {
diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c
index fbf52e55f1..a74d8197ea 100644
--- a/src/basic/limits-util.c
+++ b/src/basic/limits-util.c
@@ -99,7 +99,6 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
}
uint64_t system_tasks_max(void) {
-
uint64_t a = TASKS_MAX, b = TASKS_MAX;
_cleanup_free_ char *root = NULL;
int r;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index ec8a262314..9aa3fadecc 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -17,6 +17,7 @@
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+#include "limits-util.h"
#include "nulstr-util.h"
#include "parse-util.h"
#include "path-util.h"
@@ -36,6 +37,13 @@
* out specific attributes from us. */
#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
+uint64_t tasks_max_scale(const TasksMax *tasks_max) {
+ if (tasks_max->scale == 0)
+ return tasks_max->value;
+
+ return system_tasks_max_scale(tasks_max->value, tasks_max->scale);
+}
+
bool manager_owns_host_root_cgroup(Manager *m) {
assert(m);
@@ -119,7 +127,7 @@ void cgroup_context_init(CGroupContext *c) {
.blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
.startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
- .tasks_max = CGROUP_LIMIT_MAX,
+ .tasks_max = (TasksMax) { CGROUP_LIMIT_MAX },
};
}
@@ -449,7 +457,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryMax"),
prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "MemorySwapMax"),
prefix, c->memory_limit,
- prefix, c->tasks_max,
+ prefix, tasks_max_scale(&c->tasks_max),
prefix, cgroup_device_policy_to_string(c->device_policy),
prefix, strempty(disable_controllers_str),
prefix, yes_no(c->delegate));
@@ -1518,9 +1526,9 @@ static void cgroup_context_apply(
* which is desirable so that there's an official way to release control of the sysctl from
* systemd: set the limit to unbounded and reload. */
- if (c->tasks_max != CGROUP_LIMIT_MAX) {
+ if (tasks_max_isset(&c->tasks_max)) {
u->manager->sysctl_pid_max_changed = true;
- r = procfs_tasks_set_limit(c->tasks_max);
+ r = procfs_tasks_set_limit(tasks_max_scale(&c->tasks_max));
} else if (u->manager->sysctl_pid_max_changed)
r = procfs_tasks_set_limit(TASKS_MAX);
else
@@ -1533,10 +1541,10 @@ static void cgroup_context_apply(
/* The attribute itself is not available on the host root cgroup, and in the container case we want to
* leave it for the container manager. */
if (!is_local_root) {
- if (c->tasks_max != CGROUP_LIMIT_MAX) {
- char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+ if (tasks_max_isset(&c->tasks_max)) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
- sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
+ xsprintf(buf, "%" PRIu64 "\n", tasks_max_scale(&c->tasks_max));
(void) set_attribute_and_warn(u, "pids", "pids.max", buf);
} else
(void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
@@ -1613,7 +1621,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
if (c->tasks_accounting ||
- c->tasks_max != CGROUP_LIMIT_MAX)
+ tasks_max_isset(&c->tasks_max))
mask |= CGROUP_MASK_PIDS;
return CGROUP_MASK_EXTEND_JOINED(mask);
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a66c702125..8f7d19ad4f 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -9,6 +9,19 @@
#include "list.h"
#include "time-util.h"
+typedef struct TasksMax {
+ /* If scale == 0, just use value; otherwise, value / scale.
+ * See tasks_max_scale(). */
+ uint64_t value;
+ uint64_t scale;
+} TasksMax;
+
+static inline bool tasks_max_isset(const TasksMax *tasks_max) {
+ return tasks_max->value != CGROUP_LIMIT_MAX || tasks_max->scale != 0;
+}
+
+uint64_t tasks_max_scale(const TasksMax *tasks_max);
+
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
@@ -136,7 +149,7 @@ struct CGroupContext {
LIST_HEAD(CGroupDeviceAllow, device_allow);
/* Common */
- uint64_t tasks_max;
+ TasksMax tasks_max;
};
/* Used when querying IP accounting data */
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 647c3a512c..01f9039660 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -16,6 +16,8 @@
#include "limits-util.h"
#include "path-util.h"
+BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_scale);
+
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
static int property_get_cgroup_mask(
@@ -382,7 +384,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
- SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+ SD_BUS_PROPERTY("TasksMax", "t", bus_property_get_tasks_max, offsetof(CGroupContext, tasks_max), 0),
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
@@ -715,9 +717,76 @@ BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEI
BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1);
BUS_DEFINE_SET_CGROUP_LIMIT(memory_protection, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
-BUS_DEFINE_SET_CGROUP_LIMIT(tasks_max, CGROUP_MASK_PIDS, system_tasks_max_scale, 1);
#pragma GCC diagnostic pop
+static int bus_cgroup_set_tasks_max(
+ Unit *u,
+ const char *name,
+ TasksMax *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint64_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+
+ if (v < 1)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Value specified in %s is out of range", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = (TasksMax) { v };
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+ if (v == CGROUP_LIMIT_MAX)
+ unit_write_settingf(u, flags, name,
+ "%s=infinity", name);
+ else
+ unit_write_settingf(u, flags, name,
+ "%s=%" PRIu64, name, v);
+ }
+
+ return 1;
+}
+
+static int bus_cgroup_set_tasks_max_scale(
+ Unit *u,
+ const char *name,
+ TasksMax *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint32_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "u", &v);
+ if (r < 0)
+ return r;
+
+ if (v < 1 || v >= UINT32_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Value specified in %s is out of range", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = (TasksMax) { v, UINT32_MAX };
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+ unit_write_settingf(u, flags, name, "%s=%" PRIu32 "%%", "TasksMax",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) v * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
+}
+
int bus_cgroup_set_property(
Unit *u,
CGroupContext *c,
diff --git a/src/core/dbus-cgroup.h b/src/core/dbus-cgroup.h
index 188baa99e9..5ca68a63bf 100644
--- a/src/core/dbus-cgroup.h
+++ b/src/core/dbus-cgroup.h
@@ -9,4 +9,6 @@
extern const sd_bus_vtable bus_cgroup_vtable[];
+int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+
int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 76d8a03702..64a612a7f0 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -9,6 +9,7 @@
#include "architecture.h"
#include "build.h"
#include "bus-common-errors.h"
+#include "dbus-cgroup.h"
#include "dbus-execute.h"
#include "dbus-job.h"
#include "dbus-manager.h"
@@ -2465,7 +2466,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 288de616b6..8e33c83398 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -3263,36 +3263,38 @@ int config_parse_tasks_max(
void *data,
void *userdata) {
- uint64_t *tasks_max = data, v;
- const Unit *u = userdata;
+ TasksMax *tasks_max = data;
+ uint64_t v;
int r;
if (isempty(rvalue)) {
- *tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX;
+ *tasks_max = (TasksMax) {};
return 0;
}
if (streq(rvalue, "infinity")) {
- *tasks_max = CGROUP_LIMIT_MAX;
+ *tasks_max = (TasksMax) { CGROUP_LIMIT_MAX };
return 0;
}
r = parse_permille(rvalue);
- if (r < 0) {
+ if (r >= 0)
+ *tasks_max = (TasksMax) { v, 1000U };
+ else {
r = safe_atou64(rvalue, &v);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue);
return 0;
}
- } else
- v = system_tasks_max_scale(r, 1000U);
- if (v <= 0 || v >= UINT64_MAX) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
- return 0;
+ if (v <= 0 || v >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
+ return 0;
+ }
+
+ *tasks_max = (TasksMax) { v };
}
- *tasks_max = v;
return 0;
}
diff --git a/src/core/main.c b/src/core/main.c
index bfbb9331f3..6e101f0278 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -84,6 +84,8 @@
#include <sanitizer/lsan_interface.h>
#endif
+#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
+
static enum {
ACTION_RUN,
ACTION_HELP,
@@ -135,7 +137,7 @@ static bool arg_default_ip_accounting;
static bool arg_default_blockio_accounting;
static bool arg_default_memory_accounting;
static bool arg_default_tasks_accounting;
-static uint64_t arg_default_tasks_max;
+static TasksMax arg_default_tasks_max;
static sd_id128_t arg_machine_id;
static EmergencyAction arg_cad_burst_action;
static OOMPolicy arg_default_oom_policy;
@@ -2131,7 +2133,7 @@ static void reset_arguments(void) {
arg_default_blockio_accounting = false;
arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
arg_default_tasks_accounting = true;
- arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
+ arg_default_tasks_max = DEFAULT_TASKS_MAX;
arg_machine_id = (sd_id128_t) {};
arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
arg_default_oom_policy = OOM_STOP;
diff --git a/src/core/manager.c b/src/core/manager.c
index 700493ee0d..71078abfa5 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -762,7 +762,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
.default_timer_accuracy_usec = USEC_PER_MINUTE,
.default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
.default_tasks_accounting = true,
- .default_tasks_max = UINT64_MAX,
+ .default_tasks_max = (TasksMax) { UINT64_MAX },
.default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
.default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
.default_restart_usec = DEFAULT_RESTART_USEC,
diff --git a/src/core/manager.h b/src/core/manager.h
index 424bf71704..51df7f8cd9 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -9,6 +9,7 @@
#include "sd-event.h"
#include "cgroup-util.h"
+#include "cgroup.h"
#include "fdset.h"
#include "hashmap.h"
#include "ip-address-access.h"
@@ -349,7 +350,7 @@ struct Manager {
bool default_tasks_accounting;
bool default_ip_accounting;
- uint64_t default_tasks_max;
+ TasksMax default_tasks_max;
usec_t default_timer_accuracy_usec;
OOMPolicy default_oom_policy;
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
index ef3755de41..c26ab6e27e 100644
--- a/src/test/test-cgroup-mask.c
+++ b/src/test/test-cgroup-mask.c
@@ -57,7 +57,7 @@ static int test_cgroup_mask(void) {
m->default_blockio_accounting =
m->default_io_accounting =
m->default_tasks_accounting = false;
- m->default_tasks_max = (uint64_t) -1;
+ m->default_tasks_max = (TasksMax) { UINT64_MAX };
assert_se(manager_startup(m, NULL, NULL) >= 0);