diff options
author | Filipe Brandenburger <filbranden@google.com> | 2018-11-02 09:21:57 -0700 |
---|---|---|
committer | Filipe Brandenburger <filbranden@google.com> | 2019-02-14 11:04:42 -0800 |
commit | 10f28641115733c61754342d5dcbe70b083bea4b (patch) | |
tree | f0f8ab9e60ffa381fed0ad638e785ac601d29164 | |
parent | 7b61ce3c44ef5908e817009ce4f9d2a7a37722be (diff) | |
download | systemd-10f28641115733c61754342d5dcbe70b083bea4b.tar.gz |
core: add CPUQuotaPeriodSec=
This new setting allows configuration of CFS period on the CPU cgroup, instead
of using a hardcoded default of 100ms.
Tested:
- Legacy cgroup + Unified cgroup
- systemctl set-property
- systemctl show
- Confirmed that the cgroup settings (such as cpu.cfs_period_ns) were set
appropriately, including updating the CPU quota (cpu.cfs_quota_ns) when
CPUQuotaPeriodSec= is updated.
- Checked that clamping works properly when either period or (quota * period)
are below the resolution of 1ms, or if period is above the max of 1s.
-rw-r--r-- | docs/TRANSIENT-SETTINGS.md | 1 | ||||
-rw-r--r-- | man/systemd.resource-control.xml | 19 | ||||
-rw-r--r-- | src/core/cgroup.c | 58 | ||||
-rw-r--r-- | src/core/cgroup.h | 3 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 23 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 1 | ||||
-rw-r--r-- | src/core/load-fragment.c | 1 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 14 | ||||
-rw-r--r-- | src/test/meson.build | 5 | ||||
-rw-r--r-- | src/test/test-cgroup-cpu.c | 38 |
10 files changed, 154 insertions, 9 deletions
diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md index 0ac77f0497..ac89fb174f 100644 --- a/docs/TRANSIENT-SETTINGS.md +++ b/docs/TRANSIENT-SETTINGS.md @@ -224,6 +224,7 @@ All cgroup/resource control settings are available for transient units ✓ CPUShares= ✓ StartupCPUShares= ✓ CPUQuota= +✓ CPUQuotaPeriodSec= ✓ MemoryAccounting= ✓ MemoryMin= ✓ MemoryLow= diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index a4d793c32e..744bfa938f 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -199,6 +199,25 @@ </varlistentry> <varlistentry> + <term><varname>CPUQuotaPeriodSec=</varname></term> + + <listitem> + <para>Assign the duration over which the CPU time quota specified by <varname>CPUQuota=</varname> is measured. + Takes a time duration value in seconds, with an optional suffix such as "ms" for milliseconds (or "s" for seconds.) + The default setting is 100ms. The period is clamped to the range supported by the kernel, which is [1ms, 1000ms]. + Additionally, the period is adjusted up so that the quota interval is also at least 1ms. + Setting <varname>CPUQuotaPeriodSec=</varname> to an empty value resets it to the default.</para> + + <para>This controls the second field of <literal>cpu.max</literal> attribute on the unified control group hierarchy + and <literal>cpu.cfs_period_us</literal> on legacy. For details about these control group attributes, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and + <ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para> + + <para>Example: <varname>CPUQuotaPeriodSec=10ms</varname> to request that the CPU quota is measured in periods of 10ms.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>MemoryAccounting=</varname></term> <listitem> diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 18d470b6d6..9fe3ba9907 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -25,7 +25,7 @@ #include "string-util.h" #include "virt.h" -#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask @@ -98,6 +98,7 @@ void cgroup_context_init(CGroupContext *c) { .cpu_weight = CGROUP_WEIGHT_INVALID, .startup_cpu_weight = CGROUP_WEIGHT_INVALID, .cpu_quota_per_sec_usec = USEC_INFINITY, + .cpu_quota_period_usec = USEC_INFINITY, .cpu_shares = CGROUP_CPU_SHARES_INVALID, .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID, @@ -206,6 +207,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { CGroupDeviceAllow *a; IPAddressAccessItem *iaai; char u[FORMAT_TIMESPAN_MAX]; + char v[FORMAT_TIMESPAN_MAX]; assert(c); assert(f); @@ -224,6 +226,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" + "%sCPUQuotaPeriodSec=%s\n" "%sIOWeight=%" PRIu64 "\n" "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" @@ -248,6 +251,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), + prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1), prefix, c->io_weight, prefix, c->startup_io_weight, prefix, c->blockio_weight, @@ -656,6 +660,39 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) return CGROUP_CPU_SHARES_DEFAULT; } +usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) { + /* kernel uses a minimum resolution of 1ms, so both period and (quota * period) + * need to be higher than that boundary. quota is specified in USecPerSec. + * Additionally, period must be at most max_period. */ + assert(quota > 0); + + return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period); +} + +static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) { + usec_t new_period; + + if (quota == USEC_INFINITY) + /* Always use default period for infinity quota. */ + return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC; + + if (period == USEC_INFINITY) + /* Default period was requested. */ + period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC; + + /* Clamp to interval [1ms, 1s] */ + new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC); + + if (new_period != period) { + char v[FORMAT_TIMESPAN_MAX]; + log_unit_full(u, LOG_WARNING, 0, + "Clamping CPU interval for cpu.max: period is now %s", + format_timespan(v, sizeof(v), new_period, 1)); + } + + return new_period; +} + static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; @@ -663,14 +700,15 @@ static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) { (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf); } -static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) { +static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) { char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1]; + period = cgroup_cpu_adjust_period_and_log(u, period, quota); if (quota != USEC_INFINITY) xsprintf(buf, USEC_FMT " " USEC_FMT "\n", - quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); + MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period); else - xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + xsprintf(buf, "max " USEC_FMT "\n", period); (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf); } @@ -681,14 +719,16 @@ static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) { (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf); } -static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) { +static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) { char buf[DECIMAL_STR_MAX(usec_t) + 2]; - xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + period = cgroup_cpu_adjust_period_and_log(u, period, quota); + + xsprintf(buf, USEC_FMT "\n", period); (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf); if (quota != USEC_INFINITY) { - xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); + xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC)); (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf); } else (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n"); @@ -911,7 +951,7 @@ static void cgroup_context_apply( weight = CGROUP_WEIGHT_DEFAULT; cgroup_apply_unified_cpu_weight(u, weight); - cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec); + cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec); } else { uint64_t shares; @@ -930,7 +970,7 @@ static void cgroup_context_apply( shares = CGROUP_CPU_SHARES_DEFAULT; cgroup_apply_legacy_cpu_shares(u, shares); - cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec); + cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec); } } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 266daa20a5..42da777c29 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -83,6 +83,7 @@ struct CGroupContext { uint64_t cpu_weight; uint64_t startup_cpu_weight; usec_t cpu_quota_per_sec_usec; + usec_t cpu_quota_period_usec; uint64_t io_weight; uint64_t startup_io_weight; @@ -135,6 +136,8 @@ typedef enum CGroupIPAccountingMetric { typedef struct Unit Unit; typedef struct Manager Manager; +usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period); + void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 53890bcafb..d1c34fed1b 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -330,6 +330,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), + SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0), SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0), SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0), SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0), @@ -727,6 +728,28 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUQuotaPeriodUSec")) { + uint64_t u64; + + r = sd_bus_message_read(message, "t", &u64); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->cpu_quota_period_usec = u64; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + if (c->cpu_quota_period_usec == USEC_INFINITY) + unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec="); + else { + char v[FORMAT_TIMESPAN_MAX]; + unit_write_settingf(u, flags, "CPUQuotaPeriodSec", + "CPUQuotaPeriodSec=%s", + format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1)); + } + } + + return 1; + } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) { const char *path; unsigned n = 0; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index cdbc67f885..ca3923bbb0 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -165,6 +165,7 @@ $1.StartupCPUWeight, config_parse_cg_weight, 0, $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares) $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) +$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) $1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 44f00a3046..1d4a1bd343 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -54,6 +54,7 @@ #include "unit-name.h" #include "unit-printf.h" #include "user-util.h" +#include "time-util.h" #include "web-util.h" static int parse_socket_protocol(const char *s) { diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 9a8051d063..a74b213155 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -464,6 +464,20 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons return 1; } + if (streq(field, "CPUQuotaPeriodSec")) { + usec_t u = USEC_INFINITY; + + r = parse_sec_def_infinity(eq, &u); + if (r < 0) + return log_error_errno(r, "CPU quota period '%s' invalid.", eq); + + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + if (streq(field, "DeviceAllow")) { if (isempty(eq)) diff --git a/src/test/meson.build b/src/test/meson.build index 08026ea8c4..7537bdae34 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -560,6 +560,11 @@ tests += [ [], '', 'manual'], + [['src/test/test-cgroup-cpu.c'], + [libcore, + libshared], + []], + [['src/test/test-cgroup-mask.c', 'src/test/test-helper.c'], [libcore, diff --git a/src/test/test-cgroup-cpu.c b/src/test/test-cgroup-cpu.c new file mode 100644 index 0000000000..a445acc955 --- /dev/null +++ b/src/test/test-cgroup-cpu.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "cgroup.h" +#include "log.h" + +static void test_cgroup_cpu_adjust_period(void) { + log_info("/* %s */", __func__); + + /* Period 1ms, quota 40% -> Period 2.5ms */ + assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 10% -> keep. */ + assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 1ms, quota 1000% -> keep. */ + assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 100ms, quota 30% -> keep. */ + assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 5s, quota 40% -> adjust to 1s. */ + assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 2s, quota 250% -> adjust to 1s. */ + assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10us, quota 5,000,000% -> adjust to 1ms. */ + assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 50,000% -> keep. */ + assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 1% -> adjust to 100ms. */ + assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota .001% -> adjust to 1s. */ + assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 0ms, quota 200% -> adjust to 1ms. */ + assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 0ms, quota 40% -> adjust to 2.5ms. */ + assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); +} + +int main(int argc, char *argv[]) { + test_cgroup_cpu_adjust_period(); + return 0; +} |