summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tejun Heo <htejun@fb.com> 2016-05-18 17:35:12 -0700
committer: Tejun Heo <tj@kernel.org> 2016-05-18 17:35:12 -0700
commit: 538b48524cf48afc299ab78690bc03c18af67ede (patch)
tree: 70c5612292ae16f1485644bab7c80bdba365565e
parent: 64faf04c1f178804e066972ab0d6ff1181c0d373 (diff)
download: systemd-538b48524cf48afc299ab78690bc03c18af67ede.tar.gz
core: translate between IO and BlockIO settings to ease transition

Due to the substantial interface changes in cgroup unified hierarchy, new IO settings are introduced. Currently, IO settings apply only to unified hierarchy and BlockIO to legacy. While the transition is necessary, it's painful for users to have to provide configs for both.

This patch implements translation from one config set to another for configs which make sense.

* The translation takes place during application of the configs. Users won't see IO or BlockIO settings appearing without being explicitly created.

* The translation takes place only if there is no config for the matching cgroup hierarchy type at all.

While this doesn't provide comprehensive compatibility, it should considerably ease transition to the new IO settings which are a superset of BlockIO settings.

v2:
- Update test-cgroup-mask.c so that it accounts for the fact that CGROUP_MASK_IO and CGROUP_MASK_BLKIO move together. Also, test/parent.slice now sets IOWeight instead of BlockIOWeight.

-rw-r--r-- man/systemd.resource-control.xml 25
-rw-r--r-- src/core/cgroup.c 146
-rw-r--r-- src/test/test-cgroup-mask.c 13
-rw-r--r-- test/parent.slice 2
4 files changed, 148 insertions, 38 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 8a95e1196b..066f2cc19b 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -99,6 +99,31 @@
</refsect1>
<refsect1>
+ <title>Unified and Legacy Control Group Hierarchies</title>
+
+ <para>Unified control group hierarchy is the new version of kernel control group interface. Depending on the
+ resource type, there are differences in resource control capabilities. Also, because of interface changes, some
+ resource types have a separate set of options on the unified hierarchy.</para>
+
+ <para>
+ <variablelist>
+ <varlistentry>
+ <term><option>IO</option></term>
+ <listitem>
+ <para><varname>IO</varname> prefixed settings are a superset of and replace <varname>BlockIO</varname>
+ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>To ease the transition, there is best-effort translation between the two versions of settings. If all
+ settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and
+ applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource
+ type. Mixing the two types of settings on a unit can lead to confusing results.</para>
+ </refsect1>
+
+ <refsect1>
<title>Options</title>
<para>Units of the types listed above can have settings
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index ddc4f69ebe..8b8b2ac5ff 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -358,6 +358,24 @@ fail:
return -errno;
}
+static bool cgroup_context_has_io_config(CGroupContext *c)
+{
+ return c->io_accounting ||
+ c->io_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
+ c->io_device_weights ||
+ c->io_device_limits;
+}
+
+static bool cgroup_context_has_blockio_config(CGroupContext *c)
+{
+ return c->blockio_accounting ||
+ c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->blockio_device_weights ||
+ c->blockio_device_bandwidths;
+}
+
static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state)
{
if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
@@ -380,6 +398,18 @@ static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state
return CGROUP_BLKIO_WEIGHT_DEFAULT;
}
+static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight)
+{
+ return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight)
+{
+ return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
+}
+
static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight)
{
char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
@@ -525,14 +555,19 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
}
if (mask & CGROUP_MASK_IO) {
- CGroupIODeviceLimit *l, *next;
+ bool has_io = cgroup_context_has_io_config(c);
+ bool has_blockio = cgroup_context_has_blockio_config(c);
if (!is_root) {
char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
uint64_t weight;
- CGroupIODeviceWeight *w;
- weight = cgroup_context_io_weight(c, state);
+ if (has_io)
+ weight = cgroup_context_io_weight(c, state);
+ else if (has_blockio)
+ weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state));
+ else
+ weight = CGROUP_WEIGHT_DEFAULT;
xsprintf(buf, "default %" PRIu64 "\n", weight);
r = cg_set_attribute("io", path, "io.weight", buf);
@@ -540,27 +575,62 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set io.weight on %s: %m", path);
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->io_device_weights)
- cgroup_apply_io_device_weight(path, w->path, w->weight);
+ if (has_io) {
+ CGroupIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->io_device_weights)
+ cgroup_apply_io_device_weight(path, w->path, w->weight);
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight));
+ }
}
/* Apply limits and free ones without config. */
- LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
- if (!cgroup_apply_io_device_limit(path, l->path, l->limits))
- cgroup_context_free_io_device_limit(c, l);
+ if (has_io) {
+ CGroupIODeviceLimit *l, *next;
+
+ LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
+ if (!cgroup_apply_io_device_limit(path, l->path, l->limits))
+ cgroup_context_free_io_device_limit(c, l);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b, *next;
+
+ LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
+ uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
+ CGroupIOLimitType type;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ limits[type] = cgroup_io_limit_defaults[type];
+
+ limits[CGROUP_IO_RBPS_MAX] = b->rbps;
+ limits[CGROUP_IO_WBPS_MAX] = b->wbps;
+
+ if (!cgroup_apply_io_device_limit(path, b->path, limits))
+ cgroup_context_free_blockio_device_bandwidth(c, b);
+ }
}
}
if (mask & CGROUP_MASK_BLKIO) {
- CGroupBlockIODeviceBandwidth *b, *next;
+ bool has_io = cgroup_context_has_io_config(c);
+ bool has_blockio = cgroup_context_has_blockio_config(c);
if (!is_root) {
char buf[DECIMAL_STR_MAX(uint64_t)+1];
uint64_t weight;
- CGroupBlockIODeviceWeight *w;
- weight = cgroup_context_blkio_weight(c, state);
+ if (has_blockio)
+ weight = cgroup_context_blkio_weight(c, state);
+ else if (has_io)
+ weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
+ else
+ weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
xsprintf(buf, "%" PRIu64 "\n", weight);
r = cg_set_attribute("blkio", path, "blkio.weight", buf);
@@ -568,15 +638,36 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set blkio.weight on %s: %m", path);
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->blockio_device_weights)
- cgroup_apply_blkio_device_weight(path, w->path, w->weight);
+ if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_blkio_device_weight(path, w->path, w->weight);
+ } else if (has_io) {
+ CGroupIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->io_device_weights)
+ cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight));
+ }
}
/* Apply limits and free ones without config. */
- LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
- if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps))
- cgroup_context_free_blockio_device_bandwidth(c, b);
+ if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b, *next;
+
+ LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
+ if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps))
+ cgroup_context_free_blockio_device_bandwidth(c, b);
+ }
+ } else if (has_io) {
+ CGroupIODeviceLimit *l, *next;
+
+ LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
+ if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
+ cgroup_context_free_io_device_limit(c, l);
+ }
}
}
@@ -693,19 +784,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
- if (c->io_accounting ||
- c->io_weight != CGROUP_WEIGHT_INVALID ||
- c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
- c->io_device_weights ||
- c->io_device_limits)
- mask |= CGROUP_MASK_IO;
-
- if (c->blockio_accounting ||
- c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
- c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
- c->blockio_device_weights ||
- c->blockio_device_bandwidths)
- mask |= CGROUP_MASK_BLKIO;
+ if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
+ mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
if (c->memory_accounting ||
c->memory_limit != (uint64_t) -1)
@@ -1795,6 +1875,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
if (m == 0)
return;
+ /* always invalidate compat pairs together */
+ if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
+ m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+
if ((u->cgroup_realized_mask & m) == 0)
return;
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
index 4eb8fcd773..a025704722 100644
--- a/src/test/test-cgroup-mask.c
+++ b/src/test/test-cgroup-mask.c
@@ -48,6 +48,7 @@ static int test_cgroup_mask(void) {
m->default_cpu_accounting =
m->default_memory_accounting =
m->default_blockio_accounting =
+ m->default_io_accounting =
m->default_tasks_accounting = false;
m->default_tasks_max = (uint64_t) -1;
@@ -76,7 +77,7 @@ static int test_cgroup_mask(void) {
assert_se(unit_get_own_mask(daughter) == 0);
assert_se(unit_get_own_mask(grandchild) == 0);
assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY);
- assert_se(unit_get_own_mask(parent) == CGROUP_MASK_BLKIO);
+ assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
assert_se(unit_get_own_mask(root) == 0);
/* Verify aggregation of member masks */
@@ -85,23 +86,23 @@ static int test_cgroup_mask(void) {
assert_se(unit_get_members_mask(grandchild) == 0);
assert_se(unit_get_members_mask(parent_deep) == 0);
assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
- assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of sibling masks. */
assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(grandchild) == 0);
assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
- assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
- assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of target masks. */
assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(grandchild) == 0);
assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
- assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
- assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
manager_free(m);
diff --git a/test/parent.slice b/test/parent.slice
index 0222f8eb47..a95f90392d 100644
--- a/test/parent.slice
+++ b/test/parent.slice
@@ -2,4 +2,4 @@
Description=Parent Slice
[Slice]
-BlockIOWeight=200
+IOWeight=200