summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/automount.c33
-rw-r--r--src/core/bpf-devices.c271
-rw-r--r--src/core/bpf-devices.h17
-rw-r--r--src/core/bpf-firewall.c42
-rw-r--r--src/core/bpf-firewall.h1
-rw-r--r--src/core/cgroup.c1218
-rw-r--r--src/core/cgroup.h28
-rw-r--r--src/core/chown-recursive.c160
-rw-r--r--src/core/dbus-cgroup.c158
-rw-r--r--src/core/dbus-execute.c120
-rw-r--r--src/core/dbus-job.c24
-rw-r--r--src/core/dbus-job.h1
-rw-r--r--src/core/dbus-kill.c10
-rw-r--r--src/core/dbus-manager.c89
-rw-r--r--src/core/dbus-mount.c2
-rw-r--r--src/core/dbus-scope.c6
-rw-r--r--src/core/dbus-scope.h2
-rw-r--r--src/core/dbus-service.c44
-rw-r--r--src/core/dbus-slice.c2
-rw-r--r--src/core/dbus-socket.c36
-rw-r--r--src/core/dbus-swap.c2
-rw-r--r--src/core/dbus-unit.c162
-rw-r--r--src/core/dbus-unit.h1
-rw-r--r--src/core/dbus.c63
-rw-r--r--src/core/dbus.h2
-rw-r--r--src/core/device.c291
-rw-r--r--src/core/device.h6
-rw-r--r--src/core/dynamic-user.c111
-rw-r--r--src/core/dynamic-user.h2
-rw-r--r--src/core/emergency-action.c96
-rw-r--r--src/core/emergency-action.h18
-rw-r--r--src/core/execute.c498
-rw-r--r--src/core/execute.h63
-rw-r--r--src/core/ima-setup.c21
-rw-r--r--src/core/ip-address-access.h1
-rw-r--r--src/core/job.c349
-rw-r--r--src/core/job.h9
-rw-r--r--src/core/kill.c4
-rw-r--r--src/core/kill.h2
-rw-r--r--src/core/killall.c21
-rw-r--r--src/core/kmod-setup.c31
-rw-r--r--src/core/load-dropin.c2
-rw-r--r--src/core/load-fragment-gperf.gperf.m413
-rw-r--r--src/core/load-fragment.c334
-rw-r--r--src/core/load-fragment.h6
-rw-r--r--src/core/locale-setup.c104
-rw-r--r--src/core/loopback-setup.c6
-rw-r--r--src/core/machine-id-setup.c17
-rw-r--r--src/core/macros.systemd.in8
-rw-r--r--src/core/main.c464
-rw-r--r--src/core/manager.c994
-rw-r--r--src/core/manager.h92
-rw-r--r--src/core/meson.build2
-rw-r--r--src/core/mount-setup.c281
-rw-r--r--src/core/mount-setup.h2
-rw-r--r--src/core/mount.c407
-rw-r--r--src/core/mount.h13
-rw-r--r--src/core/namespace.c291
-rw-r--r--src/core/path.c26
-rw-r--r--src/core/scope.c19
-rw-r--r--src/core/selinux-access.c3
-rw-r--r--src/core/selinux-access.h4
-rw-r--r--src/core/service.c581
-rw-r--r--src/core/service.h11
-rw-r--r--src/core/show-status.c52
-rw-r--r--src/core/show-status.h22
-rw-r--r--src/core/shutdown.c41
-rw-r--r--src/core/slice.c16
-rw-r--r--src/core/smack-setup.c85
-rw-r--r--src/core/socket.c271
-rw-r--r--src/core/swap.c269
-rw-r--r--src/core/swap.h6
-rw-r--r--src/core/system.conf.in6
-rw-r--r--src/core/target.c11
-rw-r--r--src/core/timer.c55
-rw-r--r--src/core/transaction.c22
-rw-r--r--src/core/umount.c135
-rw-r--r--src/core/unit-printf.c82
-rw-r--r--src/core/unit-printf.h1
-rw-r--r--src/core/unit.c1110
-rw-r--r--src/core/unit.h107
81 files changed, 6310 insertions, 3678 deletions
diff --git a/src/core/automount.c b/src/core/automount.c
index 1b96a52c00..de8010bf2e 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -16,6 +16,7 @@
#include "bus-error.h"
#include "bus-util.h"
#include "dbus-automount.h"
+#include "dbus-unit.h"
#include "fd-util.h"
#include "format-util.h"
#include "io-util.h"
@@ -23,9 +24,11 @@
#include "mkdir.h"
#include "mount-util.h"
#include "mount.h"
+#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "serialize.h"
#include "special.h"
#include "stdio-util.h"
#include "string-table.h"
@@ -85,7 +88,7 @@ static void unmount_autofs(Automount *a) {
a->pipe_fd = safe_close(a->pipe_fd);
/* If we reload/reexecute things we keep the mount point around */
- if (!IN_SET(UNIT(a)->manager->exit_code, MANAGER_RELOAD, MANAGER_REEXECUTE)) {
+ if (!IN_SET(UNIT(a)->manager->objective, MANAGER_RELOAD, MANAGER_REEXECUTE)) {
automount_send_ready(a, a->tokens, -EHOSTDOWN);
automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
@@ -149,7 +152,7 @@ static int automount_add_default_dependencies(Automount *a) {
if (!MANAGER_IS_SYSTEM(UNIT(a)->manager))
return 0;
- r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
@@ -235,6 +238,9 @@ static void automount_set_state(Automount *a, AutomountState state) {
AutomountState old_state;
assert(a);
+ if (a->state != state)
+ bus_unit_send_pending_change_signal(UNIT(a), false);
+
old_state = a->state;
a->state = state;
@@ -314,9 +320,7 @@ static void automount_enter_dead(Automount *a, AutomountResult f) {
if (a->result == AUTOMOUNT_SUCCESS)
a->result = f;
- if (a->result != AUTOMOUNT_SUCCESS)
- log_unit_warning(UNIT(a), "Failed with result '%s'.", automount_result_to_string(a->result));
-
+ unit_log_result(UNIT(a), a->result == AUTOMOUNT_SUCCESS, automount_result_to_string(a->result));
automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD);
}
@@ -841,16 +845,16 @@ static int automount_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", automount_state_to_string(a->state));
- unit_serialize_item(u, f, "result", automount_result_to_string(a->result));
- unit_serialize_item_format(u, f, "dev-id", "%u", (unsigned) a->dev_id);
+ (void) serialize_item(f, "state", automount_state_to_string(a->state));
+ (void) serialize_item(f, "result", automount_result_to_string(a->result));
+ (void) serialize_item_format(f, "dev-id", "%lu", (unsigned long) a->dev_id);
SET_FOREACH(p, a->tokens, i)
- unit_serialize_item_format(u, f, "token", "%u", PTR_TO_UINT(p));
+ (void) serialize_item_format(f, "token", "%u", PTR_TO_UINT(p));
SET_FOREACH(p, a->expire_tokens, i)
- unit_serialize_item_format(u, f, "expire-token", "%u", PTR_TO_UINT(p));
+ (void) serialize_item_format(f, "expire-token", "%u", PTR_TO_UINT(p));
- r = unit_serialize_item_fd(u, f, fds, "pipe-fd", a->pipe_fd);
+ r = serialize_fd(f, fds, "pipe-fd", a->pipe_fd);
if (r < 0)
return r;
@@ -882,12 +886,13 @@ static int automount_deserialize_item(Unit *u, const char *key, const char *valu
a->result = f;
} else if (streq(key, "dev-id")) {
- unsigned d;
+ unsigned long d;
- if (safe_atou(value, &d) < 0)
+ if (safe_atolu(value, &d) < 0)
log_unit_debug(u, "Failed to parse dev-id value: %s", value);
else
- a->dev_id = (unsigned) d;
+ a->dev_id = (dev_t) d;
+
} else if (streq(key, "token")) {
unsigned token;
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c
new file mode 100644
index 0000000000..dade7f0490
--- /dev/null
+++ b/src/core/bpf-devices.c
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include <linux/libbpf.h>
+
+#include "bpf-devices.h"
+#include "bpf-program.h"
+
+#define PASS_JUMP_OFF 4096
+
+static int bpf_access_type(const char *acc) {
+ int r = 0;
+
+ assert(acc);
+
+ for (; *acc; acc++)
+ switch(*acc) {
+ case 'r':
+ r |= BPF_DEVCG_ACC_READ;
+ break;
+ case 'w':
+ r |= BPF_DEVCG_ACC_WRITE;
+ break;
+ case 'm':
+ r |= BPF_DEVCG_ACC_MKNOD;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn pre_insn[] = {
+ /* load device type to r2 */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+
+ /* load access type to r3 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+
+ /* load major number to r4 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, major)),
+
+ /* load minor number to r5 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, minor)),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ int r;
+
+ assert(ret);
+
+ if (policy == CGROUP_AUTO && !whitelist)
+ return 0;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
+ if (r < 0)
+ return log_error_errno(r, "Loading device control BPF program failed: %m");
+
+ if (policy == CGROUP_CLOSED || whitelist) {
+ r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+ }
+
+ *ret = TAKE_PTR(prog);
+
+ return 0;
+}
+
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn post_insn[] = {
+ /* return DENY */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ };
+
+ struct bpf_insn exit_insn[] = {
+ /* else return ALLOW */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ if (!prog) {
+ /* Remove existing program. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+ return 0;
+ }
+
+ if (policy != CGROUP_STRICT || whitelist) {
+ size_t off;
+
+ r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ /* Fixup PASS_JUMP_OFF jump offsets. */
+ for (off = 0; off < prog->n_instructions; off++) {
+ struct bpf_insn *ins = &prog->instructions[off];
+
+ if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
+ ins->off = prog->n_instructions - off - 1;
+ }
+ } else
+ /* Explicitly forbid everything. */
+ exit_insn[0].imm = 0;
+
+ r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+
+ r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path);
+
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+
+ /* Remember that this BPF program is installed now. */
+ u->bpf_device_control_installed = bpf_program_ref(prog);
+
+ return 0;
+}
+
+int bpf_devices_supported(void) {
+ struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+ static int supported = -1;
+ int r;
+
+ /* Checks whether BPF device controller is supported. For this, we check five things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0) {
+ log_debug("Not enough privileges, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ log_debug("Not running with unified cgroups, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
+ if (r < 0) {
+ log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ return supported = 1;
+}
diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h
new file mode 100644
index 0000000000..8d3de3bd94
--- /dev/null
+++ b/src/core/bpf-devices.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+struct BPFProgram;
+
+int bpf_devices_supported(void);
+
+int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc);
+int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc);
+int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc);
+
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist);
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index 8b66ef73dc..b9a611fd9e 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -20,6 +20,7 @@
#include "bpf-program.h"
#include "fd-util.h"
#include "ip-address-access.h"
+#include "missing_syscall.h"
#include "unit.h"
enum {
@@ -483,7 +484,7 @@ int bpf_firewall_compile(Unit *u) {
if (supported < 0)
return supported;
if (supported == BPF_FIREWALL_UNSUPPORTED) {
- log_debug("BPF firewalling not supported on this manager, proceeding without.");
+ log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without.");
return -EOPNOTSUPP;
}
if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
@@ -492,7 +493,7 @@ int bpf_firewall_compile(Unit *u) {
* that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
* consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
* all, either. */
- log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+ log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
return -EOPNOTSUPP;
}
@@ -518,24 +519,24 @@ int bpf_firewall_compile(Unit *u) {
r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
if (r < 0)
- return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");
+ return log_unit_error_errno(u, r, "Preparation of eBPF allow maps failed: %m");
r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
if (r < 0)
- return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");
+ return log_unit_error_errno(u, r, "Preparation of eBPF deny maps failed: %m");
}
r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
if (r < 0)
- return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");
+ return log_unit_error_errno(u, r, "Preparation of eBPF accounting maps failed: %m");
r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
if (r < 0)
- return log_error_errno(r, "Compilation for ingress BPF program failed: %m");
+ return log_unit_error_errno(u, r, "Compilation for ingress BPF program failed: %m");
r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
if (r < 0)
- return log_error_errno(r, "Compilation for egress BPF program failed: %m");
+ return log_unit_error_errno(u, r, "Compilation for egress BPF program failed: %m");
return 0;
}
@@ -560,17 +561,17 @@ int bpf_firewall_install(Unit *u) {
if (supported < 0)
return supported;
if (supported == BPF_FIREWALL_UNSUPPORTED) {
- log_debug("BPF firewalling not supported on this manager, proceeding without.");
+ log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without.");
return -EOPNOTSUPP;
}
if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
- log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+ log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
return -EOPNOTSUPP;
}
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
if (r < 0)
- return log_error_errno(r, "Failed to determine cgroup path: %m");
+ return log_unit_error_errno(u, r, "Failed to determine cgroup path: %m");
flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
(u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0;
@@ -583,7 +584,7 @@ int bpf_firewall_install(Unit *u) {
if (u->ip_bpf_egress) {
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
if (r < 0)
- return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
+ return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
/* Remember that this BPF program is installed now. */
u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress);
@@ -592,7 +593,7 @@ int bpf_firewall_install(Unit *u) {
if (u->ip_bpf_ingress) {
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
if (r < 0)
- return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
+ return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
}
@@ -660,8 +661,7 @@ int bpf_firewall_supported(void) {
* b) whether the unified hierarchy is being used
* c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
* d) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
- * e) the BPF implementation in the kernel supports the BPF_PROG_ATTACH call, which we require
- *
+ * e) the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require
*/
if (supported >= 0)
@@ -714,7 +714,7 @@ int bpf_firewall_supported(void) {
* is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
* program if we can't do a thing with it later?
*
- * We detect this case by issuing the BPF_PROG_ATTACH bpf() call with invalid file descriptors: if
+ * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
* CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
* parameters are validated however, and that'll fail with EBADF then. */
@@ -724,22 +724,22 @@ int bpf_firewall_supported(void) {
.attach_bpf_fd = -1,
};
- if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
if (errno != EBADF) {
- log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_ATTACH, BPF firewalling is not supported: %m");
+ log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m");
return supported = BPF_FIREWALL_UNSUPPORTED;
}
/* YAY! */
} else {
- log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ log_debug("Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
return supported = BPF_FIREWALL_UNSUPPORTED;
}
/* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
- * (which was added in kernel 4.15). We use a similar logic as before, but this time we use
- * BPF_F_ALLOW_MULTI. Since the flags are checked early in the system call we'll get EINVAL if it's not
- * supported, and EBADF as before if it is available. */
+ * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH
+ * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll
+ * get EINVAL if it's not supported, and EBADF as before if it is available. */
attr = (union bpf_attr) {
.attach_type = BPF_CGROUP_INET_EGRESS,
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
index e2d08a0fc8..7d38483dbd 100644
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
-
#include <inttypes.h>
#include "unit.h"
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index bb02436203..a7ce3fceaa 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -7,6 +7,7 @@
#include "blockdev-util.h"
#include "bpf-firewall.h"
#include "btrfs-util.h"
+#include "bpf-devices.h"
#include "bus-error.h"
#include "cgroup-util.h"
#include "cgroup.h"
@@ -18,6 +19,7 @@
#include "process-util.h"
#include "procfs-util.h"
#include "special.h"
+#include "stat-util.h"
#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
@@ -25,7 +27,12 @@
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
-bool manager_owns_root_cgroup(Manager *m) {
+/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
+ * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
+ * out specific attributes from us. */
+#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
+
+bool manager_owns_host_root_cgroup(Manager *m) {
assert(m);
/* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
@@ -33,24 +40,38 @@ bool manager_owns_root_cgroup(Manager *m) {
* appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
* we run in any kind of container virtualization. */
+ if (MANAGER_IS_USER(m))
+ return false;
+
if (detect_container() > 0)
return false;
return empty_or_root(m->cgroup_root);
}
-bool unit_has_root_cgroup(Unit *u) {
+bool unit_has_host_root_cgroup(Unit *u) {
assert(u);
/* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
* the manager manages the root cgroup. */
- if (!manager_owns_root_cgroup(u->manager))
+ if (!manager_owns_host_root_cgroup(u->manager))
return false;
return unit_has_name(u, SPECIAL_ROOT_SLICE);
}
+static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
+ int r;
+
+ r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
+ if (r < 0)
+ log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
+ strna(attribute), isempty(u->cgroup_path) ? "/" : u->cgroup_path, (int) strcspn(value, NEWLINE), value);
+
+ return r;
+}
+
static void cgroup_compat_warn(void) {
static bool cgroup_compat_warned = false;
@@ -71,29 +92,30 @@ static void cgroup_compat_warn(void) {
void cgroup_context_init(CGroupContext *c) {
assert(c);
- /* Initialize everything to the kernel defaults, assuming the
- * structure is preinitialized to 0 */
+ /* Initialize everything to the kernel defaults. */
- c->cpu_weight = CGROUP_WEIGHT_INVALID;
- c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
- c->cpu_quota_per_sec_usec = USEC_INFINITY;
+ *c = (CGroupContext) {
+ .cpu_weight = CGROUP_WEIGHT_INVALID,
+ .startup_cpu_weight = CGROUP_WEIGHT_INVALID,
+ .cpu_quota_per_sec_usec = USEC_INFINITY,
- c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
- c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
+ .cpu_shares = CGROUP_CPU_SHARES_INVALID,
+ .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
- c->memory_high = CGROUP_LIMIT_MAX;
- c->memory_max = CGROUP_LIMIT_MAX;
- c->memory_swap_max = CGROUP_LIMIT_MAX;
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
- c->memory_limit = CGROUP_LIMIT_MAX;
+ .memory_limit = CGROUP_LIMIT_MAX,
- c->io_weight = CGROUP_WEIGHT_INVALID;
- c->startup_io_weight = CGROUP_WEIGHT_INVALID;
+ .io_weight = CGROUP_WEIGHT_INVALID,
+ .startup_io_weight = CGROUP_WEIGHT_INVALID,
- c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
- c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
+ .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
+ .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
- c->tasks_max = (uint64_t) -1;
+ .tasks_max = CGROUP_LIMIT_MAX,
+ };
}
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
@@ -114,6 +136,15 @@ void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight
free(w);
}
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_latencies, c->io_device_latencies, l);
+ free(l->path);
+ free(l);
+}
+
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
assert(c);
assert(l);
@@ -147,6 +178,9 @@ void cgroup_context_done(CGroupContext *c) {
while (c->io_device_weights)
cgroup_context_free_io_device_weight(c, c->io_device_weights);
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
while (c->io_device_limits)
cgroup_context_free_io_device_limit(c, c->io_device_limits);
@@ -166,6 +200,7 @@ void cgroup_context_done(CGroupContext *c) {
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupIODeviceLimit *il;
CGroupIODeviceWeight *iw;
+ CGroupIODeviceLatency *l;
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
@@ -193,6 +228,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%" PRIu64 "\n"
+ "%sMemoryMin=%" PRIu64 "\n"
"%sMemoryLow=%" PRIu64 "\n"
"%sMemoryHigh=%" PRIu64 "\n"
"%sMemoryMax=%" PRIu64 "\n"
@@ -216,6 +252,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
+ prefix, c->memory_min,
prefix, c->memory_low,
prefix, c->memory_high,
prefix, c->memory_max,
@@ -244,11 +281,18 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
LIST_FOREACH(device_weights, iw, c->io_device_weights)
fprintf(f,
- "%sIODeviceWeight=%s %" PRIu64,
+ "%sIODeviceWeight=%s %" PRIu64 "\n",
prefix,
iw->path,
iw->weight);
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies)
+ fprintf(f,
+ "%sIODeviceLatencyTargetSec=%s %s\n",
+ prefix,
+ l->path,
+ format_timespan(u, sizeof(u), l->target_usec, 1));
+
LIST_FOREACH(device_limits, il, c->io_device_limits) {
char buf[FORMAT_BYTES_MAX];
CGroupIOLimitType type;
@@ -302,17 +346,73 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
}
}
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
+ _cleanup_free_ CGroupDeviceAllow *a = NULL;
+ _cleanup_free_ char *d = NULL;
+
+ assert(c);
+ assert(dev);
+ assert(isempty(mode) || in_charset(mode, "rwm"));
+
+ a = new(CGroupDeviceAllow, 1);
+ if (!a)
+ return -ENOMEM;
+
+ d = strdup(dev);
+ if (!d)
+ return -ENOMEM;
+
+ *a = (CGroupDeviceAllow) {
+ .path = TAKE_PTR(d),
+ .r = isempty(mode) || strchr(mode, 'r'),
+ .w = isempty(mode) || strchr(mode, 'w'),
+ .m = isempty(mode) || strchr(mode, 'm'),
+ };
+
+ LIST_PREPEND(device_allow, c->device_allow, a);
+ TAKE_PTR(a);
+
+ return 0;
+}
+
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return;
+
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
static int lookup_block_device(const char *p, dev_t *ret) {
- struct stat st;
+ struct stat st = {};
int r;
assert(p);
assert(ret);
- if (stat(p, &st) < 0)
- return log_warning_errno(errno, "Couldn't stat device '%s': %m", p);
-
- if (S_ISBLK(st.st_mode))
+ r = device_path_parse_major_minor(p, &st.st_mode, &st.st_rdev);
+ if (r == -ENODEV) { /* not a parsable device node, need to go to disk */
+ if (stat(p, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device '%s': %m", p);
+ } else if (r < 0)
+ return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p);
+
+ if (S_ISCHR(st.st_mode)) {
+ log_warning("Device node '%s' is a character device, but block device needed.", p);
+ return -ENOTBLK;
+ } else if (S_ISBLK(st.st_mode))
*ret = st.st_rdev;
else if (major(st.st_dev) != 0)
*ret = st.st_dev; /* If this is not a device node then use the block device this file is stored on */
@@ -335,67 +435,123 @@ static int lookup_block_device(const char *p, dev_t *ret) {
return 0;
}
-static int whitelist_device(const char *path, const char *node, const char *acc) {
- char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
- struct stat st;
- bool ignore_notfound;
+static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
+ struct stat st = {};
int r;
assert(path);
assert(acc);
- if (node[0] == '-') {
- /* Non-existent paths starting with "-" must be silently ignored */
- node++;
- ignore_notfound = true;
- } else
- ignore_notfound = false;
+ /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
+ * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
+ * means clients can use these path without the device node actually around */
+ r = device_path_parse_major_minor(node, &st.st_mode, &st.st_rdev);
+ if (r < 0) {
+ if (r != -ENODEV)
+ return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
- if (stat(node, &st) < 0) {
- if (errno == ENOENT && ignore_notfound)
- return 0;
+ if (stat(node, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
- return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+ if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
+ log_warning("%s is not a device.", node);
+ return -ENODEV;
+ }
}
- if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
- log_warning("%s is not a device.", node);
- return -ENODEV;
- }
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
- sprintf(buf,
- "%c %u:%u %s",
- S_ISCHR(st.st_mode) ? 'c' : 'b',
- major(st.st_rdev), minor(st.st_rdev),
- acc);
+ return cgroup_bpf_whitelist_device(prog, S_ISCHR(st.st_mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ major(st.st_rdev), minor(st.st_rdev), acc);
- r = cg_set_attribute("devices", path, "devices.allow", buf);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set devices.allow on %s: %m", path);
+ } else {
+ char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
- return r;
+ sprintf(buf,
+ "%c %u:%u %s",
+ S_ISCHR(st.st_mode) ? 'c' : 'b',
+ major(st.st_rdev), minor(st.st_rdev),
+ acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+
+ return 0;
+ }
}
-static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
+static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
_cleanup_fclose_ FILE *f = NULL;
- char line[LINE_MAX];
+ char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
bool good = false;
+ unsigned maj;
int r;
assert(path);
assert(acc);
assert(IN_SET(type, 'b', 'c'));
+ if (streq(name, "*")) {
+ /* If the name is a wildcard, then apply this list to all devices of this type */
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc);
+ } else {
+ xsprintf(buf, "%c *:* %s", type, acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set devices.allow on %s: %m", path);
+ return 0;
+ }
+ }
+
+ if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) {
+ /* The name is numeric and suitable as major. In that case, let's take is major, and create the entry
+ * directly */
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ (void) cgroup_bpf_whitelist_major(prog,
+ type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ maj, acc);
+ } else {
+ xsprintf(buf, "%c %u:* %s", type, maj, acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set devices.allow on %s: %m", path);
+ }
+
+ return 0;
+ }
+
f = fopen("/proc/devices", "re");
if (!f)
return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
- FOREACH_LINE(line, f, goto fail) {
- char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
- unsigned maj;
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *w, *p;
- truncate_nl(line);
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/devices: %m");
+ if (r == 0)
+ break;
if (type == 'c' && streq(line, "Character devices:")) {
good = true;
@@ -434,22 +590,31 @@ static int whitelist_major(const char *path, const char *name, char type, const
if (fnmatch(name, w, 0) != 0)
continue;
- sprintf(buf,
- "%c %u:* %s",
- type,
- maj,
- acc);
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ continue;
- r = cg_set_attribute("devices", path, "devices.allow", buf);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set devices.allow on %s: %m", path);
+ (void) cgroup_bpf_whitelist_major(prog,
+ type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ maj, acc);
+ } else {
+ sprintf(buf,
+ "%c %u:* %s",
+ type,
+ maj,
+ acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
+ * here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+ }
}
return 0;
-
-fail:
- return log_warning_errno(errno, "Failed to read /proc/devices: %m");
}
static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
@@ -482,53 +647,42 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state)
return CGROUP_CPU_SHARES_DEFAULT;
}
-static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
- int r;
+static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
xsprintf(buf, "%" PRIu64 "\n", weight);
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.weight: %m");
+ (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
+}
+
+static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) {
+ char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
if (quota != USEC_INFINITY)
xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
else
xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
-
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
-
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.max: %m");
+ (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
}
-static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
- int r;
+static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
xsprintf(buf, "%" PRIu64 "\n", shares);
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares: %m");
+ (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
+}
+
+static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) {
+ char buf[DECIMAL_STR_MAX(usec_t) + 2];
xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us: %m");
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
if (quota != USEC_INFINITY) {
xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
} else
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us: %m");
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
}
static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
@@ -546,6 +700,7 @@ static bool cgroup_context_has_io_config(CGroupContext *c) {
c->io_weight != CGROUP_WEIGHT_INVALID ||
c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
c->io_device_weights ||
+ c->io_device_latencies ||
c->io_device_limits;
}
@@ -597,10 +752,7 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_
return;
xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
- r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.weight: %m");
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
}
static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
@@ -613,10 +765,24 @@ static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint
return;
xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
- r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight_device", buf);
+}
+
+static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.weight_device: %m");
+ return;
+
+ if (target != USEC_INFINITY)
+ xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target);
+ else
+ xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev));
+
+ (void) set_attribute_and_warn(u, "io", "io.latency", buf);
}
static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
@@ -639,10 +805,7 @@ static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t
xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
- r = cg_set_attribute("io", u->cgroup_path, "io.max", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.max: %m");
+ (void) set_attribute_and_warn(u, "io", "io.max", buf);
}
static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) {
@@ -655,33 +818,23 @@ static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint6
return;
sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
- r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.throttle.read_bps_device: %m");
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.read_bps_device", buf);
sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
- r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.throttle.write_bps_device: %m");
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf);
}
static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
- return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
+ return c->memory_min > 0 || c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
}
static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
- char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max";
- int r;
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max\n";
if (v != CGROUP_LIMIT_MAX)
xsprintf(buf, "%" PRIu64 "\n", v);
- r = cg_set_attribute("memory", u->cgroup_path, file, buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set %s: %m", file);
+ (void) set_attribute_and_warn(u, "memory", file, buf);
}
static void cgroup_apply_firewall(Unit *u) {
@@ -698,130 +851,133 @@ static void cgroup_apply_firewall(Unit *u) {
static void cgroup_context_apply(
Unit *u,
CGroupMask apply_mask,
- bool apply_bpf,
ManagerState state) {
const char *path;
CGroupContext *c;
- bool is_root;
+ bool is_host_root, is_local_root;
int r;
assert(u);
/* Nothing to do? Exit early! */
- if (apply_mask == 0 && !apply_bpf)
+ if (apply_mask == 0)
return;
- /* Some cgroup attributes are not supported on the root cgroup, hence silently ignore */
- is_root = unit_has_root_cgroup(u);
+ /* Some cgroup attributes are not supported on the host root cgroup, hence silently ignore them here. And other
+ * attributes should only be managed for cgroups further down the tree. */
+ is_local_root = unit_has_name(u, SPECIAL_ROOT_SLICE);
+ is_host_root = unit_has_host_root_cgroup(u);
assert_se(c = unit_get_cgroup_context(u));
assert_se(path = u->cgroup_path);
- if (is_root) /* Make sure we don't try to display messages with an empty path. */
+ if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
path = "/";
- /* We generally ignore errors caused by read-only mounted
- * cgroup trees (assuming we are running in a container then),
- * and missing cgroups, i.e. EROFS and ENOENT. */
-
- if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
- bool has_weight, has_shares;
+ /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container
+ * then), and missing cgroups, i.e. EROFS and ENOENT. */
- has_weight = cgroup_context_has_cpu_weight(c);
- has_shares = cgroup_context_has_cpu_shares(c);
+ /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but
+ * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this
+ * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of
+ * containers we want to leave control of these to the container manager (and if cgroupsv2 delegation is used
+ * we couldn't even write to them if we wanted to). */
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) {
if (cg_all_unified() > 0) {
uint64_t weight;
- if (has_weight)
+ if (cgroup_context_has_cpu_weight(c))
weight = cgroup_context_cpu_weight(c, state);
- else if (has_shares) {
- uint64_t shares = cgroup_context_cpu_shares(c, state);
+ else if (cgroup_context_has_cpu_shares(c)) {
+ uint64_t shares;
+ shares = cgroup_context_cpu_shares(c, state);
weight = cgroup_cpu_shares_to_weight(shares);
- log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+ log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s",
shares, weight, path);
} else
weight = CGROUP_WEIGHT_DEFAULT;
- cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ cgroup_apply_unified_cpu_weight(u, weight);
+ cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);
+
} else {
uint64_t shares;
- if (has_weight) {
- uint64_t weight = cgroup_context_cpu_weight(c, state);
+ if (cgroup_context_has_cpu_weight(c)) {
+ uint64_t weight;
+ weight = cgroup_context_cpu_weight(c, state);
shares = cgroup_cpu_weight_to_shares(weight);
- log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+ log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s",
weight, shares, path);
- } else if (has_shares)
+ } else if (cgroup_context_has_cpu_shares(c))
shares = cgroup_context_cpu_shares(c, state);
else
shares = CGROUP_CPU_SHARES_DEFAULT;
- cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ cgroup_apply_legacy_cpu_shares(u, shares);
+ cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
}
}
- if (apply_mask & CGROUP_MASK_IO) {
- bool has_io = cgroup_context_has_io_config(c);
- bool has_blockio = cgroup_context_has_blockio_config(c);
+ /* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroupsv2
+ * controller), and in case of containers we want to leave control of these attributes to the container manager
+ * (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */
+ if ((apply_mask & CGROUP_MASK_IO) && !is_local_root) {
+ char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
+ bool has_io, has_blockio;
+ uint64_t weight;
- if (!is_root) {
- char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
- uint64_t weight;
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
- if (has_io)
- weight = cgroup_context_io_weight(c, state);
- else if (has_blockio) {
- uint64_t blkio_weight = cgroup_context_blkio_weight(c, state);
+ if (has_io)
+ weight = cgroup_context_io_weight(c, state);
+ else if (has_blockio) {
+ uint64_t blkio_weight;
- weight = cgroup_weight_blkio_to_io(blkio_weight);
+ blkio_weight = cgroup_context_blkio_weight(c, state);
+ weight = cgroup_weight_blkio_to_io(blkio_weight);
- log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64,
- blkio_weight, weight);
- } else
- weight = CGROUP_WEIGHT_DEFAULT;
+ log_cgroup_compat(u, "Applying [Startup]BlockIOWeight=%" PRIu64 " as [Startup]IOWeight=%" PRIu64,
+ blkio_weight, weight);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
- xsprintf(buf, "default %" PRIu64 "\n", weight);
- r = cg_set_attribute("io", path, "io.weight", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.weight: %m");
+ xsprintf(buf, "default %" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
- if (has_io) {
- CGroupIODeviceWeight *w;
+ if (has_io) {
+ CGroupIODeviceLatency *latency;
+ CGroupIODeviceLimit *limit;
+ CGroupIODeviceWeight *w;
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->io_device_weights)
- cgroup_apply_io_device_weight(u, w->path, w->weight);
- } else if (has_blockio) {
- CGroupBlockIODeviceWeight *w;
+ LIST_FOREACH(device_weights, w, c->io_device_weights)
+ cgroup_apply_io_device_weight(u, w->path, w->weight);
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
- weight = cgroup_weight_blkio_to_io(w->weight);
+ LIST_FOREACH(device_limits, limit, c->io_device_limits)
+ cgroup_apply_io_device_limit(u, limit->path, limit->limits);
- log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s",
- w->weight, weight, w->path);
+ LIST_FOREACH(device_latencies, latency, c->io_device_latencies)
+ cgroup_apply_io_device_latency(u, latency->path, latency->target_usec);
- cgroup_apply_io_device_weight(u, w->path, weight);
- }
- }
- }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+ CGroupBlockIODeviceBandwidth *b;
- /* Apply limits and free ones without config. */
- if (has_io) {
- CGroupIODeviceLimit *l;
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ weight = cgroup_weight_blkio_to_io(w->weight);
- LIST_FOREACH(device_limits, l, c->io_device_limits)
- cgroup_apply_io_device_limit(u, l->path, l->limits);
+ log_cgroup_compat(u, "Applying BlockIODeviceWeight=%" PRIu64 " as IODeviceWeight=%" PRIu64 " for %s",
+ w->weight, weight, w->path);
- } else if (has_blockio) {
- CGroupBlockIODeviceBandwidth *b;
+ cgroup_apply_io_device_weight(u, w->path, weight);
+ }
LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
@@ -833,7 +989,7 @@ static void cgroup_context_apply(
limits[CGROUP_IO_RBPS_MAX] = b->rbps;
limits[CGROUP_IO_WBPS_MAX] = b->wbps;
- log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s",
+ log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax= for %s",
b->rbps, b->wbps, b->path);
cgroup_apply_io_device_limit(u, b->path, limits);
@@ -842,19 +998,24 @@ static void cgroup_context_apply(
}
if (apply_mask & CGROUP_MASK_BLKIO) {
- bool has_io = cgroup_context_has_io_config(c);
- bool has_blockio = cgroup_context_has_blockio_config(c);
+ bool has_io, has_blockio;
- if (!is_root) {
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
+
+ /* Applying a 'weight' never makes sense for the host root cgroup, and for containers this should be
+ * left to our container manager, too. */
+ if (!is_local_root) {
char buf[DECIMAL_STR_MAX(uint64_t)+1];
uint64_t weight;
if (has_io) {
- uint64_t io_weight = cgroup_context_io_weight(c, state);
+ uint64_t io_weight;
+ io_weight = cgroup_context_io_weight(c, state);
weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
- log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
+ log_cgroup_compat(u, "Applying [Startup]IOWeight=%" PRIu64 " as [Startup]BlockIOWeight=%" PRIu64,
io_weight, weight);
} else if (has_blockio)
weight = cgroup_context_blkio_weight(c, state);
@@ -862,19 +1023,15 @@ static void cgroup_context_apply(
weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
xsprintf(buf, "%" PRIu64 "\n", weight);
- r = cg_set_attribute("blkio", path, "blkio.weight", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.weight: %m");
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight", buf);
if (has_io) {
CGroupIODeviceWeight *w;
- /* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->io_device_weights) {
weight = cgroup_weight_io_to_blkio(w->weight);
- log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s",
+ log_cgroup_compat(u, "Applying IODeviceWeight=%" PRIu64 " as BlockIODeviceWeight=%" PRIu64 " for %s",
w->weight, weight, w->path);
cgroup_apply_blkio_device_weight(u, w->path, weight);
@@ -882,31 +1039,38 @@ static void cgroup_context_apply(
} else if (has_blockio) {
CGroupBlockIODeviceWeight *w;
- /* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->blockio_device_weights)
cgroup_apply_blkio_device_weight(u, w->path, w->weight);
}
}
- /* Apply limits and free ones without config. */
- if (has_io) {
- CGroupIODeviceLimit *l;
+ /* The bandwith limits are something that make sense to be applied to the host's root but not container
+ * roots, as there we want the container manager to handle it */
+ if (is_host_root || !is_local_root) {
+ if (has_io) {
+ CGroupIODeviceLimit *l;
- LIST_FOREACH(device_limits, l, c->io_device_limits) {
- log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s",
- l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax= for %s",
+ l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
- cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]);
- }
- } else if (has_blockio) {
- CGroupBlockIODeviceBandwidth *b;
+ cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b;
- LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths)
- cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps);
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths)
+ cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps);
+ }
}
}
- if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
+ /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes'
+ * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we
+ * want to leave control to the container manager (and if proper cgroupsv2 delegation is used we couldn't even
+ * write to this if we wanted to.) */
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) {
+
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
@@ -917,20 +1081,22 @@ static void cgroup_context_apply(
max = c->memory_limit;
if (max != CGROUP_LIMIT_MAX)
- log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max);
+ log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
}
+ cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
cgroup_apply_unified_memory_limit(u, "memory.max", max);
cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
+
} else {
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
uint64_t val;
if (cgroup_context_has_unified_memory_config(c)) {
val = c->memory_max;
- log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val);
+ log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
} else
val = c->memory_limit;
@@ -939,27 +1105,33 @@ static void cgroup_context_apply(
else
xsprintf(buf, "%" PRIu64 "\n", val);
- r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set memory.limit_in_bytes: %m");
+ (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
}
}
- if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
+ /* On cgroupsv2 we can apply BPF everywhere. On cgroupsv1 we apply it everywhere except for the root of
+ * containers, where we leave this to the manager */
+ if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
+ (is_host_root || cg_all_unified() > 0 || !is_local_root)) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
CGroupDeviceAllow *a;
- /* Changing the devices list of a populated cgroup
- * might result in EINVAL, hence ignore EINVAL
- * here. */
+ if (cg_all_unified() > 0) {
+ r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
+ } else {
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
+ * here. */
- if (c->device_allow || c->device_policy != CGROUP_AUTO)
- r = cg_set_attribute("devices", path, "devices.deny", "a");
- else
- r = cg_set_attribute("devices", path, "devices.allow", "a");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to reset devices.list: %m");
+ if (c->device_allow || c->device_policy != CGROUP_AUTO)
+ r = cg_set_attribute("devices", path, "devices.deny", "a");
+ else
+ r = cg_set_attribute("devices", path, "devices.allow", "a");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.allow/devices.deny: %m");
+ }
if (c->device_policy == CGROUP_CLOSED ||
(c->device_policy == CGROUP_AUTO && c->device_allow)) {
@@ -972,16 +1144,16 @@ static void cgroup_context_apply(
"/dev/tty\0" "rwm\0"
"/dev/ptmx\0" "rwm\0"
/* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
- "-/run/systemd/inaccessible/chr\0" "rwm\0"
- "-/run/systemd/inaccessible/blk\0" "rwm\0";
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
NULSTR_FOREACH_PAIR(x, y, auto_devices)
- whitelist_device(path, x, y);
+ (void) whitelist_device(prog, path, x, y);
/* PTS (/dev/pts) devices may not be duplicated, but accessed */
- whitelist_major(path, "pts", 'c', "rw");
+ (void) whitelist_major(prog, path, "pts", 'c', "rw");
}
LIST_FOREACH(device_allow, a, c->device_allow) {
@@ -1001,19 +1173,31 @@ static void cgroup_context_apply(
acc[k++] = 0;
if (path_startswith(a->path, "/dev/"))
- whitelist_device(path, a->path, acc);
+ (void) whitelist_device(prog, path, a->path, acc);
else if ((val = startswith(a->path, "block-")))
- whitelist_major(path, val, 'b', acc);
+ (void) whitelist_major(prog, path, val, 'b', acc);
else if ((val = startswith(a->path, "char-")))
- whitelist_major(path, val, 'c', acc);
+ (void) whitelist_major(prog, path, val, 'c', acc);
else
- log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path);
+ log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
+ }
+
+ r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
+ if (r < 0) {
+ static bool warned = false;
+
+ log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
+ "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
+ "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
+ "(This warning is only shown for the first loaded unit using device ACL.)", u->id);
+
+ warned = true;
}
}
if (apply_mask & CGROUP_MASK_PIDS) {
- if (is_root) {
+ if (is_host_root) {
/* So, the "pids" controller does not expose anything on the root cgroup, in order not to
* replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when
* the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a
@@ -1034,39 +1218,68 @@ static void cgroup_context_apply(
r = procfs_tasks_set_limit(TASKS_MAX);
else
r = 0;
-
if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r,
"Failed to write to tasks limit sysctls: %m");
+ }
- } else {
+ /* The attribute itself is not available on the host root cgroup, and in the container case we want to
+ * leave it for the container manager. */
+ if (!is_local_root) {
if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
- r = cg_set_attribute("pids", path, "pids.max", buf);
+ (void) set_attribute_and_warn(u, "pids", "pids.max", buf);
} else
- r = cg_set_attribute("pids", path, "pids.max", "max");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set pids.max: %m");
+ (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
}
}
- if (apply_bpf)
+ if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
cgroup_apply_firewall(u);
}
-CGroupMask cgroup_context_get_mask(CGroupContext *c) {
+static bool unit_get_needs_bpf_firewall(Unit *u) {
+ CGroupContext *c;
+ Unit *p;
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ if (c->ip_accounting ||
+ c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+
+ /* If any parent slice has an IP access list defined, it applies too */
+ for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
+ c = unit_get_cgroup_context(p);
+ if (!c)
+ return false;
+
+ if (c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+ }
+
+ return false;
+}
+
+static CGroupMask cgroup_context_get_mask(CGroupContext *c) {
CGroupMask mask = 0;
- /* Figure out which controllers we need */
+ /* Figure out which controllers we need, based on the cgroup context object */
+
+ if (c->cpu_accounting)
+ mask |= get_cpu_accounting_mask();
- if (c->cpu_accounting ||
- cgroup_context_has_cpu_weight(c) ||
+ if (cgroup_context_has_cpu_weight(c) ||
cgroup_context_has_cpu_shares(c) ||
c->cpu_quota_per_sec_usec != USEC_INFINITY)
- mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
+ mask |= CGROUP_MASK_CPU;
if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
@@ -1078,25 +1291,41 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
if (c->device_allow ||
c->device_policy != CGROUP_AUTO)
- mask |= CGROUP_MASK_DEVICES;
+ mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
if (c->tasks_accounting ||
c->tasks_max != CGROUP_LIMIT_MAX)
mask |= CGROUP_MASK_PIDS;
+ return CGROUP_MASK_EXTEND_JOINED(mask);
+}
+
+static CGroupMask unit_get_bpf_mask(Unit *u) {
+ CGroupMask mask = 0;
+
+ /* Figure out which controllers we need, based on the cgroup context, possibly taking into account children
+ * too. */
+
+ if (unit_get_needs_bpf_firewall(u))
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
return mask;
}
CGroupMask unit_get_own_mask(Unit *u) {
CGroupContext *c;
- /* Returns the mask of controllers the unit needs for itself */
+ /* Returns the mask of controllers the unit needs for itself. If a unit is not properly loaded, return an empty
+ * mask, as we shouldn't reflect it in the cgroup hierarchy then. */
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
c = unit_get_cgroup_context(u);
if (!c)
return 0;
- return cgroup_context_get_mask(c) | unit_get_delegate_mask(u);
+ return (cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
}
CGroupMask unit_get_delegate_mask(Unit *u) {
@@ -1119,7 +1348,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
}
assert_se(c = unit_get_cgroup_context(u));
- return c->delegate_controllers;
+ return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers);
}
CGroupMask unit_get_members_mask(Unit *u) {
@@ -1128,7 +1357,7 @@ CGroupMask unit_get_members_mask(Unit *u) {
/* Returns the mask of controllers all of the unit's children require, merged */
if (u->cgroup_members_mask_valid)
- return u->cgroup_members_mask;
+ return u->cgroup_members_mask; /* Use cached value if possible */
u->cgroup_members_mask = 0;
@@ -1138,14 +1367,8 @@ CGroupMask unit_get_members_mask(Unit *u) {
Iterator i;
HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
-
- if (member == u)
- continue;
-
- if (UNIT_DEREF(member->slice) != u)
- continue;
-
- u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+ if (UNIT_DEREF(member->slice) == u)
+ u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
}
}
@@ -1166,6 +1389,31 @@ CGroupMask unit_get_siblings_mask(Unit *u) {
return unit_get_subtree_mask(u); /* we are the top-level slice */
}
+CGroupMask unit_get_disable_mask(Unit *u) {
+ CGroupContext *c;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return c->disable_controllers;
+}
+
+CGroupMask unit_get_ancestor_disable_mask(Unit *u) {
+ CGroupMask mask;
+
+ assert(u);
+ mask = unit_get_disable_mask(u);
+
+ /* Returns the mask of controllers which are marked as forcibly
+ * disabled in any ancestor unit or the unit in question. */
+
+ if (UNIT_ISSET(u->slice))
+ mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice));
+
+ return mask;
+}
+
CGroupMask unit_get_subtree_mask(Unit *u) {
/* Returns the mask of this subtree, meaning of the group
@@ -1186,6 +1434,7 @@ CGroupMask unit_get_target_mask(Unit *u) {
mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
return mask;
}
@@ -1200,85 +1449,19 @@ CGroupMask unit_get_enable_mask(Unit *u) {
mask = unit_get_members_mask(u);
mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
return mask;
}
-bool unit_get_needs_bpf(Unit *u) {
- CGroupContext *c;
- Unit *p;
+void unit_invalidate_cgroup_members_masks(Unit *u) {
assert(u);
- c = unit_get_cgroup_context(u);
- if (!c)
- return false;
+ /* Recurse invalidate the member masks cache all the way up the tree */
+ u->cgroup_members_mask_valid = false;
- if (c->ip_accounting ||
- c->ip_address_allow ||
- c->ip_address_deny)
- return true;
-
- /* If any parent slice has an IP access list defined, it applies too */
- for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
- c = unit_get_cgroup_context(p);
- if (!c)
- return false;
-
- if (c->ip_address_allow ||
- c->ip_address_deny)
- return true;
- }
-
- return false;
-}
-
-/* Recurse from a unit up through its containing slices, propagating
- * mask bits upward. A unit is also member of itself. */
-void unit_update_cgroup_members_masks(Unit *u) {
- CGroupMask m;
- bool more;
-
- assert(u);
-
- /* Calculate subtree mask */
- m = unit_get_subtree_mask(u);
-
- /* See if anything changed from the previous invocation. If
- * not, we're done. */
- if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
- return;
-
- more =
- u->cgroup_subtree_mask_valid &&
- ((m & ~u->cgroup_subtree_mask) != 0) &&
- ((~m & u->cgroup_subtree_mask) == 0);
-
- u->cgroup_subtree_mask = m;
- u->cgroup_subtree_mask_valid = true;
-
- if (UNIT_ISSET(u->slice)) {
- Unit *s = UNIT_DEREF(u->slice);
-
- if (more)
- /* There's more set now than before. We
- * propagate the new mask to the parent's mask
- * (not caring if it actually was valid or
- * not). */
-
- s->cgroup_members_mask |= m;
-
- else
- /* There's less set now than before (or we
- * don't know), we need to recalculate
- * everything, so let's invalidate the
- * parent's members mask */
-
- s->cgroup_members_mask_valid = false;
-
- /* And now make sure that this change also hits our
- * grandparents */
- unit_update_cgroup_members_masks(s);
- }
+ if (UNIT_ISSET(u->slice))
+ unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
}
const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
@@ -1302,7 +1485,7 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) {
return unit_get_realized_cgroup_path(userdata, mask);
}
-char *unit_default_cgroup_path(Unit *u) {
+char *unit_default_cgroup_path(const Unit *u) {
_cleanup_free_ char *escaped = NULL, *slice = NULL;
int r;
@@ -1435,16 +1618,14 @@ static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
CGroupMask enable_mask,
- bool needs_bpf) {
+ ManagerState state) {
- CGroupContext *c;
- int r;
bool created;
+ int r;
assert(u);
- c = unit_get_cgroup_context(u);
- if (!c)
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
return 0;
/* Figure out our cgroup path */
@@ -1456,26 +1637,44 @@ static int unit_create_cgroup(
r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
if (r < 0)
return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);
- created = !!r;
+ created = r;
/* Start watching it */
(void) unit_watch_cgroup(u);
/* Preserve enabled controllers in delegated units, adjust others. */
- if (created || !unit_cgroup_delegate(u)) {
+ if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
+ CGroupMask result_mask = 0;
/* Enable all controllers we need */
- r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path);
+ r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
if (r < 0)
- log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m",
- u->cgroup_path);
+ log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
+
+ /* If we just turned off a controller, this might release the controller for our parent too, let's
+ * enqueue the parent for re-realization in that case again. */
+ if (UNIT_ISSET(u->slice)) {
+ CGroupMask turned_off;
+
+ turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0);
+ if (turned_off != 0) {
+ Unit *parent;
+
+ /* Force the parent to propagate the enable mask to the kernel again, by invalidating
+ * the controller we just turned off. */
+
+ for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice))
+ unit_invalidate_cgroup(parent, turned_off);
+ }
+ }
+
+ /* Remember what's actually enabled now */
+ u->cgroup_enabled_mask = result_mask;
}
/* Keep track that this is now realized */
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
- u->cgroup_enabled_mask = enable_mask;
- u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
@@ -1487,6 +1686,10 @@ static int unit_create_cgroup(
log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
}
+ /* Set attributes */
+ cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
+
return 0;
}
@@ -1628,42 +1831,69 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
return r;
}
-static void cgroup_xattr_apply(Unit *u) {
- char ids[SD_ID128_STRING_MAX];
- int r;
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
assert(u);
- if (!MANAGER_IS_SYSTEM(u->manager))
- return;
+ /* Returns true if this unit is fully realized. We check four things:
+ *
+ * 1. Whether the cgroup was created at all
+ * 2. Whether the cgroup was created in all the hierarchies we need it to be created in (in case of cgroupsv1)
+ * 3. Whether the cgroup has all the right controllers enabled (in case of cgroupsv2)
+ * 4. Whether the invalidation mask is currently zero
+ *
+ * If you wonder why we mask the target realization and enable mask with CGROUP_MASK_V1/CGROUP_MASK_V2: note
+ * that there are three sets of bitmasks: CGROUP_MASK_V1 (for real cgroupv1 controllers), CGROUP_MASK_V2 (for
+ * real cgroupv2 controllers) and CGROUP_MASK_BPF (for BPF-based pseudo-controllers). Now, cgroup_realized_mask
+ * is only matters for cgroupsv1 controllers, and cgroup_enabled_mask only used for cgroupsv2, and if they
+ * differ in the others, we don't really care. (After all, the cgroup_enabled_mask tracks with controllers are
+ * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
+ * simply don't matter. */
- if (sd_id128_is_null(u->invocation_id))
- return;
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
+ ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
+ u->cgroup_invalidated_mask == 0;
+}
- r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
- "trusted.invocation_id",
- sd_id128_to_string(u->invocation_id, ids), 32,
- 0);
- if (r < 0)
- log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+static bool unit_has_mask_disables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be disabled are indeed disabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
+ * already removed. */
+
+ return !u->cgroup_realized ||
+ (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+ FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
}
-static bool unit_has_mask_realized(
+static bool unit_has_mask_enables_realized(
Unit *u,
CGroupMask target_mask,
- CGroupMask enable_mask,
- bool needs_bpf) {
+ CGroupMask enable_mask) {
assert(u);
+ /* Returns true if all controllers which should be enabled are indeed enabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
+ * we want to add is already added. */
+
return u->cgroup_realized &&
- u->cgroup_realized_mask == target_mask &&
- u->cgroup_enabled_mask == enable_mask &&
- ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
- (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
+ ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
+ ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
}
-static void unit_add_to_cgroup_realize_queue(Unit *u) {
+void unit_add_to_cgroup_realize_queue(Unit *u) {
assert(u);
if (u->in_cgroup_realize_queue)
@@ -1683,15 +1913,131 @@ static void unit_remove_from_cgroup_realize_queue(Unit *u) {
u->in_cgroup_realize_queue = false;
}
+/* Controllers can only be enabled breadth-first, from the root of the
+ * hierarchy downwards to the unit in question. */
+static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ assert(u);
+
+ /* First go deal with this unit's parent, or we won't be able to enable
+ * any new controllers at this layer. */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ /* We can only enable in this direction, don't try to disable anything.
+ */
+ if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
+ return 0;
+
+ new_target_mask = u->cgroup_realized_mask | target_mask;
+ new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+
+ return unit_create_cgroup(u, new_target_mask, new_enable_mask, state);
+}
+
+/* Controllers can only be disabled depth-first, from the leaves of the
+ * hierarchy upwards to the unit in question. */
+static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
+ Iterator i;
+ Unit *m;
+ void *v;
+
+ assert(u);
+
+ if (u->type != UNIT_SLICE)
+ return 0;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ if (UNIT_DEREF(m->slice) != u)
+ continue;
+
+ /* The cgroup for this unit might not actually be fully
+ * realised yet, in which case it isn't holding any controllers
+ * open anyway. */
+ if (!m->cgroup_path)
+ continue;
+
+ /* We must disable those below us first in order to release the
+ * controller. */
+ if (m->type == UNIT_SLICE)
+ (void) unit_realize_cgroup_now_disable(m, state);
+
+ target_mask = unit_get_target_mask(m);
+ enable_mask = unit_get_enable_mask(m);
+
+ /* We can only disable in this direction, don't try to enable
+ * anything. */
+ if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
+ continue;
+
+ new_target_mask = m->cgroup_realized_mask & target_mask;
+ new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+
+ r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
/* Check if necessary controllers and attributes for a unit are in place.
*
- * If so, do nothing.
- * If not, create paths, move processes over, and set attributes.
+ * - If so, do nothing.
+ * - If not, create paths, move processes over, and set attributes.
+ *
+ * Controllers can only be *enabled* in a breadth-first way, and *disabled* in
+ * a depth-first way. As such the process looks like this:
+ *
+ * Suppose we have a cgroup hierarchy which looks like this:
+ *
+ * root
+ * / \
+ * / \
+ * / \
+ * a b
+ * / \ / \
+ * / \ / \
+ * c d e f
+ * / \ / \ / \ / \
+ * h i j k l m n o
+ *
+ * 1. We want to realise cgroup "d" now.
+ * 2. cgroup "a" has DisableControllers=cpu in the associated unit.
+ * 3. cgroup "k" just started requesting the memory controller.
+ *
+ * To make this work we must do the following in order:
+ *
+ * 1. Disable CPU controller in k, j
+ * 2. Disable CPU controller in d
+ * 3. Enable memory controller in root
+ * 4. Enable memory controller in a
+ * 5. Enable memory controller in d
+ * 6. Enable memory controller in k
+ *
+ * Notice that we need to touch j in one direction, but not the other. We also
+ * don't go beyond d when disabling -- it's up to "a" to get realized if it
+ * wants to disable further. The basic rules are therefore:
+ *
+ * - If you're disabling something, you need to realise all of the cgroups from
+ * your recursive descendants to the root. This starts from the leaves.
+ * - If you're enabling something, you need to realise from the root cgroup
+ * downwards, but you don't need to iterate your recursive descendants.
*
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
- bool needs_bpf, apply_bpf;
int r;
assert(u);
@@ -1700,32 +2046,29 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
- needs_bpf = unit_get_needs_bpf(u);
- if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
+ if (unit_has_mask_realized(u, target_mask, enable_mask))
return 0;
- /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
- * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
- * this will trickle down properly to cgroupfs. */
- apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+ /* Disable controllers below us, if there are any */
+ r = unit_realize_cgroup_now_disable(u, state);
+ if (r < 0)
+ return r;
- /* First, realize parents */
+ /* Enable controllers above us, if there are any */
if (UNIT_ISSET(u->slice)) {
- r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
if (r < 0)
return r;
}
- /* And then do the real work */
- r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
+ /* Now actually deal with the cgroup we were trying to realise and set attributes */
+ r = unit_create_cgroup(u, target_mask, enable_mask, state);
if (r < 0)
return r;
- /* Finally, apply the necessary attributes. */
- cgroup_context_apply(u, target_mask, apply_bpf, state);
- cgroup_xattr_apply(u);
-
+ /* Now, reset the invalidation mask */
+ u->cgroup_invalidated_mask = 0;
return 0;
}
@@ -1771,9 +2114,6 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
void *v;
HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
- if (m == u)
- continue;
-
/* Skip units that have a dependency on the slice
* but aren't actually in it. */
if (UNIT_DEREF(m->slice) != slice)
@@ -1789,8 +2129,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
* any changes. */
if (unit_has_mask_realized(m,
unit_get_target_mask(m),
- unit_get_enable_mask(m),
- unit_get_needs_bpf(m)))
+ unit_get_enable_mask(m)))
continue;
unit_add_to_cgroup_realize_queue(m);
@@ -1827,7 +2166,8 @@ int unit_realize_cgroup(Unit *u) {
void unit_release_cgroup(Unit *u) {
assert(u);
- /* Forgets all cgroup details for this cgroup */
+ /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
+ * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
if (u->cgroup_path) {
(void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
@@ -1836,7 +2176,7 @@ void unit_release_cgroup(Unit *u) {
if (u->cgroup_inotify_wd >= 0) {
if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0)
- log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring", u->cgroup_inotify_wd, u->id);
+ log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring: %m", u->cgroup_inotify_wd, u->id);
(void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd));
u->cgroup_inotify_wd = -1;
@@ -1872,6 +2212,8 @@ void unit_prune_cgroup(Unit *u) {
u->cgroup_realized = false;
u->cgroup_realized_mask = 0;
u->cgroup_enabled_mask = 0;
+
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
}
int unit_search_main_pid(Unit *u, pid_t *ret) {
@@ -2133,11 +2475,30 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
}
}
+static int cg_bpf_mask_supported(CGroupMask *ret) {
+ CGroupMask mask = 0;
+ int r;
+
+ /* BPF-based firewall */
+ r = bpf_firewall_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
+ /* BPF-based device access control */
+ r = bpf_devices_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_DEVICES;
+
+ *ret = mask;
+ return 0;
+}
+
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
const char *scope_path;
CGroupController c;
int r, all_unified;
+ CGroupMask mask;
char *e;
assert(m);
@@ -2231,7 +2592,7 @@ int manager_setup_cgroup(Manager *m) {
(void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
- } else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
+ } else if (MANAGER_IS_SYSTEM(m) && manager_owns_host_root_cgroup(m) && !MANAGER_IS_TEST_RUN(m)) {
/* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
* since it does not generate events when control groups with children run empty. */
@@ -2260,17 +2621,25 @@ int manager_setup_cgroup(Manager *m) {
if (m->pin_cgroupfs_fd < 0)
return log_error_errno(errno, "Failed to open pin file: %m");
- } else if (r < 0 && !m->test_run_flags)
+ } else if (!MANAGER_IS_TEST_RUN(m))
return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
/* 7. Always enable hierarchical support if it exists... */
- if (!all_unified && m->test_run_flags == 0)
+ if (!all_unified && !MANAGER_IS_TEST_RUN(m))
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
- /* 8. Figure out which controllers are supported, and log about it */
+ /* 8. Figure out which controllers are supported */
r = cg_mask_supported(&m->cgroup_supported);
if (r < 0)
return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ /* 9. Figure out which bpf-based pseudo-controllers are supported */
+ r = cg_bpf_mask_supported(&mask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
+ m->cgroup_supported |= mask;
+
+ /* 10. Log which controllers are supported */
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
@@ -2401,7 +2770,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
return -ENODATA;
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
- if (unit_has_root_cgroup(u))
+ if (unit_has_host_root_cgroup(u))
return procfs_memory_get_current(ret);
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
@@ -2436,7 +2805,7 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) {
return -ENODATA;
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
- if (unit_has_root_cgroup(u))
+ if (unit_has_host_root_cgroup(u))
return procfs_tasks_get_current(ret);
if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
@@ -2463,9 +2832,13 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
return -ENODATA;
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
- if (unit_has_root_cgroup(u))
+ if (unit_has_host_root_cgroup(u))
return procfs_cpu_get_usage(ret);
+ /* Requisite controllers for CPU accounting are not enabled */
+ if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+ return -ENODATA;
+
r = cg_all_unified();
if (r < 0)
return r;
@@ -2473,14 +2846,11 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
_cleanup_free_ char *val = NULL;
uint64_t us;
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
- return -ENODATA;
-
r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
- if (r < 0)
- return r;
if (IN_SET(r, -ENOENT, -ENXIO))
return -ENODATA;
+ if (r < 0)
+ return r;
r = safe_atou64(val, &us);
if (r < 0)
@@ -2488,9 +2858,6 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
ns = us * NSEC_PER_USEC;
} else {
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
- return -ENODATA;
-
r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
if (r == -ENOENT)
return -ENODATA;
@@ -2631,10 +2998,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
- if ((u->cgroup_realized_mask & m) == 0) /* NOP? */
+ if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
return;
- u->cgroup_realized_mask &= ~m;
+ u->cgroup_invalidated_mask |= m;
unit_add_to_cgroup_realize_queue(u);
}
@@ -2644,10 +3011,10 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
if (!UNIT_HAS_CGROUP_CONTEXT(u))
return;
- if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */
+ if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
return;
- u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
unit_add_to_cgroup_realize_queue(u);
/* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
@@ -2658,13 +3025,8 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
void *v;
HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
- if (member == u)
- continue;
-
- if (UNIT_DEREF(member->slice) != u)
- continue;
-
- unit_invalidate_cgroup_bpf(member);
+ if (UNIT_DEREF(member->slice) == u)
+ unit_invalidate_cgroup_bpf(member);
}
}
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 2d2ff6fc3c..266daa20a5 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -12,6 +12,7 @@ typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
+typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
@@ -51,6 +52,12 @@ struct CGroupIODeviceLimit {
uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
};
+struct CGroupIODeviceLatency {
+ LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
+ char *path;
+ usec_t target_usec;
+};
+
struct CGroupBlockIODeviceWeight {
LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
char *path;
@@ -81,7 +88,9 @@ struct CGroupContext {
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+ LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
+ uint64_t memory_min;
uint64_t memory_low;
uint64_t memory_high;
uint64_t memory_max;
@@ -109,6 +118,8 @@ struct CGroupContext {
bool delegate;
CGroupMask delegate_controllers;
+
+ CGroupMask disable_controllers;
};
/* Used when querying IP accounting data */
@@ -128,29 +139,32 @@ void cgroup_context_init(CGroupContext *c);
void cgroup_context_done(CGroupContext *c);
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
-CGroupMask cgroup_context_get_mask(CGroupContext *c);
-
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
+
CGroupMask unit_get_own_mask(Unit *u);
CGroupMask unit_get_delegate_mask(Unit *u);
CGroupMask unit_get_members_mask(Unit *u);
CGroupMask unit_get_siblings_mask(Unit *u);
CGroupMask unit_get_subtree_mask(Unit *u);
+CGroupMask unit_get_disable_mask(Unit *u);
+CGroupMask unit_get_ancestor_disable_mask(Unit *u);
CGroupMask unit_get_target_mask(Unit *u);
CGroupMask unit_get_enable_mask(Unit *u);
-bool unit_get_needs_bpf(Unit *u);
+void unit_invalidate_cgroup_members_masks(Unit *u);
-void unit_update_cgroup_members_masks(Unit *u);
+void unit_add_to_cgroup_realize_queue(Unit *u);
const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
-char *unit_default_cgroup_path(Unit *u);
+char *unit_default_cgroup_path(const Unit *u);
int unit_set_cgroup_path(Unit *u, const char *path);
int unit_pick_cgroup_path(Unit *u);
@@ -191,8 +205,8 @@ int unit_reset_ip_accounting(Unit *u);
cc ? cc->name : false; \
})
-bool manager_owns_root_cgroup(Manager *m);
-bool unit_has_root_cgroup(Unit *u);
+bool manager_owns_host_root_cgroup(Manager *m);
+bool unit_has_host_root_cgroup(Unit *u);
int manager_notify_cgroup_empty(Manager *m, const char *group);
diff --git a/src/core/chown-recursive.c b/src/core/chown-recursive.c
index c4794501c2..7767301f7d 100644
--- a/src/core/chown-recursive.c
+++ b/src/core/chown-recursive.c
@@ -1,17 +1,21 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
-#include <sys/types.h>
-#include <sys/stat.h>
#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
-#include "user-util.h"
-#include "macro.h"
-#include "fd-util.h"
-#include "dirent-util.h"
#include "chown-recursive.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
-static int chown_one(int fd, const char *name, const struct stat *st, uid_t uid, gid_t gid) {
- int r;
+static int chown_one(int fd, const struct stat *st, uid_t uid, gid_t gid) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ const char *n;
assert(fd >= 0);
assert(st);
@@ -20,98 +24,95 @@ static int chown_one(int fd, const char *name, const struct stat *st, uid_t uid,
(!gid_is_valid(gid) || st->st_gid == gid))
return 0;
- if (name)
- r = fchownat(fd, name, uid, gid, AT_SYMLINK_NOFOLLOW);
- else
- r = fchown(fd, uid, gid);
- if (r < 0)
- return -errno;
+ /* We change ownership through the /proc/self/fd/%i path, so that we have a stable reference that works with
+ * O_PATH. (Note: fchown() and fchmod() do not work with O_PATH, the kernel refuses that. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
- /* The linux kernel alters the mode in some cases of chown(). Let's undo this. */
- if (name) {
- if (!S_ISLNK(st->st_mode))
- r = fchmodat(fd, name, st->st_mode, 0);
- else /* There's currently no AT_SYMLINK_NOFOLLOW for fchmodat() */
- r = 0;
- } else
- r = fchmod(fd, st->st_mode);
- if (r < 0)
+ /* Drop any ACL if there is one */
+ FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default")
+ if (removexattr(procfs_path, n) < 0)
+ if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY))
+ return -errno;
+
+ if (chown(procfs_path, uid, gid) < 0)
return -errno;
+ /* The linux kernel alters the mode in some cases of chown(), as well when we change ACLs. Let's undo this. We
+ * do this only for non-symlinks however. That's because for symlinks the access mode is ignored anyway and
+ * because on some kernels/file systems trying to change the access mode will succeed but has no effect while
+ * on others it actively fails. */
+ if (!S_ISLNK(st->st_mode))
+ if (chmod(procfs_path, st->st_mode & 07777) < 0)
+ return -errno;
+
return 1;
}
static int chown_recursive_internal(int fd, const struct stat *st, uid_t uid, gid_t gid) {
+ _cleanup_closedir_ DIR *d = NULL;
bool changed = false;
+ struct dirent *de;
int r;
assert(fd >= 0);
assert(st);
- if (S_ISDIR(st->st_mode)) {
- _cleanup_closedir_ DIR *d = NULL;
- struct dirent *de;
-
- d = fdopendir(fd);
- if (!d) {
- r = -errno;
- goto finish;
- }
- fd = -1;
-
- FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
- struct stat fst;
-
- if (dot_or_dot_dot(de->d_name))
- continue;
-
- if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (S_ISDIR(fst.st_mode)) {
- int subdir_fd;
-
- subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
- if (subdir_fd < 0) {
- r = -errno;
- goto finish;
- }
-
- r = chown_recursive_internal(subdir_fd, &fst, uid, gid);
- if (r < 0)
- goto finish;
- if (r > 0)
- changed = true;
- } else {
- r = chown_one(dirfd(d), de->d_name, &fst, uid, gid);
- if (r < 0)
- goto finish;
- if (r > 0)
- changed = true;
- }
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_close_ int path_fd = -1;
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out
+ * while we manipulate it. */
+ path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (path_fd < 0)
+ return -errno;
+
+ if (fstat(path_fd, &fst) < 0)
+ return -errno;
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ /* Convert it to a "real" (i.e. non-O_PATH) fd now */
+ subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (subdir_fd < 0)
+ return subdir_fd;
+
+ r = chown_recursive_internal(subdir_fd, &fst, uid, gid); /* takes possession of subdir_fd even on failure */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ } else {
+ r = chown_one(path_fd, &fst, uid, gid);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
}
+ }
- r = chown_one(dirfd(d), NULL, st, uid, gid);
- } else
- r = chown_one(fd, NULL, st, uid, gid);
+ r = chown_one(dirfd(d), st, uid, gid);
if (r < 0)
- goto finish;
+ return r;
- r = r > 0 || changed;
-
-finish:
- safe_close(fd);
- return r;
+ return r > 0 || changed;
}
int path_chown_recursive(const char *path, uid_t uid, gid_t gid) {
_cleanup_close_ int fd = -1;
struct stat st;
- int r;
- fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
if (fd < 0)
return -errno;
@@ -128,8 +129,5 @@ int path_chown_recursive(const char *path, uid_t uid, gid_t gid) {
(!gid_is_valid(gid) || st.st_gid == gid))
return 0;
- r = chown_recursive_internal(fd, &st, uid, gid);
- fd = -1; /* we donated the fd to the call, regardless if it succeeded or failed */
-
- return r;
+ return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid); /* we donate the fd to the call, regardless if it succeeded or failed */
}
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 540bc77aed..53890bcafb 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -17,7 +17,7 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
-static int property_get_delegate_controllers(
+static int property_get_cgroup_mask(
sd_bus *bus,
const char *path,
const char *interface,
@@ -26,26 +26,22 @@ static int property_get_delegate_controllers(
void *userdata,
sd_bus_error *error) {
- CGroupContext *c = userdata;
- CGroupController cc;
+ CGroupMask *mask = userdata;
+ CGroupController ctrl;
int r;
assert(bus);
assert(reply);
- assert(c);
-
- if (!c->delegate)
- return sd_bus_message_append(reply, "as", 0);
r = sd_bus_message_open_container(reply, 'a', "s");
if (r < 0)
return r;
- for (cc = 0; cc < _CGROUP_CONTROLLER_MAX; cc++) {
- if ((c->delegate_controllers & CGROUP_CONTROLLER_TO_MASK(cc)) == 0)
+ for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+ if ((*mask & CGROUP_CONTROLLER_TO_MASK(ctrl)) == 0)
continue;
- r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(cc));
+ r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(ctrl));
if (r < 0)
return r;
}
@@ -53,6 +49,27 @@ static int property_get_delegate_controllers(
return sd_bus_message_close_container(reply);
}
+static int property_get_delegate_controllers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (!c->delegate)
+ return sd_bus_message_append(reply, "as", 0);
+
+ return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error);
+}
+
static int property_get_io_device_weight(
sd_bus *bus,
const char *path,
@@ -119,6 +136,36 @@ static int property_get_io_device_limits(
return sd_bus_message_close_container(reply);
}
+static int property_get_io_device_latency(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLatency *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies) {
+ r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
static int property_get_blockio_device_weight(
sd_bus *bus,
const char *path,
@@ -291,6 +338,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0),
SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
@@ -298,6 +346,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
+ SD_BUS_PROPERTY("MemoryMin", "t", NULL, offsetof(CGroupContext, memory_min), 0),
SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
@@ -310,6 +359,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+ SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
SD_BUS_VTABLE_END
};
@@ -571,7 +621,7 @@ int bus_cgroup_set_property(
flags |= UNIT_PRIVATE;
if (streq(name, "CPUAccounting"))
- return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU, message, flags, error);
+ return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error);
if (streq(name, "CPUWeight"))
return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error);
@@ -606,6 +656,9 @@ int bus_cgroup_set_property(
if (streq(name, "MemoryAccounting"))
return bus_cgroup_set_boolean(u, name, &c->memory_accounting, CGROUP_MASK_MEMORY, message, flags, error);
+ if (streq(name, "MemoryMin"))
+ return bus_cgroup_set_memory(u, name, &c->memory_min, message, flags, error);
+
if (streq(name, "MemoryLow"))
return bus_cgroup_set_memory(u, name, &c->memory_low, message, flags, error);
@@ -621,6 +674,9 @@ int bus_cgroup_set_property(
if (streq(name, "MemoryLimit"))
return bus_cgroup_set_memory(u, name, &c->memory_limit, message, flags, error);
+ if (streq(name, "MemoryMinScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_min, message, flags, error);
+
if (streq(name, "MemoryLowScale"))
return bus_cgroup_set_memory_scale(u, name, &c->memory_low, message, flags, error);
@@ -839,6 +895,86 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "IODeviceLatencyTargetUSec")) {
+ const char *path;
+ uint64_t target;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLatency *a = NULL, *b;
+
+ LIST_FOREACH(device_latencies, b, c->io_device_latencies) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceLatency, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_latencies, c->io_device_latencies, a);
+ }
+
+ a->target_usec = target;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ CGroupIODeviceLatency *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("IODeviceLatencyTargetSec=\n", f);
+ LIST_FOREACH(device_latencies, a, c->io_device_latencies)
+ fprintf(f, "IODeviceLatencyTargetSec=%s %s\n",
+ a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1));
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
} else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
const char *path;
bool read = true;
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index c44970c10c..11301e4b69 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -27,7 +27,7 @@
#include "ioprio.h"
#include "journal-util.h"
#include "missing.h"
-#include "mount-util.h"
+#include "mountpoint-util.h"
#include "namespace.h"
#include "parse-util.h"
#include "path-util.h"
@@ -718,6 +718,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SyslogLevel", "i", property_get_syslog_level, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SyslogFacility", "i", property_get_syslog_facility, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LogLevelMax", "i", bus_property_get_int, offsetof(ExecContext, log_level_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitIntervalUSec", "t", bus_property_get_usec, offsetof(ExecContext, log_rate_limit_interval_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitBurst", "u", bus_property_get_unsigned, offsetof(ExecContext, log_rate_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LogExtraFields", "aay", property_get_log_extra_fields, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SecureBits", "i", bus_property_get_int, offsetof(ExecContext, secure_bits), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CapabilityBoundingSet", "t", NULL, offsetof(ExecContext, capability_bounding_set), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1015,8 +1017,6 @@ static BUS_DEFINE_SET_TRANSIENT_IS_VALID(log_level, "i", int32_t, int, "%" PRIi3
#if HAVE_SECCOMP
static BUS_DEFINE_SET_TRANSIENT_IS_VALID(errno, "i", int32_t, int, "%" PRIi32, errno_is_valid);
#endif
-static BUS_DEFINE_SET_TRANSIENT_IS_VALID(sched_priority, "i", int32_t, int, "%" PRIi32, sched_priority_is_valid);
-static BUS_DEFINE_SET_TRANSIENT_IS_VALID(nice, "i", int32_t, int, "%" PRIi32, nice_is_valid);
static BUS_DEFINE_SET_TRANSIENT_PARSE(std_input, ExecInput, exec_input_from_string);
static BUS_DEFINE_SET_TRANSIENT_PARSE(std_output, ExecOutput, exec_output_from_string);
static BUS_DEFINE_SET_TRANSIENT_PARSE(utmp_mode, ExecUtmpMode, exec_utmp_mode_from_string);
@@ -1027,7 +1027,6 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_pres
static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(capability, "t", uint64_t, uint64_t, "%" PRIu64, capability_set_to_string_alloc);
-static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(sched_policy, "i", int32_t, int, "%" PRIi32, sched_policy_to_string_alloc_with_check);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(namespace_flag, "t", uint64_t, unsigned long, "%" PRIu64, namespace_flags_to_string);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING(mount_flags, "t", uint64_t, unsigned long, "%" PRIu64, mount_propagation_flags_to_string_with_check);
@@ -1070,15 +1069,15 @@ int bus_exec_context_set_transient_property(
if (streq(name, "LogLevelMax"))
return bus_set_transient_log_level(u, name, &c->log_level_max, message, flags, error);
- if (streq(name, "CPUSchedulingPriority"))
- return bus_set_transient_sched_priority(u, name, &c->cpu_sched_priority, message, flags, error);
+ if (streq(name, "LogRateLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &c->log_rate_limit_interval_usec, message, flags, error);
+
+ if (streq(name, "LogRateLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &c->log_rate_limit_burst, message, flags, error);
if (streq(name, "Personality"))
return bus_set_transient_personality(u, name, &c->personality, message, flags, error);
- if (streq(name, "Nice"))
- return bus_set_transient_nice(u, name, &c->nice, message, flags, error);
-
if (streq(name, "StandardInput"))
return bus_set_transient_std_input(u, name, &c->std_input, message, flags, error);
@@ -1208,9 +1207,6 @@ int bus_exec_context_set_transient_property(
if (streq(name, "AmbientCapabilities"))
return bus_set_transient_capability(u, name, &c->capability_ambient_set, message, flags, error);
- if (streq(name, "CPUSchedulingPolicy"))
- return bus_set_transient_sched_policy(u, name, &c->cpu_sched_policy, message, flags, error);
-
if (streq(name, "RestrictNamespaces"))
return bus_set_transient_namespace_flag(u, name, &c->restrict_namespaces, message, flags, error);
@@ -1521,8 +1517,8 @@ int bus_exec_context_set_transient_property(
int af;
af = af_from_name(*s);
- if (af <= 0)
- return -EINVAL;
+ if (af < 0)
+ return af;
if (!invert == c->address_families_whitelist) {
r = set_put(c->address_families, INT_TO_PTR(af));
@@ -1609,6 +1605,72 @@ int bus_exec_context_set_transient_property(
return 1;
+ } else if (streq(name, "Nice")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!nice_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Nice value: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->nice = q;
+ c->nice_set = true;
+
+ unit_write_settingf(u, flags, name, "Nice=%i", q);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPolicy")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!sched_policy_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling policy: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = sched_policy_to_string_alloc(q, &s);
+ if (r < 0)
+ return r;
+
+ c->cpu_sched_policy = q;
+ c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q));
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPriority")) {
+ int32_t p, min, max;
+
+ r = sd_bus_message_read(message, "i", &p);
+ if (r < 0)
+ return r;
+
+ min = sched_get_priority_min(c->cpu_sched_policy);
+ max = sched_get_priority_max(c->cpu_sched_policy);
+ if (p < min || p > max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling priority: %i", p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_sched_priority = p;
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPriority=%i", p);
+ }
+
+ return 1;
+
} else if (streq(name, "IOSchedulingClass")) {
int32_t q;
@@ -1731,7 +1793,10 @@ int bus_exec_context_set_transient_property(
return 1;
- } else if (STR_IN_SET(name, "StandardInputFile", "StandardOutputFile", "StandardErrorFile")) {
+ } else if (STR_IN_SET(name,
+ "StandardInputFile",
+ "StandardOutputFile", "StandardOutputFileToAppend",
+ "StandardErrorFile", "StandardErrorFileToAppend")) {
const char *s;
r = sd_bus_message_read(message, "s", &s);
@@ -1755,23 +1820,34 @@ int bus_exec_context_set_transient_property(
c->std_input = EXEC_INPUT_FILE;
unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=file:%s", s);
- } else if (streq(name, "StandardOutputFile")) {
+ } else if (STR_IN_SET(name, "StandardOutputFile", "StandardOutputFileToAppend")) {
r = free_and_strdup(&c->stdio_file[STDOUT_FILENO], empty_to_null(s));
if (r < 0)
return r;
- c->std_output = EXEC_OUTPUT_FILE;
- unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s);
-
+ if (streq(name, "StandardOutputFile")) {
+ c->std_output = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s);
+ } else {
+ assert(streq(name, "StandardOutputFileToAppend"));
+ c->std_output = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=append:%s", s);
+ }
} else {
- assert(streq(name, "StandardErrorFile"));
+ assert(STR_IN_SET(name, "StandardErrorFile", "StandardErrorFileToAppend"));
r = free_and_strdup(&c->stdio_file[STDERR_FILENO], empty_to_null(s));
if (r < 0)
return r;
- c->std_error = EXEC_OUTPUT_FILE;
- unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s);
+ if (streq(name, "StandardErrorFile")) {
+ c->std_error = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s);
+ } else {
+ assert(streq(name, "StandardErrorFileToAppend"));
+ c->std_error = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=append:%s", s);
+ }
}
}
diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c
index 5551c56d0e..d11e58b51d 100644
--- a/src/core/dbus-job.c
+++ b/src/core/dbus-job.c
@@ -4,6 +4,7 @@
#include "alloc-util.h"
#include "dbus-job.h"
+#include "dbus-unit.h"
#include "dbus.h"
#include "job.h"
#include "log.h"
@@ -50,7 +51,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error
/* Access is granted to the job owner */
if (!sd_bus_track_contains(j->bus_track, sd_bus_message_get_sender(message))) {
- /* And for everybody else consult PolicyKit */
+ /* And for everybody else consult polkit */
r = bus_verify_manage_units_async(j->unit->manager, message, error);
if (r < 0)
return r;
@@ -173,6 +174,9 @@ void bus_job_send_change_signal(Job *j) {
assert(j);
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
if (j->in_dbus_queue) {
LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
j->in_dbus_queue = false;
@@ -185,6 +189,21 @@ void bus_job_send_change_signal(Job *j) {
j->sent_dbus_new_signal = true;
}
+void bus_job_send_pending_change_signal(Job *j, bool including_new) {
+ assert(j);
+
+ if (!j->in_dbus_queue)
+ return;
+
+ if (!j->sent_dbus_new_signal && !including_new)
+ return;
+
+ if (MANAGER_IS_RELOADING(j->unit->manager))
+ return;
+
+ bus_job_send_change_signal(j);
+}
+
static int send_removed_signal(sd_bus *bus, void *userdata) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
_cleanup_free_ char *p = NULL;
@@ -222,6 +241,9 @@ void bus_job_send_removed_signal(Job *j) {
if (!j->sent_dbus_new_signal)
bus_job_send_change_signal(j);
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
r = bus_foreach_bus(j->manager, j->bus_track, send_removed_signal, j);
if (r < 0)
log_debug_errno(r, "Failed to send job remove signal for %u: %m", j->id);
diff --git a/src/core/dbus-job.h b/src/core/dbus-job.h
index 3cc60f22ee..c9f6fc7187 100644
--- a/src/core/dbus-job.h
+++ b/src/core/dbus-job.h
@@ -12,6 +12,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *job, sd_bus_error *erro
int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error);
void bus_job_send_change_signal(Job *j);
+void bus_job_send_pending_change_signal(Job *j, bool including_new);
void bus_job_send_removed_signal(Job *j);
int bus_job_coldplug_bus_track(Job *j);
diff --git a/src/core/dbus-kill.c b/src/core/dbus-kill.c
index 028e7ec1c1..e2b3a0d517 100644
--- a/src/core/dbus-kill.c
+++ b/src/core/dbus-kill.c
@@ -12,13 +12,17 @@ const sd_bus_vtable bus_kill_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("KillMode", "s", property_get_kill_mode, offsetof(KillContext, kill_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KillSignal", "i", bus_property_get_int, offsetof(KillContext, kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FinalKillSignal", "i", bus_property_get_int, offsetof(KillContext, final_kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SendSIGKILL", "b", bus_property_get_bool, offsetof(KillContext, send_sigkill), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SendSIGHUP", "b", bus_property_get_bool, offsetof(KillContext, send_sighup), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WatchdogSignal", "i", bus_property_get_int, offsetof(KillContext, watchdog_signal), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
static BUS_DEFINE_SET_TRANSIENT_PARSE(kill_mode, KillMode, kill_mode_from_string);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING(kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(final_kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(watchdog_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
int bus_kill_context_set_transient_property(
Unit *u,
@@ -47,5 +51,11 @@ int bus_kill_context_set_transient_property(
if (streq(name, "KillSignal"))
return bus_set_transient_kill_signal(u, name, &c->kill_signal, message, flags, error);
+ if (streq(name, "FinalKillSignal"))
+ return bus_set_transient_final_kill_signal(u, name, &c->final_kill_signal, message, flags, error);
+
+ if (streq(name, "WatchdogSignal"))
+ return bus_set_transient_watchdog_signal(u, name, &c->watchdog_signal, message, flags, error);
+
return 0;
}
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 4ed68af1e0..8da07adfe7 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -12,6 +12,7 @@
#include "dbus-execute.h"
#include "dbus-job.h"
#include "dbus-manager.h"
+#include "dbus-scope.h"
#include "dbus-unit.h"
#include "dbus.h"
#include "env-util.h"
@@ -216,6 +217,30 @@ static int property_get_progress(
return sd_bus_message_append(reply, "d", d);
}
+static int property_get_environment(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = manager_get_effective_environment(m, &l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
static int property_get_show_status(
sd_bus *bus,
const char *path,
@@ -232,7 +257,7 @@ static int property_get_show_status(
assert(reply);
assert(m);
- b = m->show_status > 0;
+ b = IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES);
return sd_bus_message_append_basic(reply, 'b', &b);
}
@@ -1298,9 +1323,9 @@ int verify_run_space_and_log(const char *message) {
r = verify_run_space(message, &error);
if (r < 0)
- log_error_errno(r, "%s", bus_error_message(&error, r));
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
- return r;
+ return 0;
}
static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -1329,12 +1354,12 @@ static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *
* is finished. That way the caller knows when the reload
* finished. */
- assert(!m->queued_message);
- r = sd_bus_message_new_method_return(message, &m->queued_message);
+ assert(!m->pending_reload_message);
+ r = sd_bus_message_new_method_return(message, &m->pending_reload_message);
if (r < 0)
return r;
- m->exit_code = MANAGER_RELOAD;
+ m->objective = MANAGER_RELOAD;
return 1;
}
@@ -1363,7 +1388,7 @@ static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_erro
/* We don't send a reply back here, the client should
* just wait for us disconnecting. */
- m->exit_code = MANAGER_REEXECUTE;
+ m->objective = MANAGER_REEXECUTE;
return 1;
}
@@ -1383,7 +1408,7 @@ static int method_exit(sd_bus_message *message, void *userdata, sd_bus_error *er
* systemd-shutdown if it cannot do the exit() because it isn't a
* container. */
- m->exit_code = MANAGER_EXIT;
+ m->objective = MANAGER_EXIT;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1402,7 +1427,7 @@ static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error *
if (!MANAGER_IS_SYSTEM(m))
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Reboot is only supported for system managers.");
- m->exit_code = MANAGER_REBOOT;
+ m->objective = MANAGER_REBOOT;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1421,7 +1446,7 @@ static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error
if (!MANAGER_IS_SYSTEM(m))
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Powering off is only supported for system managers.");
- m->exit_code = MANAGER_POWEROFF;
+ m->objective = MANAGER_POWEROFF;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1440,7 +1465,7 @@ static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *er
if (!MANAGER_IS_SYSTEM(m))
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Halt is only supported for system managers.");
- m->exit_code = MANAGER_HALT;
+ m->objective = MANAGER_HALT;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1459,7 +1484,7 @@ static int method_kexec(sd_bus_message *message, void *userdata, sd_bus_error *e
if (!MANAGER_IS_SYSTEM(m))
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "KExec is only supported for system managers.");
- m->exit_code = MANAGER_KEXEC;
+ m->objective = MANAGER_KEXEC;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1549,7 +1574,7 @@ static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_er
free(m->switch_root_init);
m->switch_root_init = ri;
- m->exit_code = MANAGER_SWITCH_ROOT;
+ m->objective = MANAGER_SWITCH_ROOT;
return sd_bus_reply_method_return(message, NULL);
}
@@ -1578,7 +1603,7 @@ static int method_set_environment(sd_bus_message *message, void *userdata, sd_bu
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = manager_environment_add(m, NULL, plus);
+ r = manager_client_environment_modify(m, NULL, plus);
if (r < 0)
return r;
@@ -1610,7 +1635,7 @@ static int method_unset_environment(sd_bus_message *message, void *userdata, sd_
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = manager_environment_add(m, minus, NULL);
+ r = manager_client_environment_modify(m, minus, NULL);
if (r < 0)
return r;
@@ -1648,7 +1673,7 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = manager_environment_add(m, minus, plus);
+ r = manager_client_environment_modify(m, minus, plus);
if (r < 0)
return r;
@@ -2398,6 +2423,29 @@ static int method_get_job_waiting(sd_bus_message *message, void *userdata, sd_bu
return bus_job_method_get_waiting_jobs(message, j, error);
}
+static int method_abandon_scope(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ if (u->type != UNIT_SCOPE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit '%s' is not a scope unit, refusing.", name);
+
+ return bus_scope_method_abandon(message, u, error);
+}
+
const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -2418,6 +2466,12 @@ const sd_bus_vtable bus_manager_vtable[] = {
BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", property_get_log_level, property_set_log_level, 0, 0),
SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", property_get_log_target, property_set_log_target, 0, 0),
SD_BUS_PROPERTY("NNames", "u", property_get_hashmap_size, offsetof(Manager, units), 0),
@@ -2426,7 +2480,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0),
SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0),
SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
- SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(Manager, environment), 0),
+ SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0),
SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ShowStatus", "b", property_get_show_status, 0, 0),
SD_BUS_PROPERTY("UnitPath", "as", NULL, offsetof(Manager, lookup_paths.search_path), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -2507,6 +2561,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AbandonScope", "s", NULL, method_abandon_scope, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index 3f98d3ecf0..b6d61627eb 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -145,7 +145,7 @@ int bus_mount_set_property(
int bus_mount_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index 6725f62794..bb807df2e9 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -14,7 +14,7 @@
#include "selinux-access.h"
#include "unit.h"
-static int bus_scope_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) {
Scope *s = userdata;
int r;
@@ -48,7 +48,7 @@ const sd_bus_vtable bus_scope_vtable[] = {
SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_SIGNAL("RequestStop", NULL, 0),
- SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_abandon, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_method_abandon, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_VTABLE_END
};
@@ -186,7 +186,7 @@ int bus_scope_set_property(
int bus_scope_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-scope.h b/src/core/dbus-scope.h
index 7c080dbcf7..702f55898d 100644
--- a/src/core/dbus-scope.h
+++ b/src/core/dbus-scope.h
@@ -14,4 +14,6 @@ int bus_scope_commit_properties(Unit *u);
int bus_scope_send_request_stop(Scope *s);
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
int bus_scope_track_controller(Scope *s);
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 1b4c98c7d2..ec61ea2772 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -105,7 +105,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
- SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* 😷 deprecated */
SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("GuessMainPID", "b", bus_property_get_bool, offsetof(Service, guess_main_pid), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -120,8 +120,8 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("StatusErrno", "i", bus_property_get_int, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
- SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
- SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -312,8 +312,40 @@ static int bus_service_set_transient_property(
if (streq(name, "NotifyAccess"))
return bus_set_transient_notify_access(u, name, &s->notify_access, message, flags, error);
- if (streq(name, "PIDFile"))
- return bus_set_transient_path(u, name, &s->pid_file, message, flags, error);
+ if (streq(name, "PIDFile")) {
+ _cleanup_free_ char *n = NULL;
+ const char *v, *e;
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ n = path_make_absolute(v, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, true);
+
+ if (!path_is_normalized(n))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PIDFile= path '%s' is not valid", n);
+
+ e = path_startswith(n, "/var/run/");
+ if (e) {
+ char *z;
+
+ z = strjoin("/run/", e);
+ if (!z)
+ return log_oom();
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ log_unit_notice(u, "Transient unit's PIDFile= property references path below legacy directory /var/run, updating %s → %s; please update client accordingly.", n, z);
+
+ free_and_replace(s->pid_file, z);
+ } else
+ free_and_replace(s->pid_file, n);
+
+ return 1;
+ }
if (streq(name, "USBFunctionDescriptors"))
return bus_set_transient_path(u, name, &s->usb_function_descriptors, message, flags, error);
@@ -392,7 +424,7 @@ int bus_service_set_property(
int bus_service_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c
index 722a5688a5..effd5fa5d7 100644
--- a/src/core/dbus-slice.c
+++ b/src/core/dbus-slice.c
@@ -28,7 +28,7 @@ int bus_slice_set_property(
int bus_slice_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 913cc74918..37cf9d204c 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -8,10 +8,10 @@
#include "dbus-socket.h"
#include "dbus-util.h"
#include "fd-util.h"
+#include "ip-protocol-list.h"
#include "parse-util.h"
#include "path-util.h"
#include "socket.h"
-#include "socket-protocol-list.h"
#include "socket-util.h"
#include "string-util.h"
#include "unit.h"
@@ -138,14 +138,14 @@ static inline bool check_size_t_truncation(uint64_t t) {
return (size_t) t == t;
}
-static inline const char* supported_socket_protocol_to_string(int32_t i) {
+static inline const char* socket_protocol_to_string(int32_t i) {
if (i == IPPROTO_IP)
return "";
if (!IN_SET(i, IPPROTO_UDPLITE, IPPROTO_SCTP))
return NULL;
- return socket_protocol_to_name(i);
+ return ip_protocol_to_name(i);
}
static BUS_DEFINE_SET_TRANSIENT(int, "i", int32_t, int, "%" PRIi32);
@@ -155,7 +155,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(bind_ipv6_only, SocketAddressBindIPv6Only,
static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(fdname, fdname_is_valid);
static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(ifname, ifname_valid);
static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(ip_tos, "i", int32_t, int, "%" PRIi32, ip_tos_to_string_alloc);
-static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, supported_socket_protocol_to_string);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, socket_protocol_to_string);
static int bus_socket_set_transient_property(
Socket *s,
@@ -351,16 +351,27 @@ static int bus_socket_set_transient_property(
while ((r = sd_bus_message_read(message, "(ss)", &t, &a)) > 0) {
_cleanup_free_ SocketPort *p = NULL;
- p = new0(SocketPort, 1);
+ p = new(SocketPort, 1);
if (!p)
return log_oom();
+ *p = (SocketPort) {
+ .fd = -1,
+ .socket = s,
+ };
+
p->type = socket_port_type_from_string(t);
if (p->type < 0)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown Socket type: %s", t);
if (p->type != SOCKET_SOCKET) {
+ if (!path_is_valid(p->path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid socket path: %s", t);
+
p->path = strdup(a);
+ if (!p->path)
+ return log_oom();
+
path_simplify(p->path, false);
} else if (streq(t, "Netlink")) {
@@ -381,21 +392,10 @@ static int bus_socket_set_transient_property(
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Address family not supported: %s", a);
}
- p->fd = -1;
- p->auxiliary_fds = NULL;
- p->n_auxiliary_fds = 0;
- p->socket = s;
-
empty = false;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
- SocketPort *tail;
-
- LIST_FIND_TAIL(port, s->ports, tail);
- LIST_INSERT_AFTER(port, s->ports, tail, p);
-
- p = NULL;
-
+ LIST_APPEND(port, s->ports, TAKE_PTR(p));
unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Listen%s=%s", t, a);
}
}
@@ -461,7 +461,7 @@ int bus_socket_set_property(
int bus_socket_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
index b272d10113..353fa20132 100644
--- a/src/core/dbus-swap.c
+++ b/src/core/dbus-swap.c
@@ -63,7 +63,7 @@ int bus_swap_set_property(
int bus_swap_commit_properties(Unit *u) {
assert(u);
- unit_update_cgroup_members_masks(u);
+ unit_invalidate_cgroup_members_masks(u);
unit_realize_cgroup(u);
return 0;
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index ae0410414e..968166ee60 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -434,7 +434,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
u,
"kill",
CAP_KILL,
- N_("Authentication is required to kill '$(unit)'."),
+ N_("Authentication is required to send a UNIX signal to the processes of '$(unit)'."),
true,
message,
error);
@@ -561,6 +561,44 @@ int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error
return sd_bus_reply_method_return(message, NULL);
}
+static int property_get_refs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *i;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (i = sd_bus_track_first(u->bus_track); i; i = sd_bus_track_next(u->bus_track)) {
+ int c, k;
+
+ c = sd_bus_track_count_name(u->bus_track, i);
+ if (c < 0)
+ return c;
+
+ /* Add the item multiple times if the ref count for each is above 1 */
+ for (k = 0; k < c; k++) {
+ r = sd_bus_message_append(reply, "s", i);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -624,8 +662,8 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_PROPERTY_DUAL_TIMESTAMP("AssertTimestamp", offsetof(Unit, assert_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
- SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), 0),
- SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
+ SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -633,10 +671,13 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureActionExitStatus", "i", bus_property_get_int, offsetof(Unit, failure_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SuccessAction", "s", property_get_emergency_action, offsetof(Unit, success_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessActionExitStatus", "i", bus_property_get_int, offsetof(Unit, success_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
- SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), 0),
+ SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Refs", "as", property_get_refs, 0, 0),
SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -1033,7 +1074,7 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd
if (r < 0)
return r;
- /* Let's validate security: if the sender is root, then all is OK. If the sender is is any other unit,
+ /* Let's validate security: if the sender is root, then all is OK. If the sender is any other unit,
* then the process' UID and the target unit's UID have to match the sender's UID */
if (sender_uid != 0 && sender_uid != getuid()) {
r = get_process_uid(pid, &process_uid);
@@ -1161,6 +1202,27 @@ void bus_unit_send_change_signal(Unit *u) {
u->sent_dbus_new_signal = true;
}
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new) {
+
+ /* Sends out any pending change signals, but only if they really are pending. This call is used when we are
+ * about to change state in order to force out a PropertiesChanged signal beforehand if there was one pending
+ * so that clients can follow the full state transition */
+
+ if (!u->in_dbus_queue) /* If not enqueued, don't bother */
+ return;
+
+ if (!u->sent_dbus_new_signal && !including_new) /* If the unit was never announced, don't bother, it's fine if
+ * the unit appears in the new state right-away (except if the
+ * caller explicitly asked us to send it anyway) */
+ return;
+
+ if (MANAGER_IS_RELOADING(u->manager)) /* Don't generate unnecessary PropertiesChanged signals for the same unit
+ * when we are reloading. */
+ return;
+
+ bus_unit_send_change_signal(u);
+}
+
static int send_removed_signal(sd_bus *bus, void *userdata) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
_cleanup_free_ char *p = NULL;
@@ -1259,6 +1321,9 @@ int bus_unit_queue_job(
if (!path)
return -ENOMEM;
+ /* Before we send the method reply, force out the announcement JobNew for this job */
+ bus_job_send_pending_change_signal(j, true);
+
return sd_bus_reply_method_return(message, "o", path);
}
@@ -1299,8 +1364,75 @@ static int bus_unit_set_live_property(
return 0;
}
+static int bus_set_transient_emergency_action(
+ Unit *u,
+ const char *name,
+ EmergencyAction *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *s;
+ EmergencyAction v;
+ int r;
+ bool system;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ system = MANAGER_IS_SYSTEM(u->manager);
+ r = parse_emergency_action(s, system, &v);
+ if (v < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ v == -EOPNOTSUPP ? "EmergencyAction setting invalid for manager type: %s"
+ : "Invalid %s setting: %s",
+ name, s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name,
+ "%s=%s", name, s);
+ }
+
+ return 1;
+}
+
+static int bus_set_transient_exit_status(
+ Unit *u,
+ const char *name,
+ int *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int32_t k;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "i", &k);
+ if (r < 0)
+ return r;
+
+ if (k > 255)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Exit status must be in range 0…255 or negative.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = k < 0 ? -1 : k;
+
+ if (k < 0)
+ unit_write_settingf(u, flags, name, "%s=", name);
+ else
+ unit_write_settingf(u, flags, name, "%s=%i", name, k);
+ }
+
+ return 1;
+}
+
static BUS_DEFINE_SET_TRANSIENT_PARSE(collect_mode, CollectMode, collect_mode_from_string);
-static BUS_DEFINE_SET_TRANSIENT_PARSE(emergency_action, EmergencyAction, emergency_action_from_string);
static BUS_DEFINE_SET_TRANSIENT_PARSE(job_mode, JobMode, job_mode_from_string);
static int bus_set_transient_conditions(
@@ -1450,6 +1582,12 @@ static int bus_unit_set_transient_property(
if (streq(name, "SuccessAction"))
return bus_set_transient_emergency_action(u, name, &u->success_action, message, flags, error);
+ if (streq(name, "FailureActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->failure_action_exit_status, message, flags, error);
+
+ if (streq(name, "SuccessActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->success_action_exit_status, message, flags, error);
+
if (streq(name, "RebootArgument"))
return bus_set_transient_string(u, name, &u->reboot_arg, message, flags, error);
@@ -1572,7 +1710,7 @@ static int bus_unit_set_transient_property(
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
_cleanup_free_ char *label = NULL;
- r = unit_add_dependency_by_name(u, d, other, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(u, d, other, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
return r;
@@ -1726,7 +1864,7 @@ int bus_unit_validate_load_state(Unit *u, sd_bus_error *error) {
return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING, "Unit %s has a bad unit file setting.", u->id);
case UNIT_ERROR: /* Only show .load_error in UNIT_ERROR state */
- return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to loaded properly: %m.", u->id);
+ return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to load properly: %m.", u->id);
case UNIT_MASKED:
return sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED, "Unit %s is masked.", u->id);
@@ -1746,7 +1884,13 @@ static int bus_unit_track_handler(sd_bus_track *t, void *userdata) {
u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+ /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too,
+ * let's see */
+ unit_add_to_cgroup_empty_queue(u);
+
+ /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */
unit_add_to_gc_queue(u);
+
return 0;
}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
index 68eb621836..345345e3eb 100644
--- a/src/core/dbus-unit.h
+++ b/src/core/dbus-unit.h
@@ -11,6 +11,7 @@ extern const sd_bus_vtable bus_unit_vtable[];
extern const sd_bus_vtable bus_unit_cgroup_vtable[];
void bus_unit_send_change_signal(Unit *u);
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new);
void bus_unit_send_removed_signal(Unit *u);
int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error);
diff --git a/src/core/dbus.c b/src/core/dbus.c
index bf5917696e..5908ad792a 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -36,6 +36,7 @@
#include "mkdir.h"
#include "process-util.h"
#include "selinux-access.h"
+#include "serialize.h"
#include "service.h"
#include "special.h"
#include "string-util.h"
@@ -47,23 +48,22 @@
static void destroy_bus(Manager *m, sd_bus **bus);
-int bus_send_queued_message(Manager *m) {
+int bus_send_pending_reload_message(Manager *m) {
int r;
assert(m);
- if (!m->queued_message)
+ if (!m->pending_reload_message)
return 0;
- /* If we cannot get rid of this message we won't dispatch any
- * D-Bus messages, so that we won't end up wanting to queue
- * another message. */
+ /* If we cannot get rid of this message we won't dispatch any D-Bus messages, so that we won't end up wanting
+ * to queue another message. */
- r = sd_bus_send(NULL, m->queued_message, NULL);
+ r = sd_bus_send(NULL, m->pending_reload_message, NULL);
if (r < 0)
- log_warning_errno(r, "Failed to send queued message: %m");
+ log_warning_errno(r, "Failed to send queued message, ignoring: %m");
- m->queued_message = sd_bus_message_unref(m->queued_message);
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
return 0;
}
@@ -974,12 +974,9 @@ int bus_init_system(Manager *m) {
int bus_init_private(Manager *m) {
_cleanup_close_ int fd = -1;
- union sockaddr_union sa = {
- .un.sun_family = AF_UNIX
- };
+ union sockaddr_union sa = {};
sd_event_source *s;
- socklen_t salen;
- int r;
+ int r, salen;
assert(m);
@@ -992,27 +989,23 @@ int bus_init_private(Manager *m) {
if (getpid_cached() != 1)
return 0;
- strcpy(sa.un.sun_path, "/run/systemd/private");
- salen = SOCKADDR_UN_LEN(sa.un);
+ salen = sockaddr_un_set_path(&sa.un, "/run/systemd/private");
} else {
- size_t left = sizeof(sa.un.sun_path);
- char *p = sa.un.sun_path;
- const char *e;
+ const char *e, *joined;
e = secure_getenv("XDG_RUNTIME_DIR");
- if (!e) {
- log_error("Failed to determine XDG_RUNTIME_DIR");
- return -EHOSTDOWN;
- }
-
- left = strpcpy(&p, left, e);
- left = strpcpy(&p, left, "/systemd/private");
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "XDG_RUNTIME_DIR is not set, refusing.");
- salen = sizeof(sa.un) - left;
+ joined = strjoina(e, "/systemd/private");
+ salen = sockaddr_un_set_path(&sa.un, joined);
}
+ if (salen < 0)
+ return log_error_errno(salen, "Can't set path for AF_UNIX socket to bind to: %m");
(void) mkdir_parents_label(sa.un.sun_path, 0755);
- (void) unlink(sa.un.sun_path);
+ (void) sockaddr_un_unlink(&sa.un);
fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
if (fd < 0)
@@ -1035,9 +1028,8 @@ int bus_init_private(Manager *m) {
(void) sd_event_source_set_description(s, "bus-connection");
- m->private_listen_fd = fd;
+ m->private_listen_fd = TAKE_FD(fd);
m->private_listen_event_source = s;
- fd = -1;
log_debug("Successfully created private D-Bus server.");
@@ -1079,8 +1071,8 @@ static void destroy_bus(Manager *m, sd_bus **bus) {
u->bus_track = sd_bus_track_unref(u->bus_track);
/* Get rid of queued message on this bus */
- if (m->queued_message && sd_bus_message_get_bus(m->queued_message) == *bus)
- m->queued_message = sd_bus_message_unref(m->queued_message);
+ if (m->pending_reload_message && sd_bus_message_get_bus(m->pending_reload_message) == *bus)
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
/* Possibly flush unwritten data, but only if we are
* unprivileged, since we don't want to sync here */
@@ -1211,13 +1203,8 @@ void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
int c, j;
c = sd_bus_track_count_name(t, n);
-
- for (j = 0; j < c; j++) {
- fputs(prefix, f);
- fputc('=', f);
- fputs(n, f);
- fputc('\n', f);
- }
+ for (j = 0; j < c; j++)
+ (void) serialize_item(f, prefix, n);
}
}
diff --git a/src/core/dbus.h b/src/core/dbus.h
index 382a96da7d..f1c0fa86c0 100644
--- a/src/core/dbus.h
+++ b/src/core/dbus.h
@@ -5,7 +5,7 @@
#include "manager.h"
-int bus_send_queued_message(Manager *m);
+int bus_send_pending_reload_message(Manager *m);
int bus_init_private(Manager *m);
int bus_init_api(Manager *m);
diff --git a/src/core/device.c b/src/core/device.c
index a2d00a0fbe..960f403718 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -3,19 +3,20 @@
#include <errno.h>
#include <sys/epoll.h>
-#include "libudev.h"
-
#include "alloc-util.h"
#include "bus-error.h"
#include "dbus-device.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
#include "device.h"
#include "log.h"
#include "parse-util.h"
#include "path-util.h"
+#include "serialize.h"
#include "stat-util.h"
#include "string-util.h"
#include "swap.h"
-#include "udev-util.h"
#include "unit-name.h"
#include "unit.h"
@@ -25,7 +26,7 @@ static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
[DEVICE_PLUGGED] = UNIT_ACTIVE,
};
-static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata);
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask);
static void device_unset_sysfs(Device *d) {
@@ -111,10 +112,31 @@ static void device_done(Unit *u) {
d->wants_property = strv_free(d->wants_property);
}
+static int device_load(Unit *u) {
+ int r;
+
+ r = unit_load_fragment_and_dropin_optional(u);
+ if (r < 0)
+ return r;
+
+ if (!u->description) {
+ /* Generate a description based on the path, to be used until the
+ device is initialized properly */
+ r = unit_name_to_path(u->id, &u->description);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to unescape name: %m");
+ }
+
+ return 0;
+}
+
static void device_set_state(Device *d, DeviceState state) {
DeviceState old_state;
assert(d);
+ if (d->state != state)
+ bus_unit_send_pending_change_signal(UNIT(d), false);
+
old_state = d->state;
d->state = state;
@@ -226,10 +248,10 @@ static int device_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", device_state_to_string(d->state));
+ (void) serialize_item(f, "state", device_state_to_string(d->state));
if (device_found_to_string_many(d->found, &s) >= 0)
- unit_serialize_item(u, f, "found", s);
+ (void) serialize_item(f, "found", s);
return 0;
}
@@ -255,7 +277,7 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
} else if (streq(key, "found")) {
r = device_found_from_string_many(value, &d->deserialized_found);
if (r < 0)
- log_unit_debug_errno(u, r, "Failed to parse found value, ignoring: %s", value);
+ log_unit_debug_errno(u, r, "Failed to parse found value '%s', ignoring: %m", value);
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -300,47 +322,40 @@ _pure_ static const char *device_sub_state_to_string(Unit *u) {
return device_state_to_string(DEVICE(u)->state);
}
-static int device_update_description(Unit *u, struct udev_device *dev, const char *path) {
- const char *model;
+static int device_update_description(Unit *u, sd_device *dev, const char *path) {
+ _cleanup_free_ char *j = NULL;
+ const char *model, *label, *desc;
int r;
assert(u);
- assert(dev);
assert(path);
- model = udev_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE");
- if (!model)
- model = udev_device_get_property_value(dev, "ID_MODEL");
+ desc = path;
- if (model) {
- const char *label;
+ if (dev &&
+ (sd_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE", &model) >= 0 ||
+ sd_device_get_property_value(dev, "ID_MODEL", &model) >= 0)) {
+ desc = model;
/* Try to concatenate the device model string with a label, if there is one */
- label = udev_device_get_property_value(dev, "ID_FS_LABEL");
- if (!label)
- label = udev_device_get_property_value(dev, "ID_PART_ENTRY_NAME");
- if (!label)
- label = udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER");
+ if (sd_device_get_property_value(dev, "ID_FS_LABEL", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NAME", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER", &label) >= 0) {
- if (label) {
- _cleanup_free_ char *j;
-
- j = strjoin(model, " ", label);
+ desc = j = strjoin(model, " ", label);
if (!j)
return log_oom();
+ }
+ }
- r = unit_set_description(u, j);
- } else
- r = unit_set_description(u, model);
- } else
- r = unit_set_description(u, path);
+ r = unit_set_description(u, desc);
if (r < 0)
return log_unit_error_errno(u, r, "Failed to set device description: %m");
return 0;
}
-static int device_add_udev_wants(Unit *u, struct udev_device *dev) {
+static int device_add_udev_wants(Unit *u, sd_device *dev) {
_cleanup_strv_free_ char **added = NULL;
const char *wants, *property;
Device *d = DEVICE(u);
@@ -351,8 +366,8 @@ static int device_add_udev_wants(Unit *u, struct udev_device *dev) {
property = MANAGER_IS_USER(u->manager) ? "SYSTEMD_USER_WANTS" : "SYSTEMD_WANTS";
- wants = udev_device_get_property_value(dev, property);
- if (!wants)
+ r = sd_device_get_property_value(dev, property, &wants);
+ if (r < 0)
return 0;
for (;;) {
@@ -387,7 +402,7 @@ static int device_add_udev_wants(Unit *u, struct udev_device *dev) {
return log_unit_error_errno(u, r, "Failed to mangle unit name \"%s\": %m", word);
}
- r = unit_add_dependency_by_name(u, UNIT_WANTS, k, NULL, true, UNIT_DEPENDENCY_UDEV);
+ r = unit_add_dependency_by_name(u, UNIT_WANTS, k, true, UNIT_DEPENDENCY_UDEV);
if (r < 0)
return log_unit_error_errno(u, r, "Failed to add Wants= dependency: %m");
@@ -429,18 +444,17 @@ static int device_add_udev_wants(Unit *u, struct udev_device *dev) {
return 0;
}
-static bool device_is_bound_by_mounts(Device *d, struct udev_device *dev) {
+static bool device_is_bound_by_mounts(Device *d, sd_device *dev) {
const char *bound_by;
int r;
assert(d);
assert(dev);
- bound_by = udev_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND");
- if (bound_by) {
+ if (sd_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND", &bound_by) >= 0) {
r = parse_boolean(bound_by);
if (r < 0)
- log_warning_errno(r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property of %s, ignoring: %m", bound_by, strna(d->sysfs));
+ log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property, ignoring: %m", bound_by);
d->bind_mounts = r > 0;
} else
@@ -467,7 +481,7 @@ static void device_upgrade_mount_deps(Unit *u) {
}
}
-static int device_setup_unit(Manager *m, struct udev_device *dev, const char *path, bool main) {
+static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool main) {
_cleanup_free_ char *e = NULL;
const char *sysfs = NULL;
Unit *u = NULL;
@@ -478,16 +492,16 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
assert(path);
if (dev) {
- sysfs = udev_device_get_syspath(dev);
- if (!sysfs) {
- log_debug("Couldn't get syspath from udev device, ignoring.");
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Couldn't get syspath from device, ignoring: %m");
return 0;
}
}
r = unit_name_from_path(path, ".device", &e);
if (r < 0)
- return log_error_errno(r, "Failed to generate unit name from device path: %m");
+ return log_device_error_errno(dev, r, "Failed to generate unit name from device path: %m");
u = manager_get_unit(m, e);
if (u) {
@@ -518,7 +532,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
r = unit_new_for_name(m, sizeof(Device), e, &u);
if (r < 0) {
- log_error_errno(r, "Failed to allocate device unit %s: %m", e);
+ log_device_error_errno(dev, r, "Failed to allocate device unit %s: %m", e);
goto fail;
}
@@ -530,17 +544,17 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
if (sysfs) {
r = device_set_sysfs(DEVICE(u), sysfs);
if (r < 0) {
- log_error_errno(r, "Failed to set sysfs path %s for device unit %s: %m", sysfs, e);
+ log_unit_error_errno(u, r, "Failed to set sysfs path %s: %m", sysfs);
goto fail;
}
- (void) device_update_description(u, dev, path);
-
/* The additional systemd udev properties we only interpret for the main object */
if (main)
(void) device_add_udev_wants(u, dev);
}
+ (void) device_update_description(u, dev, path);
+
/* So the user wants the mount units to be bound to the device but a mount unit might has been seen by systemd
* before the device appears on its radar. In this case the device unit is partially initialized and includes
* the deps on the mount unit but at that time the "bind mounts" flag wasn't not present. Fix this up now. */
@@ -559,15 +573,14 @@ fail:
return r;
}
-static int device_process_new(Manager *m, struct udev_device *dev) {
+static int device_process_new(Manager *m, sd_device *dev) {
const char *sysfs, *dn, *alias;
- struct udev_list_entry *item = NULL, *first = NULL;
+ dev_t devnum;
int r;
assert(m);
- sysfs = udev_device_get_syspath(dev);
- if (!sysfs)
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
return 0;
/* Add the main unit named after the sysfs path */
@@ -576,40 +589,39 @@ static int device_process_new(Manager *m, struct udev_device *dev) {
return r;
/* Add an additional unit for the device node */
- dn = udev_device_get_devnode(dev);
- if (dn)
+ if (sd_device_get_devname(dev, &dn) >= 0)
(void) device_setup_unit(m, dev, dn, false);
/* Add additional units for all symlinks */
- first = udev_device_get_devlinks_list_entry(dev);
- udev_list_entry_foreach(item, first) {
+ if (sd_device_get_devnum(dev, &devnum) >= 0) {
const char *p;
- struct stat st;
- /* Don't bother with the /dev/block links */
- p = udev_list_entry_get_name(item);
+ FOREACH_DEVICE_DEVLINK(dev, p) {
+ struct stat st;
- if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/"))
- continue;
+ if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/"))
+ continue;
- /* Verify that the symlink in the FS actually belongs
- * to this device. This is useful to deal with
- * conflicting devices, e.g. when two disks want the
- * same /dev/disk/by-label/xxx link because they have
- * the same label. We want to make sure that the same
- * device that won the symlink wins in systemd, so we
- * check the device node major/minor */
- if (stat(p, &st) >= 0)
- if ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) ||
- st.st_rdev != udev_device_get_devnum(dev))
+ /* Verify that the symlink in the FS actually belongs
+ * to this device. This is useful to deal with
+ * conflicting devices, e.g. when two disks want the
+ * same /dev/disk/by-label/xxx link because they have
+ * the same label. We want to make sure that the same
+ * device that won the symlink wins in systemd, so we
+ * check the device node major/minor */
+ if (stat(p, &st) >= 0 &&
+ ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) ||
+ st.st_rdev != devnum))
continue;
- (void) device_setup_unit(m, dev, p, false);
+ (void) device_setup_unit(m, dev, p, false);
+ }
}
- /* Add additional units for all explicitly configured
- * aliases */
- alias = udev_device_get_property_value(dev, "SYSTEMD_ALIAS");
+ /* Add additional units for all explicitly configured aliases */
+ if (sd_device_get_property_value(dev, "SYSTEMD_ALIAS", &alias) < 0)
+ return 0;
+
for (;;) {
_cleanup_free_ char *word = NULL;
@@ -619,12 +631,12 @@ static int device_process_new(Manager *m, struct udev_device *dev) {
if (r == -ENOMEM)
return log_oom();
if (r < 0)
- return log_warning_errno(r, "Failed to add parse SYSTEMD_ALIAS for %s: %m", sysfs);
+ return log_device_warning_errno(dev, r, "Failed to add parse SYSTEMD_ALIAS property: %m");
if (!path_is_absolute(word))
- log_warning("SYSTEMD_ALIAS for %s is not an absolute path, ignoring: %s", sysfs, word);
+ log_device_warning(dev, "SYSTEMD_ALIAS is not an absolute path, ignoring: %s", word);
else if (!path_is_normalized(word))
- log_warning("SYSTEMD_ALIAS for %s is not a normalized path, ignoring: %s", sysfs, word);
+ log_device_warning(dev, "SYSTEMD_ALIAS is not a normalized path, ignoring: %s", word);
else
(void) device_setup_unit(m, dev, word, false);
}
@@ -712,13 +724,12 @@ static int device_update_found_by_name(Manager *m, const char *path, DeviceFound
return 0;
}
-static bool device_is_ready(struct udev_device *dev) {
+static bool device_is_ready(sd_device *dev) {
const char *ready;
assert(dev);
- ready = udev_device_get_property_value(dev, "SYSTEMD_READY");
- if (!ready)
+ if (sd_device_get_property_value(dev, "SYSTEMD_READY", &ready) < 0)
return true;
return parse_boolean(ready) != 0;
@@ -734,11 +745,11 @@ static Unit *device_following(Unit *u) {
return NULL;
/* Make everybody follow the unit that's named after the sysfs path */
- for (other = d->same_sysfs_next; other; other = other->same_sysfs_next)
+ LIST_FOREACH_AFTER(same_sysfs, other, d)
if (startswith(UNIT(other)->id, "sys-"))
return UNIT(other);
- for (other = d->same_sysfs_prev; other; other = other->same_sysfs_prev) {
+ LIST_FOREACH_BEFORE(same_sysfs, other, d) {
if (startswith(UNIT(other)->id, "sys-"))
return UNIT(other);
@@ -784,98 +795,71 @@ static int device_following_set(Unit *u, Set **_set) {
static void device_shutdown(Manager *m) {
assert(m);
- m->udev_event_source = sd_event_source_unref(m->udev_event_source);
- m->udev_monitor = udev_monitor_unref(m->udev_monitor);
+ m->device_monitor = sd_device_monitor_unref(m->device_monitor);
m->devices_by_sysfs = hashmap_free(m->devices_by_sysfs);
}
static void device_enumerate(Manager *m) {
- _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL;
- struct udev_list_entry *item = NULL, *first = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
int r;
assert(m);
- if (!m->udev_monitor) {
- m->udev_monitor = udev_monitor_new_from_netlink(m->udev, "udev");
- if (!m->udev_monitor) {
- log_error_errno(errno, "Failed to allocate udev monitor: %m");
+ if (!m->device_monitor) {
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate device monitor: %m");
goto fail;
}
/* This will fail if we are unprivileged, but that
* should not matter much, as user instances won't run
* during boot. */
- (void) udev_monitor_set_receive_buffer_size(m->udev_monitor, 128*1024*1024);
+ (void) sd_device_monitor_set_receive_buffer_size(m->device_monitor, 128*1024*1024);
- r = udev_monitor_filter_add_match_tag(m->udev_monitor, "systemd");
+ r = sd_device_monitor_filter_add_match_tag(m->device_monitor, "systemd");
if (r < 0) {
log_error_errno(r, "Failed to add udev tag match: %m");
goto fail;
}
- r = udev_monitor_enable_receiving(m->udev_monitor);
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
if (r < 0) {
- log_error_errno(r, "Failed to enable udev event reception: %m");
+ log_error_errno(r, "Failed to attach event to device monitor: %m");
goto fail;
}
- r = sd_event_add_io(m->event, &m->udev_event_source, udev_monitor_get_fd(m->udev_monitor), EPOLLIN, device_dispatch_io, m);
+ r = sd_device_monitor_start(m->device_monitor, device_dispatch_io, m);
if (r < 0) {
- log_error_errno(r, "Failed to watch udev file descriptor: %m");
+ log_error_errno(r, "Failed to start device monitor: %m");
goto fail;
}
-
- (void) sd_event_source_set_description(m->udev_event_source, "device");
- }
-
- e = udev_enumerate_new(m->udev);
- if (!e) {
- log_error_errno(errno, "Failed to alloacte udev enumerator: %m");
- goto fail;
- }
-
- r = udev_enumerate_add_match_tag(e, "systemd");
- if (r < 0) {
- log_error_errno(r, "Failed to create udev tag enumeration: %m");
- goto fail;
}
- r = udev_enumerate_add_match_is_initialized(e);
+ r = sd_device_enumerator_new(&e);
if (r < 0) {
- log_error_errno(r, "Failed to install initialization match into enumeration: %m");
+ log_error_errno(r, "Failed to allocate device enumerator: %m");
goto fail;
}
- r = udev_enumerate_scan_devices(e);
+ r = sd_device_enumerator_add_match_tag(e, "systemd");
if (r < 0) {
- log_error_errno(r, "Failed to enumerate devices: %m");
+ log_error_errno(r, "Failed to set tag for device enumeration: %m");
goto fail;
}
- first = udev_enumerate_get_list_entry(e);
- udev_list_entry_foreach(item, first) {
- _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
+ FOREACH_DEVICE(e, dev) {
const char *sysfs;
- sysfs = udev_list_entry_get_name(item);
-
- dev = udev_device_new_from_syspath(m->udev, sysfs);
- if (!dev) {
- if (errno == ENOMEM) {
- log_oom();
- goto fail;
- }
-
- /* If we can't create a device, don't bother, it probably just disappeared. */
- log_debug_errno(errno, "Failed to create udev device object for %s: %m", sysfs);
- continue;
- }
-
if (!device_is_ready(dev))
continue;
(void) device_process_new(m, dev);
+
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
+ continue;
+
device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
}
@@ -903,40 +887,23 @@ static void device_propagate_reload_by_sysfs(Manager *m, const char *sysfs) {
}
}
-static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
- _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
Manager *m = userdata;
const char *action, *sysfs;
int r;
assert(m);
+ assert(dev);
- if (revents != EPOLLIN) {
- static RATELIMIT_DEFINE(limit, 10*USEC_PER_SEC, 5);
-
- if (ratelimit_below(&limit))
- log_warning("Failed to get udev event");
- if (!(revents & EPOLLIN))
- return 0;
- }
-
- /*
- * libudev might filter-out devices which pass the bloom
- * filter, so getting NULL here is not necessarily an error.
- */
- dev = udev_monitor_receive_device(m->udev_monitor);
- if (!dev)
- return 0;
-
- sysfs = udev_device_get_syspath(dev);
- if (!sysfs) {
- log_error("Failed to get udev sys path.");
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get device sys path: %m");
return 0;
}
- action = udev_device_get_action(dev);
- if (!action) {
- log_error("Failed to get udev action string.");
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get udev action string: %m");
return 0;
}
@@ -949,7 +916,7 @@ static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
if (streq(action, "remove")) {
r = swap_process_device_remove(m, dev);
if (r < 0)
- log_warning_errno(r, "Failed to process swap device remove event, ignoring: %m");
+ log_device_warning_errno(dev, r, "Failed to process swap device remove event, ignoring: %m");
/* If we get notified that a device was removed by
* udev, then it's completely gone, hence unset all
@@ -962,7 +929,7 @@ static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
r = swap_process_device_new(m, dev);
if (r < 0)
- log_warning_errno(r, "Failed to process swap device new event, ignoring: %m");
+ log_device_warning_errno(dev, r, "Failed to process swap device new event, ignoring: %m");
manager_dispatch_load_queue(m);
@@ -992,7 +959,7 @@ static bool device_supported(void) {
return read_only <= 0;
}
-static int validate_node(Manager *m, const char *node, struct udev_device **ret) {
+static int validate_node(Manager *m, const char *node, sd_device **ret) {
struct stat st;
int r;
@@ -1016,9 +983,9 @@ static int validate_node(Manager *m, const char *node, struct udev_device **ret)
return 1; /* good! (though missing) */
} else {
- _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
- r = udev_device_new_from_stat_rdev(m->udev, &st, &dev);
+ r = device_new_from_stat_rdev(&dev, &st);
if (r == -ENOENT) {
*ret = NULL;
return 1; /* good! (though missing) */
@@ -1054,7 +1021,7 @@ void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFo
* and unset individual bits in a single call, while merging partially with previous state. */
if ((found & mask) != 0) {
- _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
/* If the device is known in the kernel and newly appeared, then we'll create a device unit for it,
* under the name referenced in /proc/swaps or /proc/self/mountinfo. But first, let's validate if
@@ -1092,7 +1059,7 @@ const UnitVTable device_vtable = {
.init = device_init,
.done = device_done,
- .load = unit_load_fragment_and_dropin_optional,
+ .load = device_load,
.coldplug = device_coldplug,
.catchup = device_catchup,
diff --git a/src/core/device.h b/src/core/device.h
index a119b33e57..3062be782d 100644
--- a/src/core/device.h
+++ b/src/core/device.h
@@ -11,9 +11,9 @@ typedef struct Device Device;
* in quick succession). Hence we need to track precisely where it is already visible and where not. */
typedef enum DeviceFound {
DEVICE_NOT_FOUND = 0,
- DEVICE_FOUND_UDEV = 1U << 1, /* The device has shown up in the udev database */
- DEVICE_FOUND_MOUNT = 1U << 2, /* The device has shown up in /proc/self/mountinfo */
- DEVICE_FOUND_SWAP = 1U << 3, /* The device has shown up in /proc/swaps */
+ DEVICE_FOUND_UDEV = 1 << 0, /* The device has shown up in the udev database */
+ DEVICE_FOUND_MOUNT = 1 << 1, /* The device has shown up in /proc/self/mountinfo */
+ DEVICE_FOUND_SWAP = 1 << 2, /* The device has shown up in /proc/swaps */
DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP,
} DeviceFound;
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 7c5111ddf6..089461a18a 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -10,8 +10,10 @@
#include "fileio.h"
#include "fs-util.h"
#include "io-util.h"
+#include "nscd-flush.h"
#include "parse-util.h"
#include "random-util.h"
+#include "serialize.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "string-util.h"
@@ -20,6 +22,8 @@
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
+DEFINE_PRIVATE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
+
static DynamicUser* dynamic_user_free(DynamicUser *d) {
if (!d)
return NULL;
@@ -32,7 +36,7 @@ static DynamicUser* dynamic_user_free(DynamicUser *d) {
}
static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
- DynamicUser *d = NULL;
+ DynamicUser *d;
int r;
assert(m);
@@ -102,9 +106,11 @@ static int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret)
d = hashmap_get(m->dynamic_users, name);
if (d) {
- /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
- d->n_ref++;
- *ret = d;
+ if (ret) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ }
return 0;
}
@@ -173,7 +179,7 @@ static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) {
*
* 1. Initially, we try to read the UID of a number of specified paths. If any of these UIDs works, we use
* them. We use in order to increase the chance of UID reuse, if StateDirectory=, CacheDirectory= or
- * LogDirectory= are used, as reusing the UID these directories are owned by saves us from having to
+ * LogsDirectory= are used, as reusing the UID these directories are owned by saves us from having to
* recursively chown() them to new users.
*
* 2. If that didn't yield a currently unused UID, we hash the user name, and try to use that. This should be
@@ -312,20 +318,8 @@ static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) {
static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
uid_t uid = UID_INVALID;
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
- union {
- struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(int))];
- } control = {};
- struct msghdr mh = {
- .msg_control = &control,
- .msg_controllen = sizeof(control),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
- struct cmsghdr *cmsg;
-
+ int lock_fd;
ssize_t k;
- int lock_fd = -1;
assert(d);
assert(ret_uid);
@@ -334,15 +328,9 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
/* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
* on the socket taken. */
- k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ k = receive_one_fd_iov(d->storage_socket[0], &iov, 1, MSG_DONTWAIT, &lock_fd);
if (k < 0)
- return -errno;
-
- cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
- if (cmsg)
- lock_fd = *(int*) CMSG_DATA(cmsg);
- else
- cmsg_close_all(&mh); /* just in case... */
+ return (int) k;
*ret_uid = uid;
*ret_lock_fd = lock_fd;
@@ -352,42 +340,11 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
- union {
- struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(int))];
- } control = {};
- struct msghdr mh = {
- .msg_control = &control,
- .msg_controllen = sizeof(control),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
- ssize_t k;
assert(d);
/* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
-
- if (lock_fd >= 0) {
- struct cmsghdr *cmsg;
-
- cmsg = CMSG_FIRSTHDR(&mh);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(sizeof(int));
- memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int));
-
- mh.msg_controllen = CMSG_SPACE(sizeof(int));
- } else {
- mh.msg_control = NULL;
- mh.msg_controllen = 0;
- }
-
- k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
- if (k < 0)
- return -errno;
-
- return 0;
+ return send_one_fd_iov(d->storage_socket[1], lock_fd, &iov, 1, MSG_DONTWAIT);
}
static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
@@ -427,6 +384,7 @@ static int dynamic_user_realize(
_cleanup_close_ int etc_passwd_lock_fd = -1;
uid_t num = UID_INVALID; /* a uid if is_user, and a gid otherwise */
gid_t gid = GID_INVALID; /* a gid if is_user, ignored otherwise */
+ bool flush_cache = false;
int r;
assert(d);
@@ -515,6 +473,7 @@ static int dynamic_user_realize(
}
/* Great! Nothing is stored here, still. Store our newly acquired data. */
+ flush_cache = true;
} else {
/* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
* acquired, and use what's stored now. */
@@ -525,6 +484,16 @@ static int dynamic_user_realize(
num = new_uid;
uid_lock_fd = new_uid_lock_fd;
}
+ } else if (is_user && !uid_is_dynamic(num)) {
+ struct passwd *p;
+
+ /* Statically allocated user may have different uid and gid. So, let's obtain the gid. */
+ errno = 0;
+ p = getpwuid(num);
+ if (!p)
+ return errno > 0 ? -errno : -ESRCH;
+
+ gid = p->pw_gid;
}
/* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
@@ -534,6 +503,14 @@ static int dynamic_user_realize(
if (r < 0)
return r;
+ if (flush_cache) {
+ /* If we allocated a new dynamic UID, refresh nscd, so that it forgets about potentially cached
+ * negative entries. But let's do so after we release the /etc/passwd lock, so that there's no
+ * potential for nscd wanting to lock that for completing the invalidation. */
+ etc_passwd_lock_fd = safe_close(etc_passwd_lock_fd);
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
+ }
+
if (is_user) {
*ret_uid = num;
*ret_gid = gid != GID_INVALID ? gid : num;
@@ -570,16 +547,6 @@ int dynamic_user_current(DynamicUser *d, uid_t *ret) {
return 0;
}
-static DynamicUser* dynamic_user_ref(DynamicUser *d) {
- if (!d)
- return NULL;
-
- assert(d->n_ref > 0);
- d->n_ref++;
-
- return d;
-}
-
static DynamicUser* dynamic_user_unref(DynamicUser *d) {
if (!d)
return NULL;
@@ -616,6 +583,8 @@ static int dynamic_user_close(DynamicUser *d) {
/* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
unlink_uid_lock(lock_fd, uid, d->name);
+
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
return 1;
}
@@ -652,13 +621,13 @@ int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
copy0 = fdset_put_dup(fds, d->storage_socket[0]);
if (copy0 < 0)
- return copy0;
+ return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m");
copy1 = fdset_put_dup(fds, d->storage_socket[1]);
if (copy1 < 0)
- return copy1;
+ return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m");
- fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1);
+ (void) serialize_item_format(f, "dynamic-user", "%s %i %i", d->name, copy0, copy1);
}
return 0;
diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h
index 791a8ba0ef..112f91e63a 100644
--- a/src/core/dynamic-user.h
+++ b/src/core/dynamic-user.h
@@ -15,7 +15,7 @@ typedef struct DynamicCreds {
* used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
* need to allocated two of these objects. DynamicCreds below makes that easy. */
struct DynamicUser {
- int n_ref;
+ unsigned n_ref;
Manager *manager;
/* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c
index 76e1124cff..f98b0de792 100644
--- a/src/core/emergency-action.c
+++ b/src/core/emergency-action.c
@@ -1,7 +1,4 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
-/***
- Copyright © 2012 Michael Olbrich
-***/
#include <sys/reboot.h>
@@ -13,18 +10,22 @@
#include "special.h"
#include "string-table.h"
#include "terminal-util.h"
-
-static void log_and_status(Manager *m, const char *message, const char *reason) {
- log_warning("%s: %s", message, reason);
- manager_status_printf(m, STATUS_TYPE_EMERGENCY,
- ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
- "%s: %s", message, reason);
+#include "virt.h"
+
+static void log_and_status(Manager *m, bool warn, const char *message, const char *reason) {
+ log_full(warn ? LOG_WARNING : LOG_DEBUG, "%s: %s", message, reason);
+ if (warn)
+ manager_status_printf(m, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
+ "%s: %s", message, reason);
}
int emergency_action(
Manager *m,
EmergencyAction action,
+ EmergencyActionFlags options,
const char *reboot_arg,
+ int exit_status,
const char *reason) {
assert(m);
@@ -34,24 +35,17 @@ int emergency_action(
if (action == EMERGENCY_ACTION_NONE)
return -ECANCELED;
- if (!m->service_watchdogs) {
+ if (FLAGS_SET(options, EMERGENCY_ACTION_IS_WATCHDOG) && !m->service_watchdogs) {
log_warning("Watchdog disabled! Not acting on: %s", reason);
return -ECANCELED;
}
- if (!MANAGER_IS_SYSTEM(m)) {
- /* Downgrade all options to simply exiting if we run
- * in user mode */
-
- log_warning("Exiting: %s", reason);
- m->exit_code = MANAGER_EXIT;
- return -ECANCELED;
- }
+ bool warn = FLAGS_SET(options, EMERGENCY_ACTION_WARN);
switch (action) {
case EMERGENCY_ACTION_REBOOT:
- log_and_status(m, "Rebooting", reason);
+ log_and_status(m, warn, "Rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
@@ -59,15 +53,15 @@ int emergency_action(
break;
case EMERGENCY_ACTION_REBOOT_FORCE:
- log_and_status(m, "Forcibly rebooting", reason);
+ log_and_status(m, warn, "Forcibly rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
- m->exit_code = MANAGER_REBOOT;
+ m->objective = MANAGER_REBOOT;
break;
case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
- log_and_status(m, "Rebooting immediately", reason);
+ log_and_status(m, warn, "Rebooting immediately", reason);
sync();
@@ -81,18 +75,46 @@ int emergency_action(
(void) reboot(RB_AUTOBOOT);
break;
+ case EMERGENCY_ACTION_EXIT:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting", reason);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_EXIT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+ break;
+ }
+
+ log_notice("Doing \"poweroff\" action instead of an \"exit\" emergency action.");
+ _fallthrough_;
+
case EMERGENCY_ACTION_POWEROFF:
- log_and_status(m, "Powering off", reason);
+ log_and_status(m, warn, "Powering off", reason);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
break;
+ case EMERGENCY_ACTION_EXIT_FORCE:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting immediately", reason);
+ m->objective = MANAGER_EXIT;
+ break;
+ }
+
+ log_notice("Doing \"poweroff-force\" action instead of an \"exit-force\" emergency action.");
+ _fallthrough_;
+
case EMERGENCY_ACTION_POWEROFF_FORCE:
- log_and_status(m, "Forcibly powering off", reason);
- m->exit_code = MANAGER_POWEROFF;
+ log_and_status(m, warn, "Forcibly powering off", reason);
+ m->objective = MANAGER_POWEROFF;
break;
case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
- log_and_status(m, "Powering off immediately", reason);
+ log_and_status(m, warn, "Powering off immediately", reason);
sync();
@@ -114,6 +136,26 @@ static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
[EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
[EMERGENCY_ACTION_POWEROFF] = "poweroff",
[EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
- [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
+ [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate",
+ [EMERGENCY_ACTION_EXIT] = "exit",
+ [EMERGENCY_ACTION_EXIT_FORCE] = "exit-force",
};
DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
+
+int parse_emergency_action(
+ const char *value,
+ bool system,
+ EmergencyAction *ret) {
+
+ EmergencyAction x;
+
+ x = emergency_action_from_string(value);
+ if (x < 0)
+ return -EINVAL;
+
+ if (!system && x != EMERGENCY_ACTION_NONE && x < _EMERGENCY_ACTION_FIRST_USER_ACTION)
+ return -EOPNOTSUPP;
+
+ *ret = x;
+ return 0;
+}
diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h
index 61791f176f..6e6c69ddfc 100644
--- a/src/core/emergency-action.h
+++ b/src/core/emergency-action.h
@@ -1,10 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
-/***
- Copyright © 2012 Michael Olbrich
-***/
-
typedef enum EmergencyAction {
EMERGENCY_ACTION_NONE,
EMERGENCY_ACTION_REBOOT,
@@ -13,14 +9,26 @@ typedef enum EmergencyAction {
EMERGENCY_ACTION_POWEROFF,
EMERGENCY_ACTION_POWEROFF_FORCE,
EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+ EMERGENCY_ACTION_EXIT,
+ _EMERGENCY_ACTION_FIRST_USER_ACTION = EMERGENCY_ACTION_EXIT,
+ EMERGENCY_ACTION_EXIT_FORCE,
_EMERGENCY_ACTION_MAX,
_EMERGENCY_ACTION_INVALID = -1
} EmergencyAction;
+typedef enum EmergencyActionFlags {
+ EMERGENCY_ACTION_IS_WATCHDOG = 1 << 0,
+ EMERGENCY_ACTION_WARN = 1 << 1,
+} EmergencyActionFlags;
+
#include "macro.h"
#include "manager.h"
-int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason);
+int emergency_action(Manager *m,
+ EmergencyAction action, EmergencyActionFlags options,
+ const char *reboot_arg, int exit_status, const char *reason);
const char* emergency_action_to_string(EmergencyAction i) _const_;
EmergencyAction emergency_action_from_string(const char *s) _pure_;
+
+int parse_emergency_action(const char *value, bool system, EmergencyAction *ret);
diff --git a/src/core/execute.c b/src/core/execute.c
index 8ac69d1a0f..595a3c6eca 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -50,12 +50,12 @@
#include "chown-recursive.h"
#include "cpu-set-util.h"
#include "def.h"
+#include "env-file.h"
#include "env-util.h"
#include "errno-list.h"
#include "execute.h"
#include "exit-status.h"
#include "fd-util.h"
-#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
#include "glob-util.h"
@@ -76,7 +76,6 @@
#if HAVE_SECCOMP
#include "seccomp-util.h"
#endif
-#include "securebits.h"
#include "securebits-util.h"
#include "selinux-util.h"
#include "signal-util.h"
@@ -89,6 +88,7 @@
#include "strv.h"
#include "syslog-util.h"
#include "terminal-util.h"
+#include "umask-util.h"
#include "unit.h"
#include "user-util.h"
#include "util.h"
@@ -147,11 +147,11 @@ static int shift_fds(int fds[], size_t n_fds) {
return 0;
}
-static int flags_fds(const int fds[], size_t n_storage_fds, size_t n_socket_fds, bool nonblock) {
+static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
size_t i, n_fds;
int r;
- n_fds = n_storage_fds + n_socket_fds;
+ n_fds = n_socket_fds + n_storage_fds;
if (n_fds <= 0)
return 0;
@@ -323,7 +323,8 @@ static int connect_logger_as(
uid_t uid,
gid_t gid) {
- int fd, r;
+ _cleanup_close_ int fd = -1;
+ int r;
assert(context);
assert(params);
@@ -339,14 +340,12 @@ static int connect_logger_as(
if (r < 0)
return r;
- if (shutdown(fd, SHUT_RD) < 0) {
- safe_close(fd);
+ if (shutdown(fd, SHUT_RD) < 0)
return -errno;
- }
(void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
- dprintf(fd,
+ if (dprintf(fd,
"%s\n"
"%s\n"
"%i\n"
@@ -360,10 +359,12 @@ static int connect_logger_as(
!!context->syslog_level_prefix,
is_syslog_output(output),
is_kmsg_output(output),
- is_terminal_output(output));
+ is_terminal_output(output)) < 0)
+ return -errno;
- return move_fd(fd, nfd, false);
+ return move_fd(TAKE_FD(fd), nfd, false);
}
+
static int open_terminal_as(const char *path, int flags, int nfd) {
int fd;
@@ -378,10 +379,9 @@ static int open_terminal_as(const char *path, int flags, int nfd) {
}
static int acquire_path(const char *path, int flags, mode_t mode) {
- union sockaddr_union sa = {
- .sa.sa_family = AF_UNIX,
- };
- int fd, r;
+ union sockaddr_union sa = {};
+ _cleanup_close_ int fd = -1;
+ int r, salen;
assert(path);
@@ -390,11 +390,11 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
fd = open(path, flags|O_NOCTTY, mode);
if (fd >= 0)
- return fd;
+ return TAKE_FD(fd);
if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
return -errno;
- if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
+ if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
return -ENXIO;
/* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
@@ -403,25 +403,24 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
if (fd < 0)
return -errno;
- strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path));
- if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
- safe_close(fd);
+ salen = sockaddr_un_set_path(&sa.un, path);
+ if (salen < 0)
+ return salen;
+
+ if (connect(fd, &sa.sa, salen) < 0)
return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
* indication that his wasn't an AF_UNIX socket after all */
- }
if ((flags & O_ACCMODE) == O_RDONLY)
r = shutdown(fd, SHUT_WR);
else if ((flags & O_ACCMODE) == O_WRONLY)
r = shutdown(fd, SHUT_RD);
else
- return fd;
- if (r < 0) {
- safe_close(fd);
+ return TAKE_FD(fd);
+ if (r < 0)
return -errno;
- }
- return fd;
+ return TAKE_FD(fd);
}
static int fixup_input(
@@ -544,6 +543,30 @@ static int setup_input(
}
}
+static bool can_inherit_stderr_from_stdout(
+ const ExecContext *context,
+ ExecOutput o,
+ ExecOutput e) {
+
+ assert(context);
+
+ /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
+ * stderr fd */
+
+ if (e == EXEC_OUTPUT_INHERIT)
+ return true;
+ if (e != o)
+ return false;
+
+ if (e == EXEC_OUTPUT_NAMED_FD)
+ return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
+
+ if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
+ return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
+
+ return true;
+}
+
static int setup_output(
const Unit *unit,
const ExecContext *context,
@@ -602,7 +625,7 @@ static int setup_output(
return fileno;
/* Duplicate from stdout if possible */
- if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
+ if (can_inherit_stderr_from_stdout(context, o, e))
return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
o = e;
@@ -675,9 +698,10 @@ static int setup_output(
(void) fd_nonblock(named_iofds[fileno], false);
return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
- case EXEC_OUTPUT_FILE: {
+ case EXEC_OUTPUT_FILE:
+ case EXEC_OUTPUT_FILE_APPEND: {
bool rw;
- int fd;
+ int fd, flags;
assert(context->stdio_file[fileno]);
@@ -687,11 +711,15 @@ static int setup_output(
if (rw)
return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
- fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask);
+ flags = O_WRONLY;
+ if (o == EXEC_OUTPUT_FILE_APPEND)
+ flags |= O_APPEND;
+
+ fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
if (fd < 0)
return fd;
- return move_fd(fd, fileno, false);
+ return move_fd(fd, fileno, 0);
}
default:
@@ -914,7 +942,7 @@ static int get_fixed_user(const ExecContext *c, const char **user,
* (i.e. are "/" or "/bin/nologin"). */
name = c->user;
- r = get_user_creds_clean(&name, uid, gid, home, shell);
+ r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
if (r < 0)
return r;
@@ -932,7 +960,7 @@ static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid)
return 0;
name = c->group;
- r = get_group_creds(&name, gid);
+ r = get_group_creds(&name, gid, 0);
if (r < 0)
return r;
@@ -1004,7 +1032,7 @@ static int get_supplementary_groups(const ExecContext *c, const char *user,
return -E2BIG;
g = *i;
- r = get_group_creds(&g, l_gids+k);
+ r = get_group_creds(&g, l_gids+k, 0);
if (r < 0)
return r;
@@ -1151,6 +1179,16 @@ static int setup_pam(
goto fail;
}
+ if (!tty) {
+ _cleanup_free_ char *q = NULL;
+
+ /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
+ * out if that's the case, and read the TTY off it. */
+
+ if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
+ tty = strjoina("/dev/", q);
+ }
+
if (tty) {
pam_code = pam_set_item(handle, PAM_TTY, tty);
if (pam_code != PAM_SUCCESS)
@@ -1415,7 +1453,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
return r;
}
- return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
+ return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
}
static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
@@ -1498,7 +1536,7 @@ static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
return 0;
- return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
}
static int apply_private_devices(const Unit *u, const ExecContext *c) {
@@ -1513,7 +1551,7 @@ static int apply_private_devices(const Unit *u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "PrivateDevices="))
return 0;
- return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
}
static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
@@ -1585,6 +1623,8 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
idle_pipe[3] = safe_close(idle_pipe[3]);
}
+static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
+
static int build_environment(
const Unit *u,
const ExecContext *c,
@@ -1598,14 +1638,16 @@ static int build_environment(
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
+ ExecDirectoryType t;
size_t n_env = 0;
char *x;
assert(u);
assert(c);
+ assert(p);
assert(ret);
- our_env = new0(char*, 14);
+ our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
if (!our_env)
return -ENOMEM;
@@ -1710,8 +1752,37 @@ static int build_environment(
our_env[n_env++] = x;
}
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ _cleanup_free_ char *pre = NULL, *joined = NULL;
+ const char *n;
+
+ if (!p->prefix[t])
+ continue;
+
+ if (strv_isempty(c->directories[t].paths))
+ continue;
+
+ n = exec_directory_env_name_to_string(t);
+ if (!n)
+ continue;
+
+ pre = strjoin(p->prefix[t], "/");
+ if (!pre)
+ return -ENOMEM;
+
+ joined = strv_join_prefix(c->directories[t].paths, ":", pre);
+ if (!joined)
+ return -ENOMEM;
+
+ x = strjoin(n, "=", joined);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
our_env[n_env++] = NULL;
- assert(n_env <= 12);
+ assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
*ret = TAKE_PTR(our_env);
@@ -2010,7 +2081,7 @@ static int setup_exec_directory(
if (context->dynamic_user &&
!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
- _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
+ _cleanup_free_ char *private_root = NULL;
/* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
* want to avoid leaving a directory around fully accessible that is owned by a dynamic user
@@ -2075,18 +2146,8 @@ static int setup_exec_directory(
goto fail;
}
- parent = dirname_malloc(p);
- if (!parent) {
- r = -ENOMEM;
- goto fail;
- }
-
- r = path_make_relative(parent, pp, &relative);
- if (r < 0)
- goto fail;
-
/* And link it up from the original place */
- r = symlink_idempotent(relative, p);
+ r = symlink_idempotent(pp, p, true);
if (r < 0)
goto fail;
@@ -2379,11 +2440,24 @@ static int apply_mount_namespace(
bind_mount_free_many(bind_mounts, n_bind_mounts);
- /* If we couldn't set up the namespace this is probably due to a
- * missing capability. In this case, silently proceeed. */
- if (IN_SET(r, -EPERM, -EACCES)) {
- log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
- return 0;
+ /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
+ * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively
+ * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
+ * completely different execution environment. */
+ if (r == -ENOANO) {
+ if (n_bind_mounts == 0 &&
+ context->n_temporary_filesystems == 0 &&
+ !root_dir && !root_image &&
+ !context->dynamic_user) {
+ log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
+ "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
+ n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
+
+ return -EOPNOTSUPP;
}
return r;
@@ -2456,9 +2530,6 @@ static int setup_keyring(
* on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
* UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
- if (!(p->flags & EXEC_NEW_KEYRING))
- return 0;
-
if (context->keyring_mode == EXEC_KEYRING_INHERIT)
return 0;
@@ -2566,6 +2637,7 @@ static int close_remaining_fds(
const DynamicCreds *dcreds,
int user_lookup_fd,
int socket_fd,
+ int exec_fd,
int *fds, size_t n_fds) {
size_t n_dont_close = 0;
@@ -2582,6 +2654,8 @@ static int close_remaining_fds(
if (socket_fd >= 0)
dont_close[n_dont_close++] = socket_fd;
+ if (exec_fd >= 0)
+ dont_close[n_dont_close++] = exec_fd;
if (n_fds > 0) {
memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
n_dont_close += n_fds;
@@ -2707,6 +2781,37 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p
static char *exec_command_line(char **argv);
+static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
+ bool using_subcgroup;
+ char *p;
+
+ assert(params);
+ assert(ret);
+
+ if (!params->cgroup_path)
+ return -EINVAL;
+
+ /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
+ * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
+ * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
+ * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
+ * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
+ * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
+ * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
+ * flag, which is only passed for the former statements, not for the latter. */
+
+ using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
+ if (using_subcgroup)
+ p = strjoin(params->cgroup_path, "/.control");
+ else
+ p = strdup(params->cgroup_path);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return using_subcgroup;
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
@@ -2714,20 +2819,20 @@ static int exec_child(
const ExecParameters *params,
ExecRuntime *runtime,
DynamicCreds *dcreds,
- char **argv,
int socket_fd,
int named_iofds[3],
int *fds,
- size_t n_storage_fds,
size_t n_socket_fds,
+ size_t n_storage_fds,
char **files_env,
int user_lookup_fd,
int *exit_status) {
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
- _cleanup_free_ char *home_buffer = NULL;
+ int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
_cleanup_free_ gid_t *supplementary_gids = NULL;
const char *username = NULL, *groupname = NULL;
+ _cleanup_free_ char *home_buffer = NULL;
const char *home = NULL, *shell = NULL;
dev_t journal_stream_dev = 0;
ino_t journal_stream_ino = 0;
@@ -2747,7 +2852,6 @@ static int exec_child(
#endif
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
- int r, ngids = 0;
size_t n_fds;
ExecDirectoryType dt;
int secure_bits;
@@ -2791,8 +2895,8 @@ static int exec_child(
/* In case anything used libc syslog(), close this here, too */
closelog();
- n_fds = n_storage_fds + n_socket_fds;
- r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
+ n_fds = n_socket_fds + n_storage_fds;
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
@@ -2810,7 +2914,7 @@ static int exec_child(
const char *vc = params->confirm_spawn;
_cleanup_free_ char *cmdline = NULL;
- cmdline = exec_command_line(argv);
+ cmdline = exec_command_line(command->argv);
if (!cmdline) {
*exit_status = EXIT_MEMORY;
return log_oom();
@@ -2828,10 +2932,22 @@ static int exec_child(
}
}
+ /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
+ * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
+ * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
+ * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
+ * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
+ if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
+ setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+ }
+
if (context->dynamic_user && dcreds) {
_cleanup_strv_free_ char **suggested_paths = NULL;
- /* Make sure we bypass our own NSS module for any NSS checks */
+ /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
+ * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
*exit_status = EXIT_USER;
return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
@@ -2909,6 +3025,24 @@ static int exec_child(
if (socket_fd >= 0)
(void) fd_nonblock(socket_fd, false);
+ /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
+ * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
+ if (params->cgroup_path) {
+ _cleanup_free_ char *p = NULL;
+
+ r = exec_parameters_get_cgroup_path(params, &p);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
+ }
+
+ r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
+ }
+ }
+
r = setup_input(context, params, socket_fd, named_iofds);
if (r < 0) {
*exit_status = EXIT_STDIN;
@@ -2927,14 +3061,6 @@ static int exec_child(
return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
}
- if (params->cgroup_path) {
- r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
- if (r < 0) {
- *exit_status = EXIT_CGROUP;
- return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
- }
- }
-
if (context->oom_score_adjust_set) {
/* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
* prohibit write access to this file, and we shouldn't trip up over that. */
@@ -3130,11 +3256,6 @@ static int exec_child(
}
}
- /* Apply just after mount namespace setup */
- r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
- if (r < 0)
- return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
-
/* Drop groups as early as possbile */
if (needs_setuid) {
r = enforce_groups(gid, supplementary_gids, ngids);
@@ -3165,18 +3286,59 @@ static int exec_child(
}
/* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
- * more aggressive this time since socket_fd and the netns fds we don't need anymore. The custom endpoint fd
- * was needed to upload the policy and can now be closed as well. */
- r = close_all_fds(fds, n_fds);
+ * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
+ * however if we have it as we want to keep it open until the final execve(). */
+
+ if (params->exec_fd >= 0) {
+ exec_fd = params->exec_fd;
+
+ if (exec_fd < 3 + (int) n_fds) {
+ int moved_fd;
+
+ /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
+ * process we are about to execute. */
+
+ moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
+ if (moved_fd < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
+ }
+
+ safe_close(exec_fd);
+ exec_fd = moved_fd;
+ } else {
+ /* This fd should be FD_CLOEXEC already, but let's make sure. */
+ r = fd_cloexec(exec_fd, true);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
+ }
+ }
+
+ fds_with_exec_fd = newa(int, n_fds + 1);
+ memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
+ fds_with_exec_fd[n_fds] = exec_fd;
+ n_fds_with_exec_fd = n_fds + 1;
+ } else {
+ fds_with_exec_fd = fds;
+ n_fds_with_exec_fd = n_fds;
+ }
+
+ r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
if (r >= 0)
r = shift_fds(fds, n_fds);
if (r >= 0)
- r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
+ r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
}
+ /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
+ * and at the right fd numbers. The are no other fds open, with one exception: the exec_fd if it is defined,
+ * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
+ * came this far. */
+
secure_bits = context->secure_bits;
if (needs_sandboxing) {
@@ -3268,6 +3430,12 @@ static int exec_child(
}
}
+ /* Apply working directory here, because the working directory might be on NFS and only the user running
+ * this service might have the correct privilege to change to the working directory */
+ r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
+
if (needs_sandboxing) {
/* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
* influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
@@ -3389,7 +3557,7 @@ static int exec_child(
strv_free_and_replace(accum_env, ee);
}
- final_argv = replace_env_argv(argv, accum_env);
+ final_argv = replace_env_argv(command->argv, accum_env);
if (!final_argv) {
*exit_status = EXIT_MEMORY;
return log_oom();
@@ -3407,10 +3575,35 @@ static int exec_child(
LOG_UNIT_INVOCATION_ID(unit));
}
+ if (exec_fd >= 0) {
+ uint8_t hot = 1;
+
+ /* We have finished with all our initializations. Let's now let the manager know that. From this point
+ * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
+ }
+ }
+
execve(command->path, final_argv, accum_env);
+ r = -errno;
+
+ if (exec_fd >= 0) {
+ uint8_t hot = 0;
- if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
- log_struct_errno(LOG_INFO, errno,
+ /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
+ * that POLLHUP on it no longer means execve() succeeded. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
+ }
+ }
+
+ if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+ log_struct_errno(LOG_INFO, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
@@ -3421,7 +3614,7 @@ static int exec_child(
}
*exit_status = EXIT_EXEC;
- return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
+ return log_unit_error_errno(unit, r, "Failed to execute command: %m");
}
static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
@@ -3435,13 +3628,11 @@ int exec_spawn(Unit *unit,
DynamicCreds *dcreds,
pid_t *ret) {
+ int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
+ _cleanup_free_ char *subcgroup_path = NULL;
_cleanup_strv_free_ char **files_env = NULL;
- int *fds = NULL;
size_t n_storage_fds = 0, n_socket_fds = 0;
_cleanup_free_ char *line = NULL;
- int socket_fd, r;
- int named_iofds[3] = { -1, -1, -1 };
- char **argv;
pid_t pid;
assert(unit);
@@ -3449,7 +3640,7 @@ int exec_spawn(Unit *unit,
assert(context);
assert(ret);
assert(params);
- assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
+ assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
if (context->std_input == EXEC_INPUT_SOCKET ||
context->std_output == EXEC_OUTPUT_SOCKET ||
@@ -3469,8 +3660,8 @@ int exec_spawn(Unit *unit,
} else {
socket_fd = -1;
fds = params->fds;
- n_storage_fds = params->n_storage_fds;
n_socket_fds = params->n_socket_fds;
+ n_storage_fds = params->n_storage_fds;
}
r = exec_context_named_iofds(context, params, named_iofds);
@@ -3481,8 +3672,7 @@ int exec_spawn(Unit *unit,
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
- argv = params->argv ?: command->argv;
- line = exec_command_line(argv);
+ line = exec_command_line(command->argv);
if (!line)
return log_oom();
@@ -3492,6 +3682,17 @@ int exec_spawn(Unit *unit,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit));
+ if (params->cgroup_path) {
+ r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
+ if (r > 0) { /* We are using a child cgroup */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+ }
+ }
+
pid = fork();
if (pid < 0)
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
@@ -3505,12 +3706,11 @@ int exec_spawn(Unit *unit,
params,
runtime,
dcreds,
- argv,
socket_fd,
named_iofds,
fds,
- n_storage_fds,
n_socket_fds,
+ n_storage_fds,
files_env,
unit->manager->user_lookup_fds[1],
&exit_status);
@@ -3530,13 +3730,11 @@ int exec_spawn(Unit *unit,
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
- /* We add the new process to the cgroup both in the child (so
- * that we can be sure that no user code is ever executed
- * outside of the cgroup) and in the parent (so that we can be
- * sure that when we kill the cgroup the process will be
- * killed too). */
- if (params->cgroup_path)
- (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
+ /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
+ * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
+ * process will be killed too). */
+ if (subcgroup_path)
+ (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
exec_status_start(&command->exec_status, pid);
@@ -3624,6 +3822,9 @@ void exec_context_done(ExecContext *c) {
exec_context_free_log_extra_fields(c);
+ c->log_rate_limit_interval_usec = 0;
+ c->log_rate_limit_burst = 0;
+
c->stdin_data = mfree(c->stdin_data);
c->stdin_data_size = 0;
}
@@ -3655,7 +3856,6 @@ static void exec_command_done(ExecCommand *c) {
assert(c);
c->path = mfree(c->path);
-
c->argv = strv_free(c->argv);
}
@@ -3685,6 +3885,24 @@ void exec_command_free_array(ExecCommand **c, size_t n) {
c[i] = exec_command_free_list(c[i]);
}
+void exec_command_reset_status_array(ExecCommand *c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ exec_status_reset(&c[i].exec_status);
+}
+
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ ExecCommand *z;
+
+ LIST_FOREACH(command, z, c[i])
+ exec_status_reset(&z->exec_status);
+ }
+}
+
typedef struct InvalidEnvInfo {
const Unit *unit;
const char *path;
@@ -3813,7 +4031,7 @@ static int exec_context_load_environment(const Unit *unit, const ExecContext *c,
assert(pglob.gl_pathc > 0);
for (n = 0; n < pglob.gl_pathc; n++) {
- k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
+ k = load_env_file(NULL, pglob.gl_pathv[n], &p);
if (k < 0) {
if (ignore)
continue;
@@ -3976,9 +4194,9 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
for (i = 0; i < RLIM_NLIMITS; i++)
if (c->rlimit[i]) {
- fprintf(f, "Limit%s%s: " RLIM_FMT "\n",
+ fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
- fprintf(f, "Limit%s%sSoft: " RLIM_FMT "\n",
+ fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
}
@@ -4036,8 +4254,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
if (c->std_output == EXEC_OUTPUT_FILE)
fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
if (c->std_error == EXEC_OUTPUT_FILE)
fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
if (c->tty_path)
fprintf(f,
@@ -4084,6 +4306,17 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
}
+ if (c->log_rate_limit_interval_usec > 0) {
+ char buf_timespan[FORMAT_TIMESPAN_MAX];
+
+ fprintf(f,
+ "%sLogRateLimitIntervalSec: %s\n",
+ prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
+ }
+
+ if (c->log_rate_limit_burst > 0)
+ fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
+
if (c->n_log_extra_fields > 0) {
size_t j;
@@ -4331,18 +4564,22 @@ void exec_context_free_log_extra_fields(ExecContext *c) {
void exec_status_start(ExecStatus *s, pid_t pid) {
assert(s);
- zero(*s);
- s->pid = pid;
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+
dual_timestamp_get(&s->start_timestamp);
}
void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
assert(s);
- if (s->pid && s->pid != pid)
- zero(*s);
+ if (s->pid != pid) {
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+ }
- s->pid = pid;
dual_timestamp_get(&s->exit_timestamp);
s->code = code;
@@ -4350,12 +4587,18 @@ void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int
if (context) {
if (context->utmp_id)
- utmp_put_dead_process(context->utmp_id, pid, code, status);
+ (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
exec_context_tty_reset(context, NULL);
}
}
+void exec_status_reset(ExecStatus *s) {
+ assert(s);
+
+ *s = (ExecStatus) {};
+}
+
void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
char buf[FORMAT_TIMESTAMP_MAX];
@@ -4487,8 +4730,7 @@ int exec_command_set(ExecCommand *c, const char *path, ...) {
return -ENOMEM;
}
- free(c->path);
- c->path = p;
+ free_and_replace(c->path, p);
return strv_free_and_replace(c->argv, l);
}
@@ -4920,10 +5162,8 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
finalize:
r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
- if (r < 0) {
+ if (r < 0)
log_debug_errno(r, "Failed to add exec-runtime: %m");
- return;
- }
}
void exec_runtime_vacuum(Manager *m) {
@@ -4942,6 +5182,13 @@ void exec_runtime_vacuum(Manager *m) {
}
}
+void exec_params_clear(ExecParameters *p) {
+ if (!p)
+ return;
+
+ strv_free(p->environment);
+}
+
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
[EXEC_INPUT_NULL] = "null",
[EXEC_INPUT_TTY] = "tty",
@@ -4968,6 +5215,7 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
[EXEC_OUTPUT_SOCKET] = "socket",
[EXEC_OUTPUT_NAMED_FD] = "fd",
[EXEC_OUTPUT_FILE] = "file",
+ [EXEC_OUTPUT_FILE_APPEND] = "append",
};
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
@@ -4998,6 +5246,16 @@ static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
+static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
+ [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
+ [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
+ [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
+ [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
+
static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
[EXEC_KEYRING_INHERIT] = "inherit",
[EXEC_KEYRING_PRIVATE] = "private",
diff --git a/src/core/execute.h b/src/core/execute.h
index 77ffe82323..0f1bf56744 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -16,7 +16,7 @@ typedef struct Manager Manager;
#include "cgroup-util.h"
#include "fdset.h"
#include "list.h"
-#include "missing.h"
+#include "missing_resource.h"
#include "namespace.h"
#include "nsflags.h"
@@ -56,6 +56,7 @@ typedef enum ExecOutput {
EXEC_OUTPUT_SOCKET,
EXEC_OUTPUT_NAMED_FD,
EXEC_OUTPUT_FILE,
+ EXEC_OUTPUT_FILE_APPEND,
_EXEC_OUTPUT_MAX,
_EXEC_OUTPUT_INVALID = -1
} ExecOutput;
@@ -76,21 +77,23 @@ typedef enum ExecKeyringMode {
_EXEC_KEYRING_MODE_INVALID = -1,
} ExecKeyringMode;
+/* Contains start and exit information about an executed command. */
struct ExecStatus {
+ pid_t pid;
dual_timestamp start_timestamp;
dual_timestamp exit_timestamp;
- pid_t pid;
int code; /* as in siginfo_t::si_code */
int status; /* as in sigingo_t::si_status */
};
typedef enum ExecCommandFlags {
- EXEC_COMMAND_IGNORE_FAILURE = 1,
- EXEC_COMMAND_FULLY_PRIVILEGED = 2,
- EXEC_COMMAND_NO_SETUID = 4,
- EXEC_COMMAND_AMBIENT_MAGIC = 8,
+ EXEC_COMMAND_IGNORE_FAILURE = 1 << 0,
+ EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1,
+ EXEC_COMMAND_NO_SETUID = 1 << 2,
+ EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3,
} ExecCommandFlags;
+/* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */
struct ExecCommand {
char *path;
char **argv;
@@ -99,13 +102,16 @@ struct ExecCommand {
LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
};
+/* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate
+ * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces
+ * between invocations of commands. This is a reference counted object, with one reference taken by each currently
+ * active command invocation that wants to share this runtime. */
struct ExecRuntime {
- int n_ref;
+ unsigned n_ref;
Manager *manager;
- /* unit id of the owner */
- char *id;
+ char *id; /* Unit id of the owner */
char *tmp_dir;
char *var_tmp_dir;
@@ -130,6 +136,9 @@ typedef struct ExecDirectory {
mode_t mode;
} ExecDirectory;
+/* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
+ * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
+ * change after being loaded. */
struct ExecContext {
char **environment;
char **environment_files;
@@ -216,6 +225,9 @@ struct ExecContext {
struct iovec* log_extra_fields;
size_t n_log_extra_fields;
+ usec_t log_rate_limit_interval_usec;
+ unsigned log_rate_limit_burst;
+
bool cpu_sched_reset_on_fork;
bool non_blocking;
bool private_tmp;
@@ -277,27 +289,28 @@ typedef enum ExecFlags {
EXEC_APPLY_SANDBOXING = 1 << 0,
EXEC_APPLY_CHROOT = 1 << 1,
EXEC_APPLY_TTY_STDIN = 1 << 2,
- EXEC_NEW_KEYRING = 1 << 3,
- EXEC_PASS_LOG_UNIT = 1 << 4, /* Whether to pass the unit name to the service's journal stream connection */
- EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
- EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
- EXEC_CGROUP_DELEGATE = 1 << 7,
+ EXEC_PASS_LOG_UNIT = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
+ EXEC_CHOWN_DIRECTORIES = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
+ EXEC_NSS_BYPASS_BUS = 1 << 5, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
+ EXEC_CGROUP_DELEGATE = 1 << 6,
+ EXEC_IS_CONTROL = 1 << 7,
+ EXEC_CONTROL_CGROUP = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
/* The following are not used by execute.c, but by consumers internally */
- EXEC_PASS_FDS = 1 << 8,
- EXEC_IS_CONTROL = 1 << 9,
+ EXEC_PASS_FDS = 1 << 9,
EXEC_SETENV_RESULT = 1 << 10,
EXEC_SET_WATCHDOG = 1 << 11,
} ExecFlags;
+/* Parameters for a specific invocation of a command. This structure is put together right before a command is
+ * executed. */
struct ExecParameters {
- char **argv;
char **environment;
int *fds;
char **fd_names;
- size_t n_storage_fds;
size_t n_socket_fds;
+ size_t n_storage_fds;
ExecFlags flags;
bool selinux_context_net:1;
@@ -316,6 +329,9 @@ struct ExecParameters {
int stdin_fd;
int stdout_fd;
int stderr_fd;
+
+ /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
+ int exec_fd;
};
#include "unit.h"
@@ -330,14 +346,14 @@ int exec_spawn(Unit *unit,
pid_t *ret);
void exec_command_done_array(ExecCommand *c, size_t n);
-
ExecCommand* exec_command_free_list(ExecCommand *c);
void exec_command_free_array(ExecCommand **c, size_t n);
-
+void exec_command_reset_status_array(ExecCommand *c, size_t n);
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
void exec_command_append_list(ExecCommand **l, ExecCommand *e);
-int exec_command_set(ExecCommand *c, const char *path, ...);
-int exec_command_append(ExecCommand *c, const char *path, ...);
+int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
+int exec_command_append(ExecCommand *c, const char *path, ...) _sentinel_;
void exec_context_init(ExecContext *c);
void exec_context_done(ExecContext *c);
@@ -357,6 +373,7 @@ void exec_context_free_log_extra_fields(ExecContext *c);
void exec_status_start(ExecStatus *s, pid_t pid);
void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
+void exec_status_reset(ExecStatus *s);
int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret);
ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
@@ -366,6 +383,8 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value,
void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
void exec_runtime_vacuum(Manager *m);
+void exec_params_clear(ExecParameters *p);
+
const char* exec_output_to_string(ExecOutput i) _const_;
ExecOutput exec_output_from_string(const char *s) _pure_;
diff --git a/src/core/ima-setup.c b/src/core/ima-setup.c
index 013d6c5de3..fd7c5f64af 100644
--- a/src/core/ima-setup.c
+++ b/src/core/ima-setup.c
@@ -7,6 +7,7 @@
#include <errno.h>
#include <unistd.h>
+#include "alloc-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "ima-setup.h"
@@ -22,20 +23,20 @@ int ima_setup(void) {
_cleanup_fclose_ FILE *input = NULL;
_cleanup_close_ int imafd = -1;
unsigned lineno = 0;
- char line[page_size()];
+ int r;
if (access(IMA_SECFS_DIR, F_OK) < 0) {
- log_debug("IMA support is disabled in the kernel, ignoring.");
+ log_debug_errno(errno, "IMA support is disabled in the kernel, ignoring: %m");
return 0;
}
if (access(IMA_SECFS_POLICY, W_OK) < 0) {
- log_warning("Another IMA custom policy has already been loaded, ignoring.");
+ log_warning_errno(errno, "Another IMA custom policy has already been loaded, ignoring: %m");
return 0;
}
if (access(IMA_POLICY_PATH, F_OK) < 0) {
- log_debug("No IMA custom policy file "IMA_POLICY_PATH", ignoring.");
+ log_debug_errno(errno, "No IMA custom policy file "IMA_POLICY_PATH", ignoring: %m");
return 0;
}
@@ -56,7 +57,7 @@ int ima_setup(void) {
return 0;
}
- close(imafd);
+ safe_close(imafd);
imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC);
if (imafd < 0) {
@@ -64,10 +65,16 @@ int ima_setup(void) {
return 0;
}
- FOREACH_LINE(line, input,
- return log_error_errno(errno, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m")) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
size_t len;
+ r = read_line(input, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m");
+ if (r == 0)
+ break;
+
len = strlen(line);
lineno++;
diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h
index 7babf19562..77078e1f14 100644
--- a/src/core/ip-address-access.h
+++ b/src/core/ip-address-access.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
-
#include "conf-parser.h"
#include "in-addr-util.h"
#include "list.h"
diff --git a/src/core/job.c b/src/core/job.c
index 734756b666..f635b7e933 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -10,10 +10,12 @@
#include "dbus-job.h"
#include "dbus.h"
#include "escape.h"
+#include "fileio.h"
#include "job.h"
#include "log.h"
#include "macro.h"
#include "parse-util.h"
+#include "serialize.h"
#include "set.h"
#include "special.h"
#include "stdio-util.h"
@@ -31,14 +33,15 @@ Job* job_new_raw(Unit *unit) {
assert(unit);
- j = new0(Job, 1);
+ j = new(Job, 1);
if (!j)
return NULL;
- j->manager = unit->manager;
- j->unit = unit;
- j->type = _JOB_TYPE_INVALID;
- j->reloaded = false;
+ *j = (Job) {
+ .manager = unit->manager,
+ .unit = unit,
+ .type = _JOB_TYPE_INVALID,
+ };
return j;
}
@@ -86,7 +89,7 @@ void job_unlink(Job *j) {
j->timer_event_source = sd_event_source_unref(j->timer_event_source);
}
-void job_free(Job *j) {
+Job* job_free(Job *j) {
assert(j);
assert(!j->installed);
assert(!j->transaction_prev);
@@ -99,7 +102,7 @@ void job_free(Job *j) {
sd_bus_track_unref(j->bus_track);
strv_free(j->deserialized_clients);
- free(j);
+ return mfree(j);
}
static void job_set_state(Job *j, JobState state) {
@@ -148,7 +151,7 @@ void job_uninstall(Job *j) {
unit_add_to_gc_queue(j->unit);
- hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
+ hashmap_remove_value(j->manager->jobs, UINT32_TO_PTR(j->id), j);
j->installed = false;
}
@@ -174,7 +177,7 @@ static void job_merge_into_installed(Job *j, Job *other) {
assert(j->unit == other->unit);
if (j->type != JOB_NOP)
- job_type_merge_and_collapse(&j->type, other->type, j->unit);
+ assert_se(job_type_merge_and_collapse(&j->type, other->type, j->unit) == 0);
else
assert(other->type == JOB_NOP);
@@ -233,28 +236,36 @@ Job* job_install(Job *j) {
job_add_to_gc_queue(j);
+ job_add_to_dbus_queue(j); /* announce this job to clients */
+ unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */
+
return j;
}
int job_install_deserialized(Job *j) {
Job **pj;
+ int r;
assert(!j->installed);
- if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION) {
- log_debug("Invalid job type %s in deserialization.", strna(job_type_to_string(j->type)));
- return -EINVAL;
- }
+ if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid job type %s in deserialization.",
+ strna(job_type_to_string(j->type)));
pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
- if (*pj) {
- log_unit_debug(j->unit, "Unit already has a job installed. Not installing deserialized job.");
- return -EEXIST;
- }
+ if (*pj)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EEXIST),
+ "Unit already has a job installed. Not installing deserialized job.");
+
+ r = hashmap_put(j->manager->jobs, UINT32_TO_PTR(j->id), j);
+ if (r == -EEXIST)
+ return log_unit_debug_errno(j->unit, r, "Job ID %" PRIu32 " already used, cannot deserialize job.", j->id);
+ if (r < 0)
+ return log_unit_debug_errno(j->unit, r, "Failed to insert job into jobs hash table: %m");
*pj = j;
j->installed = true;
- j->reloaded = true;
if (j->state == JOB_RUNNING)
j->unit->manager->n_running_jobs++;
@@ -303,7 +314,7 @@ void job_dependency_free(JobDependency *l) {
free(l);
}
-void job_dump(Job *j, FILE*f, const char *prefix) {
+void job_dump(Job *j, FILE *f, const char *prefix) {
assert(j);
assert(f);
@@ -506,6 +517,95 @@ static void job_change_type(Job *j, JobType newtype) {
j->type = newtype;
}
+_pure_ static const char* job_get_begin_status_message_format(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ if (t == JOB_RELOAD)
+ return "Reloading %s.";
+
+ assert(IN_SET(t, JOB_START, JOB_STOP));
+
+ format = UNIT_VTABLE(u)->status_message_formats.starting_stopping[t == JOB_STOP];
+ if (format)
+ return format;
+
+ /* Return generic strings */
+ if (t == JOB_START)
+ return "Starting %s.";
+ else {
+ assert(t == JOB_STOP);
+ return "Stopping %s.";
+ }
+}
+
+static void job_print_begin_status_message(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ /* Reload status messages have traditionally not been printed to console. */
+ if (!IN_SET(t, JOB_START, JOB_STOP))
+ return;
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ unit_status_printf(u, "", format);
+ REENABLE_WARNING;
+}
+
+static void job_log_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ const char *format, *mid;
+ char buf[LINE_MAX];
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD))
+ return;
+
+ if (log_on_console()) /* Skip this if it would only go on the console anyway */
+ return;
+
+ /* We log status messages for all units and all operations. */
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buf, sizeof buf, format, unit_description(u));
+ REENABLE_WARNING;
+
+ mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR :
+ t == JOB_STOP ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR :
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR;
+
+ /* Note that we deliberately use LOG_MESSAGE() instead of
+ * LOG_UNIT_MESSAGE() here, since this is supposed to mimic
+ * closely what is written to screen using the status output,
+ * which is supposed the highest level, friendliest output
+ * possible, which means we should avoid the low-level unit
+ * name. */
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ mid);
+}
+
+static void job_emit_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ job_log_begin_status_message(u, job_id, t);
+ job_print_begin_status_message(u, t);
+}
+
static int job_perform_on_unit(Job **j) {
uint32_t id;
Manager *m;
@@ -547,11 +647,12 @@ static int job_perform_on_unit(Job **j) {
assert_not_reached("Invalid job type");
}
- /* Log if the job still exists and the start/stop/reload function
- * actually did something. */
+ /* Log if the job still exists and the start/stop/reload function actually did something. Note that this means
+ * for units for which there's no 'activating' phase (i.e. because we transition directly from 'inactive' to
+ * 'active') we'll possibly skip the "Starting..." message. */
*j = manager_get_job(m, id);
if (*j && r > 0)
- unit_status_emit_starting_stopping_reloading(u, t);
+ job_emit_begin_status_message(u, id, t);
return r;
}
@@ -580,7 +681,9 @@ int job_run_and_invalidate(Job *j) {
switch (j->type) {
case JOB_VERIFY_ACTIVE: {
- UnitActiveState t = unit_active_state(j->unit);
+ UnitActiveState t;
+
+ t = unit_active_state(j->unit);
if (UNIT_IS_ACTIVE_OR_RELOADING(t))
r = -EALREADY;
else if (t == UNIT_ACTIVATING)
@@ -595,8 +698,7 @@ int job_run_and_invalidate(Job *j) {
case JOB_RESTART:
r = job_perform_on_unit(&j);
- /* If the unit type does not support starting/stopping,
- * then simply wait. */
+ /* If the unit type does not support starting/stopping, then simply wait. */
if (r == -EBADR)
r = 0;
break;
@@ -614,8 +716,12 @@ int job_run_and_invalidate(Job *j) {
}
if (j) {
- if (r == -EALREADY)
+ if (r == -EAGAIN)
+ job_set_state(j, JOB_WAITING); /* Hmm, not ready after all, let's return to JOB_WAITING state */
+ else if (r == -EALREADY) /* already being executed */
r = job_finish_and_invalidate(j, JOB_DONE, true, true);
+ else if (r == -ECOMM) /* condition failed, but all is good */
+ r = job_finish_and_invalidate(j, JOB_DONE, true, false);
else if (r == -EBADR)
r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false);
else if (r == -ENOEXEC)
@@ -628,8 +734,6 @@ int job_run_and_invalidate(Job *j) {
r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
else if (r == -ESTALE)
r = job_finish_and_invalidate(j, JOB_ONCE, true, false);
- else if (r == -EAGAIN)
- job_set_state(j, JOB_WAITING);
else if (r < 0)
r = job_finish_and_invalidate(j, JOB_FAILED, true, false);
}
@@ -637,7 +741,7 @@ int job_run_and_invalidate(Job *j) {
return r;
}
-_pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobResult result) {
+_pure_ static const char *job_get_done_status_message_format(Unit *u, JobType t, JobResult result) {
static const char *const generic_finished_start_job[_JOB_RESULT_MAX] = {
[JOB_DONE] = "Started %s.",
@@ -666,7 +770,6 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
[JOB_SKIPPED] = "%s is not active.",
};
- const UnitStatusMessageFormats *format_table;
const char *format;
assert(u);
@@ -674,13 +777,11 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
assert(t < _JOB_TYPE_MAX);
if (IN_SET(t, JOB_START, JOB_STOP, JOB_RESTART)) {
- format_table = &UNIT_VTABLE(u)->status_message_formats;
- if (format_table) {
- format = t == JOB_START ? format_table->finished_start_job[result] :
- format_table->finished_stop_job[result];
- if (format)
- return format;
- }
+ format = t == JOB_START ?
+ UNIT_VTABLE(u)->status_message_formats.finished_start_job[result] :
+ UNIT_VTABLE(u)->status_message_formats.finished_stop_job[result];
+ if (format)
+ return format;
}
/* Return generic strings */
@@ -698,7 +799,7 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
static const struct {
const char *color, *word;
-} job_print_status_messages [_JOB_RESULT_MAX] = {
+} job_print_done_status_messages[_JOB_RESULT_MAX] = {
[JOB_DONE] = { ANSI_OK_COLOR, " OK " },
[JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
[JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
@@ -710,7 +811,7 @@ static const struct {
[JOB_ONCE] = { ANSI_HIGHLIGHT_RED, " ONCE " },
};
-static void job_print_status_message(Unit *u, JobType t, JobResult result) {
+static void job_print_done_status_message(Unit *u, JobType t, JobResult result) {
const char *format;
const char *status;
@@ -722,19 +823,23 @@ static void job_print_status_message(Unit *u, JobType t, JobResult result) {
if (t == JOB_RELOAD)
return;
- if (!job_print_status_messages[result].word)
+ /* No message if the job did not actually do anything due to failed condition. */
+ if (t == JOB_START && result == JOB_DONE && !u->condition_result)
return;
- format = job_get_status_message_format(u, t, result);
+ if (!job_print_done_status_messages[result].word)
+ return;
+
+ format = job_get_done_status_message_format(u, t, result);
if (!format)
return;
if (log_get_show_color())
- status = strjoina(job_print_status_messages[result].color,
- job_print_status_messages[result].word,
+ status = strjoina(job_print_done_status_messages[result].color,
+ job_print_done_status_messages[result].word,
ANSI_NORMAL);
else
- status = job_print_status_messages[result].word;
+ status = job_print_done_status_messages[result].word;
if (result != JOB_DONE)
manager_flip_auto_status(u->manager, true);
@@ -751,7 +856,7 @@ static void job_print_status_message(Unit *u, JobType t, JobResult result) {
}
}
-static void job_log_status_message(Unit *u, JobType t, JobResult result) {
+static void job_log_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
const char *format, *mid;
char buf[LINE_MAX];
static const int job_result_log_level[_JOB_RESULT_MAX] = {
@@ -774,10 +879,24 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
/* Skip printing if output goes to the console, and job_print_status_message()
will actually print something to the console. */
- if (log_on_console() && job_print_status_messages[result].word)
+ if (log_on_console() && job_print_done_status_messages[result].word)
+ return;
+
+ /* Show condition check message if the job did not actually do anything due to failed condition. */
+ if (t == JOB_START && result == JOB_DONE && !u->condition_result) {
+ log_struct(LOG_INFO,
+ "MESSAGE=Condition check resulted in %s being skipped.", unit_description(u),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR);
+
return;
+ }
- format = job_get_status_message_format(u, t, result);
+ format = job_get_done_status_message_format(u, t, result);
if (!format)
return;
@@ -810,6 +929,7 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
default:
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
@@ -819,6 +939,7 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
@@ -826,15 +947,11 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
mid);
}
-static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
+static void job_emit_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
assert(u);
- /* No message if the job did not actually do anything due to failed condition. */
- if (t == JOB_START && result == JOB_DONE && !u->condition_result)
- return;
-
- job_log_status_message(u, t, result);
- job_print_status_message(u, t, result);
+ job_log_done_status_message(u, job_id, t, result);
+ job_print_done_status_message(u, t, result);
}
static void job_fail_dependencies(Unit *u, UnitDependency d) {
@@ -856,19 +973,6 @@ static void job_fail_dependencies(Unit *u, UnitDependency d) {
}
}
-static int job_save_pending_finished_job(Job *j) {
- int r;
-
- assert(j);
-
- r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL);
- if (r < 0)
- return r;
-
- job_unlink(j);
- return set_put(j->manager->pending_finished_jobs, j);
-}
-
int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
Unit *u;
Unit *other;
@@ -885,11 +989,11 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
j->result = result;
- log_unit_debug(u, "Job %s/%s finished, result=%s", u->id, job_type_to_string(t), job_result_to_string(result));
+ log_unit_debug(u, "Job %" PRIu32 " %s/%s finished, result=%s", j->id, u->id, job_type_to_string(t), job_result_to_string(result));
/* If this job did nothing to respective unit we don't log the status message */
if (!already)
- job_emit_status_message(u, t, result);
+ job_emit_done_status_message(u, j->id, t, result);
/* Patch restart jobs so that they become normal start jobs */
if (result == JOB_DONE && t == JOB_RESTART) {
@@ -908,11 +1012,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
j->manager->n_failed_jobs++;
job_uninstall(j);
- /* Keep jobs started before the reload to send singal later, free all others */
- if (!MANAGER_IS_RELOADING(j->manager) ||
- !j->reloaded ||
- job_save_pending_finished_job(j) < 0)
- job_free(j);
+ job_free(j);
/* Fail depending jobs on failure */
if (result != JOB_DONE && recursive) {
@@ -973,7 +1073,9 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
u = j->unit;
job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
- emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out");
+ emergency_action(u->manager, u->job_timeout_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->job_timeout_reboot_arg, -1, "job timed out");
return 0;
}
@@ -1028,14 +1130,19 @@ int job_start_timer(Job *j, bool job_running) {
}
void job_add_to_run_queue(Job *j) {
+ int r;
+
assert(j);
assert(j->installed);
if (j->in_run_queue)
return;
- if (!j->manager->run_queue)
- sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT);
+ if (!j->manager->run_queue) {
+ r = sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable job run queue event source, ignoring: %m");
+ }
LIST_PREPEND(run_queue, j->manager->run_queue, j);
j->in_run_queue = true;
@@ -1071,17 +1178,17 @@ int job_serialize(Job *j, FILE *f) {
assert(j);
assert(f);
- fprintf(f, "job-id=%u\n", j->id);
- fprintf(f, "job-type=%s\n", job_type_to_string(j->type));
- fprintf(f, "job-state=%s\n", job_state_to_string(j->state));
- fprintf(f, "job-irreversible=%s\n", yes_no(j->irreversible));
- fprintf(f, "job-sent-dbus-new-signal=%s\n", yes_no(j->sent_dbus_new_signal));
- fprintf(f, "job-ignore-order=%s\n", yes_no(j->ignore_order));
+ (void) serialize_item_format(f, "job-id", "%u", j->id);
+ (void) serialize_item(f, "job-type", job_type_to_string(j->type));
+ (void) serialize_item(f, "job-state", job_state_to_string(j->state));
+ (void) serialize_bool(f, "job-irreversible", j->irreversible);
+ (void) serialize_bool(f, "job-sent-dbus-new-signal", j->sent_dbus_new_signal);
+ (void) serialize_bool(f, "job-ignore-order", j->ignore_order);
if (j->begin_usec > 0)
- fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec);
+ (void) serialize_usec(f, "job-begin", j->begin_usec);
if (j->begin_running_usec > 0)
- fprintf(f, "job-begin-running="USEC_FMT"\n", j->begin_running_usec);
+ (void) serialize_usec(f, "job-begin-running", j->begin_running_usec);
bus_track_serialize(j->bus_track, f, "subscribed");
@@ -1091,24 +1198,26 @@ int job_serialize(Job *j, FILE *f) {
}
int job_deserialize(Job *j, FILE *f) {
+ int r;
+
assert(j);
assert(f);
for (;;) {
- char line[LINE_MAX], *l, *v;
+ _cleanup_free_ char *line = NULL;
+ char *l, *v;
size_t k;
- if (!fgets(line, sizeof(line), f)) {
- if (feof(f))
- return 0;
- return -errno;
- }
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
- char_array_0(line);
l = strstrip(line);
/* End marker */
- if (l[0] == 0)
+ if (isempty(l))
return 0;
k = strcspn(l, "=");
@@ -1122,16 +1231,16 @@ int job_deserialize(Job *j, FILE *f) {
if (streq(l, "job-id")) {
if (safe_atou32(v, &j->id) < 0)
- log_debug("Failed to parse job id value %s", v);
+ log_debug("Failed to parse job id value: %s", v);
} else if (streq(l, "job-type")) {
JobType t;
t = job_type_from_string(v);
if (t < 0)
- log_debug("Failed to parse job type %s", v);
+ log_debug("Failed to parse job type: %s", v);
else if (t >= _JOB_TYPE_MAX_IN_TRANSACTION)
- log_debug("Cannot deserialize job of type %s", v);
+ log_debug("Cannot deserialize job of type: %s", v);
else
j->type = t;
@@ -1140,7 +1249,7 @@ int job_deserialize(Job *j, FILE *f) {
s = job_state_from_string(v);
if (s < 0)
- log_debug("Failed to parse job state %s", v);
+ log_debug("Failed to parse job state: %s", v);
else
job_set_state(j, s);
@@ -1149,7 +1258,7 @@ int job_deserialize(Job *j, FILE *f) {
b = parse_boolean(v);
if (b < 0)
- log_debug("Failed to parse job irreversible flag %s", v);
+ log_debug("Failed to parse job irreversible flag: %s", v);
else
j->irreversible = j->irreversible || b;
@@ -1158,7 +1267,7 @@ int job_deserialize(Job *j, FILE *f) {
b = parse_boolean(v);
if (b < 0)
- log_debug("Failed to parse job sent_dbus_new_signal flag %s", v);
+ log_debug("Failed to parse job sent_dbus_new_signal flag: %s", v);
else
j->sent_dbus_new_signal = j->sent_dbus_new_signal || b;
@@ -1167,31 +1276,21 @@ int job_deserialize(Job *j, FILE *f) {
b = parse_boolean(v);
if (b < 0)
- log_debug("Failed to parse job ignore_order flag %s", v);
+ log_debug("Failed to parse job ignore_order flag: %s", v);
else
j->ignore_order = j->ignore_order || b;
- } else if (streq(l, "job-begin")) {
- unsigned long long ull;
-
- if (sscanf(v, "%llu", &ull) != 1)
- log_debug("Failed to parse job-begin value %s", v);
- else
- j->begin_usec = ull;
-
- } else if (streq(l, "job-begin-running")) {
- unsigned long long ull;
-
- if (sscanf(v, "%llu", &ull) != 1)
- log_debug("Failed to parse job-begin-running value %s", v);
- else
- j->begin_running_usec = ull;
+ } else if (streq(l, "job-begin"))
+ (void) deserialize_usec(v, &j->begin_usec);
- } else if (streq(l, "subscribed")) {
+ else if (streq(l, "job-begin-running"))
+ (void) deserialize_usec(v, &j->begin_running_usec);
+ else if (streq(l, "subscribed")) {
if (strv_extend(&j->deserialized_clients, v) < 0)
- log_oom();
- }
+ return log_oom();
+ } else
+ log_debug("Unknown job serialization key: %s", l);
}
}
@@ -1366,7 +1465,6 @@ bool job_may_gc(Job *j) {
* we start + other stop → gc
* we stop + other start → stay
* we stop + other stop → stay
- *
*/
return true;
@@ -1385,15 +1483,8 @@ void job_add_to_gc_queue(Job *j) {
j->in_gc_queue = true;
}
-static int job_compare(const void *a, const void *b) {
- Job *x = *(Job**) a, *y = *(Job**) b;
-
- if (x->id < y->id)
- return -1;
- if (x->id > y->id)
- return 1;
-
- return 0;
+static int job_compare(Job * const *a, Job * const *b) {
+ return CMP((*a)->id, (*b)->id);
}
static size_t sort_job_list(Job **list, size_t n) {
@@ -1401,7 +1492,7 @@ static size_t sort_job_list(Job **list, size_t n) {
size_t a, b;
/* Order by numeric IDs */
- qsort_safe(list, n, sizeof(Job*), job_compare);
+ typesafe_qsort(list, n, job_compare);
/* Filter out duplicates */
for (a = 0, b = 0; a < n; a++) {
diff --git a/src/core/job.h b/src/core/job.h
index 2f5f3f3989..1b9bcdd895 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -80,7 +80,7 @@ enum JobMode {
};
enum JobResult {
- JOB_DONE, /* Job completed successfully */
+ JOB_DONE, /* Job completed successfully (or skipped due to a failed ConditionXYZ=) */
JOB_CANCELED, /* Job canceled by a conflicting job installation or by explicit cancel request */
JOB_TIMEOUT, /* Job timeout elapsed */
JOB_FAILED, /* Job failed */
@@ -156,17 +156,16 @@ struct Job {
bool irreversible:1;
bool in_gc_queue:1;
bool ref_by_private_bus:1;
- bool reloaded:1;
};
Job* job_new(Unit *unit, JobType type);
Job* job_new_raw(Unit *unit);
void job_unlink(Job *job);
-void job_free(Job *job);
+Job* job_free(Job *job);
Job* job_install(Job *j);
int job_install_deserialized(Job *j);
void job_uninstall(Job *j);
-void job_dump(Job *j, FILE*f, const char *prefix);
+void job_dump(Job *j, FILE *f, const char *prefix);
int job_serialize(Job *j, FILE *f);
int job_deserialize(Job *j, FILE *f);
int job_coldplug(Job *j);
@@ -223,6 +222,8 @@ void job_add_to_gc_queue(Job *j);
int job_get_before(Job *j, Job*** ret);
int job_get_after(Job *j, Job*** ret);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Job*, job_free);
+
const char* job_type_to_string(JobType t) _const_;
JobType job_type_from_string(const char *s) _pure_;
diff --git a/src/core/kill.c b/src/core/kill.c
index 929eebfe37..6fe96cfc07 100644
--- a/src/core/kill.c
+++ b/src/core/kill.c
@@ -9,8 +9,10 @@ void kill_context_init(KillContext *c) {
assert(c);
c->kill_signal = SIGTERM;
+ c->final_kill_signal = SIGKILL;
c->send_sigkill = true;
c->send_sighup = false;
+ c->watchdog_signal = SIGABRT;
}
void kill_context_dump(KillContext *c, FILE *f, const char *prefix) {
@@ -21,10 +23,12 @@ void kill_context_dump(KillContext *c, FILE *f, const char *prefix) {
fprintf(f,
"%sKillMode: %s\n"
"%sKillSignal: SIG%s\n"
+ "%sFinalKillSignal: SIG%s\n"
"%sSendSIGKILL: %s\n"
"%sSendSIGHUP: %s\n",
prefix, kill_mode_to_string(c->kill_mode),
prefix, signal_to_string(c->kill_signal),
+ prefix, signal_to_string(c->final_kill_signal),
prefix, yes_no(c->send_sigkill),
prefix, yes_no(c->send_sighup));
}
diff --git a/src/core/kill.h b/src/core/kill.h
index 2d6aa943a6..f3915be1dc 100644
--- a/src/core/kill.h
+++ b/src/core/kill.h
@@ -21,8 +21,10 @@ typedef enum KillMode {
struct KillContext {
KillMode kill_mode;
int kill_signal;
+ int final_kill_signal;
bool send_sigkill;
bool send_sighup;
+ int watchdog_signal;
};
typedef enum KillWho {
diff --git a/src/core/killall.c b/src/core/killall.c
index 87d207fd3d..f0ce996556 100644
--- a/src/core/killall.c
+++ b/src/core/killall.c
@@ -23,16 +23,20 @@
static bool ignore_proc(pid_t pid, bool warn_rootfs) {
_cleanup_fclose_ FILE *f = NULL;
- char c;
const char *p;
- size_t count;
+ char c = 0;
uid_t uid;
int r;
/* We are PID 1, let's not commit suicide */
- if (pid == 1)
+ if (pid <= 1)
return true;
+ /* Ignore kernel threads */
+ r = is_kernel_thread(pid);
+ if (r != 0)
+ return true; /* also ignore processes where we can't determine this */
+
r = get_process_uid(pid, &uid);
if (r < 0)
return true; /* not really, but better safe than sorry */
@@ -46,11 +50,10 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) {
if (!f)
return true; /* not really, but has the desired effect */
- count = fread(&c, 1, 1, f);
-
- /* Kernel threads have an empty cmdline */
- if (count <= 0)
- return true;
+ /* Try to read the first character of the command line. If the cmdline is empty (which might be the case for
+ * kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as
+ * actual kernel threads are already filtered out above. */
+ (void) fread(&c, 1, 1, f);
/* Processes with argv[0][0] = '@' we ignore from the killing spree.
*
@@ -63,7 +66,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) {
_cleanup_free_ char *comm = NULL;
- get_process_comm(pid, &comm);
+ (void) get_process_comm(pid, &comm);
log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
"running from the root file system, and thus likely to block re-mounting of the "
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
index 9251929558..a91cfebc67 100644
--- a/src/core/kmod-setup.c
+++ b/src/core/kmod-setup.c
@@ -76,13 +76,15 @@ int kmod_setup(void) {
bool warn_if_module:1;
bool (*condition_fn)(void);
} kmod_table[] = {
- /* auto-loading on use doesn't work before udev is up */
+ /* This one we need to load explicitly, since auto-loading on use doesn't work
+ * before udev created the ghost device nodes, and we need it earlier than that. */
{ "autofs4", "/sys/class/misc/autofs", true, false, NULL },
- /* early configure of ::1 on the loopback device */
+ /* This one we need to load explicitly, since auto-loading of IPv6 is not done when
+ * we try to configure ::1 on the loopback device. */
{ "ipv6", "/sys/module/ipv6", false, true, NULL },
- /* this should never be a module */
+ /* This should never be a module */
{ "unix", "/proc/net/unix", true, true, NULL },
#if HAVE_LIBIPTC
@@ -93,15 +95,12 @@ int kmod_setup(void) {
{ "virtio_rng", NULL, false, false, has_virtio_rng },
};
_cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
- unsigned int i;
- int r;
+ unsigned i;
if (have_effective_cap(CAP_SYS_MODULE) == 0)
return 0;
for (i = 0; i < ELEMENTSOF(kmod_table); i++) {
- _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
-
if (kmod_table[i].path && access(kmod_table[i].path, F_OK) >= 0)
continue;
@@ -122,23 +121,7 @@ int kmod_setup(void) {
kmod_load_resources(ctx);
}
- r = kmod_module_new_from_name(ctx, kmod_table[i].module, &mod);
- if (r < 0) {
- log_error("Failed to lookup module '%s'", kmod_table[i].module);
- continue;
- }
-
- r = kmod_module_probe_insert_module(mod, KMOD_PROBE_APPLY_BLACKLIST, NULL, NULL, NULL, NULL);
- if (r == 0)
- log_debug("Inserted module '%s'", kmod_module_get_name(mod));
- else if (r == KMOD_PROBE_APPLY_BLACKLIST)
- log_info("Module '%s' is blacklisted", kmod_module_get_name(mod));
- else {
- bool print_warning = kmod_table[i].warn_if_unavailable || (r < 0 && r != -ENOENT);
-
- log_full_errno(print_warning ? LOG_WARNING : LOG_DEBUG, r,
- "Failed to insert module '%s': %m", kmod_module_get_name(mod));
- }
+ (void) module_load_and_warn(ctx, kmod_table[i].module, kmod_table[i].warn_if_unavailable);
}
#endif
diff --git a/src/core/load-dropin.c b/src/core/load-dropin.c
index 4b422cc54e..a50b200f5b 100644
--- a/src/core/load-dropin.c
+++ b/src/core/load-dropin.c
@@ -92,7 +92,7 @@ static int process_deps(Unit *u, UnitDependency dependency, const char *dir_suff
log_unit_warning(u, "%s dependency dropin %s target %s has different name",
unit_dependency_to_string(dependency), *p, target);
- r = unit_add_dependency_by_name(u, dependency, entry, *p, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(u, dependency, entry, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
log_unit_warning_errno(u, r, "Cannot add %s dependency on %s, ignoring: %m",
unit_dependency_to_string(dependency), entry);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 15fb47838c..cdbc67f885 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -57,6 +57,8 @@ $1.SyslogFacility, config_parse_log_facility, 0,
$1.SyslogLevel, config_parse_log_level, 0, offsetof($1, exec_context.syslog_priority)
$1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix)
$1.LogLevelMax, config_parse_log_level, 0, offsetof($1, exec_context.log_level_max)
+$1.LogRateLimitIntervalSec, config_parse_sec, 0, offsetof($1, exec_context.log_rate_limit_interval_usec)
+$1.LogRateLimitBurst, config_parse_unsigned, 0, offsetof($1, exec_context.log_rate_limit_burst)
$1.LogExtraFields, config_parse_log_extra_fields, 0, offsetof($1, exec_context)
$1.Capabilities, config_parse_warn_compat, DISABLED_LEGACY, offsetof($1, exec_context)
$1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context.secure_bits)
@@ -151,7 +153,9 @@ m4_define(`KILL_CONTEXT_CONFIG_ITEMS',
`$1.SendSIGKILL, config_parse_bool, 0, offsetof($1, kill_context.send_sigkill)
$1.SendSIGHUP, config_parse_bool, 0, offsetof($1, kill_context.send_sighup)
$1.KillMode, config_parse_kill_mode, 0, offsetof($1, kill_context.kill_mode)
-$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)'
+$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)
+$1.FinalKillSignal, config_parse_signal, 0, offsetof($1, kill_context.final_kill_signal)
+$1.WatchdogSignal, config_parse_signal, 0, offsetof($1, kill_context.watchdog_signal)'
)m4_dnl
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
@@ -162,6 +166,7 @@ $1.CPUShares, config_parse_cpu_shares, 0,
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
+$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
@@ -177,6 +182,7 @@ $1.IOReadBandwidthMax, config_parse_io_limit, 0,
$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context)
$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting)
$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight)
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
@@ -186,6 +192,7 @@ $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0,
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
$1.Delegate, config_parse_delegate, 0, offsetof($1, cgroup_context)
+$1.DisableControllers, config_parse_disable_controllers, 0, offsetof($1, cgroup_context)
$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
@@ -233,6 +240,8 @@ Unit.StartLimitBurst, config_parse_unsigned, 0,
Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
Unit.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action)
Unit.SuccessAction, config_parse_emergency_action, 0, offsetof(Unit, success_action)
+Unit.FailureActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, failure_action_exit_status)
+Unit.SuccessActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, success_action_exit_status)
Unit.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg)
Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions)
Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions)
@@ -282,7 +291,7 @@ Unit.AssertControlGroupController, config_parse_unit_condition_string, CONDI
Unit.AssertNull, config_parse_unit_condition_null, 0, offsetof(Unit, asserts)
Unit.CollectMode, config_parse_collect_mode, 0, offsetof(Unit, collect_mode)
m4_dnl
-Service.PIDFile, config_parse_unit_path_printf, 0, offsetof(Service, pid_file)
+Service.PIDFile, config_parse_pid_file, 0, offsetof(Service, pid_file)
Service.ExecStartPre, config_parse_exec, SERVICE_EXEC_START_PRE, offsetof(Service, exec_command)
Service.ExecStart, config_parse_exec, SERVICE_EXEC_START, offsetof(Service, exec_command)
Service.ExecStartPost, config_parse_exec, SERVICE_EXEC_START_POST, offsetof(Service, exec_command)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index d9a5094aa0..4ebe92fd45 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -34,21 +34,20 @@
#include "hexdecoct.h"
#include "io-util.h"
#include "ioprio.h"
+#include "ip-protocol-list.h"
#include "journal-util.h"
#include "load-fragment.h"
#include "log.h"
#include "missing.h"
-#include "mount-util.h"
+#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#if HAVE_SECCOMP
#include "seccomp-util.h"
#endif
-#include "securebits.h"
#include "securebits-util.h"
#include "signal-util.h"
-#include "socket-protocol-list.h"
#include "stat-util.h"
#include "string-util.h"
#include "strv.h"
@@ -57,26 +56,22 @@
#include "user-util.h"
#include "web-util.h"
-static int supported_socket_protocol_from_string(const char *s) {
+static int parse_socket_protocol(const char *s) {
int r;
- if (isempty(s))
- return IPPROTO_IP;
-
- r = socket_protocol_from_name(s);
+ r = parse_ip_protocol(s);
if (r < 0)
- return -EINVAL;
+ return r;
if (!IN_SET(r, IPPROTO_UDPLITE, IPPROTO_SCTP))
return -EPROTONOSUPPORT;
return r;
}
-DEFINE_CONFIG_PARSE(config_parse_socket_protocol, supported_socket_protocol_from_string, "Failed to parse socket protocol");
+DEFINE_CONFIG_PARSE(config_parse_socket_protocol, parse_socket_protocol, "Failed to parse socket protocol");
DEFINE_CONFIG_PARSE(config_parse_exec_secure_bits, secure_bits_from_string, "Failed to parse secure bits");
DEFINE_CONFIG_PARSE_ENUM(config_parse_collect_mode, collect_mode, CollectMode, "Failed to parse garbage collection mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier");
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode, "Failed to parse keyring mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode");
@@ -135,7 +130,7 @@ int config_parse_unit_deps(
continue;
}
- r = unit_add_dependency_by_name(u, d, k, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(u, d, k, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
}
@@ -1015,6 +1010,17 @@ int config_parse_exec_output(
eo = EXEC_OUTPUT_FILE;
+ } else if ((n = startswith(rvalue, "append:"))) {
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n);
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return -ENOEXEC;
+
+ eo = EXEC_OUTPUT_FILE_APPEND;
} else {
eo = exec_output_from_string(rvalue);
if (eo < 0) {
@@ -1557,7 +1563,7 @@ int config_parse_trigger_unit(
return 0;
}
- r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, true, UNIT_DEPENDENCY_FILE);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add trigger on %s, ignoring: %m", p);
return 0;
@@ -1755,11 +1761,11 @@ int config_parse_service_sockets(
continue;
}
- r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
- r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
}
@@ -2861,8 +2867,8 @@ int config_parse_address_families(
}
af = af_from_name(word);
- if (af <= 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0,
+ if (af < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, af,
"Failed to parse address family, ignoring: %s", word);
continue;
}
@@ -3002,13 +3008,13 @@ int config_parse_cpu_quota(
return 0;
}
- r = parse_percent_unbounded(rvalue);
+ r = parse_permille_unbounded(rvalue);
if (r <= 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid CPU quota '%s', ignoring.", rvalue);
return 0;
}
- c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 100U;
+ c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 1000U;
return 0;
}
@@ -3030,7 +3036,7 @@ int config_parse_memory_limit(
if (!isempty(rvalue) && !streq(rvalue, "infinity")) {
- r = parse_percent(rvalue);
+ r = parse_permille(rvalue);
if (r < 0) {
r = parse_size(rvalue, 1024, &bytes);
if (r < 0) {
@@ -3038,7 +3044,7 @@ int config_parse_memory_limit(
return 0;
}
} else
- bytes = physical_memory_scale(r, 100U);
+ bytes = physical_memory_scale(r, 1000U);
if (bytes >= UINT64_MAX ||
(bytes <= 0 && !streq(lvalue, "MemorySwapMax"))) {
@@ -3047,7 +3053,9 @@ int config_parse_memory_limit(
}
}
- if (streq(lvalue, "MemoryLow"))
+ if (streq(lvalue, "MemoryMin"))
+ c->memory_min = bytes;
+ else if (streq(lvalue, "MemoryLow"))
c->memory_low = bytes;
else if (streq(lvalue, "MemoryHigh"))
c->memory_high = bytes;
@@ -3080,7 +3088,7 @@ int config_parse_tasks_max(
int r;
if (isempty(rvalue)) {
- *tasks_max = u->manager->default_tasks_max;
+ *tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX;
return 0;
}
@@ -3089,7 +3097,7 @@ int config_parse_tasks_max(
return 0;
}
- r = parse_percent(rvalue);
+ r = parse_permille(rvalue);
if (r < 0) {
r = safe_atou64(rvalue, &v);
if (r < 0) {
@@ -3097,7 +3105,7 @@ int config_parse_tasks_max(
return 0;
}
} else
- v = system_tasks_max_scale(r, 100U);
+ v = system_tasks_max_scale(r, 1000U);
if (v <= 0 || v >= UINT64_MAX) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
@@ -3199,7 +3207,6 @@ int config_parse_device_allow(
_cleanup_free_ char *path = NULL, *resolved = NULL;
CGroupContext *c = data;
- CGroupDeviceAllow *a;
const char *p = rvalue;
int r;
@@ -3231,7 +3238,7 @@ int config_parse_device_allow(
return 0;
}
- if (!startswith(resolved, "block-") && !startswith(resolved, "char-")) {
+ if (!STARTSWITH_SET(resolved, "block-", "char-")) {
r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
if (r < 0)
@@ -3248,17 +3255,7 @@ int config_parse_device_allow(
return 0;
}
- a = new0(CGroupDeviceAllow, 1);
- if (!a)
- return log_oom();
-
- a->path = TAKE_PTR(resolved);
- a->r = isempty(p) || !!strchr(p, 'r');
- a->w = isempty(p) || !!strchr(p, 'w');
- a->m = isempty(p) || !!strchr(p, 'm');
-
- LIST_PREPEND(device_allow, c->device_allow, a);
- return 0;
+ return cgroup_add_device_allow(c, resolved, p);
}
int config_parse_io_device_weight(
@@ -3335,6 +3332,77 @@ int config_parse_io_device_weight(
return 0;
}
+int config_parse_io_device_latency(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceLatency *l;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and latency from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (parse_sec(p, &usec) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", p);
+ return 0;
+ }
+
+ l = new0(CGroupIODeviceLatency, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = TAKE_PTR(resolved);
+ l->target_usec = usec;
+
+ LIST_PREPEND(device_latencies, c->io_device_latencies, l);
+ return 0;
+}
+
int config_parse_io_limit(
const char *unit,
const char *filename,
@@ -3904,13 +3972,9 @@ int config_parse_temporary_filesystems(
if (r < 0)
continue;
- r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, path, w);
- if (r == -ENOMEM)
+ r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w);
+ if (r < 0)
return log_oom();
- if (r < 0) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse mount options, ignoring: %s", word);
- continue;
- }
}
}
@@ -4116,6 +4180,183 @@ int config_parse_job_running_timeout_sec(
return 0;
}
+int config_parse_emergency_action(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = NULL;
+ EmergencyAction *x = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (unit)
+ m = ((Unit*) userdata)->manager;
+ else
+ m = data;
+
+ r = parse_emergency_action(rvalue, MANAGER_IS_SYSTEM(m), x);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP && MANAGER_IS_USER(m)) {
+ /* Compat mode: remove for systemd 241. */
+
+ log_syntax(unit, LOG_INFO, filename, line, r,
+ "%s= in user mode specified as \"%s\", using \"exit-force\" instead.",
+ lvalue, rvalue);
+ *x = EMERGENCY_ACTION_EXIT_FORCE;
+ return 0;
+ }
+
+ if (r == -EOPNOTSUPP)
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "%s= specified as %s mode action, ignoring: %s",
+ lvalue, MANAGER_IS_SYSTEM(m) ? "user" : "system", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_pid_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL, *n = NULL;
+ Unit *u = userdata;
+ char **s = data;
+ const char *e;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ /* If this is a relative path make it absolute by prefixing the /run */
+ n = path_make_absolute(k, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return log_oom();
+
+ /* Check that the result is a sensible path */
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ e = path_startswith(n, "/var/run/");
+ if (e) {
+ char *z;
+
+ z = strjoin("/run/", e);
+ if (!z)
+ return log_oom();
+
+ log_syntax(unit, LOG_NOTICE, filename, line, 0, "PIDFile= references path below legacy directory /var/run/, updating %s → %s; please update the unit file accordingly.", n, z);
+
+ free_and_replace(*s, z);
+ } else
+ free_and_replace(*s, n);
+
+ return 0;
+}
+
+int config_parse_exit_status(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *exit_status = data, r;
+ uint8_t u;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(exit_status);
+
+ if (isempty(rvalue)) {
+ *exit_status = -1;
+ return 0;
+ }
+
+ r = safe_atou8(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse exit status '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *exit_status = u;
+ return 0;
+}
+
+int config_parse_disable_controllers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+ CGroupContext *c = data;
+ CGroupMask disabled_mask;
+
+ /* 1. If empty, make all controllers eligible for use again.
+ * 2. If non-empty, merge all listed controllers, space separated. */
+
+ if (isempty(rvalue)) {
+ c->disable_controllers = 0;
+ return 0;
+ }
+
+ r = cg_mask_from_string(rvalue, &disabled_mask);
+ if (r < 0 || disabled_mask <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid cgroup string: %s, ignoring", rvalue);
+ return 0;
+ }
+
+ c->disable_controllers |= disabled_mask;
+
+ return 0;
+}
+
#define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
@@ -4174,7 +4415,7 @@ static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
free_and_replace(*filename, target);
}
- f = fdopen(fd, "re");
+ f = fdopen(fd, "r");
if (!f) {
safe_close(fd);
return -errno;
@@ -4290,7 +4531,6 @@ static int load_from_path(Unit *u, const char *path) {
r = open_follow(&filename, &f, symlink_names, &id);
if (r >= 0)
break;
- filename = mfree(filename);
/* ENOENT means that the file is missing or is a dangling symlink.
* ENOTDIR means that one of paths we expect to be is a directory
@@ -4302,6 +4542,7 @@ static int load_from_path(Unit *u, const char *path) {
else if (!IN_SET(r, -ENOENT, -ENOTDIR))
return r;
+ filename = mfree(filename);
/* Empty the symlink names for the next run */
set_clear_free(symlink_names);
}
@@ -4524,6 +4765,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_device_policy, "POLICY" },
{ config_parse_io_limit, "LIMIT" },
{ config_parse_io_device_weight, "DEVICEWEIGHT" },
+ { config_parse_io_device_latency, "DEVICELATENCY" },
{ config_parse_blockio_bandwidth, "BANDWIDTH" },
{ config_parse_blockio_weight, "WEIGHT" },
{ config_parse_blockio_device_weight, "DEVICEWEIGHT" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index dad281ef72..e0d3b4ec3b 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -39,6 +39,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_affinity);
CONFIG_PARSER_PROTOTYPE(config_parse_exec_secure_bits);
CONFIG_PARSER_PROTOTYPE(config_parse_capability_set);
CONFIG_PARSER_PROTOTYPE(config_parse_kill_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_final_kill_signal);
CONFIG_PARSER_PROTOTYPE(config_parse_exec_mount_flags);
CONFIG_PARSER_PROTOTYPE(config_parse_timer);
CONFIG_PARSER_PROTOTYPE(config_parse_trigger_unit);
@@ -68,12 +69,12 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency);
CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight);
CONFIG_PARSER_PROTOTYPE(config_parse_io_limit);
CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight);
CONFIG_PARSER_PROTOTYPE(config_parse_blockio_device_weight);
CONFIG_PARSER_PROTOTYPE(config_parse_blockio_bandwidth);
-CONFIG_PARSER_PROTOTYPE(config_parse_netclass);
CONFIG_PARSER_PROTOTYPE(config_parse_job_mode);
CONFIG_PARSER_PROTOTYPE(config_parse_job_mode_isolate);
CONFIG_PARSER_PROTOTYPE(config_parse_exec_selinux_context);
@@ -102,6 +103,9 @@ CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec);
CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec);
CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields);
CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_pid_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_exit_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
diff --git a/src/core/locale-setup.c b/src/core/locale-setup.c
index c14523fee9..584fb220a1 100644
--- a/src/core/locale-setup.c
+++ b/src/core/locale-setup.c
@@ -4,46 +4,43 @@
#include <stdlib.h>
#include <string.h>
+#include "env-file.h"
#include "env-util.h"
-#include "fileio.h"
#include "locale-setup.h"
#include "locale-util.h"
+#include "proc-cmdline.h"
#include "string-util.h"
#include "strv.h"
#include "util.h"
#include "virt.h"
int locale_setup(char ***environment) {
- char **add;
- char *variables[_VARIABLE_LC_MAX] = {};
- int r = 0, i;
-
- if (detect_container() <= 0) {
- r = parse_env_file(NULL, "/proc/cmdline", WHITESPACE,
- "locale.LANG", &variables[VARIABLE_LANG],
- "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
- "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
- "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
- "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
- "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
- "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
- "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
- "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
- "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
- "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
- "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
- "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
- "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION],
- NULL);
-
- if (r < 0 && r != -ENOENT)
- log_warning_errno(r, "Failed to read /proc/cmdline: %m");
- }
-
- /* Hmm, nothing set on the kernel cmd line? Then let's
- * try /etc/locale.conf */
+ _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {};
+ _cleanup_strv_free_ char **add = NULL;
+ LocaleVariable i;
+ int r;
+
+ r = proc_cmdline_get_key_many(PROC_CMDLINE_STRIP_RD_PREFIX,
+ "locale.LANG", &variables[VARIABLE_LANG],
+ "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
+ "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
+ "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+
+ /* Hmm, nothing set on the kernel cmd line? Then let's try /etc/locale.conf */
if (r <= 0) {
- r = parse_env_file(NULL, "/etc/locale.conf", NEWLINE,
+ r = parse_env_file(NULL, "/etc/locale.conf",
"LANG", &variables[VARIABLE_LANG],
"LANGUAGE", &variables[VARIABLE_LANGUAGE],
"LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
@@ -57,14 +54,11 @@ int locale_setup(char ***environment) {
"LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
"LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
"LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
- "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION],
- NULL);
-
+ "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
if (r < 0 && r != -ENOENT)
log_warning_errno(r, "Failed to read /etc/locale.conf: %m");
}
- add = NULL;
for (i = 0; i < _VARIABLE_LC_MAX; i++) {
char *s;
@@ -72,36 +66,32 @@ int locale_setup(char ***environment) {
continue;
s = strjoin(locale_variable_to_string(i), "=", variables[i]);
- if (!s) {
- r = -ENOMEM;
- goto finish;
- }
+ if (!s)
+ return -ENOMEM;
- if (strv_consume(&add, s) < 0) {
- r = -ENOMEM;
- goto finish;
- }
+ if (strv_consume(&add, s) < 0)
+ return -ENOMEM;
}
- if (!strv_isempty(add)) {
- char **e;
+ if (strv_isempty(add)) {
+ /* If no locale is configured then default to C.UTF-8. */
- e = strv_env_merge(2, *environment, add);
- if (!e) {
- r = -ENOMEM;
- goto finish;
- }
-
- strv_free_and_replace(*environment, e);
+ add = strv_new("LANG=C.UTF-8");
+ if (!add)
+ return -ENOMEM;
}
- r = 0;
+ if (strv_isempty(*environment))
+ strv_free_and_replace(*environment, add);
+ else {
+ char **merged;
-finish:
- strv_free(add);
+ merged = strv_env_merge(2, *environment, add);
+ if (!merged)
+ return -ENOMEM;
- for (i = 0; i < _VARIABLE_LC_MAX; i++)
- free(variables[i]);
+ strv_free_and_replace(*environment, merged);
+ }
- return r;
+ return 0;
}
diff --git a/src/core/loopback-setup.c b/src/core/loopback-setup.c
index 835553ec8f..f613db83ce 100644
--- a/src/core/loopback-setup.c
+++ b/src/core/loopback-setup.c
@@ -53,7 +53,7 @@ static int start_loopback(sd_netlink *rtnl, struct state *s) {
if (r < 0)
return r;
- r = sd_netlink_call_async(rtnl, req, generic_handler, s, LOOPBACK_SETUP_TIMEOUT_USEC, NULL);
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, LOOPBACK_SETUP_TIMEOUT_USEC, "systemd-start-loopback");
if (r < 0)
return r;
@@ -88,7 +88,7 @@ static int add_ipv4_address(sd_netlink *rtnl, struct state *s) {
if (r < 0)
return r;
- r = sd_netlink_call_async(rtnl, req, generic_handler, s, USEC_INFINITY, NULL);
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv4");
if (r < 0)
return r;
@@ -123,7 +123,7 @@ static int add_ipv6_address(sd_netlink *rtnl, struct state *s) {
if (r < 0)
return r;
- r = sd_netlink_call_async(rtnl, req, generic_handler, s, USEC_INFINITY, NULL);
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv6");
if (r < 0)
return r;
diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c
index 11528f83c4..aae548064e 100644
--- a/src/core/machine-id-setup.c
+++ b/src/core/machine-id-setup.c
@@ -15,7 +15,7 @@
#include "machine-id-setup.h"
#include "macro.h"
#include "mkdir.h"
-#include "mount-util.h"
+#include "mountpoint-util.h"
#include "path-util.h"
#include "process-util.h"
#include "stat-util.h"
@@ -73,7 +73,7 @@ static int generate_machine_id(const char *root, sd_id128_t *ret) {
/* If that didn't work, generate a random machine id */
r = sd_id128_randomize(ret);
if (r < 0)
- return log_error_errno(r, "Failed to generate randomized : %m");
+ return log_error_errno(r, "Failed to generate randomized machine ID: %m");
log_info("Initializing machine ID from random generator.");
return 0;
@@ -108,8 +108,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) {
"2) /etc/machine-id exists and is empty.\n"
"3) /etc/machine-id is missing and /etc is writable.\n");
else
- return log_error_errno(errno,
- "Cannot open %s: %m", etc_machine_id);
+ return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
}
writable = false;
@@ -201,14 +200,14 @@ int machine_id_commit(const char *root) {
r = fd_is_temporary_fs(fd);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id);
- if (r == 0) {
- log_error("%s is not on a temporary file system.", etc_machine_id);
- return -EROFS;
- }
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EROFS),
+ "%s is not on a temporary file system.",
+ etc_machine_id);
r = id128_read_fd(fd, ID128_PLAIN, &id);
if (r < 0)
- return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id);
+ return log_error_errno(r, "We didn't find a valid machine ID in %s: %m", etc_machine_id);
fd = safe_close(fd);
diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in
index f3b74f4273..9ccad5ebfe 100644
--- a/src/core/macros.systemd.in
+++ b/src/core/macros.systemd.in
@@ -2,8 +2,6 @@
# SPDX-License-Identifier: LGPL-2.1+
#
# This file is part of systemd.
-#
-# Copyright 2012 Lennart Poettering
# RPM macros for packages installing systemd unit files
@@ -18,7 +16,7 @@
%_sysctldir @sysctldir@
%_sysusersdir @sysusersdir@
%_tmpfilesdir @tmpfilesdir@
-%_environmnentdir @environmentdir@
+%_environmentdir @environmentdir@
%_modulesloaddir @modulesloaddir@
%_modprobedir @modprobedir@
%_systemdgeneratordir @systemgeneratordir@
@@ -26,6 +24,10 @@
%_systemd_system_env_generator_dir @systemenvgeneratordir@
%_systemd_user_env_generator_dir @userenvgeneratordir@
+# Because we had one release with a typo...
+# This is temporary (Remove after systemd 240 is released)
+%_environmnentdir %{warn:Use %%_environmentdir instead}%_environmentdir
+
%systemd_requires \
Requires(post): systemd \
Requires(preun): systemd \
diff --git a/src/core/main.c b/src/core/main.c
index 44dd8348be..839dc062ff 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -28,6 +28,7 @@
#include "bus-error.h"
#include "bus-util.h"
#include "capability-util.h"
+#include "cgroup-util.h"
#include "clock-util.h"
#include "conf-parser.h"
#include "cpu-set-util.h"
@@ -57,6 +58,7 @@
#include "pager.h"
#include "parse-util.h"
#include "path-util.h"
+#include "pretty-print.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "raw-clone.h"
@@ -73,6 +75,7 @@
#include "stdio-util.h"
#include "strv.h"
#include "switch-root.h"
+#include "sysctl-util.h"
#include "terminal-util.h"
#include "umask-util.h"
#include "user-util.h"
@@ -95,11 +98,10 @@ static int arg_crash_chvt = -1;
static bool arg_crash_shell = false;
static bool arg_crash_reboot = false;
static char *arg_confirm_spawn = NULL;
-static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
+static ShowStatus arg_show_status = _SHOW_STATUS_INVALID;
static bool arg_switched_root = false;
-static bool arg_no_pager = false;
+static PagerFlags arg_pager_flags = 0;
static bool arg_service_watchdogs = true;
-static char ***arg_join_controllers = NULL;
static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
@@ -109,6 +111,7 @@ static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
static usec_t arg_runtime_watchdog = 0;
static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
+static char *arg_early_core_pattern = NULL;
static char *arg_watchdog_device = NULL;
static char **arg_default_environment = NULL;
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
@@ -118,7 +121,7 @@ static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
-static bool arg_default_cpu_accounting = false;
+static int arg_default_cpu_accounting = -1;
static bool arg_default_io_accounting = false;
static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false;
@@ -128,7 +131,14 @@ static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
-_noreturn_ static void freeze_or_reboot(void) {
+_noreturn_ static void freeze_or_exit_or_reboot(void) {
+
+ /* If we are running in a contianer, let's prefer exiting, after all we can propagate an exit code to the
+ * container manager, and thus inform it that something went wrong. */
+ if (detect_container() > 0) {
+ log_emergency("Exiting PID 1...");
+ exit(EXIT_EXCEPTION);
+ }
if (arg_crash_reboot) {
log_notice("Rebooting in 10s...");
@@ -183,7 +193,7 @@ _noreturn_ static void crash(int sig) {
(void) kill(pid, sig); /* raise() would kill the parent */
assert_not_reached("We shouldn't be here...");
- _exit(EXIT_FAILURE);
+ _exit(EXIT_EXCEPTION);
} else {
siginfo_t status;
int r;
@@ -226,17 +236,18 @@ _noreturn_ static void crash(int sig) {
else if (pid == 0) {
(void) setsid();
(void) make_console_stdio();
+ (void) rlimit_nofile_safe();
(void) execle("/bin/sh", "/bin/sh", NULL, environ);
log_emergency_errno(errno, "execle() failed: %m");
- _exit(EXIT_FAILURE);
+ _exit(EXIT_EXCEPTION);
} else {
log_info("Spawned crash shell as PID "PID_FMT".", pid);
(void) wait_for_terminate(pid, NULL);
}
}
- freeze_or_reboot();
+ freeze_or_exit_or_reboot();
}
static void install_crash_handler(void) {
@@ -347,22 +358,35 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse dump core switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
else
arg_dump_core = r;
+ } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (path_is_absolute(value))
+ (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern);
+ else
+ log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
+
} else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
if (!value)
arg_crash_chvt = 0; /* turn on */
- else if (parse_crash_chvt(value) < 0)
- log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
+ else {
+ r = parse_crash_chvt(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
+ }
} else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
else
arg_crash_shell = r;
@@ -370,7 +394,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
else
arg_crash_reboot = r;
@@ -379,17 +403,15 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = parse_confirm_spawn(value, &s);
if (r < 0)
- log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
- else {
- free(arg_confirm_spawn);
- arg_confirm_spawn = s;
- }
+ log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
+ else
+ free_and_replace(arg_confirm_spawn, s);
} else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
else
arg_service_watchdogs = r;
@@ -398,7 +420,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
if (value) {
r = parse_show_status(value, &arg_show_status);
if (r < 0)
- log_warning("Failed to parse show status switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
} else
arg_show_status = SHOW_STATUS_YES;
@@ -409,7 +431,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = exec_output_from_string(value);
if (r < 0)
- log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
else
arg_default_std_output = r;
@@ -420,7 +442,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = exec_output_from_string(value);
if (r < 0)
- log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
else
arg_default_std_error = r;
@@ -447,7 +469,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = set_machine_id(value);
if (r < 0)
- log_warning("MachineID '%s' is not valid. Ignoring.", value);
+ log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
} else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
@@ -456,7 +478,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = parse_sec(value, &arg_default_timeout_start_usec);
if (r < 0)
- log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
+ log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
if (arg_default_timeout_start_usec <= 0)
arg_default_timeout_start_usec = USEC_INFINITY;
@@ -466,11 +488,11 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
if (proc_cmdline_value_missing(key, value))
return 0;
- parse_path_argument_and_warn(value, false, &arg_watchdog_device);
+ (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device);
} else if (streq(key, "quiet") && !value) {
- if (arg_show_status == _SHOW_STATUS_UNSET)
+ if (arg_show_status == _SHOW_STATUS_INVALID)
arg_show_status = SHOW_STATUS_AUTO;
} else if (streq(key, "debug") && !value) {
@@ -604,8 +626,8 @@ static int config_parse_output_restricted(
return 0;
}
- if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
+ if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file:, append: are not supported as defaults, ignoring: %s", rvalue);
return 0;
}
@@ -654,7 +676,7 @@ static int parse_config_file(void) {
{ "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
{ "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
{ "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
- { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
+ { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
@@ -690,7 +712,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
{ "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
{ "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
- { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
+ { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
{ "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
@@ -739,7 +761,14 @@ static void set_manager_defaults(Manager *m) {
m->default_restart_usec = arg_default_restart_usec;
m->default_start_limit_interval = arg_default_start_limit_interval;
m->default_start_limit_burst = arg_default_start_limit_burst;
- m->default_cpu_accounting = arg_default_cpu_accounting;
+
+ /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
+ * controller to be enabled, so the default is to enable it unless we got told otherwise. */
+ if (arg_default_cpu_accounting >= 0)
+ m->default_cpu_accounting = arg_default_cpu_accounting;
+ else
+ m->default_cpu_accounting = cpu_accounting_is_cheap();
+
m->default_io_accounting = arg_default_io_accounting;
m->default_ip_accounting = arg_default_ip_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting;
@@ -747,8 +776,10 @@ static void set_manager_defaults(Manager *m) {
m->default_tasks_accounting = arg_default_tasks_accounting;
m->default_tasks_max = arg_default_tasks_max;
- manager_set_default_rlimits(m, arg_default_rlimit);
- manager_environment_add(m, NULL, arg_default_environment);
+ (void) manager_set_default_rlimits(m, arg_default_rlimit);
+
+ (void) manager_default_environment(m);
+ (void) manager_transient_environment_add(m, arg_default_environment);
}
static void set_manager_settings(Manager *m) {
@@ -838,19 +869,15 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_LOG_LEVEL:
r = log_set_max_level_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log level %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
break;
case ARG_LOG_TARGET:
r = log_set_target_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log target %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
break;
@@ -858,10 +885,9 @@ static int parse_argv(int argc, char *argv[]) {
if (optarg) {
r = log_show_color_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log color setting %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
+ optarg);
} else
log_show_color(true);
@@ -870,10 +896,9 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_LOG_LOCATION:
if (optarg) {
r = log_show_location_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log location setting %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
+ optarg);
} else
log_show_location(true);
@@ -881,26 +906,24 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_DEFAULT_STD_OUTPUT:
r = exec_output_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse default standard output setting %s.", optarg);
- return r;
- } else
- arg_default_std_output = r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
+ optarg);
+ arg_default_std_output = r;
break;
case ARG_DEFAULT_STD_ERROR:
r = exec_output_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse default standard error output setting %s.", optarg);
- return r;
- } else
- arg_default_std_error = r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
+ optarg);
+ arg_default_std_error = r;
break;
case ARG_UNIT:
r = free_and_strdup(&arg_default_unit, optarg);
if (r < 0)
- return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
+ return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
break;
@@ -917,7 +940,7 @@ static int parse_argv(int argc, char *argv[]) {
break;
case ARG_NO_PAGER:
- arg_no_pager = true;
+ arg_pager_flags |= PAGER_DISABLE;
break;
case ARG_VERSION:
@@ -938,7 +961,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
+ optarg);
arg_dump_core = r;
}
break;
@@ -946,7 +970,8 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_CRASH_CHVT:
r = parse_crash_chvt(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
+ optarg);
break;
case ARG_CRASH_SHELL:
@@ -955,7 +980,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
arg_crash_shell = r;
}
break;
@@ -966,7 +992,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
arg_crash_reboot = r;
}
break;
@@ -976,23 +1003,24 @@ static int parse_argv(int argc, char *argv[]) {
r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
if (r < 0)
- return log_error_errno(r, "Failed to parse confirm spawn option: %m");
+ return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
+ optarg);
break;
case ARG_SERVICE_WATCHDOGS:
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
+ optarg);
arg_service_watchdogs = r;
break;
case ARG_SHOW_STATUS:
if (optarg) {
r = parse_show_status(optarg, &arg_show_status);
- if (r < 0) {
- log_error("Failed to parse show status boolean %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
+ optarg);
} else
arg_show_status = SHOW_STATUS_YES;
break;
@@ -1002,16 +1030,18 @@ static int parse_argv(int argc, char *argv[]) {
FILE *f;
r = safe_atoi(optarg, &fd);
- if (r < 0 || fd < 0) {
- log_error("Failed to parse deserialize option %s.", optarg);
- return -EINVAL;
- }
+ if (r < 0)
+ log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
+ if (fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid deserialize fd: %d",
+ fd);
(void) fd_cloexec(fd, true);
f = fdopen(fd, "r");
if (!f)
- return log_error_errno(errno, "Failed to open serialization fd: %m");
+ return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
safe_fclose(arg_serialization);
arg_serialization = f;
@@ -1026,7 +1056,7 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_MACHINE_ID:
r = set_machine_id(optarg);
if (r < 0)
- return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
+ return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
break;
case 'h':
@@ -1059,14 +1089,20 @@ static int parse_argv(int argc, char *argv[]) {
/* Hmm, when we aren't run as init system
* let's complain about excess arguments */
- log_error("Excess arguments.");
- return -EINVAL;
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Excess arguments.");
}
return 0;
}
static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd", "1", &link);
+ if (r < 0)
+ return log_oom();
printf("%s [OPTIONS...]\n\n"
"Starts up and maintains the system or user services.\n\n"
@@ -1090,20 +1126,28 @@ static int help(void) {
" --log-color[=BOOL] Highlight important log messages\n"
" --log-location[=BOOL] Include code location in log messages\n"
" --default-standard-output= Set default standard output for services\n"
- " --default-standard-error= Set default standard error output for services\n",
- program_invocation_short_name);
+ " --default-standard-error= Set default standard error output for services\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
return 0;
}
-static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
+static int prepare_reexecute(
+ Manager *m,
+ FILE **ret_f,
+ FDSet **ret_fds,
+ bool switching_root) {
+
_cleanup_fdset_free_ FDSet *fds = NULL;
_cleanup_fclose_ FILE *f = NULL;
int r;
assert(m);
- assert(_f);
- assert(_fds);
+ assert(ret_f);
+ assert(ret_fds);
r = manager_open_serialization(m, &f);
if (r < 0)
@@ -1119,7 +1163,7 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching
r = manager_serialize(m, f, fds, switching_root);
if (r < 0)
- return log_error_errno(r, "Failed to serialize state: %m");
+ return r;
if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
return log_error_errno(errno, "Failed to rewind serialization fd: %m");
@@ -1132,24 +1176,108 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching
if (r < 0)
return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
- *_f = TAKE_PTR(f);
- *_fds = TAKE_PTR(fds);
+ *ret_f = TAKE_PTR(f);
+ *ret_fds = TAKE_PTR(fds);
return 0;
}
+static void bump_file_max_and_nr_open(void) {
+
+ /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
+ * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
+ * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
+ * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOLIMIT (soft +
+ * hard) the only ones that really matter. */
+
+#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
+ _cleanup_free_ char *t = NULL;
+ int r;
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX
+ /* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as
+ * "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */
+ if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/file-max", t);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN
+ t = mfree(t);
+#endif
+
+#if BUMP_PROC_SYS_FS_NR_OPEN
+ int v = INT_MAX;
+
+ /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
+ * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
+ * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
+ * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
+ * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
+ * APIs are kernel APIs, so what do can we do... 🤯 */
+
+ for (;;) {
+ int k;
+
+ v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
+ if (v < 1024) {
+ log_warning("Can't bump fs.nr_open, value too small.");
+ break;
+ }
+
+ k = read_nr_open();
+ if (k < 0) {
+ log_error_errno(k, "Failed to read fs.nr_open: %m");
+ break;
+ }
+ if (k >= v) { /* Already larger */
+ log_debug("Skipping bump, value is already larger.");
+ break;
+ }
+
+ if (asprintf(&t, "%i\n", v) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/nr_open", t);
+ t = mfree(t);
+ if (r == -EINVAL) {
+ log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
+ v /= 2;
+ continue;
+ }
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
+ break;
+ }
+
+ log_debug("Successfully bumped fs.nr_open to %i", v);
+ break;
+ }
+#endif
+}
+
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
int r, nr;
assert(saved_rlimit);
- /* Save the original RLIMIT_NOFILE so that we can reset it
- * later when transitioning from the initrd to the main
+ /* Save the original RLIMIT_NOFILE so that we can reset it later when transitioning from the initrd to the main
* systemd or suchlike. */
if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
- /* Make sure forked processes get the default kernel setting */
+ /* Get the underlying absolute limit the kernel enforces */
+ nr = read_nr_open();
+
+ /* Make sure forked processes get limits based on the original kernel setting */
if (!arg_default_rlimit[RLIMIT_NOFILE]) {
struct rlimit *rl;
@@ -1157,11 +1285,25 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
if (!rl)
return log_oom();
+ /* Bump the hard limit for system services to a substantially higher value. The default hard limit
+ * current kernels set is pretty low (4K), mostly for historical reasons. According to kernel
+ * developers, the fd handling in recent kernels has been optimized substantially enough, so that we
+ * can bump the limit now, without paying too high a price in memory or performance. Note however that
+ * we only bump the hard limit, not the soft limit. That's because select() works the way it works, and
+ * chokes on fds >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
+ * unexpecting programs that they get fds higher than what they can process using select(). By only
+ * bumping the hard limit but leaving the low limit as it is we avoid this pitfall: programs that are
+ * written by folks aware of the select() problem in mind (and thus use poll()/epoll instead of
+ * select(), the way everybody should) can explicitly opt into high fds by bumping their soft limit
+ * beyond 1024, to the hard limit we pass. */
+ if (arg_system)
+ rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
+
arg_default_rlimit[RLIMIT_NOFILE] = rl;
}
- /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */
- nr = read_nr_open();
+ /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
+ * both hard and soft. */
r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr));
if (r < 0)
return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
@@ -1173,16 +1315,15 @@ static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
int r;
assert(saved_rlimit);
- assert(getuid() == 0);
- /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
- * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
- * bump the value high enough for the root user. */
+ /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
+ * normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's bump
+ * the value high enough for our user. */
if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
- r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
+ r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(HIGH_RLIMIT_MEMLOCK));
if (r < 0)
return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
@@ -1219,7 +1360,7 @@ static int status_welcome(void) {
_cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
int r;
- if (arg_show_status <= 0)
+ if (IN_SET(arg_show_status, SHOW_STATUS_NO, SHOW_STATUS_AUTO))
return 0;
r = parse_os_release(NULL,
@@ -1231,12 +1372,12 @@ static int status_welcome(void) {
"Failed to read os-release file, ignoring: %m");
if (log_get_show_color())
- return status_printf(NULL, false, false,
+ return status_printf(NULL, 0,
"\nWelcome to \x1B[%sm%s\x1B[0m!\n",
isempty(ansi_color) ? "1" : ansi_color,
isempty(pretty_name) ? "Linux" : pretty_name);
else
- return status_printf(NULL, false, false,
+ return status_printf(NULL, 0,
"\nWelcome to %s!\n",
isempty(pretty_name) ? "Linux" : pretty_name);
}
@@ -1268,7 +1409,7 @@ static int bump_unix_max_dgram_qlen(void) {
r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
if (r < 0)
- return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
r = safe_atolu(qlen, &v);
if (r < 0)
@@ -1277,7 +1418,7 @@ static int bump_unix_max_dgram_qlen(void) {
if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
return 0;
- r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", 0, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
+ r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
if (r < 0)
return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to bump AF_UNIX datagram queue length, ignoring: %m");
@@ -1474,13 +1615,29 @@ static void initialize_coredump(bool skip_setup) {
if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
- /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
- * until the systemd-coredump tool is enabled via sysctl. */
+ /* But at the same time, turn off the core_pattern logic by default, so that no
+ * coredumps are stored until the systemd-coredump tool is enabled via
+ * sysctl. However it can be changed via the kernel command line later so core
+ * dumps can still be generated during early startup and in initramfs. */
if (!skip_setup)
disable_coredumps();
#endif
}
+static void initialize_core_pattern(bool skip_setup) {
+ int r;
+
+ if (skip_setup || !arg_early_core_pattern)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
+}
+
static void do_reexecute(
int argc,
char *argv[],
@@ -1577,6 +1734,7 @@ static void do_reexecute(
/* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
+ (void) rlimit_nofile_safe();
if (switch_root_init) {
args[0] = switch_root_init;
@@ -1633,7 +1791,7 @@ static int invoke_main_loop(
return log_emergency_errno(r, "Failed to run main loop: %m");
}
- switch (m->exit_code) {
+ switch ((ManagerObjective) r) {
case MANAGER_RELOAD: {
LogTarget saved_log_target;
@@ -1660,7 +1818,8 @@ static int invoke_main_loop(
r = manager_reload(m);
if (r < 0)
- log_warning_errno(r, "Failed to reload, ignoring: %m");
+ /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
+ m->objective = MANAGER_OK;
break;
}
@@ -1724,19 +1883,19 @@ static int invoke_main_loop(
case MANAGER_POWEROFF:
case MANAGER_HALT:
case MANAGER_KEXEC: {
- static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
- [MANAGER_EXIT] = "exit",
- [MANAGER_REBOOT] = "reboot",
+ static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
+ [MANAGER_EXIT] = "exit",
+ [MANAGER_REBOOT] = "reboot",
[MANAGER_POWEROFF] = "poweroff",
- [MANAGER_HALT] = "halt",
- [MANAGER_KEXEC] = "kexec"
+ [MANAGER_HALT] = "halt",
+ [MANAGER_KEXEC] = "kexec",
};
log_notice("Shutting down.");
*ret_reexecute = false;
*ret_retval = m->return_value;
- assert_se(*ret_shutdown_verb = table[m->exit_code]);
+ assert_se(*ret_shutdown_verb = table[m->objective]);
*ret_fds = NULL;
*ret_switch_root_dir = *ret_switch_root_init = NULL;
@@ -1744,7 +1903,7 @@ static int invoke_main_loop(
}
default:
- assert_not_reached("Unknown exit code.");
+ assert_not_reached("Unknown or unexpected manager objective.");
}
}
}
@@ -1816,7 +1975,7 @@ static int initialize_runtime(
install_crash_handler();
if (!skip_setup) {
- r = mount_cgroup_controllers(arg_join_controllers);
+ r = mount_cgroup_controllers();
if (r < 0) {
*ret_error_message = "Failed to mount cgroup hierarchies";
return r;
@@ -1827,6 +1986,7 @@ static int initialize_runtime(
machine_id_setup(NULL, arg_machine_id, NULL);
loopback_setup();
bump_unix_max_dgram_qlen();
+ bump_file_max_and_nr_open();
test_usr();
write_container_id();
}
@@ -1879,11 +2039,9 @@ static int initialize_runtime(
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
log_warning_errno(errno, "Failed to make us a subreaper: %m");
- if (arg_system) {
- /* Bump up RLIMIT_NOFILE for systemd itself */
- (void) bump_rlimit_nofile(saved_rlimit_nofile);
- (void) bump_rlimit_memlock(saved_rlimit_memlock);
- }
+ /* Bump up RLIMIT_NOFILE for systemd itself */
+ (void) bump_rlimit_nofile(saved_rlimit_nofile);
+ (void) bump_rlimit_memlock(saved_rlimit_memlock);
return 0;
}
@@ -1942,7 +2100,6 @@ static void free_arguments(void) {
arg_default_unit = mfree(arg_default_unit);
arg_confirm_spawn = mfree(arg_confirm_spawn);
- arg_join_controllers = strv_free_free(arg_join_controllers);
arg_default_environment = strv_free(arg_default_environment);
arg_syscall_archs = set_free(arg_syscall_archs);
}
@@ -1985,7 +2142,7 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess
}
/* Initialize the show status setting if it hasn't been set explicitly yet */
- if (arg_show_status == _SHOW_STATUS_UNSET)
+ if (arg_show_status == _SHOW_STATUS_INVALID)
arg_show_status = SHOW_STATUS_YES;
return 0;
@@ -1994,50 +2151,43 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess
static int safety_checks(void) {
if (getpid_cached() == 1 &&
- arg_action != ACTION_RUN) {
- log_error("Unsupported execution mode while PID 1.");
- return -EPERM;
- }
+ arg_action != ACTION_RUN)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Unsupported execution mode while PID 1.");
if (getpid_cached() == 1 &&
- !arg_system) {
- log_error("Can't run --user mode as PID 1.");
- return -EPERM;
- }
+ !arg_system)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run --user mode as PID 1.");
if (arg_action == ACTION_RUN &&
arg_system &&
- getpid_cached() != 1) {
- log_error("Can't run system mode unless PID 1.");
- return -EPERM;
- }
+ getpid_cached() != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run system mode unless PID 1.");
if (arg_action == ACTION_TEST &&
- geteuid() == 0) {
- log_error("Don't run test mode as root.");
- return -EPERM;
- }
+ geteuid() == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Don't run test mode as root.");
if (!arg_system &&
arg_action == ACTION_RUN &&
- sd_booted() <= 0) {
- log_error("Trying to run as user instance, but the system has not been booted with systemd.");
- return -EOPNOTSUPP;
- }
+ sd_booted() <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Trying to run as user instance, but the system has not been booted with systemd.");
if (!arg_system &&
arg_action == ACTION_RUN &&
- !getenv("XDG_RUNTIME_DIR")) {
- log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
- return -EUNATCH;
- }
+ !getenv("XDG_RUNTIME_DIR"))
+ return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
+ "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
if (arg_system &&
arg_action == ACTION_RUN &&
- running_in_chroot() > 0) {
- log_error("Cannot be run in a chroot() environment.");
- return -EOPNOTSUPP;
- }
+ running_in_chroot() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Cannot be run in a chroot() environment.");
return 0;
}
@@ -2309,13 +2459,13 @@ int main(int argc, char *argv[]) {
goto finish;
if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES))
- (void) pager_open(arg_no_pager, false);
+ (void) pager_open(arg_pager_flags);
if (arg_action != ACTION_RUN)
skip_setup = true;
if (arg_action == ACTION_HELP) {
- retval = help();
+ retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
goto finish;
} else if (arg_action == ACTION_VERSION) {
retval = version();
@@ -2337,6 +2487,9 @@ int main(int argc, char *argv[]) {
if (arg_action == ACTION_RUN) {
+ /* A core pattern might have been specified via the cmdline. */
+ initialize_core_pattern(skip_setup);
+
/* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
log_close();
@@ -2373,8 +2526,8 @@ int main(int argc, char *argv[]) {
m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
- m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
- m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
set_manager_defaults(m);
set_manager_settings(m);
@@ -2387,7 +2540,6 @@ int main(int argc, char *argv[]) {
r = manager_startup(m, arg_serialization, fds);
if (r < 0) {
- log_error_errno(r, "Failed to fully start up daemon: %m");
error_message = "Failed to start up manager";
goto finish;
}
@@ -2473,8 +2625,8 @@ finish:
if (error_message)
manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
- "%s, freezing.", error_message);
- freeze_or_reboot();
+ "%s.", error_message);
+ freeze_or_exit_or_reboot();
}
return retval;
diff --git a/src/core/manager.c b/src/core/manager.c
index 930df4e23a..35d9753b12 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -22,8 +22,8 @@
#include "sd-messages.h"
#include "sd-path.h"
-#include "alloc-util.h"
#include "all-units.h"
+#include "alloc-util.h"
#include "audit-fd.h"
#include "boot-timestamps.h"
#include "bus-common-errors.h"
@@ -61,6 +61,7 @@
#include "ratelimit.h"
#include "rlimit-util.h"
#include "rm-rf.h"
+#include "serialize.h"
#include "signal-util.h"
#include "socket-util.h"
#include "special.h"
@@ -351,7 +352,7 @@ static int manager_setup_time_change(Manager *m) {
assert(m);
- if (m->test_run_flags)
+ if (MANAGER_IS_TEST_RUN(m))
return 0;
m->time_change_event_source = sd_event_source_unref(m->time_change_event_source);
@@ -407,7 +408,7 @@ static int manager_setup_timezone_change(Manager *m) {
assert(m);
- if (m->test_run_flags != 0)
+ if (MANAGER_IS_TEST_RUN(m))
return 0;
/* We watch /etc/localtime for three events: change of the link count (which might mean removal from /etc even
@@ -423,10 +424,14 @@ static int manager_setup_timezone_change(Manager *m) {
r = sd_event_add_inotify(m->event, &new_event, "/etc/localtime",
IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW, manager_dispatch_timezone_change, m);
- if (r == -ENOENT) /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created
- * either by O_CREATE or by rename() */
+ if (r == -ENOENT) {
+ /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created either by
+ * O_CREATE or by rename() */
+
+ log_debug_errno(r, "/etc/localtime doesn't exist yet, watching /etc instead.");
r = sd_event_add_inotify(m->event, &new_event, "/etc",
IN_CREATE|IN_MOVED_TO|IN_ONLYDIR, manager_dispatch_timezone_change, m);
+ }
if (r < 0)
return log_error_errno(r, "Failed to create timezone change event source: %m");
@@ -446,7 +451,7 @@ static int enable_special_signals(Manager *m) {
assert(m);
- if (m->test_run_flags)
+ if (MANAGER_IS_TEST_RUN(m))
return 0;
/* Enable that we get SIGINT on control-alt-del. In containers
@@ -567,12 +572,11 @@ static int manager_setup_signals(Manager *m) {
return 0;
}
-static void manager_sanitize_environment(Manager *m) {
- assert(m);
+static char** sanitize_environment(char **l) {
/* Let's remove some environment variables that we need ourselves to communicate with our clients */
strv_env_unset_many(
- m->environment,
+ l,
"EXIT_CODE",
"EXIT_STATUS",
"INVOCATION_ID",
@@ -591,12 +595,16 @@ static void manager_sanitize_environment(Manager *m) {
NULL);
/* Let's order the environment alphabetically, just to make it pretty */
- strv_sort(m->environment);
+ strv_sort(l);
+
+ return l;
}
-static int manager_default_environment(Manager *m) {
+int manager_default_environment(Manager *m) {
assert(m);
+ m->transient_environment = strv_free(m->transient_environment);
+
if (MANAGER_IS_SYSTEM(m)) {
/* The system manager always starts with a clean
* environment for its children. It does not import
@@ -605,20 +613,19 @@ static int manager_default_environment(Manager *m) {
* The initial passed environment is untouched to keep
* /proc/self/environ valid; it is used for tagging
* the init process inside containers. */
- m->environment = strv_new("PATH=" DEFAULT_PATH,
- NULL);
+ m->transient_environment = strv_new("PATH=" DEFAULT_PATH);
/* Import locale variables LC_*= from configuration */
- locale_setup(&m->environment);
+ (void) locale_setup(&m->transient_environment);
} else
/* The user manager passes its own environment
* along to its children. */
- m->environment = strv_copy(environ);
+ m->transient_environment = strv_copy(environ);
- if (!m->environment)
- return -ENOMEM;
+ if (!m->transient_environment)
+ return log_oom();
- manager_sanitize_environment(m);
+ sanitize_environment(m->transient_environment);
return 0;
}
@@ -711,28 +718,51 @@ static int manager_setup_sigchld_event_source(Manager *m) {
return 0;
}
-int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **_m) {
_cleanup_(manager_freep) Manager *m = NULL;
int r;
assert(_m);
assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER));
- m = new0(Manager, 1);
+ m = new(Manager, 1);
if (!m)
return -ENOMEM;
- m->unit_file_scope = scope;
- m->exit_code = _MANAGER_EXIT_CODE_INVALID;
- m->default_timer_accuracy_usec = USEC_PER_MINUTE;
- m->default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
- m->default_tasks_accounting = true;
- m->default_tasks_max = UINT64_MAX;
- m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
- m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
- m->default_restart_usec = DEFAULT_RESTART_USEC;
- m->original_log_level = -1;
- m->original_log_target = _LOG_TARGET_INVALID;
+ *m = (Manager) {
+ .unit_file_scope = scope,
+ .objective = _MANAGER_OBJECTIVE_INVALID,
+
+ .default_timer_accuracy_usec = USEC_PER_MINUTE,
+ .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
+ .default_tasks_accounting = true,
+ .default_tasks_max = UINT64_MAX,
+ .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
+ .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
+ .default_restart_usec = DEFAULT_RESTART_USEC,
+
+ .original_log_level = -1,
+ .original_log_target = _LOG_TARGET_INVALID,
+
+ .notify_fd = -1,
+ .cgroups_agent_fd = -1,
+ .signal_fd = -1,
+ .time_change_fd = -1,
+ .user_lookup_fds = { -1, -1 },
+ .private_listen_fd = -1,
+ .dev_autofs_fd = -1,
+ .cgroup_inotify_fd = -1,
+ .pin_cgroupfs_fd = -1,
+ .ask_password_inotify_fd = -1,
+ .idle_pipe = { -1, -1, -1, -1},
+
+ /* start as id #1, so that we can leave #0 around as "null-like" value */
+ .current_job_id = 1,
+
+ .have_ask_password = -EINVAL, /* we don't know */
+ .first_boot = -1,
+ .test_run_flags = test_run_flags,
+ };
#if ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@@ -756,21 +786,6 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
}
- m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
-
- m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
- m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
- m->ask_password_inotify_fd = -1;
-
- m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
-
- m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
-
- m->have_ask_password = -EINVAL; /* we don't know */
- m->first_boot = -1;
-
- m->test_run_flags = test_run_flags;
-
/* Reboot immediately if the user hits C-A-D more often than 7x per 2s */
RATELIMIT_INIT(m->ctrl_alt_del_ratelimit, 2 * USEC_PER_SEC, 7);
@@ -798,10 +813,6 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
if (r < 0)
return r;
- m->udev = udev_new();
- if (!m->udev)
- return -ENOMEM;
-
r = sd_event_default(&m->event);
if (r < 0)
return r;
@@ -831,9 +842,7 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
if (r < 0)
return r;
- r = manager_setup_timezone_change(m);
- if (r < 0)
- return r;
+ (void) manager_setup_timezone_change(m);
r = manager_setup_sigchld_event_source(m);
if (r < 0)
@@ -861,15 +870,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
static int manager_setup_notify(Manager *m) {
int r;
- if (m->test_run_flags)
+ if (MANAGER_IS_TEST_RUN(m))
return 0;
if (m->notify_fd < 0) {
_cleanup_close_ int fd = -1;
- union sockaddr_union sa = {
- .sa.sa_family = AF_UNIX,
- };
- static const int one = 1;
+ union sockaddr_union sa = {};
+ int salen;
/* First free all secondary fields */
m->notify_socket = mfree(m->notify_socket);
@@ -885,17 +892,20 @@ static int manager_setup_notify(Manager *m) {
if (!m->notify_socket)
return log_oom();
+ salen = sockaddr_un_set_path(&sa.un, m->notify_socket);
+ if (salen < 0)
+ return log_error_errno(salen, "Notify socket '%s' not valid for AF_UNIX socket address, refusing.", m->notify_socket);
+
(void) mkdir_parents_label(m->notify_socket, 0755);
- (void) unlink(m->notify_socket);
+ (void) sockaddr_un_unlink(&sa.un);
- strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1);
- r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ r = bind(fd, &sa.sa, salen);
if (r < 0)
- return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+ return log_error_errno(errno, "bind(%s) failed: %m", m->notify_socket);
- r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
if (r < 0)
- return log_error_errno(errno, "SO_PASSCRED failed: %m");
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
m->notify_fd = TAKE_FD(fd);
@@ -940,7 +950,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
* to it. The system instance hence listens on this special socket, but the user instances listen on the system
* bus for these messages. */
- if (m->test_run_flags)
+ if (MANAGER_IS_TEST_RUN(m))
return 0;
if (!MANAGER_IS_SYSTEM(m))
@@ -964,7 +974,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
- (void) unlink(sa.un.sun_path);
+ (void) sockaddr_un_unlink(&sa.un);
/* Only allow root to connect to this socket */
RUN_WITH_UMASK(0077)
@@ -972,8 +982,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
if (r < 0)
return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
- m->cgroups_agent_fd = fd;
- fd = -1;
+ m->cgroups_agent_fd = TAKE_FD(fd);
}
if (!m->cgroups_agent_event_source) {
@@ -1211,6 +1220,45 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) {
return n;
}
+static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
+ unsigned n = 0;
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ while ((u = m->stop_when_unneeded_queue)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ assert(m->stop_when_unneeded_queue);
+
+ assert(u->in_stop_when_unneeded_queue);
+ LIST_REMOVE(stop_when_unneeded_queue, m->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = false;
+
+ n++;
+
+ if (!unit_is_unneeded(u))
+ continue;
+
+ log_unit_debug(u, "Unit is not needed anymore.");
+
+ /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
+ * service being unnecessary after a while. */
+
+ if (!ratelimit_below(&u->auto_stop_ratelimit)) {
+ log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
+ continue;
+ }
+
+ /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
+ r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+ }
+
+ return n;
+}
+
static void manager_clear_jobs_and_units(Manager *m) {
Unit *u;
@@ -1228,17 +1276,20 @@ static void manager_clear_jobs_and_units(Manager *m) {
assert(!m->cleanup_queue);
assert(!m->gc_unit_queue);
assert(!m->gc_job_queue);
+ assert(!m->stop_when_unneeded_queue);
assert(hashmap_isempty(m->jobs));
assert(hashmap_isempty(m->units));
m->n_on_console = 0;
m->n_running_jobs = 0;
+ m->n_installed_jobs = 0;
+ m->n_failed_jobs = 0;
}
Manager* manager_free(Manager *m) {
- UnitType c;
ExecDirectoryType dt;
+ UnitType c;
if (!m)
return NULL;
@@ -1249,8 +1300,8 @@ Manager* manager_free(Manager *m) {
if (unit_vtable[c]->shutdown)
unit_vtable[c]->shutdown(m);
- /* If we reexecute ourselves, we keep the root cgroup around */
- manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
+ /* Keep the cgroup hierarchy in place except when we know we are going down for good */
+ manager_shutdown_cgroup(m, IN_SET(m->objective, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC));
lookup_paths_flush_generator(&m->lookup_paths);
@@ -1292,13 +1343,13 @@ Manager* manager_free(Manager *m) {
manager_close_idle_pipe(m);
- udev_unref(m->udev);
sd_event_unref(m->event);
free(m->notify_socket);
lookup_paths_free(&m->lookup_paths);
- strv_free(m->environment);
+ strv_free(m->transient_environment);
+ strv_free(m->client_environment);
hashmap_free(m->cgroup_unit);
set_free_free(m->unit_path_cache);
@@ -1476,14 +1527,9 @@ static bool manager_dbus_is_running(Manager *m, bool deserialized) {
* and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit
* rather than the current one. */
- if (m->test_run_flags != 0)
+ if (MANAGER_IS_TEST_RUN(m))
return false;
- /* If we are in the user instance, and the env var is already set for us, then this means D-Bus is ran
- * somewhere outside of our own logic. Let's use it */
- if (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"))
- return true;
-
u = manager_get_unit(m, SPECIAL_DBUS_SOCKET);
if (!u)
return false;
@@ -1529,7 +1575,7 @@ static void manager_preset_all(Manager *m) {
if (!MANAGER_IS_SYSTEM(m))
return;
- if (m->test_run_flags != 0)
+ if (MANAGER_IS_TEST_RUN(m))
return;
/* If this is the first boot, and we are in the host system, then preset everything */
@@ -1541,6 +1587,49 @@ static void manager_preset_all(Manager *m) {
log_info("Populated /etc with preset unit settings.");
}
+static void manager_vacuum(Manager *m) {
+ assert(m);
+
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
+ /* Release any runtimes no longer referenced */
+ exec_runtime_vacuum(m);
+}
+
+static void manager_ready(Manager *m) {
+ assert(m);
+
+ /* After having loaded everything, do the final round of catching up with what might have changed */
+
+ m->objective = MANAGER_OK; /* Tell everyone we are up now */
+
+ /* It might be safe to log to the journal now and connect to dbus */
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ /* Sync current state of bus names with our set of listening units */
+ (void) manager_enqueue_sync_bus_names(m);
+
+ /* Let's finally catch up with any changes that took place while we were reloading/reexecing */
+ manager_catchup(m);
+}
+
+static Manager* manager_reloading_start(Manager *m) {
+ m->n_reloading++;
+ return m;
+}
+static void manager_reloading_stopp(Manager **m) {
+ if (*m) {
+ assert((*m)->n_reloading > 0);
+ (*m)->n_reloading--;
+ }
+}
+
int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
int r;
@@ -1549,98 +1638,92 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
/* If we are running in test mode, we still want to run the generators,
* but we should not touch the real generator directories. */
r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope,
- m->test_run_flags ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
+ MANAGER_IS_TEST_RUN(m) ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
NULL);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to initialize path lookup table: %m");
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_START));
r = manager_run_environment_generators(m);
+ if (r >= 0)
+ r = manager_run_generators(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_FINISH));
if (r < 0)
return r;
- dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_START);
- r = manager_run_generators(m);
- dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_FINISH);
+ manager_preset_all(m);
+
+ r = lookup_paths_reduce(&m->lookup_paths);
if (r < 0)
- return r;
+ log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m");
- manager_preset_all(m);
- lookup_paths_reduce(&m->lookup_paths);
manager_build_unit_path_cache(m);
- /* If we will deserialize make sure that during enumeration
- * this is already known, so we increase the counter here
- * already */
- if (serialization)
- m->n_reloading++;
+ {
+ /* This block is (optionally) done with the reloading counter bumped */
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
- /* First, enumerate what we can from all config files */
- dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_START);
- manager_enumerate_perpetual(m);
- manager_enumerate(m);
- dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
+ /* If we will deserialize make sure that during enumeration this is already known, so we increase the
+ * counter here already */
+ if (serialization)
+ reloading = manager_reloading_start(m);
- /* Second, deserialize if there is something to deserialize */
- if (serialization) {
- r = manager_deserialize(m, serialization, fds);
- if (r < 0)
- return log_error_errno(r, "Deserialization failed: %m");
- }
-
- /* Any fds left? Find some unit which wants them. This is
- * useful to allow container managers to pass some file
- * descriptors to us pre-initialized. This enables
- * socket-based activation of entire containers. */
- manager_distribute_fds(m, fds);
+ /* First, enumerate what we can from all config files */
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_START));
+ manager_enumerate_perpetual(m);
+ manager_enumerate(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_FINISH));
- /* We might have deserialized the notify fd, but if we didn't
- * then let's create the bus now */
- r = manager_setup_notify(m);
- if (r < 0)
- /* No sense to continue without notifications, our children would fail anyway. */
- return r;
-
- r = manager_setup_cgroups_agent(m);
- if (r < 0)
- /* Likewise, no sense to continue without empty cgroup notifications. */
- return r;
+ /* Second, deserialize if there is something to deserialize */
+ if (serialization) {
+ r = manager_deserialize(m, serialization, fds);
+ if (r < 0)
+ return log_error_errno(r, "Deserialization failed: %m");
+ }
- r = manager_setup_user_lookup_fd(m);
- if (r < 0)
- /* This shouldn't fail, except if things are really broken. */
- return r;
+ /* Any fds left? Find some unit which wants them. This is useful to allow container managers to pass
+ * some file descriptors to us pre-initialized. This enables socket-based activation of entire
+ * containers. */
+ manager_distribute_fds(m, fds);
- /* Connect to the bus if we are good for it */
- manager_setup_bus(m);
+ /* We might have deserialized the notify fd, but if we didn't then let's create the bus now */
+ r = manager_setup_notify(m);
+ if (r < 0)
+ /* No sense to continue without notifications, our children would fail anyway. */
+ return r;
- /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */
- (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
- m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
+ r = manager_setup_cgroups_agent(m);
+ if (r < 0)
+ /* Likewise, no sense to continue without empty cgroup notifications. */
+ return r;
- /* Third, fire things up! */
- manager_coldplug(m);
+ r = manager_setup_user_lookup_fd(m);
+ if (r < 0)
+ /* This shouldn't fail, except if things are really broken. */
+ return r;
- /* Release any dynamic users no longer referenced */
- dynamic_user_vacuum(m, true);
+ /* Connect to the bus if we are good for it */
+ manager_setup_bus(m);
- exec_runtime_vacuum(m);
+ /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */
+ r = bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to deserialized tracked clients, ignoring: %m");
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
- /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
- manager_vacuum_uid_refs(m);
- manager_vacuum_gid_refs(m);
+ /* Third, fire things up! */
+ manager_coldplug(m);
- if (serialization) {
- assert(m->n_reloading > 0);
- m->n_reloading--;
+ /* Clean up runtime objects */
+ manager_vacuum(m);
- /* Let's wait for the UnitNew/JobNew messages being
- * sent, before we notify that the reload is
- * finished */
- m->send_reloading_done = true;
+ if (serialization)
+ /* Let's wait for the UnitNew/JobNew messages being sent, before we notify that the
+ * reload is finished */
+ m->send_reloading_done = true;
}
- /* Let's finally catch up with any changes that took place while we were reloading/reexecing */
- manager_catchup(m);
+ manager_ready(m);
return 0;
}
@@ -2055,7 +2138,7 @@ static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
assert(j->installed);
assert(j->in_run_queue);
- job_run_and_invalidate(j);
+ (void) job_run_and_invalidate(j);
}
if (m->n_running_jobs > 0)
@@ -2074,57 +2157,63 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
assert(m);
- if (m->dispatching_dbus_queue)
- return 0;
-
- /* Anything to do at all? */
- if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
- return 0;
-
- /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this
- * cycle out, and process things in a later cycle when the queues got a bit emptier. */
- if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
- return 0;
+ /* When we are reloading, let's not wait with generating signals, since we need to exit the manager as quickly
+ * as we can. There's no point in throttling generation of signals in that case. */
+ if (MANAGER_IS_RELOADING(m) || m->send_reloading_done || m->pending_reload_message)
+ budget = (unsigned) -1; /* infinite budget in this case */
+ else {
+ /* Anything to do at all? */
+ if (!m->dbus_unit_queue && !m->dbus_job_queue)
+ return 0;
- /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
- * full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
- * CPU time with generating these messages. Note the difference in counting of this "budget" and the
- * "threshold" above: the "budget" is decreased only once per generated message, regardless how many
- * busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
- * message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
- * times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
- * it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
- * a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
- * the "threshold" is currently chosen much higher than the "budget". */
- budget = MANAGER_BUS_MESSAGE_BUDGET;
+ /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's
+ * sit this cycle out, and process things in a later cycle when the queues got a bit emptier. */
+ if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
+ return 0;
- m->dispatching_dbus_queue = true;
+ /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't
+ * overly full before this call we shouldn't increase it in size too wildly in one step, and we
+ * shouldn't monopolize CPU time with generating these messages. Note the difference in counting of
+ * this "budget" and the "threshold" above: the "budget" is decreased only once per generated message,
+ * regardless how many busses/direct connections it is enqueued on, while the "threshold" is applied to
+ * each queued instance of bus message, i.e. if the same message is enqueued to five busses/direct
+ * connections it will be counted five times. This difference in counting ("references"
+ * vs. "instances") is primarily a result of the fact that it's easier to implement it this way,
+ * however it also reflects the thinking that the "threshold" should put a limit on used queue memory,
+ * i.e. space, while the "budget" should put a limit on time. Also note that the "threshold" is
+ * currently chosen much higher than the "budget". */
+ budget = MANAGER_BUS_MESSAGE_BUDGET;
+ }
- while (budget > 0 && (u = m->dbus_unit_queue)) {
+ while (budget != 0 && (u = m->dbus_unit_queue)) {
assert(u->in_dbus_queue);
bus_unit_send_change_signal(u);
- n++, budget--;
+ n++;
+
+ if (budget != (unsigned) -1)
+ budget--;
}
- while (budget > 0 && (j = m->dbus_job_queue)) {
+ while (budget != 0 && (j = m->dbus_job_queue)) {
assert(j->in_dbus_queue);
bus_job_send_change_signal(j);
- n++, budget--;
- }
+ n++;
- m->dispatching_dbus_queue = false;
+ if (budget != (unsigned) -1)
+ budget--;
+ }
- if (budget > 0 && m->send_reloading_done) {
+ if (m->send_reloading_done) {
m->send_reloading_done = false;
bus_manager_send_reloading(m, false);
- n++, budget--;
+ n++;
}
- if (budget > 0 && m->queued_message) {
- bus_send_queued_message(m);
+ if (m->pending_reload_message) {
+ bus_send_pending_reload_message(m);
n++;
}
@@ -2133,7 +2222,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = userdata;
- char buf[PATH_MAX+1];
+ char buf[PATH_MAX];
ssize_t n;
n = recv(fd, buf, sizeof(buf), 0);
@@ -2461,7 +2550,7 @@ static void manager_handle_ctrl_alt_del(Manager *m) {
if (ratelimit_below(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
else
- emergency_action(m, m->cad_burst_action, NULL,
+ emergency_action(m, m->cad_burst_action, EMERGENCY_ACTION_WARN, NULL, -1,
"Ctrl-Alt-Del was pressed more than 7 times within 2s");
}
@@ -2511,9 +2600,10 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
case SIGTERM:
if (MANAGER_IS_SYSTEM(m)) {
/* This is for compatibility with the original sysvinit */
- r = verify_run_space_and_log("Refusing to reexecute");
- if (r >= 0)
- m->exit_code = MANAGER_REEXECUTE;
+ if (verify_run_space_and_log("Refusing to reexecute") < 0)
+ break;
+
+ m->objective = MANAGER_REEXECUTE;
break;
}
@@ -2569,9 +2659,10 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
}
case SIGHUP:
- r = verify_run_space_and_log("Refusing to reload");
- if (r >= 0)
- m->exit_code = MANAGER_RELOAD;
+ if (verify_run_space_and_log("Refusing to reload") < 0)
+ break;
+
+ m->objective = MANAGER_RELOAD;
break;
default: {
@@ -2591,7 +2682,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
};
/* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
- static const ManagerExitCode code_table[] = {
+ static const ManagerObjective objective_table[] = {
[0] = MANAGER_HALT,
[1] = MANAGER_POWEROFF,
[2] = MANAGER_REBOOT,
@@ -2607,8 +2698,8 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
}
if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
- (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
- m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
+ (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(objective_table)) {
+ m->objective = objective_table[sfsi.ssi_signo - SIGRTMIN - 13];
break;
}
@@ -2632,7 +2723,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
case 24:
if (MANAGER_IS_USER(m)) {
- m->exit_code = MANAGER_EXIT;
+ m->objective = MANAGER_EXIT;
return 0;
}
@@ -2697,10 +2788,8 @@ static int manager_dispatch_timezone_change(
log_debug("inotify event for /etc/localtime");
changed = manager_read_timezone_stat(m);
- if (changed < 0)
+ if (changed <= 0)
return changed;
- if (!changed)
- return 0;
/* Something changed, restart the watch, to ensure we watch the new /etc/localtime if it changed */
(void) manager_setup_timezone_change(m);
@@ -2761,7 +2850,7 @@ int manager_loop(Manager *m) {
RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
assert(m);
- m->exit_code = MANAGER_OK;
+ assert(m->objective == MANAGER_OK); /* Ensure manager_startup() has been called */
/* Release the path cache */
m->unit_path_cache = set_free_free(m->unit_path_cache);
@@ -2773,7 +2862,7 @@ int manager_loop(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to enable SIGCHLD event source: %m");
- while (m->exit_code == MANAGER_OK) {
+ while (m->objective == MANAGER_OK) {
usec_t wait_usec;
if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m))
@@ -2800,6 +2889,9 @@ int manager_loop(Manager *m) {
if (manager_dispatch_cgroup_realize_queue(m) > 0)
continue;
+ if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
+ continue;
+
if (manager_dispatch_dbus_queue(m) > 0)
continue;
@@ -2816,7 +2908,7 @@ int manager_loop(Manager *m) {
return log_error_errno(r, "Failed to run event loop: %m");
}
- return m->exit_code;
+ return m->objective;
}
int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
@@ -2998,7 +3090,25 @@ int manager_open_serialization(Manager *m, FILE **_f) {
return 0;
}
-int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
+static bool manager_timestamp_shall_serialize(ManagerTimestamp t) {
+
+ if (!in_initrd())
+ return true;
+
+ /* The following timestamps only apply to the host system, hence only serialize them there */
+ return !IN_SET(t,
+ MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH,
+ MANAGER_TIMESTAMP_SECURITY_START, MANAGER_TIMESTAMP_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_GENERATORS_START, MANAGER_TIMESTAMP_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_UNITS_LOAD_START, MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
+}
+
+int manager_serialize(
+ Manager *m,
+ FILE *f,
+ FDSet *fds,
+ bool switching_root) {
+
ManagerTimestamp q;
const char *t;
Iterator i;
@@ -3009,56 +3119,53 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
assert(f);
assert(fds);
- m->n_reloading++;
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
- fprintf(f, "current-job-id=%"PRIu32"\n", m->current_job_id);
- fprintf(f, "n-installed-jobs=%u\n", m->n_installed_jobs);
- fprintf(f, "n-failed-jobs=%u\n", m->n_failed_jobs);
- fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
- fprintf(f, "ready-sent=%s\n", yes_no(m->ready_sent));
- fprintf(f, "taint-logged=%s\n", yes_no(m->taint_logged));
- fprintf(f, "service-watchdogs=%s\n", yes_no(m->service_watchdogs));
+ (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id);
+ (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs);
+ (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs);
+ (void) serialize_bool(f, "taint-usr", m->taint_usr);
+ (void) serialize_bool(f, "ready-sent", m->ready_sent);
+ (void) serialize_bool(f, "taint-logged", m->taint_logged);
+ (void) serialize_bool(f, "service-watchdogs", m->service_watchdogs);
+
+ t = show_status_to_string(m->show_status);
+ if (t)
+ (void) serialize_item(f, "show-status", t);
if (m->log_level_overridden)
- fprintf(f, "log-level-override=%i\n", log_get_max_level());
+ (void) serialize_item_format(f, "log-level-override", "%i", log_get_max_level());
if (m->log_target_overridden)
- fprintf(f, "log-target-override=%s\n", log_target_to_string(log_get_target()));
+ (void) serialize_item(f, "log-target-override", log_target_to_string(log_get_target()));
for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
- /* The userspace and finish timestamps only apply to the host system, hence only serialize them there */
- if (in_initrd() && IN_SET(q, MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH))
+ _cleanup_free_ char *joined = NULL;
+
+ if (!manager_timestamp_shall_serialize(q))
continue;
- t = manager_timestamp_to_string(q);
- {
- char field[strlen(t) + STRLEN("-timestamp") + 1];
- strcpy(stpcpy(field, t), "-timestamp");
- dual_timestamp_serialize(f, field, m->timestamps + q);
- }
+ joined = strjoin(manager_timestamp_to_string(q), "-timestamp");
+ if (!joined)
+ return log_oom();
+
+ (void) serialize_dual_timestamp(f, joined, m->timestamps + q);
}
if (!switching_root)
- (void) serialize_environment(f, m->environment);
+ (void) serialize_strv(f, "env", m->client_environment);
if (m->notify_fd >= 0) {
- int copy;
-
- copy = fdset_put_dup(fds, m->notify_fd);
- if (copy < 0)
- return copy;
+ r = serialize_fd(f, fds, "notify-fd", m->notify_fd);
+ if (r < 0)
+ return r;
- fprintf(f, "notify-fd=%i\n", copy);
- fprintf(f, "notify-socket=%s\n", m->notify_socket);
+ (void) serialize_item(f, "notify-socket", m->notify_socket);
}
if (m->cgroups_agent_fd >= 0) {
- int copy;
-
- copy = fdset_put_dup(fds, m->cgroups_agent_fd);
- if (copy < 0)
- return copy;
-
- fprintf(f, "cgroups-agent-fd=%i\n", copy);
+ r = serialize_fd(f, fds, "cgroups-agent-fd", m->cgroups_agent_fd);
+ if (r < 0)
+ return r;
}
if (m->user_lookup_fds[0] >= 0) {
@@ -3066,13 +3173,13 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
if (copy0 < 0)
- return copy0;
+ return log_error_errno(copy0, "Failed to add user lookup fd to serialization: %m");
copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
if (copy1 < 0)
- return copy1;
+ return log_error_errno(copy1, "Failed to add user lookup fd to serialization: %m");
- fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
+ (void) serialize_item_format(f, "user-lookup", "%i %i", copy0, copy1);
}
bus_track_serialize(m->subscribed, f, "subscribed");
@@ -3099,22 +3206,66 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
fputc('\n', f);
r = unit_serialize(u, f, fds, !switching_root);
- if (r < 0) {
- m->n_reloading--;
+ if (r < 0)
return r;
- }
}
- assert(m->n_reloading > 0);
- m->n_reloading--;
-
r = fflush_and_check(f);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to flush serialization: %m");
r = bus_fdset_add_all(m, fds);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to add bus sockets to serialization: %m");
+
+ return 0;
+}
+
+static int manager_deserialize_one_unit(Manager *m, const char *name, FILE *f, FDSet *fds) {
+ Unit *u;
+ int r;
+
+ r = manager_load_unit(m, name, NULL, NULL, &u);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", name);
+ }
+
+ r = unit_deserialize(u, f, fds);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to deserialize unit \"%s\", skipping: %m", name);
+ }
+
+ return 0;
+}
+
+static int manager_deserialize_units(Manager *m, FILE *f, FDSet *fds) {
+ _cleanup_free_ char *line = NULL;
+ const char *unit_name;
+ int r;
+
+ for (;;) {
+ /* Start marker */
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
+
+ unit_name = strstrip(line);
+
+ r = manager_deserialize_one_unit(m, unit_name, f, fds);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0) {
+ r = unit_deserialize_skip(f);
+ if (r < 0)
+ return r;
+ }
+ }
return 0;
}
@@ -3127,32 +3278,30 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
log_debug("Deserializing state...");
- m->n_reloading++;
+ /* If we are not in reload mode yet, enter it now. Not that this is recursive, a caller might already have
+ * increased it to non-zero, which is why we just increase it by one here and down again at the end of this
+ * call. */
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
for (;;) {
- char line[LINE_MAX];
+ _cleanup_free_ char *line = NULL;
const char *val, *l;
- if (!fgets(line, sizeof(line), f)) {
- if (feof(f))
- r = 0;
- else
- r = -errno;
-
- goto finish;
- }
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
- char_array_0(line);
l = strstrip(line);
-
- if (l[0] == 0)
+ if (isempty(l)) /* end marker */
break;
if ((val = startswith(l, "current-job-id="))) {
uint32_t id;
if (safe_atou32(val, &id) < 0)
- log_notice("Failed to parse current job id value %s", val);
+ log_notice("Failed to parse current job id value '%s', ignoring.", val);
else
m->current_job_id = MAX(m->current_job_id, id);
@@ -3160,7 +3309,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
uint32_t n;
if (safe_atou32(val, &n) < 0)
- log_notice("Failed to parse installed jobs counter %s", val);
+ log_notice("Failed to parse installed jobs counter '%s', ignoring.", val);
else
m->n_installed_jobs += n;
@@ -3168,7 +3317,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
uint32_t n;
if (safe_atou32(val, &n) < 0)
- log_notice("Failed to parse failed jobs counter %s", val);
+ log_notice("Failed to parse failed jobs counter '%s', ignoring.", val);
else
m->n_failed_jobs += n;
@@ -3177,7 +3326,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
b = parse_boolean(val);
if (b < 0)
- log_notice("Failed to parse taint /usr flag %s", val);
+ log_notice("Failed to parse taint /usr flag '%s', ignoring.", val);
else
m->taint_usr = m->taint_usr || b;
@@ -3186,7 +3335,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
b = parse_boolean(val);
if (b < 0)
- log_notice("Failed to parse ready-sent flag %s", val);
+ log_notice("Failed to parse ready-sent flag '%s', ignoring.", val);
else
m->ready_sent = m->ready_sent || b;
@@ -3195,7 +3344,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
b = parse_boolean(val);
if (b < 0)
- log_notice("Failed to parse taint-logged flag %s", val);
+ log_notice("Failed to parse taint-logged flag '%s', ignoring.", val);
else
m->taint_logged = m->taint_logged || b;
@@ -3204,10 +3353,19 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
b = parse_boolean(val);
if (b < 0)
- log_notice("Failed to parse service-watchdogs flag %s", val);
+ log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val);
else
m->service_watchdogs = b;
+ } else if ((val = startswith(l, "show-status="))) {
+ ShowStatus s;
+
+ s = show_status_from_string(val);
+ if (s < 0)
+ log_notice("Failed to parse show-status flag '%s', ignoring.", val);
+ else
+ manager_set_show_status(m, s);
+
} else if ((val = startswith(l, "log-level-override="))) {
int level;
@@ -3227,17 +3385,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
manager_override_log_target(m, target);
} else if (startswith(l, "env=")) {
- r = deserialize_environment(&m->environment, l);
- if (r == -ENOMEM)
- goto finish;
+ r = deserialize_environment(l + 4, &m->client_environment);
if (r < 0)
- log_notice_errno(r, "Failed to parse environment entry: \"%s\": %m", l);
+ log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", l);
} else if ((val = startswith(l, "notify-fd="))) {
int fd;
if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
- log_notice("Failed to parse notify fd: \"%s\"", val);
+ log_notice("Failed to parse notify fd, ignoring: \"%s\"", val);
else {
m->notify_event_source = sd_event_source_unref(m->notify_event_source);
safe_close(m->notify_fd);
@@ -3245,22 +3401,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
}
} else if ((val = startswith(l, "notify-socket="))) {
- char *n;
-
- n = strdup(val);
- if (!n) {
- r = -ENOMEM;
- goto finish;
- }
-
- free(m->notify_socket);
- m->notify_socket = n;
+ r = free_and_strdup(&m->notify_socket, val);
+ if (r < 0)
+ return r;
} else if ((val = startswith(l, "cgroups-agent-fd="))) {
int fd;
if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
- log_notice("Failed to parse cgroups agent fd: %s", val);
+ log_notice("Failed to parse cgroups agent fd, ignoring.: %s", val);
else {
m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
safe_close(m->cgroups_agent_fd);
@@ -3271,7 +3420,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
int fd0, fd1;
if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
- log_notice("Failed to parse user lookup fd: %s", val);
+ log_notice("Failed to parse user lookup fd, ignoring: %s", val);
else {
m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
safe_close_pair(m->user_lookup_fds);
@@ -3290,7 +3439,8 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
else if ((val = startswith(l, "subscribed="))) {
if (strv_extend(&m->deserialized_subscribed, val) < 0)
- log_oom();
+ return -ENOMEM;
+
} else {
ManagerTimestamp q;
@@ -3305,100 +3455,50 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
}
if (q < _MANAGER_TIMESTAMP_MAX) /* found it */
- dual_timestamp_deserialize(val, m->timestamps + q);
+ (void) deserialize_dual_timestamp(val, m->timestamps + q);
else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */
- log_notice("Unknown serialization item '%s'", l);
- }
- }
-
- for (;;) {
- Unit *u;
- char name[UNIT_NAME_MAX+2];
- const char* unit_name;
-
- /* Start marker */
- if (!fgets(name, sizeof(name), f)) {
- if (feof(f))
- r = 0;
- else
- r = -errno;
-
- goto finish;
- }
-
- char_array_0(name);
- unit_name = strstrip(name);
-
- r = manager_load_unit(m, unit_name, NULL, NULL, &u);
- if (r < 0) {
- log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", unit_name);
- if (r == -ENOMEM)
- goto finish;
- unit_deserialize_skip(f);
- continue;
- }
-
- r = unit_deserialize(u, f, fds);
- if (r < 0) {
- log_notice_errno(r, "Failed to deserialize unit \"%s\": %m", unit_name);
- if (r == -ENOMEM)
- goto finish;
+ log_notice("Unknown serialization item '%s', ignoring.", l);
}
}
-finish:
- if (ferror(f))
- r = -EIO;
-
- assert(m->n_reloading > 0);
- m->n_reloading--;
-
- return r;
-}
-
-static void manager_flush_finished_jobs(Manager *m) {
- Job *j;
-
- while ((j = set_steal_first(m->pending_finished_jobs))) {
- bus_job_send_removed_signal(j);
- job_free(j);
- }
-
- m->pending_finished_jobs = set_free(m->pending_finished_jobs);
+ return manager_deserialize_units(m, f, fds);
}
int manager_reload(Manager *m) {
- int r, q;
- _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
_cleanup_fdset_free_ FDSet *fds = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
assert(m);
r = manager_open_serialization(m, &f);
if (r < 0)
- return r;
-
- m->n_reloading++;
- bus_manager_send_reloading(m, true);
+ return log_error_errno(r, "Failed to create serialization file: %m");
fds = fdset_new();
- if (!fds) {
- m->n_reloading--;
- return -ENOMEM;
- }
+ if (!fds)
+ return log_oom();
+
+ /* We are officially in reload mode from here on. */
+ reloading = manager_reloading_start(m);
r = manager_serialize(m, f, fds, false);
- if (r < 0) {
- m->n_reloading--;
+ if (r < 0)
return r;
- }
- if (fseeko(f, 0, SEEK_SET) < 0) {
- m->n_reloading--;
- return -errno;
- }
+ if (fseeko(f, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek to beginning of serialization: %m");
+
+ /* 💀 This is the point of no return, from here on there is no way back. 💀 */
+ reloading = NULL;
+
+ bus_manager_send_reloading(m, true);
+
+ /* Start by flushing out all jobs and units, all generated units, all runtime environments, all dynamic users
+ * and everything else that is worth flushing out. We'll get it all back from the serialization — if we need
+ * it.*/
- /* From here on there is no way back. */
manager_clear_jobs_and_units(m);
lookup_paths_flush_generator(&m->lookup_paths);
lookup_paths_free(&m->lookup_paths);
@@ -3407,82 +3507,50 @@ int manager_reload(Manager *m) {
m->uid_refs = hashmap_free(m->uid_refs);
m->gid_refs = hashmap_free(m->gid_refs);
- q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
- if (q < 0 && r >= 0)
- r = q;
+ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to initialize path lookup table, ignoring: %m");
- q = manager_run_environment_generators(m);
- if (q < 0 && r >= 0)
- r = q;
+ (void) manager_run_environment_generators(m);
+ (void) manager_run_generators(m);
- /* Find new unit paths */
- q = manager_run_generators(m);
- if (q < 0 && r >= 0)
- r = q;
+ r = lookup_paths_reduce(&m->lookup_paths);
+ if (r < 0)
+ log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m");
- lookup_paths_reduce(&m->lookup_paths);
manager_build_unit_path_cache(m);
- /* First, enumerate what we can from all config files */
+ /* First, enumerate what we can from kernel and suchlike */
+ manager_enumerate_perpetual(m);
manager_enumerate(m);
/* Second, deserialize our stored data */
- q = manager_deserialize(m, f, fds);
- if (q < 0) {
- log_error_errno(q, "Deserialization failed: %m");
-
- if (r >= 0)
- r = q;
- }
+ r = manager_deserialize(m, f, fds);
+ if (r < 0)
+ log_warning_errno(r, "Deserialization failed, proceeding anyway: %m");
+ /* We don't need the serialization anymore */
f = safe_fclose(f);
- /* Re-register notify_fd as event source */
- q = manager_setup_notify(m);
- if (q < 0 && r >= 0)
- r = q;
-
- q = manager_setup_cgroups_agent(m);
- if (q < 0 && r >= 0)
- r = q;
-
- q = manager_setup_user_lookup_fd(m);
- if (q < 0 && r >= 0)
- r = q;
+ /* Re-register notify_fd as event source, and set up other sockets/communication channels we might need */
+ (void) manager_setup_notify(m);
+ (void) manager_setup_cgroups_agent(m);
+ (void) manager_setup_user_lookup_fd(m);
/* Third, fire things up! */
manager_coldplug(m);
- /* Release any dynamic users no longer referenced */
- dynamic_user_vacuum(m, true);
-
- /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
- manager_vacuum_uid_refs(m);
- manager_vacuum_gid_refs(m);
-
- exec_runtime_vacuum(m);
+ /* Clean up runtime objects no longer referenced */
+ manager_vacuum(m);
+ /* Consider the reload process complete now. */
assert(m->n_reloading > 0);
m->n_reloading--;
- /* It might be safe to log to the journal now and connect to dbus */
- manager_recheck_journal(m);
- manager_recheck_dbus(m);
-
- /* Let's finally catch up with any changes that took place while we were reloading/reexecing */
- manager_catchup(m);
-
- /* Sync current state of bus names with our set of listening units */
- q = manager_enqueue_sync_bus_names(m);
- if (q < 0 && r >= 0)
- r = q;
-
- if (!MANAGER_IS_RELOADING(m))
- manager_flush_finished_jobs(m);
+ manager_ready(m);
m->send_reloading_done = true;
-
- return r;
+ return 0;
}
void manager_reset_failed(Manager *m) {
@@ -3533,7 +3601,7 @@ static void manager_notify_finished(Manager *m) {
char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec;
- if (m->test_run_flags)
+ if (MANAGER_IS_TEST_RUN(m))
return;
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
@@ -3726,9 +3794,14 @@ static const char* user_env_generator_binary_paths[] = {
static int manager_run_environment_generators(Manager *m) {
char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
const char **paths;
- void* args[] = {&tmp, &tmp, &m->environment};
+ void* args[] = {
+ [STDOUT_GENERATE] = &tmp,
+ [STDOUT_COLLECT] = &tmp,
+ [STDOUT_CONSUME] = &m->transient_environment,
+ };
+ int r;
- if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
return 0;
paths = MANAGER_IS_SYSTEM(m) ? system_env_generator_binary_paths : user_env_generator_binary_paths;
@@ -3736,7 +3809,10 @@ static int manager_run_environment_generators(Manager *m) {
if (!generator_path_any(paths))
return 0;
- return execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL);
+ RUN_WITH_UMASK(0022)
+ r = execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, m->transient_environment);
+
+ return r;
}
static int manager_run_generators(Manager *m) {
@@ -3746,7 +3822,7 @@ static int manager_run_generators(Manager *m) {
assert(m);
- if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
return 0;
paths = generator_binary_paths(m->unit_file_scope);
@@ -3757,8 +3833,10 @@ static int manager_run_generators(Manager *m) {
return 0;
r = lookup_paths_mkdir_generator(&m->lookup_paths);
- if (r < 0)
+ if (r < 0) {
+ log_error_errno(r, "Failed to create generator directories: %m");
goto finish;
+ }
argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
argv[1] = m->lookup_paths.generator;
@@ -3767,19 +3845,46 @@ static int manager_run_generators(Manager *m) {
argv[4] = NULL;
RUN_WITH_UMASK(0022)
- execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC,
- NULL, NULL, (char**) argv);
+ (void) execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC,
+ NULL, NULL, (char**) argv, m->transient_environment);
+
+ r = 0;
finish:
lookup_paths_trim_generator(&m->lookup_paths);
return r;
}
-int manager_environment_add(Manager *m, char **minus, char **plus) {
+int manager_transient_environment_add(Manager *m, char **plus) {
+ char **a;
+
+ assert(m);
+
+ if (strv_isempty(plus))
+ return 0;
+
+ a = strv_env_merge(2, m->transient_environment, plus);
+ if (!a)
+ return log_oom();
+
+ sanitize_environment(a);
+
+ return strv_free_and_replace(m->transient_environment, a);
+}
+
+int manager_client_environment_modify(
+ Manager *m,
+ char **minus,
+ char **plus) {
+
char **a = NULL, **b = NULL, **l;
+
assert(m);
- l = m->environment;
+ if (strv_isempty(minus) && strv_isempty(plus))
+ return 0;
+
+ l = m->client_environment;
if (!strv_isempty(minus)) {
a = strv_env_delete(l, 1, minus);
@@ -3799,16 +3904,29 @@ int manager_environment_add(Manager *m, char **minus, char **plus) {
l = b;
}
- if (m->environment != l)
- strv_free(m->environment);
+ if (m->client_environment != l)
+ strv_free(m->client_environment);
+
if (a != l)
strv_free(a);
if (b != l)
strv_free(b);
- m->environment = l;
- manager_sanitize_environment(m);
+ m->client_environment = sanitize_environment(l);
+ return 0;
+}
+
+int manager_get_effective_environment(Manager *m, char ***ret) {
+ char **l;
+
+ assert(m);
+ assert(ret);
+
+ l = strv_env_merge(2, m->transient_environment, m->client_environment);
+ if (!l)
+ return -ENOMEM;
+ *ret = l;
return 0;
}
@@ -3860,7 +3978,7 @@ static bool manager_journal_is_running(Manager *m) {
assert(m);
- if (m->test_run_flags != 0)
+ if (MANAGER_IS_TEST_RUN(m))
return false;
/* If we are the user manager we can safely assume that the journal is up */
@@ -3915,7 +4033,7 @@ void manager_set_show_status(Manager *m, ShowStatus mode) {
mode == SHOW_STATUS_NO ? "Disabling" : "Enabling");
m->show_status = mode;
- if (mode > 0)
+ if (IN_SET(mode, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES))
(void) touch("/run/systemd/show-status");
else
(void) unlink("/run/systemd/show-status");
@@ -3937,7 +4055,7 @@ static bool manager_get_show_status(Manager *m, StatusType type) {
if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0)
return false;
- return m->show_status > 0;
+ return IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES);
}
const char *manager_get_confirm_spawn(Manager *m) {
@@ -4024,7 +4142,7 @@ void manager_status_printf(Manager *m, StatusType type, const char *status, cons
return;
va_start(ap, format);
- status_vprintf(status, true, type == STATUS_TYPE_EPHEMERAL, format, ap);
+ status_vprintf(status, SHOW_STATUS_ELLIPSIZE|(type == STATUS_TYPE_EPHEMERAL ? SHOW_STATUS_EPHEMERAL : 0), format, ap);
va_end(ap);
}
@@ -4278,7 +4396,7 @@ static void manager_serialize_uid_refs_internal(
if (!(c & DESTROY_IPC_FLAG))
continue;
- fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
+ (void) serialize_item_format(f, field_name, UID_FMT, uid);
}
}
@@ -4323,7 +4441,7 @@ static void manager_deserialize_uid_refs_one_internal(
r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
if (r < 0) {
- log_debug("Failed to add UID reference entry");
+ log_debug_errno(r, "Failed to add UID reference entry: %m");
return;
}
}
@@ -4517,6 +4635,14 @@ void manager_restore_original_log_target(Manager *m) {
m->log_target_overridden = false;
}
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s) {
+ if (in_initrd() &&
+ s >= MANAGER_TIMESTAMP_SECURITY_START &&
+ s <= MANAGER_TIMESTAMP_UNITS_LOAD_FINISH)
+ return s - MANAGER_TIMESTAMP_SECURITY_START + MANAGER_TIMESTAMP_INITRD_SECURITY_START;
+ return s;
+}
+
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
[MANAGER_INITIALIZING] = "initializing",
[MANAGER_STARTING] = "starting",
@@ -4541,6 +4667,12 @@ static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
[MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish",
[MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start",
[MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_START] = "initrd-security-start",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH] = "initrd-security-finish",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_START] = "initrd-generators-start",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH] = "initrd-generators-finish",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START] = "initrd-units-load-start",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH] = "initrd-units-load-finish",
};
DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);
diff --git a/src/core/manager.h b/src/core/manager.h
index ea5d425030..bce8020cfd 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -5,6 +5,7 @@
#include <stdio.h>
#include "sd-bus.h"
+#include "sd-device.h"
#include "sd-event.h"
#include "cgroup-util.h"
@@ -22,6 +23,8 @@ typedef struct Unit Unit;
typedef struct Manager Manager;
+/* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields
+ * when requested */
typedef enum ManagerState {
MANAGER_INITIALIZING,
MANAGER_STARTING,
@@ -33,7 +36,7 @@ typedef enum ManagerState {
_MANAGER_STATE_INVALID = -1
} ManagerState;
-typedef enum ManagerExitCode {
+typedef enum ManagerObjective {
MANAGER_OK,
MANAGER_EXIT,
MANAGER_RELOAD,
@@ -43,9 +46,9 @@ typedef enum ManagerExitCode {
MANAGER_HALT,
MANAGER_KEXEC,
MANAGER_SWITCH_ROOT,
- _MANAGER_EXIT_CODE_MAX,
- _MANAGER_EXIT_CODE_INVALID = -1
-} ManagerExitCode;
+ _MANAGER_OBJECTIVE_MAX,
+ _MANAGER_OBJECTIVE_INVALID = -1
+} ManagerObjective;
typedef enum StatusType {
STATUS_TYPE_EPHEMERAL,
@@ -53,6 +56,27 @@ typedef enum StatusType {
STATUS_TYPE_EMERGENCY,
} StatusType;
+/* Notes:
+ * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
+ * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
+ * the manager is system and not running under container environment.
+ *
+ * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
+ *
+ * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
+ * have RTC.
+ *
+ * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
+ * have RTC, or systemd is built without EFI support.
+ *
+ * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
+ * negative of the actual value.
+ *
+ * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
+ *
+ * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
+ */
+
typedef enum ManagerTimestamp {
MANAGER_TIMESTAMP_FIRMWARE,
MANAGER_TIMESTAMP_LOADER,
@@ -67,6 +91,13 @@ typedef enum ManagerTimestamp {
MANAGER_TIMESTAMP_GENERATORS_FINISH,
MANAGER_TIMESTAMP_UNITS_LOAD_START,
MANAGER_TIMESTAMP_UNITS_LOAD_FINISH,
+
+ MANAGER_TIMESTAMP_INITRD_SECURITY_START,
+ MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_START,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,
_MANAGER_TIMESTAMP_MAX,
_MANAGER_TIMESTAMP_INVALID = -1,
} ManagerTimestamp;
@@ -77,14 +108,15 @@ typedef enum ManagerTimestamp {
#include "show-status.h"
#include "unit-name.h"
-enum {
- /* 0 = run normally */
- MANAGER_TEST_RUN_MINIMAL = 1 << 1, /* create basic data structures */
- MANAGER_TEST_RUN_BASIC = 1 << 2, /* interact with the environment */
- MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 3, /* also run env generators */
- MANAGER_TEST_RUN_GENERATORS = 1 << 4, /* also run unit generators */
+typedef enum ManagerTestRunFlags {
+ MANAGER_TEST_NORMAL = 0, /* run normally */
+ MANAGER_TEST_RUN_MINIMAL = 1 << 0, /* create basic data structures */
+ MANAGER_TEST_RUN_BASIC = 1 << 1, /* interact with the environment */
+ MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
+ MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
-};
+} ManagerTestRunFlags;
+
assert_cc((MANAGER_TEST_FULL & UINT8_MAX) == MANAGER_TEST_FULL);
struct Manager {
@@ -130,6 +162,9 @@ struct Manager {
/* Target units whose default target dependencies haven't been set yet */
LIST_HEAD(Unit, target_deps_queue);
+ /* Units that might be subject to StopWhenUnneeded= clean-up */
+ LIST_HEAD(Unit, stop_when_unneeded_queue);
+
sd_event *event;
/* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in
@@ -177,18 +212,16 @@ struct Manager {
LookupPaths lookup_paths;
Set *unit_path_cache;
- char **environment;
+ char **transient_environment; /* The environment, as determined from config files, kernel cmdline and environment generators */
+ char **client_environment; /* Environment variables created by clients through the bus API */
usec_t runtime_watchdog;
usec_t shutdown_watchdog;
dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
- struct udev* udev;
-
/* Data specific to the device subsystem */
- struct udev_monitor* udev_monitor;
- sd_event_source *udev_event_source;
+ sd_device_monitor *device_monitor;
Hashmap *devices_by_sysfs;
/* Data specific to the mount subsystem */
@@ -215,7 +248,7 @@ struct Manager {
/* This is used during reloading: before the reload we queue
* the reply message here, and afterwards we send it */
- sd_bus_message *queued_message;
+ sd_bus_message *pending_reload_message;
Hashmap *watch_bus; /* D-Bus names => Unit object n:1 */
@@ -250,11 +283,10 @@ struct Manager {
usec_t etc_localtime_mtime;
bool etc_localtime_accessible:1;
- /* Flags */
- ManagerExitCode exit_code:5;
+ ManagerObjective objective:5;
+ /* Flags */
bool dispatching_load_queue:1;
- bool dispatching_dbus_queue:1;
bool taint_usr:1;
@@ -267,7 +299,7 @@ struct Manager {
/* Have we ever changed the "kernel.pid_max" sysctl? */
bool sysctl_pid_max_changed:1;
- unsigned test_run_flags:8;
+ ManagerTestRunFlags test_run_flags:8;
/* If non-zero, exit with the following value when the systemd
* process terminate. Useful for containers: systemd-nspawn could get
@@ -305,9 +337,6 @@ struct Manager {
/* non-zero if we are reloading or reexecuting, */
int n_reloading;
- /* A set which contains all jobs that started before reload and finished
- * during it */
- Set *pending_finished_jobs;
unsigned n_installed_jobs;
unsigned n_failed_jobs;
@@ -375,10 +404,12 @@ struct Manager {
#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH))
-/* The exit code is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
-#define MANAGER_IS_RUNNING(m) ((m)->exit_code == MANAGER_OK)
+/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
+#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)
-int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **m);
+#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0)
+
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **m);
Manager* manager_free(Manager *m);
DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
@@ -408,7 +439,11 @@ void manager_clear_jobs(Manager *m);
unsigned manager_dispatch_load_queue(Manager *m);
-int manager_environment_add(Manager *m, char **minus, char **plus);
+int manager_default_environment(Manager *m);
+int manager_transient_environment_add(Manager *m, char **plus);
+int manager_client_environment_modify(Manager *m, char **minus, char **plus);
+int manager_get_effective_environment(Manager *m, char ***ret);
+
int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit);
int manager_loop(Manager *m);
@@ -479,3 +514,4 @@ void manager_disable_confirm_spawn(void);
const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_;
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
diff --git a/src/core/meson.build b/src/core/meson.build
index 3852c5e9d8..450d6f72a9 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -5,6 +5,8 @@ libcore_la_sources = '''
audit-fd.h
automount.c
automount.h
+ bpf-devices.c
+ bpf-devices.h
bpf-firewall.c
bpf-firewall.h
cgroup.c
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 16880e6157..3ce6164b06 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -11,7 +11,9 @@
#include "bus-util.h"
#include "cgroup-util.h"
#include "dev-setup.h"
+#include "dirent-util.h"
#include "efivars.h"
+#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "label.h"
@@ -20,7 +22,7 @@
#include "missing.h"
#include "mkdir.h"
#include "mount-setup.h"
-#include "mount-util.h"
+#include "mountpoint-util.h"
#include "path-util.h"
#include "set.h"
#include "smack-util.h"
@@ -229,76 +231,105 @@ int mount_setup_early(void) {
return mount_points_setup(N_EARLY_MOUNT, false);
}
-int mount_cgroup_controllers(char ***join_controllers) {
+static const char *join_with(const char *controller) {
+
+ static const char* const pairs[] = {
+ "cpu", "cpuacct",
+ "net_cls", "net_prio",
+ NULL
+ };
+
+ const char *const *x, *const *y;
+
+ assert(controller);
+
+ /* This will lookup which controller to mount another controller with. Input is a controller name, and output
+ * is the other controller name. The function works both ways: you can input one and get the other, and input
+ * the other to get the one. */
+
+ STRV_FOREACH_PAIR(x, y, pairs) {
+ if (streq(controller, *x))
+ return *y;
+ if (streq(controller, *y))
+ return *x;
+ }
+
+ return NULL;
+}
+
+static int symlink_controller(const char *target, const char *alias) {
+ const char *a;
+ int r;
+
+ assert(target);
+ assert(alias);
+
+ a = strjoina("/sys/fs/cgroup/", alias);
+
+ r = symlink_idempotent(target, a, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink %s: %m", a);
+
+#ifdef SMACK_RUN_LABEL
+ const char *p;
+
+ p = strjoina("/sys/fs/cgroup/", target);
+
+ r = mac_smack_copy(a, p);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a);
+#endif
+
+ return 0;
+}
+
+int mount_cgroup_controllers(void) {
_cleanup_set_free_free_ Set *controllers = NULL;
- bool has_argument = !!join_controllers;
int r;
if (!cg_is_legacy_wanted())
return 0;
/* Mount all available cgroup controllers that are built into the kernel. */
-
- if (!has_argument)
- /* The defaults:
- * mount "cpu" + "cpuacct" together, and "net_cls" + "net_prio".
- *
- * We'd like to add "cpuset" to the mix, but "cpuset" doesn't really
- * work for groups with no initialized attributes.
- */
- join_controllers = (char**[]) {
- STRV_MAKE("cpu", "cpuacct"),
- STRV_MAKE("net_cls", "net_prio"),
- NULL,
- };
-
r = cg_kernel_controllers(&controllers);
if (r < 0)
return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
for (;;) {
_cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
+ const char *other_controller;
MountPoint p = {
.what = "cgroup",
.type = "cgroup",
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
.mode = MNT_IN_CONTAINER,
};
- char ***k = NULL;
controller = set_steal_first(controllers);
if (!controller)
break;
- for (k = join_controllers; *k; k++)
- if (strv_find(*k, controller))
- break;
-
- if (*k) {
- char **i, **j;
-
- for (i = *k, j = *k; *i; i++) {
-
- if (!streq(*i, controller)) {
- _cleanup_free_ char *t;
-
- t = set_remove(controllers, *i);
- if (!t) {
- if (has_argument)
- free(*i);
- continue;
- }
- }
-
- *(j++) = *i;
+ /* Check if we shall mount this together with another controller */
+ other_controller = join_with(controller);
+ if (other_controller) {
+ _cleanup_free_ char *c = NULL;
+
+ /* Check if the other controller is actually available in the kernel too */
+ c = set_remove(controllers, other_controller);
+ if (c) {
+
+ /* Join the two controllers into one string, and maintain a stable ordering */
+ if (strcmp(controller, other_controller) < 0)
+ options = strjoin(controller, ",", other_controller);
+ else
+ options = strjoin(other_controller, ",", controller);
+ if (!options)
+ return log_oom();
}
+ }
- *j = NULL;
-
- options = strv_join(*k, ",");
- if (!options)
- return log_oom();
- } else
+ /* The simple case, where there's only one controller to mount together */
+ if (!options)
options = TAKE_PTR(controller);
where = strappend("/sys/fs/cgroup/", options);
@@ -312,35 +343,14 @@ int mount_cgroup_controllers(char ***join_controllers) {
if (r < 0)
return r;
- if (r > 0 && *k) {
- char **i;
-
- for (i = *k; *i; i++) {
- _cleanup_free_ char *t = NULL;
-
- t = strappend("/sys/fs/cgroup/", *i);
- if (!t)
- return log_oom();
-
- r = symlink(options, t);
- if (r >= 0) {
-#ifdef SMACK_RUN_LABEL
- _cleanup_free_ char *src;
- src = strappend("/sys/fs/cgroup/", options);
- if (!src)
- return log_oom();
- r = mac_smack_copy(t, src);
- if (r < 0 && r != -EOPNOTSUPP)
- return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", src, t);
-#endif
- } else if (errno != EEXIST)
- return log_error_errno(errno, "Failed to create symlink %s: %m", t);
- }
- }
+ /* Create symlinks from the individual controller names, in case we have a joined mount */
+ if (controller)
+ (void) symlink_controller(options, controller);
+ if (other_controller)
+ (void) symlink_controller(options, other_controller);
}
- /* Now that we mounted everything, let's make the tmpfs the
- * cgroup file systems are mounted into read-only. */
+ /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */
(void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
return 0;
@@ -396,6 +406,100 @@ static int relabel_cgroup_filesystems(void) {
return 0;
}
+
+static int relabel_extra(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, c = 0;
+
+ /* Support for relabelling additional files or directories after loading the policy. For this, code in the
+ * initrd simply has to drop in *.relabel files into /run/systemd/relabel-extra.d/. We'll read all such files
+ * expecting one absolute path by line and will relabel each (and everyone below that in case the path refers
+ * to a directory). These drop-in files are supposed to be absolutely minimal, and do not understand comments
+ * and such. After the operation succeeded the files are removed, and the drop-in directory as well, if
+ * possible.
+ */
+
+ d = opendir("/run/systemd/relabel-extra.d/");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/, ignoring: %m");
+ }
+
+ for (;;) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed read directory /run/systemd/relabel-extra.d/, ignoring: %m");
+ break;
+ }
+
+ if (hidden_or_backup_file(de->d_name))
+ continue;
+
+ if (!endswith(de->d_name, ".relabel"))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_REG, DT_UNKNOWN))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ log_warning_errno(errno, "Failed to convert file descriptor into file object, ignoring: %m");
+ continue;
+ }
+ TAKE_FD(fd);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read from /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ break;
+ }
+ if (r == 0) /* EOF */
+ break;
+
+ path_simplify(line, true);
+
+ if (!path_is_normalized(line)) {
+ log_warning("Path to relabel is not normalized, ignoring: %s", line);
+ continue;
+ }
+
+ if (!path_is_absolute(line)) {
+ log_warning("Path to relabel is not absolute, ignoring: %s", line);
+ continue;
+ }
+
+ log_debug("Relabelling additional file/directory '%s'.", line);
+ (void) nftw(line, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+ c++;
+ }
+
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ }
+
+ /* Remove when we completing things. */
+ if (rmdir("/run/systemd/relabel-extra.d") < 0)
+ log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/ directory: %m");
+
+ return c;
+}
#endif
int mount_setup(bool loaded_policy) {
@@ -413,20 +517,22 @@ int mount_setup(bool loaded_policy) {
if (loaded_policy) {
usec_t before_relabel, after_relabel;
char timespan[FORMAT_TIMESPAN_MAX];
+ const char *i;
+ int n_extra;
before_relabel = now(CLOCK_MONOTONIC);
- (void) nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
- (void) nftw("/dev/shm", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
- (void) nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+ FOREACH_STRING(i, "/dev", "/dev/shm", "/run")
+ (void) nftw(i, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
- r = relabel_cgroup_filesystems();
- if (r < 0)
- return r;
+ (void) relabel_cgroup_filesystems();
+
+ n_extra = relabel_extra();
after_relabel = now(CLOCK_MONOTONIC);
- log_info("Relabelled /dev, /run and /sys/fs/cgroup in %s.",
+ log_info("Relabelled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.",
+ n_extra > 0 ? ", additional files" : "",
format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
}
#endif
@@ -452,20 +558,9 @@ int mount_setup(bool loaded_policy) {
(void) mkdir_label("/run/systemd", 0755);
(void) mkdir_label("/run/systemd/system", 0755);
- /* Set up inaccessible (and empty) file nodes of all types */
- (void) mkdir_label("/run/systemd/inaccessible", 0000);
- (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
- (void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
- (void) mkfifo("/run/systemd/inaccessible/fifo", 0000);
- (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0);
-
- /* The following two are likely to fail if we lack the privs for it (for example in an userns environment, if
- * CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 device nodes to be
- * created). But that's entirely fine. Consumers of these files should carry fallback to use a different node
- * then, for example /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for most
- * uses. */
- (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));
- (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0));
+ /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes
+ * from. */
+ (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID);
return 0;
}
diff --git a/src/core/mount-setup.h b/src/core/mount-setup.h
index 43cd8908de..b4ca2cf4b4 100644
--- a/src/core/mount-setup.h
+++ b/src/core/mount-setup.h
@@ -6,7 +6,7 @@
int mount_setup_early(void);
int mount_setup(bool loaded_policy);
-int mount_cgroup_controllers(char ***join_controllers);
+int mount_cgroup_controllers(void);
bool mount_point_is_api(const char *path);
bool mount_point_ignore(const char *path);
diff --git a/src/core/mount.c b/src/core/mount.c
index 21437dad08..ead9bc1f44 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -11,6 +11,7 @@
#include "alloc-util.h"
#include "dbus-mount.h"
+#include "dbus-unit.h"
#include "device.h"
#include "escape.h"
#include "exit-status.h"
@@ -20,11 +21,12 @@
#include "manager.h"
#include "mkdir.h"
#include "mount-setup.h"
-#include "mount-util.h"
#include "mount.h"
+#include "mountpoint-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "serialize.h"
#include "special.h"
#include "string-table.h"
#include "string-util.h"
@@ -66,22 +68,18 @@ static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
MOUNT_UNMOUNTING_SIGKILL);
}
-static bool mount_needs_network(const char *options, const char *fstype) {
- if (fstab_test_option(options, "_netdev\0"))
+static bool mount_is_network(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "_netdev\0"))
return true;
- if (fstype && fstype_is_network(fstype))
+ if (p->fstype && fstype_is_network(p->fstype))
return true;
return false;
}
-static bool mount_is_network(const MountParameters *p) {
- assert(p);
-
- return mount_needs_network(p->options, p->fstype);
-}
-
static bool mount_is_loop(const MountParameters *p) {
assert(p);
@@ -127,11 +125,11 @@ static bool mount_is_bound_to_device(const Mount *m) {
return fstab_test_option(p->options, "x-systemd.device-bound\0");
}
-static bool needs_quota(const MountParameters *p) {
+static bool mount_needs_quota(const MountParameters *p) {
assert(p);
- /* Quotas are not enabled on network filesystems,
- * but we want them, for example, on storage connected via iscsi */
+ /* Quotas are not enabled on network filesystems, but we want them, for example, on storage connected via
+ * iscsi. We hence don't use mount_is_network() here, as that would also return true for _netdev devices. */
if (p->fstype && fstype_is_network(p->fstype))
return false;
@@ -209,11 +207,9 @@ static void mount_unwatch_control_pid(Mount *m) {
static void mount_parameters_done(MountParameters *p) {
assert(p);
- free(p->what);
- free(p->options);
- free(p->fstype);
-
- p->what = p->options = p->fstype = NULL;
+ p->what = mfree(p->what);
+ p->options = mfree(p->options);
+ p->fstype = mfree(p->fstype);
}
static void mount_done(Unit *u) {
@@ -316,7 +312,7 @@ static int mount_add_mount_dependencies(Mount *m) {
}
static int mount_add_device_dependencies(Mount *m) {
- bool device_wants_mount = false;
+ bool device_wants_mount;
UnitDependencyMask mask;
MountParameters *p;
UnitDependency dep;
@@ -346,8 +342,8 @@ static int mount_add_device_dependencies(Mount *m) {
if (path_equal(m->where, "/"))
return 0;
- if (mount_is_auto(p) && !mount_is_automount(p) && MANAGER_IS_SYSTEM(UNIT(m)->manager))
- device_wants_mount = true;
+ device_wants_mount =
+ mount_is_auto(p) && !mount_is_automount(p) && MANAGER_IS_SYSTEM(UNIT(m)->manager);
/* Mount units from /proc/self/mountinfo are not bound to devices
* by default since they're subject to races when devices are
@@ -379,16 +375,16 @@ static int mount_add_quota_dependencies(Mount *m) {
if (!p)
return 0;
- if (!needs_quota(p))
+ if (!mount_needs_quota(p))
return 0;
mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT;
- r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, NULL, true, mask);
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, true, mask);
if (r < 0)
return r;
- r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, NULL, true, mask);
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, true, mask);
if (r < 0)
return r;
@@ -427,10 +423,10 @@ static bool mount_is_extrinsic(Mount *m) {
}
static int mount_add_default_dependencies(Mount *m) {
+ const char *after, *before;
UnitDependencyMask mask;
- int r;
MountParameters *p;
- const char *after;
+ int r;
assert(m);
@@ -456,7 +452,7 @@ static int mount_add_default_dependencies(Mount *m) {
* network.target, so that they are shut down only
* after this mount unit is stopped. */
- r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, NULL, true, mask);
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, true, mask);
if (r < 0)
return r;
@@ -467,25 +463,32 @@ static int mount_add_default_dependencies(Mount *m) {
* whose purpose it is to delay this until the network
* is "up". */
- r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, NULL, true, mask);
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, true, mask);
if (r < 0)
return r;
after = SPECIAL_REMOTE_FS_PRE_TARGET;
- } else
+ before = SPECIAL_REMOTE_FS_TARGET;
+ } else {
after = SPECIAL_LOCAL_FS_PRE_TARGET;
+ before = SPECIAL_LOCAL_FS_TARGET;
+ }
+
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_BEFORE, before, true, mask);
+ if (r < 0)
+ return r;
- r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, NULL, true, mask);
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, true, mask);
if (r < 0)
return r;
- r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, mask);
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, mask);
if (r < 0)
return r;
/* If this is a tmpfs mount then we have to unmount it before we try to deactivate swaps */
if (streq_ptr(p->fstype, "tmpfs")) {
- r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, NULL, true, mask);
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, true, mask);
if (r < 0)
return r;
}
@@ -540,6 +543,10 @@ static int mount_add_extras(Mount *m) {
assert(m);
+ /* Note: this call might be called after we already have been loaded once (and even when it has already been
+ * activated), in case data from /proc/self/mountinfo has changed. This means all code here needs to be ready
+ * to run with an already set up unit. */
+
if (u->fragment_path)
m->from_fragment = true;
@@ -609,28 +616,33 @@ static int mount_load_root_mount(Unit *u) {
static int mount_load(Unit *u) {
Mount *m = MOUNT(u);
- int r;
+ int r, q, w;
assert(u);
assert(u->load_state == UNIT_STUB);
r = mount_load_root_mount(u);
- if (r < 0)
- return r;
if (m->from_proc_self_mountinfo || u->perpetual)
- r = unit_load_fragment_and_dropin_optional(u);
+ q = unit_load_fragment_and_dropin_optional(u);
else
- r = unit_load_fragment_and_dropin(u);
+ q = unit_load_fragment_and_dropin(u);
+
+ /* Add in some extras. Note we do this in all cases (even if we failed to load the unit) when announced by the
+ * kernel, because we need some things to be set up no matter what when the kernel establishes a mount and thus
+ * we need to update the state in our unit to track it. After all, consider that we don't allow changing the
+ * 'slice' field for a unit once it is active. */
+ if (u->load_state == UNIT_LOADED || m->from_proc_self_mountinfo || u->perpetual)
+ w = mount_add_extras(m);
+ else
+ w = 0;
+
if (r < 0)
return r;
-
- /* This is a new unit? Then let's add in some extras */
- if (u->load_state == UNIT_LOADED) {
- r = mount_add_extras(m);
- if (r < 0)
- return r;
- }
+ if (q < 0)
+ return q;
+ if (w < 0)
+ return w;
return mount_verify(m);
}
@@ -639,6 +651,9 @@ static void mount_set_state(Mount *m, MountState state) {
MountState old_state;
assert(m);
+ if (m->state != state)
+ bus_unit_send_pending_change_signal(UNIT(m), false);
+
old_state = m->state;
m->state = state;
@@ -746,11 +761,12 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
- ExecParameters exec_params = {
- .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
};
pid_t pid;
int r;
@@ -767,7 +783,9 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
- unit_set_exec_params(UNIT(m), &exec_params);
+ r = unit_set_exec_params(UNIT(m), &exec_params);
+ if (r < 0)
+ return r;
r = exec_spawn(UNIT(m),
c,
@@ -795,9 +813,7 @@ static void mount_enter_dead(Mount *m, MountResult f) {
if (m->result == MOUNT_SUCCESS)
m->result = f;
- if (m->result != MOUNT_SUCCESS)
- log_unit_warning(UNIT(m), "Failed with result '%s'.", mount_result_to_string(m->result));
-
+ unit_log_result(UNIT(m), m->result == MOUNT_SUCCESS, mount_result_to_string(m->result));
mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
m->exec_runtime = exec_runtime_unref(m->exec_runtime, true);
@@ -938,7 +954,6 @@ static void mount_enter_mounting(Mount *m) {
(void) mkdir_p_label(m->where, m->directory_mode);
unit_warn_if_dir_nonempty(UNIT(m), m->where);
-
unit_warn_leftover_processes(UNIT(m));
m->control_command_id = MOUNT_EXEC_MOUNT;
@@ -1042,6 +1057,17 @@ fail:
mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
}
+static void mount_cycle_clear(Mount *m) {
+ assert(m);
+
+ /* Clear all state we shall forget for this new cycle */
+
+ m->result = MOUNT_SUCCESS;
+ m->reload_result = MOUNT_SUCCESS;
+ exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
+ UNIT(m)->reset_accounting = true;
+}
+
static int mount_start(Unit *u) {
Mount *m = MOUNT(u);
int r;
@@ -1072,12 +1098,9 @@ static int mount_start(Unit *u) {
if (r < 0)
return r;
- m->result = MOUNT_SUCCESS;
- m->reload_result = MOUNT_SUCCESS;
-
- u->reset_accounting = true;
-
+ mount_cycle_clear(m);
mount_enter_mounting(m);
+
return 1;
}
@@ -1138,21 +1161,23 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", mount_state_to_string(m->state));
- unit_serialize_item(u, f, "result", mount_result_to_string(m->result));
- unit_serialize_item(u, f, "reload-result", mount_result_to_string(m->reload_result));
+ (void) serialize_item(f, "state", mount_state_to_string(m->state));
+ (void) serialize_item(f, "result", mount_result_to_string(m->result));
+ (void) serialize_item(f, "reload-result", mount_result_to_string(m->reload_result));
+ (void) serialize_item_format(f, "n-retry-umount", "%u", m->n_retry_umount);
if (m->control_pid > 0)
- unit_serialize_item_format(u, f, "control-pid", PID_FMT, m->control_pid);
+ (void) serialize_item_format(f, "control-pid", PID_FMT, m->control_pid);
if (m->control_command_id >= 0)
- unit_serialize_item(u, f, "control-command", mount_exec_command_to_string(m->control_command_id));
+ (void) serialize_item(f, "control-command", mount_exec_command_to_string(m->control_command_id));
return 0;
}
static int mount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
Mount *m = MOUNT(u);
+ int r;
assert(u);
assert(key);
@@ -1166,6 +1191,7 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F
log_unit_debug(u, "Failed to parse state value: %s", value);
else
m->deserialized_state = state;
+
} else if (streq(key, "result")) {
MountResult f;
@@ -1184,13 +1210,17 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F
else if (f != MOUNT_SUCCESS)
m->reload_result = f;
+ } else if (streq(key, "n-retry-umount")) {
+
+ r = safe_atou(value, &m->n_retry_umount);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse n-retry-umount value: %s", value);
+
} else if (streq(key, "control-pid")) {
- pid_t pid;
- if (parse_pid(value, &pid) < 0)
+ if (parse_pid(value, &m->control_pid) < 0)
log_unit_debug(u, "Failed to parse control-pid value: %s", value);
- else
- m->control_pid = pid;
+
} else if (streq(key, "control-command")) {
MountExecCommand id;
@@ -1265,8 +1295,11 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
}
- log_unit_full(u, f == MOUNT_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
- "Mount process exited, code=%s status=%i", sigchld_code_to_string(code), status);
+ unit_log_process_exit(
+ u, f == MOUNT_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Mount process",
+ mount_exec_command_to_string(m->control_command_id),
+ code, status);
/* Note that due to the io event priority logic, we can be sure the new mountinfo is loaded
* before we process the SIGCHLD for the mount command. */
@@ -1395,59 +1428,77 @@ static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *user
return 0;
}
-typedef struct {
- bool is_mounted;
- bool just_mounted;
- bool just_changed;
-} MountSetupFlags;
+static int update_parameters_proc_self_mount_info(
+ Mount *m,
+ const char *what,
+ const char *options,
+ const char *fstype) {
+
+ MountParameters *p;
+ int r, q, w;
+
+ p = &m->parameters_proc_self_mountinfo;
+
+ r = free_and_strdup(&p->what, what);
+ if (r < 0)
+ return r;
+
+ q = free_and_strdup(&p->options, options);
+ if (q < 0)
+ return q;
+
+ w = free_and_strdup(&p->fstype, fstype);
+ if (w < 0)
+ return w;
+
+ return r > 0 || q > 0 || w > 0;
+}
static int mount_setup_new_unit(
- Unit *u,
+ Manager *m,
+ const char *name,
const char *what,
const char *where,
const char *options,
const char *fstype,
- MountSetupFlags *flags) {
-
- MountParameters *p;
+ MountProcFlags *ret_flags,
+ Unit **ret) {
- assert(u);
- assert(flags);
+ _cleanup_(unit_freep) Unit *u = NULL;
+ int r;
- u->source_path = strdup("/proc/self/mountinfo");
- MOUNT(u)->where = strdup(where);
- if (!u->source_path || !MOUNT(u)->where)
- return -ENOMEM;
+ assert(m);
+ assert(name);
+ assert(ret_flags);
+ assert(ret);
- /* Make sure to initialize those fields before mount_is_extrinsic(). */
- MOUNT(u)->from_proc_self_mountinfo = true;
- p = &MOUNT(u)->parameters_proc_self_mountinfo;
+ r = unit_new_for_name(m, sizeof(Mount), name, &u);
+ if (r < 0)
+ return r;
- p->what = strdup(what);
- p->options = strdup(options);
- p->fstype = strdup(fstype);
- if (!p->what || !p->options || !p->fstype)
- return -ENOMEM;
+ r = free_and_strdup(&u->source_path, "/proc/self/mountinfo");
+ if (r < 0)
+ return r;
- if (!mount_is_extrinsic(MOUNT(u))) {
- const char *target;
- int r;
+ r = free_and_strdup(&MOUNT(u)->where, where);
+ if (r < 0)
+ return r;
- target = mount_is_network(p) ? SPECIAL_REMOTE_FS_TARGET : SPECIAL_LOCAL_FS_TARGET;
- r = unit_add_dependency_by_name(u, UNIT_BEFORE, target, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
- if (r < 0)
- return r;
+ r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
- r = unit_add_dependency_by_name(u, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
- if (r < 0)
- return r;
- }
+ /* This unit was generated because /proc/self/mountinfo reported it. Remember this, so that by the time we load
+ * the unit file for it (and thus add in extra deps right after) we know what source to attributes the deps
+ * to.*/
+ MOUNT(u)->from_proc_self_mountinfo = true;
+ /* We have only allocated the stub now, let's enqueue this unit for loading now, so that everything else is
+ * loaded in now. */
unit_add_to_load_queue(u);
- flags->is_mounted = true;
- flags->just_mounted = true;
- flags->just_changed = true;
+ *ret_flags = MOUNT_PROC_IS_MOUNTED | MOUNT_PROC_JUST_MOUNTED | MOUNT_PROC_JUST_CHANGED;
+ *ret = TAKE_PTR(u);
return 0;
}
@@ -1457,11 +1508,10 @@ static int mount_setup_existing_unit(
const char *where,
const char *options,
const char *fstype,
- MountSetupFlags *flags) {
+ MountProcFlags *ret_flags) {
- MountParameters *p;
- bool load_extras = false;
- int r1, r2, r3;
+ MountProcFlags flags = MOUNT_PROC_IS_MOUNTED;
+ int r;
assert(u);
assert(flags);
@@ -1472,49 +1522,38 @@ static int mount_setup_existing_unit(
return -ENOMEM;
}
- /* Make sure to initialize those fields before mount_is_extrinsic(). */
- p = &MOUNT(u)->parameters_proc_self_mountinfo;
-
- r1 = free_and_strdup(&p->what, what);
- r2 = free_and_strdup(&p->options, options);
- r3 = free_and_strdup(&p->fstype, fstype);
- if (r1 < 0 || r2 < 0 || r3 < 0)
- return -ENOMEM;
-
- flags->just_changed = r1 > 0 || r2 > 0 || r3 > 0;
- flags->is_mounted = true;
- flags->just_mounted = !MOUNT(u)->from_proc_self_mountinfo || MOUNT(u)->just_mounted;
-
- MOUNT(u)->from_proc_self_mountinfo = true;
+ r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ flags |= MOUNT_PROC_JUST_CHANGED;
- if (!mount_is_extrinsic(MOUNT(u)) && mount_is_network(p)) {
- /* _netdev option may have shown up late, or on a
- * remount. Add remote-fs dependencies, even though
- * local-fs ones may already be there.
- *
- * Note: due to a current limitation (we don't track
- * in the dependency "Set*" objects who created a
- * dependency), we can only add deps, never lose them,
- * until the next full daemon-reload. */
- unit_add_dependency_by_name(u, UNIT_BEFORE, SPECIAL_REMOTE_FS_TARGET, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
- load_extras = true;
+ if (!MOUNT(u)->from_proc_self_mountinfo) {
+ flags |= MOUNT_PROC_JUST_MOUNTED;
+ MOUNT(u)->from_proc_self_mountinfo = true;
}
- if (u->load_state == UNIT_NOT_FOUND) {
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ /* The unit was previously not found or otherwise not loaded. Now that the unit shows up in
+ * /proc/self/mountinfo we should reconsider it this, hence set it to UNIT_LOADED. */
u->load_state = UNIT_LOADED;
u->load_error = 0;
- /* Load in the extras later on, after we
- * finished initialization of the unit */
-
- /* FIXME: since we're going to load the unit later on, why setting load_extras=true ? */
- load_extras = true;
- flags->just_changed = true;
+ flags |= MOUNT_PROC_JUST_CHANGED;
}
- if (load_extras)
- return mount_add_extras(MOUNT(u));
+ if (FLAGS_SET(flags, MOUNT_PROC_JUST_CHANGED)) {
+ /* If things changed, then make sure that all deps are regenerated. Let's
+ * first remove all automatic deps, and then add in the new ones. */
+
+ unit_remove_dependencies(u, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
+ r = mount_add_extras(MOUNT(u));
+ if (r < 0)
+ return r;
+ }
+
+ *ret_flags = flags;
return 0;
}
@@ -1527,7 +1566,7 @@ static int mount_setup_unit(
bool set_flags) {
_cleanup_free_ char *e = NULL;
- MountSetupFlags flags;
+ MountProcFlags flags;
Unit *u;
int r;
@@ -1551,45 +1590,32 @@ static int mount_setup_unit(
r = unit_name_from_path(where, ".mount", &e);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to generate unit name from path '%s': %m", where);
u = manager_get_unit(m, e);
- if (!u) {
- /* First time we see this mount point meaning that it's
- * not been initiated by a mount unit but rather by the
- * sysadmin having called mount(8) directly. */
- r = unit_new_for_name(m, sizeof(Mount), e, &u);
- if (r < 0)
- goto fail;
-
- r = mount_setup_new_unit(u, what, where, options, fstype, &flags);
- if (r < 0)
- unit_free(u);
- } else
+ if (u)
r = mount_setup_existing_unit(u, what, where, options, fstype, &flags);
-
+ else
+ /* First time we see this mount point meaning that it's not been initiated by a mount unit but rather
+ * by the sysadmin having called mount(8) directly. */
+ r = mount_setup_new_unit(m, e, what, where, options, fstype, &flags, &u);
if (r < 0)
- goto fail;
+ return log_warning_errno(r, "Failed to set up mount unit: %m");
- if (set_flags) {
- MOUNT(u)->is_mounted = flags.is_mounted;
- MOUNT(u)->just_mounted = flags.just_mounted;
- MOUNT(u)->just_changed = flags.just_changed;
- }
-
- if (flags.just_changed)
+ /* If the mount changed properties or state, let's notify our clients */
+ if (flags & (MOUNT_PROC_JUST_CHANGED|MOUNT_PROC_JUST_MOUNTED))
unit_add_to_dbus_queue(u);
+ if (set_flags)
+ MOUNT(u)->proc_flags = flags;
+
return 0;
-fail:
- log_warning_errno(r, "Failed to set up mount unit: %m");
- return r;
}
static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
_cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL;
_cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL;
- int r = 0;
+ int r;
assert(m);
@@ -1602,7 +1628,6 @@ static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
if (r < 0)
return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m");
- r = 0;
for (;;) {
struct libmnt_fs *fs;
const char *device, *path, *options, *fstype;
@@ -1631,12 +1656,10 @@ static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
device_found_node(m, d, DEVICE_FOUND_MOUNT, DEVICE_FOUND_MOUNT);
- k = mount_setup_unit(m, d, p, options, fstype, set_flags);
- if (r == 0 && k < 0)
- r = k;
+ (void) mount_setup_unit(m, d, p, options, fstype, set_flags);
}
- return r;
+ return 0;
}
static void mount_shutdown(Manager *m) {
@@ -1694,7 +1717,7 @@ static void mount_enumerate_perpetual(Manager *m) {
static bool mount_is_mounted(Mount *m) {
assert(m);
- return UNIT(m)->perpetual || m->is_mounted;
+ return UNIT(m)->perpetual || FLAGS_SET(m->proc_flags, MOUNT_PROC_IS_MOUNTED);
}
static void mount_enumerate(Manager *m) {
@@ -1758,7 +1781,7 @@ fail:
}
static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
- _cleanup_set_free_ Set *around = NULL, *gone = NULL;
+ _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
Manager *m = userdata;
const char *what;
Iterator i;
@@ -1783,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
if (r == 0)
rescan = true;
else if (r < 0)
- return log_error_errno(r, "Failed to drain libmount events");
+ return log_error_errno(r, "Failed to drain libmount events: %m");
} while (r == 0);
log_debug("libmount event [rescan: %s]", yes_no(rescan));
@@ -1794,11 +1817,8 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
r = mount_load_proc_self_mountinfo(m, true);
if (r < 0) {
/* Reset flags, just in case, for later calls */
- LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
- Mount *mount = MOUNT(u);
-
- mount->is_mounted = mount->just_mounted = mount->just_changed = false;
- }
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT])
+ MOUNT(u)->proc_flags = 0;
return 0;
}
@@ -1819,7 +1839,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
/* Remember that this device might just have disappeared */
if (set_ensure_allocated(&gone, &path_hash_ops) < 0 ||
- set_put(gone, mount->parameters_proc_self_mountinfo.what) < 0)
+ set_put_strdup(gone, mount->parameters_proc_self_mountinfo.what) < 0)
log_oom(); /* we don't care too much about OOM here... */
}
@@ -1828,10 +1848,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
switch (mount->state) {
case MOUNT_MOUNTED:
- /* This has just been unmounted by
- * somebody else, follow the state
- * change. */
- mount->result = MOUNT_SUCCESS; /* make sure we forget any earlier umount failures */
+ /* This has just been unmounted by somebody else, follow the state change. */
mount_enter_dead(mount, MOUNT_SUCCESS);
break;
@@ -1839,7 +1856,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
break;
}
- } else if (mount->just_mounted || mount->just_changed) {
+ } else if (mount->proc_flags & (MOUNT_PROC_JUST_MOUNTED|MOUNT_PROC_JUST_CHANGED)) {
/* A mount point was added or changed */
@@ -1850,7 +1867,8 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
/* This has just been mounted by somebody else, follow the state change, but let's
* generate a new invocation ID for this implicitly and automatically. */
- (void) unit_acquire_invocation_id(UNIT(mount));
+ (void) unit_acquire_invocation_id(u);
+ mount_cycle_clear(mount);
mount_enter_mounted(mount, MOUNT_SUCCESS);
break;
@@ -1872,14 +1890,15 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
if (mount_is_mounted(mount) &&
mount->from_proc_self_mountinfo &&
mount->parameters_proc_self_mountinfo.what) {
+ /* Track devices currently used */
if (set_ensure_allocated(&around, &path_hash_ops) < 0 ||
- set_put(around, mount->parameters_proc_self_mountinfo.what) < 0)
+ set_put_strdup(around, mount->parameters_proc_self_mountinfo.what) < 0)
log_oom();
}
/* Reset the flags for later calls */
- mount->is_mounted = mount->just_mounted = mount->just_changed = false;
+ mount->proc_flags = 0;
}
SET_FOREACH(what, gone, i) {
diff --git a/src/core/mount.h b/src/core/mount.h
index 67ab8ecf93..2e59f1fe04 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -34,6 +34,13 @@ typedef struct MountParameters {
char *fstype;
} MountParameters;
+/* Used while looking for mount points that vanished or got added from/to /proc/self/mountinfo */
+typedef enum MountProcFlags {
+ MOUNT_PROC_IS_MOUNTED = 1 << 0,
+ MOUNT_PROC_JUST_MOUNTED = 1 << 1,
+ MOUNT_PROC_JUST_CHANGED = 1 << 2,
+} MountProcFlags;
+
struct Mount {
Unit meta;
@@ -45,11 +52,7 @@ struct Mount {
bool from_proc_self_mountinfo:1;
bool from_fragment:1;
- /* Used while looking for mount points that vanished or got
- * added from/to /proc/self/mountinfo */
- bool is_mounted:1;
- bool just_mounted:1;
- bool just_changed:1;
+ MountProcFlags proc_flags;
bool sloppy_options;
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e4930db15c..c2ca3e0334 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -20,6 +20,7 @@
#include "missing.h"
#include "mkdir.h"
#include "mount-util.h"
+#include "mountpoint-util.h"
#include "namespace.h"
#include "path-util.h"
#include "selinux-util.h"
@@ -236,7 +237,8 @@ static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, boo
}
if (!path_is_absolute(e))
- return -EINVAL;
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s", e);
*((*p)++) = (MountEntry) {
.path_const = e,
@@ -263,7 +265,6 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) {
.path_const = *i,
.mode = EMPTY_DIR,
.ignore = false,
- .has_prefix = false,
.read_only = true,
.options_const = "mode=755",
.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
@@ -302,35 +303,33 @@ static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs,
for (i = 0; i < n; i++) {
const TemporaryFileSystem *t = tmpfs + i;
_cleanup_free_ char *o = NULL, *str = NULL;
- unsigned long flags = MS_NODEV|MS_STRICTATIME;
+ unsigned long flags;
bool ro = false;
if (!path_is_absolute(t->path))
- return -EINVAL;
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s",
+ t->path);
- if (!isempty(t->options)) {
- str = strjoin("mode=0755,", t->options);
- if (!str)
- return -ENOMEM;
+ str = strjoin("mode=0755,", t->options);
+ if (!str)
+ return -ENOMEM;
- r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
- if (r < 0)
- return r;
+ r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse mount option '%s': %m", str);
- ro = flags & MS_RDONLY;
- if (ro)
- flags ^= MS_RDONLY;
- }
+ ro = flags & MS_RDONLY;
+ if (ro)
+ flags ^= MS_RDONLY;
*((*p)++) = (MountEntry) {
.path_const = t->path,
.mode = TMPFS,
.read_only = ro,
- .options_malloc = o,
+ .options_malloc = TAKE_PTR(o),
.flags = flags,
};
-
- o = NULL;
}
return 0;
@@ -398,32 +397,22 @@ static int append_protect_system(MountEntry **p, ProtectSystem protect_system, b
}
}
-static int mount_path_compare(const void *a, const void *b) {
- const MountEntry *p = a, *q = b;
+static int mount_path_compare(const MountEntry *a, const MountEntry *b) {
int d;
/* If the paths are not equal, then order prefixes first */
- d = path_compare(mount_entry_path(p), mount_entry_path(q));
+ d = path_compare(mount_entry_path(a), mount_entry_path(b));
if (d != 0)
return d;
/* If the paths are equal, check the mode */
- if (p->mode < q->mode)
- return -1;
- if (p->mode > q->mode)
- return 1;
-
- return 0;
+ return CMP((int) a->mode, (int) b->mode);
}
static int prefix_where_needed(MountEntry *m, size_t n, const char *root_directory) {
size_t i;
- /* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs
- * that. */
-
- if (!root_directory)
- return 0;
+ /* Prefixes all paths in the bind mount table with the root directory if the entry needs that. */
for (i = 0; i < n; i++) {
char *s;
@@ -566,36 +555,44 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, size_t
*n = t - m;
}
-static int clone_device_node(const char *d, const char *temporary_mount, bool *make_devnode) {
- const char *dn;
+static int clone_device_node(
+ const char *d,
+ const char *temporary_mount,
+ bool *make_devnode) {
+
+ _cleanup_free_ char *sl = NULL;
+ const char *dn, *bn, *t;
struct stat st;
int r;
if (stat(d, &st) < 0) {
- if (errno == ENOENT)
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d);
return -ENXIO;
- return -errno;
+ }
+
+ return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d);
}
if (!S_ISBLK(st.st_mode) &&
!S_ISCHR(st.st_mode))
- return -EINVAL;
-
- if (st.st_rdev == 0)
- return -ENXIO;
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Device node '%s' to clone is not a device node, ignoring.",
+ d);
dn = strjoina(temporary_mount, d);
+ /* First, try to create device node properly */
if (*make_devnode) {
mac_selinux_create_file_prepare(d, st.st_mode);
r = mknod(dn, st.st_mode, st.st_rdev);
mac_selinux_create_file_clear();
-
- if (r == 0)
- return 0;
+ if (r >= 0)
+ goto add_symlink;
if (errno != EPERM)
return log_debug_errno(errno, "mknod failed for %s: %m", d);
+ /* This didn't work, let's not try this again for the next iterations. */
*make_devnode = false;
}
@@ -604,9 +601,8 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m
mac_selinux_create_file_prepare(d, 0);
r = mknod(dn, S_IFREG, 0);
mac_selinux_create_file_clear();
-
if (r < 0 && errno != EEXIST)
- return log_debug_errno(errno, "mknod fallback failed for %s: %m", d);
+ return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d);
/* Fallback to bind-mounting:
* The assumption here is that all used device nodes carry standard
@@ -614,7 +610,23 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m
* either be owned by root:root or root:tty (e.g. /dev/tty, /dev/ptmx)
* and should not carry ACLs. */
if (mount(d, dn, NULL, MS_BIND, NULL) < 0)
- return log_debug_errno(errno, "mount failed for %s: %m", d);
+ return log_debug_errno(errno, "Bind mounting failed for '%s': %m", d);
+
+add_symlink:
+ bn = path_startswith(d, "/dev/");
+ if (!bn)
+ return 0;
+
+ /* Create symlinks like /dev/char/1:9 → ../urandom */
+ if (asprintf(&sl, "%s/dev/%s/%u:%u", temporary_mount, S_ISCHR(st.st_mode) ? "char" : "block", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ (void) mkdir_parents(sl, 0755);
+
+ t = strjoina("../", bn);
+
+ if (symlink(t, sl) < 0)
+ log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl);
return 0;
}
@@ -639,35 +651,34 @@ static int mount_private_dev(MountEntry *m) {
u = umask(0000);
if (!mkdtemp(temporary_mount))
- return -errno;
+ return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
dev = strjoina(temporary_mount, "/dev");
(void) mkdir(dev, 0755);
if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to mount tmpfs on '%s': %m", dev);
goto fail;
}
devpts = strjoina(temporary_mount, "/dev/pts");
(void) mkdir(devpts, 0755);
if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to bind mount /dev/pts on '%s': %m", devpts);
goto fail;
}
- /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx
- * when /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible
- * thus, in that case make a clone
- *
- * in nspawn and other containers it will be a symlink, in that case make it a symlink
- */
+ /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx.
+ * When /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible.
+ * Thus, in that case make a clone.
+ * In nspawn and other containers it will be a symlink, in that case make it a symlink. */
r = is_symlink("/dev/ptmx");
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
goto fail;
- if (r > 0) {
+ } else if (r > 0) {
devptmx = strjoina(temporary_mount, "/dev/ptmx");
if (symlink("pts/ptmx", devptmx) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx);
goto fail;
}
} else {
@@ -680,20 +691,23 @@ static int mount_private_dev(MountEntry *m) {
(void) mkdir(devshm, 0755);
r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
if (r < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to bind mount /dev/shm on '%s': %m", devshm);
goto fail;
}
devmqueue = strjoina(temporary_mount, "/dev/mqueue");
(void) mkdir(devmqueue, 0755);
- (void) mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
+ if (mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL) < 0)
+ log_debug_errno(errno, "Failed to bind mount /dev/mqueue on '%s', ignoring: %m", devmqueue);
devhugepages = strjoina(temporary_mount, "/dev/hugepages");
(void) mkdir(devhugepages, 0755);
- (void) mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+ if (mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL) < 0)
+ log_debug_errno(errno, "Failed to bind mount /dev/hugepages on '%s', ignoring: %m", devhugepages);
devlog = strjoina(temporary_mount, "/dev/log");
- (void) symlink("/run/systemd/journal/dev-log", devlog);
+ if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
+ log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
NULSTR_FOREACH(d, devnodes) {
r = clone_device_node(d, temporary_mount, &can_mknod);
@@ -702,7 +716,9 @@ static int mount_private_dev(MountEntry *m) {
goto fail;
}
- dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
+ r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to setup basic device tree at '%s', ignoring: %m", temporary_mount);
/* Create the /dev directory if missing. It is more likely to be
* missing when the service is started with RootDirectory. This is
@@ -711,9 +727,12 @@ static int mount_private_dev(MountEntry *m) {
(void) mkdir_p_label(mount_entry_path(m), 0755);
/* Unmount everything in old /dev */
- umount_recursive(mount_entry_path(m), 0);
+ r = umount_recursive(mount_entry_path(m), 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to unmount directories below '%s', ignoring: %m", mount_entry_path(m));
+
if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to move mount point '%s' to '%s': %m", dev, mount_entry_path(m));
goto fail;
}
@@ -836,10 +855,10 @@ static int follow_symlink(
if (r > 0) /* Reached the end, nothing more to resolve */
return 1;
- if (m->n_followed >= CHASE_SYMLINKS_MAX) { /* put a boundary on things */
- log_debug("Symlink loop on '%s'.", mount_entry_path(m));
- return -ELOOP;
- }
+ if (m->n_followed >= CHASE_SYMLINKS_MAX) /* put a boundary on things */
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "Symlink loop on '%s'.",
+ mount_entry_path(m));
log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target);
@@ -881,10 +900,9 @@ static int apply_mount(
}
what = mode_to_inaccessible_node(target.st_mode);
- if (!what) {
- log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed");
- return -ELOOP;
- }
+ if (!what)
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "File type not supported for inaccessible mounts. Note that symlinks are not allowed");
break;
}
@@ -999,7 +1017,17 @@ static int apply_mount(
return 0;
}
+/* Change the per-mount readonly flag on an existing mount */
+static int remount_bind_readonly(const char *path, unsigned long orig_flags) {
+ int r;
+
+ r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL);
+
+ return r < 0 ? -errno : 0;
+}
+
static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) {
+ bool submounts = false;
int r = 0;
assert(m);
@@ -1007,15 +1035,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
if (mount_entry_read_only(m)) {
if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
- /* Make superblock readonly */
- if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0)
- r = -errno;
- } else
+ r = remount_bind_readonly(mount_entry_path(m), m->flags);
+ } else {
+ submounts = true;
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
+ }
} else if (m->mode == PRIVATE_DEV) {
- /* Superblock can be readonly but the submounts can't */
- if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
- r = -errno;
+ /* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag.
+ * We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */
+ r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS);
} else
return 0;
@@ -1026,27 +1054,28 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
if (r == -ENOENT && m->ignore)
r = 0;
- return r;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to re-mount '%s'%s read-only: %m", mount_entry_path(m),
+ submounts ? " and its submounts" : "");
+
+ return 0;
}
-static bool namespace_info_mount_apivfs(const char *root_directory, const NamespaceInfo *ns_info) {
+static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
assert(ns_info);
/*
* ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=,
* since to protect the API VFS mounts, they need to be around in the
- * first place... and RootDirectory= or RootImage= need to be set.
+ * first place...
*/
- /* root_directory should point to a mount point */
- return root_directory &&
- (ns_info->mount_apivfs ||
- ns_info->protect_control_groups ||
- ns_info->protect_kernel_tunables);
+ return ns_info->mount_apivfs ||
+ ns_info->protect_control_groups ||
+ ns_info->protect_kernel_tunables;
}
static size_t namespace_calculate_mounts(
- const char* root_directory,
const NamespaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
@@ -1088,14 +1117,15 @@ static size_t namespace_calculate_mounts(
(ns_info->protect_control_groups ? 1 : 0) +
(ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
protect_home_cnt + protect_system_cnt +
- (namespace_info_mount_apivfs(root_directory, ns_info) ? ELEMENTSOF(apivfs_table) : 0);
+ (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0);
}
static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) {
+ assert(root_directory);
assert(n_mounts);
assert(mounts || *n_mounts == 0);
- qsort_safe(mounts, *n_mounts, sizeof(MountEntry), mount_path_compare);
+ typesafe_qsort(mounts, *n_mounts, mount_path_compare);
drop_duplicates(mounts, n_mounts);
drop_outside_root(root_directory, mounts, n_mounts);
@@ -1127,11 +1157,9 @@ int setup_namespace(
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
- size_t root_hash_size = 0;
- const char *root;
- size_t n_mounts;
- bool make_slave;
+ size_t n_mounts, root_hash_size = 0;
bool require_prefix = false;
+ const char *root;
int r = 0;
assert(ns_info);
@@ -1151,19 +1179,19 @@ int setup_namespace(
dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR,
&loop_device);
if (r < 0)
- return r;
+ return log_debug_errno(r, "Failed to create loop device for root image: %m");
r = root_hash_load(root_image, &root_hash, &root_hash_size);
if (r < 0)
- return r;
+ return log_debug_errno(r, "Failed to load root hash: %m");
r = dissect_image(loop_device->fd, root_hash, root_hash_size, dissect_image_flags, &dissected_image);
if (r < 0)
- return r;
+ return log_debug_errno(r, "Failed to dissect image: %m");
r = dissected_image_decrypt(dissected_image, NULL, root_hash, root_hash_size, dissect_image_flags, &decrypted_image);
if (r < 0)
- return r;
+ return log_debug_errno(r, "Failed to decrypt dissected image: %m");
}
if (root_directory)
@@ -1181,7 +1209,6 @@ int setup_namespace(
}
n_mounts = namespace_calculate_mounts(
- root,
ns_info,
read_write_paths,
read_only_paths,
@@ -1192,9 +1219,6 @@ int setup_namespace(
tmp_dir, var_tmp_dir,
protect_home, protect_system);
- /* Set mount slave mode */
- make_slave = root || n_mounts > 0 || ns_info->private_mounts;
-
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix);
@@ -1271,7 +1295,7 @@ int setup_namespace(
if (r < 0)
goto finish;
- if (namespace_info_mount_apivfs(root, ns_info)) {
+ if (namespace_info_mount_apivfs(ns_info)) {
r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths);
if (r < 0)
goto finish;
@@ -1284,33 +1308,44 @@ int setup_namespace(
if (r < 0)
goto finish;
- normalize_mounts(root_directory, mounts, &n_mounts);
+ normalize_mounts(root, mounts, &n_mounts);
}
+ /* All above is just preparation, figuring out what to do. Let's now actually start doing something. */
+
if (unshare(CLONE_NEWNS) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m");
+ if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS))
+ /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place
+ * that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable
+ * error back, which the caller can use to detect this case (and only this) and optionally
+ * continue without namespacing applied. */
+ r = -ENOANO;
+
goto finish;
}
- if (make_slave) {
- /* Remount / as SLAVE so that nothing now mounted in the namespace
- shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
- r = -errno;
- goto finish;
- }
+ /* Remount / as SLAVE so that nothing now mounted in the namespace
+ * shows up in the parent */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to remount '/' as SLAVE: %m");
+ goto finish;
}
if (root_image) {
/* A root image is specified, mount it to the right place */
r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root image: %m");
goto finish;
+ }
if (decrypted_image) {
r = decrypted_image_relinquish(decrypted_image);
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to relinquish decrypted image: %m");
goto finish;
+ }
}
loop_device_relinquish(loop_device);
@@ -1319,20 +1354,22 @@ int setup_namespace(
/* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. */
r = path_is_mount_point(root, NULL, AT_SYMLINK_FOLLOW);
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect that %s is a mount point or not: %m", root);
goto finish;
+ }
if (r == 0) {
if (mount(root, root, NULL, MS_BIND|MS_REC, NULL) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to bind mount '%s': %m", root);
goto finish;
}
}
- } else if (root) {
+ } else {
/* Let's mount the main root directory to the root directory to use */
if (mount("/", root, NULL, MS_BIND|MS_REC, NULL) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to bind mount '/' on '%s': %m", root);
goto finish;
}
}
@@ -1350,7 +1387,7 @@ int setup_namespace(
* For example, this is the case with the option: 'InaccessiblePaths=/proc' */
proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
if (!proc_self_mountinfo) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m");
goto finish;
}
@@ -1385,7 +1422,7 @@ int setup_namespace(
if (!again)
break;
- normalize_mounts(root_directory, mounts, &n_mounts);
+ normalize_mounts(root, mounts, &n_mounts);
}
/* Create a blacklist we can pass to bind_mount_recursive() */
@@ -1402,18 +1439,18 @@ int setup_namespace(
}
}
- if (root) {
- /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
- r = mount_move_root(root);
- if (r < 0)
- goto finish;
+ /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
+ r = mount_move_root(root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root with MS_MOVE: %m");
+ goto finish;
}
/* Remount / as the desired mode. Note that this will not
* reestablish propagation from our side to the host, since
* what's disconnected is disconnected. */
if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to remount '/' with desired mount flags: %m");
goto finish;
}
diff --git a/src/core/path.c b/src/core/path.c
index 68b13b610a..831e49df29 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -8,12 +8,14 @@
#include "bus-error.h"
#include "bus-util.h"
#include "dbus-path.h"
+#include "dbus-unit.h"
#include "fd-util.h"
#include "fs-util.h"
#include "glob-util.h"
#include "macro.h"
#include "mkdir.h"
#include "path.h"
+#include "serialize.h"
#include "special.h"
#include "stat-util.h"
#include "string-table.h"
@@ -145,10 +147,9 @@ int path_spec_fd_event(PathSpec *s, uint32_t revents) {
ssize_t l;
int r = 0;
- if (revents != EPOLLIN) {
- log_error("Got invalid poll event on inotify.");
- return -EINVAL;
- }
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Got invalid poll event on inotify.");
l = read(s->inotify_fd, &buffer, sizeof(buffer));
if (l < 0) {
@@ -298,17 +299,17 @@ static int path_add_default_dependencies(Path *p) {
if (!UNIT(p)->default_dependencies)
return 0;
- r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
if (MANAGER_IS_SYSTEM(UNIT(p)->manager)) {
- r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
}
- return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
static int path_add_trigger_dependencies(Path *p) {
@@ -410,6 +411,9 @@ static void path_set_state(Path *p, PathState state) {
PathState old_state;
assert(p);
+ if (p->state != state)
+ bus_unit_send_pending_change_signal(UNIT(p), false);
+
old_state = p->state;
p->state = state;
@@ -448,9 +452,7 @@ static void path_enter_dead(Path *p, PathResult f) {
if (p->result == PATH_SUCCESS)
p->result = f;
- if (p->result != PATH_SUCCESS)
- log_unit_warning(UNIT(p), "Failed with result '%s'.", path_result_to_string(p->result));
-
+ unit_log_result(UNIT(p), p->result == PATH_SUCCESS, path_result_to_string(p->result));
path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD);
}
@@ -600,8 +602,8 @@ static int path_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", path_state_to_string(p->state));
- unit_serialize_item(u, f, "result", path_result_to_string(p->result));
+ (void) serialize_item(f, "state", path_state_to_string(p->state));
+ (void) serialize_item(f, "result", path_result_to_string(p->result));
return 0;
}
diff --git a/src/core/scope.c b/src/core/scope.c
index 751556fecf..e478661f94 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -5,9 +5,11 @@
#include "alloc-util.h"
#include "dbus-scope.h"
+#include "dbus-unit.h"
#include "load-dropin.h"
#include "log.h"
#include "scope.h"
+#include "serialize.h"
#include "special.h"
#include "string-table.h"
#include "string-util.h"
@@ -81,6 +83,9 @@ static void scope_set_state(Scope *s, ScopeState state) {
ScopeState old_state;
assert(s);
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
old_state = s->state;
s->state = state;
@@ -110,7 +115,7 @@ static int scope_add_default_dependencies(Scope *s) {
r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_BEFORE, UNIT_CONFLICTS,
- SPECIAL_SHUTDOWN_TARGET, NULL, true,
+ SPECIAL_SHUTDOWN_TARGET, true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
@@ -239,9 +244,7 @@ static void scope_enter_dead(Scope *s, ScopeResult f) {
if (s->result == SCOPE_SUCCESS)
s->result = f;
- if (s->result != SCOPE_SUCCESS)
- log_unit_warning(UNIT(s), "Failed with result '%s'.", scope_result_to_string(s->result));
-
+ unit_log_result(UNIT(s), s->result == SCOPE_SUCCESS, scope_result_to_string(s->result));
scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD);
}
@@ -402,11 +405,11 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", scope_state_to_string(s->state));
- unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned));
+ (void) serialize_item(f, "state", scope_state_to_string(s->state));
+ (void) serialize_bool(f, "was-abandoned", s->was_abandoned);
if (s->controller)
- unit_serialize_item(u, f, "controller", s->controller);
+ (void) serialize_item(f, "controller", s->controller);
return 0;
}
@@ -441,7 +444,7 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F
r = free_and_strdup(&s->controller, value);
if (r < 0)
- log_oom();
+ return log_oom();
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
index 39e994afd7..0c6d885b8c 100644
--- a/src/core/selinux-access.c
+++ b/src/core/selinux-access.c
@@ -1,7 +1,4 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
-/***
- Copyright © 2012 Dan Walsh
-***/
#include "selinux-access.h"
diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
index 59f2e60c77..1e75930f57 100644
--- a/src/core/selinux-access.h
+++ b/src/core/selinux-access.h
@@ -1,10 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#pragma once
-/***
- Copyright © 2012 Dan Walsh
-***/
-
#include "sd-bus.h"
#include "bus-util.h"
diff --git a/src/core/service.c b/src/core/service.c
index db1356c417..cfa3271232 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -12,6 +12,7 @@
#include "bus-kernel.h"
#include "bus-util.h"
#include "dbus-service.h"
+#include "dbus-unit.h"
#include "def.h"
#include "env-util.h"
#include "escape.h"
@@ -27,6 +28,7 @@
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "serialize.h"
#include "service.h"
#include "signal-util.h"
#include "special.h"
@@ -48,7 +50,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
[SERVICE_EXITED] = UNIT_ACTIVE,
[SERVICE_RELOAD] = UNIT_RELOADING,
[SERVICE_STOP] = UNIT_DEACTIVATING,
- [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
@@ -69,7 +71,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_EXITED] = UNIT_ACTIVE,
[SERVICE_RELOAD] = UNIT_RELOADING,
[SERVICE_STOP] = UNIT_DEACTIVATING,
- [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
[SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
[SERVICE_STOP_POST] = UNIT_DEACTIVATING,
@@ -79,9 +81,10 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING
};
-static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata);
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
static void service_enter_reload_by_notify(Service *s);
@@ -105,6 +108,8 @@ static void service_init(Unit *u) {
s->exec_context.keyring_mode = MANAGER_IS_SYSTEM(u->manager) ?
EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT;
+
+ s->watchdog_original_usec = USEC_INFINITY;
}
static void service_unwatch_control_pid(Service *s) {
@@ -193,19 +198,21 @@ static usec_t service_get_watchdog_usec(Service *s) {
if (s->watchdog_override_enable)
return s->watchdog_override_usec;
- else
- return s->watchdog_usec;
+
+ return s->watchdog_original_usec;
}
static void service_start_watchdog(Service *s) {
- int r;
usec_t watchdog_usec;
+ int r;
assert(s);
watchdog_usec = service_get_watchdog_usec(s);
- if (IN_SET(watchdog_usec, 0, USEC_INFINITY))
+ if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) {
+ service_stop_watchdog(s);
return;
+ }
if (s->watchdog_event_source) {
r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec));
@@ -233,50 +240,55 @@ static void service_start_watchdog(Service *s) {
* of living before we consider a service died. */
r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE);
}
-
if (r < 0)
log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m");
}
-static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) {
- assert(s);
+static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
+ usec_t current;
+ int r;
- if (s->timer_event_source) {
- uint64_t current = 0, extended = 0;
- int r;
+ assert(s);
- if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY))
- return;
+ /* Extends the specified event source timer to at least the specified time, unless it is already later
+ * anyway. */
- extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec);
+ if (!source)
+ return;
- r = sd_event_source_get_time(s->timer_event_source, &current);
- if (r < 0)
- log_unit_error_errno(UNIT(s), r, "Failed to retrieve timeout timer: %m");
- else if (extended > current) {
- r = sd_event_source_set_time(s->timer_event_source, extended);
- if (r < 0)
- log_unit_warning_errno(UNIT(s), r, "Failed to set timeout timer: %m");
- }
+ r = sd_event_source_get_time(source, &current);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to retrieve timeout time for event source '%s', ignoring: %m", strna(desc));
+ return;
+ }
- if (s->watchdog_event_source) {
- /* extend watchdog if necessary. We've asked for an extended timeout so we
- * shouldn't expect a watchdog timeout in the interval in between */
- r = sd_event_source_get_time(s->watchdog_event_source, &current);
- if (r < 0) {
- log_unit_error_errno(UNIT(s), r, "Failed to retrieve watchdog timer: %m");
- return;
- }
+ if (current >= extended) /* Current timeout is already longer, ignore this. */
+ return;
- if (extended > current) {
- r = sd_event_source_set_time(s->watchdog_event_source, extended);
- if (r < 0)
- log_unit_warning_errno(UNIT(s), r, "Failed to set watchdog timer: %m");
- }
- }
+ r = sd_event_source_set_time(source, extended);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to set timeout time for even source '%s', ignoring %m", strna(desc));
}
}
+static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) {
+ usec_t extended;
+
+ assert(s);
+
+ if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY))
+ return;
+
+ extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec);
+
+ service_extend_event_source_timeout(s, s->timer_event_source, extended);
+ service_extend_event_source_timeout(s, s->watchdog_event_source, extended);
+}
+
static void service_reset_watchdog(Service *s) {
assert(s);
@@ -284,7 +296,7 @@ static void service_reset_watchdog(Service *s) {
service_start_watchdog(s);
}
-static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
+static void service_override_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
assert(s);
s->watchdog_override_enable = true;
@@ -389,6 +401,7 @@ static void service_done(Unit *u) {
service_stop_watchdog(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
service_release_resources(u);
}
@@ -536,8 +549,13 @@ static int service_verify(Service *s) {
if (UNIT(s)->load_state != UNIT_LOADED)
return 0;
- if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP]) {
- log_unit_error(UNIT(s), "Service lacks both ExecStart= and ExecStop= setting. Refusing.");
+ if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP]
+ && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ /* FailureAction= only makes sense if one of the start or stop commands is specified.
+ * SuccessAction= will be executed unconditionally if no commands are specified. Hence,
+ * either a command or SuccessAction= are required. */
+
+ log_unit_error(UNIT(s), "Service has no ExecStart=, ExecStop=, or SuccessAction=. Refusing.");
return -ENOEXEC;
}
@@ -546,8 +564,8 @@ static int service_verify(Service *s) {
return -ENOEXEC;
}
- if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START]) {
- log_unit_error(UNIT(s), "Service has no ExecStart= setting, which is only allowed for RemainAfterExit=yes services. Refusing.");
+ if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START] && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ log_unit_error(UNIT(s), "Service has no ExecStart= and no SuccessAction= settings and does not have RemainAfterExit=yes set. Refusing.");
return -ENOEXEC;
}
@@ -607,7 +625,7 @@ static int service_add_default_dependencies(Service *s) {
* require it, so that we fail if we can't acquire
* it. */
- r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
} else {
@@ -615,7 +633,7 @@ static int service_add_default_dependencies(Service *s) {
/* In the --user instance there's no sysinit.target,
* in that case require basic.target instead. */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
}
@@ -623,12 +641,12 @@ static int service_add_default_dependencies(Service *s) {
/* Second, if the rest of the base system is in the same
* transaction, order us after it, but do not pull it in or
* even require it. */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
/* Third, add us in for normal shutdown. */
- return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
static void service_fix_output(Service *s) {
@@ -659,12 +677,12 @@ static int service_setup_bus_name(Service *s) {
if (!s->bus_name)
return 0;
- r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
/* We always want to be ordered against dbus.socket if both are in the transaction. */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
@@ -917,8 +935,8 @@ static int service_load_pid_file(Service *s, bool may_warn) {
prio = may_warn ? LOG_INFO : LOG_DEBUG;
fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN|CHASE_SAFE, NULL);
- if (fd == -EPERM) {
- log_unit_full(UNIT(s), LOG_DEBUG, fd, "Permission denied while opening PID file or potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
+ if (fd == -ENOLINK) {
+ log_unit_full(UNIT(s), LOG_DEBUG, fd, "Potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
questionable_pid_file = true;
@@ -1018,6 +1036,9 @@ static void service_set_state(Service *s, ServiceState state) {
assert(s);
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
table = s->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
old_state = s->state;
@@ -1029,7 +1050,7 @@ static void service_set_state(Service *s, ServiceState state) {
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING,
SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
SERVICE_AUTO_RESTART))
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -1037,7 +1058,7 @@ static void service_set_state(Service *s, ServiceState state) {
if (!IN_SET(state,
SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
service_unwatch_main_pid(s);
s->main_command = NULL;
@@ -1046,7 +1067,7 @@ static void service_set_state(Service *s, ServiceState state) {
if (!IN_SET(state,
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
service_unwatch_control_pid(s);
s->control_command = NULL;
@@ -1061,11 +1082,14 @@ static void service_set_state(Service *s, ServiceState state) {
if (!IN_SET(state,
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
!(state == SERVICE_DEAD && UNIT(s)->job))
service_close_socket_fd(s);
+ if (state != SERVICE_START)
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
service_stop_watchdog(s);
@@ -1097,7 +1121,7 @@ static usec_t service_coldplug_timeout(Service *s) {
return usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec);
case SERVICE_STOP:
- case SERVICE_STOP_SIGABRT:
+ case SERVICE_STOP_WATCHDOG:
case SERVICE_STOP_SIGTERM:
case SERVICE_STOP_SIGKILL:
case SERVICE_STOP_POST:
@@ -1132,7 +1156,7 @@ static int service_coldplug(Unit *u) {
(IN_SET(s->deserialized_state,
SERVICE_START, SERVICE_START_POST,
SERVICE_RUNNING, SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
r = unit_watch_pid(UNIT(s), s->main_pid);
if (r < 0)
@@ -1144,7 +1168,7 @@ static int service_coldplug(Unit *u) {
IN_SET(s->deserialized_state,
SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
SERVICE_RELOAD,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
r = unit_watch_pid(UNIT(s), s->control_pid);
if (r < 0)
@@ -1178,21 +1202,23 @@ static int service_coldplug(Unit *u) {
return 0;
}
-static int service_collect_fds(Service *s,
- int **fds,
- char ***fd_names,
- unsigned *n_storage_fds,
- unsigned *n_socket_fds) {
+static int service_collect_fds(
+ Service *s,
+ int **fds,
+ char ***fd_names,
+ size_t *n_socket_fds,
+ size_t *n_storage_fds) {
_cleanup_strv_free_ char **rfd_names = NULL;
_cleanup_free_ int *rfds = NULL;
- unsigned rn_socket_fds = 0, rn_storage_fds = 0;
+ size_t rn_socket_fds = 0, rn_storage_fds = 0;
int r;
assert(s);
assert(fds);
assert(fd_names);
assert(n_socket_fds);
+ assert(n_storage_fds);
if (s->socket_fd >= 0) {
@@ -1203,7 +1229,7 @@ static int service_collect_fds(Service *s,
return -ENOMEM;
rfds[0] = s->socket_fd;
- rfd_names = strv_new("connection", NULL);
+ rfd_names = strv_new("connection");
if (!rfd_names)
return -ENOMEM;
@@ -1256,7 +1282,7 @@ static int service_collect_fds(Service *s,
if (s->n_fd_store > 0) {
ServiceFDStore *fs;
- unsigned n_fds;
+ size_t n_fds;
char **nl;
int *t;
@@ -1294,6 +1320,63 @@ static int service_collect_fds(Service *s,
return 0;
}
+static int service_allocate_exec_fd_event_source(
+ Service *s,
+ int fd,
+ sd_event_source **ret_event_source) {
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(ret_event_source);
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &source, fd, 0, service_dispatch_exec_io, s);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m");
+
+ /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */
+
+ r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m");
+
+ (void) sd_event_source_set_description(source, "service event_fd");
+
+ r = sd_event_source_set_io_fd_own(source, true);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to pass ownership of fd to event source: %m");
+
+ *ret_event_source = TAKE_PTR(source);
+ return 0;
+}
+
+static int service_allocate_exec_fd(
+ Service *s,
+ sd_event_source **ret_event_source,
+ int* ret_exec_fd) {
+
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
+ int r;
+
+ assert(s);
+ assert(ret_event_source);
+ assert(ret_exec_fd);
+
+ if (pipe2(p, O_CLOEXEC|O_NONBLOCK) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to allocate exec_fd pipe: %m");
+
+ r = service_allocate_exec_fd_event_source(s, p[0], ret_event_source);
+ if (r < 0)
+ return r;
+
+ p[0] = -1;
+ *ret_exec_fd = TAKE_FD(p[1]);
+
+ return 0;
+}
+
static bool service_exec_needs_notify_socket(Service *s, ExecFlags flags) {
assert(s);
@@ -1320,14 +1403,17 @@ static int service_spawn(
ExecFlags flags,
pid_t *_pid) {
- ExecParameters exec_params = {
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = flags,
.stdin_fd = -1,
.stdout_fd = -1,
.stderr_fd = -1,
+ .exec_fd = -1,
};
_cleanup_strv_free_ char **final_env = NULL, **our_env = NULL, **fd_names = NULL;
- unsigned n_storage_fds = 0, n_socket_fds = 0, n_env = 0;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
+ size_t n_socket_fds = 0, n_storage_fds = 0, n_env = 0;
+ _cleanup_close_ int exec_fd = -1;
_cleanup_free_ int *fds = NULL;
pid_t pid;
int r;
@@ -1336,7 +1422,7 @@ static int service_spawn(
assert(c);
assert(_pid);
- r = unit_prepare_exec(UNIT(s));
+ r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
if (r < 0)
return r;
@@ -1353,11 +1439,19 @@ static int service_spawn(
s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
- r = service_collect_fds(s, &fds, &fd_names, &n_storage_fds, &n_socket_fds);
+ r = service_collect_fds(s, &fds, &fd_names, &n_socket_fds, &n_storage_fds);
if (r < 0)
return r;
- log_unit_debug(UNIT(s), "Passing %i fds to service", n_storage_fds + n_socket_fds);
+ log_unit_debug(UNIT(s), "Passing %zu fds to service", n_socket_fds + n_storage_fds);
+ }
+
+ if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
+ assert(!s->exec_fd_event_source);
+
+ r = service_allocate_exec_fd(s, &exec_fd_source, &exec_fd);
+ if (r < 0)
+ return r;
}
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -1433,32 +1527,31 @@ static int service_spawn(
}
}
- unit_set_exec_params(UNIT(s), &exec_params);
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
final_env = strv_env_merge(2, exec_params.environment, our_env, NULL);
if (!final_env)
return -ENOMEM;
- /* System services should get a new keyring by default. */
- SET_FLAG(exec_params.flags, EXEC_NEW_KEYRING, MANAGER_IS_SYSTEM(UNIT(s)->manager));
-
/* System D-Bus needs nss-systemd disabled, so that we don't deadlock */
SET_FLAG(exec_params.flags, EXEC_NSS_BYPASS_BUS,
MANAGER_IS_SYSTEM(UNIT(s)->manager) && unit_has_name(UNIT(s), SPECIAL_DBUS_SERVICE));
- exec_params.argv = c->argv;
- exec_params.environment = final_env;
+ strv_free_and_replace(exec_params.environment, final_env);
exec_params.fds = fds;
exec_params.fd_names = fd_names;
- exec_params.n_storage_fds = n_storage_fds;
exec_params.n_socket_fds = n_socket_fds;
- exec_params.watchdog_usec = s->watchdog_usec;
+ exec_params.n_storage_fds = n_storage_fds;
+ exec_params.watchdog_usec = service_get_watchdog_usec(s);
exec_params.selinux_context_net = s->socket_fd_selinux_context_net;
if (s->type == SERVICE_IDLE)
exec_params.idle_pipe = UNIT(s)->manager->idle_pipe;
exec_params.stdin_fd = s->stdin_fd;
exec_params.stdout_fd = s->stdout_fd;
exec_params.stderr_fd = s->stderr_fd;
+ exec_params.exec_fd = exec_fd;
r = exec_spawn(UNIT(s),
c,
@@ -1470,6 +1563,9 @@ static int service_spawn(
if (r < 0)
return r;
+ s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
+ s->exec_fd_hot = false;
+
r = unit_watch_pid(UNIT(s), pid);
if (r < 0) /* FIXME: we need to do something here */
return r;
@@ -1604,8 +1700,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
if (s->result == SERVICE_SUCCESS)
s->result = f;
- if (s->result != SERVICE_SUCCESS)
- log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result));
+ unit_log_result(UNIT(s), s->result == SERVICE_SUCCESS, service_result_to_string(s->result));
if (allow_restart && service_shall_restart(s))
s->will_auto_restart = true;
@@ -1674,7 +1769,6 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
s->result = f;
service_unwatch_control_pid(s);
-
(void) unit_enqueue_rewatch_pids(UNIT(s));
s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
@@ -1684,7 +1778,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1703,8 +1797,8 @@ fail:
static int state_to_kill_operation(ServiceState state) {
switch (state) {
- case SERVICE_STOP_SIGABRT:
- return KILL_ABORT;
+ case SERVICE_STOP_WATCHDOG:
+ return KILL_WATCHDOG;
case SERVICE_STOP_SIGTERM:
case SERVICE_FINAL_SIGTERM:
@@ -1750,9 +1844,9 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
goto fail;
service_set_state(s, state);
- } else if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill)
+ } else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill)
service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
- else if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
service_enter_stop_post(s, SERVICE_SUCCESS);
else if (state == SERVICE_FINAL_SIGTERM && s->kill_context.send_sigkill)
service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
@@ -1764,7 +1858,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
fail:
log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
- if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES);
else
service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
@@ -1799,7 +1893,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1877,7 +1971,7 @@ static void service_enter_start_post(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1947,6 +2041,12 @@ static void service_enter_start(Service *s) {
goto fail;
}
+ /* We force a fake state transition here. Otherwise, the unit would go directly from
+ * SERVICE_DEAD to SERVICE_DEAD without SERVICE_ACTIVATING or SERVICE_ACTIVE
+ * inbetween. This way we can later trigger actions that depend on the state
+ * transition, including SuccessAction=. */
+ service_set_state(s, SERVICE_START);
+
service_enter_start_post(s);
return;
}
@@ -1981,14 +2081,12 @@ static void service_enter_start(Service *s) {
s->control_pid = pid;
service_set_state(s, SERVICE_START);
- } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY)) {
+ } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_EXEC)) {
- /* For oneshot services we wait until the start
- * process exited, too, but it is our main process. */
+ /* For oneshot services we wait until the start process exited, too, but it is our main process. */
- /* For D-Bus services we know the main pid right away,
- * but wait for the bus name to appear on the
- * bus. Notify services are similar. */
+ /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the
+ * bus. 'notify' and 'exec' services are similar. */
service_set_main_pid(s, pid);
service_set_state(s, SERVICE_START);
@@ -2117,7 +2215,7 @@ static void service_enter_reload(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@@ -2157,7 +2255,8 @@ static void service_run_next_control(Service *s) {
timeout,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
(IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
- (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
&s->control_pid);
if (r < 0)
goto fail;
@@ -2216,7 +2315,7 @@ static int service_start(Unit *u) {
/* We cannot fulfill this request right now, try again later
* please! */
if (IN_SET(s->state,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
return -EAGAIN;
@@ -2253,15 +2352,17 @@ static int service_start(Unit *u) {
s->main_pid_alien = false;
s->forbid_restart = false;
- u->reset_accounting = true;
-
s->status_text = mfree(s->status_text);
s->status_errno = 0;
s->notify_state = NOTIFY_UNKNOWN;
+ s->watchdog_original_usec = s->watchdog_usec;
s->watchdog_override_enable = false;
- s->watchdog_override_usec = 0;
+ s->watchdog_override_usec = USEC_INFINITY;
+
+ exec_command_reset_status_list_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
+ exec_status_reset(&s->main_exec_status);
/* This is not an automatic restart? Flush the restart counter then */
if (s->flush_n_restarts) {
@@ -2269,6 +2370,8 @@ static int service_start(Unit *u) {
s->flush_n_restarts = false;
}
+ u->reset_accounting = true;
+
service_enter_start_pre(s);
return 1;
}
@@ -2283,7 +2386,7 @@ static int service_stop(Unit *u) {
/* Already on it */
if (IN_SET(s->state,
- SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
return 0;
@@ -2342,13 +2445,13 @@ static unsigned service_exec_command_index(Unit *u, ServiceExecCommand id, ExecC
}
static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command) {
+ _cleanup_free_ char *args = NULL, *p = NULL;
+ size_t allocated = 0, length = 0;
Service *s = SERVICE(u);
+ const char *type, *key;
ServiceExecCommand id;
unsigned idx;
- const char *type;
char **arg;
- _cleanup_free_ char *args = NULL, *p = NULL;
- size_t allocated = 0, length = 0;
assert(s);
assert(f);
@@ -2367,16 +2470,16 @@ static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command
idx = service_exec_command_index(u, id, command);
STRV_FOREACH(arg, command->argv) {
- size_t n;
_cleanup_free_ char *e = NULL;
+ size_t n;
- e = xescape(*arg, WHITESPACE);
+ e = cescape(*arg);
if (!e)
- return -ENOMEM;
+ return log_oom();
n = strlen(e);
if (!GREEDY_REALLOC(args, allocated, length + 1 + n + 1))
- return -ENOMEM;
+ return log_oom();
if (length > 0)
args[length++] = ' ';
@@ -2386,16 +2489,16 @@ static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command
}
if (!GREEDY_REALLOC(args, allocated, length + 1))
- return -ENOMEM;
+ return log_oom();
+
args[length++] = 0;
- p = xescape(command->path, WHITESPACE);
+ p = cescape(command->path);
if (!p)
return -ENOMEM;
- fprintf(f, "%s-command=%s %u %s %s\n", type, service_exec_command_to_string(id), idx, p, args);
-
- return 0;
+ key = strjoina(type, "-command");
+ return serialize_item_format(f, key, "%s %u %s %s", service_exec_command_to_string(id), idx, p, args);
}
static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
@@ -2407,47 +2510,55 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", service_state_to_string(s->state));
- unit_serialize_item(u, f, "result", service_result_to_string(s->result));
- unit_serialize_item(u, f, "reload-result", service_result_to_string(s->reload_result));
+ (void) serialize_item(f, "state", service_state_to_string(s->state));
+ (void) serialize_item(f, "result", service_result_to_string(s->result));
+ (void) serialize_item(f, "reload-result", service_result_to_string(s->reload_result));
if (s->control_pid > 0)
- unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid);
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
if (s->main_pid_known && s->main_pid > 0)
- unit_serialize_item_format(u, f, "main-pid", PID_FMT, s->main_pid);
+ (void) serialize_item_format(f, "main-pid", PID_FMT, s->main_pid);
- unit_serialize_item(u, f, "main-pid-known", yes_no(s->main_pid_known));
- unit_serialize_item(u, f, "bus-name-good", yes_no(s->bus_name_good));
- unit_serialize_item(u, f, "bus-name-owner", s->bus_name_owner);
+ (void) serialize_bool(f, "main-pid-known", s->main_pid_known);
+ (void) serialize_bool(f, "bus-name-good", s->bus_name_good);
+ (void) serialize_bool(f, "bus-name-owner", s->bus_name_owner);
- unit_serialize_item_format(u, f, "n-restarts", "%u", s->n_restarts);
- unit_serialize_item(u, f, "flush-n-restarts", yes_no(s->flush_n_restarts));
+ (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts);
+ (void) serialize_bool(f, "flush-n-restarts", s->flush_n_restarts);
- r = unit_serialize_item_escaped(u, f, "status-text", s->status_text);
+ r = serialize_item_escaped(f, "status-text", s->status_text);
if (r < 0)
return r;
service_serialize_exec_command(u, f, s->control_command);
service_serialize_exec_command(u, f, s->main_command);
- r = unit_serialize_item_fd(u, f, fds, "stdin-fd", s->stdin_fd);
+ r = serialize_fd(f, fds, "stdin-fd", s->stdin_fd);
if (r < 0)
return r;
- r = unit_serialize_item_fd(u, f, fds, "stdout-fd", s->stdout_fd);
+ r = serialize_fd(f, fds, "stdout-fd", s->stdout_fd);
if (r < 0)
return r;
- r = unit_serialize_item_fd(u, f, fds, "stderr-fd", s->stderr_fd);
+ r = serialize_fd(f, fds, "stderr-fd", s->stderr_fd);
if (r < 0)
return r;
+ if (s->exec_fd_event_source) {
+ r = serialize_fd(f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source));
+ if (r < 0)
+ return r;
+
+ (void) serialize_bool(f, "exec-fd-hot", s->exec_fd_hot);
+ }
+
if (UNIT_ISSET(s->accept_socket)) {
- r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
+ r = serialize_item(f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
if (r < 0)
return r;
}
- r = unit_serialize_item_fd(u, f, fds, "socket-fd", s->socket_fd);
+ r = serialize_fd(f, fds, "socket-fd", s->socket_fd);
if (r < 0)
return r;
@@ -2457,30 +2568,34 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
copy = fdset_put_dup(fds, fs->fd);
if (copy < 0)
- return copy;
+ return log_error_errno(copy, "Failed to copy file descriptor for serialization: %m");
c = cescape(fs->fdname);
+ if (!c)
+ return log_oom();
- unit_serialize_item_format(u, f, "fd-store-fd", "%i %s", copy, strempty(c));
+ (void) serialize_item_format(f, "fd-store-fd", "%i %s", copy, c);
}
if (s->main_exec_status.pid > 0) {
- unit_serialize_item_format(u, f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
- dual_timestamp_serialize(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
- dual_timestamp_serialize(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
+ (void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
+ (void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
+ (void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
- unit_serialize_item_format(u, f, "main-exec-status-code", "%i", s->main_exec_status.code);
- unit_serialize_item_format(u, f, "main-exec-status-status", "%i", s->main_exec_status.status);
+ (void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code);
+ (void) serialize_item_format(f, "main-exec-status-status", "%i", s->main_exec_status.status);
}
}
- dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp);
-
- unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart));
+ (void) serialize_dual_timestamp(f, "watchdog-timestamp", &s->watchdog_timestamp);
+ (void) serialize_bool(f, "forbid-restart", s->forbid_restart);
if (s->watchdog_override_enable)
- unit_serialize_item_format(u, f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+ (void) serialize_item_format(f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+
+ if (s->watchdog_original_usec != USEC_INFINITY)
+ (void) serialize_item_format(f, "watchdog-original-usec", USEC_FMT, s->watchdog_original_usec);
return 0;
}
@@ -2516,10 +2631,10 @@ static int service_deserialize_exec_command(Unit *u, const char *key, const char
_cleanup_free_ char *arg = NULL;
r = extract_first_word(&value, &arg, NULL, EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return r;
if (r == 0)
break;
- else if (r < 0)
- return r;
switch (state) {
case STATE_EXEC_COMMAND_TYPE:
@@ -2658,18 +2773,16 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = cunescape(value, 0, &t);
if (r < 0)
- log_unit_debug_errno(u, r, "Failed to unescape status text: %s", value);
- else {
- free(s->status_text);
- s->status_text = t;
- }
+ log_unit_debug_errno(u, r, "Failed to unescape status text '%s': %m", value);
+ else
+ free_and_replace(s->status_text, t);
} else if (streq(key, "accept-socket")) {
Unit *socket;
r = manager_load_unit(u->manager, value, NULL, NULL, &socket);
if (r < 0)
- log_unit_debug_errno(u, r, "Failed to load accept-socket unit: %s", value);
+ log_unit_debug_errno(u, r, "Failed to load accept-socket unit '%s': %m", value);
else {
unit_ref_set(&s->accept_socket, u, socket);
SOCKET(socket)->n_connections++;
@@ -2731,11 +2844,11 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
else
s->main_exec_status.status = i;
} else if (streq(key, "main-exec-status-start"))
- dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp);
+ deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp);
else if (streq(key, "main-exec-status-exit"))
- dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp);
+ deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp);
else if (streq(key, "watchdog-timestamp"))
- dual_timestamp_deserialize(value, &s->watchdog_timestamp);
+ deserialize_dual_timestamp(value, &s->watchdog_timestamp);
else if (streq(key, "forbid-restart")) {
int b;
@@ -2774,14 +2887,28 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
s->stderr_fd = fdset_remove(fds, fd);
s->exec_context.stdio_as_fds = true;
}
+ } else if (streq(key, "exec-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse exec-fd value: %s", value);
+ else {
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ fd = fdset_remove(fds, fd);
+ if (service_allocate_exec_fd_event_source(s, fd, &s->exec_fd_event_source) < 0)
+ safe_close(fd);
+ }
} else if (streq(key, "watchdog-override-usec")) {
- usec_t watchdog_override_usec;
- if (timestamp_deserialize(value, &watchdog_override_usec) < 0)
+ if (deserialize_usec(value, &s->watchdog_override_usec) < 0)
log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value);
- else {
+ else
s->watchdog_override_enable = true;
- s->watchdog_override_usec = watchdog_override_usec;
- }
+
+ } else if (streq(key, "watchdog-original-usec")) {
+ if (deserialize_usec(value, &s->watchdog_original_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_original_usec value: %s", value);
+
} else if (STR_IN_SET(key, "main-command", "control-command")) {
r = service_deserialize_exec_command(u, key, value);
if (r < 0)
@@ -2857,7 +2984,7 @@ static int service_watch_pid_file(Service *s) {
log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path);
- r = path_spec_watch(s->pid_file_pathspec, service_dispatch_io);
+ r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io);
if (r < 0)
goto fail;
@@ -2901,7 +3028,7 @@ static int service_demand_pid_file(Service *s) {
return service_watch_pid_file(s);
}
-static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
PathSpec *p = userdata;
Service *s;
@@ -2934,6 +3061,59 @@ fail:
return 0;
}
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+ Service *s = SERVICE(userdata);
+
+ assert(s);
+
+ log_unit_debug(UNIT(s), "got exec-fd event");
+
+ /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
+ * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
+ * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
+ * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
+ * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
+ * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
+ * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
+ * sends a zero byte we'll ignore POLLHUP on the fd again. */
+
+ for (;;) {
+ uint8_t x;
+ ssize_t n;
+
+ n = read(fd, &x, sizeof(x));
+ if (n < 0) {
+ if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
+ return 0;
+
+ return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
+ }
+ if (n == 0) { /* EOF → the event we are waiting for */
+
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd");
+
+ s->exec_fd_hot = false;
+
+ /* Nice! This is what we have been waiting for. Transition to next state. */
+ if (s->type == SERVICE_EXEC && s->state == SERVICE_START)
+ service_enter_start_post(s);
+ } else
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd while it was disabled, ignoring.");
+
+ return 0;
+ }
+
+ /* A byte was read → this turns on/off the exec fd logic */
+ assert(n == sizeof(x));
+ s->exec_fd_hot = x;
+ }
+
+ return 0;
+}
+
static void service_notify_cgroup_empty_event(Unit *u) {
Service *s = SERVICE(u);
@@ -2980,7 +3160,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
service_enter_running(s, SERVICE_SUCCESS);
break;
- case SERVICE_STOP_SIGABRT:
+ case SERVICE_STOP_WATCHDOG:
case SERVICE_STOP_SIGTERM:
case SERVICE_STOP_SIGKILL:
@@ -3054,21 +3234,13 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
* and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
- * that the service already logged the reason at a higher log level on its own. However, if the service
- * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise
- * our log level to WARNING then. */
-
- log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG :
- (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING),
- LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s",
- sigchld_code_to_string(code), status,
- strna(code == CLD_EXITED
- ? exit_status_to_string(status, EXIT_STATUS_FULL)
- : signal_to_string(status))),
- "EXIT_CODE=%s", sigchld_code_to_string(code),
- "EXIT_STATUS=%i", status,
- LOG_UNIT_ID(u),
- LOG_UNIT_INVOCATION_ID(u));
+ * that the service already logged the reason at a higher log level on its own. (Internally,
+ * unit_log_process_exit() will possibly bump this to WARNING if the service died due to a signal.) */
+ unit_log_process_exit(
+ u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Main process",
+ service_exec_command_to_string(SERVICE_EXEC_START),
+ code, status);
if (s->result == SERVICE_SUCCESS)
s->result = f;
@@ -3124,7 +3296,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
service_enter_running(s, f);
break;
- case SERVICE_STOP_SIGABRT:
+ case SERVICE_STOP_WATCHDOG:
case SERVICE_STOP_SIGTERM:
case SERVICE_STOP_SIGKILL:
@@ -3157,9 +3329,11 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
f = SERVICE_SUCCESS;
}
- log_unit_full(u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
- "Control process exited, code=%s status=%i",
- sigchld_code_to_string(code), status);
+ unit_log_process_exit(
+ u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Control process",
+ service_exec_command_to_string(s->control_command_id),
+ code, status);
if (s->result == SERVICE_SUCCESS)
s->result = f;
@@ -3259,7 +3433,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
break;
- case SERVICE_STOP_SIGABRT:
+ case SERVICE_STOP_WATCHDOG:
case SERVICE_STOP_SIGTERM:
case SERVICE_STOP_SIGKILL:
if (main_pid_good(s) <= 0)
@@ -3330,8 +3504,8 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
break;
- case SERVICE_STOP_SIGABRT:
- log_unit_warning(UNIT(s), "State 'stop-sigabrt' timed out. Terminating.");
+ case SERVICE_STOP_WATCHDOG:
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Terminating.");
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
break;
@@ -3410,7 +3584,7 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!",
format_timespan(t, sizeof(t), watchdog_usec, 1));
- service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG);
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
} else
log_unit_warning(UNIT(s), "Watchdog disabled! Ignoring watchdog timeout (limit %s)!",
format_timespan(t, sizeof(t), watchdog_usec, 1));
@@ -3498,7 +3672,11 @@ static void service_notify_message(
}
if (r > 0) {
service_set_main_pid(s, new_main_pid);
- unit_watch_pid(UNIT(s), new_main_pid);
+
+ r = unit_watch_pid(UNIT(s), new_main_pid);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
+
notify_dbus = true;
}
}
@@ -3549,8 +3727,12 @@ static void service_notify_message(
_cleanup_free_ char *t = NULL;
if (!isempty(e)) {
- if (!utf8_is_valid(e))
- log_unit_warning(u, "Status message in notification message is not UTF-8 clean.");
+ /* Note that this size limit check is mostly paranoia: since the datagram size we are willing
+ * to process is already limited to NOTIFY_BUFFER_MAX, this limit here should never be hit. */
+ if (strlen(e) > STATUS_TEXT_MAX)
+ log_unit_warning(u, "Status message overly long (%zu > %u), ignoring.", strlen(e), STATUS_TEXT_MAX);
+ else if (!utf8_is_valid(e))
+ log_unit_warning(u, "Status message in notification message is not UTF-8 clean, ignoring.");
else {
t = strdup(e);
if (!t)
@@ -3572,7 +3754,7 @@ static void service_notify_message(
status_errno = parse_errno(e);
if (status_errno < 0)
log_unit_warning_errno(u, status_errno,
- "Failed to parse ERRNO= field in notification message: %s", e);
+ "Failed to parse ERRNO= field value '%s' in notification message: %m", e);
else if (s->status_errno != status_errno) {
s->status_errno = status_errno;
notify_dbus = true;
@@ -3599,7 +3781,7 @@ static void service_notify_message(
if (safe_atou64(e, &watchdog_override_usec) < 0)
log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e);
else
- service_reset_watchdog_timeout(s, watchdog_override_usec);
+ service_override_watchdog_timeout(s, watchdog_override_usec);
}
/* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. In both cases,
@@ -3817,7 +3999,7 @@ static bool service_needs_console(Unit *u) {
SERVICE_RUNNING,
SERVICE_RELOAD,
SERVICE_STOP,
- SERVICE_STOP_SIGABRT,
+ SERVICE_STOP_WATCHDOG,
SERVICE_STOP_SIGTERM,
SERVICE_STOP_SIGKILL,
SERVICE_STOP_POST,
@@ -3825,6 +4007,21 @@ static bool service_needs_console(Unit *u) {
SERVICE_FINAL_SIGKILL);
}
+static int service_exit_status(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+
+ if (s->main_exec_status.pid <= 0 ||
+ !dual_timestamp_is_set(&s->main_exec_status.exit_timestamp))
+ return -ENODATA;
+
+ if (s->main_exec_status.code != CLD_EXITED)
+ return -EBADE;
+
+ return s->main_exec_status.status;
+}
+
static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
[SERVICE_RESTART_NO] = "no",
[SERVICE_RESTART_ON_SUCCESS] = "on-success",
@@ -3843,7 +4040,8 @@ static const char* const service_type_table[_SERVICE_TYPE_MAX] = {
[SERVICE_ONESHOT] = "oneshot",
[SERVICE_DBUS] = "dbus",
[SERVICE_NOTIFY] = "notify",
- [SERVICE_IDLE] = "idle"
+ [SERVICE_IDLE] = "idle",
+ [SERVICE_EXEC] = "exec",
};
DEFINE_STRING_TABLE_LOOKUP(service_type, ServiceType);
@@ -3944,6 +4142,7 @@ const UnitVTable service_vtable = {
.get_timeout = service_get_timeout,
.needs_console = service_needs_console,
+ .exit_status = service_exit_status,
.status_message_formats = {
.starting_stopping = {
diff --git a/src/core/service.h b/src/core/service.h
index 9c06e91883..9c4340c70e 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -30,6 +30,7 @@ typedef enum ServiceType {
SERVICE_DBUS, /* we fork and wait until a specific D-Bus name appears on the bus */
SERVICE_NOTIFY, /* we fork and wait until a daemon sends us a ready message with sd_notify() */
SERVICE_IDLE, /* much like simple, but delay exec() until all jobs are dispatched. */
+ SERVICE_EXEC, /* we fork and wait until we execute exec() (this means our own setup is waited for) */
_SERVICE_TYPE_MAX,
_SERVICE_TYPE_INVALID = -1
} ServiceType;
@@ -98,8 +99,9 @@ struct Service {
usec_t runtime_max_usec;
dual_timestamp watchdog_timestamp;
- usec_t watchdog_usec;
- usec_t watchdog_override_usec;
+ usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */
+ usec_t watchdog_original_usec; /* the watchdog timeout that was in effect when the unit was started, i.e. the timeout the forked off processes currently see */
+ usec_t watchdog_override_usec; /* the watchdog timeout requested by the service itself through sd_notify() */
bool watchdog_override_enable;
sd_event_source *watchdog_event_source;
@@ -165,6 +167,8 @@ struct Service {
NotifyAccess notify_access;
NotifyState notify_state;
+ sd_event_source *exec_fd_event_source;
+
ServiceFDStore *fd_store;
size_t n_fd_store;
unsigned n_fd_store_max;
@@ -179,6 +183,7 @@ struct Service {
unsigned n_restarts;
bool flush_n_restarts;
+ bool exec_fd_hot;
};
extern const UnitVTable service_vtable;
@@ -202,3 +207,5 @@ const char* service_result_to_string(ServiceResult i) _const_;
ServiceResult service_result_from_string(const char *s) _pure_;
DEFINE_CAST(SERVICE, Service);
+
+#define STATUS_TEXT_MAX (16U*1024U)
diff --git a/src/core/show-status.c b/src/core/show-status.c
index 63262cc716..f748a82084 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -5,34 +5,38 @@
#include "io-util.h"
#include "parse-util.h"
#include "show-status.h"
+#include "string-table.h"
#include "string-util.h"
#include "terminal-util.h"
#include "util.h"
+static const char* const show_status_table[_SHOW_STATUS_MAX] = {
+ [SHOW_STATUS_NO] = "no",
+ [SHOW_STATUS_AUTO] = "auto",
+ [SHOW_STATUS_TEMPORARY] = "temporary",
+ [SHOW_STATUS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(show_status, ShowStatus, SHOW_STATUS_YES);
+
int parse_show_status(const char *v, ShowStatus *ret) {
- int r;
+ ShowStatus s;
- assert(v);
assert(ret);
- if (streq(v, "auto")) {
- *ret = SHOW_STATUS_AUTO;
- return 0;
- }
-
- r = parse_boolean(v);
- if (r < 0)
- return r;
+ s = show_status_from_string(v);
+ if (s < 0 || s == SHOW_STATUS_TEMPORARY)
+ return -EINVAL;
- *ret = r ? SHOW_STATUS_YES : SHOW_STATUS_NO;
+ *ret = s;
return 0;
}
-int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char *format, va_list ap) {
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) {
static const char status_indent[] = " "; /* "[" STATUS "] " */
_cleanup_free_ char *s = NULL;
_cleanup_close_ int fd = -1;
- struct iovec iovec[6] = {};
+ struct iovec iovec[7] = {};
int n = 0;
static bool prev_ephemeral;
@@ -53,7 +57,7 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
if (fd < 0)
return fd;
- if (ellipse) {
+ if (FLAGS_SET(flags, SHOW_STATUS_ELLIPSIZE)) {
char *e;
size_t emax, sl;
int c;
@@ -69,15 +73,12 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
emax = 3;
e = ellipsize(s, emax, 50);
- if (e) {
- free(s);
- s = e;
- }
+ if (e)
+ free_and_replace(s, e);
}
if (prev_ephemeral)
- iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
- prev_ephemeral = ephemeral;
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_REVERSE_LINEFEED "\r" ANSI_ERASE_TO_END_OF_LINE);
if (status) {
if (!isempty(status)) {
@@ -89,8 +90,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
}
iovec[n++] = IOVEC_MAKE_STRING(s);
- if (!ephemeral)
- iovec[n++] = IOVEC_MAKE_STRING("\n");
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (prev_ephemeral && !FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL))
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_ERASE_TO_END_OF_LINE);
+ prev_ephemeral = FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL) ;
if (writev(fd, iovec, n) < 0)
return -errno;
@@ -98,14 +102,14 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
return 0;
}
-int status_printf(const char *status, bool ellipse, bool ephemeral, const char *format, ...) {
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) {
va_list ap;
int r;
assert(format);
va_start(ap, format);
- r = status_vprintf(status, ellipse, ephemeral, format, ap);
+ r = status_vprintf(status, flags, format, ap);
va_end(ap);
return r;
diff --git a/src/core/show-status.h b/src/core/show-status.h
index 1a80de33d9..f574d92d84 100644
--- a/src/core/show-status.h
+++ b/src/core/show-status.h
@@ -8,14 +8,22 @@
/* Manager status */
typedef enum ShowStatus {
- _SHOW_STATUS_UNSET = -2,
- SHOW_STATUS_AUTO = -1,
- SHOW_STATUS_NO = 0,
- SHOW_STATUS_YES = 1,
- SHOW_STATUS_TEMPORARY = 2,
+ SHOW_STATUS_NO,
+ SHOW_STATUS_AUTO,
+ SHOW_STATUS_TEMPORARY,
+ SHOW_STATUS_YES,
+ _SHOW_STATUS_MAX,
+ _SHOW_STATUS_INVALID = -1,
} ShowStatus;
+typedef enum ShowStatusFlags {
+ SHOW_STATUS_ELLIPSIZE = 1 << 0,
+ SHOW_STATUS_EPHEMERAL = 1 << 1,
+} ShowStatusFlags;
+
+ShowStatus show_status_from_string(const char *v) _const_;
+const char* show_status_to_string(ShowStatus s) _pure_;
int parse_show_status(const char *v, ShowStatus *ret);
-int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char *format, va_list ap) _printf_(4,0);
-int status_printf(const char *status, bool ellipse, bool ephemeral, const char *format, ...) _printf_(4,5);
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) _printf_(3,0);
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) _printf_(3,4);
diff --git a/src/core/shutdown.c b/src/core/shutdown.c
index 038345b752..cb47ee8984 100644
--- a/src/core/shutdown.c
+++ b/src/core/shutdown.c
@@ -28,6 +28,7 @@
#include "parse-util.h"
#include "process-util.h"
#include "reboot-util.h"
+#include "rlimit-util.h"
#include "signal-util.h"
#include "string-util.h"
#include "switch-root.h"
@@ -77,14 +78,14 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_LOG_LEVEL:
r = log_set_max_level_from_string(optarg);
if (r < 0)
- log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg);
+ log_error_errno(r, "Failed to parse log level %s, ignoring: %m", optarg);
break;
case ARG_LOG_TARGET:
r = log_set_target_from_string(optarg);
if (r < 0)
- log_error_errno(r, "Failed to parse log target %s, ignoring", optarg);
+ log_error_errno(r, "Failed to parse log target %s, ignoring: %m", optarg);
break;
@@ -93,7 +94,7 @@ static int parse_argv(int argc, char *argv[]) {
if (optarg) {
r = log_show_color_from_string(optarg);
if (r < 0)
- log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg);
+ log_error_errno(r, "Failed to parse log color setting %s, ignoring: %m", optarg);
} else
log_show_color(true);
@@ -103,7 +104,7 @@ static int parse_argv(int argc, char *argv[]) {
if (optarg) {
r = log_show_location_from_string(optarg);
if (r < 0)
- log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg);
+ log_error_errno(r, "Failed to parse log location setting %s, ignoring: %m", optarg);
} else
log_show_location(true);
@@ -112,14 +113,14 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_EXIT_CODE:
r = safe_atou8(optarg, &arg_exit_code);
if (r < 0)
- log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg);
+ log_error_errno(r, "Failed to parse exit code %s, ignoring: %m", optarg);
break;
case ARG_TIMEOUT:
r = parse_sec(optarg, &arg_timeout);
if (r < 0)
- log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg);
+ log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring: %m", optarg);
break;
@@ -137,10 +138,9 @@ static int parse_argv(int argc, char *argv[]) {
assert_not_reached("Unhandled option code.");
}
- if (!arg_verb) {
- log_error("Verb argument missing.");
- return -EINVAL;
- }
+ if (!arg_verb)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb argument missing.");
return 0;
}
@@ -171,16 +171,23 @@ static int switch_root_initramfs(void) {
*/
static bool sync_making_progress(unsigned long long *prev_dirty) {
_cleanup_fclose_ FILE *f = NULL;
- char line[LINE_MAX];
- bool r = false;
unsigned long long val = 0;
+ bool r = false;
f = fopen("/proc/meminfo", "re");
if (!f)
return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
- FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
unsigned long long ull = 0;
+ int q;
+
+ q = read_line(f, LONG_LINE_MAX, &line);
+ if (q < 0)
+ return log_warning_errno(q, "Failed to parse /proc/meminfo: %m");
+ if (q == 0)
+ break;
if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
continue;
@@ -435,15 +442,17 @@ int main(int argc, char *argv[]) {
arguments[0] = NULL;
arguments[1] = arg_verb;
arguments[2] = NULL;
- execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments);
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL);
+
+ (void) rlimit_nofile_safe();
if (can_initrd) {
r = switch_root_initramfs();
if (r >= 0) {
argv[0] = (char*) "/shutdown";
- setsid();
- make_console_stdio();
+ (void) setsid();
+ (void) make_console_stdio();
log_info("Successfully changed into root pivot.\n"
"Returning to initrd...");
diff --git a/src/core/slice.c b/src/core/slice.c
index 58f18a4dad..15b18bcad3 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -4,7 +4,9 @@
#include "alloc-util.h"
#include "dbus-slice.h"
+#include "dbus-unit.h"
#include "log.h"
+#include "serialize.h"
#include "slice.h"
#include "special.h"
#include "string-util.h"
@@ -28,6 +30,9 @@ static void slice_set_state(Slice *t, SliceState state) {
SliceState old_state;
assert(t);
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
old_state = t->state;
t->state = state;
@@ -74,7 +79,7 @@ static int slice_add_default_dependencies(Slice *s) {
r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_BEFORE, UNIT_CONFLICTS,
- SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
@@ -123,7 +128,7 @@ static int slice_load_root_slice(Unit *u) {
if (!u->description)
u->description = strdup("Root Slice");
if (!u->documentation)
- u->documentation = strv_new("man:systemd.special(7)", NULL);
+ u->documentation = strv_new("man:systemd.special(7)");
return 1;
}
@@ -146,7 +151,7 @@ static int slice_load_system_slice(Unit *u) {
if (!u->description)
u->description = strdup("System Slice");
if (!u->documentation)
- u->documentation = strv_new("man:systemd.special(7)", NULL);
+ u->documentation = strv_new("man:systemd.special(7)");
return 1;
}
@@ -256,7 +261,8 @@ static int slice_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", slice_state_to_string(s->state));
+ (void) serialize_item(f, "state", slice_state_to_string(s->state));
+
return 0;
}
@@ -328,7 +334,7 @@ static void slice_enumerate_perpetual(Manager *m) {
assert(m);
r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u);
- if (r >= 0 && manager_owns_root_cgroup(m)) {
+ if (r >= 0 && manager_owns_host_root_cgroup(m)) {
Slice *s = SLICE(u);
/* If we are managing the root cgroup then this means our root slice covers the whole system, which
diff --git a/src/core/smack-setup.c b/src/core/smack-setup.c
index 50115c0454..49b37aefc7 100644
--- a/src/core/smack-setup.c
+++ b/src/core/smack-setup.c
@@ -29,7 +29,6 @@ static int write_access2_rules(const char* srcdir) {
_cleanup_close_ int load2_fd = -1, change_fd = -1;
_cleanup_closedir_ DIR *dir = NULL;
struct dirent *entry;
- char buf[NAME_MAX];
int dfd = -1;
int r = 0;
@@ -73,7 +72,7 @@ static int write_access2_rules(const char* srcdir) {
continue;
}
- policy = fdopen(fd, "re");
+ policy = fdopen(fd, "r");
if (!policy) {
if (r == 0)
r = -errno;
@@ -83,13 +82,17 @@ static int write_access2_rules(const char* srcdir) {
}
/* load2 write rules in the kernel require a line buffered stream */
- FOREACH_LINE(buf, policy,
- log_error_errno(errno, "Failed to read line from '%s': %m",
- entry->d_name)) {
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL;
+ int q;
- _cleanup_free_ char *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL;
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
- if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf))
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
continue;
/* if 3 args -> load rule : subject object access1 */
@@ -102,7 +105,7 @@ static int write_access2_rules(const char* srcdir) {
if (write(isempty(acc2) ? load2_fd : change_fd, buf, strlen(buf)) < 0) {
if (r == 0)
r = -errno;
- log_error_errno(errno, "Failed to write '%s' to '%s' in '%s'",
+ log_error_errno(errno, "Failed to write '%s' to '%s' in '%s': %m",
buf, isempty(acc2) ? "/sys/fs/smackfs/load2" : "/sys/fs/smackfs/change-rule", entry->d_name);
}
}
@@ -115,7 +118,6 @@ static int write_cipso2_rules(const char* srcdir) {
_cleanup_close_ int cipso2_fd = -1;
_cleanup_closedir_ DIR *dir = NULL;
struct dirent *entry;
- char buf[NAME_MAX];
int dfd = -1;
int r = 0;
@@ -152,7 +154,7 @@ static int write_cipso2_rules(const char* srcdir) {
continue;
}
- policy = fdopen(fd, "re");
+ policy = fdopen(fd, "r");
if (!policy) {
if (r == 0)
r = -errno;
@@ -162,17 +164,23 @@ static int write_cipso2_rules(const char* srcdir) {
}
/* cipso2 write rules in the kernel require a line buffered stream */
- FOREACH_LINE(buf, policy,
- log_error_errno(errno, "Failed to read line from '%s': %m",
- entry->d_name)) {
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
- if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf))
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
continue;
if (write(cipso2_fd, buf, strlen(buf)) < 0) {
if (r == 0)
r = -errno;
- log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s'",
+ log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s': %m",
buf, entry->d_name);
break;
}
@@ -186,7 +194,6 @@ static int write_netlabel_rules(const char* srcdir) {
_cleanup_fclose_ FILE *dst = NULL;
_cleanup_closedir_ DIR *dir = NULL;
struct dirent *entry;
- char buf[NAME_MAX];
int dfd = -1;
int r = 0;
@@ -220,7 +227,7 @@ static int write_netlabel_rules(const char* srcdir) {
continue;
}
- policy = fdopen(fd, "re");
+ policy = fdopen(fd, "r");
if (!policy) {
if (r == 0)
r = -errno;
@@ -232,15 +239,20 @@ static int write_netlabel_rules(const char* srcdir) {
(void) __fsetlocking(policy, FSETLOCKING_BYCALLER);
/* load2 write rules in the kernel require a line buffered stream */
- FOREACH_LINE(buf, policy,
- log_error_errno(errno, "Failed to read line from %s: %m", entry->d_name)) {
-
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
int q;
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from %s: %m", entry->d_name);
+ if (q == 0)
+ break;
+
if (!fputs(buf, dst)) {
if (r == 0)
r = -EINVAL;
- log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel");
+ log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel: %m");
break;
}
q = fflush_and_check(dst);
@@ -261,20 +273,27 @@ static int write_onlycap_list(void) {
_cleanup_free_ char *list = NULL;
_cleanup_fclose_ FILE *f = NULL;
size_t len = 0, allocated = 0;
- char buf[LINE_MAX];
int r;
f = fopen("/etc/smack/onlycap", "re");
if (!f) {
if (errno != ENOENT)
- log_warning_errno(errno, "Failed to read '/etc/smack/onlycap'");
+ log_warning_errno(errno, "Failed to read '/etc/smack/onlycap': %m");
+
return errno == ENOENT ? ENOENT : -errno;
}
- FOREACH_LINE(buf, f, return -errno) {
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
size_t l;
- if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf))
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read line from /etc/smack/onlycap: %m");
+ if (r == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, *buf))
continue;
l = strlen(buf);
@@ -285,7 +304,7 @@ static int write_onlycap_list(void) {
len += l + 1;
}
- if (!len)
+ if (len == 0)
return 0;
list[len - 1] = 0;
@@ -293,13 +312,13 @@ static int write_onlycap_list(void) {
onlycap_fd = open("/sys/fs/smackfs/onlycap", O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (onlycap_fd < 0) {
if (errno != ENOENT)
- log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap'");
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap': %m");
return -errno; /* negative error */
}
r = write(onlycap_fd, list, len);
if (r < 0)
- return log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap'", list);
+ return log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap': %m", list);
return 0;
}
@@ -331,17 +350,17 @@ int mac_smack_setup(bool *loaded_policy) {
}
#ifdef SMACK_RUN_LABEL
- r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, 0);
+ r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
log_warning_errno(r, "Failed to set SMACK label \"" SMACK_RUN_LABEL "\" on self: %m");
- r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, 0);
+ r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
log_warning_errno(r, "Failed to set SMACK ambient label \"" SMACK_RUN_LABEL "\": %m");
r = write_string_file("/sys/fs/smackfs/netlabel",
- "0.0.0.0/0 " SMACK_RUN_LABEL, 0);
+ "0.0.0.0/0 " SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
log_warning_errno(r, "Failed to set SMACK netlabel rule \"0.0.0.0/0 " SMACK_RUN_LABEL "\": %m");
- r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", 0);
+ r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0)
log_warning_errno(r, "Failed to set SMACK netlabel rule \"127.0.0.1 -CIPSO\": %m");
#endif
@@ -390,7 +409,7 @@ int mac_smack_setup(bool *loaded_policy) {
log_info("Successfully wrote Smack onlycap list.");
break;
default:
- log_emergency_errno(r, "Failed to write Smack onlycap list.");
+ log_emergency_errno(r, "Failed to write Smack onlycap list: %m");
return r;
}
diff --git a/src/core/socket.c b/src/core/socket.c
index 56d32225c4..dd126a7f21 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -17,6 +17,7 @@
#include "bus-util.h"
#include "copy.h"
#include "dbus-socket.h"
+#include "dbus-unit.h"
#include "def.h"
#include "exit-status.h"
#include "fd-util.h"
@@ -24,6 +25,7 @@
#include "fs-util.h"
#include "in-addr-util.h"
#include "io-util.h"
+#include "ip-protocol-list.h"
#include "label.h"
#include "log.h"
#include "missing.h"
@@ -32,10 +34,10 @@
#include "path-util.h"
#include "process-util.h"
#include "selinux-util.h"
+#include "serialize.h"
#include "signal-util.h"
#include "smack-util.h"
#include "socket.h"
-#include "socket-protocol-list.h"
#include "special.h"
#include "string-table.h"
#include "string-util.h"
@@ -301,17 +303,17 @@ static int socket_add_default_dependencies(Socket *s) {
if (!UNIT(s)->default_dependencies)
return 0;
- r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
- r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
}
- return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
_pure_ static bool socket_has_exec(Socket *s) {
@@ -467,9 +469,7 @@ static int socket_verify(Socket *s) {
return 0;
}
-static void peer_address_hash_func(const void *p, struct siphash *state) {
- const SocketPeer *s = p;
-
+static void peer_address_hash_func(const SocketPeer *s, struct siphash *state) {
assert(s);
if (s->peer.sa.sa_family == AF_INET)
@@ -482,13 +482,12 @@ static void peer_address_hash_func(const void *p, struct siphash *state) {
assert_not_reached("Unknown address family.");
}
-static int peer_address_compare_func(const void *a, const void *b) {
- const SocketPeer *x = a, *y = b;
+static int peer_address_compare_func(const SocketPeer *x, const SocketPeer *y) {
+ int r;
- if (x->peer.sa.sa_family < y->peer.sa.sa_family)
- return -1;
- if (x->peer.sa.sa_family > y->peer.sa.sa_family)
- return 1;
+ r = CMP(x->peer.sa.sa_family, y->peer.sa.sa_family);
+ if (r != 0)
+ return r;
switch(x->peer.sa.sa_family) {
case AF_INET:
@@ -496,19 +495,12 @@ static int peer_address_compare_func(const void *a, const void *b) {
case AF_INET6:
return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
case AF_VSOCK:
- if (x->peer.vm.svm_cid < y->peer.vm.svm_cid)
- return -1;
- if (x->peer.vm.svm_cid > y->peer.vm.svm_cid)
- return 1;
- return 0;
+ return CMP(x->peer.vm.svm_cid, y->peer.vm.svm_cid);
}
assert_not_reached("Black sheep in the family!");
}
-const struct hash_ops peer_address_hash_ops = {
- .hash = peer_address_hash_func,
- .compare = peer_address_compare_func
-};
+DEFINE_PRIVATE_HASH_OPS(peer_address_hash_ops, SocketPeer, peer_address_hash_func, peer_address_compare_func);
static int socket_load(Unit *u) {
Socket *s = SOCKET(u);
@@ -547,26 +539,8 @@ static SocketPeer *socket_peer_new(void) {
return p;
}
-SocketPeer *socket_peer_ref(SocketPeer *p) {
- if (!p)
- return NULL;
-
- assert(p->n_ref > 0);
- p->n_ref++;
-
- return p;
-}
-
-SocketPeer *socket_peer_unref(SocketPeer *p) {
- if (!p)
- return NULL;
-
- assert(p->n_ref > 0);
-
- p->n_ref--;
-
- if (p->n_ref > 0)
- return NULL;
+static SocketPeer *socket_peer_free(SocketPeer *p) {
+ assert(p);
if (p->socket)
set_remove(p->socket->peers_by_address, p);
@@ -574,6 +548,8 @@ SocketPeer *socket_peer_unref(SocketPeer *p) {
return mfree(p);
}
+DEFINE_TRIVIAL_REF_UNREF_FUNC(SocketPeer, socket_peer, socket_peer_free);
+
int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) {
_cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
SocketPeer sa = {}, *i;
@@ -833,7 +809,7 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->trigger_limit.interval, USEC_PER_SEC),
prefix, s->trigger_limit.burst);
- str = socket_protocol_to_name(s->socket_protocol);
+ str = ip_protocol_to_name(s->socket_protocol);
if (str)
fprintf(f, "%sSocketProtocol: %s\n", prefix, str);
@@ -1034,107 +1010,112 @@ static void socket_apply_socket_options(Socket *s, int fd) {
assert(fd >= 0);
if (s->keep_alive) {
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_KEEPALIVE failed: %m");
+ r = setsockopt_int(fd, SOL_SOCKET, SO_KEEPALIVE, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_KEEPALIVE failed: %m");
}
if (s->keep_alive_time > 0) {
- int value = s->keep_alive_time / USEC_PER_SEC;
- if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPIDLE failed: %m");
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPIDLE, s->keep_alive_time / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPIDLE failed: %m");
}
if (s->keep_alive_interval > 0) {
- int value = s->keep_alive_interval / USEC_PER_SEC;
- if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPINTVL failed: %m");
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPINTVL, s->keep_alive_interval / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPINTVL failed: %m");
}
if (s->keep_alive_cnt > 0) {
- int value = s->keep_alive_cnt;
- if (setsockopt(fd, SOL_TCP, TCP_KEEPCNT, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPCNT failed: %m");
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPCNT, s->keep_alive_cnt);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPCNT failed: %m");
}
if (s->defer_accept > 0) {
- int value = s->defer_accept / USEC_PER_SEC;
- if (setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "TCP_DEFER_ACCEPT failed: %m");
+ r = setsockopt_int(fd, SOL_TCP, TCP_DEFER_ACCEPT, s->defer_accept / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_DEFER_ACCEPT failed: %m");
}
if (s->no_delay) {
- int one = 1;
-
if (s->socket_protocol == IPPROTO_SCTP) {
- if (setsockopt(fd, SOL_SCTP, SCTP_NODELAY, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SCTP_NODELAY failed: %m");
+ r = setsockopt_int(fd, SOL_SCTP, SCTP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SCTP_NODELAY failed: %m");
} else {
- if (setsockopt(fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "TCP_NODELAY failed: %m");
+ r = setsockopt_int(fd, SOL_TCP, TCP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_NODELAY failed: %m");
}
}
if (s->broadcast) {
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_BROADCAST failed: %m");
+ r = setsockopt_int(fd, SOL_SOCKET, SO_BROADCAST, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_BROADCAST failed: %m");
}
if (s->pass_cred) {
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_PASSCRED failed: %m");
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSCRED failed: %m");
}
if (s->pass_sec) {
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_PASSSEC failed: %m");
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSSEC failed: %m");
}
- if (s->priority >= 0)
- if (setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &s->priority, sizeof(s->priority)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_PRIORITY failed: %m");
+ if (s->priority >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PRIORITY, s->priority);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PRIORITY failed: %m");
+ }
if (s->receive_buffer > 0) {
- int value = (int) s->receive_buffer;
-
/* We first try with SO_RCVBUFFORCE, in case we have the perms for that */
- if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &value, sizeof(value)) < 0)
- if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_RCVBUF failed: %m");
+ if (setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, s->receive_buffer) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, s->receive_buffer);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_RCVBUF failed: %m");
+ }
}
if (s->send_buffer > 0) {
- int value = (int) s->send_buffer;
- if (setsockopt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &value, sizeof(value)) < 0)
- if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, sizeof(value)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_SNDBUF failed: %m");
+ if (setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, s->send_buffer) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, s->send_buffer);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_SNDBUF failed: %m");
+ }
}
- if (s->mark >= 0)
- if (setsockopt(fd, SOL_SOCKET, SO_MARK, &s->mark, sizeof(s->mark)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "SO_MARK failed: %m");
+ if (s->mark >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_MARK, s->mark);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_MARK failed: %m");
+ }
- if (s->ip_tos >= 0)
- if (setsockopt(fd, IPPROTO_IP, IP_TOS, &s->ip_tos, sizeof(s->ip_tos)) < 0)
- log_unit_warning_errno(UNIT(s), errno, "IP_TOS failed: %m");
+ if (s->ip_tos >= 0) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TOS, s->ip_tos);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "IP_TOS failed: %m");
+ }
if (s->ip_ttl >= 0) {
int x;
- r = setsockopt(fd, IPPROTO_IP, IP_TTL, &s->ip_ttl, sizeof(s->ip_ttl));
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, s->ip_ttl);
if (socket_ipv6_is_supported())
- x = setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &s->ip_ttl, sizeof(s->ip_ttl));
- else {
- x = -1;
- errno = EAFNOSUPPORT;
- }
+ x = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, s->ip_ttl);
+ else
+ x = -EAFNOSUPPORT;
if (r < 0 && x < 0)
- log_unit_warning_errno(UNIT(s), errno, "IP_TTL/IPV6_UNICAST_HOPS failed: %m");
+ log_unit_warning_errno(UNIT(s), r, "IP_TTL/IPV6_UNICAST_HOPS failed: %m");
}
if (s->tcp_congestion)
@@ -1329,7 +1310,7 @@ static int socket_symlink(Socket *s) {
STRV_FOREACH(i, s->symlinks) {
(void) mkdir_parents_label(*i, s->directory_mode);
- r = symlink_idempotent(p, *i);
+ r = symlink_idempotent(p, *i, false);
if (r == -EEXIST && s->remove_on_stop) {
/* If there's already something where we want to create the symlink, and the destructive
@@ -1337,7 +1318,7 @@ static int socket_symlink(Socket *s) {
* again. */
if (unlink(*i) >= 0)
- r = symlink_idempotent(p, *i);
+ r = symlink_idempotent(p, *i, false);
}
if (r < 0)
@@ -1375,8 +1356,10 @@ static int usbffs_dispatch_eps(SocketPort *p) {
n = (size_t) r;
p->auxiliary_fds = new(int, n);
- if (!p->auxiliary_fds)
- return -ENOMEM;
+ if (!p->auxiliary_fds) {
+ r = -ENOMEM;
+ goto clear;
+ }
p->n_auxiliary_fds = n;
@@ -1385,8 +1368,10 @@ static int usbffs_dispatch_eps(SocketPort *p) {
_cleanup_free_ char *ep = NULL;
ep = path_make_absolute(ent[i]->d_name, p->path);
- if (!ep)
- return -ENOMEM;
+ if (!ep) {
+ r = -ENOMEM;
+ goto fail;
+ }
path_simplify(ep, false);
@@ -1395,16 +1380,20 @@ static int usbffs_dispatch_eps(SocketPort *p) {
goto fail;
p->auxiliary_fds[k++] = r;
- free(ent[i]);
}
- return r;
+ r = 0;
+ goto clear;
fail:
close_many(p->auxiliary_fds, k);
p->auxiliary_fds = mfree(p->auxiliary_fds);
p->n_auxiliary_fds = 0;
+clear:
+ for (i = 0; i < n; ++i)
+ free(ent[i]);
+
return r;
}
@@ -1754,6 +1743,9 @@ static void socket_set_state(Socket *s, SocketState state) {
SocketState old_state;
assert(s);
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
old_state = s->state;
s->state = state;
@@ -1866,11 +1858,12 @@ static int socket_coldplug(Unit *u) {
static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
- ExecParameters exec_params = {
- .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
};
pid_t pid;
int r;
@@ -1887,9 +1880,9 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
- unit_set_exec_params(UNIT(s), &exec_params);
-
- exec_params.argv = c->argv;
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
r = exec_spawn(UNIT(s),
c,
@@ -1935,7 +1928,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
if (!isempty(s->user)) {
const char *user = s->user;
- r = get_user_creds(&user, &uid, &gid, NULL, NULL);
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
if (r < 0) {
log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
_exit(EXIT_USER);
@@ -1945,7 +1938,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
if (!isempty(s->group)) {
const char *group = s->group;
- r = get_group_creds(&group, &gid);
+ r = get_group_creds(&group, &gid, 0);
if (r < 0) {
log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
_exit(EXIT_GROUP);
@@ -1990,8 +1983,10 @@ static void socket_enter_dead(Socket *s, SocketResult f) {
if (s->result == SOCKET_SUCCESS)
s->result = f;
- if (s->result != SOCKET_SUCCESS)
- log_unit_warning(UNIT(s), "Failed with result '%s'.", socket_result_to_string(s->result));
+ if (s->result == SOCKET_SUCCESS)
+ unit_log_success(UNIT(s));
+ else
+ unit_log_failure(UNIT(s), socket_result_to_string(s->result));
socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
@@ -2459,6 +2454,7 @@ static int socket_start(Unit *u) {
return r;
s->result = SOCKET_SUCCESS;
+ exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
u->reset_accounting = true;
@@ -2506,16 +2502,16 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", socket_state_to_string(s->state));
- unit_serialize_item(u, f, "result", socket_result_to_string(s->result));
- unit_serialize_item_format(u, f, "n-accepted", "%u", s->n_accepted);
- unit_serialize_item_format(u, f, "n-refused", "%u", s->n_refused);
+ (void) serialize_item(f, "state", socket_state_to_string(s->state));
+ (void) serialize_item(f, "result", socket_result_to_string(s->result));
+ (void) serialize_item_format(f, "n-accepted", "%u", s->n_accepted);
+ (void) serialize_item_format(f, "n-refused", "%u", s->n_refused);
if (s->control_pid > 0)
- unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid);
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
if (s->control_command_id >= 0)
- unit_serialize_item(u, f, "control-command", socket_exec_command_to_string(s->control_command_id));
+ (void) serialize_item(f, "control-command", socket_exec_command_to_string(s->control_command_id));
LIST_FOREACH(port, p, s->ports) {
int copy;
@@ -2525,29 +2521,28 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
copy = fdset_put_dup(fds, p->fd);
if (copy < 0)
- return copy;
+ return log_warning_errno(copy, "Failed to serialize socket fd: %m");
if (p->type == SOCKET_SOCKET) {
_cleanup_free_ char *t = NULL;
r = socket_address_print(&p->address, &t);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to format socket address: %m");
if (socket_address_family(&p->address) == AF_NETLINK)
- unit_serialize_item_format(u, f, "netlink", "%i %s", copy, t);
+ (void) serialize_item_format(f, "netlink", "%i %s", copy, t);
else
- unit_serialize_item_format(u, f, "socket", "%i %i %s", copy, p->address.type, t);
-
+ (void) serialize_item_format(f, "socket", "%i %i %s", copy, p->address.type, t);
} else if (p->type == SOCKET_SPECIAL)
- unit_serialize_item_format(u, f, "special", "%i %s", copy, p->path);
+ (void) serialize_item_format(f, "special", "%i %s", copy, p->path);
else if (p->type == SOCKET_MQUEUE)
- unit_serialize_item_format(u, f, "mqueue", "%i %s", copy, p->path);
+ (void) serialize_item_format(f, "mqueue", "%i %s", copy, p->path);
else if (p->type == SOCKET_USB_FUNCTION)
- unit_serialize_item_format(u, f, "ffs", "%i %s", copy, p->path);
+ (void) serialize_item_format(f, "ffs", "%i %s", copy, p->path);
else {
assert(p->type == SOCKET_FIFO);
- unit_serialize_item_format(u, f, "fifo", "%i %s", copy, p->path);
+ (void) serialize_item_format(f, "fifo", "%i %s", copy, p->path);
}
}
@@ -2555,6 +2550,8 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
}
static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) {
+ assert(p);
+
safe_close(p->fd);
p->fd = fdset_remove(fds, fd);
}
@@ -2971,9 +2968,11 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
f = SOCKET_SUCCESS;
}
- log_unit_full(u, f == SOCKET_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
- "Control process exited, code=%s status=%i",
- sigchld_code_to_string(code), status);
+ unit_log_process_exit(
+ u, f == SOCKET_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Control process",
+ socket_exec_command_to_string(s->control_command_id),
+ code, status);
if (s->result == SOCKET_SUCCESS)
s->result = f;
diff --git a/src/core/swap.c b/src/core/swap.c
index b78b1aa266..2d8463b8b1 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -5,10 +5,13 @@
#include <sys/stat.h>
#include <unistd.h>
-#include "libudev.h"
+#include "sd-device.h"
#include "alloc-util.h"
#include "dbus-swap.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
#include "device.h"
#include "escape.h"
#include "exit-status.h"
@@ -18,11 +21,11 @@
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "serialize.h"
#include "special.h"
#include "string-table.h"
#include "string-util.h"
#include "swap.h"
-#include "udev-util.h"
#include "unit-name.h"
#include "unit.h"
#include "virt.h"
@@ -196,7 +199,7 @@ static int swap_add_device_dependencies(Swap *s) {
/* File based swap devices need to be ordered after
* systemd-remount-fs.service, since they might need a
* writable file system. */
- return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, NULL, true, UNIT_DEPENDENCY_FILE);
+ return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, true, UNIT_DEPENDENCY_FILE);
}
static int swap_add_default_dependencies(Swap *s) {
@@ -215,11 +218,11 @@ static int swap_add_default_dependencies(Swap *s) {
/* swap units generated for the swap dev links are missing the
* ordering dep against the swap target. */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
- return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
static int swap_verify(Swap *s) {
@@ -247,7 +250,7 @@ static int swap_verify(Swap *s) {
}
static int swap_load_devnode(Swap *s) {
- _cleanup_(udev_device_unrefp) struct udev_device *d = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
struct stat st;
const char *p;
int r;
@@ -257,91 +260,105 @@ static int swap_load_devnode(Swap *s) {
if (stat(s->what, &st) < 0 || !S_ISBLK(st.st_mode))
return 0;
- r = udev_device_new_from_stat_rdev(UNIT(s)->manager->udev, &st, &d);
+ r = device_new_from_stat_rdev(&d, &st);
if (r < 0) {
log_unit_full(UNIT(s), r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to allocate udev device for swap %s: %m", s->what);
+ "Failed to allocate device for swap %s: %m", s->what);
return 0;
}
- p = udev_device_get_devnode(d);
- if (!p)
+ if (sd_device_get_devname(d, &p) < 0)
return 0;
return swap_set_devnode(s, p);
}
-static int swap_load(Unit *u) {
+static int swap_add_extras(Swap *s) {
int r;
- Swap *s = SWAP(u);
assert(s);
- assert(u->load_state == UNIT_STUB);
- /* Load a .swap file */
- if (SWAP(u)->from_proc_swaps)
- r = unit_load_fragment_and_dropin_optional(u);
- else
- r = unit_load_fragment_and_dropin(u);
+ if (UNIT(s)->fragment_path)
+ s->from_fragment = true;
+
+ if (!s->what) {
+ if (s->parameters_fragment.what)
+ s->what = strdup(s->parameters_fragment.what);
+ else if (s->parameters_proc_swaps.what)
+ s->what = strdup(s->parameters_proc_swaps.what);
+ else {
+ r = unit_name_to_path(UNIT(s)->id, &s->what);
+ if (r < 0)
+ return r;
+ }
+
+ if (!s->what)
+ return -ENOMEM;
+ }
+
+ path_simplify(s->what, false);
+
+ if (!UNIT(s)->description) {
+ r = unit_set_description(UNIT(s), s->what);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT);
if (r < 0)
return r;
- if (u->load_state == UNIT_LOADED) {
-
- if (UNIT(s)->fragment_path)
- s->from_fragment = true;
+ r = swap_add_device_dependencies(s);
+ if (r < 0)
+ return r;
- if (!s->what) {
- if (s->parameters_fragment.what)
- s->what = strdup(s->parameters_fragment.what);
- else if (s->parameters_proc_swaps.what)
- s->what = strdup(s->parameters_proc_swaps.what);
- else {
- r = unit_name_to_path(u->id, &s->what);
- if (r < 0)
- return r;
- }
+ r = swap_load_devnode(s);
+ if (r < 0)
+ return r;
- if (!s->what)
- return -ENOMEM;
- }
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
- path_simplify(s->what, false);
+ r = unit_add_exec_dependencies(UNIT(s), &s->exec_context);
+ if (r < 0)
+ return r;
- if (!UNIT(s)->description) {
- r = unit_set_description(u, s->what);
- if (r < 0)
- return r;
- }
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
- r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT);
- if (r < 0)
- return r;
+ r = swap_add_default_dependencies(s);
+ if (r < 0)
+ return r;
- r = swap_add_device_dependencies(s);
- if (r < 0)
- return r;
+ return 0;
+}
- r = swap_load_devnode(s);
- if (r < 0)
- return r;
+static int swap_load(Unit *u) {
+ Swap *s = SWAP(u);
+ int r, q;
- r = unit_patch_contexts(u);
- if (r < 0)
- return r;
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
- r = unit_add_exec_dependencies(u, &s->exec_context);
- if (r < 0)
- return r;
+ /* Load a .swap file */
+ if (SWAP(u)->from_proc_swaps)
+ r = unit_load_fragment_and_dropin_optional(u);
+ else
+ r = unit_load_fragment_and_dropin(u);
- r = unit_set_default_slice(u);
- if (r < 0)
- return r;
+ /* Add in some extras, and do so either when we successfully loaded something or when /proc/swaps is already
+ * active. */
+ if (u->load_state == UNIT_LOADED || s->from_proc_swaps)
+ q = swap_add_extras(s);
+ else
+ q = 0;
- r = swap_add_default_dependencies(s);
- if (r < 0)
- return r;
- }
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
return swap_verify(s);
}
@@ -368,13 +385,12 @@ static int swap_setup_unit(
return log_unit_error_errno(u, r, "Failed to generate unit name from path: %m");
u = manager_get_unit(m, e);
-
if (u &&
SWAP(u)->from_proc_swaps &&
- !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps)) {
- log_error("Swap %s appeared twice with different device paths %s and %s", e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps);
- return -EEXIST;
- }
+ !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Swap %s appeared twice with different device paths %s and %s",
+ e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps);
if (!u) {
delete = true;
@@ -403,6 +419,13 @@ static int swap_setup_unit(
}
}
+ /* The unit is definitely around now, mark it as loaded if it was previously referenced but could not be
+ * loaded. After all we can load it now, from the data in /proc/swaps. */
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ u->load_state = UNIT_LOADED;
+ u->load_error = 0;
+ }
+
if (set_flags) {
SWAP(u)->is_active = true;
SWAP(u)->just_activated = !SWAP(u)->from_proc_swaps;
@@ -425,10 +448,9 @@ fail:
}
static int swap_process_new(Manager *m, const char *device, int prio, bool set_flags) {
- _cleanup_(udev_device_unrefp) struct udev_device *d = NULL;
- struct udev_list_entry *item = NULL, *first = NULL;
- const char *dn;
- struct stat st;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *dn, *devlink;
+ struct stat st, st_link;
int r;
assert(m);
@@ -442,41 +464,36 @@ static int swap_process_new(Manager *m, const char *device, int prio, bool set_f
if (stat(device, &st) < 0 || !S_ISBLK(st.st_mode))
return 0;
- r = udev_device_new_from_stat_rdev(m->udev, &st, &d);
+ r = device_new_from_stat_rdev(&d, &st);
if (r < 0) {
log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to allocate udev device for swap %s: %m", device);
+ "Failed to allocate device for swap %s: %m", device);
return 0;
}
/* Add the main device node */
- dn = udev_device_get_devnode(d);
- if (dn && !streq(dn, device))
+ if (sd_device_get_devname(d, &dn) >= 0 && !streq(dn, device))
swap_setup_unit(m, dn, device, prio, set_flags);
/* Add additional units for all symlinks */
- first = udev_device_get_devlinks_list_entry(d);
- udev_list_entry_foreach(item, first) {
- const char *p;
+ FOREACH_DEVICE_DEVLINK(d, devlink) {
/* Don't bother with the /dev/block links */
- p = udev_list_entry_get_name(item);
-
- if (streq(p, device))
+ if (streq(devlink, device))
continue;
- if (path_startswith(p, "/dev/block/"))
+ if (path_startswith(devlink, "/dev/block/"))
continue;
- if (stat(p, &st) >= 0)
- if (!S_ISBLK(st.st_mode) ||
- st.st_rdev != udev_device_get_devnum(d))
- continue;
+ if (stat(devlink, &st_link) >= 0 &&
+ (!S_ISBLK(st_link.st_mode) ||
+ st_link.st_rdev != st.st_rdev))
+ continue;
- swap_setup_unit(m, p, device, prio, set_flags);
+ swap_setup_unit(m, devlink, device, prio, set_flags);
}
- return r;
+ return 0;
}
static void swap_set_state(Swap *s, SwapState state) {
@@ -485,6 +502,9 @@ static void swap_set_state(Swap *s, SwapState state) {
assert(s);
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
old_state = s->state;
s->state = state;
@@ -601,11 +621,12 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
- ExecParameters exec_params = {
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
.flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
.stdin_fd = -1,
.stdout_fd = -1,
.stderr_fd = -1,
+ .exec_fd = -1,
};
pid_t pid;
int r;
@@ -622,7 +643,9 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
goto fail;
- unit_set_exec_params(UNIT(s), &exec_params);
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ goto fail;
r = exec_spawn(UNIT(s),
c,
@@ -655,9 +678,7 @@ static void swap_enter_dead(Swap *s, SwapResult f) {
if (s->result == SWAP_SUCCESS)
s->result = f;
- if (s->result != SWAP_SUCCESS)
- log_unit_warning(UNIT(s), "Failed with result '%s'.", swap_result_to_string(s->result));
-
+ unit_log_result(UNIT(s), s->result == SWAP_SUCCESS, swap_result_to_string(s->result));
swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
@@ -813,6 +834,14 @@ fail:
swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
}
+static void swap_cycle_clear(Swap *s) {
+ assert(s);
+
+ s->result = SWAP_SUCCESS;
+ exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
+ UNIT(s)->reset_accounting = true;
+}
+
static int swap_start(Unit *u) {
Swap *s = SWAP(u), *other;
int r;
@@ -852,10 +881,7 @@ static int swap_start(Unit *u) {
if (r < 0)
return r;
- s->result = SWAP_SUCCESS;
-
- u->reset_accounting = true;
-
+ swap_cycle_clear(s);
swap_enter_activating(s);
return 1;
}
@@ -898,14 +924,14 @@ static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", swap_state_to_string(s->state));
- unit_serialize_item(u, f, "result", swap_result_to_string(s->result));
+ (void) serialize_item(f, "state", swap_state_to_string(s->state));
+ (void) serialize_item(f, "result", swap_result_to_string(s->result));
if (s->control_pid > 0)
- unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid);
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
if (s->control_command_id >= 0)
- unit_serialize_item(u, f, "control-command", swap_exec_command_to_string(s->control_command_id));
+ (void) serialize_item(f, "control-command", swap_exec_command_to_string(s->control_command_id));
return 0;
}
@@ -1012,8 +1038,11 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
}
- log_unit_full(u, f == SWAP_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
- "Swap process exited, code=%s status=%i", sigchld_code_to_string(code), status);
+ unit_log_process_exit(
+ u, f == SWAP_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Swap process",
+ swap_exec_command_to_string(s->control_command_id),
+ code, status);
switch (s->state) {
@@ -1084,7 +1113,6 @@ static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userd
static int swap_load_proc_swaps(Manager *m, bool set_flags) {
unsigned i;
- int r = 0;
assert(m);
@@ -1116,12 +1144,10 @@ static int swap_load_proc_swaps(Manager *m, bool set_flags) {
device_found_node(m, d, DEVICE_FOUND_SWAP, DEVICE_FOUND_SWAP);
- k = swap_process_new(m, d, prio, set_flags);
- if (k < 0)
- r = k;
+ (void) swap_process_new(m, d, prio, set_flags);
}
- return r;
+ return 0;
}
static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
@@ -1152,13 +1178,13 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
Swap *swap = SWAP(u);
if (!swap->is_active) {
- /* This has just been deactivated */
swap_unset_proc_swaps(swap);
switch (swap->state) {
case SWAP_ACTIVE:
+ /* This has just been deactivated */
swap_enter_dead(swap, SWAP_SUCCESS);
break;
@@ -1179,7 +1205,8 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
case SWAP_DEAD:
case SWAP_FAILED:
- (void) unit_acquire_invocation_id(UNIT(swap));
+ (void) unit_acquire_invocation_id(u);
+ swap_cycle_clear(swap);
swap_enter_active(swap, SWAP_SUCCESS);
break;
@@ -1320,18 +1347,17 @@ fail:
swap_shutdown(m);
}
-int swap_process_device_new(Manager *m, struct udev_device *dev) {
- struct udev_list_entry *item = NULL, *first = NULL;
+int swap_process_device_new(Manager *m, sd_device *dev) {
_cleanup_free_ char *e = NULL;
- const char *dn;
+ const char *dn, *devlink;
Unit *u;
int r = 0;
assert(m);
assert(dev);
- dn = udev_device_get_devnode(dev);
- if (!dn)
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
return 0;
r = unit_name_from_path(dn, ".swap", &e);
@@ -1342,12 +1368,11 @@ int swap_process_device_new(Manager *m, struct udev_device *dev) {
if (u)
r = swap_set_devnode(SWAP(u), dn);
- first = udev_device_get_devlinks_list_entry(dev);
- udev_list_entry_foreach(item, first) {
+ FOREACH_DEVICE_DEVLINK(dev, devlink) {
_cleanup_free_ char *n = NULL;
int q;
- q = unit_name_from_path(udev_list_entry_get_name(item), ".swap", &n);
+ q = unit_name_from_path(devlink, ".swap", &n);
if (q < 0)
return q;
@@ -1362,13 +1387,13 @@ int swap_process_device_new(Manager *m, struct udev_device *dev) {
return r;
}
-int swap_process_device_remove(Manager *m, struct udev_device *dev) {
+int swap_process_device_remove(Manager *m, sd_device *dev) {
const char *dn;
int r = 0;
Swap *s;
- dn = udev_device_get_devnode(dev);
- if (!dn)
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
return 0;
while ((s = hashmap_get(m->swaps_by_devnode, dn))) {
diff --git a/src/core/swap.h b/src/core/swap.h
index 1c0c7fcadc..1a4b60b957 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -5,7 +5,7 @@
Copyright © 2010 Maarten Lankhorst
***/
-#include "libudev.h"
+#include "sd-device.h"
#include "unit.h"
typedef struct Swap Swap;
@@ -85,8 +85,8 @@ struct Swap {
extern const UnitVTable swap_vtable;
-int swap_process_device_new(Manager *m, struct udev_device *dev);
-int swap_process_device_remove(Manager *m, struct udev_device *dev);
+int swap_process_device_new(Manager *m, sd_device *dev);
+int swap_process_device_remove(Manager *m, sd_device *dev);
const char* swap_exec_command_to_string(SwapExecCommand i) _const_;
SwapExecCommand swap_exec_command_from_string(const char *s) _pure_;
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
index f0a59a79a5..0a58737b82 100644
--- a/src/core/system.conf.in
+++ b/src/core/system.conf.in
@@ -23,9 +23,9 @@
#CrashReboot=no
#CtrlAltDelBurstAction=reboot-force
#CPUAffinity=1 2
-#JoinControllers=cpu,cpuacct net_cls,net_prio
#RuntimeWatchdogSec=0
#ShutdownWatchdogSec=10min
+#WatchdogDevice=
#CapabilityBoundingSet=
#NoNewPrivileges=no
#SystemCallArchitectures=
@@ -52,7 +52,7 @@
#DefaultLimitSTACK=
#DefaultLimitCORE=
#DefaultLimitRSS=
-#DefaultLimitNOFILE=
+#DefaultLimitNOFILE=1024:@HIGH_RLIMIT_NOFILE@
#DefaultLimitAS=
#DefaultLimitNPROC=
#DefaultLimitMEMLOCK=
@@ -62,5 +62,3 @@
#DefaultLimitNICE=
#DefaultLimitRTPRIO=
#DefaultLimitRTTIME=
-#IPAddressAllow=
-#IPAddressDeny=
diff --git a/src/core/target.c b/src/core/target.c
index 6446767504..421a304c73 100644
--- a/src/core/target.c
+++ b/src/core/target.c
@@ -1,12 +1,14 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "dbus-target.h"
+#include "dbus-unit.h"
#include "log.h"
+#include "serialize.h"
#include "special.h"
#include "string-util.h"
+#include "target.h"
#include "unit-name.h"
#include "unit.h"
-#include "target.h"
static const UnitActiveState state_translation_table[_TARGET_STATE_MAX] = {
[TARGET_DEAD] = UNIT_INACTIVE,
@@ -17,6 +19,9 @@ static void target_set_state(Target *t, TargetState state) {
TargetState old_state;
assert(t);
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
old_state = t->state;
t->state = state;
@@ -66,7 +71,7 @@ static int target_add_default_dependencies(Target *t) {
return 0;
/* Make sure targets are unloaded on shutdown */
- return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
static int target_load(Unit *u) {
@@ -144,7 +149,7 @@ static int target_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", target_state_to_string(s->state));
+ (void) serialize_item(f, "state", target_state_to_string(s->state));
return 0;
}
diff --git a/src/core/timer.c b/src/core/timer.c
index db202971d3..d9ba2f76b3 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -6,9 +6,11 @@
#include "bus-error.h"
#include "bus-util.h"
#include "dbus-timer.h"
+#include "dbus-unit.h"
#include "fs-util.h"
#include "parse-util.h"
#include "random-util.h"
+#include "serialize.h"
#include "special.h"
#include "string-table.h"
#include "string-util.h"
@@ -88,18 +90,18 @@ static int timer_add_default_dependencies(Timer *t) {
if (!UNIT(t)->default_dependencies)
return 0;
- r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
- r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
LIST_FOREACH(value, v, t->values) {
if (v->base == TIMER_CALENDAR) {
- r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
break;
@@ -107,7 +109,7 @@ static int timer_add_default_dependencies(Timer *t) {
}
}
- return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT);
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
}
static int timer_add_trigger_dependencies(Timer *t) {
@@ -246,6 +248,9 @@ static void timer_set_state(Timer *t, TimerState state) {
TimerState old_state;
assert(t);
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
old_state = t->state;
t->state = state;
@@ -262,7 +267,7 @@ static void timer_set_state(Timer *t, TimerState state) {
unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
}
-static void timer_enter_waiting(Timer *t, bool initial);
+static void timer_enter_waiting(Timer *t, bool time_change);
static int timer_coldplug(Unit *u) {
Timer *t = TIMER(u);
@@ -287,9 +292,7 @@ static void timer_enter_dead(Timer *t, TimerResult f) {
if (t->result == TIMER_SUCCESS)
t->result = f;
- if (t->result != TIMER_SUCCESS)
- log_unit_warning(UNIT(t), "Failed with result '%s'.", timer_result_to_string(t->result));
-
+ unit_log_result(UNIT(t), t->result == TIMER_SUCCESS, timer_result_to_string(t->result));
timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD);
}
@@ -332,7 +335,7 @@ static void add_random(Timer *t, usec_t *v) {
log_unit_debug(UNIT(t), "Adding %s random time.", format_timespan(s, sizeof(s), add, 0));
}
-static void timer_enter_waiting(Timer *t, bool initial) {
+static void timer_enter_waiting(Timer *t, bool time_change) {
bool found_monotonic = false, found_realtime = false;
bool leave_around = false;
triple_timestamp ts;
@@ -442,7 +445,8 @@ static void timer_enter_waiting(Timer *t, bool initial) {
v->next_elapse = usec_add(usec_shift_clock(base, CLOCK_MONOTONIC, TIMER_MONOTONIC_CLOCK(t)), v->value);
- if (!initial &&
+ if (dual_timestamp_is_set(&t->last_trigger) &&
+ !time_change &&
v->next_elapse < triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)) &&
IN_SET(v->base, TIMER_ACTIVE, TIMER_BOOT, TIMER_STARTUP)) {
/* This is a one time trigger, disable it now */
@@ -640,7 +644,7 @@ static int timer_start(Unit *u) {
}
t->result = TIMER_SUCCESS;
- timer_enter_waiting(t, true);
+ timer_enter_waiting(t, false);
return 1;
}
@@ -661,21 +665,20 @@ static int timer_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(f);
assert(fds);
- unit_serialize_item(u, f, "state", timer_state_to_string(t->state));
- unit_serialize_item(u, f, "result", timer_result_to_string(t->result));
+ (void) serialize_item(f, "state", timer_state_to_string(t->state));
+ (void) serialize_item(f, "result", timer_result_to_string(t->result));
if (t->last_trigger.realtime > 0)
- unit_serialize_item_format(u, f, "last-trigger-realtime", "%" PRIu64, t->last_trigger.realtime);
+ (void) serialize_usec(f, "last-trigger-realtime", t->last_trigger.realtime);
if (t->last_trigger.monotonic > 0)
- unit_serialize_item_format(u, f, "last-trigger-monotonic", "%" PRIu64, t->last_trigger.monotonic);
+ (void) serialize_usec(f, "last-trigger-monotonic", t->last_trigger.monotonic);
return 0;
}
static int timer_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
Timer *t = TIMER(u);
- int r;
assert(u);
assert(key);
@@ -690,6 +693,7 @@ static int timer_deserialize_item(Unit *u, const char *key, const char *value, F
log_unit_debug(u, "Failed to parse state value: %s", value);
else
t->deserialized_state = state;
+
} else if (streq(key, "result")) {
TimerResult f;
@@ -698,19 +702,12 @@ static int timer_deserialize_item(Unit *u, const char *key, const char *value, F
log_unit_debug(u, "Failed to parse result value: %s", value);
else if (f != TIMER_SUCCESS)
t->result = f;
- } else if (streq(key, "last-trigger-realtime")) {
-
- r = safe_atou64(value, &t->last_trigger.realtime);
- if (r < 0)
- log_unit_debug(u, "Failed to parse last-trigger-realtime value: %s", value);
-
- } else if (streq(key, "last-trigger-monotonic")) {
-
- r = safe_atou64(value, &t->last_trigger.monotonic);
- if (r < 0)
- log_unit_debug(u, "Failed to parse last-trigger-monotonic value: %s", value);
- } else
+ } else if (streq(key, "last-trigger-realtime"))
+ (void) deserialize_usec(value, &t->last_trigger.realtime);
+ else if (streq(key, "last-trigger-monotonic"))
+ (void) deserialize_usec(value, &t->last_trigger.monotonic);
+ else
log_unit_debug(u, "Unknown serialization key: %s", key);
return 0;
@@ -811,7 +808,7 @@ static void timer_time_change(Unit *u) {
t->last_trigger.realtime = ts;
log_unit_debug(u, "Time change, recalculating next elapse.");
- timer_enter_waiting(t, false);
+ timer_enter_waiting(t, true);
}
static void timer_timezone_change(Unit *u) {
diff --git a/src/core/transaction.c b/src/core/transaction.c
index 1c7efb207a..486c6a4a05 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -526,7 +526,9 @@ static int transaction_is_destructive(Transaction *tr, JobMode mode, sd_bus_erro
if (j->unit->job && (mode == JOB_FAIL || j->unit->job->irreversible) &&
job_type_is_conflicting(j->unit->job->type, j->type))
return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE,
- "Transaction is destructive.");
+ "Transaction for %s/%s is destructive (%s has '%s' job queued, but '%s' is included in transaction).",
+ tr->anchor_job->unit->id, job_type_to_string(tr->anchor_job->type),
+ j->unit->id, job_type_to_string(j->unit->job->type), job_type_to_string(j->type));
}
return 0;
@@ -695,10 +697,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error
if (r >= 0)
break;
- if (r != -EAGAIN) {
- log_warning("Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r));
- return r;
- }
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r));
/* Let's see if the resulting transaction ordering
* graph is still cyclic... */
@@ -712,10 +712,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error
if (r >= 0)
break;
- if (r != -EAGAIN) {
- log_warning("Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r));
- return r;
- }
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r));
/* Seventh step: an entry got dropped, let's garbage
* collect its dependencies. */
@@ -731,10 +729,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error
/* Ninth step: check whether we can actually apply this */
r = transaction_is_destructive(tr, mode, e);
- if (r < 0) {
- log_notice("Requested transaction contradicts existing jobs: %s", bus_error_message(e, r));
- return r;
- }
+ if (r < 0)
+ return log_notice_errno(r, "Requested transaction contradicts existing jobs: %s", bus_error_message(e, r));
/* Tenth step: apply changes */
r = transaction_apply(tr, m, mode);
diff --git a/src/core/umount.c b/src/core/umount.c
index 241fe6fc62..7af0195aab 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -13,22 +13,24 @@
/* This needs to be after sys/mount.h :( */
#include <libmount.h>
-#include "libudev.h"
+#include "sd-device.h"
#include "alloc-util.h"
#include "blockdev-util.h"
#include "def.h"
+#include "device-util.h"
#include "escape.h"
#include "fd-util.h"
#include "fstab-util.h"
#include "linux-3.13/dm-ioctl.h"
#include "mount-setup.h"
#include "mount-util.h"
+#include "mountpoint-util.h"
#include "path-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-util.h"
-#include "udev-util.h"
+#include "strv.h"
#include "umount.h"
#include "util.h"
#include "virt.h"
@@ -72,7 +74,8 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
for (;;) {
struct libmnt_fs *fs;
- const char *path, *options, *fstype;
+ const char *path, *fstype;
+ _cleanup_free_ char *options = NULL;
_cleanup_free_ char *p = NULL;
unsigned long remount_flags = 0u;
_cleanup_free_ char *remount_options = NULL;
@@ -92,9 +95,25 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
if (cunescape(path, UNESCAPE_RELAX, &p) < 0)
return log_oom();
- options = mnt_fs_get_options(fs);
fstype = mnt_fs_get_fstype(fs);
+ /* Combine the generic VFS options with the FS-specific
+ * options. Duplicates are not a problem here, because the only
+ * options that should come up twice are typically ro/rw, which
+ * are turned into MS_RDONLY or the invertion of it.
+ *
+ * Even if there are duplicates later in mount_option_mangle()
+ * it shouldn't hurt anyways as they override each other.
+ */
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_vfs_options(fs),
+ NULL))
+ return log_oom();
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_fs_options(fs),
+ NULL))
+ return log_oom();
+
/* Ignore mount points we can't unmount because they
* are API or because we are keeping them open (like
* /dev/console). Also, ignore all mounts below API
@@ -104,9 +123,7 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
* unmount these things, hence don't bother. */
if (mount_point_is_api(p) ||
mount_point_ignore(p) ||
- path_startswith(p, "/dev") ||
- path_startswith(p, "/sys") ||
- path_startswith(p, "/proc"))
+ PATH_STARTSWITH_SET(p, "/dev", "/sys", "/proc"))
continue;
/* If we are in a container, don't attempt to
@@ -212,121 +229,105 @@ int swap_list_get(const char *swaps, MountPoint **head) {
}
static int loopback_list_get(MountPoint **head) {
- _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL;
- struct udev_list_entry *item = NULL, *first = NULL;
- _cleanup_(udev_unrefp) struct udev *udev = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
int r;
assert(head);
- udev = udev_new();
- if (!udev)
- return -ENOMEM;
-
- e = udev_enumerate_new(udev);
- if (!e)
- return -ENOMEM;
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
- r = udev_enumerate_add_match_subsystem(e, "block");
+ r = sd_device_enumerator_allow_uninitialized(e);
if (r < 0)
return r;
- r = udev_enumerate_add_match_sysname(e, "loop*");
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
if (r < 0)
return r;
- r = udev_enumerate_add_match_sysattr(e, "loop/backing_file", NULL);
+ r = sd_device_enumerator_add_match_sysname(e, "loop*");
if (r < 0)
return r;
- r = udev_enumerate_scan_devices(e);
+ r = sd_device_enumerator_add_match_sysattr(e, "loop/backing_file", NULL, true);
if (r < 0)
return r;
- first = udev_enumerate_get_list_entry(e);
- udev_list_entry_foreach(item, first) {
- _cleanup_(udev_device_unrefp) struct udev_device *d;
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
const char *dn;
- _cleanup_free_ MountPoint *lb = NULL;
-
- d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
- if (!d)
- return -ENOMEM;
+ MountPoint *lb;
- dn = udev_device_get_devnode(d);
- if (!dn)
+ if (sd_device_get_devname(d, &dn) < 0)
continue;
- lb = new0(MountPoint, 1);
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ lb = new(MountPoint, 1);
if (!lb)
return -ENOMEM;
- r = free_and_strdup(&lb->path, dn);
- if (r < 0)
- return r;
+ *lb = (MountPoint) {
+ .path = TAKE_PTR(p),
+ };
LIST_PREPEND(mount_point, *head, lb);
- lb = NULL;
}
return 0;
}
static int dm_list_get(MountPoint **head) {
- _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL;
- struct udev_list_entry *item = NULL, *first = NULL;
- _cleanup_(udev_unrefp) struct udev *udev = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
int r;
assert(head);
- udev = udev_new();
- if (!udev)
- return -ENOMEM;
-
- e = udev_enumerate_new(udev);
- if (!e)
- return -ENOMEM;
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
- r = udev_enumerate_add_match_subsystem(e, "block");
+ r = sd_device_enumerator_allow_uninitialized(e);
if (r < 0)
return r;
- r = udev_enumerate_add_match_sysname(e, "dm-*");
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
if (r < 0)
return r;
- r = udev_enumerate_scan_devices(e);
+ r = sd_device_enumerator_add_match_sysname(e, "dm-*");
if (r < 0)
return r;
- first = udev_enumerate_get_list_entry(e);
- udev_list_entry_foreach(item, first) {
- _cleanup_(udev_device_unrefp) struct udev_device *d;
- dev_t devnum;
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
const char *dn;
- _cleanup_free_ MountPoint *m = NULL;
-
- d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
- if (!d)
- return -ENOMEM;
+ MountPoint *m;
+ dev_t devnum;
- devnum = udev_device_get_devnum(d);
- dn = udev_device_get_devnode(d);
- if (major(devnum) == 0 || !dn)
+ if (sd_device_get_devnum(d, &devnum) < 0 ||
+ sd_device_get_devname(d, &dn) < 0)
continue;
- m = new0(MountPoint, 1);
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ m = new(MountPoint, 1);
if (!m)
return -ENOMEM;
- m->devnum = devnum;
- r = free_and_strdup(&m->path, dn);
- if (r < 0)
- return r;
+ *m = (MountPoint) {
+ .path = TAKE_PTR(p),
+ .devnum = devnum,
+ };
LIST_PREPEND(mount_point, *head, m);
- m = NULL;
}
return 0;
diff --git a/src/core/unit-printf.c b/src/core/unit-printf.c
index 046e937e92..a8e84ebe80 100644
--- a/src/core/unit-printf.c
+++ b/src/core/unit-printf.c
@@ -12,25 +12,25 @@
#include "unit.h"
#include "user-util.h"
-static int specifier_prefix_and_instance(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
assert(u);
return unit_name_to_prefix_and_instance(u->id, ret);
}
-static int specifier_prefix(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
assert(u);
return unit_name_to_prefix(u->id, ret);
}
-static int specifier_prefix_unescaped(char specifier, void *data, void *userdata, char **ret) {
+static int specifier_prefix_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
_cleanup_free_ char *p = NULL;
- Unit *u = userdata;
+ const Unit *u = userdata;
int r;
assert(u);
@@ -42,16 +42,16 @@ static int specifier_prefix_unescaped(char specifier, void *data, void *userdata
return unit_name_unescape(p, ret);
}
-static int specifier_instance_unescaped(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_instance_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
assert(u);
return unit_name_unescape(strempty(u->instance), ret);
}
-static int specifier_last_component(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
_cleanup_free_ char *prefix = NULL;
char *dash;
int r;
@@ -70,7 +70,7 @@ static int specifier_last_component(char specifier, void *data, void *userdata,
return 0;
}
-static int specifier_last_component_unescaped(char specifier, void *data, void *userdata, char **ret) {
+static int specifier_last_component_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
_cleanup_free_ char *p = NULL;
int r;
@@ -81,8 +81,8 @@ static int specifier_last_component_unescaped(char specifier, void *data, void *
return unit_name_unescape(p, ret);
}
-static int specifier_filename(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_filename(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
assert(u);
@@ -92,12 +92,12 @@ static int specifier_filename(char specifier, void *data, void *userdata, char *
return unit_name_to_path(u->id, ret);
}
-static void bad_specifier(Unit *u, char specifier) {
+static void bad_specifier(const Unit *u, char specifier) {
log_unit_warning(u, "Specifier '%%%c' used in unit configuration, which is deprecated. Please update your unit file, as it does not work as intended.", specifier);
}
-static int specifier_cgroup(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_cgroup(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
char *n;
assert(u);
@@ -115,8 +115,8 @@ static int specifier_cgroup(char specifier, void *data, void *userdata, char **r
return 0;
}
-static int specifier_cgroup_root(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_cgroup_root(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
char *n;
assert(u);
@@ -131,8 +131,8 @@ static int specifier_cgroup_root(char specifier, void *data, void *userdata, cha
return 0;
}
-static int specifier_cgroup_slice(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_cgroup_slice(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
char *n;
assert(u);
@@ -140,7 +140,7 @@ static int specifier_cgroup_slice(char specifier, void *data, void *userdata, ch
bad_specifier(u, specifier);
if (UNIT_ISSET(u->slice)) {
- Unit *slice;
+ const Unit *slice;
slice = UNIT_DEREF(u->slice);
@@ -157,8 +157,8 @@ static int specifier_cgroup_slice(char specifier, void *data, void *userdata, ch
return 0;
}
-static int specifier_special_directory(char specifier, void *data, void *userdata, char **ret) {
- Unit *u = userdata;
+static int specifier_special_directory(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
char *n = NULL;
assert(u);
@@ -196,6 +196,8 @@ int unit_name_printf(Unit *u, const char* format, char **ret) {
{ 'p', specifier_prefix, NULL },
{ 'i', specifier_string, u->instance },
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
{ 'U', specifier_user_id, NULL },
{ 'u', specifier_user_name, NULL },
@@ -264,6 +266,8 @@ int unit_full_printf(Unit *u, const char *format, char **ret) {
{ 'T', specifier_tmp_dir, NULL },
{ 'V', specifier_var_tmp_dir, NULL },
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
{ 'U', specifier_user_id, NULL },
{ 'u', specifier_user_name, NULL },
{ 'h', specifier_user_home, NULL },
@@ -282,35 +286,3 @@ int unit_full_printf(Unit *u, const char *format, char **ret) {
return specifier_printf(format, table, u, ret);
}
-
-int unit_full_printf_strv(Unit *u, char **l, char ***ret) {
- size_t n;
- char **r, **i, **j;
- int q;
-
- /* Applies unit_full_printf to every entry in l */
-
- assert(u);
-
- n = strv_length(l);
- r = new(char*, n+1);
- if (!r)
- return -ENOMEM;
-
- for (i = l, j = r; *i; i++, j++) {
- q = unit_full_printf(u, *i, j);
- if (q < 0)
- goto fail;
- }
-
- *j = NULL;
- *ret = r;
- return 0;
-
-fail:
- for (j--; j >= r; j--)
- free(*j);
-
- free(r);
- return q;
-}
diff --git a/src/core/unit-printf.h b/src/core/unit-printf.h
index 5bd1d77bb2..f3dae159d5 100644
--- a/src/core/unit-printf.h
+++ b/src/core/unit-printf.h
@@ -5,4 +5,3 @@
int unit_name_printf(Unit *u, const char* text, char **ret);
int unit_full_printf(Unit *u, const char *text, char **ret);
-int unit_full_printf_strv(Unit *u, char **l, char ***ret);
diff --git a/src/core/unit.c b/src/core/unit.c
index 113205bf25..24b14fbcd6 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -10,8 +10,8 @@
#include "sd-id128.h"
#include "sd-messages.h"
-#include "alloc-util.h"
#include "all-units.h"
+#include "alloc-util.h"
#include "bus-common-errors.h"
#include "bus-util.h"
#include "cgroup-util.h"
@@ -22,6 +22,7 @@
#include "execute.h"
#include "fd-util.h"
#include "fileio-label.h"
+#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
#include "id128-util.h"
@@ -35,6 +36,7 @@
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "serialize.h"
#include "set.h"
#include "signal-util.h"
#include "sparse-endian.h"
@@ -45,6 +47,8 @@
#include "string-table.h"
#include "string-util.h"
#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
#include "umask-util.h"
#include "unit-name.h"
#include "unit.h"
@@ -93,7 +97,8 @@ Unit *unit_new(Manager *m, size_t size) {
u->ref_uid = UID_INVALID;
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
- u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+ u->failure_action_exit_status = u->success_action_exit_status = -1;
u->ip_accounting_ingress_map_fd = -1;
u->ip_accounting_egress_map_fd = -1;
@@ -127,7 +132,7 @@ int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
return r;
}
-bool unit_has_name(Unit *u, const char *name) {
+bool unit_has_name(const Unit *u, const char *name) {
assert(u);
assert(name);
@@ -438,6 +443,22 @@ void unit_add_to_dbus_queue(Unit *u) {
u->in_dbus_queue = true;
}
+void unit_submit_to_stop_when_unneeded_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_stop_when_unneeded_queue)
+ return;
+
+ if (!u->stop_when_unneeded)
+ return;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return;
+
+ LIST_PREPEND(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = true;
+}
+
static void bidi_set_free(Unit *u, Hashmap *h) {
Unit *other;
Iterator i;
@@ -553,6 +574,14 @@ void unit_free(Unit *u) {
if (!u)
return;
+ if (UNIT_ISSET(u->slice)) {
+ /* A unit is being dropped from the tree, make sure our parent slice recalculates the member mask */
+ unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
+
+ /* And make sure the parent is realized again, updating cgroup memberships */
+ unit_add_to_cgroup_realize_queue(UNIT_DEREF(u->slice));
+ }
+
u->transient_file = safe_fclose(u->transient_file);
if (!MANAGER_IS_RELOADING(u->manager))
@@ -634,6 +663,9 @@ void unit_free(Unit *u) {
if (u->in_target_deps_queue)
LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+ if (u->in_stop_when_unneeded_queue)
+ LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+
safe_close(u->ip_accounting_ingress_map_fd);
safe_close(u->ip_accounting_egress_map_fd);
@@ -647,6 +679,8 @@ void unit_free(Unit *u) {
bpf_program_unref(u->ip_bpf_egress);
bpf_program_unref(u->ip_bpf_egress_installed);
+ bpf_program_unref(u->bpf_device_control_installed);
+
condition_free_list(u->conditions);
condition_free_list(u->asserts);
@@ -943,7 +977,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
assert(u);
assert(c);
- if (c->working_directory) {
+ if (c->working_directory && !c->working_directory_missing_ok) {
r = unit_require_mounts_for(u, c->working_directory, UNIT_DEPENDENCY_FILE);
if (r < 0)
return r;
@@ -990,7 +1024,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
return r;
}
- r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
return r;
}
@@ -1008,7 +1042,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
/* If syslog or kernel logging is requested, make sure our own
* logging daemon is run first. */
- r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE);
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, true, UNIT_DEPENDENCY_FILE);
if (r < 0)
return r;
@@ -1133,17 +1167,20 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
(void) cg_mask_to_string(u->cgroup_realized_mask, &s);
fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s));
}
+
if (u->cgroup_enabled_mask != 0) {
_cleanup_free_ char *s = NULL;
(void) cg_mask_to_string(u->cgroup_enabled_mask, &s);
fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s));
}
+
m = unit_get_own_mask(u);
if (m != 0) {
_cleanup_free_ char *s = NULL;
(void) cg_mask_to_string(m, &s);
fprintf(f, "%s\tCGroup own mask: %s\n", prefix, strnull(s));
}
+
m = unit_get_members_mask(u);
if (m != 0) {
_cleanup_free_ char *s = NULL;
@@ -1151,6 +1188,13 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
fprintf(f, "%s\tCGroup members mask: %s\n", prefix, strnull(s));
}
+ m = unit_get_delegate_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup delegate mask: %s\n", prefix, strnull(s));
+ }
+
SET_FOREACH(t, u->names, i)
fprintf(f, "%s\tName: %s\n", prefix, t);
@@ -1184,8 +1228,12 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
if (u->failure_action != EMERGENCY_ACTION_NONE)
fprintf(f, "%s\tFailure Action: %s\n", prefix, emergency_action_to_string(u->failure_action));
+ if (u->failure_action_exit_status >= 0)
+ fprintf(f, "%s\tFailure Action Exit Status: %i\n", prefix, u->failure_action_exit_status);
if (u->success_action != EMERGENCY_ACTION_NONE)
fprintf(f, "%s\tSuccess Action: %s\n", prefix, emergency_action_to_string(u->success_action));
+ if (u->success_action_exit_status >= 0)
+ fprintf(f, "%s\tSuccess Action Exit Status: %i\n", prefix, u->success_action_exit_status);
if (u->job_timeout != USEC_INFINITY)
fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
@@ -1379,7 +1427,7 @@ static int unit_add_slice_dependencies(Unit *u) {
if (unit_has_name(u, SPECIAL_ROOT_SLICE))
return 0;
- return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, NULL, true, mask);
+ return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, true, mask);
}
static int unit_add_mount_dependencies(Unit *u) {
@@ -1469,6 +1517,9 @@ int unit_load(Unit *u) {
return 0;
if (u->transient_file) {
+ /* Finalize transient file: if this is a transient unit file, as soon as we reach unit_load() the setup
+ * is complete, hence let's synchronize the unit file we just wrote to disk. */
+
r = fflush_and_check(u->transient_file);
if (r < 0)
goto fail;
@@ -1512,7 +1563,8 @@ int unit_load(Unit *u) {
if (u->job_running_timeout != USEC_INFINITY && u->job_running_timeout > u->job_timeout)
log_unit_warning(u, "JobRunningTimeoutSec= is greater than JobTimeoutSec=, it has no effect.");
- unit_update_cgroup_members_masks(u);
+ /* We finished loading, let's ensure our parents recalculate the members mask */
+ unit_invalidate_cgroup_members_masks(u);
}
assert((u->load_state != UNIT_MERGED) == !u->merged_into);
@@ -1587,6 +1639,8 @@ static bool unit_condition_test(Unit *u) {
dual_timestamp_get(&u->condition_timestamp);
u->condition_result = unit_condition_test_list(u, u->conditions, condition_type_to_string);
+ unit_add_to_dbus_queue(u);
+
return u->condition_result;
}
@@ -1596,103 +1650,26 @@ static bool unit_assert_test(Unit *u) {
dual_timestamp_get(&u->assert_timestamp);
u->assert_result = unit_condition_test_list(u, u->asserts, assert_type_to_string);
+ unit_add_to_dbus_queue(u);
+
return u->assert_result;
}
void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) {
- DISABLE_WARNING_FORMAT_NONLITERAL;
- manager_status_printf(u->manager, STATUS_TYPE_NORMAL, status, unit_status_msg_format, unit_description(u));
- REENABLE_WARNING;
-}
-
-_pure_ static const char* unit_get_status_message_format(Unit *u, JobType t) {
- const char *format;
- const UnitStatusMessageFormats *format_table;
-
- assert(u);
- assert(IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD));
-
- if (t != JOB_RELOAD) {
- format_table = &UNIT_VTABLE(u)->status_message_formats;
- if (format_table) {
- format = format_table->starting_stopping[t == JOB_STOP];
- if (format)
- return format;
- }
- }
-
- /* Return generic strings */
- if (t == JOB_START)
- return "Starting %s.";
- else if (t == JOB_STOP)
- return "Stopping %s.";
- else
- return "Reloading %s.";
-}
-
-static void unit_status_print_starting_stopping(Unit *u, JobType t) {
- const char *format;
-
- assert(u);
-
- /* Reload status messages have traditionally not been printed to console. */
- if (!IN_SET(t, JOB_START, JOB_STOP))
- return;
+ const char *d;
- format = unit_get_status_message_format(u, t);
+ d = unit_description(u);
+ if (log_get_show_color())
+ d = strjoina(ANSI_HIGHLIGHT, d, ANSI_NORMAL);
DISABLE_WARNING_FORMAT_NONLITERAL;
- unit_status_printf(u, "", format);
+ manager_status_printf(u->manager, STATUS_TYPE_NORMAL, status, unit_status_msg_format, d);
REENABLE_WARNING;
}
-static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
- const char *format, *mid;
- char buf[LINE_MAX];
-
- assert(u);
-
- if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD))
- return;
-
- if (log_on_console())
- return;
-
- /* We log status messages for all units and all operations. */
-
- format = unit_get_status_message_format(u, t);
-
- DISABLE_WARNING_FORMAT_NONLITERAL;
- (void) snprintf(buf, sizeof buf, format, unit_description(u));
- REENABLE_WARNING;
-
- mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR :
- t == JOB_STOP ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR :
- "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR;
-
- /* Note that we deliberately use LOG_MESSAGE() instead of
- * LOG_UNIT_MESSAGE() here, since this is supposed to mimic
- * closely what is written to screen using the status output,
- * which is supposed the highest level, friendliest output
- * possible, which means we should avoid the low-level unit
- * name. */
- log_struct(LOG_INFO,
- LOG_MESSAGE("%s", buf),
- LOG_UNIT_ID(u),
- LOG_UNIT_INVOCATION_ID(u),
- mid);
-}
-
-void unit_status_emit_starting_stopping_reloading(Unit *u, JobType t) {
- assert(u);
- assert(t >= 0);
- assert(t < _JOB_TYPE_MAX);
-
- unit_status_log_starting_stopping_reloading(u, t);
- unit_status_print_starting_stopping(u, t);
-}
-
int unit_start_limit_test(Unit *u) {
+ const char *reason;
+
assert(u);
if (ratelimit_below(&u->start_limit)) {
@@ -1703,7 +1680,11 @@ int unit_start_limit_test(Unit *u) {
log_unit_warning(u, "Start request repeated too quickly.");
u->start_limit_hit = true;
- return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed");
+ reason = strjoina("unit ", u->id, " failed");
+
+ return emergency_action(u->manager, u->start_limit_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->reboot_arg, -1, reason);
}
bool unit_shall_confirm_spawn(Unit *u) {
@@ -1784,7 +1765,7 @@ int unit_start(Unit *u) {
if (state != UNIT_ACTIVATING &&
!unit_condition_test(u)) {
log_unit_debug(u, "Starting requested but condition failed. Not starting unit.");
- return -EALREADY;
+ return -ECOMM;
}
/* If the asserts failed, fail the entire job */
@@ -1950,55 +1931,71 @@ bool unit_can_reload(Unit *u) {
return UNIT_VTABLE(u)->reload;
}
-static void unit_check_unneeded(Unit *u) {
-
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
-
- static const UnitDependency needed_dependencies[] = {
+bool unit_is_unneeded(Unit *u) {
+ static const UnitDependency deps[] = {
UNIT_REQUIRED_BY,
UNIT_REQUISITE_OF,
UNIT_WANTED_BY,
UNIT_BOUND_BY,
};
-
- unsigned j;
- int r;
+ size_t j;
assert(u);
- /* If this service shall be shut down when unneeded then do
- * so. */
-
if (!u->stop_when_unneeded)
- return;
+ return false;
- if (!UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)))
- return;
+ /* Don't clean up while the unit is transitioning or is even inactive. */
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return false;
+ if (u->job)
+ return false;
- for (j = 0; j < ELEMENTSOF(needed_dependencies); j++) {
+ for (j = 0; j < ELEMENTSOF(deps); j++) {
Unit *other;
Iterator i;
void *v;
- HASHMAP_FOREACH_KEY(v, other, u->dependencies[needed_dependencies[j]], i)
- if (unit_active_or_pending(other) || unit_will_restart(other))
- return;
- }
+ /* If a dependent unit has a job queued, is active or transitioning, or is marked for
+ * restart, then don't clean this one up. */
- /* If stopping a unit fails continuously we might enter a stop
- * loop here, hence stop acting on the service being
- * unnecessary after a while. */
- if (!ratelimit_below(&u->auto_stop_ratelimit)) {
- log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
- return;
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i) {
+ if (other->job)
+ return false;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ return false;
+
+ if (unit_will_restart(other))
+ return false;
+ }
}
- log_unit_info(u, "Unit not needed anymore. Stopping.");
+ return true;
+}
- /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
- r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL);
- if (r < 0)
- log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+static void check_unneeded_dependencies(Unit *u) {
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ };
+ size_t j;
+
+ assert(u);
+
+ /* Add all units this unit depends on to the queue that processes StopWhenUnneeded= behaviour. */
+
+ for (j = 0; j < ELEMENTSOF(deps); j++) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i)
+ unit_submit_to_stop_when_unneeded_queue(other);
+ }
}
static void unit_check_binds_to(Unit *u) {
@@ -2098,29 +2095,6 @@ static void retroactively_stop_dependencies(Unit *u) {
manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL);
}
-static void check_unneeded_dependencies(Unit *u) {
- Unit *other;
- Iterator i;
- void *v;
-
- assert(u);
- assert(UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)));
-
- /* Garbage collect services that might not be needed anymore, if enabled */
- HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRES], i)
- if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
- unit_check_unneeded(other);
- HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_WANTS], i)
- if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
- unit_check_unneeded(other);
- HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUISITE], i)
- if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
- unit_check_unneeded(other);
- HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO], i)
- if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
- unit_check_unneeded(other);
-}
-
void unit_start_on_failure(Unit *u) {
Unit *other;
Iterator i;
@@ -2156,8 +2130,9 @@ void unit_trigger_notify(Unit *u) {
}
static int unit_log_resources(Unit *u) {
-
struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+ bool any_traffic = false, have_ip_accounting = false;
+ _cleanup_free_ char *igress = NULL, *egress = NULL;
size_t n_message_parts = 0, n_iovec = 0;
char* message_parts[3 + 1], *t;
nsec_t nsec = NSEC_INFINITY;
@@ -2190,7 +2165,7 @@ static int unit_log_resources(Unit *u) {
/* Format the CPU time for inclusion in the human language message string */
format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
- t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
+ t = strjoin("consumed ", buf, " CPU time");
if (!t) {
r = log_oom();
goto finish;
@@ -2209,6 +2184,10 @@ static int unit_log_resources(Unit *u) {
if (value == UINT64_MAX)
continue;
+ have_ip_accounting = true;
+ if (value > 0)
+ any_traffic = true;
+
/* Format IP accounting data for inclusion in the structured log message */
if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
r = log_oom();
@@ -2218,22 +2197,41 @@ static int unit_log_resources(Unit *u) {
/* Format the IP accounting data for inclusion in the human language message string, but only for the
* bytes counters (and not for the packets counters) */
- if (m == CGROUP_IP_INGRESS_BYTES)
- t = strjoin(n_message_parts > 0 ? "received " : "Received ",
- format_bytes(buf, sizeof(buf), value),
- " IP traffic");
- else if (m == CGROUP_IP_EGRESS_BYTES)
- t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
- format_bytes(buf, sizeof(buf), value),
- " IP traffic");
- else
- continue;
- if (!t) {
- r = log_oom();
- goto finish;
+ if (m == CGROUP_IP_INGRESS_BYTES) {
+ assert(!igress);
+ igress = strjoin("received ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!igress) {
+ r = log_oom();
+ goto finish;
+ }
+ } else if (m == CGROUP_IP_EGRESS_BYTES) {
+ assert(!egress);
+ egress = strjoin("sent ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!egress) {
+ r = log_oom();
+ goto finish;
+ }
}
+ }
- message_parts[n_message_parts++] = t;
+ if (have_ip_accounting) {
+ if (any_traffic) {
+ if (igress)
+ message_parts[n_message_parts++] = TAKE_PTR(igress);
+ if (egress)
+ message_parts[n_message_parts++] = TAKE_PTR(egress);
+
+ } else {
+ char *k;
+
+ k = strdup("no IP traffic");
+ if (!k) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = k;
+ }
}
/* Is there any accounting data available at all? */
@@ -2243,7 +2241,7 @@ static int unit_log_resources(Unit *u) {
}
if (n_message_parts == 0)
- t = strjoina("MESSAGE=", u->id, ": Completed");
+ t = strjoina("MESSAGE=", u->id, ": Completed.");
else {
_cleanup_free_ char *joined;
@@ -2255,7 +2253,8 @@ static int unit_log_resources(Unit *u) {
goto finish;
}
- t = strjoina("MESSAGE=", u->id, ": ", joined);
+ joined[0] = ascii_toupper(joined[0]);
+ t = strjoina("MESSAGE=", u->id, ": ", joined, ".");
}
/* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
@@ -2299,8 +2298,105 @@ static void unit_update_on_console(Unit *u) {
manager_unref_console(u->manager);
}
+static void unit_emit_audit_start(Unit *u) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ /* Write audit record if we have just finished starting up */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, true);
+ u->in_audit = true;
+}
+
+static void unit_emit_audit_stop(Unit *u, UnitActiveState state) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ if (u->in_audit) {
+ /* Write audit record if we have just finished shutting down */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, state == UNIT_INACTIVE);
+ u->in_audit = false;
+ } else {
+ /* Hmm, if there was no start record written write it now, so that we always have a nice pair */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, state == UNIT_INACTIVE);
+
+ if (state == UNIT_INACTIVE)
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, true);
+ }
+}
+
+static bool unit_process_job(Job *j, UnitActiveState ns, UnitNotifyFlags flags) {
+ bool unexpected = false;
+
+ assert(j);
+
+ if (j->state == JOB_WAITING)
+
+ /* So we reached a different state for this job. Let's see if we can run it now if it failed previously
+ * due to EAGAIN. */
+ job_add_to_run_queue(j);
+
+ /* Let's check whether the unit's new state constitutes a finished job, or maybe contradicts a running job and
+ * hence needs to invalidate jobs. */
+
+ switch (j->type) {
+
+ case JOB_START:
+ case JOB_VERIFY_ACTIVE:
+
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_ACTIVATING) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
+ }
+
+ break;
+
+ case JOB_RELOAD:
+ case JOB_RELOAD_OR_START:
+ case JOB_TRY_RELOAD:
+
+ if (j->state == JOB_RUNNING) {
+ if (ns == UNIT_ACTIVE)
+ job_finish_and_invalidate(j, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false);
+ else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
+ }
+ }
+
+ break;
+
+ case JOB_STOP:
+ case JOB_RESTART:
+ case JOB_TRY_RESTART:
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
+ unexpected = true;
+ job_finish_and_invalidate(j, JOB_FAILED, true, false);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Job type unknown");
+ }
+
+ return unexpected;
+}
+
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags) {
- bool unexpected;
+ const char *reason;
Manager *m;
assert(u);
@@ -2313,6 +2409,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
m = u->manager;
+ /* Let's enqueue the change signal early. In case this unit has a job associated we want that this unit is in
+ * the bus queue, so that any job change signal queued will force out the unit change signal first. */
+ unit_add_to_dbus_queue(u);
+
/* Update timestamps for state changes */
if (!MANAGER_IS_RELOADING(m)) {
dual_timestamp_get(&u->state_change_timestamp);
@@ -2329,7 +2429,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
}
/* Keep track of failed units */
- (void) manager_update_failed_units(u->manager, u, ns == UNIT_FAILED);
+ (void) manager_update_failed_units(m, u, ns == UNIT_FAILED);
/* Make sure the cgroup and state files are always removed when we become inactive */
if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
@@ -2339,81 +2439,18 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
unit_update_on_console(u);
- if (u->job) {
- unexpected = false;
-
- if (u->job->state == JOB_WAITING)
-
- /* So we reached a different state for this
- * job. Let's see if we can run it now if it
- * failed previously due to EAGAIN. */
- job_add_to_run_queue(u->job);
-
- /* Let's check whether this state change constitutes a
- * finished job, or maybe contradicts a running job and
- * hence needs to invalidate jobs. */
-
- switch (u->job->type) {
-
- case JOB_START:
- case JOB_VERIFY_ACTIVE:
-
- if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
- job_finish_and_invalidate(u->job, JOB_DONE, true, false);
- else if (u->job->state == JOB_RUNNING && ns != UNIT_ACTIVATING) {
- unexpected = true;
-
- if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
- }
-
- break;
-
- case JOB_RELOAD:
- case JOB_RELOAD_OR_START:
- case JOB_TRY_RELOAD:
-
- if (u->job->state == JOB_RUNNING) {
- if (ns == UNIT_ACTIVE)
- job_finish_and_invalidate(u->job, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false);
- else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) {
- unexpected = true;
-
- if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
- }
- }
-
- break;
-
- case JOB_STOP:
- case JOB_RESTART:
- case JOB_TRY_RESTART:
-
- if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, JOB_DONE, true, false);
- else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
- unexpected = true;
- job_finish_and_invalidate(u->job, JOB_FAILED, true, false);
- }
-
- break;
-
- default:
- assert_not_reached("Job type unknown");
- }
-
- } else
- unexpected = true;
-
if (!MANAGER_IS_RELOADING(m)) {
+ bool unexpected;
- /* If this state change happened without being
- * requested by a job, then let's retroactively start
- * or stop dependencies. We skip that step when
- * deserializing, since we don't want to create any
- * additional jobs just because something is already
- * activated. */
+ /* Let's propagate state changes to the job */
+ if (u->job)
+ unexpected = unit_process_job(u->job, ns, flags);
+ else
+ unexpected = true;
+
+ /* If this state change happened without being requested by a job, then let's retroactively start or
+ * stop dependencies. We skip that step when deserializing, since we don't want to create any
+ * additional jobs just because something is already activated. */
if (unexpected) {
if (UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_ACTIVE_OR_ACTIVATING(ns))
@@ -2423,7 +2460,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
}
/* stop unneeded units regardless if going down was expected or not */
- if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
check_unneeded_dependencies(u);
if (ns != os && ns == UNIT_FAILED) {
@@ -2432,46 +2469,18 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
if (!(flags & UNIT_NOTIFY_WILL_AUTO_RESTART))
unit_start_on_failure(u);
}
- }
-
- if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) {
- if (u->type == UNIT_SERVICE &&
- !UNIT_IS_ACTIVE_OR_RELOADING(os) &&
- !MANAGER_IS_RELOADING(m)) {
- /* Write audit record if we have just finished starting up */
- manager_send_unit_audit(m, u, AUDIT_SERVICE_START, true);
- u->in_audit = true;
- }
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns) && !UNIT_IS_ACTIVE_OR_RELOADING(os)) {
+ /* This unit just finished starting up */
- if (!UNIT_IS_ACTIVE_OR_RELOADING(os))
+ unit_emit_audit_start(u);
manager_send_unit_plymouth(m, u);
+ }
- } else {
-
- if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
- !UNIT_IS_INACTIVE_OR_FAILED(os)
- && !MANAGER_IS_RELOADING(m)) {
-
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os)) {
/* This unit just stopped/failed. */
- if (u->type == UNIT_SERVICE) {
-
- /* Hmm, if there was no start record written
- * write it now, so that we always have a nice
- * pair */
- if (!u->in_audit) {
- manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
- if (ns == UNIT_INACTIVE)
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
- } else
- /* Write audit record if we have just finished shutting down */
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
-
- u->in_audit = false;
- }
-
- /* Write a log message about consumed resources */
+ unit_emit_audit_stop(u, ns);
unit_log_resources(u);
}
}
@@ -2481,22 +2490,24 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
unit_trigger_notify(u);
- if (!MANAGER_IS_RELOADING(u->manager)) {
+ if (!MANAGER_IS_RELOADING(m)) {
/* Maybe we finished startup and are now ready for being stopped because unneeded? */
- unit_check_unneeded(u);
+ unit_submit_to_stop_when_unneeded_queue(u);
/* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when
* something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive,
* without ever entering started.) */
unit_check_binds_to(u);
- if (os != UNIT_FAILED && ns == UNIT_FAILED)
- (void) emergency_action(u->manager, u->failure_action, u->reboot_arg, "unit failed");
- else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE)
- (void) emergency_action(u->manager, u->success_action, u->reboot_arg, "unit succeeded");
+ if (os != UNIT_FAILED && ns == UNIT_FAILED) {
+ reason = strjoina("unit ", u->id, " failed");
+ (void) emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason);
+ } else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) {
+ reason = strjoina("unit ", u->id, " succeeded");
+ (void) emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason);
+ }
}
- unit_add_to_dbus_queue(u);
unit_add_to_gc_queue(u);
}
@@ -2882,17 +2893,14 @@ int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit
return unit_add_dependency(u, e, other, add_reference, mask);
}
-static int resolve_template(Unit *u, const char *name, const char*path, char **buf, const char **ret) {
+static int resolve_template(Unit *u, const char *name, char **buf, const char **ret) {
int r;
assert(u);
- assert(name || path);
+ assert(name);
assert(buf);
assert(ret);
- if (!name)
- name = basename(path);
-
if (!unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
*buf = NULL;
*ret = name;
@@ -2917,38 +2925,38 @@ static int resolve_template(Unit *u, const char *name, const char*path, char **b
return 0;
}
-int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, const char *path, bool add_reference, UnitDependencyMask mask) {
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask) {
_cleanup_free_ char *buf = NULL;
Unit *other;
int r;
assert(u);
- assert(name || path);
+ assert(name);
- r = resolve_template(u, name, path, &buf, &name);
+ r = resolve_template(u, name, &buf, &name);
if (r < 0)
return r;
- r = manager_load_unit(u->manager, name, path, NULL, &other);
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
if (r < 0)
return r;
return unit_add_dependency(u, d, other, add_reference, mask);
}
-int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, const char *path, bool add_reference, UnitDependencyMask mask) {
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask) {
_cleanup_free_ char *buf = NULL;
Unit *other;
int r;
assert(u);
- assert(name || path);
+ assert(name);
- r = resolve_template(u, name, path, &buf, &name);
+ r = resolve_template(u, name, &buf, &name);
if (r < 0)
return r;
- r = manager_load_unit(u->manager, name, path, NULL, &other);
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
if (r < 0)
return r;
@@ -3019,7 +3027,6 @@ int unit_set_slice(Unit *u, Unit *slice) {
}
int unit_set_default_slice(Unit *u) {
- _cleanup_free_ char *b = NULL;
const char *slice_name;
Unit *slice;
int r;
@@ -3047,13 +3054,9 @@ int unit_set_default_slice(Unit *u) {
return -ENOMEM;
if (MANAGER_IS_SYSTEM(u->manager))
- b = strjoin("system-", escaped, ".slice");
+ slice_name = strjoina("system-", escaped, ".slice");
else
- b = strappend(escaped, ".slice");
- if (!b)
- return -ENOMEM;
-
- slice_name = b;
+ slice_name = strjoina(escaped, ".slice");
} else
slice_name =
MANAGER_IS_SYSTEM(u->manager) && !unit_has_name(u, SPECIAL_INIT_SCOPE)
@@ -3178,23 +3181,21 @@ bool unit_can_serialize(Unit *u) {
return UNIT_VTABLE(u)->serialize && UNIT_VTABLE(u)->deserialize_item;
}
-static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
+static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
_cleanup_free_ char *s = NULL;
- int r = 0;
+ int r;
assert(f);
assert(key);
- if (mask != 0) {
- r = cg_mask_to_string(mask, &s);
- if (r >= 0) {
- fputs(key, f);
- fputc('=', f);
- fputs(s, f);
- fputc('\n', f);
- }
- }
- return r;
+ if (mask == 0)
+ return 0;
+
+ r = cg_mask_to_string(mask, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format cgroup mask: %m");
+
+ return serialize_item(f, key, s);
}
static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
@@ -3218,46 +3219,50 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
return r;
}
- dual_timestamp_serialize(f, "state-change-timestamp", &u->state_change_timestamp);
+ (void) serialize_dual_timestamp(f, "state-change-timestamp", &u->state_change_timestamp);
- dual_timestamp_serialize(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp);
- dual_timestamp_serialize(f, "active-enter-timestamp", &u->active_enter_timestamp);
- dual_timestamp_serialize(f, "active-exit-timestamp", &u->active_exit_timestamp);
- dual_timestamp_serialize(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp);
+ (void) serialize_dual_timestamp(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "active-enter-timestamp", &u->active_enter_timestamp);
+ (void) serialize_dual_timestamp(f, "active-exit-timestamp", &u->active_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp);
- dual_timestamp_serialize(f, "condition-timestamp", &u->condition_timestamp);
- dual_timestamp_serialize(f, "assert-timestamp", &u->assert_timestamp);
+ (void) serialize_dual_timestamp(f, "condition-timestamp", &u->condition_timestamp);
+ (void) serialize_dual_timestamp(f, "assert-timestamp", &u->assert_timestamp);
if (dual_timestamp_is_set(&u->condition_timestamp))
- unit_serialize_item(u, f, "condition-result", yes_no(u->condition_result));
+ (void) serialize_bool(f, "condition-result", u->condition_result);
if (dual_timestamp_is_set(&u->assert_timestamp))
- unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
+ (void) serialize_bool(f, "assert-result", u->assert_result);
- unit_serialize_item(u, f, "transient", yes_no(u->transient));
+ (void) serialize_bool(f, "transient", u->transient);
+ (void) serialize_bool(f, "in-audit", u->in_audit);
- unit_serialize_item(u, f, "exported-invocation-id", yes_no(u->exported_invocation_id));
- unit_serialize_item(u, f, "exported-log-level-max", yes_no(u->exported_log_level_max));
- unit_serialize_item(u, f, "exported-log-extra-fields", yes_no(u->exported_log_extra_fields));
+ (void) serialize_bool(f, "exported-invocation-id", u->exported_invocation_id);
+ (void) serialize_bool(f, "exported-log-level-max", u->exported_log_level_max);
+ (void) serialize_bool(f, "exported-log-extra-fields", u->exported_log_extra_fields);
+ (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_rate_limit_interval);
+ (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_rate_limit_burst);
- unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+ (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
if (u->cpu_usage_last != NSEC_INFINITY)
- unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
+ (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
if (u->cgroup_path)
- unit_serialize_item(u, f, "cgroup", u->cgroup_path);
- unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
- (void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
- (void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
- unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
+ (void) serialize_item(f, "cgroup", u->cgroup_path);
+
+ (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized);
+ (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
if (uid_is_valid(u->ref_uid))
- unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
+ (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
if (gid_is_valid(u->ref_gid))
- unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
+ (void) serialize_item_format(f, "ref-gid", GID_FMT, u->ref_gid);
if (!sd_id128_is_null(u->invocation_id))
- unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+ (void) serialize_item_format(f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
bus_track_serialize(u->bus_track, f, "ref");
@@ -3266,17 +3271,17 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
r = unit_get_ip_accounting(u, m, &v);
if (r >= 0)
- unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
+ (void) serialize_item_format(f, ip_accounting_metric_field[m], "%" PRIu64, v);
}
if (serialize_jobs) {
if (u->job) {
- fprintf(f, "job\n");
+ fputs("job\n", f);
job_serialize(u->job, f);
}
if (u->nop_job) {
- fprintf(f, "job\n");
+ fputs("job\n", f);
job_serialize(u->nop_job, f);
}
}
@@ -3286,78 +3291,27 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
return 0;
}
-int unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value) {
- assert(u);
- assert(f);
- assert(key);
-
- if (!value)
- return 0;
-
- fputs(key, f);
- fputc('=', f);
- fputs(value, f);
- fputc('\n', f);
-
- return 1;
-}
-
-int unit_serialize_item_escaped(Unit *u, FILE *f, const char *key, const char *value) {
- _cleanup_free_ char *c = NULL;
-
- assert(u);
- assert(f);
- assert(key);
-
- if (!value)
- return 0;
-
- c = cescape(value);
- if (!c)
- return -ENOMEM;
-
- fputs(key, f);
- fputc('=', f);
- fputs(c, f);
- fputc('\n', f);
-
- return 1;
-}
-
-int unit_serialize_item_fd(Unit *u, FILE *f, FDSet *fds, const char *key, int fd) {
- int copy;
+static int unit_deserialize_job(Unit *u, FILE *f) {
+ _cleanup_(job_freep) Job *j = NULL;
+ int r;
assert(u);
assert(f);
- assert(key);
-
- if (fd < 0)
- return 0;
- copy = fdset_put_dup(fds, fd);
- if (copy < 0)
- return copy;
+ j = job_new_raw(u);
+ if (!j)
+ return log_oom();
- fprintf(f, "%s=%i\n", key, copy);
- return 1;
-}
-
-void unit_serialize_item_format(Unit *u, FILE *f, const char *key, const char *format, ...) {
- va_list ap;
-
- assert(u);
- assert(f);
- assert(key);
- assert(format);
-
- fputs(key, f);
- fputc('=', f);
+ r = job_deserialize(j, f);
+ if (r < 0)
+ return r;
- va_start(ap, format);
- vfprintf(f, format, ap);
- va_end(ap);
+ r = job_install_deserialized(j);
+ if (r < 0)
+ return r;
- fputc('\n', f);
+ TAKE_PTR(j);
+ return 0;
}
int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
@@ -3368,21 +3322,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
assert(fds);
for (;;) {
- char line[LINE_MAX], *l, *v;
+ _cleanup_free_ char *line = NULL;
CGroupIPAccountingMetric m;
+ char *l, *v;
size_t k;
- if (!fgets(line, sizeof(line), f)) {
- if (feof(f))
- return 0;
- return -errno;
- }
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0) /* eof */
+ break;
- char_array_0(line);
l = strstrip(line);
-
- /* End marker */
- if (isempty(l))
+ if (isempty(l)) /* End marker */
break;
k = strcspn(l, "=");
@@ -3395,54 +3347,33 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
if (streq(l, "job")) {
if (v[0] == '\0') {
- /* new-style serialized job */
- Job *j;
-
- j = job_new_raw(u);
- if (!j)
- return log_oom();
-
- r = job_deserialize(j, f);
- if (r < 0) {
- job_free(j);
- return r;
- }
-
- r = hashmap_put(u->manager->jobs, UINT32_TO_PTR(j->id), j);
- if (r < 0) {
- job_free(j);
- return r;
- }
-
- r = job_install_deserialized(j);
- if (r < 0) {
- hashmap_remove(u->manager->jobs, UINT32_TO_PTR(j->id));
- job_free(j);
+ /* New-style serialized job */
+ r = unit_deserialize_job(u, f);
+ if (r < 0)
return r;
- }
- } else /* legacy for pre-44 */
+ } else /* Legacy for pre-44 */
log_unit_warning(u, "Update from too old systemd versions are unsupported, cannot deserialize job: %s", v);
continue;
} else if (streq(l, "state-change-timestamp")) {
- dual_timestamp_deserialize(v, &u->state_change_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->state_change_timestamp);
continue;
} else if (streq(l, "inactive-exit-timestamp")) {
- dual_timestamp_deserialize(v, &u->inactive_exit_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->inactive_exit_timestamp);
continue;
} else if (streq(l, "active-enter-timestamp")) {
- dual_timestamp_deserialize(v, &u->active_enter_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->active_enter_timestamp);
continue;
} else if (streq(l, "active-exit-timestamp")) {
- dual_timestamp_deserialize(v, &u->active_exit_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->active_exit_timestamp);
continue;
} else if (streq(l, "inactive-enter-timestamp")) {
- dual_timestamp_deserialize(v, &u->inactive_enter_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->inactive_enter_timestamp);
continue;
} else if (streq(l, "condition-timestamp")) {
- dual_timestamp_deserialize(v, &u->condition_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->condition_timestamp);
continue;
} else if (streq(l, "assert-timestamp")) {
- dual_timestamp_deserialize(v, &u->assert_timestamp);
+ (void) deserialize_dual_timestamp(v, &u->assert_timestamp);
continue;
} else if (streq(l, "condition-result")) {
@@ -3474,6 +3405,16 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
+ } else if (streq(l, "in-audit")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse in-audit bool %s, ignoring.", v);
+ else
+ u->in_audit = r;
+
+ continue;
+
} else if (streq(l, "exported-invocation-id")) {
r = parse_boolean(v);
@@ -3504,6 +3445,26 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
+ } else if (streq(l, "exported-log-rate-limit-interval")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit interval %s, ignoring.", v);
+ else
+ u->exported_log_rate_limit_interval = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-rate-limit-burst")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit burst %s, ignoring.", v);
+ else
+ u->exported_log_rate_limit_burst = r;
+
+ continue;
+
} else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
r = safe_atou64(v, &u->cpu_usage_base);
@@ -3554,18 +3515,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
continue;
- } else if (streq(l, "cgroup-bpf-realized")) {
- int i;
+ } else if (streq(l, "cgroup-invalidated-mask")) {
- r = safe_atoi(v, &i);
+ r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
if (r < 0)
- log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
- else
- u->cgroup_bpf_state =
- i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
- i > 0 ? UNIT_CGROUP_BPF_ON :
- UNIT_CGROUP_BPF_OFF;
-
+ log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
continue;
} else if (streq(l, "ref-uid")) {
@@ -3588,11 +3542,13 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
else
unit_ref_uid_gid(u, UID_INVALID, gid);
+ continue;
+
} else if (streq(l, "ref")) {
r = strv_extend(&u->deserialized_refs, v);
if (r < 0)
- log_oom();
+ return log_oom();
continue;
} else if (streq(l, "invocation-id")) {
@@ -3657,23 +3613,27 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
return 0;
}
-void unit_deserialize_skip(FILE *f) {
+int unit_deserialize_skip(FILE *f) {
+ int r;
assert(f);
/* Skip serialized data for this unit. We don't know what it is. */
for (;;) {
- char line[LINE_MAX], *l;
+ _cleanup_free_ char *line = NULL;
+ char *l;
- if (!fgets(line, sizeof line, f))
- return;
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
- char_array_0(line);
l = strstrip(line);
/* End marker */
if (isempty(l))
- return;
+ return 1;
}
}
@@ -4143,12 +4103,28 @@ int unit_patch_contexts(Unit *u) {
}
cc = unit_get_cgroup_context(u);
- if (cc) {
+ if (cc && ec) {
- if (ec &&
- ec->private_devices &&
+ if (ec->private_devices &&
cc->device_policy == CGROUP_AUTO)
cc->device_policy = CGROUP_CLOSED;
+
+ if (ec->root_image &&
+ (cc->device_policy != CGROUP_AUTO || cc->device_allow)) {
+
+ /* When RootImage= is specified, the following devices are touched. */
+ r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw");
+ if (r < 0)
+ return r;
+
+ r = cgroup_add_device_allow(cc, "block-loop", "rwm");
+ if (r < 0)
+ return r;
+
+ r = cgroup_add_device_allow(cc, "block-blkext", "rwm");
+ if (r < 0)
+ return r;
+ }
}
return 0;
@@ -4479,10 +4455,10 @@ static int operation_to_signal(KillContext *c, KillOperation k) {
return c->kill_signal;
case KILL_KILL:
- return SIGKILL;
+ return c->final_kill_signal;
- case KILL_ABORT:
- return SIGABRT;
+ case KILL_WATCHDOG:
+ return c->watchdog_signal;
default:
assert_not_reached("KillOperation unknown");
@@ -4609,7 +4585,6 @@ int unit_kill_context(
int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) {
_cleanup_free_ char *p = NULL;
- char *prefix;
UnitDependencyInfo di;
int r;
@@ -4649,7 +4624,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask)
return r;
p = NULL;
- prefix = alloca(strlen(path) + 1);
+ char prefix[strlen(path) + 1];
PATH_FOREACH_PREFIX_MORE(prefix, path) {
Set *x;
@@ -4766,7 +4741,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
}
int unit_fail_if_noncanonical(Unit *u, const char* where) {
- _cleanup_free_ char *canonical_where;
+ _cleanup_free_ char *canonical_where = NULL;
int r;
assert(u);
@@ -4966,7 +4941,7 @@ void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
r = unit_ref_uid_gid(u, uid, gid);
if (r > 0)
- bus_unit_send_change_signal(u);
+ unit_add_to_dbus_queue(u);
}
int unit_set_invocation_id(Unit *u, sd_id128_t id) {
@@ -5020,15 +4995,21 @@ int unit_acquire_invocation_id(Unit *u) {
if (r < 0)
return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+ unit_add_to_dbus_queue(u);
return 0;
}
-void unit_set_exec_params(Unit *u, ExecParameters *p) {
+int unit_set_exec_params(Unit *u, ExecParameters *p) {
+ int r;
+
assert(u);
assert(p);
/* Copy parameters from manager */
- p->environment = u->manager->environment;
+ r = manager_get_effective_environment(u->manager, &p->environment);
+ if (r < 0)
+ return r;
+
p->confirm_spawn = manager_get_confirm_spawn(u->manager);
p->cgroup_supported = u->manager->cgroup_supported;
p->prefix = u->manager->prefix;
@@ -5037,6 +5018,8 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
/* Copy paramaters from unit */
p->cgroup_path = u->cgroup_path;
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
+
+ return 0;
}
int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
@@ -5242,6 +5225,60 @@ fail:
return r;
}
+static int unit_export_log_rate_limit_interval(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_rate_limit_interval)
+ return 0;
+
+ if (c->log_rate_limit_interval_usec == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+
+ if (asprintf(&buf, "%" PRIu64, c->log_rate_limit_interval_usec) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit interval symlink %s: %m", p);
+
+ u->exported_log_rate_limit_interval = true;
+ return 0;
+}
+
+static int unit_export_log_rate_limit_burst(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_rate_limit_burst)
+ return 0;
+
+ if (c->log_rate_limit_burst == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+
+ if (asprintf(&buf, "%u", c->log_rate_limit_burst) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit burst symlink %s: %m", p);
+
+ u->exported_log_rate_limit_burst = true;
+ return 0;
+}
+
void unit_export_state_files(Unit *u) {
const ExecContext *c;
@@ -5253,7 +5290,7 @@ void unit_export_state_files(Unit *u) {
if (!MANAGER_IS_SYSTEM(u->manager))
return;
- if (u->manager->test_run_flags != 0)
+ if (MANAGER_IS_TEST_RUN(u->manager))
return;
/* Exports a couple of unit properties to /run/systemd/units/, so that journald can quickly query this data
@@ -5275,6 +5312,8 @@ void unit_export_state_files(Unit *u) {
if (c) {
(void) unit_export_log_level_max(u, c);
(void) unit_export_log_extra_fields(u, c);
+ (void) unit_export_log_rate_limit_interval(u, c);
+ (void) unit_export_log_rate_limit_burst(u, c);
}
}
@@ -5311,6 +5350,20 @@ void unit_unlink_state_files(Unit *u) {
u->exported_log_extra_fields = false;
}
+
+ if (u->exported_log_rate_limit_interval) {
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_rate_limit_interval = false;
+ }
+
+ if (u->exported_log_rate_limit_burst) {
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_rate_limit_burst = false;
+ }
}
int unit_prepare_exec(Unit *u) {
@@ -5433,6 +5486,105 @@ int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) {
return 0;
}
+void unit_log_success(Unit *u) {
+ assert(u);
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_SUCCESS_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Succeeded."));
+}
+
+void unit_log_failure(Unit *u, const char *result) {
+ assert(u);
+ assert(result);
+
+ log_struct(LOG_WARNING,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILURE_RESULT_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Failed with result '%s'.", result),
+ "UNIT_RESULT=%s", result);
+}
+
+void unit_log_process_exit(
+ Unit *u,
+ int level,
+ const char *kind,
+ const char *command,
+ int code,
+ int status) {
+
+ assert(u);
+ assert(kind);
+
+ if (code != CLD_EXITED)
+ level = LOG_WARNING;
+
+ log_struct(level,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_PROCESS_EXIT_STR,
+ LOG_UNIT_MESSAGE(u, "%s exited, code=%s, status=%i/%s",
+ kind,
+ sigchld_code_to_string(code), status,
+ strna(code == CLD_EXITED
+ ? exit_status_to_string(status, EXIT_STATUS_FULL)
+ : signal_to_string(status))),
+ "EXIT_CODE=%s", sigchld_code_to_string(code),
+ "EXIT_STATUS=%i", status,
+ "COMMAND=%s", strna(command),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u));
+}
+
+int unit_exit_status(Unit *u) {
+ assert(u);
+
+ /* Returns the exit status to propagate for the most recent cycle of this unit. Returns a value in the range
+ * 0…255 if there's something to propagate. EOPNOTSUPP if the concept does not apply to this unit type, ENODATA
+ * if no data is currently known (for example because the unit hasn't deactivated yet) and EBADE if the main
+ * service process has exited abnormally (signal/coredump). */
+
+ if (!UNIT_VTABLE(u)->exit_status)
+ return -EOPNOTSUPP;
+
+ return UNIT_VTABLE(u)->exit_status(u);
+}
+
+int unit_failure_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on failure, or an error if there's nothing to propagate */
+
+ if (u->failure_action_exit_status >= 0)
+ return u->failure_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
+int unit_success_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on success, or an error if there's nothing to propagate */
+
+ if (u->success_action_exit_status >= 0)
+ return u->success_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
[COLLECT_INACTIVE] = "inactive",
[COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",
diff --git a/src/core/unit.h b/src/core/unit.h
index b3131eba1b..6fd39eaca3 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -19,7 +19,7 @@ typedef enum KillOperation {
KILL_TERMINATE,
KILL_TERMINATE_AND_LOG,
KILL_KILL,
- KILL_ABORT,
+ KILL_WATCHDOG,
_KILL_OPERATION_MAX,
_KILL_OPERATION_INVALID = -1
} KillOperation;
@@ -105,12 +105,6 @@ struct UnitRef {
LIST_FIELDS(UnitRef, refs_by_target);
};
-typedef enum UnitCGroupBPFState {
- UNIT_CGROUP_BPF_OFF = 0,
- UNIT_CGROUP_BPF_ON = 1,
- UNIT_CGROUP_BPF_INVALIDATED = -1,
-} UnitCGroupBPFState;
-
typedef struct Unit {
Manager *manager;
@@ -188,9 +182,6 @@ typedef struct Unit {
/* Per type list */
LIST_FIELDS(Unit, units_by_type);
- /* All units which have requires_mounts_for set */
- LIST_FIELDS(Unit, has_requires_mounts_for);
-
/* Load queue */
LIST_FIELDS(Unit, load_queue);
@@ -212,6 +203,9 @@ typedef struct Unit {
/* Target dependencies queue */
LIST_FIELDS(Unit, target_deps_queue);
+ /* Queue of units with StopWhenUnneeded set that shell be checked for clean-up. */
+ LIST_FIELDS(Unit, stop_when_unneeded_queue);
+
/* PIDs we keep an eye on. Note that a unit might have many
* more, but these are the ones we care enough about to
* process SIGCHLD for */
@@ -232,8 +226,9 @@ typedef struct Unit {
RateLimit start_limit;
EmergencyAction start_limit_action;
- EmergencyAction failure_action;
- EmergencyAction success_action;
+ /* What to do on failure or success */
+ EmergencyAction success_action, failure_action;
+ int success_action_exit_status, failure_action_exit_status;
char *reboot_arg;
/* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
@@ -253,12 +248,15 @@ typedef struct Unit {
/* Counterparts in the cgroup filesystem */
char *cgroup_path;
- CGroupMask cgroup_realized_mask;
- CGroupMask cgroup_enabled_mask;
- CGroupMask cgroup_subtree_mask;
- CGroupMask cgroup_members_mask;
+ CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroupsv1) */
+ CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroupsv2) */
+ CGroupMask cgroup_invalidated_mask; /* A mask specifiying controllers which shall be considered invalidated, and require re-realization */
+ CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
int cgroup_inotify_wd;
+ /* Device Controller BPF program */
+ BPFProgram *bpf_device_control_installed;
+
/* IP BPF Firewalling/accounting */
int ip_accounting_ingress_map_fd;
int ip_accounting_egress_map_fd;
@@ -315,6 +313,7 @@ typedef struct Unit {
/* Is this a unit that is always running and cannot be stopped? */
bool perpetual;
+ /* Booleans indicating membership of this unit in the various queues */
bool in_load_queue:1;
bool in_dbus_queue:1;
bool in_cleanup_queue:1;
@@ -322,6 +321,7 @@ typedef struct Unit {
bool in_cgroup_realize_queue:1;
bool in_cgroup_empty_queue:1;
bool in_target_deps_queue:1;
+ bool in_stop_when_unneeded_queue:1;
bool sent_dbus_new_signal:1;
@@ -330,9 +330,6 @@ typedef struct Unit {
bool cgroup_realized:1;
bool cgroup_members_mask_valid:1;
- bool cgroup_subtree_mask_valid:1;
-
- UnitCGroupBPFState cgroup_bpf_state:2;
/* Reset cgroup accounting next time we fork something off */
bool reset_accounting:1;
@@ -349,10 +346,12 @@ typedef struct Unit {
bool exported_invocation_id:1;
bool exported_log_level_max:1;
bool exported_log_extra_fields:1;
+ bool exported_log_rate_limit_interval:1;
+ bool exported_log_rate_limit_burst:1;
/* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If
* == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */
- int last_section_private:2;
+ signed int last_section_private:2;
} Unit;
typedef struct UnitStatusMessageFormats {
@@ -380,7 +379,9 @@ typedef enum UnitWriteFlags {
} UnitWriteFlags;
/* Returns true if neither persistent, nor runtime storage is requested, i.e. this is a check invocation only */
-#define UNIT_WRITE_FLAGS_NOOP(flags) (((flags) & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0)
+static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) {
+ return (flags & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0;
+}
#include "kill.h"
@@ -432,11 +433,16 @@ typedef struct UnitVTable {
int (*load)(Unit *u);
/* During deserialization we only record the intended state to return to. With coldplug() we actually put the
- * deserialized state in effect. This is where unit_notify() should be called to start things up. */
+ * deserialized state in effect. This is where unit_notify() should be called to start things up. Note that
+ * this callback is invoked *before* we leave the reloading state of the manager, i.e. *before* we consider the
+ * reloading to be complete. Thus, this callback should just restore the exact same state for any unit that was
+ * in effect before the reload, i.e. units should not catch up with changes happened during the reload. That's
+ * what catchup() below is for. */
int (*coldplug)(Unit *u);
- /* This is called shortly after all units' coldplug() call was invoked. It's supposed to catch up state changes
- * we missed so far (for example because they took place while we were reloading/reexecing) */
+ /* This is called shortly after all units' coldplug() call was invoked, and *after* the manager left the
+ * reloading state. It's supposed to catch up with state changes due to external events we missed so far (for
+ * example because they took place while we were reloading/reexecing) */
void (*catchup)(Unit *u);
void (*dump)(Unit *u, FILE *f, const char *prefix);
@@ -529,6 +535,10 @@ typedef struct UnitVTable {
/* Returns true if the unit currently needs access to the console */
bool (*needs_console)(Unit *u);
+ /* Returns the exit status to propagate in case of FailureAction=exit/SuccessAction=exit; usually returns the
+ * exit code of the "main" process of the service or similar. */
+ int (*exit_status)(Unit *u);
+
/* Like the enumerate() callback further down, but only enumerates the perpetual units, i.e. all units that
* unconditionally exist and are always active. The main reason to keep both enumeration functions separate is
* philosophical: the state of perpetual units should be put in place by coldplug(), while the state of those
@@ -568,7 +578,9 @@ typedef struct UnitVTable {
extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX];
-#define UNIT_VTABLE(u) unit_vtable[(u)->type]
+static inline const UnitVTable* UNIT_VTABLE(Unit *u) {
+ return unit_vtable[u->type];
+}
/* For casting a unit into the various unit types */
#define DEFINE_CAST(UPPERCASE, MixedCase) \
@@ -580,13 +592,20 @@ extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX];
}
/* For casting the various unit types into a unit */
-#define UNIT(u) (&(u)->meta)
+#define UNIT(u) \
+ ({ \
+ typeof(u) _u_ = (u); \
+ Unit *_w_ = _u_ ? &(_u_)->meta : NULL; \
+ _w_; \
+ })
#define UNIT_HAS_EXEC_CONTEXT(u) (UNIT_VTABLE(u)->exec_context_offset > 0)
#define UNIT_HAS_CGROUP_CONTEXT(u) (UNIT_VTABLE(u)->cgroup_context_offset > 0)
#define UNIT_HAS_KILL_CONTEXT(u) (UNIT_VTABLE(u)->kill_context_offset > 0)
-#define UNIT_TRIGGER(u) ((Unit*) hashmap_first_key((u)->dependencies[UNIT_TRIGGERS]))
+static inline Unit* UNIT_TRIGGER(Unit *u) {
+ return hashmap_first_key(u->dependencies[UNIT_TRIGGERS]);
+}
Unit *unit_new(Manager *m, size_t size);
void unit_free(Unit *u);
@@ -598,8 +617,8 @@ int unit_add_name(Unit *u, const char *name);
int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference, UnitDependencyMask mask);
int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask);
-int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, const char *filename, bool add_reference, UnitDependencyMask mask);
-int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, const char *path, bool add_reference, UnitDependencyMask mask);
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask);
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask);
int unit_add_exec_dependencies(Unit *u, ExecContext *c);
@@ -613,6 +632,7 @@ void unit_add_to_dbus_queue(Unit *u);
void unit_add_to_cleanup_queue(Unit *u);
void unit_add_to_gc_queue(Unit *u);
void unit_add_to_target_deps_queue(Unit *u);
+void unit_submit_to_stop_when_unneeded_queue(Unit *u);
int unit_merge(Unit *u, Unit *other);
int unit_merge_by_name(Unit *u, const char *other);
@@ -628,7 +648,7 @@ int unit_set_default_slice(Unit *u);
const char *unit_description(Unit *u) _pure_;
-bool unit_has_name(Unit *u, const char *name);
+bool unit_has_name(const Unit *u, const char *name);
UnitActiveState unit_active_state(Unit *u);
@@ -679,12 +699,7 @@ bool unit_can_serialize(Unit *u) _pure_;
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs);
int unit_deserialize(Unit *u, FILE *f, FDSet *fds);
-void unit_deserialize_skip(FILE *f);
-
-int unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value);
-int unit_serialize_item_escaped(Unit *u, FILE *f, const char *key, const char *value);
-int unit_serialize_item_fd(Unit *u, FILE *f, FDSet *fds, const char *key, int fd);
-void unit_serialize_item_format(Unit *u, FILE *f, const char *key, const char *value, ...) _printf_(4,5);
+int unit_deserialize_skip(FILE *f);
int unit_add_node_dependency(Unit *u, const char *what, bool wants, UnitDependency d, UnitDependencyMask mask);
@@ -692,7 +707,6 @@ int unit_coldplug(Unit *u);
void unit_catchup(Unit *u);
void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) _printf_(3, 0);
-void unit_status_emit_starting_stopping_reloading(Unit *u, JobType t);
bool unit_need_daemon_reload(Unit *u);
@@ -749,6 +763,8 @@ bool unit_type_supported(UnitType t);
bool unit_is_pristine(Unit *u);
+bool unit_is_unneeded(Unit *u);
+
pid_t unit_control_pid(Unit *u);
pid_t unit_main_pid(Unit *u);
@@ -777,7 +793,7 @@ int unit_acquire_invocation_id(Unit *u);
bool unit_shall_confirm_spawn(Unit *u);
-void unit_set_exec_params(Unit *s, ExecParameters *p);
+int unit_set_exec_params(Unit *s, ExecParameters *p);
int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret);
@@ -796,6 +812,21 @@ const char *unit_label_path(Unit *u);
int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error);
+void unit_log_success(Unit *u);
+void unit_log_failure(Unit *u, const char *result);
+static inline void unit_log_result(Unit *u, bool success, const char *result) {
+ if (success)
+ unit_log_success(u);
+ else
+ unit_log_failure(u, result);
+}
+
+void unit_log_process_exit(Unit *u, int level, const char *kind, const char *command, int code, int status);
+
+int unit_exit_status(Unit *u);
+int unit_success_action_exit_status(Unit *u);
+int unit_failure_action_exit_status(Unit *u);
+
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \