diff options
Diffstat (limited to 'src/core')
81 files changed, 6310 insertions, 3678 deletions
diff --git a/src/core/automount.c b/src/core/automount.c index 1b96a52c00..de8010bf2e 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -16,6 +16,7 @@ #include "bus-error.h" #include "bus-util.h" #include "dbus-automount.h" +#include "dbus-unit.h" #include "fd-util.h" #include "format-util.h" #include "io-util.h" @@ -23,9 +24,11 @@ #include "mkdir.h" #include "mount-util.h" #include "mount.h" +#include "mountpoint-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" +#include "serialize.h" #include "special.h" #include "stdio-util.h" #include "string-table.h" @@ -85,7 +88,7 @@ static void unmount_autofs(Automount *a) { a->pipe_fd = safe_close(a->pipe_fd); /* If we reload/reexecute things we keep the mount point around */ - if (!IN_SET(UNIT(a)->manager->exit_code, MANAGER_RELOAD, MANAGER_REEXECUTE)) { + if (!IN_SET(UNIT(a)->manager->objective, MANAGER_RELOAD, MANAGER_REEXECUTE)) { automount_send_ready(a, a->tokens, -EHOSTDOWN); automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); @@ -149,7 +152,7 @@ static int automount_add_default_dependencies(Automount *a) { if (!MANAGER_IS_SYSTEM(UNIT(a)->manager)) return 0; - r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; @@ -235,6 +238,9 @@ static void automount_set_state(Automount *a, AutomountState state) { AutomountState old_state; assert(a); + if (a->state != state) + bus_unit_send_pending_change_signal(UNIT(a), false); + old_state = a->state; a->state = state; @@ -314,9 +320,7 @@ static void automount_enter_dead(Automount *a, AutomountResult f) { if (a->result == AUTOMOUNT_SUCCESS) a->result = f; - if (a->result != AUTOMOUNT_SUCCESS) - log_unit_warning(UNIT(a), "Failed with result '%s'.", automount_result_to_string(a->result)); - + 
unit_log_result(UNIT(a), a->result == AUTOMOUNT_SUCCESS, automount_result_to_string(a->result)); automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD); } @@ -841,16 +845,16 @@ static int automount_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", automount_state_to_string(a->state)); - unit_serialize_item(u, f, "result", automount_result_to_string(a->result)); - unit_serialize_item_format(u, f, "dev-id", "%u", (unsigned) a->dev_id); + (void) serialize_item(f, "state", automount_state_to_string(a->state)); + (void) serialize_item(f, "result", automount_result_to_string(a->result)); + (void) serialize_item_format(f, "dev-id", "%lu", (unsigned long) a->dev_id); SET_FOREACH(p, a->tokens, i) - unit_serialize_item_format(u, f, "token", "%u", PTR_TO_UINT(p)); + (void) serialize_item_format(f, "token", "%u", PTR_TO_UINT(p)); SET_FOREACH(p, a->expire_tokens, i) - unit_serialize_item_format(u, f, "expire-token", "%u", PTR_TO_UINT(p)); + (void) serialize_item_format(f, "expire-token", "%u", PTR_TO_UINT(p)); - r = unit_serialize_item_fd(u, f, fds, "pipe-fd", a->pipe_fd); + r = serialize_fd(f, fds, "pipe-fd", a->pipe_fd); if (r < 0) return r; @@ -882,12 +886,13 @@ static int automount_deserialize_item(Unit *u, const char *key, const char *valu a->result = f; } else if (streq(key, "dev-id")) { - unsigned d; + unsigned long d; - if (safe_atou(value, &d) < 0) + if (safe_atolu(value, &d) < 0) log_unit_debug(u, "Failed to parse dev-id value: %s", value); else - a->dev_id = (unsigned) d; + a->dev_id = (dev_t) d; + } else if (streq(key, "token")) { unsigned token; diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c new file mode 100644 index 0000000000..dade7f0490 --- /dev/null +++ b/src/core/bpf-devices.c @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#include <linux/libbpf.h> + +#include "bpf-devices.h" +#include "bpf-program.h" + +#define PASS_JUMP_OFF 4096 + +static int 
bpf_access_type(const char *acc) { + int r = 0; + + assert(acc); + + for (; *acc; acc++) + switch(*acc) { + case 'r': + r |= BPF_DEVCG_ACC_READ; + break; + case 'w': + r |= BPF_DEVCG_ACC_WRITE; + break; + case 'm': + r |= BPF_DEVCG_ACC_MKNOD; + break; + default: + return -EINVAL; + } + + return r; +} + +int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) { + struct bpf_insn insn[] = { + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */ + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + int r, access; + + assert(prog); + assert(acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + insn[2].imm = access; + + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + +int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) { + struct bpf_insn insn[] = { + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */ + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + int r, access; + + assert(prog); + assert(acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + insn[2].imm = access; + + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + 
+int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) { + struct bpf_insn insn[] = { + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */ + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + int r, access; + + assert(prog); + assert(acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + insn[2].imm = access; + + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + +int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) { + struct bpf_insn pre_insn[] = { + /* load device type to r2 */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, access_type)), + + /* load access type to r3 */ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, access_type)), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16), + + /* load major number to r4 */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, major)), + + /* load minor number to r5 */ + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, minor)), + }; + + _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL; + int r; + + assert(ret); + + if (policy == CGROUP_AUTO && !whitelist) + return 0; + + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog); + if (r < 0) + return log_error_errno(r, "Loading device control BPF program failed: %m"); + + if (policy == CGROUP_CLOSED || whitelist) { + r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + } + + *ret = TAKE_PTR(prog); + + return 0; +} + +int cgroup_apply_device_bpf(Unit 
*u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) { + struct bpf_insn post_insn[] = { + /* return DENY */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + }; + + struct bpf_insn exit_insn[] = { + /* else return ALLOW */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + _cleanup_free_ char *path = NULL; + int r; + + if (!prog) { + /* Remove existing program. */ + u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed); + return 0; + } + + if (policy != CGROUP_STRICT || whitelist) { + size_t off; + + r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + + /* Fixup PASS_JUMP_OFF jump offsets. */ + for (off = 0; off < prog->n_instructions; off++) { + struct bpf_insn *ins = &prog->instructions[off]; + + if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF) + ins->off = prog->n_instructions - off - 1; + } + } else + /* Explicitly forbid everything. */ + exit_insn[0].imm = 0; + + r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path); + if (r < 0) + return log_error_errno(r, "Failed to determine cgroup path: %m"); + + + r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, BPF_F_ALLOW_MULTI); + if (r < 0) + return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path); + + /* Unref the old BPF program (which will implicitly detach it) now that the new program has been attached. */ + u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed); + + /* Remember that this BPF program is installed now. 
*/ + u->bpf_device_control_installed = bpf_program_ref(prog); + + return 0; +} + +int bpf_devices_supported(void) { + struct bpf_insn trivial[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL; + static int supported = -1; + int r; + + /* Checks whether BPF device controller is supported. For this, we check three things: + * + * a) whether we are privileged + * b) whether the unified hierarchy is being used + * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require + */ + + if (supported >= 0) + return supported; + + if (geteuid() != 0) { + log_debug("Not enough privileges, BPF device control is not supported."); + return supported = 0; + } + + r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); + if (r < 0) + return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m"); + if (r == 0) { + log_debug("Not running with unified cgroups, BPF device control is not supported."); + return supported = 0; + } + + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program); + if (r < 0) { + log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial)); + if (r < 0) { + log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + r = bpf_program_load_kernel(program, NULL, 0); + if (r < 0) { + log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + return supported = 1; +} diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h new file mode 100644 index 0000000000..8d3de3bd94 --- /dev/null +++ b/src/core/bpf-devices.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include 
<inttypes.h> + +#include "unit.h" + +struct BPFProgram; + +int bpf_devices_supported(void); + +int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc); +int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc); +int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc); + +int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist); +int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist); diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index 8b66ef73dc..b9a611fd9e 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -20,6 +20,7 @@ #include "bpf-program.h" #include "fd-util.h" #include "ip-address-access.h" +#include "missing_syscall.h" #include "unit.h" enum { @@ -483,7 +484,7 @@ int bpf_firewall_compile(Unit *u) { if (supported < 0) return supported; if (supported == BPF_FIREWALL_UNSUPPORTED) { - log_debug("BPF firewalling not supported on this manager, proceeding without."); + log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without."); return -EOPNOTSUPP; } if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) { @@ -492,7 +493,7 @@ int bpf_firewall_compile(Unit *u) { * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at * all, either. 
*/ - log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); + log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); return -EOPNOTSUPP; } @@ -518,24 +519,24 @@ int bpf_firewall_compile(Unit *u) { r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd); if (r < 0) - return log_error_errno(r, "Preparation of eBPF allow maps failed: %m"); + return log_unit_error_errno(u, r, "Preparation of eBPF allow maps failed: %m"); r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd); if (r < 0) - return log_error_errno(r, "Preparation of eBPF deny maps failed: %m"); + return log_unit_error_errno(u, r, "Preparation of eBPF deny maps failed: %m"); } r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd); if (r < 0) - return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m"); + return log_unit_error_errno(u, r, "Preparation of eBPF accounting maps failed: %m"); r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress); if (r < 0) - return log_error_errno(r, "Compilation for ingress BPF program failed: %m"); + return log_unit_error_errno(u, r, "Compilation for ingress BPF program failed: %m"); r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress); if (r < 0) - return log_error_errno(r, "Compilation for egress BPF program failed: %m"); + return log_unit_error_errno(u, r, "Compilation for egress BPF program failed: %m"); return 0; } @@ -560,17 +561,17 @@ int bpf_firewall_install(Unit *u) { if (supported < 0) return supported; if (supported == BPF_FIREWALL_UNSUPPORTED) { - log_debug("BPF firewalling not supported on this manager, proceeding without."); + log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without."); return -EOPNOTSUPP; } if (supported != 
BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) { - log_debug("BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); + log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); return -EOPNOTSUPP; } r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path); if (r < 0) - return log_error_errno(r, "Failed to determine cgroup path: %m"); + return log_unit_error_errno(u, r, "Failed to determine cgroup path: %m"); flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI && (u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0; @@ -583,7 +584,7 @@ int bpf_firewall_install(Unit *u) { if (u->ip_bpf_egress) { r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags); if (r < 0) - return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path); + return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path); /* Remember that this BPF program is installed now. 
*/ u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress); @@ -592,7 +593,7 @@ int bpf_firewall_install(Unit *u) { if (u->ip_bpf_ingress) { r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags); if (r < 0) - return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path); + return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path); u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress); } @@ -660,8 +661,7 @@ int bpf_firewall_supported(void) { * b) whether the unified hierarchy is being used * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require * d) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require - * e) the BPF implementation in the kernel supports the BPF_PROG_ATTACH call, which we require - * + * e) the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require */ if (supported >= 0) @@ -714,7 +714,7 @@ int bpf_firewall_supported(void) { * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF * program if we can't do a thing with it later? * - * We detect this case by issuing the BPF_PROG_ATTACH bpf() call with invalid file descriptors: if + * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the * parameters are validated however, and that'll fail with EBADF then. 
*/ @@ -724,22 +724,22 @@ int bpf_firewall_supported(void) { .attach_bpf_fd = -1, }; - if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) { + if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) { if (errno != EBADF) { - log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_ATTACH, BPF firewalling is not supported: %m"); + log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m"); return supported = BPF_FIREWALL_UNSUPPORTED; } /* YAY! */ } else { - log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH call? Something is weird, assuming BPF firewalling is broken and hence not supported."); + log_debug("Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF firewalling is broken and hence not supported."); return supported = BPF_FIREWALL_UNSUPPORTED; } /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported - * (which was added in kernel 4.15). We use a similar logic as before, but this time we use - * BPF_F_ALLOW_MULTI. Since the flags are checked early in the system call we'll get EINVAL if it's not - * supported, and EBADF as before if it is available. */ + * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH + * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll + * get EINVAL if it's not supported, and EBADF as before if it is available. 
*/ attr = (union bpf_attr) { .attach_type = BPF_CGROUP_INET_EGRESS, diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h index e2d08a0fc8..7d38483dbd 100644 --- a/src/core/bpf-firewall.h +++ b/src/core/bpf-firewall.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #pragma once - #include <inttypes.h> #include "unit.h" diff --git a/src/core/cgroup.c b/src/core/cgroup.c index bb02436203..a7ce3fceaa 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -7,6 +7,7 @@ #include "blockdev-util.h" #include "bpf-firewall.h" #include "btrfs-util.h" +#include "bpf-devices.h" #include "bus-error.h" #include "cgroup-util.h" #include "cgroup.h" @@ -18,6 +19,7 @@ #include "process-util.h" #include "procfs-util.h" #include "special.h" +#include "stat-util.h" #include "stdio-util.h" #include "string-table.h" #include "string-util.h" @@ -25,7 +27,12 @@ #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) -bool manager_owns_root_cgroup(Manager *m) { +/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access + * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask + * out specific attributes from us. */ +#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING) + +bool manager_owns_host_root_cgroup(Manager *m) { assert(m); /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the @@ -33,24 +40,38 @@ bool manager_owns_root_cgroup(Manager *m) { * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if * we run in any kind of container virtualization. 
*/ + if (MANAGER_IS_USER(m)) + return false; + if (detect_container() > 0) return false; return empty_or_root(m->cgroup_root); } -bool unit_has_root_cgroup(Unit *u) { +bool unit_has_host_root_cgroup(Unit *u) { assert(u); /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and * the manager manages the root cgroup. */ - if (!manager_owns_root_cgroup(u->manager)) + if (!manager_owns_host_root_cgroup(u->manager)) return false; return unit_has_name(u, SPECIAL_ROOT_SLICE); } +static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) { + int r; + + r = cg_set_attribute(controller, u->cgroup_path, attribute, value); + if (r < 0) + log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m", + strna(attribute), isempty(u->cgroup_path) ? "/" : u->cgroup_path, (int) strcspn(value, NEWLINE), value); + + return r; +} + static void cgroup_compat_warn(void) { static bool cgroup_compat_warned = false; @@ -71,29 +92,30 @@ static void cgroup_compat_warn(void) { void cgroup_context_init(CGroupContext *c) { assert(c); - /* Initialize everything to the kernel defaults, assuming the - * structure is preinitialized to 0 */ + /* Initialize everything to the kernel defaults. 
*/ - c->cpu_weight = CGROUP_WEIGHT_INVALID; - c->startup_cpu_weight = CGROUP_WEIGHT_INVALID; - c->cpu_quota_per_sec_usec = USEC_INFINITY; + *c = (CGroupContext) { + .cpu_weight = CGROUP_WEIGHT_INVALID, + .startup_cpu_weight = CGROUP_WEIGHT_INVALID, + .cpu_quota_per_sec_usec = USEC_INFINITY, - c->cpu_shares = CGROUP_CPU_SHARES_INVALID; - c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; + .cpu_shares = CGROUP_CPU_SHARES_INVALID, + .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID, - c->memory_high = CGROUP_LIMIT_MAX; - c->memory_max = CGROUP_LIMIT_MAX; - c->memory_swap_max = CGROUP_LIMIT_MAX; + .memory_high = CGROUP_LIMIT_MAX, + .memory_max = CGROUP_LIMIT_MAX, + .memory_swap_max = CGROUP_LIMIT_MAX, - c->memory_limit = CGROUP_LIMIT_MAX; + .memory_limit = CGROUP_LIMIT_MAX, - c->io_weight = CGROUP_WEIGHT_INVALID; - c->startup_io_weight = CGROUP_WEIGHT_INVALID; + .io_weight = CGROUP_WEIGHT_INVALID, + .startup_io_weight = CGROUP_WEIGHT_INVALID, - c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; - c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; + .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID, + .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID, - c->tasks_max = (uint64_t) -1; + .tasks_max = CGROUP_LIMIT_MAX, + }; } void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { @@ -114,6 +136,15 @@ void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight free(w); } +void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) { + assert(c); + assert(l); + + LIST_REMOVE(device_latencies, c->io_device_latencies, l); + free(l->path); + free(l); +} + void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) { assert(c); assert(l); @@ -147,6 +178,9 @@ void cgroup_context_done(CGroupContext *c) { while (c->io_device_weights) cgroup_context_free_io_device_weight(c, c->io_device_weights); + while (c->io_device_latencies) + cgroup_context_free_io_device_latency(c, 
c->io_device_latencies); + while (c->io_device_limits) cgroup_context_free_io_device_limit(c, c->io_device_limits); @@ -166,6 +200,7 @@ void cgroup_context_done(CGroupContext *c) { void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { CGroupIODeviceLimit *il; CGroupIODeviceWeight *iw; + CGroupIODeviceLatency *l; CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; @@ -193,6 +228,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" + "%sMemoryMin=%" PRIu64 "\n" "%sMemoryLow=%" PRIu64 "\n" "%sMemoryHigh=%" PRIu64 "\n" "%sMemoryMax=%" PRIu64 "\n" @@ -216,6 +252,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, + prefix, c->memory_min, prefix, c->memory_low, prefix, c->memory_high, prefix, c->memory_max, @@ -244,11 +281,18 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { LIST_FOREACH(device_weights, iw, c->io_device_weights) fprintf(f, - "%sIODeviceWeight=%s %" PRIu64, + "%sIODeviceWeight=%s %" PRIu64 "\n", prefix, iw->path, iw->weight); + LIST_FOREACH(device_latencies, l, c->io_device_latencies) + fprintf(f, + "%sIODeviceLatencyTargetSec=%s %s\n", + prefix, + l->path, + format_timespan(u, sizeof(u), l->target_usec, 1)); + LIST_FOREACH(device_limits, il, c->io_device_limits) { char buf[FORMAT_BYTES_MAX]; CGroupIOLimitType type; @@ -302,17 +346,73 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { } } +int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) { + _cleanup_free_ CGroupDeviceAllow *a = NULL; + _cleanup_free_ char *d = NULL; + + assert(c); + assert(dev); + assert(isempty(mode) || in_charset(mode, "rwm")); + + a = new(CGroupDeviceAllow, 1); + if (!a) + return -ENOMEM; + + d = 
strdup(dev); + if (!d) + return -ENOMEM; + + *a = (CGroupDeviceAllow) { + .path = TAKE_PTR(d), + .r = isempty(mode) || strchr(mode, 'r'), + .w = isempty(mode) || strchr(mode, 'w'), + .m = isempty(mode) || strchr(mode, 'm'), + }; + + LIST_PREPEND(device_allow, c->device_allow, a); + TAKE_PTR(a); + + return 0; +} + +static void cgroup_xattr_apply(Unit *u) { + char ids[SD_ID128_STRING_MAX]; + int r; + + assert(u); + + if (!MANAGER_IS_SYSTEM(u->manager)) + return; + + if (sd_id128_is_null(u->invocation_id)) + return; + + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + "trusted.invocation_id", + sd_id128_to_string(u->invocation_id, ids), 32, + 0); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path); +} + static int lookup_block_device(const char *p, dev_t *ret) { - struct stat st; + struct stat st = {}; int r; assert(p); assert(ret); - if (stat(p, &st) < 0) - return log_warning_errno(errno, "Couldn't stat device '%s': %m", p); - - if (S_ISBLK(st.st_mode)) + r = device_path_parse_major_minor(p, &st.st_mode, &st.st_rdev); + if (r == -ENODEV) { /* not a parsable device node, need to go to disk */ + if (stat(p, &st) < 0) + return log_warning_errno(errno, "Couldn't stat device '%s': %m", p); + } else if (r < 0) + return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p); + + if (S_ISCHR(st.st_mode)) { + log_warning("Device node '%s' is a character device, but block device needed.", p); + return -ENOTBLK; + } else if (S_ISBLK(st.st_mode)) *ret = st.st_rdev; else if (major(st.st_dev) != 0) *ret = st.st_dev; /* If this is not a device node then use the block device this file is stored on */ @@ -335,67 +435,123 @@ static int lookup_block_device(const char *p, dev_t *ret) { return 0; } -static int whitelist_device(const char *path, const char *node, const char *acc) { - char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4]; - struct stat st; - bool ignore_notfound; +static int 
whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) { + struct stat st = {}; int r; assert(path); assert(acc); - if (node[0] == '-') { - /* Non-existent paths starting with "-" must be silently ignored */ - node++; - ignore_notfound = true; - } else - ignore_notfound = false; + /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and + * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This + * means clients can use these path without the device node actually around */ + r = device_path_parse_major_minor(node, &st.st_mode, &st.st_rdev); + if (r < 0) { + if (r != -ENODEV) + return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node); - if (stat(node, &st) < 0) { - if (errno == ENOENT && ignore_notfound) - return 0; + if (stat(node, &st) < 0) + return log_warning_errno(errno, "Couldn't stat device %s: %m", node); - return log_warning_errno(errno, "Couldn't stat device %s: %m", node); + if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { + log_warning("%s is not a device.", node); + return -ENODEV; + } } - if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { - log_warning("%s is not a device.", node); - return -ENODEV; - } + if (cg_all_unified() > 0) { + if (!prog) + return 0; - sprintf(buf, - "%c %u:%u %s", - S_ISCHR(st.st_mode) ? 'c' : 'b', - major(st.st_rdev), minor(st.st_rdev), - acc); + return cgroup_bpf_whitelist_device(prog, S_ISCHR(st.st_mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, + major(st.st_rdev), minor(st.st_rdev), acc); - r = cg_set_attribute("devices", path, "devices.allow", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set devices.allow on %s: %m", path); + } else { + char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4]; - return r; + sprintf(buf, + "%c %u:%u %s", + S_ISCHR(st.st_mode) ? 
'c' : 'b', + major(st.st_rdev), minor(st.st_rdev), + acc); + + /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL here. */ + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, + r, "Failed to set devices.allow on %s: %m", path); + + return 0; + } } -static int whitelist_major(const char *path, const char *name, char type, const char *acc) { +static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) { _cleanup_fclose_ FILE *f = NULL; - char line[LINE_MAX]; + char buf[2+DECIMAL_STR_MAX(unsigned)+3+4]; bool good = false; + unsigned maj; int r; assert(path); assert(acc); assert(IN_SET(type, 'b', 'c')); + if (streq(name, "*")) { + /* If the name is a wildcard, then apply this list to all devices of this type */ + + if (cg_all_unified() > 0) { + if (!prog) + return 0; + + (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc); + } else { + xsprintf(buf, "%c *:* %s", type, acc); + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set devices.allow on %s: %m", path); + return 0; + } + } + + if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) { + /* The name is numeric and suitable as major. In that case, let's take it as the major, and create the entry + * directly */ + + if (cg_all_unified() > 0) { + if (!prog) + return 0; + + (void) cgroup_bpf_whitelist_major(prog, + type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, + maj, acc); + } else { + xsprintf(buf, "%c %u:* %s", type, maj, acc); + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, + "Failed to set devices.allow on %s: %m", path); + } + + return 0; + } + f = fopen("/proc/devices", "re"); if (!f) return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type); - FOREACH_LINE(line, f, goto fail) { - char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w; - unsigned maj; + for (;;) { + _cleanup_free_ char *line = NULL; + char *w, *p; - truncate_nl(line); + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/devices: %m"); + if (r == 0) + break; if (type == 'c' && streq(line, "Character devices:")) { good = true; @@ -434,22 +590,31 @@ static int whitelist_major(const char *path, const char *name, char type, const if (fnmatch(name, w, 0) != 0) continue; - sprintf(buf, - "%c %u:* %s", - type, - maj, - acc); + if (cg_all_unified() > 0) { + if (!prog) + continue; - r = cg_set_attribute("devices", path, "devices.allow", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set devices.allow on %s: %m", path); + (void) cgroup_bpf_whitelist_major(prog, + type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, + maj, acc); + } else { + sprintf(buf, + "%c %u:* %s", + type, + maj, + acc); + + /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL + * here. */ + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? 
LOG_DEBUG : LOG_WARNING, + r, "Failed to set devices.allow on %s: %m", path); + } } return 0; - -fail: - return log_warning_errno(errno, "Failed to read /proc/devices: %m"); } static bool cgroup_context_has_cpu_weight(CGroupContext *c) { @@ -482,53 +647,42 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) return CGROUP_CPU_SHARES_DEFAULT; } -static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)]; - int r; +static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) { + char buf[DECIMAL_STR_MAX(uint64_t) + 2]; xsprintf(buf, "%" PRIu64 "\n", weight); - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.weight: %m"); + (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf); +} + +static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) { + char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1]; if (quota != USEC_INFINITY) xsprintf(buf, USEC_FMT " " USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); else xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); - - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf); - - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.max: %m"); + (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf); } -static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; - int r; +static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) { + char buf[DECIMAL_STR_MAX(uint64_t) + 2]; xsprintf(buf, "%" PRIu64 "\n", shares); - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares: %m"); + (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf); +} + +static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) { + char buf[DECIMAL_STR_MAX(usec_t) + 2]; xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us: %m"); + (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf); if (quota != USEC_INFINITY) { xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf); + (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf); } else - r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us: %m"); + (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n"); } static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) { @@ -546,6 +700,7 @@ static bool cgroup_context_has_io_config(CGroupContext *c) { c->io_weight != CGROUP_WEIGHT_INVALID || c->startup_io_weight != CGROUP_WEIGHT_INVALID || c->io_device_weights || + c->io_device_latencies || c->io_device_limits; } @@ -597,10 +752,7 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_ return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); - r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight: %m"); + (void) set_attribute_and_warn(u, "io", "io.weight", buf); } static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) { @@ -613,10 +765,24 @@ static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); - r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf); + (void) set_attribute_and_warn(u, "blkio", "blkio.weight_device", buf); +} + +static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) { + char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device: %m"); + return; + + if (target != USEC_INFINITY) + xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target); + else + xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev)); + + (void) set_attribute_and_warn(u, "io", "io.latency", buf); } static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) { @@ -639,10 +805,7 @@ static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); - r = cg_set_attribute("io", u->cgroup_path, "io.max", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.max: %m"); + (void) set_attribute_and_warn(u, "io", "io.max", buf); } static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) { @@ -655,33 +818,23 @@ static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint6 return; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); - r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.read_bps_device: %m"); + (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.read_bps_device", buf); sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); - r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.write_bps_device: %m"); + (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf); } static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { - return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX; + return c->memory_min > 0 || c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX; } static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { - char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; - int r; + char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max\n"; if (v != CGROUP_LIMIT_MAX) xsprintf(buf, "%" PRIu64 "\n", v); - r = cg_set_attribute("memory", u->cgroup_path, file, buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set %s: %m", file); + (void) set_attribute_and_warn(u, "memory", file, buf); } static void cgroup_apply_firewall(Unit *u) { @@ -698,130 +851,133 @@ static void cgroup_apply_firewall(Unit *u) { static void cgroup_context_apply( Unit *u, CGroupMask apply_mask, - bool apply_bpf, ManagerState state) { const char *path; CGroupContext *c; - bool is_root; + bool is_host_root, is_local_root; int r; assert(u); /* Nothing to do? Exit early! */ - if (apply_mask == 0 && !apply_bpf) + if (apply_mask == 0) return; - /* Some cgroup attributes are not supported on the root cgroup, hence silently ignore */ - is_root = unit_has_root_cgroup(u); + /* Some cgroup attributes are not supported on the host root cgroup, hence silently ignore them here. And other + * attributes should only be managed for cgroups further down the tree. 
*/ + is_local_root = unit_has_name(u, SPECIAL_ROOT_SLICE); + is_host_root = unit_has_host_root_cgroup(u); assert_se(c = unit_get_cgroup_context(u)); assert_se(path = u->cgroup_path); - if (is_root) /* Make sure we don't try to display messages with an empty path. */ + if (is_local_root) /* Make sure we don't try to display messages with an empty path. */ path = "/"; - /* We generally ignore errors caused by read-only mounted - * cgroup trees (assuming we are running in a container then), - * and missing cgroups, i.e. EROFS and ENOENT. */ - - if ((apply_mask & CGROUP_MASK_CPU) && !is_root) { - bool has_weight, has_shares; + /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container + * then), and missing cgroups, i.e. EROFS and ENOENT. */ - has_weight = cgroup_context_has_cpu_weight(c); - has_shares = cgroup_context_has_cpu_shares(c); + /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but + * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this + * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of + * containers we want to leave control of these to the container manager (and if cgroupsv2 delegation is used + * we couldn't even write to them if we wanted to). 
*/ + if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) { if (cg_all_unified() > 0) { uint64_t weight; - if (has_weight) + if (cgroup_context_has_cpu_weight(c)) weight = cgroup_context_cpu_weight(c, state); - else if (has_shares) { - uint64_t shares = cgroup_context_cpu_shares(c, state); + else if (cgroup_context_has_cpu_shares(c)) { + uint64_t shares; + shares = cgroup_context_cpu_shares(c, state); weight = cgroup_cpu_shares_to_weight(shares); - log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s", + log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s", shares, weight, path); } else weight = CGROUP_WEIGHT_DEFAULT; - cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec); + cgroup_apply_unified_cpu_weight(u, weight); + cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec); + } else { uint64_t shares; - if (has_weight) { - uint64_t weight = cgroup_context_cpu_weight(c, state); + if (cgroup_context_has_cpu_weight(c)) { + uint64_t weight; + weight = cgroup_context_cpu_weight(c, state); shares = cgroup_cpu_weight_to_shares(weight); - log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s", + log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s", weight, shares, path); - } else if (has_shares) + } else if (cgroup_context_has_cpu_shares(c)) shares = cgroup_context_cpu_shares(c, state); else shares = CGROUP_CPU_SHARES_DEFAULT; - cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec); + cgroup_apply_legacy_cpu_shares(u, shares); + cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec); } } - if (apply_mask & CGROUP_MASK_IO) { - bool has_io = cgroup_context_has_io_config(c); - bool has_blockio = cgroup_context_has_blockio_config(c); + /* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroupsv2 + * 
controller), and in case of containers we want to leave control of these attributes to the container manager + * (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */ + if ((apply_mask & CGROUP_MASK_IO) && !is_local_root) { + char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; + bool has_io, has_blockio; + uint64_t weight; - if (!is_root) { - char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; - uint64_t weight; + has_io = cgroup_context_has_io_config(c); + has_blockio = cgroup_context_has_blockio_config(c); - if (has_io) - weight = cgroup_context_io_weight(c, state); - else if (has_blockio) { - uint64_t blkio_weight = cgroup_context_blkio_weight(c, state); + if (has_io) + weight = cgroup_context_io_weight(c, state); + else if (has_blockio) { + uint64_t blkio_weight; - weight = cgroup_weight_blkio_to_io(blkio_weight); + blkio_weight = cgroup_context_blkio_weight(c, state); + weight = cgroup_weight_blkio_to_io(blkio_weight); - log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64, - blkio_weight, weight); - } else - weight = CGROUP_WEIGHT_DEFAULT; + log_cgroup_compat(u, "Applying [Startup]BlockIOWeight=%" PRIu64 " as [Startup]IOWeight=%" PRIu64, + blkio_weight, weight); + } else + weight = CGROUP_WEIGHT_DEFAULT; - xsprintf(buf, "default %" PRIu64 "\n", weight); - r = cg_set_attribute("io", path, "io.weight", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight: %m"); + xsprintf(buf, "default %" PRIu64 "\n", weight); + (void) set_attribute_and_warn(u, "io", "io.weight", buf); - if (has_io) { - CGroupIODeviceWeight *w; + if (has_io) { + CGroupIODeviceLatency *latency; + CGroupIODeviceLimit *limit; + CGroupIODeviceWeight *w; - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(u, w->path, w->weight); - } else if (has_blockio) { - CGroupBlockIODeviceWeight *w; + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_io_device_weight(u, w->path, w->weight); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) { - weight = cgroup_weight_blkio_to_io(w->weight); + LIST_FOREACH(device_limits, limit, c->io_device_limits) + cgroup_apply_io_device_limit(u, limit->path, limit->limits); - log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s", - w->weight, weight, w->path); + LIST_FOREACH(device_latencies, latency, c->io_device_latencies) + cgroup_apply_io_device_latency(u, latency->path, latency->target_usec); - cgroup_apply_io_device_weight(u, w->path, weight); - } - } - } + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; + CGroupBlockIODeviceBandwidth *b; - /* Apply limits and free ones without config. 
*/ - if (has_io) { - CGroupIODeviceLimit *l; + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + weight = cgroup_weight_blkio_to_io(w->weight); - LIST_FOREACH(device_limits, l, c->io_device_limits) - cgroup_apply_io_device_limit(u, l->path, l->limits); + log_cgroup_compat(u, "Applying BlockIODeviceWeight=%" PRIu64 " as IODeviceWeight=%" PRIu64 " for %s", + w->weight, weight, w->path); - } else if (has_blockio) { - CGroupBlockIODeviceBandwidth *b; + cgroup_apply_io_device_weight(u, w->path, weight); + } LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; @@ -833,7 +989,7 @@ static void cgroup_context_apply( limits[CGROUP_IO_RBPS_MAX] = b->rbps; limits[CGROUP_IO_WBPS_MAX] = b->wbps; - log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s", + log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax= for %s", b->rbps, b->wbps, b->path); cgroup_apply_io_device_limit(u, b->path, limits); @@ -842,19 +998,24 @@ static void cgroup_context_apply( } if (apply_mask & CGROUP_MASK_BLKIO) { - bool has_io = cgroup_context_has_io_config(c); - bool has_blockio = cgroup_context_has_blockio_config(c); + bool has_io, has_blockio; - if (!is_root) { + has_io = cgroup_context_has_io_config(c); + has_blockio = cgroup_context_has_blockio_config(c); + + /* Applying a 'weight' never makes sense for the host root cgroup, and for containers this should be + * left to our container manager, too. 
*/ + if (!is_local_root) { char buf[DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; if (has_io) { - uint64_t io_weight = cgroup_context_io_weight(c, state); + uint64_t io_weight; + io_weight = cgroup_context_io_weight(c, state); weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); - log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, + log_cgroup_compat(u, "Applying [Startup]IOWeight=%" PRIu64 " as [Startup]BlockIOWeight=%" PRIu64, io_weight, weight); } else if (has_blockio) weight = cgroup_context_blkio_weight(c, state); @@ -862,19 +1023,15 @@ static void cgroup_context_apply( weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); - r = cg_set_attribute("blkio", path, "blkio.weight", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight: %m"); + (void) set_attribute_and_warn(u, "blkio", "blkio.weight", buf); if (has_io) { CGroupIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) { weight = cgroup_weight_io_to_blkio(w->weight); - log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s", + log_cgroup_compat(u, "Applying IODeviceWeight=%" PRIu64 " as BlockIODeviceWeight=%" PRIu64 " for %s", w->weight, weight, w->path); cgroup_apply_blkio_device_weight(u, w->path, weight); @@ -882,31 +1039,38 @@ static void cgroup_context_apply( } else if (has_blockio) { CGroupBlockIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) cgroup_apply_blkio_device_weight(u, w->path, w->weight); } } - /* Apply limits and free ones without config. 
*/ - if (has_io) { - CGroupIODeviceLimit *l; + /* The bandwith limits are something that make sense to be applied to the host's root but not container + * roots, as there we want the container manager to handle it */ + if (is_host_root || !is_local_root) { + if (has_io) { + CGroupIODeviceLimit *l; - LIST_FOREACH(device_limits, l, c->io_device_limits) { - log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s", - l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path); + LIST_FOREACH(device_limits, l, c->io_device_limits) { + log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax= for %s", + l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path); - cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]); - } - } else if (has_blockio) { - CGroupBlockIODeviceBandwidth *b; + cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]); + } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b; - LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) - cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps); + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) + cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps); + } } } - if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) { + /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes' + * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we + * want to leave control to the container manager (and if proper cgroupsv2 delegation is used we couldn't even + * write to this if we wanted to.) 
*/ + if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) { + if (cg_all_unified() > 0) { uint64_t max, swap_max = CGROUP_LIMIT_MAX; @@ -917,20 +1081,22 @@ static void cgroup_context_apply( max = c->memory_limit; if (max != CGROUP_LIMIT_MAX) - log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max); + log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max); } + cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min); cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); cgroup_apply_unified_memory_limit(u, "memory.max", max); cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max); + } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; uint64_t val; if (cgroup_context_has_unified_memory_config(c)) { val = c->memory_max; - log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val); + log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val); } else val = c->memory_limit; @@ -939,27 +1105,33 @@ static void cgroup_context_apply( else xsprintf(buf, "%" PRIu64 "\n", val); - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes: %m"); + (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf); } } - if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) { + /* On cgroupsv2 we can apply BPF everywhere. On cgroupsv1 we apply it everywhere except for the root of + * containers, where we leave this to the manager */ + if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) && + (is_host_root || cg_all_unified() > 0 || !is_local_root)) { + _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL; CGroupDeviceAllow *a; - /* Changing the devices list of a populated cgroup - * might result in EINVAL, hence ignore EINVAL - * here. 
*/ + if (cg_all_unified() > 0) { + r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m"); + } else { + /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL + * here. */ - if (c->device_allow || c->device_policy != CGROUP_AUTO) - r = cg_set_attribute("devices", path, "devices.deny", "a"); - else - r = cg_set_attribute("devices", path, "devices.allow", "a"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to reset devices.list: %m"); + if (c->device_allow || c->device_policy != CGROUP_AUTO) + r = cg_set_attribute("devices", path, "devices.deny", "a"); + else + r = cg_set_attribute("devices", path, "devices.allow", "a"); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to reset devices.allow/devices.deny: %m"); + } if (c->device_policy == CGROUP_CLOSED || (c->device_policy == CGROUP_AUTO && c->device_allow)) { @@ -972,16 +1144,16 @@ static void cgroup_context_apply( "/dev/tty\0" "rwm\0" "/dev/ptmx\0" "rwm\0" /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ - "-/run/systemd/inaccessible/chr\0" "rwm\0" - "-/run/systemd/inaccessible/blk\0" "rwm\0"; + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; const char *x, *y; NULSTR_FOREACH_PAIR(x, y, auto_devices) - whitelist_device(path, x, y); + (void) whitelist_device(prog, path, x, y); /* PTS (/dev/pts) devices may not be duplicated, but accessed */ - whitelist_major(path, "pts", 'c', "rw"); + (void) whitelist_major(prog, path, "pts", 'c', "rw"); } LIST_FOREACH(device_allow, a, c->device_allow) { @@ -1001,19 +1173,31 @@ static void cgroup_context_apply( acc[k++] = 0; if (path_startswith(a->path, "/dev/")) - whitelist_device(path, a->path, acc); + (void) 
whitelist_device(prog, path, a->path, acc); else if ((val = startswith(a->path, "block-"))) - whitelist_major(path, val, 'b', acc); + (void) whitelist_major(prog, path, val, 'b', acc); else if ((val = startswith(a->path, "char-"))) - whitelist_major(path, val, 'c', acc); + (void) whitelist_major(prog, path, val, 'c', acc); else - log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path); + log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path); + } + + r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow); + if (r < 0) { + static bool warned = false; + + log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r, + "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n" + "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n" + "(This warning is only shown for the first loaded unit using device ACL.)", u->id); + + warned = true; } } if (apply_mask & CGROUP_MASK_PIDS) { - if (is_root) { + if (is_host_root) { /* So, the "pids" controller does not expose anything on the root cgroup, in order not to * replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when * the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a @@ -1034,39 +1218,68 @@ static void cgroup_context_apply( r = procfs_tasks_set_limit(TASKS_MAX); else r = 0; - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to write to tasks limit sysctls: %m"); + } - } else { + /* The attribute itself is not available on the host root cgroup, and in the container case we want to + * leave it for the container manager. 
*/ + if (!is_local_root) { if (c->tasks_max != CGROUP_LIMIT_MAX) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; sprintf(buf, "%" PRIu64 "\n", c->tasks_max); - r = cg_set_attribute("pids", path, "pids.max", buf); + (void) set_attribute_and_warn(u, "pids", "pids.max", buf); } else - r = cg_set_attribute("pids", path, "pids.max", "max"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set pids.max: %m"); + (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n"); } } - if (apply_bpf) + if (apply_mask & CGROUP_MASK_BPF_FIREWALL) cgroup_apply_firewall(u); } -CGroupMask cgroup_context_get_mask(CGroupContext *c) { +static bool unit_get_needs_bpf_firewall(Unit *u) { + CGroupContext *c; + Unit *p; + assert(u); + + c = unit_get_cgroup_context(u); + if (!c) + return false; + + if (c->ip_accounting || + c->ip_address_allow || + c->ip_address_deny) + return true; + + /* If any parent slice has an IP access list defined, it applies too */ + for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) { + c = unit_get_cgroup_context(p); + if (!c) + return false; + + if (c->ip_address_allow || + c->ip_address_deny) + return true; + } + + return false; +} + +static CGroupMask cgroup_context_get_mask(CGroupContext *c) { CGroupMask mask = 0; - /* Figure out which controllers we need */ + /* Figure out which controllers we need, based on the cgroup context object */ + + if (c->cpu_accounting) + mask |= get_cpu_accounting_mask(); - if (c->cpu_accounting || - cgroup_context_has_cpu_weight(c) || + if (cgroup_context_has_cpu_weight(c) || cgroup_context_has_cpu_shares(c) || c->cpu_quota_per_sec_usec != USEC_INFINITY) - mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; + mask |= CGROUP_MASK_CPU; if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; @@ -1078,25 +1291,41 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { if (c->device_allow || c->device_policy 
!= CGROUP_AUTO) - mask |= CGROUP_MASK_DEVICES; + mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES; if (c->tasks_accounting || c->tasks_max != CGROUP_LIMIT_MAX) mask |= CGROUP_MASK_PIDS; + return CGROUP_MASK_EXTEND_JOINED(mask); +} + +static CGroupMask unit_get_bpf_mask(Unit *u) { + CGroupMask mask = 0; + + /* Figure out which controllers we need, based on the cgroup context, possibly taking into account children + * too. */ + + if (unit_get_needs_bpf_firewall(u)) + mask |= CGROUP_MASK_BPF_FIREWALL; + return mask; } CGroupMask unit_get_own_mask(Unit *u) { CGroupContext *c; - /* Returns the mask of controllers the unit needs for itself */ + /* Returns the mask of controllers the unit needs for itself. If a unit is not properly loaded, return an empty + * mask, as we shouldn't reflect it in the cgroup hierarchy then. */ + + if (u->load_state != UNIT_LOADED) + return 0; c = unit_get_cgroup_context(u); if (!c) return 0; - return cgroup_context_get_mask(c) | unit_get_delegate_mask(u); + return (cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u); } CGroupMask unit_get_delegate_mask(Unit *u) { @@ -1119,7 +1348,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { } assert_se(c = unit_get_cgroup_context(u)); - return c->delegate_controllers; + return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers); } CGroupMask unit_get_members_mask(Unit *u) { @@ -1128,7 +1357,7 @@ CGroupMask unit_get_members_mask(Unit *u) { /* Returns the mask of controllers all of the unit's children require, merged */ if (u->cgroup_members_mask_valid) - return u->cgroup_members_mask; + return u->cgroup_members_mask; /* Use cached value if possible */ u->cgroup_members_mask = 0; @@ -1138,14 +1367,8 @@ CGroupMask unit_get_members_mask(Unit *u) { Iterator i; HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) { - - if (member == u) - continue; - - if (UNIT_DEREF(member->slice) != u) - continue; - - u->cgroup_members_mask |= 
unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */ + if (UNIT_DEREF(member->slice) == u) + u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */ } } @@ -1166,6 +1389,31 @@ CGroupMask unit_get_siblings_mask(Unit *u) { return unit_get_subtree_mask(u); /* we are the top-level slice */ } +CGroupMask unit_get_disable_mask(Unit *u) { + CGroupContext *c; + + c = unit_get_cgroup_context(u); + if (!c) + return 0; + + return c->disable_controllers; +} + +CGroupMask unit_get_ancestor_disable_mask(Unit *u) { + CGroupMask mask; + + assert(u); + mask = unit_get_disable_mask(u); + + /* Returns the mask of controllers which are marked as forcibly + * disabled in any ancestor unit or the unit in question. */ + + if (UNIT_ISSET(u->slice)) + mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice)); + + return mask; +} + CGroupMask unit_get_subtree_mask(Unit *u) { /* Returns the mask of this subtree, meaning of the group @@ -1186,6 +1434,7 @@ CGroupMask unit_get_target_mask(Unit *u) { mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u); mask &= u->manager->cgroup_supported; + mask &= ~unit_get_ancestor_disable_mask(u); return mask; } @@ -1200,85 +1449,19 @@ CGroupMask unit_get_enable_mask(Unit *u) { mask = unit_get_members_mask(u); mask &= u->manager->cgroup_supported; + mask &= ~unit_get_ancestor_disable_mask(u); return mask; } -bool unit_get_needs_bpf(Unit *u) { - CGroupContext *c; - Unit *p; +void unit_invalidate_cgroup_members_masks(Unit *u) { assert(u); - c = unit_get_cgroup_context(u); - if (!c) - return false; + /* Recurse invalidate the member masks cache all the way up the tree */ + u->cgroup_members_mask_valid = false; - if (c->ip_accounting || - c->ip_address_allow || - c->ip_address_deny) - return true; - - /* If any parent slice has an IP access list defined, it applies too */ - for (p = UNIT_DEREF(u->slice); p; p = 
UNIT_DEREF(p->slice)) { - c = unit_get_cgroup_context(p); - if (!c) - return false; - - if (c->ip_address_allow || - c->ip_address_deny) - return true; - } - - return false; -} - -/* Recurse from a unit up through its containing slices, propagating - * mask bits upward. A unit is also member of itself. */ -void unit_update_cgroup_members_masks(Unit *u) { - CGroupMask m; - bool more; - - assert(u); - - /* Calculate subtree mask */ - m = unit_get_subtree_mask(u); - - /* See if anything changed from the previous invocation. If - * not, we're done. */ - if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask) - return; - - more = - u->cgroup_subtree_mask_valid && - ((m & ~u->cgroup_subtree_mask) != 0) && - ((~m & u->cgroup_subtree_mask) == 0); - - u->cgroup_subtree_mask = m; - u->cgroup_subtree_mask_valid = true; - - if (UNIT_ISSET(u->slice)) { - Unit *s = UNIT_DEREF(u->slice); - - if (more) - /* There's more set now than before. We - * propagate the new mask to the parent's mask - * (not caring if it actually was valid or - * not). 
*/ - - s->cgroup_members_mask |= m; - - else - /* There's less set now than before (or we - * don't know), we need to recalculate - * everything, so let's invalidate the - * parent's members mask */ - - s->cgroup_members_mask_valid = false; - - /* And now make sure that this change also hits our - * grandparents */ - unit_update_cgroup_members_masks(s); - } + if (UNIT_ISSET(u->slice)) + unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice)); } const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) { @@ -1302,7 +1485,7 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) { return unit_get_realized_cgroup_path(userdata, mask); } -char *unit_default_cgroup_path(Unit *u) { +char *unit_default_cgroup_path(const Unit *u) { _cleanup_free_ char *escaped = NULL, *slice = NULL; int r; @@ -1435,16 +1618,14 @@ static int unit_create_cgroup( Unit *u, CGroupMask target_mask, CGroupMask enable_mask, - bool needs_bpf) { + ManagerState state) { - CGroupContext *c; - int r; bool created; + int r; assert(u); - c = unit_get_cgroup_context(u); - if (!c) + if (!UNIT_HAS_CGROUP_CONTEXT(u)) return 0; /* Figure out our cgroup path */ @@ -1456,26 +1637,44 @@ static int unit_create_cgroup( r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path); if (r < 0) return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path); - created = !!r; + created = r; /* Start watching it */ (void) unit_watch_cgroup(u); /* Preserve enabled controllers in delegated units, adjust others. 
*/ - if (created || !unit_cgroup_delegate(u)) { + if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) { + CGroupMask result_mask = 0; /* Enable all controllers we need */ - r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path); + r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask); if (r < 0) - log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m", - u->cgroup_path); + log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path); + + /* If we just turned off a controller, this might release the controller for our parent too, let's + * enqueue the parent for re-realization in that case again. */ + if (UNIT_ISSET(u->slice)) { + CGroupMask turned_off; + + turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0); + if (turned_off != 0) { + Unit *parent; + + /* Force the parent to propagate the enable mask to the kernel again, by invalidating + * the controller we just turned off. */ + + for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice)) + unit_invalidate_cgroup(parent, turned_off); + } + } + + /* Remember what's actually enabled now */ + u->cgroup_enabled_mask = result_mask; } /* Keep track that this is now realized */ u->cgroup_realized = true; u->cgroup_realized_mask = target_mask; - u->cgroup_enabled_mask = enable_mask; - u->cgroup_bpf_state = needs_bpf ? 
UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF; if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) { @@ -1487,6 +1686,10 @@ static int unit_create_cgroup( log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path); } + /* Set attributes */ + cgroup_context_apply(u, target_mask, state); + cgroup_xattr_apply(u); + return 0; } @@ -1628,42 +1831,69 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) { return r; } -static void cgroup_xattr_apply(Unit *u) { - char ids[SD_ID128_STRING_MAX]; - int r; +static bool unit_has_mask_realized( + Unit *u, + CGroupMask target_mask, + CGroupMask enable_mask) { assert(u); - if (!MANAGER_IS_SYSTEM(u->manager)) - return; + /* Returns true if this unit is fully realized. We check four things: + * + * 1. Whether the cgroup was created at all + * 2. Whether the cgroup was created in all the hierarchies we need it to be created in (in case of cgroupsv1) + * 3. Whether the cgroup has all the right controllers enabled (in case of cgroupsv2) + * 4. Whether the invalidation mask is currently zero + * + * If you wonder why we mask the target realization and enable mask with CGROUP_MASK_V1/CGROUP_MASK_V2: note + * that there are three sets of bitmasks: CGROUP_MASK_V1 (for real cgroupv1 controllers), CGROUP_MASK_V2 (for + * real cgroupv2 controllers) and CGROUP_MASK_BPF (for BPF-based pseudo-controllers). Now, cgroup_realized_mask + * is only matters for cgroupsv1 controllers, and cgroup_enabled_mask only used for cgroupsv2, and if they + * differ in the others, we don't really care. (After all, the cgroup_enabled_mask tracks with controllers are + * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they + * simply don't matter. 
*/ - if (sd_id128_is_null(u->invocation_id)) - return; + return u->cgroup_realized && + ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 && + ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 && + u->cgroup_invalidated_mask == 0; +} - r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, - "trusted.invocation_id", - sd_id128_to_string(u->invocation_id, ids), 32, - 0); - if (r < 0) - log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path); +static bool unit_has_mask_disables_realized( + Unit *u, + CGroupMask target_mask, + CGroupMask enable_mask) { + + assert(u); + + /* Returns true if all controllers which should be disabled are indeed disabled. + * + * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is + * already removed. */ + + return !u->cgroup_realized || + (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) && + FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2)); } -static bool unit_has_mask_realized( +static bool unit_has_mask_enables_realized( Unit *u, CGroupMask target_mask, - CGroupMask enable_mask, - bool needs_bpf) { + CGroupMask enable_mask) { assert(u); + /* Returns true if all controllers which should be enabled are indeed enabled. + * + * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything + * we want to add is already added. 
*/ + return u->cgroup_realized && - u->cgroup_realized_mask == target_mask && - u->cgroup_enabled_mask == enable_mask && - ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) || - (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF)); + ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) && + ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2); } -static void unit_add_to_cgroup_realize_queue(Unit *u) { +void unit_add_to_cgroup_realize_queue(Unit *u) { assert(u); if (u->in_cgroup_realize_queue) @@ -1683,15 +1913,131 @@ static void unit_remove_from_cgroup_realize_queue(Unit *u) { u->in_cgroup_realize_queue = false; } +/* Controllers can only be enabled breadth-first, from the root of the + * hierarchy downwards to the unit in question. */ +static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) { + CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask; + int r; + + assert(u); + + /* First go deal with this unit's parent, or we won't be able to enable + * any new controllers at this layer. */ + if (UNIT_ISSET(u->slice)) { + r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state); + if (r < 0) + return r; + } + + target_mask = unit_get_target_mask(u); + enable_mask = unit_get_enable_mask(u); + + /* We can only enable in this direction, don't try to disable anything. + */ + if (unit_has_mask_enables_realized(u, target_mask, enable_mask)) + return 0; + + new_target_mask = u->cgroup_realized_mask | target_mask; + new_enable_mask = u->cgroup_enabled_mask | enable_mask; + + return unit_create_cgroup(u, new_target_mask, new_enable_mask, state); +} + +/* Controllers can only be disabled depth-first, from the leaves of the + * hierarchy upwards to the unit in question. 
*/ +static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) { + Iterator i; + Unit *m; + void *v; + + assert(u); + + if (u->type != UNIT_SLICE) + return 0; + + HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) { + CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask; + int r; + + if (UNIT_DEREF(m->slice) != u) + continue; + + /* The cgroup for this unit might not actually be fully + * realised yet, in which case it isn't holding any controllers + * open anyway. */ + if (!m->cgroup_path) + continue; + + /* We must disable those below us first in order to release the + * controller. */ + if (m->type == UNIT_SLICE) + (void) unit_realize_cgroup_now_disable(m, state); + + target_mask = unit_get_target_mask(m); + enable_mask = unit_get_enable_mask(m); + + /* We can only disable in this direction, don't try to enable + * anything. */ + if (unit_has_mask_disables_realized(m, target_mask, enable_mask)) + continue; + + new_target_mask = m->cgroup_realized_mask & target_mask; + new_enable_mask = m->cgroup_enabled_mask & enable_mask; + + r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state); + if (r < 0) + return r; + } + + return 0; +} + /* Check if necessary controllers and attributes for a unit are in place. * - * If so, do nothing. - * If not, create paths, move processes over, and set attributes. + * - If so, do nothing. + * - If not, create paths, move processes over, and set attributes. + * + * Controllers can only be *enabled* in a breadth-first way, and *disabled* in + * a depth-first way. As such the process looks like this: + * + * Suppose we have a cgroup hierarchy which looks like this: + * + * root + * / \ + * / \ + * / \ + * a b + * / \ / \ + * / \ / \ + * c d e f + * / \ / \ / \ / \ + * h i j k l m n o + * + * 1. We want to realise cgroup "d" now. + * 2. cgroup "a" has DisableControllers=cpu in the associated unit. + * 3. cgroup "k" just started requesting the memory controller. 
+ * + * To make this work we must do the following in order: + * + * 1. Disable CPU controller in k, j + * 2. Disable CPU controller in d + * 3. Enable memory controller in root + * 4. Enable memory controller in a + * 5. Enable memory controller in d + * 6. Enable memory controller in k + * + * Notice that we need to touch j in one direction, but not the other. We also + * don't go beyond d when disabling -- it's up to "a" to get realized if it + * wants to disable further. The basic rules are therefore: + * + * - If you're disabling something, you need to realise all of the cgroups from + * your recursive descendants to the root. This starts from the leaves. + * - If you're enabling something, you need to realise from the root cgroup + * downwards, but you don't need to iterate your recursive descendants. * * Returns 0 on success and < 0 on failure. */ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { CGroupMask target_mask, enable_mask; - bool needs_bpf, apply_bpf; int r; assert(u); @@ -1700,32 +2046,29 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { target_mask = unit_get_target_mask(u); enable_mask = unit_get_enable_mask(u); - needs_bpf = unit_get_needs_bpf(u); - if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf)) + if (unit_has_mask_realized(u, target_mask, enable_mask)) return 0; - /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously - * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it - * this will trickle down properly to cgroupfs. 
*/ - apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF; + /* Disable controllers below us, if there are any */ + r = unit_realize_cgroup_now_disable(u, state); + if (r < 0) + return r; - /* First, realize parents */ + /* Enable controllers above us, if there are any */ if (UNIT_ISSET(u->slice)) { - r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state); + r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state); if (r < 0) return r; } - /* And then do the real work */ - r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf); + /* Now actually deal with the cgroup we were trying to realise and set attributes */ + r = unit_create_cgroup(u, target_mask, enable_mask, state); if (r < 0) return r; - /* Finally, apply the necessary attributes. */ - cgroup_context_apply(u, target_mask, apply_bpf, state); - cgroup_xattr_apply(u); - + /* Now, reset the invalidation mask */ + u->cgroup_invalidated_mask = 0; return 0; } @@ -1771,9 +2114,6 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { void *v; HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) { - if (m == u) - continue; - /* Skip units that have a dependency on the slice * but aren't actually in it. */ if (UNIT_DEREF(m->slice) != slice) @@ -1789,8 +2129,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { * any changes. */ if (unit_has_mask_realized(m, unit_get_target_mask(m), - unit_get_enable_mask(m), - unit_get_needs_bpf(m))) + unit_get_enable_mask(m))) continue; unit_add_to_cgroup_realize_queue(m); @@ -1827,7 +2166,8 @@ int unit_realize_cgroup(Unit *u) { void unit_release_cgroup(Unit *u) { assert(u); - /* Forgets all cgroup details for this cgroup */ + /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call + * when we close down everything for reexecution, where we really want to leave the cgroup in place. 
*/ if (u->cgroup_path) { (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); @@ -1836,7 +2176,7 @@ void unit_release_cgroup(Unit *u) { if (u->cgroup_inotify_wd >= 0) { if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0) - log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring", u->cgroup_inotify_wd, u->id); + log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring: %m", u->cgroup_inotify_wd, u->id); (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd)); u->cgroup_inotify_wd = -1; @@ -1872,6 +2212,8 @@ void unit_prune_cgroup(Unit *u) { u->cgroup_realized = false; u->cgroup_realized_mask = 0; u->cgroup_enabled_mask = 0; + + u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed); } int unit_search_main_pid(Unit *u, pid_t *ret) { @@ -2133,11 +2475,30 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, } } +static int cg_bpf_mask_supported(CGroupMask *ret) { + CGroupMask mask = 0; + int r; + + /* BPF-based firewall */ + r = bpf_firewall_supported(); + if (r > 0) + mask |= CGROUP_MASK_BPF_FIREWALL; + + /* BPF-based device access control */ + r = bpf_devices_supported(); + if (r > 0) + mask |= CGROUP_MASK_BPF_DEVICES; + + *ret = mask; + return 0; +} + int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; const char *scope_path; CGroupController c; int r, all_unified; + CGroupMask mask; char *e; assert(m); @@ -2231,7 +2592,7 @@ int manager_setup_cgroup(Manager *m) { (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify"); - } else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) { + } else if (MANAGER_IS_SYSTEM(m) && manager_owns_host_root_cgroup(m) && !MANAGER_IS_TEST_RUN(m)) { /* On the legacy hierarchy we only get notifications via cgroup agents. 
(Which isn't really reliable, * since it does not generate events when control groups with children run empty. */ @@ -2260,17 +2621,25 @@ int manager_setup_cgroup(Manager *m) { if (m->pin_cgroupfs_fd < 0) return log_error_errno(errno, "Failed to open pin file: %m"); - } else if (r < 0 && !m->test_run_flags) + } else if (!MANAGER_IS_TEST_RUN(m)) return log_error_errno(r, "Failed to create %s control group: %m", scope_path); /* 7. Always enable hierarchical support if it exists... */ - if (!all_unified && m->test_run_flags == 0) + if (!all_unified && !MANAGER_IS_TEST_RUN(m)) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); - /* 8. Figure out which controllers are supported, and log about it */ + /* 8. Figure out which controllers are supported */ r = cg_mask_supported(&m->cgroup_supported); if (r < 0) return log_error_errno(r, "Failed to determine supported controllers: %m"); + + /* 9. Figure out which bpf-based pseudo-controllers are supported */ + r = cg_bpf_mask_supported(&mask); + if (r < 0) + return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m"); + m->cgroup_supported |= mask; + + /* 10. 
Log which controllers are supported */ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c))); @@ -2401,7 +2770,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { return -ENODATA; /* The root cgroup doesn't expose this information, let's get it from /proc instead */ - if (unit_has_root_cgroup(u)) + if (unit_has_host_root_cgroup(u)) return procfs_memory_get_current(ret); if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) @@ -2436,7 +2805,7 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) { return -ENODATA; /* The root cgroup doesn't expose this information, let's get it from /proc instead */ - if (unit_has_root_cgroup(u)) + if (unit_has_host_root_cgroup(u)) return procfs_tasks_get_current(ret); if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0) @@ -2463,9 +2832,13 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { return -ENODATA; /* The root cgroup doesn't expose this information, let's get it from /proc instead */ - if (unit_has_root_cgroup(u)) + if (unit_has_host_root_cgroup(u)) return procfs_cpu_get_usage(ret); + /* Requisite controllers for CPU accounting are not enabled */ + if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0) + return -ENODATA; + r = cg_all_unified(); if (r < 0) return r; @@ -2473,14 +2846,11 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { _cleanup_free_ char *val = NULL; uint64_t us; - if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) - return -ENODATA; - r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val); - if (r < 0) - return r; if (IN_SET(r, -ENOENT, -ENXIO)) return -ENODATA; + if (r < 0) + return r; r = safe_atou64(val, &us); if (r < 0) @@ -2488,9 +2858,6 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { ns = us * NSEC_PER_USEC; } else { - if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) - 
return -ENODATA; - r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); if (r == -ENOENT) return -ENODATA; @@ -2631,10 +2998,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT)) m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT; - if ((u->cgroup_realized_mask & m) == 0) /* NOP? */ + if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */ return; - u->cgroup_realized_mask &= ~m; + u->cgroup_invalidated_mask |= m; unit_add_to_cgroup_realize_queue(u); } @@ -2644,10 +3011,10 @@ void unit_invalidate_cgroup_bpf(Unit *u) { if (!UNIT_HAS_CGROUP_CONTEXT(u)) return; - if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */ + if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */ return; - u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED; + u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL; unit_add_to_cgroup_realize_queue(u); /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access @@ -2658,13 +3025,8 @@ void unit_invalidate_cgroup_bpf(Unit *u) { void *v; HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) { - if (member == u) - continue; - - if (UNIT_DEREF(member->slice) != u) - continue; - - unit_invalidate_cgroup_bpf(member); + if (UNIT_DEREF(member->slice) == u) + unit_invalidate_cgroup_bpf(member); } } } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 2d2ff6fc3c..266daa20a5 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -12,6 +12,7 @@ typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; typedef struct CGroupIODeviceLimit CGroupIODeviceLimit; +typedef struct CGroupIODeviceLatency CGroupIODeviceLatency; typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; @@ -51,6 +52,12 @@ struct CGroupIODeviceLimit { 
uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; }; +struct CGroupIODeviceLatency { + LIST_FIELDS(CGroupIODeviceLatency, device_latencies); + char *path; + usec_t target_usec; +}; + struct CGroupBlockIODeviceWeight { LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); char *path; @@ -81,7 +88,9 @@ struct CGroupContext { uint64_t startup_io_weight; LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + LIST_HEAD(CGroupIODeviceLatency, io_device_latencies); + uint64_t memory_min; uint64_t memory_low; uint64_t memory_high; uint64_t memory_max; @@ -109,6 +118,8 @@ struct CGroupContext { bool delegate; CGroupMask delegate_controllers; + + CGroupMask disable_controllers; }; /* Used when querying IP accounting data */ @@ -128,29 +139,32 @@ void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -CGroupMask cgroup_context_get_mask(CGroupContext *c); - void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w); void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l); +void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l); void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); +int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode); + CGroupMask unit_get_own_mask(Unit *u); CGroupMask unit_get_delegate_mask(Unit *u); CGroupMask unit_get_members_mask(Unit *u); CGroupMask unit_get_siblings_mask(Unit *u); CGroupMask unit_get_subtree_mask(Unit *u); +CGroupMask unit_get_disable_mask(Unit *u); +CGroupMask unit_get_ancestor_disable_mask(Unit *u); CGroupMask unit_get_target_mask(Unit *u); CGroupMask 
unit_get_enable_mask(Unit *u); -bool unit_get_needs_bpf(Unit *u); +void unit_invalidate_cgroup_members_masks(Unit *u); -void unit_update_cgroup_members_masks(Unit *u); +void unit_add_to_cgroup_realize_queue(Unit *u); const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); -char *unit_default_cgroup_path(Unit *u); +char *unit_default_cgroup_path(const Unit *u); int unit_set_cgroup_path(Unit *u, const char *path); int unit_pick_cgroup_path(Unit *u); @@ -191,8 +205,8 @@ int unit_reset_ip_accounting(Unit *u); cc ? cc->name : false; \ }) -bool manager_owns_root_cgroup(Manager *m); -bool unit_has_root_cgroup(Unit *u); +bool manager_owns_host_root_cgroup(Manager *m); +bool unit_has_host_root_cgroup(Unit *u); int manager_notify_cgroup_empty(Manager *m, const char *group); diff --git a/src/core/chown-recursive.c b/src/core/chown-recursive.c index c4794501c2..7767301f7d 100644 --- a/src/core/chown-recursive.c +++ b/src/core/chown-recursive.c @@ -1,17 +1,21 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ -#include <sys/types.h> -#include <sys/stat.h> #include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/xattr.h> -#include "user-util.h" -#include "macro.h" -#include "fd-util.h" -#include "dirent-util.h" #include "chown-recursive.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "macro.h" +#include "stdio-util.h" +#include "strv.h" +#include "user-util.h" -static int chown_one(int fd, const char *name, const struct stat *st, uid_t uid, gid_t gid) { - int r; +static int chown_one(int fd, const struct stat *st, uid_t uid, gid_t gid) { + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + const char *n; assert(fd >= 0); assert(st); @@ -20,98 +24,95 @@ static int chown_one(int fd, const char *name, const struct stat *st, uid_t uid, (!gid_is_valid(gid) || st->st_gid == gid)) return 0; - if (name) - r = fchownat(fd, name, uid, gid, AT_SYMLINK_NOFOLLOW); - else - r = fchown(fd, uid, gid); - if (r < 0) - return -errno; 
+ /* We change ownership through the /proc/self/fd/%i path, so that we have a stable reference that works with + * O_PATH. (Note: fchown() and fchmod() do not work with O_PATH, the kernel refuses that. */ + xsprintf(procfs_path, "/proc/self/fd/%i", fd); - /* The linux kernel alters the mode in some cases of chown(). Let's undo this. */ - if (name) { - if (!S_ISLNK(st->st_mode)) - r = fchmodat(fd, name, st->st_mode, 0); - else /* There's currently no AT_SYMLINK_NOFOLLOW for fchmodat() */ - r = 0; - } else - r = fchmod(fd, st->st_mode); - if (r < 0) + /* Drop any ACL if there is one */ + FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default") + if (removexattr(procfs_path, n) < 0) + if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY)) + return -errno; + + if (chown(procfs_path, uid, gid) < 0) return -errno; + /* The linux kernel alters the mode in some cases of chown(), as well when we change ACLs. Let's undo this. We + * do this only for non-symlinks however. That's because for symlinks the access mode is ignored anyway and + * because on some kernels/file systems trying to change the access mode will succeed but has no effect while + * on others it actively fails. 
*/ + if (!S_ISLNK(st->st_mode)) + if (chmod(procfs_path, st->st_mode & 07777) < 0) + return -errno; + return 1; } static int chown_recursive_internal(int fd, const struct stat *st, uid_t uid, gid_t gid) { + _cleanup_closedir_ DIR *d = NULL; bool changed = false; + struct dirent *de; int r; assert(fd >= 0); assert(st); - if (S_ISDIR(st->st_mode)) { - _cleanup_closedir_ DIR *d = NULL; - struct dirent *de; - - d = fdopendir(fd); - if (!d) { - r = -errno; - goto finish; - } - fd = -1; - - FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) { - struct stat fst; - - if (dot_or_dot_dot(de->d_name)) - continue; - - if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) { - r = -errno; - goto finish; - } - - if (S_ISDIR(fst.st_mode)) { - int subdir_fd; - - subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); - if (subdir_fd < 0) { - r = -errno; - goto finish; - } - - r = chown_recursive_internal(subdir_fd, &fst, uid, gid); - if (r < 0) - goto finish; - if (r > 0) - changed = true; - } else { - r = chown_one(dirfd(d), de->d_name, &fst, uid, gid); - if (r < 0) - goto finish; - if (r > 0) - changed = true; - } + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + _cleanup_close_ int path_fd = -1; + struct stat fst; + + if (dot_or_dot_dot(de->d_name)) + continue; + + /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out + * while we manipulate it. */ + path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (path_fd < 0) + return -errno; + + if (fstat(path_fd, &fst) < 0) + return -errno; + + if (S_ISDIR(fst.st_mode)) { + int subdir_fd; + + /* Convert it to a "real" (i.e. 
non-O_PATH) fd now */ + subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME); + if (subdir_fd < 0) + return subdir_fd; + + r = chown_recursive_internal(subdir_fd, &fst, uid, gid); /* takes possession of subdir_fd even on failure */ + if (r < 0) + return r; + if (r > 0) + changed = true; + } else { + r = chown_one(path_fd, &fst, uid, gid); + if (r < 0) + return r; + if (r > 0) + changed = true; } + } - r = chown_one(dirfd(d), NULL, st, uid, gid); - } else - r = chown_one(fd, NULL, st, uid, gid); + r = chown_one(dirfd(d), st, uid, gid); if (r < 0) - goto finish; + return r; - r = r > 0 || changed; - -finish: - safe_close(fd); - return r; + return r > 0 || changed; } int path_chown_recursive(const char *path, uid_t uid, gid_t gid) { _cleanup_close_ int fd = -1; struct stat st; - int r; - fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); if (fd < 0) return -errno; @@ -128,8 +129,5 @@ int path_chown_recursive(const char *path, uid_t uid, gid_t gid) { (!gid_is_valid(gid) || st.st_gid == gid)) return 0; - r = chown_recursive_internal(fd, &st, uid, gid); - fd = -1; /* we donated the fd to the call, regardless if it succeeded or failed */ - - return r; + return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid); /* we donate the fd to the call, regardless if it succeeded or failed */ } diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 540bc77aed..53890bcafb 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -17,7 +17,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); -static int property_get_delegate_controllers( +static int property_get_cgroup_mask( sd_bus *bus, const char *path, const char *interface, @@ -26,26 +26,22 @@ static int property_get_delegate_controllers( void *userdata, sd_bus_error *error) { - CGroupContext *c = userdata; - CGroupController cc; + 
CGroupMask *mask = userdata; + CGroupController ctrl; int r; assert(bus); assert(reply); - assert(c); - - if (!c->delegate) - return sd_bus_message_append(reply, "as", 0); r = sd_bus_message_open_container(reply, 'a', "s"); if (r < 0) return r; - for (cc = 0; cc < _CGROUP_CONTROLLER_MAX; cc++) { - if ((c->delegate_controllers & CGROUP_CONTROLLER_TO_MASK(cc)) == 0) + for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) { + if ((*mask & CGROUP_CONTROLLER_TO_MASK(ctrl)) == 0) continue; - r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(cc)); + r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(ctrl)); if (r < 0) return r; } @@ -53,6 +49,27 @@ static int property_get_delegate_controllers( return sd_bus_message_close_container(reply); } +static int property_get_delegate_controllers( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + + assert(bus); + assert(reply); + assert(c); + + if (!c->delegate) + return sd_bus_message_append(reply, "as", 0); + + return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error); +} + static int property_get_io_device_weight( sd_bus *bus, const char *path, @@ -119,6 +136,36 @@ static int property_get_io_device_limits( return sd_bus_message_close_container(reply); } +static int property_get_io_device_latency( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceLatency *l; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_latencies, l, c->io_device_latencies) { + r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec); + if (r < 0) + return r; + } + + return 
sd_bus_message_close_container(reply); +} + static int property_get_blockio_device_weight( sd_bus *bus, const char *path, @@ -291,6 +338,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), @@ -298,6 +346,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0), + SD_BUS_PROPERTY("MemoryMin", "t", NULL, offsetof(CGroupContext, memory_min), 0), SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), @@ -310,6 +359,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0), SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0), SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0), + SD_BUS_PROPERTY("DisableControllers", "as", 
property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0), SD_BUS_VTABLE_END }; @@ -571,7 +621,7 @@ int bus_cgroup_set_property( flags |= UNIT_PRIVATE; if (streq(name, "CPUAccounting")) - return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU, message, flags, error); + return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error); if (streq(name, "CPUWeight")) return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error); @@ -606,6 +656,9 @@ int bus_cgroup_set_property( if (streq(name, "MemoryAccounting")) return bus_cgroup_set_boolean(u, name, &c->memory_accounting, CGROUP_MASK_MEMORY, message, flags, error); + if (streq(name, "MemoryMin")) + return bus_cgroup_set_memory(u, name, &c->memory_min, message, flags, error); + if (streq(name, "MemoryLow")) return bus_cgroup_set_memory(u, name, &c->memory_low, message, flags, error); @@ -621,6 +674,9 @@ int bus_cgroup_set_property( if (streq(name, "MemoryLimit")) return bus_cgroup_set_memory(u, name, &c->memory_limit, message, flags, error); + if (streq(name, "MemoryMinScale")) + return bus_cgroup_set_memory_scale(u, name, &c->memory_min, message, flags, error); + if (streq(name, "MemoryLowScale")) return bus_cgroup_set_memory_scale(u, name, &c->memory_low, message, flags, error); @@ -839,6 +895,86 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "IODeviceLatencyTargetUSec")) { + const char *path; + uint64_t target; + unsigned n = 0; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) { + + if (!path_is_normalized(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + CGroupIODeviceLatency *a = NULL, *b; + + LIST_FOREACH(device_latencies, b, 
c->io_device_latencies) { + if (path_equal(b->path, path)) { + a = b; + break; + } + } + + if (!a) { + a = new0(CGroupIODeviceLatency, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + LIST_PREPEND(device_latencies, c->io_device_latencies, a); + } + + a->target_usec = target; + } + + n++; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + char ts[FORMAT_TIMESPAN_MAX]; + CGroupIODeviceLatency *a; + size_t size = 0; + + if (n == 0) { + while (c->io_device_latencies) + cgroup_context_free_io_device_latency(c, c->io_device_latencies); + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + fputs("IODeviceLatencyTargetSec=\n", f); + LIST_FOREACH(device_latencies, a, c->io_device_latencies) + fprintf(f, "IODeviceLatencyTargetSec=%s %s\n", + a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1)); + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_setting(u, flags, name, buf); + } + + return 1; + } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { const char *path; bool read = true; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index c44970c10c..11301e4b69 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -27,7 +27,7 @@ #include "ioprio.h" #include "journal-util.h" #include "missing.h" -#include "mount-util.h" +#include "mountpoint-util.h" #include "namespace.h" #include "parse-util.h" #include "path-util.h" @@ -718,6 +718,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("SyslogLevel", "i", property_get_syslog_level, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SyslogFacility", "i", property_get_syslog_facility, 
offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("LogLevelMax", "i", bus_property_get_int, offsetof(ExecContext, log_level_max), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("LogRateLimitIntervalUSec", "t", bus_property_get_usec, offsetof(ExecContext, log_rate_limit_interval_usec), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("LogRateLimitBurst", "u", bus_property_get_unsigned, offsetof(ExecContext, log_rate_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("LogExtraFields", "aay", property_get_log_extra_fields, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SecureBits", "i", bus_property_get_int, offsetof(ExecContext, secure_bits), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CapabilityBoundingSet", "t", NULL, offsetof(ExecContext, capability_bounding_set), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1015,8 +1017,6 @@ static BUS_DEFINE_SET_TRANSIENT_IS_VALID(log_level, "i", int32_t, int, "%" PRIi3 #if HAVE_SECCOMP static BUS_DEFINE_SET_TRANSIENT_IS_VALID(errno, "i", int32_t, int, "%" PRIi32, errno_is_valid); #endif -static BUS_DEFINE_SET_TRANSIENT_IS_VALID(sched_priority, "i", int32_t, int, "%" PRIi32, sched_priority_is_valid); -static BUS_DEFINE_SET_TRANSIENT_IS_VALID(nice, "i", int32_t, int, "%" PRIi32, nice_is_valid); static BUS_DEFINE_SET_TRANSIENT_PARSE(std_input, ExecInput, exec_input_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(std_output, ExecOutput, exec_output_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(utmp_mode, ExecUtmpMode, exec_utmp_mode_from_string); @@ -1027,7 +1027,6 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_pres static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(capability, "t", uint64_t, uint64_t, "%" PRIu64, 
capability_set_to_string_alloc); -static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(sched_policy, "i", int32_t, int, "%" PRIi32, sched_policy_to_string_alloc_with_check); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(namespace_flag, "t", uint64_t, unsigned long, "%" PRIu64, namespace_flags_to_string); static BUS_DEFINE_SET_TRANSIENT_TO_STRING(mount_flags, "t", uint64_t, unsigned long, "%" PRIu64, mount_propagation_flags_to_string_with_check); @@ -1070,15 +1069,15 @@ int bus_exec_context_set_transient_property( if (streq(name, "LogLevelMax")) return bus_set_transient_log_level(u, name, &c->log_level_max, message, flags, error); - if (streq(name, "CPUSchedulingPriority")) - return bus_set_transient_sched_priority(u, name, &c->cpu_sched_priority, message, flags, error); + if (streq(name, "LogRateLimitIntervalUSec")) + return bus_set_transient_usec(u, name, &c->log_rate_limit_interval_usec, message, flags, error); + + if (streq(name, "LogRateLimitBurst")) + return bus_set_transient_unsigned(u, name, &c->log_rate_limit_burst, message, flags, error); if (streq(name, "Personality")) return bus_set_transient_personality(u, name, &c->personality, message, flags, error); - if (streq(name, "Nice")) - return bus_set_transient_nice(u, name, &c->nice, message, flags, error); - if (streq(name, "StandardInput")) return bus_set_transient_std_input(u, name, &c->std_input, message, flags, error); @@ -1208,9 +1207,6 @@ int bus_exec_context_set_transient_property( if (streq(name, "AmbientCapabilities")) return bus_set_transient_capability(u, name, &c->capability_ambient_set, message, flags, error); - if (streq(name, "CPUSchedulingPolicy")) - return bus_set_transient_sched_policy(u, name, &c->cpu_sched_policy, message, flags, error); - if (streq(name, "RestrictNamespaces")) return bus_set_transient_namespace_flag(u, name, &c->restrict_namespaces, message, flags, error); @@ -1521,8 +1517,8 @@ int bus_exec_context_set_transient_property( int af; af = af_from_name(*s); - if (af <= 0) - 
return -EINVAL; + if (af < 0) + return af; if (!invert == c->address_families_whitelist) { r = set_put(c->address_families, INT_TO_PTR(af)); @@ -1609,6 +1605,72 @@ int bus_exec_context_set_transient_property( return 1; + } else if (streq(name, "Nice")) { + int32_t q; + + r = sd_bus_message_read(message, "i", &q); + if (r < 0) + return r; + + if (!nice_is_valid(q)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Nice value: %i", q); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->nice = q; + c->nice_set = true; + + unit_write_settingf(u, flags, name, "Nice=%i", q); + } + + return 1; + + } else if (streq(name, "CPUSchedulingPolicy")) { + int32_t q; + + r = sd_bus_message_read(message, "i", &q); + if (r < 0) + return r; + + if (!sched_policy_is_valid(q)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling policy: %i", q); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + _cleanup_free_ char *s = NULL; + + r = sched_policy_to_string_alloc(q, &s); + if (r < 0) + return r; + + c->cpu_sched_policy = q; + c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q)); + c->cpu_sched_set = true; + + unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s); + } + + return 1; + + } else if (streq(name, "CPUSchedulingPriority")) { + int32_t p, min, max; + + r = sd_bus_message_read(message, "i", &p); + if (r < 0) + return r; + + min = sched_get_priority_min(c->cpu_sched_policy); + max = sched_get_priority_max(c->cpu_sched_policy); + if (p < min || p > max) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling priority: %i", p); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->cpu_sched_priority = p; + c->cpu_sched_set = true; + + unit_write_settingf(u, flags, name, "CPUSchedulingPriority=%i", p); + } + + return 1; + } else if (streq(name, "IOSchedulingClass")) { int32_t q; @@ -1731,7 +1793,10 @@ int bus_exec_context_set_transient_property( return 1; - } 
else if (STR_IN_SET(name, "StandardInputFile", "StandardOutputFile", "StandardErrorFile")) { + } else if (STR_IN_SET(name, + "StandardInputFile", + "StandardOutputFile", "StandardOutputFileToAppend", + "StandardErrorFile", "StandardErrorFileToAppend")) { const char *s; r = sd_bus_message_read(message, "s", &s); @@ -1755,23 +1820,34 @@ int bus_exec_context_set_transient_property( c->std_input = EXEC_INPUT_FILE; unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=file:%s", s); - } else if (streq(name, "StandardOutputFile")) { + } else if (STR_IN_SET(name, "StandardOutputFile", "StandardOutputFileToAppend")) { r = free_and_strdup(&c->stdio_file[STDOUT_FILENO], empty_to_null(s)); if (r < 0) return r; - c->std_output = EXEC_OUTPUT_FILE; - unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s); - + if (streq(name, "StandardOutputFile")) { + c->std_output = EXEC_OUTPUT_FILE; + unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s); + } else { + assert(streq(name, "StandardOutputFileToAppend")); + c->std_output = EXEC_OUTPUT_FILE_APPEND; + unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=append:%s", s); + } } else { - assert(streq(name, "StandardErrorFile")); + assert(STR_IN_SET(name, "StandardErrorFile", "StandardErrorFileToAppend")); r = free_and_strdup(&c->stdio_file[STDERR_FILENO], empty_to_null(s)); if (r < 0) return r; - c->std_error = EXEC_OUTPUT_FILE; - unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s); + if (streq(name, "StandardErrorFile")) { + c->std_error = EXEC_OUTPUT_FILE; + unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s); + } else { + assert(streq(name, "StandardErrorFileToAppend")); + c->std_error = EXEC_OUTPUT_FILE_APPEND; + unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=append:%s", s); + } } } diff --git a/src/core/dbus-job.c 
b/src/core/dbus-job.c index 5551c56d0e..d11e58b51d 100644 --- a/src/core/dbus-job.c +++ b/src/core/dbus-job.c @@ -4,6 +4,7 @@ #include "alloc-util.h" #include "dbus-job.h" +#include "dbus-unit.h" #include "dbus.h" #include "job.h" #include "log.h" @@ -50,7 +51,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error /* Access is granted to the job owner */ if (!sd_bus_track_contains(j->bus_track, sd_bus_message_get_sender(message))) { - /* And for everybody else consult PolicyKit */ + /* And for everybody else consult polkit */ r = bus_verify_manage_units_async(j->unit->manager, message, error); if (r < 0) return r; @@ -173,6 +174,9 @@ void bus_job_send_change_signal(Job *j) { assert(j); + /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */ + bus_unit_send_pending_change_signal(j->unit, true); + if (j->in_dbus_queue) { LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j); j->in_dbus_queue = false; @@ -185,6 +189,21 @@ void bus_job_send_change_signal(Job *j) { j->sent_dbus_new_signal = true; } +void bus_job_send_pending_change_signal(Job *j, bool including_new) { + assert(j); + + if (!j->in_dbus_queue) + return; + + if (!j->sent_dbus_new_signal && !including_new) + return; + + if (MANAGER_IS_RELOADING(j->unit->manager)) + return; + + bus_job_send_change_signal(j); +} + static int send_removed_signal(sd_bus *bus, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; _cleanup_free_ char *p = NULL; @@ -222,6 +241,9 @@ void bus_job_send_removed_signal(Job *j) { if (!j->sent_dbus_new_signal) bus_job_send_change_signal(j); + /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */ + bus_unit_send_pending_change_signal(j->unit, true); + r = bus_foreach_bus(j->manager, j->bus_track, send_removed_signal, j); if (r < 0) log_debug_errno(r, "Failed to send job remove signal for %u: %m", j->id); diff --git 
a/src/core/dbus-job.h b/src/core/dbus-job.h index 3cc60f22ee..c9f6fc7187 100644 --- a/src/core/dbus-job.h +++ b/src/core/dbus-job.h @@ -12,6 +12,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *job, sd_bus_error *erro int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error); void bus_job_send_change_signal(Job *j); +void bus_job_send_pending_change_signal(Job *j, bool including_new); void bus_job_send_removed_signal(Job *j); int bus_job_coldplug_bus_track(Job *j); diff --git a/src/core/dbus-kill.c b/src/core/dbus-kill.c index 028e7ec1c1..e2b3a0d517 100644 --- a/src/core/dbus-kill.c +++ b/src/core/dbus-kill.c @@ -12,13 +12,17 @@ const sd_bus_vtable bus_kill_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("KillMode", "s", property_get_kill_mode, offsetof(KillContext, kill_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("KillSignal", "i", bus_property_get_int, offsetof(KillContext, kill_signal), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FinalKillSignal", "i", bus_property_get_int, offsetof(KillContext, final_kill_signal), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SendSIGKILL", "b", bus_property_get_bool, offsetof(KillContext, send_sigkill), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SendSIGHUP", "b", bus_property_get_bool, offsetof(KillContext, send_sighup), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("WatchdogSignal", "i", bus_property_get_int, offsetof(KillContext, watchdog_signal), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; static BUS_DEFINE_SET_TRANSIENT_PARSE(kill_mode, KillMode, kill_mode_from_string); static BUS_DEFINE_SET_TRANSIENT_TO_STRING(kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check); +static BUS_DEFINE_SET_TRANSIENT_TO_STRING(final_kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check); +static BUS_DEFINE_SET_TRANSIENT_TO_STRING(watchdog_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check); int 
bus_kill_context_set_transient_property( Unit *u, @@ -47,5 +51,11 @@ int bus_kill_context_set_transient_property( if (streq(name, "KillSignal")) return bus_set_transient_kill_signal(u, name, &c->kill_signal, message, flags, error); + if (streq(name, "FinalKillSignal")) + return bus_set_transient_final_kill_signal(u, name, &c->final_kill_signal, message, flags, error); + + if (streq(name, "WatchdogSignal")) + return bus_set_transient_watchdog_signal(u, name, &c->watchdog_signal, message, flags, error); + return 0; } diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 4ed68af1e0..8da07adfe7 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -12,6 +12,7 @@ #include "dbus-execute.h" #include "dbus-job.h" #include "dbus-manager.h" +#include "dbus-scope.h" #include "dbus-unit.h" #include "dbus.h" #include "env-util.h" @@ -216,6 +217,30 @@ static int property_get_progress( return sd_bus_message_append(reply, "d", d); } +static int property_get_environment( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + _cleanup_strv_free_ char **l = NULL; + Manager *m = userdata; + int r; + + assert(bus); + assert(reply); + assert(m); + + r = manager_get_effective_environment(m, &l); + if (r < 0) + return r; + + return sd_bus_message_append_strv(reply, l); +} + static int property_get_show_status( sd_bus *bus, const char *path, @@ -232,7 +257,7 @@ static int property_get_show_status( assert(reply); assert(m); - b = m->show_status > 0; + b = IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES); return sd_bus_message_append_basic(reply, 'b', &b); } @@ -1298,9 +1323,9 @@ int verify_run_space_and_log(const char *message) { r = verify_run_space(message, &error); if (r < 0) - log_error_errno(r, "%s", bus_error_message(&error, r)); + return log_error_errno(r, "%s", bus_error_message(&error, r)); - return r; + return 0; } static int 
method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) { @@ -1329,12 +1354,12 @@ static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error * * is finished. That way the caller knows when the reload * finished. */ - assert(!m->queued_message); - r = sd_bus_message_new_method_return(message, &m->queued_message); + assert(!m->pending_reload_message); + r = sd_bus_message_new_method_return(message, &m->pending_reload_message); if (r < 0) return r; - m->exit_code = MANAGER_RELOAD; + m->objective = MANAGER_RELOAD; return 1; } @@ -1363,7 +1388,7 @@ static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_erro /* We don't send a reply back here, the client should * just wait for us disconnecting. */ - m->exit_code = MANAGER_REEXECUTE; + m->objective = MANAGER_REEXECUTE; return 1; } @@ -1383,7 +1408,7 @@ static int method_exit(sd_bus_message *message, void *userdata, sd_bus_error *er * systemd-shutdown if it cannot do the exit() because it isn't a * container. 
*/ - m->exit_code = MANAGER_EXIT; + m->objective = MANAGER_EXIT; return sd_bus_reply_method_return(message, NULL); } @@ -1402,7 +1427,7 @@ static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error * if (!MANAGER_IS_SYSTEM(m)) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Reboot is only supported for system managers."); - m->exit_code = MANAGER_REBOOT; + m->objective = MANAGER_REBOOT; return sd_bus_reply_method_return(message, NULL); } @@ -1421,7 +1446,7 @@ static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error if (!MANAGER_IS_SYSTEM(m)) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Powering off is only supported for system managers."); - m->exit_code = MANAGER_POWEROFF; + m->objective = MANAGER_POWEROFF; return sd_bus_reply_method_return(message, NULL); } @@ -1440,7 +1465,7 @@ static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *er if (!MANAGER_IS_SYSTEM(m)) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Halt is only supported for system managers."); - m->exit_code = MANAGER_HALT; + m->objective = MANAGER_HALT; return sd_bus_reply_method_return(message, NULL); } @@ -1459,7 +1484,7 @@ static int method_kexec(sd_bus_message *message, void *userdata, sd_bus_error *e if (!MANAGER_IS_SYSTEM(m)) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "KExec is only supported for system managers."); - m->exit_code = MANAGER_KEXEC; + m->objective = MANAGER_KEXEC; return sd_bus_reply_method_return(message, NULL); } @@ -1549,7 +1574,7 @@ static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_er free(m->switch_root_init); m->switch_root_init = ri; - m->exit_code = MANAGER_SWITCH_ROOT; + m->objective = MANAGER_SWITCH_ROOT; return sd_bus_reply_method_return(message, NULL); } @@ -1578,7 +1603,7 @@ static int method_set_environment(sd_bus_message *message, void *userdata, sd_bu if (r == 0) return 1; /* No authorization for now, but the 
async polkit stuff will call us again when it has it */ - r = manager_environment_add(m, NULL, plus); + r = manager_client_environment_modify(m, NULL, plus); if (r < 0) return r; @@ -1610,7 +1635,7 @@ static int method_unset_environment(sd_bus_message *message, void *userdata, sd_ if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = manager_environment_add(m, minus, NULL); + r = manager_client_environment_modify(m, minus, NULL); if (r < 0) return r; @@ -1648,7 +1673,7 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = manager_environment_add(m, minus, plus); + r = manager_client_environment_modify(m, minus, plus); if (r < 0) return r; @@ -2398,6 +2423,29 @@ static int method_get_job_waiting(sd_bus_message *message, void *userdata, sd_bu return bus_job_method_get_waiting_jobs(message, j, error); } +static int method_abandon_scope(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; + + if (u->type != UNIT_SCOPE) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit '%s' is not a scope unit, refusing.", name); + + return bus_scope_method_abandon(message, u, error); +} + const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_VTABLE_START(0), @@ -2418,6 +2466,12 @@ const sd_bus_vtable bus_manager_vtable[] = { BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST), BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadStartTimestamp", offsetof(Manager, 
timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST), BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST), + BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", property_get_log_level, property_set_log_level, 0, 0), SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", property_get_log_target, property_set_log_target, 0, 0), SD_BUS_PROPERTY("NNames", "u", property_get_hashmap_size, offsetof(Manager, units), 0), @@ -2426,7 +2480,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0), SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0), SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0), - SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(Manager, environment), 0), + SD_BUS_PROPERTY("Environment", "as", 
property_get_environment, 0, 0), SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ShowStatus", "b", property_get_show_status, 0, 0), SD_BUS_PROPERTY("UnitPath", "as", NULL, offsetof(Manager, lookup_paths.search_path), SD_BUS_VTABLE_PROPERTY_CONST), @@ -2507,6 +2561,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("AbandonScope", "s", NULL, method_abandon_scope, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c index 3f98d3ecf0..b6d61627eb 100644 --- a/src/core/dbus-mount.c +++ b/src/core/dbus-mount.c @@ -145,7 +145,7 @@ int bus_mount_set_property( int bus_mount_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index 6725f62794..bb807df2e9 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -14,7 +14,7 @@ #include "selinux-access.h" #include "unit.h" -static int bus_scope_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) { +int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) { Scope *s = userdata; int r; @@ -48,7 +48,7 @@ const sd_bus_vtable bus_scope_vtable[] = { 
SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_SIGNAL("RequestStop", NULL, 0), - SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_abandon, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_method_abandon, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_VTABLE_END }; @@ -186,7 +186,7 @@ int bus_scope_set_property( int bus_scope_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-scope.h b/src/core/dbus-scope.h index 7c080dbcf7..702f55898d 100644 --- a/src/core/dbus-scope.h +++ b/src/core/dbus-scope.h @@ -14,4 +14,6 @@ int bus_scope_commit_properties(Unit *u); int bus_scope_send_request_stop(Scope *s); +int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error); + int bus_scope_track_controller(Scope *s); diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 1b4c98c7d2..ec61ea2772 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -105,7 +105,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST), BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0), - SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* 😷 deprecated */ 
SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("GuessMainPID", "b", bus_property_get_bool, offsetof(Service, guess_main_pid), SD_BUS_VTABLE_PROPERTY_CONST), @@ -120,8 +120,8 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("StatusErrno", "i", bus_property_get_int, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), - SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), - SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), @@ -312,8 +312,40 @@ static int bus_service_set_transient_property( if (streq(name, "NotifyAccess")) return bus_set_transient_notify_access(u, name, &s->notify_access, message, flags, error); - if (streq(name, "PIDFile")) - return bus_set_transient_path(u, name, &s->pid_file, message, flags, 
error); + if (streq(name, "PIDFile")) { + _cleanup_free_ char *n = NULL; + const char *v, *e; + + r = sd_bus_message_read(message, "s", &v); + if (r < 0) + return r; + + n = path_make_absolute(v, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]); + if (!n) + return -ENOMEM; + + path_simplify(n, true); + + if (!path_is_normalized(n)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PIDFile= path '%s' is not valid", n); + + e = path_startswith(n, "/var/run/"); + if (e) { + char *z; + + z = strjoin("/run/", e); + if (!z) + return log_oom(); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) + log_unit_notice(u, "Transient unit's PIDFile= property references path below legacy directory /var/run, updating %s → %s; please update client accordingly.", n, z); + + free_and_replace(s->pid_file, z); + } else + free_and_replace(s->pid_file, n); + + return 1; + } if (streq(name, "USBFunctionDescriptors")) return bus_set_transient_path(u, name, &s->usb_function_descriptors, message, flags, error); @@ -392,7 +424,7 @@ int bus_service_set_property( int bus_service_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c index 722a5688a5..effd5fa5d7 100644 --- a/src/core/dbus-slice.c +++ b/src/core/dbus-slice.c @@ -28,7 +28,7 @@ int bus_slice_set_property( int bus_slice_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c index 913cc74918..37cf9d204c 100644 --- a/src/core/dbus-socket.c +++ b/src/core/dbus-socket.c @@ -8,10 +8,10 @@ #include "dbus-socket.h" #include "dbus-util.h" #include "fd-util.h" +#include "ip-protocol-list.h" #include "parse-util.h" #include "path-util.h" #include "socket.h" -#include "socket-protocol-list.h" #include "socket-util.h" #include 
"string-util.h" #include "unit.h" @@ -138,14 +138,14 @@ static inline bool check_size_t_truncation(uint64_t t) { return (size_t) t == t; } -static inline const char* supported_socket_protocol_to_string(int32_t i) { +static inline const char* socket_protocol_to_string(int32_t i) { if (i == IPPROTO_IP) return ""; if (!IN_SET(i, IPPROTO_UDPLITE, IPPROTO_SCTP)) return NULL; - return socket_protocol_to_name(i); + return ip_protocol_to_name(i); } static BUS_DEFINE_SET_TRANSIENT(int, "i", int32_t, int, "%" PRIi32); @@ -155,7 +155,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(bind_ipv6_only, SocketAddressBindIPv6Only, static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(fdname, fdname_is_valid); static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(ifname, ifname_valid); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(ip_tos, "i", int32_t, int, "%" PRIi32, ip_tos_to_string_alloc); -static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, supported_socket_protocol_to_string); +static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, socket_protocol_to_string); static int bus_socket_set_transient_property( Socket *s, @@ -351,16 +351,27 @@ static int bus_socket_set_transient_property( while ((r = sd_bus_message_read(message, "(ss)", &t, &a)) > 0) { _cleanup_free_ SocketPort *p = NULL; - p = new0(SocketPort, 1); + p = new(SocketPort, 1); if (!p) return log_oom(); + *p = (SocketPort) { + .fd = -1, + .socket = s, + }; + p->type = socket_port_type_from_string(t); if (p->type < 0) return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown Socket type: %s", t); if (p->type != SOCKET_SOCKET) { + if (!path_is_valid(a)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid socket path: %s", a); + p->path = strdup(a); + if (!p->path) + return log_oom(); + path_simplify(p->path, false); } else if (streq(t, "Netlink")) { @@ -381,21 +392,10 @@ static int bus_socket_set_transient_property( return 
sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Address family not supported: %s", a); } - p->fd = -1; - p->auxiliary_fds = NULL; - p->n_auxiliary_fds = 0; - p->socket = s; - empty = false; if (!UNIT_WRITE_FLAGS_NOOP(flags)) { - SocketPort *tail; - - LIST_FIND_TAIL(port, s->ports, tail); - LIST_INSERT_AFTER(port, s->ports, tail, p); - - p = NULL; - + LIST_APPEND(port, s->ports, TAKE_PTR(p)); unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Listen%s=%s", t, a); } } @@ -461,7 +461,7 @@ int bus_socket_set_property( int bus_socket_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c index b272d10113..353fa20132 100644 --- a/src/core/dbus-swap.c +++ b/src/core/dbus-swap.c @@ -63,7 +63,7 @@ int bus_swap_set_property( int bus_swap_commit_properties(Unit *u) { assert(u); - unit_update_cgroup_members_masks(u); + unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index ae0410414e..968166ee60 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -434,7 +434,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error * u, "kill", CAP_KILL, - N_("Authentication is required to kill '$(unit)'."), + N_("Authentication is required to send a UNIX signal to the processes of '$(unit)'."), true, message, error); @@ -561,6 +561,44 @@ int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error return sd_bus_reply_method_return(message, NULL); } +static int property_get_refs( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + Unit *u = userdata; + const char *i; + int r; + + assert(bus); + assert(reply); + + r = sd_bus_message_open_container(reply, 'a', "s"); + if (r < 0) + return 
r; + + for (i = sd_bus_track_first(u->bus_track); i; i = sd_bus_track_next(u->bus_track)) { + int c, k; + + c = sd_bus_track_count_name(u->bus_track, i); + if (c < 0) + return c; + + /* Add the item multiple times if the ref count for each is above 1 */ + for (k = 0; k < c; k++) { + r = sd_bus_message_append(reply, "s", i); + if (r < 0) + return r; + } + } + + return sd_bus_message_close_container(reply); +} + const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_VTABLE_START(0), @@ -624,8 +662,8 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_PROPERTY_DUAL_TIMESTAMP("AssertTimestamp", offsetof(Unit, assert_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), - SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), 0), - SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0), + SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST), @@ -633,10 +671,13 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, 
start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FailureActionExitStatus", "i", bus_property_get_int, offsetof(Unit, failure_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SuccessAction", "s", property_get_emergency_action, offsetof(Unit, success_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("SuccessActionExitStatus", "i", bus_property_get_int, offsetof(Unit, success_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0), - SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), 0), + SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("Refs", "as", property_get_refs, 0, 0), SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED), @@ -1033,7 +1074,7 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd if (r < 0) return r; - /* Let's validate security: if the sender is root, then all is OK. If the sender is is any other unit, + /* Let's validate security: if the sender is root, then all is OK. 
If the sender is any other unit, * then the process' UID and the target unit's UID have to match the sender's UID */ if (sender_uid != 0 && sender_uid != getuid()) { r = get_process_uid(pid, &process_uid); @@ -1161,6 +1202,27 @@ void bus_unit_send_change_signal(Unit *u) { u->sent_dbus_new_signal = true; } +void bus_unit_send_pending_change_signal(Unit *u, bool including_new) { + + /* Sends out any pending change signals, but only if they really are pending. This call is used when we are + * about to change state in order to force out a PropertiesChanged signal beforehand if there was one pending + * so that clients can follow the full state transition */ + + if (!u->in_dbus_queue) /* If not enqueued, don't bother */ + return; + + if (!u->sent_dbus_new_signal && !including_new) /* If the unit was never announced, don't bother, it's fine if + * the unit appears in the new state right-away (except if the + * caller explicitly asked us to send it anyway) */ + return; + + if (MANAGER_IS_RELOADING(u->manager)) /* Don't generate unnecessary PropertiesChanged signals for the same unit + * when we are reloading. 
*/ + return; + + bus_unit_send_change_signal(u); +} + static int send_removed_signal(sd_bus *bus, void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; _cleanup_free_ char *p = NULL; @@ -1259,6 +1321,9 @@ int bus_unit_queue_job( if (!path) return -ENOMEM; + /* Before we send the method reply, force out the announcement JobNew for this job */ + bus_job_send_pending_change_signal(j, true); + return sd_bus_reply_method_return(message, "o", path); } @@ -1299,8 +1364,75 @@ static int bus_unit_set_live_property( return 0; } +static int bus_set_transient_emergency_action( + Unit *u, + const char *name, + EmergencyAction *p, + sd_bus_message *message, + UnitWriteFlags flags, + sd_bus_error *error) { + + const char *s; + EmergencyAction v; + int r; + bool system; + + assert(p); + + r = sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + system = MANAGER_IS_SYSTEM(u->manager); + r = parse_emergency_action(s, system, &v); + if (r < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, + r == -EOPNOTSUPP ? "%s setting invalid for manager type: %s" + : "Invalid %s setting: %s", + name, s); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + *p = v; + unit_write_settingf(u, flags, name, + "%s=%s", name, s); + } + + return 1; +} + +static int bus_set_transient_exit_status( + Unit *u, + const char *name, + int *p, + sd_bus_message *message, + UnitWriteFlags flags, + sd_bus_error *error) { + + int32_t k; + int r; + + assert(p); + + r = sd_bus_message_read(message, "i", &k); + if (r < 0) + return r; + + if (k > 255) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Exit status must be in range 0…255 or negative."); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + *p = k < 0 ? 
-1 : k; + + if (k < 0) + unit_write_settingf(u, flags, name, "%s=", name); + else + unit_write_settingf(u, flags, name, "%s=%i", name, k); + } + + return 1; +} + static BUS_DEFINE_SET_TRANSIENT_PARSE(collect_mode, CollectMode, collect_mode_from_string); -static BUS_DEFINE_SET_TRANSIENT_PARSE(emergency_action, EmergencyAction, emergency_action_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(job_mode, JobMode, job_mode_from_string); static int bus_set_transient_conditions( @@ -1450,6 +1582,12 @@ static int bus_unit_set_transient_property( if (streq(name, "SuccessAction")) return bus_set_transient_emergency_action(u, name, &u->success_action, message, flags, error); + if (streq(name, "FailureActionExitStatus")) + return bus_set_transient_exit_status(u, name, &u->failure_action_exit_status, message, flags, error); + + if (streq(name, "SuccessActionExitStatus")) + return bus_set_transient_exit_status(u, name, &u->success_action_exit_status, message, flags, error); + if (streq(name, "RebootArgument")) return bus_set_transient_string(u, name, &u->reboot_arg, message, flags, error); @@ -1572,7 +1710,7 @@ static int bus_unit_set_transient_property( if (!UNIT_WRITE_FLAGS_NOOP(flags)) { _cleanup_free_ char *label = NULL; - r = unit_add_dependency_by_name(u, d, other, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(u, d, other, true, UNIT_DEPENDENCY_FILE); if (r < 0) return r; @@ -1726,7 +1864,7 @@ int bus_unit_validate_load_state(Unit *u, sd_bus_error *error) { return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING, "Unit %s has a bad unit file setting.", u->id); case UNIT_ERROR: /* Only show .load_error in UNIT_ERROR state */ - return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to loaded properly: %m.", u->id); + return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to load properly: %m.", u->id); case UNIT_MASKED: return sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED, "Unit %s is masked.", u->id); @@ -1746,7 
+1884,13 @@ static int bus_unit_track_handler(sd_bus_track *t, void *userdata) { u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */ + /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too, + * let's see */ + unit_add_to_cgroup_empty_queue(u); + + /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */ unit_add_to_gc_queue(u); + return 0; } diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 68eb621836..345345e3eb 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -11,6 +11,7 @@ extern const sd_bus_vtable bus_unit_vtable[]; extern const sd_bus_vtable bus_unit_cgroup_vtable[]; void bus_unit_send_change_signal(Unit *u); +void bus_unit_send_pending_change_signal(Unit *u, bool including_new); void bus_unit_send_removed_signal(Unit *u); int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error); diff --git a/src/core/dbus.c b/src/core/dbus.c index bf5917696e..5908ad792a 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -36,6 +36,7 @@ #include "mkdir.h" #include "process-util.h" #include "selinux-access.h" +#include "serialize.h" #include "service.h" #include "special.h" #include "string-util.h" @@ -47,23 +48,22 @@ static void destroy_bus(Manager *m, sd_bus **bus); -int bus_send_queued_message(Manager *m) { +int bus_send_pending_reload_message(Manager *m) { int r; assert(m); - if (!m->queued_message) + if (!m->pending_reload_message) return 0; - /* If we cannot get rid of this message we won't dispatch any - * D-Bus messages, so that we won't end up wanting to queue - * another message. */ + /* If we cannot get rid of this message we won't dispatch any D-Bus messages, so that we won't end up wanting + * to queue another message. 
*/ - r = sd_bus_send(NULL, m->queued_message, NULL); + r = sd_bus_send(NULL, m->pending_reload_message, NULL); if (r < 0) - log_warning_errno(r, "Failed to send queued message: %m"); + log_warning_errno(r, "Failed to send queued message, ignoring: %m"); - m->queued_message = sd_bus_message_unref(m->queued_message); + m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message); return 0; } @@ -974,12 +974,9 @@ int bus_init_system(Manager *m) { int bus_init_private(Manager *m) { _cleanup_close_ int fd = -1; - union sockaddr_union sa = { - .un.sun_family = AF_UNIX - }; + union sockaddr_union sa = {}; sd_event_source *s; - socklen_t salen; - int r; + int r, salen; assert(m); @@ -992,27 +989,23 @@ int bus_init_private(Manager *m) { if (getpid_cached() != 1) return 0; - strcpy(sa.un.sun_path, "/run/systemd/private"); - salen = SOCKADDR_UN_LEN(sa.un); + salen = sockaddr_un_set_path(&sa.un, "/run/systemd/private"); } else { - size_t left = sizeof(sa.un.sun_path); - char *p = sa.un.sun_path; - const char *e; + const char *e, *joined; e = secure_getenv("XDG_RUNTIME_DIR"); - if (!e) { - log_error("Failed to determine XDG_RUNTIME_DIR"); - return -EHOSTDOWN; - } - - left = strpcpy(&p, left, e); - left = strpcpy(&p, left, "/systemd/private"); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "XDG_RUNTIME_DIR is not set, refusing."); - salen = sizeof(sa.un) - left; + joined = strjoina(e, "/systemd/private"); + salen = sockaddr_un_set_path(&sa.un, joined); } + if (salen < 0) + return log_error_errno(salen, "Can't set path for AF_UNIX socket to bind to: %m"); (void) mkdir_parents_label(sa.un.sun_path, 0755); - (void) unlink(sa.un.sun_path); + (void) sockaddr_un_unlink(&sa.un); fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); if (fd < 0) @@ -1035,9 +1028,8 @@ int bus_init_private(Manager *m) { (void) sd_event_source_set_description(s, "bus-connection"); - m->private_listen_fd = fd; + m->private_listen_fd = TAKE_FD(fd); 
m->private_listen_event_source = s; - fd = -1; log_debug("Successfully created private D-Bus server."); @@ -1079,8 +1071,8 @@ static void destroy_bus(Manager *m, sd_bus **bus) { u->bus_track = sd_bus_track_unref(u->bus_track); /* Get rid of queued message on this bus */ - if (m->queued_message && sd_bus_message_get_bus(m->queued_message) == *bus) - m->queued_message = sd_bus_message_unref(m->queued_message); + if (m->pending_reload_message && sd_bus_message_get_bus(m->pending_reload_message) == *bus) + m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message); /* Possibly flush unwritten data, but only if we are * unprivileged, since we don't want to sync here */ @@ -1211,13 +1203,8 @@ void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) { int c, j; c = sd_bus_track_count_name(t, n); - - for (j = 0; j < c; j++) { - fputs(prefix, f); - fputc('=', f); - fputs(n, f); - fputc('\n', f); - } + for (j = 0; j < c; j++) + (void) serialize_item(f, prefix, n); } } diff --git a/src/core/dbus.h b/src/core/dbus.h index 382a96da7d..f1c0fa86c0 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -5,7 +5,7 @@ #include "manager.h" -int bus_send_queued_message(Manager *m); +int bus_send_pending_reload_message(Manager *m); int bus_init_private(Manager *m); int bus_init_api(Manager *m); diff --git a/src/core/device.c b/src/core/device.c index a2d00a0fbe..960f403718 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -3,19 +3,20 @@ #include <errno.h> #include <sys/epoll.h> -#include "libudev.h" - #include "alloc-util.h" #include "bus-error.h" #include "dbus-device.h" +#include "dbus-unit.h" +#include "device-private.h" +#include "device-util.h" #include "device.h" #include "log.h" #include "parse-util.h" #include "path-util.h" +#include "serialize.h" #include "stat-util.h" #include "string-util.h" #include "swap.h" -#include "udev-util.h" #include "unit-name.h" #include "unit.h" @@ -25,7 +26,7 @@ static const UnitActiveState 
state_translation_table[_DEVICE_STATE_MAX] = { [DEVICE_PLUGGED] = UNIT_ACTIVE, }; -static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata); static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask); static void device_unset_sysfs(Device *d) { @@ -111,10 +112,31 @@ static void device_done(Unit *u) { d->wants_property = strv_free(d->wants_property); } +static int device_load(Unit *u) { + int r; + + r = unit_load_fragment_and_dropin_optional(u); + if (r < 0) + return r; + + if (!u->description) { + /* Generate a description based on the path, to be used until the + device is initialized properly */ + r = unit_name_to_path(u->id, &u->description); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to unescape name: %m"); + } + + return 0; +} + static void device_set_state(Device *d, DeviceState state) { DeviceState old_state; assert(d); + if (d->state != state) + bus_unit_send_pending_change_signal(UNIT(d), false); + old_state = d->state; d->state = state; @@ -226,10 +248,10 @@ static int device_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", device_state_to_string(d->state)); + (void) serialize_item(f, "state", device_state_to_string(d->state)); if (device_found_to_string_many(d->found, &s) >= 0) - unit_serialize_item(u, f, "found", s); + (void) serialize_item(f, "found", s); return 0; } @@ -255,7 +277,7 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value, } else if (streq(key, "found")) { r = device_found_from_string_many(value, &d->deserialized_found); if (r < 0) - log_unit_debug_errno(u, r, "Failed to parse found value, ignoring: %s", value); + log_unit_debug_errno(u, r, "Failed to parse found value '%s', ignoring: %m", value); } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -300,47 +322,40 @@ _pure_ static 
const char *device_sub_state_to_string(Unit *u) { return device_state_to_string(DEVICE(u)->state); } -static int device_update_description(Unit *u, struct udev_device *dev, const char *path) { - const char *model; +static int device_update_description(Unit *u, sd_device *dev, const char *path) { + _cleanup_free_ char *j = NULL; + const char *model, *label, *desc; int r; assert(u); - assert(dev); assert(path); - model = udev_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE"); - if (!model) - model = udev_device_get_property_value(dev, "ID_MODEL"); + desc = path; - if (model) { - const char *label; + if (dev && + (sd_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE", &model) >= 0 || + sd_device_get_property_value(dev, "ID_MODEL", &model) >= 0)) { + desc = model; /* Try to concatenate the device model string with a label, if there is one */ - label = udev_device_get_property_value(dev, "ID_FS_LABEL"); - if (!label) - label = udev_device_get_property_value(dev, "ID_PART_ENTRY_NAME"); - if (!label) - label = udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER"); + if (sd_device_get_property_value(dev, "ID_FS_LABEL", &label) >= 0 || + sd_device_get_property_value(dev, "ID_PART_ENTRY_NAME", &label) >= 0 || + sd_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER", &label) >= 0) { - if (label) { - _cleanup_free_ char *j; - - j = strjoin(model, " ", label); + desc = j = strjoin(model, " ", label); if (!j) return log_oom(); + } + } - r = unit_set_description(u, j); - } else - r = unit_set_description(u, model); - } else - r = unit_set_description(u, path); + r = unit_set_description(u, desc); if (r < 0) return log_unit_error_errno(u, r, "Failed to set device description: %m"); return 0; } -static int device_add_udev_wants(Unit *u, struct udev_device *dev) { +static int device_add_udev_wants(Unit *u, sd_device *dev) { _cleanup_strv_free_ char **added = NULL; const char *wants, *property; Device *d = DEVICE(u); @@ -351,8 +366,8 @@ static int 
device_add_udev_wants(Unit *u, struct udev_device *dev) { property = MANAGER_IS_USER(u->manager) ? "SYSTEMD_USER_WANTS" : "SYSTEMD_WANTS"; - wants = udev_device_get_property_value(dev, property); - if (!wants) + r = sd_device_get_property_value(dev, property, &wants); + if (r < 0) return 0; for (;;) { @@ -387,7 +402,7 @@ static int device_add_udev_wants(Unit *u, struct udev_device *dev) { return log_unit_error_errno(u, r, "Failed to mangle unit name \"%s\": %m", word); } - r = unit_add_dependency_by_name(u, UNIT_WANTS, k, NULL, true, UNIT_DEPENDENCY_UDEV); + r = unit_add_dependency_by_name(u, UNIT_WANTS, k, true, UNIT_DEPENDENCY_UDEV); if (r < 0) return log_unit_error_errno(u, r, "Failed to add Wants= dependency: %m"); @@ -429,18 +444,17 @@ static int device_add_udev_wants(Unit *u, struct udev_device *dev) { return 0; } -static bool device_is_bound_by_mounts(Device *d, struct udev_device *dev) { +static bool device_is_bound_by_mounts(Device *d, sd_device *dev) { const char *bound_by; int r; assert(d); assert(dev); - bound_by = udev_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND"); - if (bound_by) { + if (sd_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND", &bound_by) >= 0) { r = parse_boolean(bound_by); if (r < 0) - log_warning_errno(r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property of %s, ignoring: %m", bound_by, strna(d->sysfs)); + log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property, ignoring: %m", bound_by); d->bind_mounts = r > 0; } else @@ -467,7 +481,7 @@ static void device_upgrade_mount_deps(Unit *u) { } } -static int device_setup_unit(Manager *m, struct udev_device *dev, const char *path, bool main) { +static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool main) { _cleanup_free_ char *e = NULL; const char *sysfs = NULL; Unit *u = NULL; @@ -478,16 +492,16 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa assert(path); 
if (dev) { - sysfs = udev_device_get_syspath(dev); - if (!sysfs) { - log_debug("Couldn't get syspath from udev device, ignoring."); + r = sd_device_get_syspath(dev, &sysfs); + if (r < 0) { + log_device_debug_errno(dev, r, "Couldn't get syspath from device, ignoring: %m"); return 0; } } r = unit_name_from_path(path, ".device", &e); if (r < 0) - return log_error_errno(r, "Failed to generate unit name from device path: %m"); + return log_device_error_errno(dev, r, "Failed to generate unit name from device path: %m"); u = manager_get_unit(m, e); if (u) { @@ -518,7 +532,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa r = unit_new_for_name(m, sizeof(Device), e, &u); if (r < 0) { - log_error_errno(r, "Failed to allocate device unit %s: %m", e); + log_device_error_errno(dev, r, "Failed to allocate device unit %s: %m", e); goto fail; } @@ -530,17 +544,17 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa if (sysfs) { r = device_set_sysfs(DEVICE(u), sysfs); if (r < 0) { - log_error_errno(r, "Failed to set sysfs path %s for device unit %s: %m", sysfs, e); + log_unit_error_errno(u, r, "Failed to set sysfs path %s: %m", sysfs); goto fail; } - (void) device_update_description(u, dev, path); - /* The additional systemd udev properties we only interpret for the main object */ if (main) (void) device_add_udev_wants(u, dev); } + (void) device_update_description(u, dev, path); + /* So the user wants the mount units to be bound to the device but a mount unit might has been seen by systemd * before the device appears on its radar. In this case the device unit is partially initialized and includes * the deps on the mount unit but at that time the "bind mounts" flag wasn't not present. Fix this up now. 
*/ @@ -559,15 +573,14 @@ fail: return r; } -static int device_process_new(Manager *m, struct udev_device *dev) { +static int device_process_new(Manager *m, sd_device *dev) { const char *sysfs, *dn, *alias; - struct udev_list_entry *item = NULL, *first = NULL; + dev_t devnum; int r; assert(m); - sysfs = udev_device_get_syspath(dev); - if (!sysfs) + if (sd_device_get_syspath(dev, &sysfs) < 0) return 0; /* Add the main unit named after the sysfs path */ @@ -576,40 +589,39 @@ static int device_process_new(Manager *m, struct udev_device *dev) { return r; /* Add an additional unit for the device node */ - dn = udev_device_get_devnode(dev); - if (dn) + if (sd_device_get_devname(dev, &dn) >= 0) (void) device_setup_unit(m, dev, dn, false); /* Add additional units for all symlinks */ - first = udev_device_get_devlinks_list_entry(dev); - udev_list_entry_foreach(item, first) { + if (sd_device_get_devnum(dev, &devnum) >= 0) { const char *p; - struct stat st; - /* Don't bother with the /dev/block links */ - p = udev_list_entry_get_name(item); + FOREACH_DEVICE_DEVLINK(dev, p) { + struct stat st; - if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/")) - continue; + if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/")) + continue; - /* Verify that the symlink in the FS actually belongs - * to this device. This is useful to deal with - * conflicting devices, e.g. when two disks want the - * same /dev/disk/by-label/xxx link because they have - * the same label. We want to make sure that the same - * device that won the symlink wins in systemd, so we - * check the device node major/minor */ - if (stat(p, &st) >= 0) - if ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) || - st.st_rdev != udev_device_get_devnum(dev)) + /* Verify that the symlink in the FS actually belongs + * to this device. This is useful to deal with + * conflicting devices, e.g. when two disks want the + * same /dev/disk/by-label/xxx link because they have + * the same label. 
We want to make sure that the same + * device that won the symlink wins in systemd, so we + * check the device node major/minor */ + if (stat(p, &st) >= 0 && + ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) || + st.st_rdev != devnum)) continue; - (void) device_setup_unit(m, dev, p, false); + (void) device_setup_unit(m, dev, p, false); + } } - /* Add additional units for all explicitly configured - * aliases */ - alias = udev_device_get_property_value(dev, "SYSTEMD_ALIAS"); + /* Add additional units for all explicitly configured aliases */ + if (sd_device_get_property_value(dev, "SYSTEMD_ALIAS", &alias) < 0) + return 0; + for (;;) { _cleanup_free_ char *word = NULL; @@ -619,12 +631,12 @@ static int device_process_new(Manager *m, struct udev_device *dev) { if (r == -ENOMEM) return log_oom(); if (r < 0) - return log_warning_errno(r, "Failed to add parse SYSTEMD_ALIAS for %s: %m", sysfs); + return log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_ALIAS property: %m"); if (!path_is_absolute(word)) - log_warning("SYSTEMD_ALIAS for %s is not an absolute path, ignoring: %s", sysfs, word); + log_device_warning(dev, "SYSTEMD_ALIAS is not an absolute path, ignoring: %s", word); else if (!path_is_normalized(word)) - log_warning("SYSTEMD_ALIAS for %s is not a normalized path, ignoring: %s", sysfs, word); + log_device_warning(dev, "SYSTEMD_ALIAS is not a normalized path, ignoring: %s", word); else (void) device_setup_unit(m, dev, word, false); } @@ -712,13 +724,12 @@ static int device_update_found_by_name(Manager *m, const char *path, DeviceFound return 0; } -static bool device_is_ready(struct udev_device *dev) { +static bool device_is_ready(sd_device *dev) { const char *ready; assert(dev); - ready = udev_device_get_property_value(dev, "SYSTEMD_READY"); - if (!ready) + if (sd_device_get_property_value(dev, "SYSTEMD_READY", &ready) < 0) return true; return parse_boolean(ready) != 0; @@ -734,11 +745,11 @@ static Unit *device_following(Unit *u) { return NULL; /* Make 
everybody follow the unit that's named after the sysfs path */ - for (other = d->same_sysfs_next; other; other = other->same_sysfs_next) + LIST_FOREACH_AFTER(same_sysfs, other, d) if (startswith(UNIT(other)->id, "sys-")) return UNIT(other); - for (other = d->same_sysfs_prev; other; other = other->same_sysfs_prev) { + LIST_FOREACH_BEFORE(same_sysfs, other, d) { if (startswith(UNIT(other)->id, "sys-")) return UNIT(other); @@ -784,98 +795,71 @@ static int device_following_set(Unit *u, Set **_set) { static void device_shutdown(Manager *m) { assert(m); - m->udev_event_source = sd_event_source_unref(m->udev_event_source); - m->udev_monitor = udev_monitor_unref(m->udev_monitor); + m->device_monitor = sd_device_monitor_unref(m->device_monitor); m->devices_by_sysfs = hashmap_free(m->devices_by_sysfs); } static void device_enumerate(Manager *m) { - _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL; - struct udev_list_entry *item = NULL, *first = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *dev; int r; assert(m); - if (!m->udev_monitor) { - m->udev_monitor = udev_monitor_new_from_netlink(m->udev, "udev"); - if (!m->udev_monitor) { - log_error_errno(errno, "Failed to allocate udev monitor: %m"); + if (!m->device_monitor) { + r = sd_device_monitor_new(&m->device_monitor); + if (r < 0) { + log_error_errno(r, "Failed to allocate device monitor: %m"); goto fail; } /* This will fail if we are unprivileged, but that * should not matter much, as user instances won't run * during boot. 
*/ - (void) udev_monitor_set_receive_buffer_size(m->udev_monitor, 128*1024*1024); + (void) sd_device_monitor_set_receive_buffer_size(m->device_monitor, 128*1024*1024); - r = udev_monitor_filter_add_match_tag(m->udev_monitor, "systemd"); + r = sd_device_monitor_filter_add_match_tag(m->device_monitor, "systemd"); if (r < 0) { log_error_errno(r, "Failed to add udev tag match: %m"); goto fail; } - r = udev_monitor_enable_receiving(m->udev_monitor); + r = sd_device_monitor_attach_event(m->device_monitor, m->event); if (r < 0) { - log_error_errno(r, "Failed to enable udev event reception: %m"); + log_error_errno(r, "Failed to attach event to device monitor: %m"); goto fail; } - r = sd_event_add_io(m->event, &m->udev_event_source, udev_monitor_get_fd(m->udev_monitor), EPOLLIN, device_dispatch_io, m); + r = sd_device_monitor_start(m->device_monitor, device_dispatch_io, m); if (r < 0) { - log_error_errno(r, "Failed to watch udev file descriptor: %m"); + log_error_errno(r, "Failed to start device monitor: %m"); goto fail; } - - (void) sd_event_source_set_description(m->udev_event_source, "device"); - } - - e = udev_enumerate_new(m->udev); - if (!e) { - log_error_errno(errno, "Failed to alloacte udev enumerator: %m"); - goto fail; - } - - r = udev_enumerate_add_match_tag(e, "systemd"); - if (r < 0) { - log_error_errno(r, "Failed to create udev tag enumeration: %m"); - goto fail; } - r = udev_enumerate_add_match_is_initialized(e); + r = sd_device_enumerator_new(&e); if (r < 0) { - log_error_errno(r, "Failed to install initialization match into enumeration: %m"); + log_error_errno(r, "Failed to allocate device enumerator: %m"); goto fail; } - r = udev_enumerate_scan_devices(e); + r = sd_device_enumerator_add_match_tag(e, "systemd"); if (r < 0) { - log_error_errno(r, "Failed to enumerate devices: %m"); + log_error_errno(r, "Failed to set tag for device enumeration: %m"); goto fail; } - first = udev_enumerate_get_list_entry(e); - udev_list_entry_foreach(item, first) { - 
_cleanup_(udev_device_unrefp) struct udev_device *dev = NULL; + FOREACH_DEVICE(e, dev) { const char *sysfs; - sysfs = udev_list_entry_get_name(item); - - dev = udev_device_new_from_syspath(m->udev, sysfs); - if (!dev) { - if (errno == ENOMEM) { - log_oom(); - goto fail; - } - - /* If we can't create a device, don't bother, it probably just disappeared. */ - log_debug_errno(errno, "Failed to create udev device object for %s: %m", sysfs); - continue; - } - if (!device_is_ready(dev)) continue; (void) device_process_new(m, dev); + + if (sd_device_get_syspath(dev, &sysfs) < 0) + continue; + device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV); } @@ -903,40 +887,23 @@ static void device_propagate_reload_by_sysfs(Manager *m, const char *sysfs) { } } -static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) { - _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL; +static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata) { Manager *m = userdata; const char *action, *sysfs; int r; assert(m); + assert(dev); - if (revents != EPOLLIN) { - static RATELIMIT_DEFINE(limit, 10*USEC_PER_SEC, 5); - - if (ratelimit_below(&limit)) - log_warning("Failed to get udev event"); - if (!(revents & EPOLLIN)) - return 0; - } - - /* - * libudev might filter-out devices which pass the bloom - * filter, so getting NULL here is not necessarily an error. 
- */ - dev = udev_monitor_receive_device(m->udev_monitor); - if (!dev) - return 0; - - sysfs = udev_device_get_syspath(dev); - if (!sysfs) { - log_error("Failed to get udev sys path."); + r = sd_device_get_syspath(dev, &sysfs); + if (r < 0) { + log_device_error_errno(dev, r, "Failed to get device sys path: %m"); return 0; } - action = udev_device_get_action(dev); - if (!action) { - log_error("Failed to get udev action string."); + r = sd_device_get_property_value(dev, "ACTION", &action); + if (r < 0) { + log_device_error_errno(dev, r, "Failed to get udev action string: %m"); return 0; } @@ -949,7 +916,7 @@ static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, if (streq(action, "remove")) { r = swap_process_device_remove(m, dev); if (r < 0) - log_warning_errno(r, "Failed to process swap device remove event, ignoring: %m"); + log_device_warning_errno(dev, r, "Failed to process swap device remove event, ignoring: %m"); /* If we get notified that a device was removed by * udev, then it's completely gone, hence unset all @@ -962,7 +929,7 @@ static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, r = swap_process_device_new(m, dev); if (r < 0) - log_warning_errno(r, "Failed to process swap device new event, ignoring: %m"); + log_device_warning_errno(dev, r, "Failed to process swap device new event, ignoring: %m"); manager_dispatch_load_queue(m); @@ -992,7 +959,7 @@ static bool device_supported(void) { return read_only <= 0; } -static int validate_node(Manager *m, const char *node, struct udev_device **ret) { +static int validate_node(Manager *m, const char *node, sd_device **ret) { struct stat st; int r; @@ -1016,9 +983,9 @@ static int validate_node(Manager *m, const char *node, struct udev_device **ret) return 1; /* good! 
(though missing) */ } else { - _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL; + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; - r = udev_device_new_from_stat_rdev(m->udev, &st, &dev); + r = device_new_from_stat_rdev(&dev, &st); if (r == -ENOENT) { *ret = NULL; return 1; /* good! (though missing) */ @@ -1054,7 +1021,7 @@ void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFo * and unset individual bits in a single call, while merging partially with previous state. */ if ((found & mask) != 0) { - _cleanup_(udev_device_unrefp) struct udev_device *dev = NULL; + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; /* If the device is known in the kernel and newly appeared, then we'll create a device unit for it, * under the name referenced in /proc/swaps or /proc/self/mountinfo. But first, let's validate if @@ -1092,7 +1059,7 @@ const UnitVTable device_vtable = { .init = device_init, .done = device_done, - .load = unit_load_fragment_and_dropin_optional, + .load = device_load, .coldplug = device_coldplug, .catchup = device_catchup, diff --git a/src/core/device.h b/src/core/device.h index a119b33e57..3062be782d 100644 --- a/src/core/device.h +++ b/src/core/device.h @@ -11,9 +11,9 @@ typedef struct Device Device; * in quick succession). Hence we need to track precisely where it is already visible and where not. 
*/ typedef enum DeviceFound { DEVICE_NOT_FOUND = 0, - DEVICE_FOUND_UDEV = 1U << 1, /* The device has shown up in the udev database */ - DEVICE_FOUND_MOUNT = 1U << 2, /* The device has shown up in /proc/self/mountinfo */ - DEVICE_FOUND_SWAP = 1U << 3, /* The device has shown up in /proc/swaps */ + DEVICE_FOUND_UDEV = 1 << 0, /* The device has shown up in the udev database */ + DEVICE_FOUND_MOUNT = 1 << 1, /* The device has shown up in /proc/self/mountinfo */ + DEVICE_FOUND_SWAP = 1 << 2, /* The device has shown up in /proc/swaps */ DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP, } DeviceFound; diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c index 7c5111ddf6..089461a18a 100644 --- a/src/core/dynamic-user.c +++ b/src/core/dynamic-user.c @@ -10,8 +10,10 @@ #include "fileio.h" #include "fs-util.h" #include "io-util.h" +#include "nscd-flush.h" #include "parse-util.h" #include "random-util.h" +#include "serialize.h" #include "socket-util.h" #include "stdio-util.h" #include "string-util.h" @@ -20,6 +22,8 @@ /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */ #define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN) +DEFINE_PRIVATE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user); + static DynamicUser* dynamic_user_free(DynamicUser *d) { if (!d) return NULL; @@ -32,7 +36,7 @@ static DynamicUser* dynamic_user_free(DynamicUser *d) { } static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) { - DynamicUser *d = NULL; + DynamicUser *d; int r; assert(m); @@ -102,9 +106,11 @@ static int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) d = hashmap_get(m->dynamic_users, name); if (d) { - /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */ - d->n_ref++; - *ret = d; + if (ret) { + /* We already have a structure for the dynamic user, let's 
increase the ref count and reuse it */ + d->n_ref++; + *ret = d; + } return 0; } @@ -173,7 +179,7 @@ static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) { * * 1. Initially, we try to read the UID of a number of specified paths. If any of these UIDs works, we use * them. We use in order to increase the chance of UID reuse, if StateDirectory=, CacheDirectory= or - * LogDirectory= are used, as reusing the UID these directories are owned by saves us from having to + * LogsDirectory= are used, as reusing the UID these directories are owned by saves us from having to * recursively chown() them to new users. * * 2. If that didn't yield a currently unused UID, we hash the user name, and try to use that. This should be @@ -312,20 +318,8 @@ static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) { static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) { uid_t uid = UID_INVALID; struct iovec iov = IOVEC_INIT(&uid, sizeof(uid)); - union { - struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(int))]; - } control = {}; - struct msghdr mh = { - .msg_control = &control, - .msg_controllen = sizeof(control), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - struct cmsghdr *cmsg; - + int lock_fd; ssize_t k; - int lock_fd = -1; assert(d); assert(ret_uid); @@ -334,15 +328,9 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) { /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock * on the socket taken. */ - k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + k = receive_one_fd_iov(d->storage_socket[0], &iov, 1, MSG_DONTWAIT, &lock_fd); if (k < 0) - return -errno; - - cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); - if (cmsg) - lock_fd = *(int*) CMSG_DATA(cmsg); - else - cmsg_close_all(&mh); /* just in case... 
*/ + return (int) k; *ret_uid = uid; *ret_lock_fd = lock_fd; @@ -352,42 +340,11 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) { static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) { struct iovec iov = IOVEC_INIT(&uid, sizeof(uid)); - union { - struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(int))]; - } control = {}; - struct msghdr mh = { - .msg_control = &control, - .msg_controllen = sizeof(control), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - ssize_t k; assert(d); /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */ - - if (lock_fd >= 0) { - struct cmsghdr *cmsg; - - cmsg = CMSG_FIRSTHDR(&mh); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int)); - - mh.msg_controllen = CMSG_SPACE(sizeof(int)); - } else { - mh.msg_control = NULL; - mh.msg_controllen = 0; - } - - k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL); - if (k < 0) - return -errno; - - return 0; + return send_one_fd_iov(d->storage_socket[1], lock_fd, &iov, 1, MSG_DONTWAIT); } static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) { @@ -427,6 +384,7 @@ static int dynamic_user_realize( _cleanup_close_ int etc_passwd_lock_fd = -1; uid_t num = UID_INVALID; /* a uid if is_user, and a gid otherwise */ gid_t gid = GID_INVALID; /* a gid if is_user, ignored otherwise */ + bool flush_cache = false; int r; assert(d); @@ -515,6 +473,7 @@ static int dynamic_user_realize( } /* Great! Nothing is stored here, still. Store our newly acquired data. */ + flush_cache = true; } else { /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we * acquired, and use what's stored now. 
*/ @@ -525,6 +484,16 @@ static int dynamic_user_realize( num = new_uid; uid_lock_fd = new_uid_lock_fd; } + } else if (is_user && !uid_is_dynamic(num)) { + struct passwd *p; + + /* Statically allocated user may have different uid and gid. So, let's obtain the gid. */ + errno = 0; + p = getpwuid(num); + if (!p) + return errno > 0 ? -errno : -ESRCH; + + gid = p->pw_gid; } /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already @@ -534,6 +503,14 @@ static int dynamic_user_realize( if (r < 0) return r; + if (flush_cache) { + /* If we allocated a new dynamic UID, refresh nscd, so that it forgets about potentially cached + * negative entries. But let's do so after we release the /etc/passwd lock, so that there's no + * potential for nscd wanting to lock that for completing the invalidation. */ + etc_passwd_lock_fd = safe_close(etc_passwd_lock_fd); + (void) nscd_flush_cache(STRV_MAKE("passwd", "group")); + } + if (is_user) { *ret_uid = num; *ret_gid = gid != GID_INVALID ? gid : num; @@ -570,16 +547,6 @@ int dynamic_user_current(DynamicUser *d, uid_t *ret) { return 0; } -static DynamicUser* dynamic_user_ref(DynamicUser *d) { - if (!d) - return NULL; - - assert(d->n_ref > 0); - d->n_ref++; - - return d; -} - static DynamicUser* dynamic_user_unref(DynamicUser *d) { if (!d) return NULL; @@ -616,6 +583,8 @@ static int dynamic_user_close(DynamicUser *d) { /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. 
*/ unlink_uid_lock(lock_fd, uid, d->name); + + (void) nscd_flush_cache(STRV_MAKE("passwd", "group")); return 1; } @@ -652,13 +621,13 @@ int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) { copy0 = fdset_put_dup(fds, d->storage_socket[0]); if (copy0 < 0) - return copy0; + return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m"); copy1 = fdset_put_dup(fds, d->storage_socket[1]); if (copy1 < 0) - return copy1; + return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m"); - fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1); + (void) serialize_item_format(f, "dynamic-user", "%s %i %i", d->name, copy0, copy1); } return 0; diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h index 791a8ba0ef..112f91e63a 100644 --- a/src/core/dynamic-user.h +++ b/src/core/dynamic-user.h @@ -15,7 +15,7 @@ typedef struct DynamicCreds { * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you * need to allocated two of these objects. DynamicCreds below makes that easy. */ struct DynamicUser { - int n_ref; + unsigned n_ref; Manager *manager; /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c index 76e1124cff..f98b0de792 100644 --- a/src/core/emergency-action.c +++ b/src/core/emergency-action.c @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ -/*** - Copyright © 2012 Michael Olbrich -***/ #include <sys/reboot.h> @@ -13,18 +10,22 @@ #include "special.h" #include "string-table.h" #include "terminal-util.h" - -static void log_and_status(Manager *m, const char *message, const char *reason) { - log_warning("%s: %s", message, reason); - manager_status_printf(m, STATUS_TYPE_EMERGENCY, - ANSI_HIGHLIGHT_RED " !! 
" ANSI_NORMAL, - "%s: %s", message, reason); +#include "virt.h" + +static void log_and_status(Manager *m, bool warn, const char *message, const char *reason) { + log_full(warn ? LOG_WARNING : LOG_DEBUG, "%s: %s", message, reason); + if (warn) + manager_status_printf(m, STATUS_TYPE_EMERGENCY, + ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL, + "%s: %s", message, reason); } int emergency_action( Manager *m, EmergencyAction action, + EmergencyActionFlags options, const char *reboot_arg, + int exit_status, const char *reason) { assert(m); @@ -34,24 +35,17 @@ int emergency_action( if (action == EMERGENCY_ACTION_NONE) return -ECANCELED; - if (!m->service_watchdogs) { + if (FLAGS_SET(options, EMERGENCY_ACTION_IS_WATCHDOG) && !m->service_watchdogs) { log_warning("Watchdog disabled! Not acting on: %s", reason); return -ECANCELED; } - if (!MANAGER_IS_SYSTEM(m)) { - /* Downgrade all options to simply exiting if we run - * in user mode */ - - log_warning("Exiting: %s", reason); - m->exit_code = MANAGER_EXIT; - return -ECANCELED; - } + bool warn = FLAGS_SET(options, EMERGENCY_ACTION_WARN); switch (action) { case EMERGENCY_ACTION_REBOOT: - log_and_status(m, "Rebooting", reason); + log_and_status(m, warn, "Rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); @@ -59,15 +53,15 @@ int emergency_action( break; case EMERGENCY_ACTION_REBOOT_FORCE: - log_and_status(m, "Forcibly rebooting", reason); + log_and_status(m, warn, "Forcibly rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); - m->exit_code = MANAGER_REBOOT; + m->objective = MANAGER_REBOOT; break; case EMERGENCY_ACTION_REBOOT_IMMEDIATE: - log_and_status(m, "Rebooting immediately", reason); + log_and_status(m, warn, "Rebooting immediately", reason); sync(); @@ -81,18 +75,46 @@ int emergency_action( (void) reboot(RB_AUTOBOOT); break; + case EMERGENCY_ACTION_EXIT: + + if (exit_status >= 0) 
+ m->return_value = exit_status; + + if (MANAGER_IS_USER(m) || detect_container() > 0) { + log_and_status(m, warn, "Exiting", reason); + (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_EXIT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); + break; + } + + log_notice("Doing \"poweroff\" action instead of an \"exit\" emergency action."); + _fallthrough_; + case EMERGENCY_ACTION_POWEROFF: - log_and_status(m, "Powering off", reason); + log_and_status(m, warn, "Powering off", reason); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); break; + case EMERGENCY_ACTION_EXIT_FORCE: + + if (exit_status >= 0) + m->return_value = exit_status; + + if (MANAGER_IS_USER(m) || detect_container() > 0) { + log_and_status(m, warn, "Exiting immediately", reason); + m->objective = MANAGER_EXIT; + break; + } + + log_notice("Doing \"poweroff-force\" action instead of an \"exit-force\" emergency action."); + _fallthrough_; + case EMERGENCY_ACTION_POWEROFF_FORCE: - log_and_status(m, "Forcibly powering off", reason); - m->exit_code = MANAGER_POWEROFF; + log_and_status(m, warn, "Forcibly powering off", reason); + m->objective = MANAGER_POWEROFF; break; case EMERGENCY_ACTION_POWEROFF_IMMEDIATE: - log_and_status(m, "Powering off immediately", reason); + log_and_status(m, warn, "Powering off immediately", reason); sync(); @@ -114,6 +136,26 @@ static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = { [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate", [EMERGENCY_ACTION_POWEROFF] = "poweroff", [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force", - [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate" + [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate", + [EMERGENCY_ACTION_EXIT] = "exit", + [EMERGENCY_ACTION_EXIT_FORCE] = "exit-force", }; DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction); + +int parse_emergency_action( + const char *value, + bool system, + EmergencyAction *ret) { 
+ + EmergencyAction x; + + x = emergency_action_from_string(value); + if (x < 0) + return -EINVAL; + + if (!system && x != EMERGENCY_ACTION_NONE && x < _EMERGENCY_ACTION_FIRST_USER_ACTION) + return -EOPNOTSUPP; + + *ret = x; + return 0; +} diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h index 61791f176f..6e6c69ddfc 100644 --- a/src/core/emergency-action.h +++ b/src/core/emergency-action.h @@ -1,10 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #pragma once -/*** - Copyright © 2012 Michael Olbrich -***/ - typedef enum EmergencyAction { EMERGENCY_ACTION_NONE, EMERGENCY_ACTION_REBOOT, @@ -13,14 +9,26 @@ typedef enum EmergencyAction { EMERGENCY_ACTION_POWEROFF, EMERGENCY_ACTION_POWEROFF_FORCE, EMERGENCY_ACTION_POWEROFF_IMMEDIATE, + EMERGENCY_ACTION_EXIT, + _EMERGENCY_ACTION_FIRST_USER_ACTION = EMERGENCY_ACTION_EXIT, + EMERGENCY_ACTION_EXIT_FORCE, _EMERGENCY_ACTION_MAX, _EMERGENCY_ACTION_INVALID = -1 } EmergencyAction; +typedef enum EmergencyActionFlags { + EMERGENCY_ACTION_IS_WATCHDOG = 1 << 0, + EMERGENCY_ACTION_WARN = 1 << 1, +} EmergencyActionFlags; + #include "macro.h" #include "manager.h" -int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason); +int emergency_action(Manager *m, + EmergencyAction action, EmergencyActionFlags options, + const char *reboot_arg, int exit_status, const char *reason); const char* emergency_action_to_string(EmergencyAction i) _const_; EmergencyAction emergency_action_from_string(const char *s) _pure_; + +int parse_emergency_action(const char *value, bool system, EmergencyAction *ret); diff --git a/src/core/execute.c b/src/core/execute.c index 8ac69d1a0f..595a3c6eca 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -50,12 +50,12 @@ #include "chown-recursive.h" #include "cpu-set-util.h" #include "def.h" +#include "env-file.h" #include "env-util.h" #include "errno-list.h" #include "execute.h" #include "exit-status.h" #include "fd-util.h" -#include "fileio.h" 
#include "format-util.h" #include "fs-util.h" #include "glob-util.h" @@ -76,7 +76,6 @@ #if HAVE_SECCOMP #include "seccomp-util.h" #endif -#include "securebits.h" #include "securebits-util.h" #include "selinux-util.h" #include "signal-util.h" @@ -89,6 +88,7 @@ #include "strv.h" #include "syslog-util.h" #include "terminal-util.h" +#include "umask-util.h" #include "unit.h" #include "user-util.h" #include "util.h" @@ -147,11 +147,11 @@ static int shift_fds(int fds[], size_t n_fds) { return 0; } -static int flags_fds(const int fds[], size_t n_storage_fds, size_t n_socket_fds, bool nonblock) { +static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) { size_t i, n_fds; int r; - n_fds = n_storage_fds + n_socket_fds; + n_fds = n_socket_fds + n_storage_fds; if (n_fds <= 0) return 0; @@ -323,7 +323,8 @@ static int connect_logger_as( uid_t uid, gid_t gid) { - int fd, r; + _cleanup_close_ int fd = -1; + int r; assert(context); assert(params); @@ -339,14 +340,12 @@ static int connect_logger_as( if (r < 0) return r; - if (shutdown(fd, SHUT_RD) < 0) { - safe_close(fd); + if (shutdown(fd, SHUT_RD) < 0) return -errno; - } (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); - dprintf(fd, + if (dprintf(fd, "%s\n" "%s\n" "%i\n" @@ -360,10 +359,12 @@ static int connect_logger_as( !!context->syslog_level_prefix, is_syslog_output(output), is_kmsg_output(output), - is_terminal_output(output)); + is_terminal_output(output)) < 0) + return -errno; - return move_fd(fd, nfd, false); + return move_fd(TAKE_FD(fd), nfd, false); } + static int open_terminal_as(const char *path, int flags, int nfd) { int fd; @@ -378,10 +379,9 @@ static int open_terminal_as(const char *path, int flags, int nfd) { } static int acquire_path(const char *path, int flags, mode_t mode) { - union sockaddr_union sa = { - .sa.sa_family = AF_UNIX, - }; - int fd, r; + union sockaddr_union sa = {}; + _cleanup_close_ int fd = -1; + int r, salen; assert(path); @@ -390,11 +390,11 @@ static int 
acquire_path(const char *path, int flags, mode_t mode) { fd = open(path, flags|O_NOCTTY, mode); if (fd >= 0) - return fd; + return TAKE_FD(fd); if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */ return -errno; - if (strlen(path) > sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */ + if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */ return -ENXIO; /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */ @@ -403,25 +403,24 @@ static int acquire_path(const char *path, int flags, mode_t mode) { if (fd < 0) return -errno; - strncpy(sa.un.sun_path, path, sizeof(sa.un.sun_path)); - if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) { - safe_close(fd); + salen = sockaddr_un_set_path(&sa.un, path); + if (salen < 0) + return salen; + + if (connect(fd, &sa.sa, salen) < 0) return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. 
we have * indication that his wasn't an AF_UNIX socket after all */ - } if ((flags & O_ACCMODE) == O_RDONLY) r = shutdown(fd, SHUT_WR); else if ((flags & O_ACCMODE) == O_WRONLY) r = shutdown(fd, SHUT_RD); else - return fd; - if (r < 0) { - safe_close(fd); + return TAKE_FD(fd); + if (r < 0) return -errno; - } - return fd; + return TAKE_FD(fd); } static int fixup_input( @@ -544,6 +543,30 @@ static int setup_input( } } +static bool can_inherit_stderr_from_stdout( + const ExecContext *context, + ExecOutput o, + ExecOutput e) { + + assert(context); + + /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the + * stderr fd */ + + if (e == EXEC_OUTPUT_INHERIT) + return true; + if (e != o) + return false; + + if (e == EXEC_OUTPUT_NAMED_FD) + return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]); + + if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) + return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]); + + return true; +} + static int setup_output( const Unit *unit, const ExecContext *context, @@ -602,7 +625,7 @@ static int setup_output( return fileno; /* Duplicate from stdout if possible */ - if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT) + if (can_inherit_stderr_from_stdout(context, o, e)) return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno; o = e; @@ -675,9 +698,10 @@ static int setup_output( (void) fd_nonblock(named_iofds[fileno], false); return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno; - case EXEC_OUTPUT_FILE: { + case EXEC_OUTPUT_FILE: + case EXEC_OUTPUT_FILE_APPEND: { bool rw; - int fd; + int fd, flags; assert(context->stdio_file[fileno]); @@ -687,11 +711,15 @@ static int setup_output( if (rw) return dup2(STDIN_FILENO, fileno) < 0 ? 
-errno : fileno; - fd = acquire_path(context->stdio_file[fileno], O_WRONLY, 0666 & ~context->umask); + flags = O_WRONLY; + if (o == EXEC_OUTPUT_FILE_APPEND) + flags |= O_APPEND; + + fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask); if (fd < 0) return fd; - return move_fd(fd, fileno, false); + return move_fd(fd, fileno, 0); } default: @@ -914,7 +942,7 @@ static int get_fixed_user(const ExecContext *c, const char **user, * (i.e. are "/" or "/bin/nologin"). */ name = c->user; - r = get_user_creds_clean(&name, uid, gid, home, shell); + r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN); if (r < 0) return r; @@ -932,7 +960,7 @@ static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) return 0; name = c->group; - r = get_group_creds(&name, gid); + r = get_group_creds(&name, gid, 0); if (r < 0) return r; @@ -1004,7 +1032,7 @@ static int get_supplementary_groups(const ExecContext *c, const char *user, return -E2BIG; g = *i; - r = get_group_creds(&g, l_gids+k); + r = get_group_creds(&g, l_gids+k, 0); if (r < 0) return r; @@ -1151,6 +1179,16 @@ static int setup_pam( goto fail; } + if (!tty) { + _cleanup_free_ char *q = NULL; + + /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure + * out if that's the case, and read the TTY off it. 
*/ + + if (getttyname_malloc(STDIN_FILENO, &q) >= 0) + tty = strjoina("/dev/", q); + } + if (tty) { pam_code = pam_set_item(handle, PAM_TTY, tty); if (pam_code != PAM_SUCCESS) @@ -1415,7 +1453,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ return r; } - return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action); + return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false); } static int apply_syscall_archs(const Unit *u, const ExecContext *c) { @@ -1498,7 +1536,7 @@ static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); + return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false); } static int apply_private_devices(const Unit *u, const ExecContext *c) { @@ -1513,7 +1551,7 @@ static int apply_private_devices(const Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); + return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false); } static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) { @@ -1585,6 +1623,8 @@ static void do_idle_pipe_dance(int idle_pipe[4]) { idle_pipe[3] = safe_close(idle_pipe[3]); } +static const char *exec_directory_env_name_to_string(ExecDirectoryType t); + static int build_environment( const Unit *u, const ExecContext *c, @@ -1598,14 +1638,16 @@ static int build_environment( char ***ret) { _cleanup_strv_free_ char **our_env = NULL; + ExecDirectoryType t; size_t n_env = 0; char *x; assert(u); 
assert(c); + assert(p); assert(ret); - our_env = new0(char*, 14); + our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX); if (!our_env) return -ENOMEM; @@ -1710,8 +1752,37 @@ static int build_environment( our_env[n_env++] = x; } + for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) { + _cleanup_free_ char *pre = NULL, *joined = NULL; + const char *n; + + if (!p->prefix[t]) + continue; + + if (strv_isempty(c->directories[t].paths)) + continue; + + n = exec_directory_env_name_to_string(t); + if (!n) + continue; + + pre = strjoin(p->prefix[t], "/"); + if (!pre) + return -ENOMEM; + + joined = strv_join_prefix(c->directories[t].paths, ":", pre); + if (!joined) + return -ENOMEM; + + x = strjoin(n, "=", joined); + if (!x) + return -ENOMEM; + + our_env[n_env++] = x; + } + our_env[n_env++] = NULL; - assert(n_env <= 12); + assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX); *ret = TAKE_PTR(our_env); @@ -2010,7 +2081,7 @@ static int setup_exec_directory( if (context->dynamic_user && !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) { - _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL; + _cleanup_free_ char *private_root = NULL; /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we * want to avoid leaving a directory around fully accessible that is owned by a dynamic user @@ -2075,18 +2146,8 @@ static int setup_exec_directory( goto fail; } - parent = dirname_malloc(p); - if (!parent) { - r = -ENOMEM; - goto fail; - } - - r = path_make_relative(parent, pp, &relative); - if (r < 0) - goto fail; - /* And link it up from the original place */ - r = symlink_idempotent(relative, p); + r = symlink_idempotent(pp, p, true); if (r < 0) goto fail; @@ -2379,11 +2440,24 @@ static int apply_mount_namespace( bind_mount_free_many(bind_mounts, n_bind_mounts); - /* If we couldn't set up the namespace this is probably due to a - * missing capability. In this case, silently proceeed. 
*/ - if (IN_SET(r, -EPERM, -EACCES)) { - log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); - return 0; + /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports + * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively + * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a + * completely different execution environment. */ + if (r == -ENOANO) { + if (n_bind_mounts == 0 && + context->n_temporary_filesystems == 0 && + !root_dir && !root_image && + !context->dynamic_user) { + log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring."); + return 0; + } + + log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n" + "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s", + n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user)); + + return -EOPNOTSUPP; } return r; @@ -2456,9 +2530,6 @@ static int setup_keyring( * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0).
*/ - if (!(p->flags & EXEC_NEW_KEYRING)) - return 0; - if (context->keyring_mode == EXEC_KEYRING_INHERIT) return 0; @@ -2566,6 +2637,7 @@ static int close_remaining_fds( const DynamicCreds *dcreds, int user_lookup_fd, int socket_fd, + int exec_fd, int *fds, size_t n_fds) { size_t n_dont_close = 0; @@ -2582,6 +2654,8 @@ static int close_remaining_fds( if (socket_fd >= 0) dont_close[n_dont_close++] = socket_fd; + if (exec_fd >= 0) + dont_close[n_dont_close++] = exec_fd; if (n_fds > 0) { memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds); n_dont_close += n_fds; @@ -2707,6 +2781,37 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p static char *exec_command_line(char **argv); +static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) { + bool using_subcgroup; + char *p; + + assert(params); + assert(ret); + + if (!params->cgroup_path) + return -EINVAL; + + /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated + * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control + * processes started after the main unit's process in the unit's main cgroup because it is now an inner one, + * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process, + * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=, + * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre= + * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP + * flag, which is only passed for the former statements, not for the latter. 
*/ + + using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL); + if (using_subcgroup) + p = strjoin(params->cgroup_path, "/.control"); + else + p = strdup(params->cgroup_path); + if (!p) + return -ENOMEM; + + *ret = p; + return using_subcgroup; +} + static int exec_child( Unit *unit, const ExecCommand *command, @@ -2714,20 +2819,20 @@ static int exec_child( const ExecParameters *params, ExecRuntime *runtime, DynamicCreds *dcreds, - char **argv, int socket_fd, int named_iofds[3], int *fds, - size_t n_storage_fds, size_t n_socket_fds, + size_t n_storage_fds, char **files_env, int user_lookup_fd, int *exit_status) { _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; - _cleanup_free_ char *home_buffer = NULL; + int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1; _cleanup_free_ gid_t *supplementary_gids = NULL; const char *username = NULL, *groupname = NULL; + _cleanup_free_ char *home_buffer = NULL; const char *home = NULL, *shell = NULL; dev_t journal_stream_dev = 0; ino_t journal_stream_ino = 0; @@ -2747,7 +2852,6 @@ static int exec_child( #endif uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; - int r, ngids = 0; size_t n_fds; ExecDirectoryType dt; int secure_bits; @@ -2791,8 +2895,8 @@ static int exec_child( /* In case anything used libc syslog(), close this here, too */ closelog(); - n_fds = n_storage_fds + n_socket_fds; - r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds); + n_fds = n_socket_fds + n_storage_fds; + r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds); if (r < 0) { *exit_status = EXIT_FDS; return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m"); @@ -2810,7 +2914,7 @@ static int exec_child( const char *vc = params->confirm_spawn; _cleanup_free_ char *cmdline = NULL; - cmdline = exec_command_line(argv); + cmdline = 
exec_command_line(command->argv); if (!cmdline) { *exit_status = EXIT_MEMORY; return log_oom(); @@ -2828,10 +2932,22 @@ static int exec_child( } } + /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is + * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note + * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS + * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they + * might internally call into other NSS modules that are involved in hostname resolution, we never know. */ + if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 || + setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) { + *exit_status = EXIT_MEMORY; + return log_unit_error_errno(unit, errno, "Failed to update environment: %m"); + } + if (context->dynamic_user && dcreds) { _cleanup_strv_free_ char **suggested_paths = NULL; - /* Make sure we bypass our own NSS module for any NSS checks */ + /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS + * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) { *exit_status = EXIT_USER; return log_unit_error_errno(unit, errno, "Failed to update environment: %m"); @@ -2909,6 +3025,24 @@ static int exec_child( if (socket_fd >= 0) (void) fd_nonblock(socket_fd, false); + /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields. 
+ * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */ + if (params->cgroup_path) { + _cleanup_free_ char *p = NULL; + + r = exec_parameters_get_cgroup_path(params, &p); + if (r < 0) { + *exit_status = EXIT_CGROUP; + return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m"); + } + + r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL); + if (r < 0) { + *exit_status = EXIT_CGROUP; + return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p); + } + } + r = setup_input(context, params, socket_fd, named_iofds); if (r < 0) { *exit_status = EXIT_STDIN; @@ -2927,14 +3061,6 @@ static int exec_child( return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m"); } - if (params->cgroup_path) { - r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL); - if (r < 0) { - *exit_status = EXIT_CGROUP; - return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path); - } - } - if (context->oom_score_adjust_set) { /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces * prohibit write access to this file, and we shouldn't trip up over that. */ @@ -3130,11 +3256,6 @@ static int exec_child( } } - /* Apply just after mount namespace setup */ - r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status); - if (r < 0) - return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m"); - /* Drop groups as early as possbile */ if (needs_setuid) { r = enforce_groups(gid, supplementary_gids, ngids); @@ -3165,18 +3286,59 @@ static int exec_child( } /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are - * more aggressive this time since socket_fd and the netns fds we don't need anymore. 
The custom endpoint fd - * was needed to upload the policy and can now be closed as well. */ - r = close_all_fds(fds, n_fds); + * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd + * however if we have it as we want to keep it open until the final execve(). */ + + if (params->exec_fd >= 0) { + exec_fd = params->exec_fd; + + if (exec_fd < 3 + (int) n_fds) { + int moved_fd; + + /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the + * process we are about to execute. */ + + moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds); + if (moved_fd < 0) { + *exit_status = EXIT_FDS; + return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m"); + } + + safe_close(exec_fd); + exec_fd = moved_fd; + } else { + /* This fd should be FD_CLOEXEC already, but let's make sure. */ + r = fd_cloexec(exec_fd, true); + if (r < 0) { + *exit_status = EXIT_FDS; + return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m"); + } + } + + fds_with_exec_fd = newa(int, n_fds + 1); + memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int)); + fds_with_exec_fd[n_fds] = exec_fd; + n_fds_with_exec_fd = n_fds + 1; + } else { + fds_with_exec_fd = fds; + n_fds_with_exec_fd = n_fds; + } + + r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd); if (r >= 0) r = shift_fds(fds, n_fds); if (r >= 0) - r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking); + r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking); if (r < 0) { *exit_status = EXIT_FDS; return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m"); } + /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off + * and at the right fd numbers. 
There are no other fds open, with one exception: the exec_fd if it is defined, + * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we + * came this far. */ + secure_bits = context->secure_bits; if (needs_sandboxing) { @@ -3268,6 +3430,12 @@ static int exec_child( } } + /* Apply working directory here, because the working directory might be on NFS and only the user running + * this service might have the correct privilege to change to the working directory */ + r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status); + if (r < 0) + return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m"); + if (needs_sandboxing) { /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires @@ -3389,7 +3557,7 @@ static int exec_child( strv_free_and_replace(accum_env, ee); } - final_argv = replace_env_argv(argv, accum_env); + final_argv = replace_env_argv(command->argv, accum_env); if (!final_argv) { *exit_status = EXIT_MEMORY; return log_oom(); @@ -3407,10 +3575,35 @@ static int exec_child( LOG_UNIT_INVOCATION_ID(unit)); } + if (exec_fd >= 0) { + uint8_t hot = 1; + + /* We have finished with all our initializations. Let's now let the manager know that. From this point + * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */ + + if (write(exec_fd, &hot, sizeof(hot)) < 0) { + *exit_status = EXIT_EXEC; + return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m"); + } + } + execve(command->path, final_argv, accum_env); + r = -errno; + + if (exec_fd >= 0) { + uint8_t hot = 0; - if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) { - log_struct_errno(LOG_INFO, errno, + /* The execve() failed. This means the exec_fd is still open.
Which means we need to tell the manager + * that POLLHUP on it no longer means execve() succeeded. */ + + if (write(exec_fd, &hot, sizeof(hot)) < 0) { + *exit_status = EXIT_EXEC; + return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m"); + } + } + + if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) { + log_struct_errno(LOG_INFO, r, "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR, LOG_UNIT_ID(unit), LOG_UNIT_INVOCATION_ID(unit), @@ -3421,7 +3614,7 @@ static int exec_child( } *exit_status = EXIT_EXEC; - return log_unit_error_errno(unit, errno, "Failed to execute command: %m"); + return log_unit_error_errno(unit, r, "Failed to execute command: %m"); } static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l); @@ -3435,13 +3628,11 @@ int exec_spawn(Unit *unit, DynamicCreds *dcreds, pid_t *ret) { + int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL; + _cleanup_free_ char *subcgroup_path = NULL; _cleanup_strv_free_ char **files_env = NULL; - int *fds = NULL; size_t n_storage_fds = 0, n_socket_fds = 0; _cleanup_free_ char *line = NULL; - int socket_fd, r; - int named_iofds[3] = { -1, -1, -1 }; - char **argv; pid_t pid; assert(unit); @@ -3449,7 +3640,7 @@ int exec_spawn(Unit *unit, assert(context); assert(ret); assert(params); - assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0)); + assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0)); if (context->std_input == EXEC_INPUT_SOCKET || context->std_output == EXEC_OUTPUT_SOCKET || @@ -3469,8 +3660,8 @@ int exec_spawn(Unit *unit, } else { socket_fd = -1; fds = params->fds; - n_storage_fds = params->n_storage_fds; n_socket_fds = params->n_socket_fds; + n_storage_fds = params->n_storage_fds; } r = exec_context_named_iofds(context, params, named_iofds); @@ -3481,8 +3672,7 @@ int exec_spawn(Unit *unit, if (r < 0) return log_unit_error_errno(unit, r, "Failed to load environment files: %m"); - argv = params->argv ?: 
command->argv; - line = exec_command_line(argv); + line = exec_command_line(command->argv); if (!line) return log_oom(); @@ -3492,6 +3682,17 @@ int exec_spawn(Unit *unit, LOG_UNIT_ID(unit), LOG_UNIT_INVOCATION_ID(unit)); + if (params->cgroup_path) { + r = exec_parameters_get_cgroup_path(params, &subcgroup_path); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m"); + if (r > 0) { /* We are using a child cgroup */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path); + } + } + pid = fork(); if (pid < 0) return log_unit_error_errno(unit, errno, "Failed to fork: %m"); @@ -3505,12 +3706,11 @@ int exec_spawn(Unit *unit, params, runtime, dcreds, - argv, socket_fd, named_iofds, fds, - n_storage_fds, n_socket_fds, + n_storage_fds, files_env, unit->manager->user_lookup_fds[1], &exit_status); @@ -3530,13 +3730,11 @@ int exec_spawn(Unit *unit, log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid); - /* We add the new process to the cgroup both in the child (so - * that we can be sure that no user code is ever executed - * outside of the cgroup) and in the parent (so that we can be - * sure that when we kill the cgroup the process will be - * killed too). */ - if (params->cgroup_path) - (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid); + /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever + * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the + * process will be killed too). 
*/ + if (subcgroup_path) + (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid); exec_status_start(&command->exec_status, pid); @@ -3624,6 +3822,9 @@ void exec_context_done(ExecContext *c) { exec_context_free_log_extra_fields(c); + c->log_rate_limit_interval_usec = 0; + c->log_rate_limit_burst = 0; + c->stdin_data = mfree(c->stdin_data); c->stdin_data_size = 0; } @@ -3655,7 +3856,6 @@ static void exec_command_done(ExecCommand *c) { assert(c); c->path = mfree(c->path); - c->argv = strv_free(c->argv); } @@ -3685,6 +3885,24 @@ void exec_command_free_array(ExecCommand **c, size_t n) { c[i] = exec_command_free_list(c[i]); } +void exec_command_reset_status_array(ExecCommand *c, size_t n) { + size_t i; + + for (i = 0; i < n; i++) + exec_status_reset(&c[i].exec_status); +} + +void exec_command_reset_status_list_array(ExecCommand **c, size_t n) { + size_t i; + + for (i = 0; i < n; i++) { + ExecCommand *z; + + LIST_FOREACH(command, z, c[i]) + exec_status_reset(&z->exec_status); + } +} + typedef struct InvalidEnvInfo { const Unit *unit; const char *path; @@ -3813,7 +4031,7 @@ static int exec_context_load_environment(const Unit *unit, const ExecContext *c, assert(pglob.gl_pathc > 0); for (n = 0; n < pglob.gl_pathc; n++) { - k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p); + k = load_env_file(NULL, pglob.gl_pathv[n], &p); if (k < 0) { if (ignore) continue; @@ -3976,9 +4194,9 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { for (i = 0; i < RLIM_NLIMITS; i++) if (c->rlimit[i]) { - fprintf(f, "Limit%s%s: " RLIM_FMT "\n", + fprintf(f, "%sLimit%s: " RLIM_FMT "\n", prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max); - fprintf(f, "Limit%s%sSoft: " RLIM_FMT "\n", + fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n", prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur); } @@ -4036,8 +4254,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]); 
if (c->std_output == EXEC_OUTPUT_FILE) fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]); + if (c->std_output == EXEC_OUTPUT_FILE_APPEND) + fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]); if (c->std_error == EXEC_OUTPUT_FILE) fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]); + if (c->std_error == EXEC_OUTPUT_FILE_APPEND) + fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]); if (c->tty_path) fprintf(f, @@ -4084,6 +4306,17 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t)); } + if (c->log_rate_limit_interval_usec > 0) { + char buf_timespan[FORMAT_TIMESPAN_MAX]; + + fprintf(f, + "%sLogRateLimitIntervalSec: %s\n", + prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC)); + } + + if (c->log_rate_limit_burst > 0) + fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst); + if (c->n_log_extra_fields > 0) { size_t j; @@ -4331,18 +4564,22 @@ void exec_context_free_log_extra_fields(ExecContext *c) { void exec_status_start(ExecStatus *s, pid_t pid) { assert(s); - zero(*s); - s->pid = pid; + *s = (ExecStatus) { + .pid = pid, + }; + dual_timestamp_get(&s->start_timestamp); } void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) { assert(s); - if (s->pid && s->pid != pid) - zero(*s); + if (s->pid != pid) { + *s = (ExecStatus) { + .pid = pid, + }; + } - s->pid = pid; dual_timestamp_get(&s->exit_timestamp); s->code = code; @@ -4350,12 +4587,18 @@ void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int if (context) { if (context->utmp_id) - utmp_put_dead_process(context->utmp_id, pid, code, status); + (void) utmp_put_dead_process(context->utmp_id, pid, code, status); exec_context_tty_reset(context, NULL); } } +void 
exec_status_reset(ExecStatus *s) { + assert(s); + + *s = (ExecStatus) {}; +} + void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) { char buf[FORMAT_TIMESTAMP_MAX]; @@ -4487,8 +4730,7 @@ int exec_command_set(ExecCommand *c, const char *path, ...) { return -ENOMEM; } - free(c->path); - c->path = p; + free_and_replace(c->path, p); return strv_free_and_replace(c->argv, l); } @@ -4920,10 +5162,8 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { finalize: r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL); - if (r < 0) { + if (r < 0) log_debug_errno(r, "Failed to add exec-runtime: %m"); - return; - } } void exec_runtime_vacuum(Manager *m) { @@ -4942,6 +5182,13 @@ void exec_runtime_vacuum(Manager *m) { } } +void exec_params_clear(ExecParameters *p) { + if (!p) + return; + + strv_free(p->environment); +} + static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_NULL] = "null", [EXEC_INPUT_TTY] = "tty", @@ -4968,6 +5215,7 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = { [EXEC_OUTPUT_SOCKET] = "socket", [EXEC_OUTPUT_NAMED_FD] = "fd", [EXEC_OUTPUT_FILE] = "file", + [EXEC_OUTPUT_FILE_APPEND] = "append", }; DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput); @@ -4998,6 +5246,16 @@ static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = { DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType); +static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = { + [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY", + [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY", + [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY", + [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY", + [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType); + static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = { [EXEC_KEYRING_INHERIT] = 
"inherit", [EXEC_KEYRING_PRIVATE] = "private", diff --git a/src/core/execute.h b/src/core/execute.h index 77ffe82323..0f1bf56744 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -16,7 +16,7 @@ typedef struct Manager Manager; #include "cgroup-util.h" #include "fdset.h" #include "list.h" -#include "missing.h" +#include "missing_resource.h" #include "namespace.h" #include "nsflags.h" @@ -56,6 +56,7 @@ typedef enum ExecOutput { EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, + EXEC_OUTPUT_FILE_APPEND, _EXEC_OUTPUT_MAX, _EXEC_OUTPUT_INVALID = -1 } ExecOutput; @@ -76,21 +77,23 @@ typedef enum ExecKeyringMode { _EXEC_KEYRING_MODE_INVALID = -1, } ExecKeyringMode; +/* Contains start and exit information about an executed command. */ struct ExecStatus { + pid_t pid; dual_timestamp start_timestamp; dual_timestamp exit_timestamp; - pid_t pid; int code; /* as in siginfo_t::si_code */ int status; /* as in sigingo_t::si_status */ }; typedef enum ExecCommandFlags { - EXEC_COMMAND_IGNORE_FAILURE = 1, - EXEC_COMMAND_FULLY_PRIVILEGED = 2, - EXEC_COMMAND_NO_SETUID = 4, - EXEC_COMMAND_AMBIENT_MAGIC = 8, + EXEC_COMMAND_IGNORE_FAILURE = 1 << 0, + EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1, + EXEC_COMMAND_NO_SETUID = 1 << 2, + EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3, } ExecCommandFlags; +/* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */ struct ExecCommand { char *path; char **argv; @@ -99,13 +102,16 @@ struct ExecCommand { LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */ }; +/* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate + * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces + * between invocations of commands. This is a reference counted object, with one reference taken by each currently + * active command invocation that wants to share this runtime. 
*/ struct ExecRuntime { - int n_ref; + unsigned n_ref; Manager *manager; - /* unit id of the owner */ - char *id; + char *id; /* Unit id of the owner */ char *tmp_dir; char *var_tmp_dir; @@ -130,6 +136,9 @@ typedef struct ExecDirectory { mode_t mode; } ExecDirectory; +/* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration + * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not + * change after being loaded. */ struct ExecContext { char **environment; char **environment_files; @@ -216,6 +225,9 @@ struct ExecContext { struct iovec* log_extra_fields; size_t n_log_extra_fields; + usec_t log_rate_limit_interval_usec; + unsigned log_rate_limit_burst; + bool cpu_sched_reset_on_fork; bool non_blocking; bool private_tmp; @@ -277,27 +289,28 @@ typedef enum ExecFlags { EXEC_APPLY_SANDBOXING = 1 << 0, EXEC_APPLY_CHROOT = 1 << 1, EXEC_APPLY_TTY_STDIN = 1 << 2, - EXEC_NEW_KEYRING = 1 << 3, - EXEC_PASS_LOG_UNIT = 1 << 4, /* Whether to pass the unit name to the service's journal stream connection */ - EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */ - EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */ - EXEC_CGROUP_DELEGATE = 1 << 7, + EXEC_PASS_LOG_UNIT = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */ + EXEC_CHOWN_DIRECTORIES = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */ + EXEC_NSS_BYPASS_BUS = 1 << 5, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */ + EXEC_CGROUP_DELEGATE = 1 << 6, + EXEC_IS_CONTROL = 1 << 7, + EXEC_CONTROL_CGROUP = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and 
EXEC_IS_CONTROL is set, too */ /* The following are not used by execute.c, but by consumers internally */ - EXEC_PASS_FDS = 1 << 8, - EXEC_IS_CONTROL = 1 << 9, + EXEC_PASS_FDS = 1 << 9, EXEC_SETENV_RESULT = 1 << 10, EXEC_SET_WATCHDOG = 1 << 11, } ExecFlags; +/* Parameters for a specific invocation of a command. This structure is put together right before a command is + * executed. */ struct ExecParameters { - char **argv; char **environment; int *fds; char **fd_names; - size_t n_storage_fds; size_t n_socket_fds; + size_t n_storage_fds; ExecFlags flags; bool selinux_context_net:1; @@ -316,6 +329,9 @@ struct ExecParameters { int stdin_fd; int stdout_fd; int stderr_fd; + + /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */ + int exec_fd; }; #include "unit.h" @@ -330,14 +346,14 @@ int exec_spawn(Unit *unit, pid_t *ret); void exec_command_done_array(ExecCommand *c, size_t n); - ExecCommand* exec_command_free_list(ExecCommand *c); void exec_command_free_array(ExecCommand **c, size_t n); - +void exec_command_reset_status_array(ExecCommand *c, size_t n); +void exec_command_reset_status_list_array(ExecCommand **c, size_t n); void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix); void exec_command_append_list(ExecCommand **l, ExecCommand *e); -int exec_command_set(ExecCommand *c, const char *path, ...); -int exec_command_append(ExecCommand *c, const char *path, ...); +int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_; +int exec_command_append(ExecCommand *c, const char *path, ...) 
_sentinel_; void exec_context_init(ExecContext *c); void exec_context_done(ExecContext *c); @@ -357,6 +373,7 @@ void exec_context_free_log_extra_fields(ExecContext *c); void exec_status_start(ExecStatus *s, pid_t pid); void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status); void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix); +void exec_status_reset(ExecStatus *s); int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret); ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy); @@ -366,6 +383,8 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds); void exec_runtime_vacuum(Manager *m); +void exec_params_clear(ExecParameters *p); + const char* exec_output_to_string(ExecOutput i) _const_; ExecOutput exec_output_from_string(const char *s) _pure_; diff --git a/src/core/ima-setup.c b/src/core/ima-setup.c index 013d6c5de3..fd7c5f64af 100644 --- a/src/core/ima-setup.c +++ b/src/core/ima-setup.c @@ -7,6 +7,7 @@ #include <errno.h> #include <unistd.h> +#include "alloc-util.h" #include "fd-util.h" #include "fileio.h" #include "ima-setup.h" @@ -22,20 +23,20 @@ int ima_setup(void) { _cleanup_fclose_ FILE *input = NULL; _cleanup_close_ int imafd = -1; unsigned lineno = 0; - char line[page_size()]; + int r; if (access(IMA_SECFS_DIR, F_OK) < 0) { - log_debug("IMA support is disabled in the kernel, ignoring."); + log_debug_errno(errno, "IMA support is disabled in the kernel, ignoring: %m"); return 0; } if (access(IMA_SECFS_POLICY, W_OK) < 0) { - log_warning("Another IMA custom policy has already been loaded, ignoring."); + log_warning_errno(errno, "Another IMA custom policy has already been loaded, ignoring: %m"); return 0; } if (access(IMA_POLICY_PATH, F_OK) < 0) { - log_debug("No IMA custom policy file "IMA_POLICY_PATH", ignoring."); + 
log_debug_errno(errno, "No IMA custom policy file "IMA_POLICY_PATH", ignoring: %m"); return 0; } @@ -56,7 +57,7 @@ int ima_setup(void) { return 0; } - close(imafd); + safe_close(imafd); imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC); if (imafd < 0) { @@ -64,10 +65,16 @@ int ima_setup(void) { return 0; } - FOREACH_LINE(line, input, - return log_error_errno(errno, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m")) { + for (;;) { + _cleanup_free_ char *line = NULL; size_t len; + r = read_line(input, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m"); + if (r == 0) + break; + len = strlen(line); lineno++; diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h index 7babf19562..77078e1f14 100644 --- a/src/core/ip-address-access.h +++ b/src/core/ip-address-access.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #pragma once - #include "conf-parser.h" #include "in-addr-util.h" #include "list.h" diff --git a/src/core/job.c b/src/core/job.c index 734756b666..f635b7e933 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -10,10 +10,12 @@ #include "dbus-job.h" #include "dbus.h" #include "escape.h" +#include "fileio.h" #include "job.h" #include "log.h" #include "macro.h" #include "parse-util.h" +#include "serialize.h" #include "set.h" #include "special.h" #include "stdio-util.h" @@ -31,14 +33,15 @@ Job* job_new_raw(Unit *unit) { assert(unit); - j = new0(Job, 1); + j = new(Job, 1); if (!j) return NULL; - j->manager = unit->manager; - j->unit = unit; - j->type = _JOB_TYPE_INVALID; - j->reloaded = false; + *j = (Job) { + .manager = unit->manager, + .unit = unit, + .type = _JOB_TYPE_INVALID, + }; return j; } @@ -86,7 +89,7 @@ void job_unlink(Job *j) { j->timer_event_source = sd_event_source_unref(j->timer_event_source); } -void job_free(Job *j) { +Job* job_free(Job *j) { assert(j); assert(!j->installed); assert(!j->transaction_prev); @@ -99,7 +102,7 @@ 
void job_free(Job *j) { sd_bus_track_unref(j->bus_track); strv_free(j->deserialized_clients); - free(j); + return mfree(j); } static void job_set_state(Job *j, JobState state) { @@ -148,7 +151,7 @@ void job_uninstall(Job *j) { unit_add_to_gc_queue(j->unit); - hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id)); + hashmap_remove_value(j->manager->jobs, UINT32_TO_PTR(j->id), j); j->installed = false; } @@ -174,7 +177,7 @@ static void job_merge_into_installed(Job *j, Job *other) { assert(j->unit == other->unit); if (j->type != JOB_NOP) - job_type_merge_and_collapse(&j->type, other->type, j->unit); + assert_se(job_type_merge_and_collapse(&j->type, other->type, j->unit) == 0); else assert(other->type == JOB_NOP); @@ -233,28 +236,36 @@ Job* job_install(Job *j) { job_add_to_gc_queue(j); + job_add_to_dbus_queue(j); /* announce this job to clients */ + unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */ + return j; } int job_install_deserialized(Job *j) { Job **pj; + int r; assert(!j->installed); - if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION) { - log_debug("Invalid job type %s in deserialization.", strna(job_type_to_string(j->type))); - return -EINVAL; - } + if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION) + return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EINVAL), + "Invalid job type %s in deserialization.", + strna(job_type_to_string(j->type))); pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job; - if (*pj) { - log_unit_debug(j->unit, "Unit already has a job installed. Not installing deserialized job."); - return -EEXIST; - } + if (*pj) + return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EEXIST), + "Unit already has a job installed. 
Not installing deserialized job."); + + r = hashmap_put(j->manager->jobs, UINT32_TO_PTR(j->id), j); + if (r == -EEXIST) + return log_unit_debug_errno(j->unit, r, "Job ID %" PRIu32 " already used, cannot deserialize job.", j->id); + if (r < 0) + return log_unit_debug_errno(j->unit, r, "Failed to insert job into jobs hash table: %m"); *pj = j; j->installed = true; - j->reloaded = true; if (j->state == JOB_RUNNING) j->unit->manager->n_running_jobs++; @@ -303,7 +314,7 @@ void job_dependency_free(JobDependency *l) { free(l); } -void job_dump(Job *j, FILE*f, const char *prefix) { +void job_dump(Job *j, FILE *f, const char *prefix) { assert(j); assert(f); @@ -506,6 +517,95 @@ static void job_change_type(Job *j, JobType newtype) { j->type = newtype; } +_pure_ static const char* job_get_begin_status_message_format(Unit *u, JobType t) { + const char *format; + + assert(u); + + if (t == JOB_RELOAD) + return "Reloading %s."; + + assert(IN_SET(t, JOB_START, JOB_STOP)); + + format = UNIT_VTABLE(u)->status_message_formats.starting_stopping[t == JOB_STOP]; + if (format) + return format; + + /* Return generic strings */ + if (t == JOB_START) + return "Starting %s."; + else { + assert(t == JOB_STOP); + return "Stopping %s."; + } +} + +static void job_print_begin_status_message(Unit *u, JobType t) { + const char *format; + + assert(u); + + /* Reload status messages have traditionally not been printed to console. 
*/ + if (!IN_SET(t, JOB_START, JOB_STOP)) + return; + + format = job_get_begin_status_message_format(u, t); + + DISABLE_WARNING_FORMAT_NONLITERAL; + unit_status_printf(u, "", format); + REENABLE_WARNING; +} + +static void job_log_begin_status_message(Unit *u, uint32_t job_id, JobType t) { + const char *format, *mid; + char buf[LINE_MAX]; + + assert(u); + assert(t >= 0); + assert(t < _JOB_TYPE_MAX); + + if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD)) + return; + + if (log_on_console()) /* Skip this if it would only go on the console anyway */ + return; + + /* We log status messages for all units and all operations. */ + + format = job_get_begin_status_message_format(u, t); + + DISABLE_WARNING_FORMAT_NONLITERAL; + (void) snprintf(buf, sizeof buf, format, unit_description(u)); + REENABLE_WARNING; + + mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR : + t == JOB_STOP ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR : + "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR; + + /* Note that we deliberately use LOG_MESSAGE() instead of + * LOG_UNIT_MESSAGE() here, since this is supposed to mimic + * closely what is written to screen using the status output, + * which is supposed the highest level, friendliest output + * possible, which means we should avoid the low-level unit + * name. */ + log_struct(LOG_INFO, + LOG_MESSAGE("%s", buf), + "JOB_ID=%" PRIu32, job_id, + "JOB_TYPE=%s", job_type_to_string(t), + LOG_UNIT_ID(u), + LOG_UNIT_INVOCATION_ID(u), + mid); +} + +static void job_emit_begin_status_message(Unit *u, uint32_t job_id, JobType t) { + assert(u); + assert(t >= 0); + assert(t < _JOB_TYPE_MAX); + + job_log_begin_status_message(u, job_id, t); + job_print_begin_status_message(u, t); +} + static int job_perform_on_unit(Job **j) { uint32_t id; Manager *m; @@ -547,11 +647,12 @@ static int job_perform_on_unit(Job **j) { assert_not_reached("Invalid job type"); } - /* Log if the job still exists and the start/stop/reload function - * actually did something. 
*/ + /* Log if the job still exists and the start/stop/reload function actually did something. Note that this means + * for units for which there's no 'activating' phase (i.e. because we transition directly from 'inactive' to + * 'active') we'll possibly skip the "Starting..." message. */ *j = manager_get_job(m, id); if (*j && r > 0) - unit_status_emit_starting_stopping_reloading(u, t); + job_emit_begin_status_message(u, id, t); return r; } @@ -580,7 +681,9 @@ int job_run_and_invalidate(Job *j) { switch (j->type) { case JOB_VERIFY_ACTIVE: { - UnitActiveState t = unit_active_state(j->unit); + UnitActiveState t; + + t = unit_active_state(j->unit); if (UNIT_IS_ACTIVE_OR_RELOADING(t)) r = -EALREADY; else if (t == UNIT_ACTIVATING) @@ -595,8 +698,7 @@ int job_run_and_invalidate(Job *j) { case JOB_RESTART: r = job_perform_on_unit(&j); - /* If the unit type does not support starting/stopping, - * then simply wait. */ + /* If the unit type does not support starting/stopping, then simply wait. */ if (r == -EBADR) r = 0; break; @@ -614,8 +716,12 @@ int job_run_and_invalidate(Job *j) { } if (j) { - if (r == -EALREADY) + if (r == -EAGAIN) + job_set_state(j, JOB_WAITING); /* Hmm, not ready after all, let's return to JOB_WAITING state */ + else if (r == -EALREADY) /* already being executed */ r = job_finish_and_invalidate(j, JOB_DONE, true, true); + else if (r == -ECOMM) /* condition failed, but all is good */ + r = job_finish_and_invalidate(j, JOB_DONE, true, false); else if (r == -EBADR) r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false); else if (r == -ENOEXEC) @@ -628,8 +734,6 @@ int job_run_and_invalidate(Job *j) { r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false); else if (r == -ESTALE) r = job_finish_and_invalidate(j, JOB_ONCE, true, false); - else if (r == -EAGAIN) - job_set_state(j, JOB_WAITING); else if (r < 0) r = job_finish_and_invalidate(j, JOB_FAILED, true, false); } @@ -637,7 +741,7 @@ int job_run_and_invalidate(Job *j) { return r; } -_pure_ 
static const char *job_get_status_message_format(Unit *u, JobType t, JobResult result) { +_pure_ static const char *job_get_done_status_message_format(Unit *u, JobType t, JobResult result) { static const char *const generic_finished_start_job[_JOB_RESULT_MAX] = { [JOB_DONE] = "Started %s.", @@ -666,7 +770,6 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR [JOB_SKIPPED] = "%s is not active.", }; - const UnitStatusMessageFormats *format_table; const char *format; assert(u); @@ -674,13 +777,11 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR assert(t < _JOB_TYPE_MAX); if (IN_SET(t, JOB_START, JOB_STOP, JOB_RESTART)) { - format_table = &UNIT_VTABLE(u)->status_message_formats; - if (format_table) { - format = t == JOB_START ? format_table->finished_start_job[result] : - format_table->finished_stop_job[result]; - if (format) - return format; - } + format = t == JOB_START ? + UNIT_VTABLE(u)->status_message_formats.finished_start_job[result] : + UNIT_VTABLE(u)->status_message_formats.finished_stop_job[result]; + if (format) + return format; } /* Return generic strings */ @@ -698,7 +799,7 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR static const struct { const char *color, *word; -} job_print_status_messages [_JOB_RESULT_MAX] = { +} job_print_done_status_messages[_JOB_RESULT_MAX] = { [JOB_DONE] = { ANSI_OK_COLOR, " OK " }, [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " }, [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" }, @@ -710,7 +811,7 @@ static const struct { [JOB_ONCE] = { ANSI_HIGHLIGHT_RED, " ONCE " }, }; -static void job_print_status_message(Unit *u, JobType t, JobResult result) { +static void job_print_done_status_message(Unit *u, JobType t, JobResult result) { const char *format; const char *status; @@ -722,19 +823,23 @@ static void job_print_status_message(Unit *u, JobType t, JobResult result) { if (t == JOB_RELOAD) return; - if 
(!job_print_status_messages[result].word) + /* No message if the job did not actually do anything due to failed condition. */ + if (t == JOB_START && result == JOB_DONE && !u->condition_result) return; - format = job_get_status_message_format(u, t, result); + if (!job_print_done_status_messages[result].word) + return; + + format = job_get_done_status_message_format(u, t, result); if (!format) return; if (log_get_show_color()) - status = strjoina(job_print_status_messages[result].color, - job_print_status_messages[result].word, + status = strjoina(job_print_done_status_messages[result].color, + job_print_done_status_messages[result].word, ANSI_NORMAL); else - status = job_print_status_messages[result].word; + status = job_print_done_status_messages[result].word; if (result != JOB_DONE) manager_flip_auto_status(u->manager, true); @@ -751,7 +856,7 @@ static void job_print_status_message(Unit *u, JobType t, JobResult result) { } } -static void job_log_status_message(Unit *u, JobType t, JobResult result) { +static void job_log_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) { const char *format, *mid; char buf[LINE_MAX]; static const int job_result_log_level[_JOB_RESULT_MAX] = { @@ -774,10 +879,24 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { /* Skip printing if output goes to the console, and job_print_status_message() will actually print something to the console. */ - if (log_on_console() && job_print_status_messages[result].word) + if (log_on_console() && job_print_done_status_messages[result].word) + return; + + /* Show condition check message if the job did not actually do anything due to failed condition. 
*/ + if (t == JOB_START && result == JOB_DONE && !u->condition_result) { + log_struct(LOG_INFO, + "MESSAGE=Condition check resulted in %s being skipped.", unit_description(u), + "JOB_ID=%" PRIu32, job_id, + "JOB_TYPE=%s", job_type_to_string(t), + "JOB_RESULT=%s", job_result_to_string(result), + LOG_UNIT_ID(u), + LOG_UNIT_INVOCATION_ID(u), + "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR); + return; + } - format = job_get_status_message_format(u, t, result); + format = job_get_done_status_message_format(u, t, result); if (!format) return; @@ -810,6 +929,7 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { default: log_struct(job_result_log_level[result], LOG_MESSAGE("%s", buf), + "JOB_ID=%" PRIu32, job_id, "JOB_TYPE=%s", job_type_to_string(t), "JOB_RESULT=%s", job_result_to_string(result), LOG_UNIT_ID(u), @@ -819,6 +939,7 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { log_struct(job_result_log_level[result], LOG_MESSAGE("%s", buf), + "JOB_ID=%" PRIu32, job_id, "JOB_TYPE=%s", job_type_to_string(t), "JOB_RESULT=%s", job_result_to_string(result), LOG_UNIT_ID(u), @@ -826,15 +947,11 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { mid); } -static void job_emit_status_message(Unit *u, JobType t, JobResult result) { +static void job_emit_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) { assert(u); - /* No message if the job did not actually do anything due to failed condition. 
*/ - if (t == JOB_START && result == JOB_DONE && !u->condition_result) - return; - - job_log_status_message(u, t, result); - job_print_status_message(u, t, result); + job_log_done_status_message(u, job_id, t, result); + job_print_done_status_message(u, t, result); } static void job_fail_dependencies(Unit *u, UnitDependency d) { @@ -856,19 +973,6 @@ static void job_fail_dependencies(Unit *u, UnitDependency d) { } } -static int job_save_pending_finished_job(Job *j) { - int r; - - assert(j); - - r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL); - if (r < 0) - return r; - - job_unlink(j); - return set_put(j->manager->pending_finished_jobs, j); -} - int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) { Unit *u; Unit *other; @@ -885,11 +989,11 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr j->result = result; - log_unit_debug(u, "Job %s/%s finished, result=%s", u->id, job_type_to_string(t), job_result_to_string(result)); + log_unit_debug(u, "Job %" PRIu32 " %s/%s finished, result=%s", j->id, u->id, job_type_to_string(t), job_result_to_string(result)); /* If this job did nothing to respective unit we don't log the status message */ if (!already) - job_emit_status_message(u, t, result); + job_emit_done_status_message(u, j->id, t, result); /* Patch restart jobs so that they become normal start jobs */ if (result == JOB_DONE && t == JOB_RESTART) { @@ -908,11 +1012,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr j->manager->n_failed_jobs++; job_uninstall(j); - /* Keep jobs started before the reload to send singal later, free all others */ - if (!MANAGER_IS_RELOADING(j->manager) || - !j->reloaded || - job_save_pending_finished_job(j) < 0) - job_free(j); + job_free(j); /* Fail depending jobs on failure */ if (result != JOB_DONE && recursive) { @@ -973,7 +1073,9 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user u = 
j->unit; job_finish_and_invalidate(j, JOB_TIMEOUT, true, false); - emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out"); + emergency_action(u->manager, u->job_timeout_action, + EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN, + u->job_timeout_reboot_arg, -1, "job timed out"); return 0; } @@ -1028,14 +1130,19 @@ int job_start_timer(Job *j, bool job_running) { } void job_add_to_run_queue(Job *j) { + int r; + assert(j); assert(j->installed); if (j->in_run_queue) return; - if (!j->manager->run_queue) - sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT); + if (!j->manager->run_queue) { + r = sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT); + if (r < 0) + log_warning_errno(r, "Failed to enable job run queue event source, ignoring: %m"); + } LIST_PREPEND(run_queue, j->manager->run_queue, j); j->in_run_queue = true; @@ -1071,17 +1178,17 @@ int job_serialize(Job *j, FILE *f) { assert(j); assert(f); - fprintf(f, "job-id=%u\n", j->id); - fprintf(f, "job-type=%s\n", job_type_to_string(j->type)); - fprintf(f, "job-state=%s\n", job_state_to_string(j->state)); - fprintf(f, "job-irreversible=%s\n", yes_no(j->irreversible)); - fprintf(f, "job-sent-dbus-new-signal=%s\n", yes_no(j->sent_dbus_new_signal)); - fprintf(f, "job-ignore-order=%s\n", yes_no(j->ignore_order)); + (void) serialize_item_format(f, "job-id", "%u", j->id); + (void) serialize_item(f, "job-type", job_type_to_string(j->type)); + (void) serialize_item(f, "job-state", job_state_to_string(j->state)); + (void) serialize_bool(f, "job-irreversible", j->irreversible); + (void) serialize_bool(f, "job-sent-dbus-new-signal", j->sent_dbus_new_signal); + (void) serialize_bool(f, "job-ignore-order", j->ignore_order); if (j->begin_usec > 0) - fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec); + (void) serialize_usec(f, "job-begin", j->begin_usec); if (j->begin_running_usec > 0) - fprintf(f, 
"job-begin-running="USEC_FMT"\n", j->begin_running_usec); + (void) serialize_usec(f, "job-begin-running", j->begin_running_usec); bus_track_serialize(j->bus_track, f, "subscribed"); @@ -1091,24 +1198,26 @@ int job_serialize(Job *j, FILE *f) { } int job_deserialize(Job *j, FILE *f) { + int r; + assert(j); assert(f); for (;;) { - char line[LINE_MAX], *l, *v; + _cleanup_free_ char *line = NULL; + char *l, *v; size_t k; - if (!fgets(line, sizeof(line), f)) { - if (feof(f)) - return 0; - return -errno; - } + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read serialization line: %m"); + if (r == 0) + return 0; - char_array_0(line); l = strstrip(line); /* End marker */ - if (l[0] == 0) + if (isempty(l)) return 0; k = strcspn(l, "="); @@ -1122,16 +1231,16 @@ int job_deserialize(Job *j, FILE *f) { if (streq(l, "job-id")) { if (safe_atou32(v, &j->id) < 0) - log_debug("Failed to parse job id value %s", v); + log_debug("Failed to parse job id value: %s", v); } else if (streq(l, "job-type")) { JobType t; t = job_type_from_string(v); if (t < 0) - log_debug("Failed to parse job type %s", v); + log_debug("Failed to parse job type: %s", v); else if (t >= _JOB_TYPE_MAX_IN_TRANSACTION) - log_debug("Cannot deserialize job of type %s", v); + log_debug("Cannot deserialize job of type: %s", v); else j->type = t; @@ -1140,7 +1249,7 @@ int job_deserialize(Job *j, FILE *f) { s = job_state_from_string(v); if (s < 0) - log_debug("Failed to parse job state %s", v); + log_debug("Failed to parse job state: %s", v); else job_set_state(j, s); @@ -1149,7 +1258,7 @@ int job_deserialize(Job *j, FILE *f) { b = parse_boolean(v); if (b < 0) - log_debug("Failed to parse job irreversible flag %s", v); + log_debug("Failed to parse job irreversible flag: %s", v); else j->irreversible = j->irreversible || b; @@ -1158,7 +1267,7 @@ int job_deserialize(Job *j, FILE *f) { b = parse_boolean(v); if (b < 0) - log_debug("Failed to parse job sent_dbus_new_signal flag %s", 
v); + log_debug("Failed to parse job sent_dbus_new_signal flag: %s", v); else j->sent_dbus_new_signal = j->sent_dbus_new_signal || b; @@ -1167,31 +1276,21 @@ int job_deserialize(Job *j, FILE *f) { b = parse_boolean(v); if (b < 0) - log_debug("Failed to parse job ignore_order flag %s", v); + log_debug("Failed to parse job ignore_order flag: %s", v); else j->ignore_order = j->ignore_order || b; - } else if (streq(l, "job-begin")) { - unsigned long long ull; - - if (sscanf(v, "%llu", &ull) != 1) - log_debug("Failed to parse job-begin value %s", v); - else - j->begin_usec = ull; - - } else if (streq(l, "job-begin-running")) { - unsigned long long ull; - - if (sscanf(v, "%llu", &ull) != 1) - log_debug("Failed to parse job-begin-running value %s", v); - else - j->begin_running_usec = ull; + } else if (streq(l, "job-begin")) + (void) deserialize_usec(v, &j->begin_usec); - } else if (streq(l, "subscribed")) { + else if (streq(l, "job-begin-running")) + (void) deserialize_usec(v, &j->begin_running_usec); + else if (streq(l, "subscribed")) { if (strv_extend(&j->deserialized_clients, v) < 0) - log_oom(); - } + return log_oom(); + } else + log_debug("Unknown job serialization key: %s", l); } } @@ -1366,7 +1465,6 @@ bool job_may_gc(Job *j) { * we start + other stop → gc * we stop + other start → stay * we stop + other stop → stay - * */ return true; @@ -1385,15 +1483,8 @@ void job_add_to_gc_queue(Job *j) { j->in_gc_queue = true; } -static int job_compare(const void *a, const void *b) { - Job *x = *(Job**) a, *y = *(Job**) b; - - if (x->id < y->id) - return -1; - if (x->id > y->id) - return 1; - - return 0; +static int job_compare(Job * const *a, Job * const *b) { + return CMP((*a)->id, (*b)->id); } static size_t sort_job_list(Job **list, size_t n) { @@ -1401,7 +1492,7 @@ static size_t sort_job_list(Job **list, size_t n) { size_t a, b; /* Order by numeric IDs */ - qsort_safe(list, n, sizeof(Job*), job_compare); + typesafe_qsort(list, n, job_compare); /* Filter out duplicates */ 
for (a = 0, b = 0; a < n; a++) { diff --git a/src/core/job.h b/src/core/job.h index 2f5f3f3989..1b9bcdd895 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -80,7 +80,7 @@ enum JobMode { }; enum JobResult { - JOB_DONE, /* Job completed successfully */ + JOB_DONE, /* Job completed successfully (or skipped due to a failed ConditionXYZ=) */ JOB_CANCELED, /* Job canceled by a conflicting job installation or by explicit cancel request */ JOB_TIMEOUT, /* Job timeout elapsed */ JOB_FAILED, /* Job failed */ @@ -156,17 +156,16 @@ struct Job { bool irreversible:1; bool in_gc_queue:1; bool ref_by_private_bus:1; - bool reloaded:1; }; Job* job_new(Unit *unit, JobType type); Job* job_new_raw(Unit *unit); void job_unlink(Job *job); -void job_free(Job *job); +Job* job_free(Job *job); Job* job_install(Job *j); int job_install_deserialized(Job *j); void job_uninstall(Job *j); -void job_dump(Job *j, FILE*f, const char *prefix); +void job_dump(Job *j, FILE *f, const char *prefix); int job_serialize(Job *j, FILE *f); int job_deserialize(Job *j, FILE *f); int job_coldplug(Job *j); @@ -223,6 +222,8 @@ void job_add_to_gc_queue(Job *j); int job_get_before(Job *j, Job*** ret); int job_get_after(Job *j, Job*** ret); +DEFINE_TRIVIAL_CLEANUP_FUNC(Job*, job_free); + const char* job_type_to_string(JobType t) _const_; JobType job_type_from_string(const char *s) _pure_; diff --git a/src/core/kill.c b/src/core/kill.c index 929eebfe37..6fe96cfc07 100644 --- a/src/core/kill.c +++ b/src/core/kill.c @@ -9,8 +9,10 @@ void kill_context_init(KillContext *c) { assert(c); c->kill_signal = SIGTERM; + c->final_kill_signal = SIGKILL; c->send_sigkill = true; c->send_sighup = false; + c->watchdog_signal = SIGABRT; } void kill_context_dump(KillContext *c, FILE *f, const char *prefix) { @@ -21,10 +23,12 @@ void kill_context_dump(KillContext *c, FILE *f, const char *prefix) { fprintf(f, "%sKillMode: %s\n" "%sKillSignal: SIG%s\n" + "%sFinalKillSignal: SIG%s\n" "%sSendSIGKILL: %s\n" "%sSendSIGHUP: %s\n", prefix, 
kill_mode_to_string(c->kill_mode), prefix, signal_to_string(c->kill_signal), + prefix, signal_to_string(c->final_kill_signal), prefix, yes_no(c->send_sigkill), prefix, yes_no(c->send_sighup)); } diff --git a/src/core/kill.h b/src/core/kill.h index 2d6aa943a6..f3915be1dc 100644 --- a/src/core/kill.h +++ b/src/core/kill.h @@ -21,8 +21,10 @@ typedef enum KillMode { struct KillContext { KillMode kill_mode; int kill_signal; + int final_kill_signal; bool send_sigkill; bool send_sighup; + int watchdog_signal; }; typedef enum KillWho { diff --git a/src/core/killall.c b/src/core/killall.c index 87d207fd3d..f0ce996556 100644 --- a/src/core/killall.c +++ b/src/core/killall.c @@ -23,16 +23,20 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) { _cleanup_fclose_ FILE *f = NULL; - char c; const char *p; - size_t count; + char c = 0; uid_t uid; int r; /* We are PID 1, let's not commit suicide */ - if (pid == 1) + if (pid <= 1) return true; + /* Ignore kernel threads */ + r = is_kernel_thread(pid); + if (r != 0) + return true; /* also ignore processes where we can't determine this */ + r = get_process_uid(pid, &uid); if (r < 0) return true; /* not really, but better safe than sorry */ @@ -46,11 +50,10 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) { if (!f) return true; /* not really, but has the desired effect */ - count = fread(&c, 1, 1, f); - - /* Kernel threads have an empty cmdline */ - if (count <= 0) - return true; + /* Try to read the first character of the command line. If the cmdline is empty (which might be the case for + * kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as + * actual kernel threads are already filtered out above. */ + (void) fread(&c, 1, 1, f); /* Processes with argv[0][0] = '@' we ignore from the killing spree. 
* @@ -63,7 +66,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) { _cleanup_free_ char *comm = NULL; - get_process_comm(pid, &comm); + (void) get_process_comm(pid, &comm); log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is " "running from the root file system, and thus likely to block re-mounting of the " diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c index 9251929558..a91cfebc67 100644 --- a/src/core/kmod-setup.c +++ b/src/core/kmod-setup.c @@ -76,13 +76,15 @@ int kmod_setup(void) { bool warn_if_module:1; bool (*condition_fn)(void); } kmod_table[] = { - /* auto-loading on use doesn't work before udev is up */ + /* This one we need to load explicitly, since auto-loading on use doesn't work + * before udev created the ghost device nodes, and we need it earlier than that. */ { "autofs4", "/sys/class/misc/autofs", true, false, NULL }, - /* early configure of ::1 on the loopback device */ + /* This one we need to load explicitly, since auto-loading of IPv6 is not done when + * we try to configure ::1 on the loopback device. 
*/ { "ipv6", "/sys/module/ipv6", false, true, NULL }, - /* this should never be a module */ + /* This should never be a module */ { "unix", "/proc/net/unix", true, true, NULL }, #if HAVE_LIBIPTC @@ -93,15 +95,12 @@ int kmod_setup(void) { { "virtio_rng", NULL, false, false, has_virtio_rng }, }; _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL; - unsigned int i; - int r; + unsigned i; if (have_effective_cap(CAP_SYS_MODULE) == 0) return 0; for (i = 0; i < ELEMENTSOF(kmod_table); i++) { - _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL; - if (kmod_table[i].path && access(kmod_table[i].path, F_OK) >= 0) continue; @@ -122,23 +121,7 @@ int kmod_setup(void) { kmod_load_resources(ctx); } - r = kmod_module_new_from_name(ctx, kmod_table[i].module, &mod); - if (r < 0) { - log_error("Failed to lookup module '%s'", kmod_table[i].module); - continue; - } - - r = kmod_module_probe_insert_module(mod, KMOD_PROBE_APPLY_BLACKLIST, NULL, NULL, NULL, NULL); - if (r == 0) - log_debug("Inserted module '%s'", kmod_module_get_name(mod)); - else if (r == KMOD_PROBE_APPLY_BLACKLIST) - log_info("Module '%s' is blacklisted", kmod_module_get_name(mod)); - else { - bool print_warning = kmod_table[i].warn_if_unavailable || (r < 0 && r != -ENOENT); - - log_full_errno(print_warning ? 
LOG_WARNING : LOG_DEBUG, r, - "Failed to insert module '%s': %m", kmod_module_get_name(mod)); - } + (void) module_load_and_warn(ctx, kmod_table[i].module, kmod_table[i].warn_if_unavailable); } #endif diff --git a/src/core/load-dropin.c b/src/core/load-dropin.c index 4b422cc54e..a50b200f5b 100644 --- a/src/core/load-dropin.c +++ b/src/core/load-dropin.c @@ -92,7 +92,7 @@ static int process_deps(Unit *u, UnitDependency dependency, const char *dir_suff log_unit_warning(u, "%s dependency dropin %s target %s has different name", unit_dependency_to_string(dependency), *p, target); - r = unit_add_dependency_by_name(u, dependency, entry, *p, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(u, dependency, entry, true, UNIT_DEPENDENCY_FILE); if (r < 0) log_unit_warning_errno(u, r, "Cannot add %s dependency on %s, ignoring: %m", unit_dependency_to_string(dependency), entry); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 15fb47838c..cdbc67f885 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -57,6 +57,8 @@ $1.SyslogFacility, config_parse_log_facility, 0, $1.SyslogLevel, config_parse_log_level, 0, offsetof($1, exec_context.syslog_priority) $1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix) $1.LogLevelMax, config_parse_log_level, 0, offsetof($1, exec_context.log_level_max) +$1.LogRateLimitIntervalSec, config_parse_sec, 0, offsetof($1, exec_context.log_rate_limit_interval_usec) +$1.LogRateLimitBurst, config_parse_unsigned, 0, offsetof($1, exec_context.log_rate_limit_burst) $1.LogExtraFields, config_parse_log_extra_fields, 0, offsetof($1, exec_context) $1.Capabilities, config_parse_warn_compat, DISABLED_LEGACY, offsetof($1, exec_context) $1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context.secure_bits) @@ -151,7 +153,9 @@ m4_define(`KILL_CONTEXT_CONFIG_ITEMS', `$1.SendSIGKILL, config_parse_bool, 0, 
offsetof($1, kill_context.send_sigkill) $1.SendSIGHUP, config_parse_bool, 0, offsetof($1, kill_context.send_sighup) $1.KillMode, config_parse_kill_mode, 0, offsetof($1, kill_context.kill_mode) -$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)' +$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal) +$1.FinalKillSignal, config_parse_signal, 0, offsetof($1, kill_context.final_kill_signal) +$1.WatchdogSignal, config_parse_signal, 0, offsetof($1, kill_context.watchdog_signal)' )m4_dnl m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', `$1.Slice, config_parse_unit_slice, 0, 0 @@ -162,6 +166,7 @@ $1.CPUShares, config_parse_cpu_shares, 0, $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) @@ -177,6 +182,7 @@ $1.IOReadBandwidthMax, config_parse_io_limit, 0, $1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) @@ -186,6 +192,7 @@ $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, 
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting) $1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max) $1.Delegate, config_parse_delegate, 0, offsetof($1, cgroup_context) +$1.DisableControllers, config_parse_disable_controllers, 0, offsetof($1, cgroup_context) $1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting) $1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow) $1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny) @@ -233,6 +240,8 @@ Unit.StartLimitBurst, config_parse_unsigned, 0, Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action) Unit.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action) Unit.SuccessAction, config_parse_emergency_action, 0, offsetof(Unit, success_action) +Unit.FailureActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, failure_action_exit_status) +Unit.SuccessActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, success_action_exit_status) Unit.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg) Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions) Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions) @@ -282,7 +291,7 @@ Unit.AssertControlGroupController, config_parse_unit_condition_string, CONDI Unit.AssertNull, config_parse_unit_condition_null, 0, offsetof(Unit, asserts) Unit.CollectMode, config_parse_collect_mode, 0, offsetof(Unit, collect_mode) m4_dnl -Service.PIDFile, config_parse_unit_path_printf, 0, offsetof(Service, pid_file) +Service.PIDFile, config_parse_pid_file, 0, offsetof(Service, pid_file) Service.ExecStartPre, config_parse_exec, SERVICE_EXEC_START_PRE, offsetof(Service, exec_command) Service.ExecStart, config_parse_exec, 
SERVICE_EXEC_START, offsetof(Service, exec_command) Service.ExecStartPost, config_parse_exec, SERVICE_EXEC_START_POST, offsetof(Service, exec_command) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index d9a5094aa0..4ebe92fd45 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -34,21 +34,20 @@ #include "hexdecoct.h" #include "io-util.h" #include "ioprio.h" +#include "ip-protocol-list.h" #include "journal-util.h" #include "load-fragment.h" #include "log.h" #include "missing.h" -#include "mount-util.h" +#include "mountpoint-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" #if HAVE_SECCOMP #include "seccomp-util.h" #endif -#include "securebits.h" #include "securebits-util.h" #include "signal-util.h" -#include "socket-protocol-list.h" #include "stat-util.h" #include "string-util.h" #include "strv.h" @@ -57,26 +56,22 @@ #include "user-util.h" #include "web-util.h" -static int supported_socket_protocol_from_string(const char *s) { +static int parse_socket_protocol(const char *s) { int r; - if (isempty(s)) - return IPPROTO_IP; - - r = socket_protocol_from_name(s); + r = parse_ip_protocol(s); if (r < 0) - return -EINVAL; + return r; if (!IN_SET(r, IPPROTO_UDPLITE, IPPROTO_SCTP)) return -EPROTONOSUPPORT; return r; } -DEFINE_CONFIG_PARSE(config_parse_socket_protocol, supported_socket_protocol_from_string, "Failed to parse socket protocol"); +DEFINE_CONFIG_PARSE(config_parse_socket_protocol, parse_socket_protocol, "Failed to parse socket protocol"); DEFINE_CONFIG_PARSE(config_parse_exec_secure_bits, secure_bits_from_string, "Failed to parse secure bits"); DEFINE_CONFIG_PARSE_ENUM(config_parse_collect_mode, collect_mode, CollectMode, "Failed to parse garbage collection mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to 
parse failure action specifier"); DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode, "Failed to parse keyring mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode"); @@ -135,7 +130,7 @@ int config_parse_unit_deps( continue; } - r = unit_add_dependency_by_name(u, d, k, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(u, d, k, true, UNIT_DEPENDENCY_FILE); if (r < 0) log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k); } @@ -1015,6 +1010,17 @@ int config_parse_exec_output( eo = EXEC_OUTPUT_FILE; + } else if ((n = startswith(rvalue, "append:"))) { + + r = unit_full_printf(u, n, &resolved); + if (r < 0) + return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n); + + r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue); + if (r < 0) + return -ENOEXEC; + + eo = EXEC_OUTPUT_FILE_APPEND; } else { eo = exec_output_from_string(rvalue); if (eo < 0) { @@ -1557,7 +1563,7 @@ int config_parse_trigger_unit( return 0; } - r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, true, UNIT_DEPENDENCY_FILE); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add trigger on %s, ignoring: %m", p); return 0; @@ -1755,11 +1761,11 @@ int config_parse_service_sockets( continue; } - r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, true, UNIT_DEPENDENCY_FILE); if (r < 0) log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: 
%m", k); - r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, true, UNIT_DEPENDENCY_FILE); if (r < 0) log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k); } @@ -2861,8 +2867,8 @@ int config_parse_address_families( } af = af_from_name(word); - if (af <= 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, + if (af < 0) { + log_syntax(unit, LOG_ERR, filename, line, af, "Failed to parse address family, ignoring: %s", word); continue; } @@ -3002,13 +3008,13 @@ int config_parse_cpu_quota( return 0; } - r = parse_percent_unbounded(rvalue); + r = parse_permille_unbounded(rvalue); if (r <= 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Invalid CPU quota '%s', ignoring.", rvalue); return 0; } - c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 100U; + c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 1000U; return 0; } @@ -3030,7 +3036,7 @@ int config_parse_memory_limit( if (!isempty(rvalue) && !streq(rvalue, "infinity")) { - r = parse_percent(rvalue); + r = parse_permille(rvalue); if (r < 0) { r = parse_size(rvalue, 1024, &bytes); if (r < 0) { @@ -3038,7 +3044,7 @@ int config_parse_memory_limit( return 0; } } else - bytes = physical_memory_scale(r, 100U); + bytes = physical_memory_scale(r, 1000U); if (bytes >= UINT64_MAX || (bytes <= 0 && !streq(lvalue, "MemorySwapMax"))) { @@ -3047,7 +3053,9 @@ int config_parse_memory_limit( } } - if (streq(lvalue, "MemoryLow")) + if (streq(lvalue, "MemoryMin")) + c->memory_min = bytes; + else if (streq(lvalue, "MemoryLow")) c->memory_low = bytes; else if (streq(lvalue, "MemoryHigh")) c->memory_high = bytes; @@ -3080,7 +3088,7 @@ int config_parse_tasks_max( int r; if (isempty(rvalue)) { - *tasks_max = u->manager->default_tasks_max; + *tasks_max = u ? 
u->manager->default_tasks_max : UINT64_MAX; return 0; } @@ -3089,7 +3097,7 @@ int config_parse_tasks_max( return 0; } - r = parse_percent(rvalue); + r = parse_permille(rvalue); if (r < 0) { r = safe_atou64(rvalue, &v); if (r < 0) { @@ -3097,7 +3105,7 @@ int config_parse_tasks_max( return 0; } } else - v = system_tasks_max_scale(r, 100U); + v = system_tasks_max_scale(r, 1000U); if (v <= 0 || v >= UINT64_MAX) { log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue); @@ -3199,7 +3207,6 @@ int config_parse_device_allow( _cleanup_free_ char *path = NULL, *resolved = NULL; CGroupContext *c = data; - CGroupDeviceAllow *a; const char *p = rvalue; int r; @@ -3231,7 +3238,7 @@ int config_parse_device_allow( return 0; } - if (!startswith(resolved, "block-") && !startswith(resolved, "char-")) { + if (!STARTSWITH_SET(resolved, "block-", "char-")) { r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue); if (r < 0) @@ -3248,17 +3255,7 @@ int config_parse_device_allow( return 0; } - a = new0(CGroupDeviceAllow, 1); - if (!a) - return log_oom(); - - a->path = TAKE_PTR(resolved); - a->r = isempty(p) || !!strchr(p, 'r'); - a->w = isempty(p) || !!strchr(p, 'w'); - a->m = isempty(p) || !!strchr(p, 'm'); - - LIST_PREPEND(device_allow, c->device_allow, a); - return 0; + return cgroup_add_device_allow(c, resolved, p); } int config_parse_io_device_weight( @@ -3335,6 +3332,77 @@ int config_parse_io_device_weight( return 0; } +int config_parse_io_device_latency( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL, *resolved = NULL; + CGroupIODeviceLatency *l; + CGroupContext *c = data; + const char *p = rvalue; + usec_t usec; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + while 
(c->io_device_latencies) + cgroup_context_free_io_device_latency(c, c->io_device_latencies); + + return 0; + } + + r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Invalid syntax, ignoring: %s", rvalue); + return 0; + } + if (r == 0 || isempty(p)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Failed to extract device path and latency from '%s', ignoring.", rvalue); + return 0; + } + + r = unit_full_printf(userdata, path, &resolved); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to resolve unit specifiers in '%s', ignoring: %m", path); + return 0; + } + + r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue); + if (r < 0) + return 0; + + if (parse_sec(p, &usec) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", p); + return 0; + } + + l = new0(CGroupIODeviceLatency, 1); + if (!l) + return log_oom(); + + l->path = TAKE_PTR(resolved); + l->target_usec = usec; + + LIST_PREPEND(device_latencies, c->io_device_latencies, l); + return 0; +} + int config_parse_io_limit( const char *unit, const char *filename, @@ -3904,13 +3972,9 @@ int config_parse_temporary_filesystems( if (r < 0) continue; - r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, path, w); - if (r == -ENOMEM) + r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w); + if (r < 0) return log_oom(); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse mount options, ignoring: %s", word); - continue; - } } } @@ -4116,6 +4180,183 @@ int config_parse_job_running_timeout_sec( return 0; } +int config_parse_emergency_action( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, 
+ void *userdata) { + + Manager *m = NULL; + EmergencyAction *x = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (unit) + m = ((Unit*) userdata)->manager; + else + m = data; + + r = parse_emergency_action(rvalue, MANAGER_IS_SYSTEM(m), x); + if (r < 0) { + if (r == -EOPNOTSUPP && MANAGER_IS_USER(m)) { + /* Compat mode: remove for systemd 241. */ + + log_syntax(unit, LOG_INFO, filename, line, r, + "%s= in user mode specified as \"%s\", using \"exit-force\" instead.", + lvalue, rvalue); + *x = EMERGENCY_ACTION_EXIT_FORCE; + return 0; + } + + if (r == -EOPNOTSUPP) + log_syntax(unit, LOG_ERR, filename, line, r, + "%s= specified as %s mode action, ignoring: %s", + lvalue, MANAGER_IS_SYSTEM(m) ? "user" : "system", rvalue); + else + log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to parse %s=, ignoring: %s", lvalue, rvalue); + return 0; + } + + return 0; +} + +int config_parse_pid_file( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *k = NULL, *n = NULL; + Unit *u = userdata; + char **s = data; + const char *e; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + r = unit_full_printf(u, rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue); + return 0; + } + + /* If this is a relative path make it absolute by prefixing the /run */ + n = path_make_absolute(k, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]); + if (!n) + return log_oom(); + + /* Check that the result is a sensible path */ + r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue); + if (r < 0) + return r; + + e = path_startswith(n, "/var/run/"); + if (e) { + char *z; + + z = strjoin("/run/", e); + if (!z) + return log_oom(); + + 
log_syntax(unit, LOG_NOTICE, filename, line, 0, "PIDFile= references path below legacy directory /var/run/, updating %s → %s; please update the unit file accordingly.", n, z); + + free_and_replace(*s, z); + } else + free_and_replace(*s, n); + + return 0; +} + +int config_parse_exit_status( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *exit_status = data, r; + uint8_t u; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(exit_status); + + if (isempty(rvalue)) { + *exit_status = -1; + return 0; + } + + r = safe_atou8(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse exit status '%s', ignoring: %m", rvalue); + return 0; + } + + *exit_status = u; + return 0; +} + +int config_parse_disable_controllers( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int r; + CGroupContext *c = data; + CGroupMask disabled_mask; + + /* 1. If empty, make all controllers eligible for use again. + * 2. If non-empty, merge all listed controllers, space separated. 
*/ + + if (isempty(rvalue)) { + c->disable_controllers = 0; + return 0; + } + + r = cg_mask_from_string(rvalue, &disabled_mask); + if (r < 0 || disabled_mask <= 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid cgroup string: %s, ignoring", rvalue); + return 0; + } + + c->disable_controllers |= disabled_mask; + + return 0; +} + #define FOLLOW_MAX 8 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { @@ -4174,7 +4415,7 @@ static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { free_and_replace(*filename, target); } - f = fdopen(fd, "re"); + f = fdopen(fd, "r"); if (!f) { safe_close(fd); return -errno; @@ -4290,7 +4531,6 @@ static int load_from_path(Unit *u, const char *path) { r = open_follow(&filename, &f, symlink_names, &id); if (r >= 0) break; - filename = mfree(filename); /* ENOENT means that the file is missing or is a dangling symlink. * ENOTDIR means that one of paths we expect to be is a directory @@ -4302,6 +4542,7 @@ static int load_from_path(Unit *u, const char *path) { else if (!IN_SET(r, -ENOENT, -ENOTDIR)) return r; + filename = mfree(filename); /* Empty the symlink names for the next run */ set_clear_free(symlink_names); } @@ -4524,6 +4765,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_device_policy, "POLICY" }, { config_parse_io_limit, "LIMIT" }, { config_parse_io_device_weight, "DEVICEWEIGHT" }, + { config_parse_io_device_latency, "DEVICELATENCY" }, { config_parse_blockio_bandwidth, "BANDWIDTH" }, { config_parse_blockio_weight, "WEIGHT" }, { config_parse_blockio_device_weight, "DEVICEWEIGHT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index dad281ef72..e0d3b4ec3b 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -39,6 +39,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_affinity); CONFIG_PARSER_PROTOTYPE(config_parse_exec_secure_bits); CONFIG_PARSER_PROTOTYPE(config_parse_capability_set); 
CONFIG_PARSER_PROTOTYPE(config_parse_kill_signal); +CONFIG_PARSER_PROTOTYPE(config_parse_final_kill_signal); CONFIG_PARSER_PROTOTYPE(config_parse_exec_mount_flags); CONFIG_PARSER_PROTOTYPE(config_parse_timer); CONFIG_PARSER_PROTOTYPE(config_parse_trigger_unit); @@ -68,12 +69,12 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max); CONFIG_PARSER_PROTOTYPE(config_parse_delegate); CONFIG_PARSER_PROTOTYPE(config_parse_device_policy); CONFIG_PARSER_PROTOTYPE(config_parse_device_allow); +CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency); CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight); CONFIG_PARSER_PROTOTYPE(config_parse_io_limit); CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight); CONFIG_PARSER_PROTOTYPE(config_parse_blockio_device_weight); CONFIG_PARSER_PROTOTYPE(config_parse_blockio_bandwidth); -CONFIG_PARSER_PROTOTYPE(config_parse_netclass); CONFIG_PARSER_PROTOTYPE(config_parse_job_mode); CONFIG_PARSER_PROTOTYPE(config_parse_job_mode_isolate); CONFIG_PARSER_PROTOTYPE(config_parse_exec_selinux_context); @@ -102,6 +103,9 @@ CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec); CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec); CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields); CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_pid_file); +CONFIG_PARSER_PROTOTYPE(config_parse_exit_status); +CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/core/locale-setup.c b/src/core/locale-setup.c index c14523fee9..584fb220a1 100644 --- a/src/core/locale-setup.c +++ b/src/core/locale-setup.c @@ -4,46 +4,43 @@ #include <stdlib.h> #include <string.h> +#include "env-file.h" #include "env-util.h" -#include "fileio.h" #include "locale-setup.h" #include "locale-util.h" +#include "proc-cmdline.h" #include "string-util.h" #include "strv.h" #include "util.h" #include "virt.h" 
int locale_setup(char ***environment) { - char **add; - char *variables[_VARIABLE_LC_MAX] = {}; - int r = 0, i; - - if (detect_container() <= 0) { - r = parse_env_file(NULL, "/proc/cmdline", WHITESPACE, - "locale.LANG", &variables[VARIABLE_LANG], - "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE], - "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE], - "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC], - "locale.LC_TIME", &variables[VARIABLE_LC_TIME], - "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE], - "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY], - "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES], - "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER], - "locale.LC_NAME", &variables[VARIABLE_LC_NAME], - "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS], - "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE], - "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT], - "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION], - NULL); - - if (r < 0 && r != -ENOENT) - log_warning_errno(r, "Failed to read /proc/cmdline: %m"); - } - - /* Hmm, nothing set on the kernel cmd line? 
Then let's - * try /etc/locale.conf */ + _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {}; + _cleanup_strv_free_ char **add = NULL; + LocaleVariable i; + int r; + + r = proc_cmdline_get_key_many(PROC_CMDLINE_STRIP_RD_PREFIX, + "locale.LANG", &variables[VARIABLE_LANG], + "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE], + "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE], + "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC], + "locale.LC_TIME", &variables[VARIABLE_LC_TIME], + "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE], + "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY], + "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES], + "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER], + "locale.LC_NAME", &variables[VARIABLE_LC_NAME], + "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS], + "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE], + "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT], + "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]); + if (r < 0 && r != -ENOENT) + log_warning_errno(r, "Failed to read /proc/cmdline: %m"); + + /* Hmm, nothing set on the kernel cmd line? 
Then let's try /etc/locale.conf */ if (r <= 0) { - r = parse_env_file(NULL, "/etc/locale.conf", NEWLINE, + r = parse_env_file(NULL, "/etc/locale.conf", "LANG", &variables[VARIABLE_LANG], "LANGUAGE", &variables[VARIABLE_LANGUAGE], "LC_CTYPE", &variables[VARIABLE_LC_CTYPE], @@ -57,14 +54,11 @@ int locale_setup(char ***environment) { "LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS], "LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE], "LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT], - "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION], - NULL); - + "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]); if (r < 0 && r != -ENOENT) log_warning_errno(r, "Failed to read /etc/locale.conf: %m"); } - add = NULL; for (i = 0; i < _VARIABLE_LC_MAX; i++) { char *s; @@ -72,36 +66,32 @@ int locale_setup(char ***environment) { continue; s = strjoin(locale_variable_to_string(i), "=", variables[i]); - if (!s) { - r = -ENOMEM; - goto finish; - } + if (!s) + return -ENOMEM; - if (strv_consume(&add, s) < 0) { - r = -ENOMEM; - goto finish; - } + if (strv_consume(&add, s) < 0) + return -ENOMEM; } - if (!strv_isempty(add)) { - char **e; + if (strv_isempty(add)) { + /* If no locale is configured then default to C.UTF-8. 
*/ - e = strv_env_merge(2, *environment, add); - if (!e) { - r = -ENOMEM; - goto finish; - } - - strv_free_and_replace(*environment, e); + add = strv_new("LANG=C.UTF-8"); + if (!add) + return -ENOMEM; } - r = 0; + if (strv_isempty(*environment)) + strv_free_and_replace(*environment, add); + else { + char **merged; -finish: - strv_free(add); + merged = strv_env_merge(2, *environment, add); + if (!merged) + return -ENOMEM; - for (i = 0; i < _VARIABLE_LC_MAX; i++) - free(variables[i]); + strv_free_and_replace(*environment, merged); + } - return r; + return 0; } diff --git a/src/core/loopback-setup.c b/src/core/loopback-setup.c index 835553ec8f..f613db83ce 100644 --- a/src/core/loopback-setup.c +++ b/src/core/loopback-setup.c @@ -53,7 +53,7 @@ static int start_loopback(sd_netlink *rtnl, struct state *s) { if (r < 0) return r; - r = sd_netlink_call_async(rtnl, req, generic_handler, s, LOOPBACK_SETUP_TIMEOUT_USEC, NULL); + r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, LOOPBACK_SETUP_TIMEOUT_USEC, "systemd-start-loopback"); if (r < 0) return r; @@ -88,7 +88,7 @@ static int add_ipv4_address(sd_netlink *rtnl, struct state *s) { if (r < 0) return r; - r = sd_netlink_call_async(rtnl, req, generic_handler, s, USEC_INFINITY, NULL); + r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv4"); if (r < 0) return r; @@ -123,7 +123,7 @@ static int add_ipv6_address(sd_netlink *rtnl, struct state *s) { if (r < 0) return r; - r = sd_netlink_call_async(rtnl, req, generic_handler, s, USEC_INFINITY, NULL); + r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv6"); if (r < 0) return r; diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c index 11528f83c4..aae548064e 100644 --- a/src/core/machine-id-setup.c +++ b/src/core/machine-id-setup.c @@ -15,7 +15,7 @@ #include "machine-id-setup.h" #include "macro.h" #include "mkdir.h" -#include 
"mount-util.h" +#include "mountpoint-util.h" #include "path-util.h" #include "process-util.h" #include "stat-util.h" @@ -73,7 +73,7 @@ static int generate_machine_id(const char *root, sd_id128_t *ret) { /* If that didn't work, generate a random machine id */ r = sd_id128_randomize(ret); if (r < 0) - return log_error_errno(r, "Failed to generate randomized : %m"); + return log_error_errno(r, "Failed to generate randomized machine ID: %m"); log_info("Initializing machine ID from random generator."); return 0; @@ -108,8 +108,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) { "2) /etc/machine-id exists and is empty.\n" "3) /etc/machine-id is missing and /etc is writable.\n"); else - return log_error_errno(errno, - "Cannot open %s: %m", etc_machine_id); + return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id); } writable = false; @@ -201,14 +200,14 @@ int machine_id_commit(const char *root) { r = fd_is_temporary_fs(fd); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id); - if (r == 0) { - log_error("%s is not on a temporary file system.", etc_machine_id); - return -EROFS; - } + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EROFS), + "%s is not on a temporary file system.", + etc_machine_id); r = id128_read_fd(fd, ID128_PLAIN, &id); if (r < 0) - return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); + return log_error_errno(r, "We didn't find a valid machine ID in %s: %m", etc_machine_id); fd = safe_close(fd); diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in index f3b74f4273..9ccad5ebfe 100644 --- a/src/core/macros.systemd.in +++ b/src/core/macros.systemd.in @@ -2,8 +2,6 @@ # SPDX-License-Identifier: LGPL-2.1+ # # This file is part of systemd. 
-# -# Copyright 2012 Lennart Poettering # RPM macros for packages installing systemd unit files @@ -18,7 +16,7 @@ %_sysctldir @sysctldir@ %_sysusersdir @sysusersdir@ %_tmpfilesdir @tmpfilesdir@ -%_environmnentdir @environmentdir@ +%_environmentdir @environmentdir@ %_modulesloaddir @modulesloaddir@ %_modprobedir @modprobedir@ %_systemdgeneratordir @systemgeneratordir@ @@ -26,6 +24,10 @@ %_systemd_system_env_generator_dir @systemenvgeneratordir@ %_systemd_user_env_generator_dir @userenvgeneratordir@ +# Because we had one release with a typo... +# This is temporary (Remove after systemd 240 is released) +%_environmnentdir %{warn:Use %%_environmentdir instead}%_environmentdir + %systemd_requires \ Requires(post): systemd \ Requires(preun): systemd \ diff --git a/src/core/main.c b/src/core/main.c index 44dd8348be..839dc062ff 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -28,6 +28,7 @@ #include "bus-error.h" #include "bus-util.h" #include "capability-util.h" +#include "cgroup-util.h" #include "clock-util.h" #include "conf-parser.h" #include "cpu-set-util.h" @@ -57,6 +58,7 @@ #include "pager.h" #include "parse-util.h" #include "path-util.h" +#include "pretty-print.h" #include "proc-cmdline.h" #include "process-util.h" #include "raw-clone.h" @@ -73,6 +75,7 @@ #include "stdio-util.h" #include "strv.h" #include "switch-root.h" +#include "sysctl-util.h" #include "terminal-util.h" #include "umask-util.h" #include "user-util.h" @@ -95,11 +98,10 @@ static int arg_crash_chvt = -1; static bool arg_crash_shell = false; static bool arg_crash_reboot = false; static char *arg_confirm_spawn = NULL; -static ShowStatus arg_show_status = _SHOW_STATUS_UNSET; +static ShowStatus arg_show_status = _SHOW_STATUS_INVALID; static bool arg_switched_root = false; -static bool arg_no_pager = false; +static PagerFlags arg_pager_flags = 0; static bool arg_service_watchdogs = true; -static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; 
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC; @@ -109,6 +111,7 @@ static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL; static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST; static usec_t arg_runtime_watchdog = 0; static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; +static char *arg_early_core_pattern = NULL; static char *arg_watchdog_device = NULL; static char **arg_default_environment = NULL; static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {}; @@ -118,7 +121,7 @@ static nsec_t arg_timer_slack_nsec = NSEC_INFINITY; static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; static Set* arg_syscall_archs = NULL; static FILE* arg_serialization = NULL; -static bool arg_default_cpu_accounting = false; +static int arg_default_cpu_accounting = -1; static bool arg_default_io_accounting = false; static bool arg_default_ip_accounting = false; static bool arg_default_blockio_accounting = false; @@ -128,7 +131,14 @@ static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; -_noreturn_ static void freeze_or_reboot(void) { +_noreturn_ static void freeze_or_exit_or_reboot(void) { + + /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to the + * container manager, and thus inform it that something went wrong. 
*/ + if (detect_container() > 0) { + log_emergency("Exiting PID 1..."); + exit(EXIT_EXCEPTION); + } if (arg_crash_reboot) { log_notice("Rebooting in 10s..."); @@ -183,7 +193,7 @@ _noreturn_ static void crash(int sig) { (void) kill(pid, sig); /* raise() would kill the parent */ assert_not_reached("We shouldn't be here..."); - _exit(EXIT_FAILURE); + _exit(EXIT_EXCEPTION); } else { siginfo_t status; int r; @@ -226,17 +236,18 @@ _noreturn_ static void crash(int sig) { else if (pid == 0) { (void) setsid(); (void) make_console_stdio(); + (void) rlimit_nofile_safe(); (void) execle("/bin/sh", "/bin/sh", NULL, environ); log_emergency_errno(errno, "execle() failed: %m"); - _exit(EXIT_FAILURE); + _exit(EXIT_EXCEPTION); } else { log_info("Spawned crash shell as PID "PID_FMT".", pid); (void) wait_for_terminate(pid, NULL); } } - freeze_or_reboot(); + freeze_or_exit_or_reboot(); } static void install_crash_handler(void) { @@ -347,22 +358,35 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse dump core switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value); else arg_dump_core = r; + } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (path_is_absolute(value)) + (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern); + else + log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value); + } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) { if (!value) arg_crash_chvt = 0; /* turn on */ - else if (parse_crash_chvt(value) < 0) - log_warning("Failed to parse crash chvt switch %s. 
Ignoring.", value); + else { + r = parse_crash_chvt(value); + if (r < 0) + log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value); + } } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) { r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse crash shell switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value); else arg_crash_shell = r; @@ -370,7 +394,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse crash reboot switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value); else arg_crash_reboot = r; @@ -379,17 +403,15 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = parse_confirm_spawn(value, &s); if (r < 0) - log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value); - else { - free(arg_confirm_spawn); - arg_confirm_spawn = s; - } + log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value); + else + free_and_replace(arg_confirm_spawn, s); } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) { r = value ? parse_boolean(value) : true; if (r < 0) - log_warning("Failed to parse service watchdog switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value); else arg_service_watchdogs = r; @@ -398,7 +420,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat if (value) { r = parse_show_status(value, &arg_show_status); if (r < 0) - log_warning("Failed to parse show status switch %s. 
Ignoring.", value); + log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value); } else arg_show_status = SHOW_STATUS_YES; @@ -409,7 +431,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = exec_output_from_string(value); if (r < 0) - log_warning("Failed to parse default standard output switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value); else arg_default_std_output = r; @@ -420,7 +442,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = exec_output_from_string(value); if (r < 0) - log_warning("Failed to parse default standard error switch %s. Ignoring.", value); + log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value); else arg_default_std_error = r; @@ -447,7 +469,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = set_machine_id(value); if (r < 0) - log_warning("MachineID '%s' is not valid. 
Ignoring.", value); + log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value); } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) { @@ -456,7 +478,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat r = parse_sec(value, &arg_default_timeout_start_usec); if (r < 0) - log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value); + log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value); if (arg_default_timeout_start_usec <= 0) arg_default_timeout_start_usec = USEC_INFINITY; @@ -466,11 +488,11 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat if (proc_cmdline_value_missing(key, value)) return 0; - parse_path_argument_and_warn(value, false, &arg_watchdog_device); + (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device); } else if (streq(key, "quiet") && !value) { - if (arg_show_status == _SHOW_STATUS_UNSET) + if (arg_show_status == _SHOW_STATUS_INVALID) arg_show_status = SHOW_STATUS_AUTO; } else if (streq(key, "debug") && !value) { @@ -604,8 +626,8 @@ static int config_parse_output_restricted( return 0; } - if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue); + if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file:, append: are not supported as defaults, ignoring: %s", rvalue); return 0; } @@ -654,7 +676,7 @@ static int parse_config_file(void) { { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot }, { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "JoinControllers", 
config_parse_join_controllers, 0, &arg_join_controllers }, + { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog }, { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device }, @@ -690,7 +712,7 @@ static int parse_config_file(void) { { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit }, { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit }, { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit }, - { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, + { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting }, { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting }, { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, @@ -739,7 +761,14 @@ static void set_manager_defaults(Manager *m) { m->default_restart_usec = arg_default_restart_usec; m->default_start_limit_interval = arg_default_start_limit_interval; m->default_start_limit_burst = arg_default_start_limit_burst; - m->default_cpu_accounting = arg_default_cpu_accounting; + + /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU + * controller to be enabled, so the default is to enable it unless we got told otherwise. 
*/ + if (arg_default_cpu_accounting >= 0) + m->default_cpu_accounting = arg_default_cpu_accounting; + else + m->default_cpu_accounting = cpu_accounting_is_cheap(); + m->default_io_accounting = arg_default_io_accounting; m->default_ip_accounting = arg_default_ip_accounting; m->default_blockio_accounting = arg_default_blockio_accounting; @@ -747,8 +776,10 @@ static void set_manager_defaults(Manager *m) { m->default_tasks_accounting = arg_default_tasks_accounting; m->default_tasks_max = arg_default_tasks_max; - manager_set_default_rlimits(m, arg_default_rlimit); - manager_environment_add(m, NULL, arg_default_environment); + (void) manager_set_default_rlimits(m, arg_default_rlimit); + + (void) manager_default_environment(m); + (void) manager_transient_environment_add(m, arg_default_environment); } static void set_manager_settings(Manager *m) { @@ -838,19 +869,15 @@ static int parse_argv(int argc, char *argv[]) { case ARG_LOG_LEVEL: r = log_set_max_level_from_string(optarg); - if (r < 0) { - log_error("Failed to parse log level %s.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg); break; case ARG_LOG_TARGET: r = log_set_target_from_string(optarg); - if (r < 0) { - log_error("Failed to parse log target %s.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg); break; @@ -858,10 +885,9 @@ static int parse_argv(int argc, char *argv[]) { if (optarg) { r = log_show_color_from_string(optarg); - if (r < 0) { - log_error("Failed to parse log color setting %s.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse log color setting \"%s\": %m", + optarg); } else log_show_color(true); @@ -870,10 +896,9 @@ static int parse_argv(int argc, char *argv[]) { case ARG_LOG_LOCATION: if (optarg) { r = log_show_location_from_string(optarg); - if (r < 0) { - log_error("Failed to parse log location setting %s.", optarg); - return r; - 
} + if (r < 0) + return log_error_errno(r, "Failed to parse log location setting \"%s\": %m", + optarg); } else log_show_location(true); @@ -881,26 +906,24 @@ static int parse_argv(int argc, char *argv[]) { case ARG_DEFAULT_STD_OUTPUT: r = exec_output_from_string(optarg); - if (r < 0) { - log_error("Failed to parse default standard output setting %s.", optarg); - return r; - } else - arg_default_std_output = r; + if (r < 0) + return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m", + optarg); + arg_default_std_output = r; break; case ARG_DEFAULT_STD_ERROR: r = exec_output_from_string(optarg); - if (r < 0) { - log_error("Failed to parse default standard error output setting %s.", optarg); - return r; - } else - arg_default_std_error = r; + if (r < 0) + return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m", + optarg); + arg_default_std_error = r; break; case ARG_UNIT: r = free_and_strdup(&arg_default_unit, optarg); if (r < 0) - return log_error_errno(r, "Failed to set default unit %s: %m", optarg); + return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg); break; @@ -917,7 +940,7 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_NO_PAGER: - arg_no_pager = true; + arg_pager_flags |= PAGER_DISABLE; break; case ARG_VERSION: @@ -938,7 +961,8 @@ static int parse_argv(int argc, char *argv[]) { else { r = parse_boolean(optarg); if (r < 0) - return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg); + return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m", + optarg); arg_dump_core = r; } break; @@ -946,7 +970,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_CRASH_CHVT: r = parse_crash_chvt(optarg); if (r < 0) - return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg); + return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m", + optarg); break; case ARG_CRASH_SHELL: 
@@ -955,7 +980,8 @@ static int parse_argv(int argc, char *argv[]) { else { r = parse_boolean(optarg); if (r < 0) - return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg); + return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m", + optarg); arg_crash_shell = r; } break; @@ -966,7 +992,8 @@ static int parse_argv(int argc, char *argv[]) { else { r = parse_boolean(optarg); if (r < 0) - return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg); + return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m", + optarg); arg_crash_reboot = r; } break; @@ -976,23 +1003,24 @@ static int parse_argv(int argc, char *argv[]) { r = parse_confirm_spawn(optarg, &arg_confirm_spawn); if (r < 0) - return log_error_errno(r, "Failed to parse confirm spawn option: %m"); + return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m", + optarg); break; case ARG_SERVICE_WATCHDOGS: r = parse_boolean(optarg); if (r < 0) - return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg); + return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m", + optarg); arg_service_watchdogs = r; break; case ARG_SHOW_STATUS: if (optarg) { r = parse_show_status(optarg, &arg_show_status); - if (r < 0) { - log_error("Failed to parse show status boolean %s.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m", + optarg); } else arg_show_status = SHOW_STATUS_YES; break; @@ -1002,16 +1030,18 @@ static int parse_argv(int argc, char *argv[]) { FILE *f; r = safe_atoi(optarg, &fd); - if (r < 0 || fd < 0) { - log_error("Failed to parse deserialize option %s.", optarg); - return -EINVAL; - } + if (r < 0) + log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg); + if (fd < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid deserialize fd: %d", + fd); (void) fd_cloexec(fd, true); f = 
fdopen(fd, "r"); if (!f) - return log_error_errno(errno, "Failed to open serialization fd: %m"); + return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd); safe_fclose(arg_serialization); arg_serialization = f; @@ -1026,7 +1056,7 @@ static int parse_argv(int argc, char *argv[]) { case ARG_MACHINE_ID: r = set_machine_id(optarg); if (r < 0) - return log_error_errno(r, "MachineID '%s' is not valid.", optarg); + return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg); break; case 'h': @@ -1059,14 +1089,20 @@ static int parse_argv(int argc, char *argv[]) { /* Hmm, when we aren't run as init system * let's complain about excess arguments */ - log_error("Excess arguments."); - return -EINVAL; + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Excess arguments."); } return 0; } static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd", "1", &link); + if (r < 0) + return log_oom(); printf("%s [OPTIONS...]\n\n" "Starts up and maintains the system or user services.\n\n" @@ -1090,20 +1126,28 @@ static int help(void) { " --log-color[=BOOL] Highlight important log messages\n" " --log-location[=BOOL] Include code location in log messages\n" " --default-standard-output= Set default standard output for services\n" - " --default-standard-error= Set default standard error output for services\n", - program_invocation_short_name); + " --default-standard-error= Set default standard error output for services\n" + "\nSee the %s for details.\n" + , program_invocation_short_name + , link + ); return 0; } -static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) { +static int prepare_reexecute( + Manager *m, + FILE **ret_f, + FDSet **ret_fds, + bool switching_root) { + _cleanup_fdset_free_ FDSet *fds = NULL; _cleanup_fclose_ FILE *f = NULL; int r; assert(m); - assert(_f); - assert(_fds); + assert(ret_f); + assert(ret_fds); r = manager_open_serialization(m, &f); if (r < 0) @@ 
-1119,7 +1163,7 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching r = manager_serialize(m, f, fds, switching_root); if (r < 0) - return log_error_errno(r, "Failed to serialize state: %m"); + return r; if (fseeko(f, 0, SEEK_SET) == (off_t) -1) return log_error_errno(errno, "Failed to rewind serialization fd: %m"); @@ -1132,24 +1176,108 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching if (r < 0) return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m"); - *_f = TAKE_PTR(f); - *_fds = TAKE_PTR(fds); + *ret_f = TAKE_PTR(f); + *ret_fds = TAKE_PTR(fds); return 0; } +static void bump_file_max_and_nr_open(void) { + + /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file + * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting + * them and limiting them in another two layers of limits is unnecessary and just complicates things. This + * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOFILE (soft + + * hard) the only ones that really matter. */ + +#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN + _cleanup_free_ char *t = NULL; + int r; +#endif + +#if BUMP_PROC_SYS_FS_FILE_MAX + /* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as + * "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */ + if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) { + log_oom(); + return; + } + + r = sysctl_write("fs/file-max", t); + if (r < 0) + log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m"); +#endif + +#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN + t = mfree(t); +#endif + +#if BUMP_PROC_SYS_FS_NR_OPEN + int v = INT_MAX; + + /* Arg! 
The kernel enforces maximum and minimum values on fs.nr_open, but we don't really know what they + * are. The expression by which the maximum is determined is dependent on the architecture, and is something we + * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since + * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with + * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel + * APIs are kernel APIs, so what can we do... 🤯 */ + + for (;;) { + int k; + + v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */ + if (v < 1024) { + log_warning("Can't bump fs.nr_open, value too small."); + break; + } + + k = read_nr_open(); + if (k < 0) { + log_error_errno(k, "Failed to read fs.nr_open: %m"); + break; + } + if (k >= v) { /* Already larger */ + log_debug("Skipping bump, value is already larger."); + break; + } + + if (asprintf(&t, "%i\n", v) < 0) { + log_oom(); + return; + } + + r = sysctl_write("fs/nr_open", t); + t = mfree(t); + if (r == -EINVAL) { + log_debug("Couldn't write fs.nr_open as %i, halving it.", v); + v /= 2; + continue; + } + if (r < 0) { + log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m"); + break; + } + + log_debug("Successfully bumped fs.nr_open to %i", v); + break; + } +#endif +} + static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { int r, nr; assert(saved_rlimit); - /* Save the original RLIMIT_NOFILE so that we can reset it - * later when transitioning from the initrd to the main + /* Save the original RLIMIT_NOFILE so that we can reset it later when transitioning from the initrd to the main * systemd or suchlike. 
*/ if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); - /* Make sure forked processes get the default kernel setting */ + /* Get the underlying absolute limit the kernel enforces */ + nr = read_nr_open(); + + /* Make sure forked processes get limits based on the original kernel setting */ if (!arg_default_rlimit[RLIMIT_NOFILE]) { struct rlimit *rl; @@ -1157,11 +1285,25 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { if (!rl) return log_oom(); + /* Bump the hard limit for system services to a substantially higher value. The default hard limit + * current kernels set is pretty low (4K), mostly for historical reasons. According to kernel + * developers, the fd handling in recent kernels has been optimized substantially enough, so that we + * can bump the limit now, without paying too high a price in memory or performance. Note however that + * we only bump the hard limit, not the soft limit. That's because select() works the way it works, and + * chokes on fds >= 1024. If we'd bump the soft limit globally, it might accidentally happen to + * unsuspecting programs that they get fds higher than what they can process using select(). By only + * bumping the hard limit but leaving the soft limit as it is we avoid this pitfall: programs that are + * written by folks with the select() problem in mind (and thus use poll()/epoll instead of + * select(), the way everybody should) can explicitly opt into high fds by bumping their soft limit + * beyond 1024, to the hard limit we pass. 
*/ + if (arg_system) + rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE)); + arg_default_rlimit[RLIMIT_NOFILE] = rl; } - /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */ - nr = read_nr_open(); + /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for + * both hard and soft. */ r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr)); if (r < 0) return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m"); @@ -1173,16 +1315,15 @@ static int bump_rlimit_memlock(struct rlimit *saved_rlimit) { int r; assert(saved_rlimit); - assert(getuid() == 0); - /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which - * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's - * bump the value high enough for the root user. */ + /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should + * normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's bump + * the value high enough for our user. 
*/ if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0) return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); - r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL)); + r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(HIGH_RLIMIT_MEMLOCK)); if (r < 0) return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m"); @@ -1219,7 +1360,7 @@ static int status_welcome(void) { _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL; int r; - if (arg_show_status <= 0) + if (IN_SET(arg_show_status, SHOW_STATUS_NO, SHOW_STATUS_AUTO)) return 0; r = parse_os_release(NULL, @@ -1231,12 +1372,12 @@ static int status_welcome(void) { "Failed to read os-release file, ignoring: %m"); if (log_get_show_color()) - return status_printf(NULL, false, false, + return status_printf(NULL, 0, "\nWelcome to \x1B[%sm%s\x1B[0m!\n", isempty(ansi_color) ? "1" : ansi_color, isempty(pretty_name) ? "Linux" : pretty_name); else - return status_printf(NULL, false, false, + return status_printf(NULL, 0, "\nWelcome to %s!\n", isempty(pretty_name) ? "Linux" : pretty_name); } @@ -1268,7 +1409,7 @@ static int bump_unix_max_dgram_qlen(void) { r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen); if (r < 0) - return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m"); + return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m"); r = safe_atolu(qlen, &v); if (r < 0) @@ -1277,7 +1418,7 @@ static int bump_unix_max_dgram_qlen(void) { if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN) return 0; - r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", 0, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN); + r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN); if (r < 0) return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? 
LOG_DEBUG : LOG_WARNING, r, "Failed to bump AF_UNIX datagram queue length, ignoring: %m"); @@ -1474,13 +1615,29 @@ static void initialize_coredump(bool skip_setup) { if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0) log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m"); - /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored - * until the systemd-coredump tool is enabled via sysctl. */ + /* But at the same time, turn off the core_pattern logic by default, so that no + * coredumps are stored until the systemd-coredump tool is enabled via + * sysctl. However it can be changed via the kernel command line later so core + * dumps can still be generated during early startup and in initramfs. */ if (!skip_setup) disable_coredumps(); #endif } +static void initialize_core_pattern(bool skip_setup) { + int r; + + if (skip_setup || !arg_early_core_pattern) + return; + + if (getpid_cached() != 1) + return; + + r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern); +} + static void do_reexecute( int argc, char *argv[], @@ -1577,6 +1734,7 @@ static void do_reexecute( /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */ (void) reset_all_signal_handlers(); (void) reset_signal_mask(); + (void) rlimit_nofile_safe(); if (switch_root_init) { args[0] = switch_root_init; @@ -1633,7 +1791,7 @@ static int invoke_main_loop( return log_emergency_errno(r, "Failed to run main loop: %m"); } - switch (m->exit_code) { + switch ((ManagerObjective) r) { case MANAGER_RELOAD: { LogTarget saved_log_target; @@ -1660,7 +1818,8 @@ static int invoke_main_loop( r = manager_reload(m); if (r < 0) - log_warning_errno(r, "Failed to reload, ignoring: %m"); + /* Reloading failed before the point of no return. 
Let's continue running as if nothing happened. */ + m->objective = MANAGER_OK; break; } @@ -1724,19 +1883,19 @@ static int invoke_main_loop( case MANAGER_POWEROFF: case MANAGER_HALT: case MANAGER_KEXEC: { - static const char * const table[_MANAGER_EXIT_CODE_MAX] = { - [MANAGER_EXIT] = "exit", - [MANAGER_REBOOT] = "reboot", + static const char * const table[_MANAGER_OBJECTIVE_MAX] = { + [MANAGER_EXIT] = "exit", + [MANAGER_REBOOT] = "reboot", [MANAGER_POWEROFF] = "poweroff", - [MANAGER_HALT] = "halt", - [MANAGER_KEXEC] = "kexec" + [MANAGER_HALT] = "halt", + [MANAGER_KEXEC] = "kexec", }; log_notice("Shutting down."); *ret_reexecute = false; *ret_retval = m->return_value; - assert_se(*ret_shutdown_verb = table[m->exit_code]); + assert_se(*ret_shutdown_verb = table[m->objective]); *ret_fds = NULL; *ret_switch_root_dir = *ret_switch_root_init = NULL; @@ -1744,7 +1903,7 @@ static int invoke_main_loop( } default: - assert_not_reached("Unknown exit code."); + assert_not_reached("Unknown or unexpected manager objective."); } } } @@ -1816,7 +1975,7 @@ static int initialize_runtime( install_crash_handler(); if (!skip_setup) { - r = mount_cgroup_controllers(arg_join_controllers); + r = mount_cgroup_controllers(); if (r < 0) { *ret_error_message = "Failed to mount cgroup hierarchies"; return r; @@ -1827,6 +1986,7 @@ static int initialize_runtime( machine_id_setup(NULL, arg_machine_id, NULL); loopback_setup(); bump_unix_max_dgram_qlen(); + bump_file_max_and_nr_open(); test_usr(); write_container_id(); } @@ -1879,11 +2039,9 @@ static int initialize_runtime( if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) log_warning_errno(errno, "Failed to make us a subreaper: %m"); - if (arg_system) { - /* Bump up RLIMIT_NOFILE for systemd itself */ - (void) bump_rlimit_nofile(saved_rlimit_nofile); - (void) bump_rlimit_memlock(saved_rlimit_memlock); - } + /* Bump up RLIMIT_NOFILE for systemd itself */ + (void) bump_rlimit_nofile(saved_rlimit_nofile); + (void) bump_rlimit_memlock(saved_rlimit_memlock); 
return 0; } @@ -1942,7 +2100,6 @@ static void free_arguments(void) { arg_default_unit = mfree(arg_default_unit); arg_confirm_spawn = mfree(arg_confirm_spawn); - arg_join_controllers = strv_free_free(arg_join_controllers); arg_default_environment = strv_free(arg_default_environment); arg_syscall_archs = set_free(arg_syscall_archs); } @@ -1985,7 +2142,7 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess } /* Initialize the show status setting if it hasn't been set explicitly yet */ - if (arg_show_status == _SHOW_STATUS_UNSET) + if (arg_show_status == _SHOW_STATUS_INVALID) arg_show_status = SHOW_STATUS_YES; return 0; @@ -1994,50 +2151,43 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess static int safety_checks(void) { if (getpid_cached() == 1 && - arg_action != ACTION_RUN) { - log_error("Unsupported execution mode while PID 1."); - return -EPERM; - } + arg_action != ACTION_RUN) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Unsupported execution mode while PID 1."); if (getpid_cached() == 1 && - !arg_system) { - log_error("Can't run --user mode as PID 1."); - return -EPERM; - } + !arg_system) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Can't run --user mode as PID 1."); if (arg_action == ACTION_RUN && arg_system && - getpid_cached() != 1) { - log_error("Can't run system mode unless PID 1."); - return -EPERM; - } + getpid_cached() != 1) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Can't run system mode unless PID 1."); if (arg_action == ACTION_TEST && - geteuid() == 0) { - log_error("Don't run test mode as root."); - return -EPERM; - } + geteuid() == 0) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "Don't run test mode as root."); if (!arg_system && arg_action == ACTION_RUN && - sd_booted() <= 0) { - log_error("Trying to run as user instance, but the system has not been booted with systemd."); - return -EOPNOTSUPP; - } + sd_booted() <= 0) + return 
log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Trying to run as user instance, but the system has not been booted with systemd."); if (!arg_system && arg_action == ACTION_RUN && - !getenv("XDG_RUNTIME_DIR")) { - log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set."); - return -EUNATCH; - } + !getenv("XDG_RUNTIME_DIR")) + return log_error_errno(SYNTHETIC_ERRNO(EUNATCH), + "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set."); if (arg_system && arg_action == ACTION_RUN && - running_in_chroot() > 0) { - log_error("Cannot be run in a chroot() environment."); - return -EOPNOTSUPP; - } + running_in_chroot() > 0) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Cannot be run in a chroot() environment."); return 0; } @@ -2309,13 +2459,13 @@ int main(int argc, char *argv[]) { goto finish; if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES)) - (void) pager_open(arg_no_pager, false); + (void) pager_open(arg_pager_flags); if (arg_action != ACTION_RUN) skip_setup = true; if (arg_action == ACTION_HELP) { - retval = help(); + retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS; goto finish; } else if (arg_action == ACTION_VERSION) { retval = version(); @@ -2337,6 +2487,9 @@ int main(int argc, char *argv[]) { if (arg_action == ACTION_RUN) { + /* A core pattern might have been specified via the cmdline. 
*/ + initialize_core_pattern(skip_setup); + /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */ log_close(); @@ -2373,8 +2526,8 @@ int main(int argc, char *argv[]) { m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp; m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp; m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp; - m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp; - m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp; + m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp; + m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp; set_manager_defaults(m); set_manager_settings(m); @@ -2387,7 +2540,6 @@ int main(int argc, char *argv[]) { r = manager_startup(m, arg_serialization, fds); if (r < 0) { - log_error_errno(r, "Failed to fully start up daemon: %m"); error_message = "Failed to start up manager"; goto finish; } @@ -2473,8 +2625,8 @@ finish: if (error_message) manager_status_printf(NULL, STATUS_TYPE_EMERGENCY, ANSI_HIGHLIGHT_RED "!!!!!!" 
ANSI_NORMAL, - "%s, freezing.", error_message); - freeze_or_reboot(); + "%s.", error_message); + freeze_or_exit_or_reboot(); } return retval; diff --git a/src/core/manager.c b/src/core/manager.c index 930df4e23a..35d9753b12 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -22,8 +22,8 @@ #include "sd-messages.h" #include "sd-path.h" -#include "alloc-util.h" #include "all-units.h" +#include "alloc-util.h" #include "audit-fd.h" #include "boot-timestamps.h" #include "bus-common-errors.h" @@ -61,6 +61,7 @@ #include "ratelimit.h" #include "rlimit-util.h" #include "rm-rf.h" +#include "serialize.h" #include "signal-util.h" #include "socket-util.h" #include "special.h" @@ -351,7 +352,7 @@ static int manager_setup_time_change(Manager *m) { assert(m); - if (m->test_run_flags) + if (MANAGER_IS_TEST_RUN(m)) return 0; m->time_change_event_source = sd_event_source_unref(m->time_change_event_source); @@ -407,7 +408,7 @@ static int manager_setup_timezone_change(Manager *m) { assert(m); - if (m->test_run_flags != 0) + if (MANAGER_IS_TEST_RUN(m)) return 0; /* We watch /etc/localtime for three events: change of the link count (which might mean removal from /etc even @@ -423,10 +424,14 @@ static int manager_setup_timezone_change(Manager *m) { r = sd_event_add_inotify(m->event, &new_event, "/etc/localtime", IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW, manager_dispatch_timezone_change, m); - if (r == -ENOENT) /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created - * either by O_CREATE or by rename() */ + if (r == -ENOENT) { + /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created either by + * O_CREATE or by rename() */ + + log_debug_errno(r, "/etc/localtime doesn't exist yet, watching /etc instead."); r = sd_event_add_inotify(m->event, &new_event, "/etc", IN_CREATE|IN_MOVED_TO|IN_ONLYDIR, manager_dispatch_timezone_change, m); + } if (r < 0) return log_error_errno(r, "Failed to create timezone 
change event source: %m"); @@ -446,7 +451,7 @@ static int enable_special_signals(Manager *m) { assert(m); - if (m->test_run_flags) + if (MANAGER_IS_TEST_RUN(m)) return 0; /* Enable that we get SIGINT on control-alt-del. In containers @@ -567,12 +572,11 @@ static int manager_setup_signals(Manager *m) { return 0; } -static void manager_sanitize_environment(Manager *m) { - assert(m); +static char** sanitize_environment(char **l) { /* Let's remove some environment variables that we need ourselves to communicate with our clients */ strv_env_unset_many( - m->environment, + l, "EXIT_CODE", "EXIT_STATUS", "INVOCATION_ID", @@ -591,12 +595,16 @@ static void manager_sanitize_environment(Manager *m) { NULL); /* Let's order the environment alphabetically, just to make it pretty */ - strv_sort(m->environment); + strv_sort(l); + + return l; } -static int manager_default_environment(Manager *m) { +int manager_default_environment(Manager *m) { assert(m); + m->transient_environment = strv_free(m->transient_environment); + if (MANAGER_IS_SYSTEM(m)) { /* The system manager always starts with a clean * environment for its children. It does not import @@ -605,20 +613,19 @@ static int manager_default_environment(Manager *m) { * The initial passed environment is untouched to keep * /proc/self/environ valid; it is used for tagging * the init process inside containers. */ - m->environment = strv_new("PATH=" DEFAULT_PATH, - NULL); + m->transient_environment = strv_new("PATH=" DEFAULT_PATH); /* Import locale variables LC_*= from configuration */ - locale_setup(&m->environment); + (void) locale_setup(&m->transient_environment); } else /* The user manager passes its own environment * along to its children. 
*/ - m->environment = strv_copy(environ); + m->transient_environment = strv_copy(environ); - if (!m->environment) - return -ENOMEM; + if (!m->transient_environment) + return log_oom(); - manager_sanitize_environment(m); + sanitize_environment(m->transient_environment); return 0; } @@ -711,28 +718,51 @@ static int manager_setup_sigchld_event_source(Manager *m) { return 0; } -int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { +int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **_m) { _cleanup_(manager_freep) Manager *m = NULL; int r; assert(_m); assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER)); - m = new0(Manager, 1); + m = new(Manager, 1); if (!m) return -ENOMEM; - m->unit_file_scope = scope; - m->exit_code = _MANAGER_EXIT_CODE_INVALID; - m->default_timer_accuracy_usec = USEC_PER_MINUTE; - m->default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT; - m->default_tasks_accounting = true; - m->default_tasks_max = UINT64_MAX; - m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC; - m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC; - m->default_restart_usec = DEFAULT_RESTART_USEC; - m->original_log_level = -1; - m->original_log_target = _LOG_TARGET_INVALID; + *m = (Manager) { + .unit_file_scope = scope, + .objective = _MANAGER_OBJECTIVE_INVALID, + + .default_timer_accuracy_usec = USEC_PER_MINUTE, + .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT, + .default_tasks_accounting = true, + .default_tasks_max = UINT64_MAX, + .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC, + .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC, + .default_restart_usec = DEFAULT_RESTART_USEC, + + .original_log_level = -1, + .original_log_target = _LOG_TARGET_INVALID, + + .notify_fd = -1, + .cgroups_agent_fd = -1, + .signal_fd = -1, + .time_change_fd = -1, + .user_lookup_fds = { -1, -1 }, + .private_listen_fd = -1, + .dev_autofs_fd = -1, + .cgroup_inotify_fd = -1, + .pin_cgroupfs_fd = -1, + .ask_password_inotify_fd = -1, + 
.idle_pipe = { -1, -1, -1, -1}, + + /* start as id #1, so that we can leave #0 around as "null-like" value */ + .current_job_id = 1, + + .have_ask_password = -EINVAL, /* we don't know */ + .first_boot = -1, + .test_run_flags = test_run_flags, + }; #if ENABLE_EFI if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) @@ -756,21 +786,6 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { m->invocation_log_format_string = "USER_INVOCATION_ID=%s"; } - m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; - - m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd = - m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd = - m->ask_password_inotify_fd = -1; - - m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1; - - m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */ - - m->have_ask_password = -EINVAL; /* we don't know */ - m->first_boot = -1; - - m->test_run_flags = test_run_flags; - /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */ RATELIMIT_INIT(m->ctrl_alt_del_ratelimit, 2 * USEC_PER_SEC, 7); @@ -798,10 +813,6 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { if (r < 0) return r; - m->udev = udev_new(); - if (!m->udev) - return -ENOMEM; - r = sd_event_default(&m->event); if (r < 0) return r; @@ -831,9 +842,7 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { if (r < 0) return r; - r = manager_setup_timezone_change(m); - if (r < 0) - return r; + (void) manager_setup_timezone_change(m); r = manager_setup_sigchld_event_source(m); if (r < 0) @@ -861,15 +870,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { static int manager_setup_notify(Manager *m) { int r; - if (m->test_run_flags) + if (MANAGER_IS_TEST_RUN(m)) return 0; if (m->notify_fd < 0) { _cleanup_close_ int fd = -1; - union sockaddr_union sa = { - .sa.sa_family 
= AF_UNIX, - }; - static const int one = 1; + union sockaddr_union sa = {}; + int salen; /* First free all secondary fields */ m->notify_socket = mfree(m->notify_socket); @@ -885,17 +892,20 @@ static int manager_setup_notify(Manager *m) { if (!m->notify_socket) return log_oom(); + salen = sockaddr_un_set_path(&sa.un, m->notify_socket); + if (salen < 0) + return log_error_errno(salen, "Notify socket '%s' not valid for AF_UNIX socket address, refusing.", m->notify_socket); + (void) mkdir_parents_label(m->notify_socket, 0755); - (void) unlink(m->notify_socket); + (void) sockaddr_un_unlink(&sa.un); - strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1); - r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); + r = bind(fd, &sa.sa, salen); if (r < 0) - return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); + return log_error_errno(errno, "bind(%s) failed: %m", m->notify_socket); - r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)); + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); if (r < 0) - return log_error_errno(errno, "SO_PASSCRED failed: %m"); + return log_error_errno(r, "SO_PASSCRED failed: %m"); m->notify_fd = TAKE_FD(fd); @@ -940,7 +950,7 @@ static int manager_setup_cgroups_agent(Manager *m) { * to it. The system instance hence listens on this special socket, but the user instances listen on the system * bus for these messages. 
*/ - if (m->test_run_flags) + if (MANAGER_IS_TEST_RUN(m)) return 0; if (!MANAGER_IS_SYSTEM(m)) @@ -964,7 +974,7 @@ static int manager_setup_cgroups_agent(Manager *m) { fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE); - (void) unlink(sa.un.sun_path); + (void) sockaddr_un_unlink(&sa.un); /* Only allow root to connect to this socket */ RUN_WITH_UMASK(0077) @@ -972,8 +982,7 @@ static int manager_setup_cgroups_agent(Manager *m) { if (r < 0) return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); - m->cgroups_agent_fd = fd; - fd = -1; + m->cgroups_agent_fd = TAKE_FD(fd); } if (!m->cgroups_agent_event_source) { @@ -1211,6 +1220,45 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) { return n; } +static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) { + unsigned n = 0; + Unit *u; + int r; + + assert(m); + + while ((u = m->stop_when_unneeded_queue)) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + assert(m->stop_when_unneeded_queue); + + assert(u->in_stop_when_unneeded_queue); + LIST_REMOVE(stop_when_unneeded_queue, m->stop_when_unneeded_queue, u); + u->in_stop_when_unneeded_queue = false; + + n++; + + if (!unit_is_unneeded(u)) + continue; + + log_unit_debug(u, "Unit is not needed anymore."); + + /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the + * service being unnecessary after a while. */ + + if (!ratelimit_below(&u->auto_stop_ratelimit)) { + log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently."); + continue; + } + + /* Ok, nobody needs us anymore. Sniff. 
Then let's commit suicide */ + r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r)); + } + + return n; +} + static void manager_clear_jobs_and_units(Manager *m) { Unit *u; @@ -1228,17 +1276,20 @@ static void manager_clear_jobs_and_units(Manager *m) { assert(!m->cleanup_queue); assert(!m->gc_unit_queue); assert(!m->gc_job_queue); + assert(!m->stop_when_unneeded_queue); assert(hashmap_isempty(m->jobs)); assert(hashmap_isempty(m->units)); m->n_on_console = 0; m->n_running_jobs = 0; + m->n_installed_jobs = 0; + m->n_failed_jobs = 0; } Manager* manager_free(Manager *m) { - UnitType c; ExecDirectoryType dt; + UnitType c; if (!m) return NULL; @@ -1249,8 +1300,8 @@ Manager* manager_free(Manager *m) { if (unit_vtable[c]->shutdown) unit_vtable[c]->shutdown(m); - /* If we reexecute ourselves, we keep the root cgroup around */ - manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE); + /* Keep the cgroup hierarchy in place except when we know we are going down for good */ + manager_shutdown_cgroup(m, IN_SET(m->objective, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC)); lookup_paths_flush_generator(&m->lookup_paths); @@ -1292,13 +1343,13 @@ Manager* manager_free(Manager *m) { manager_close_idle_pipe(m); - udev_unref(m->udev); sd_event_unref(m->event); free(m->notify_socket); lookup_paths_free(&m->lookup_paths); - strv_free(m->environment); + strv_free(m->transient_environment); + strv_free(m->client_environment); hashmap_free(m->cgroup_unit); set_free_free(m->unit_path_cache); @@ -1476,14 +1527,9 @@ static bool manager_dbus_is_running(Manager *m, bool deserialized) { * and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit * rather than the current one. 
*/ - if (m->test_run_flags != 0) + if (MANAGER_IS_TEST_RUN(m)) return false; - /* If we are in the user instance, and the env var is already set for us, then this means D-Bus is ran - * somewhere outside of our own logic. Let's use it */ - if (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")) - return true; - u = manager_get_unit(m, SPECIAL_DBUS_SOCKET); if (!u) return false; @@ -1529,7 +1575,7 @@ static void manager_preset_all(Manager *m) { if (!MANAGER_IS_SYSTEM(m)) return; - if (m->test_run_flags != 0) + if (MANAGER_IS_TEST_RUN(m)) return; /* If this is the first boot, and we are in the host system, then preset everything */ @@ -1541,6 +1587,49 @@ static void manager_preset_all(Manager *m) { log_info("Populated /etc with preset unit settings."); } +static void manager_vacuum(Manager *m) { + assert(m); + + /* Release any dynamic users no longer referenced */ + dynamic_user_vacuum(m, true); + + /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ + manager_vacuum_uid_refs(m); + manager_vacuum_gid_refs(m); + + /* Release any runtimes no longer referenced */ + exec_runtime_vacuum(m); +} + +static void manager_ready(Manager *m) { + assert(m); + + /* After having loaded everything, do the final round of catching up with what might have changed */ + + m->objective = MANAGER_OK; /* Tell everyone we are up now */ + + /* It might be safe to log to the journal now and connect to dbus */ + manager_recheck_journal(m); + manager_recheck_dbus(m); + + /* Sync current state of bus names with our set of listening units */ + (void) manager_enqueue_sync_bus_names(m); + + /* Let's finally catch up with any changes that took place while we were reloading/reexecing */ + manager_catchup(m); +} + +static Manager* manager_reloading_start(Manager *m) { + m->n_reloading++; + return m; +} +static void manager_reloading_stopp(Manager **m) { + if (*m) { + assert((*m)->n_reloading > 0); + (*m)->n_reloading--; + } +} + int 
manager_startup(Manager *m, FILE *serialization, FDSet *fds) { int r; @@ -1549,98 +1638,92 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { /* If we are running in test mode, we still want to run the generators, * but we should not touch the real generator directories. */ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, - m->test_run_flags ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0, + MANAGER_IS_TEST_RUN(m) ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0, NULL); if (r < 0) - return r; + return log_error_errno(r, "Failed to initialize path lookup table: %m"); + dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_START)); r = manager_run_environment_generators(m); + if (r >= 0) + r = manager_run_generators(m); + dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_FINISH)); if (r < 0) return r; - dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_START); - r = manager_run_generators(m); - dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_GENERATORS_FINISH); + manager_preset_all(m); + + r = lookup_paths_reduce(&m->lookup_paths); if (r < 0) - return r; + log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m"); - manager_preset_all(m); - lookup_paths_reduce(&m->lookup_paths); manager_build_unit_path_cache(m); - /* If we will deserialize make sure that during enumeration - * this is already known, so we increase the counter here - * already */ - if (serialization) - m->n_reloading++; + { + /* This block is (optionally) done with the reloading counter bumped */ + _cleanup_(manager_reloading_stopp) Manager *reloading = NULL; - /* First, enumerate what we can from all config files */ - dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_START); - manager_enumerate_perpetual(m); - manager_enumerate(m); - dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_UNITS_LOAD_FINISH); + /* If we will deserialize make sure that during 
enumeration this is already known, so we increase the + * counter here already */ + if (serialization) + reloading = manager_reloading_start(m); - /* Second, deserialize if there is something to deserialize */ - if (serialization) { - r = manager_deserialize(m, serialization, fds); - if (r < 0) - return log_error_errno(r, "Deserialization failed: %m"); - } - - /* Any fds left? Find some unit which wants them. This is - * useful to allow container managers to pass some file - * descriptors to us pre-initialized. This enables - * socket-based activation of entire containers. */ - manager_distribute_fds(m, fds); + /* First, enumerate what we can from all config files */ + dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_START)); + manager_enumerate_perpetual(m); + manager_enumerate(m); + dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_FINISH)); - /* We might have deserialized the notify fd, but if we didn't - * then let's create the bus now */ - r = manager_setup_notify(m); - if (r < 0) - /* No sense to continue without notifications, our children would fail anyway. */ - return r; - - r = manager_setup_cgroups_agent(m); - if (r < 0) - /* Likewise, no sense to continue without empty cgroup notifications. */ - return r; + /* Second, deserialize if there is something to deserialize */ + if (serialization) { + r = manager_deserialize(m, serialization, fds); + if (r < 0) + return log_error_errno(r, "Deserialization failed: %m"); + } - r = manager_setup_user_lookup_fd(m); - if (r < 0) - /* This shouldn't fail, except if things are really broken. */ - return r; + /* Any fds left? Find some unit which wants them. This is useful to allow container managers to pass + * some file descriptors to us pre-initialized. This enables socket-based activation of entire + * containers. 
*/ + manager_distribute_fds(m, fds); - /* Connect to the bus if we are good for it */ - manager_setup_bus(m); + /* We might have deserialized the notify fd, but if we didn't then let's create the bus now */ + r = manager_setup_notify(m); + if (r < 0) + /* No sense to continue without notifications, our children would fail anyway. */ + return r; - /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */ - (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); - m->deserialized_subscribed = strv_free(m->deserialized_subscribed); + r = manager_setup_cgroups_agent(m); + if (r < 0) + /* Likewise, no sense to continue without empty cgroup notifications. */ + return r; - /* Third, fire things up! */ - manager_coldplug(m); + r = manager_setup_user_lookup_fd(m); + if (r < 0) + /* This shouldn't fail, except if things are really broken. */ + return r; - /* Release any dynamic users no longer referenced */ - dynamic_user_vacuum(m, true); + /* Connect to the bus if we are good for it */ + manager_setup_bus(m); - exec_runtime_vacuum(m); + /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */ + r = bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); + if (r < 0) + log_warning_errno(r, "Failed to deserialized tracked clients, ignoring: %m"); + m->deserialized_subscribed = strv_free(m->deserialized_subscribed); - /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ - manager_vacuum_uid_refs(m); - manager_vacuum_gid_refs(m); + /* Third, fire things up! 
*/ + manager_coldplug(m); - if (serialization) { - assert(m->n_reloading > 0); - m->n_reloading--; + /* Clean up runtime objects */ + manager_vacuum(m); - /* Let's wait for the UnitNew/JobNew messages being - * sent, before we notify that the reload is - * finished */ - m->send_reloading_done = true; + if (serialization) + /* Let's wait for the UnitNew/JobNew messages being sent, before we notify that the + * reload is finished */ + m->send_reloading_done = true; } - /* Let's finally catch up with any changes that took place while we were reloading/reexecing */ - manager_catchup(m); + manager_ready(m); return 0; } @@ -2055,7 +2138,7 @@ static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) { assert(j->installed); assert(j->in_run_queue); - job_run_and_invalidate(j); + (void) job_run_and_invalidate(j); } if (m->n_running_jobs > 0) @@ -2074,57 +2157,63 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) { assert(m); - if (m->dispatching_dbus_queue) - return 0; - - /* Anything to do at all? */ - if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message) - return 0; - - /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this - * cycle out, and process things in a later cycle when the queues got a bit emptier. */ - if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD) - return 0; + /* When we are reloading, let's not wait with generating signals, since we need to exit the manager as quickly + * as we can. There's no point in throttling generation of signals in that case. */ + if (MANAGER_IS_RELOADING(m) || m->send_reloading_done || m->pending_reload_message) + budget = (unsigned) -1; /* infinite budget in this case */ + else { + /* Anything to do at all? */ + if (!m->dbus_unit_queue && !m->dbus_job_queue) + return 0; - /* Only process a certain number of units/jobs per event loop iteration. 
Even if the bus queue wasn't overly - * full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize - * CPU time with generating these messages. Note the difference in counting of this "budget" and the - * "threshold" above: the "budget" is decreased only once per generated message, regardless how many - * busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus - * message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five - * times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that - * it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put - * a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that - * the "threshold" is currently chosen much higher than the "budget". */ - budget = MANAGER_BUS_MESSAGE_BUDGET; + /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's + * sit this cycle out, and process things in a later cycle when the queues got a bit emptier. */ + if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD) + return 0; - m->dispatching_dbus_queue = true; + /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't + * overly full before this call we shouldn't increase it in size too wildly in one step, and we + * shouldn't monopolize CPU time with generating these messages. Note the difference in counting of + * this "budget" and the "threshold" above: the "budget" is decreased only once per generated message, + * regardless how many busses/direct connections it is enqueued on, while the "threshold" is applied to + * each queued instance of bus message, i.e. if the same message is enqueued to five busses/direct + * connections it will be counted five times. 
This difference in counting ("references" + * vs. "instances") is primarily a result of the fact that it's easier to implement it this way, + * however it also reflects the thinking that the "threshold" should put a limit on used queue memory, + * i.e. space, while the "budget" should put a limit on time. Also note that the "threshold" is + * currently chosen much higher than the "budget". */ + budget = MANAGER_BUS_MESSAGE_BUDGET; + } - while (budget > 0 && (u = m->dbus_unit_queue)) { + while (budget != 0 && (u = m->dbus_unit_queue)) { assert(u->in_dbus_queue); bus_unit_send_change_signal(u); - n++, budget--; + n++; + + if (budget != (unsigned) -1) + budget--; } - while (budget > 0 && (j = m->dbus_job_queue)) { + while (budget != 0 && (j = m->dbus_job_queue)) { assert(j->in_dbus_queue); bus_job_send_change_signal(j); - n++, budget--; - } + n++; - m->dispatching_dbus_queue = false; + if (budget != (unsigned) -1) + budget--; + } - if (budget > 0 && m->send_reloading_done) { + if (m->send_reloading_done) { m->send_reloading_done = false; bus_manager_send_reloading(m, false); - n++, budget--; + n++; } - if (budget > 0 && m->queued_message) { - bus_send_queued_message(m); + if (m->pending_reload_message) { + bus_send_pending_reload_message(m); n++; } @@ -2133,7 +2222,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) { static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; - char buf[PATH_MAX+1]; + char buf[PATH_MAX]; ssize_t n; n = recv(fd, buf, sizeof(buf), 0); @@ -2461,7 +2550,7 @@ static void manager_handle_ctrl_alt_del(Manager *m) { if (ratelimit_below(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE) manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); else - emergency_action(m, m->cad_burst_action, NULL, + emergency_action(m, m->cad_burst_action, EMERGENCY_ACTION_WARN, NULL, -1, "Ctrl-Alt-Del was pressed more than 7 
times within 2s"); } @@ -2511,9 +2600,10 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t case SIGTERM: if (MANAGER_IS_SYSTEM(m)) { /* This is for compatibility with the original sysvinit */ - r = verify_run_space_and_log("Refusing to reexecute"); - if (r >= 0) - m->exit_code = MANAGER_REEXECUTE; + if (verify_run_space_and_log("Refusing to reexecute") < 0) + break; + + m->objective = MANAGER_REEXECUTE; break; } @@ -2569,9 +2659,10 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t } case SIGHUP: - r = verify_run_space_and_log("Refusing to reload"); - if (r >= 0) - m->exit_code = MANAGER_RELOAD; + if (verify_run_space_and_log("Refusing to reload") < 0) + break; + + m->objective = MANAGER_RELOAD; break; default: { @@ -2591,7 +2682,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t }; /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */ - static const ManagerExitCode code_table[] = { + static const ManagerObjective objective_table[] = { [0] = MANAGER_HALT, [1] = MANAGER_POWEROFF, [2] = MANAGER_REBOOT, @@ -2607,8 +2698,8 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t } if ((int) sfsi.ssi_signo >= SIGRTMIN+13 && - (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) { - m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13]; + (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(objective_table)) { + m->objective = objective_table[sfsi.ssi_signo - SIGRTMIN - 13]; break; } @@ -2632,7 +2723,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t case 24: if (MANAGER_IS_USER(m)) { - m->exit_code = MANAGER_EXIT; + m->objective = MANAGER_EXIT; return 0; } @@ -2697,10 +2788,8 @@ static int manager_dispatch_timezone_change( log_debug("inotify event for /etc/localtime"); changed = manager_read_timezone_stat(m); - if (changed < 0) + if (changed <= 0) return changed; - if 
(!changed) - return 0; /* Something changed, restart the watch, to ensure we watch the new /etc/localtime if it changed */ (void) manager_setup_timezone_change(m); @@ -2761,7 +2850,7 @@ int manager_loop(Manager *m) { RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000); assert(m); - m->exit_code = MANAGER_OK; + assert(m->objective == MANAGER_OK); /* Ensure manager_startup() has been called */ /* Release the path cache */ m->unit_path_cache = set_free_free(m->unit_path_cache); @@ -2773,7 +2862,7 @@ int manager_loop(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to enable SIGCHLD event source: %m"); - while (m->exit_code == MANAGER_OK) { + while (m->objective == MANAGER_OK) { usec_t wait_usec; if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m)) @@ -2800,6 +2889,9 @@ int manager_loop(Manager *m) { if (manager_dispatch_cgroup_realize_queue(m) > 0) continue; + if (manager_dispatch_stop_when_unneeded_queue(m) > 0) + continue; + if (manager_dispatch_dbus_queue(m) > 0) continue; @@ -2816,7 +2908,7 @@ int manager_loop(Manager *m) { return log_error_errno(r, "Failed to run event loop: %m"); } - return m->exit_code; + return m->objective; } int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) { @@ -2998,7 +3090,25 @@ int manager_open_serialization(Manager *m, FILE **_f) { return 0; } -int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { +static bool manager_timestamp_shall_serialize(ManagerTimestamp t) { + + if (!in_initrd()) + return true; + + /* The following timestamps only apply to the host system, hence only serialize them there */ + return !IN_SET(t, + MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH, + MANAGER_TIMESTAMP_SECURITY_START, MANAGER_TIMESTAMP_SECURITY_FINISH, + MANAGER_TIMESTAMP_GENERATORS_START, MANAGER_TIMESTAMP_GENERATORS_FINISH, + MANAGER_TIMESTAMP_UNITS_LOAD_START, MANAGER_TIMESTAMP_UNITS_LOAD_FINISH); +} + +int manager_serialize( + Manager 
*m, + FILE *f, + FDSet *fds, + bool switching_root) { + ManagerTimestamp q; const char *t; Iterator i; @@ -3009,56 +3119,53 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { assert(f); assert(fds); - m->n_reloading++; + _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m); - fprintf(f, "current-job-id=%"PRIu32"\n", m->current_job_id); - fprintf(f, "n-installed-jobs=%u\n", m->n_installed_jobs); - fprintf(f, "n-failed-jobs=%u\n", m->n_failed_jobs); - fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr)); - fprintf(f, "ready-sent=%s\n", yes_no(m->ready_sent)); - fprintf(f, "taint-logged=%s\n", yes_no(m->taint_logged)); - fprintf(f, "service-watchdogs=%s\n", yes_no(m->service_watchdogs)); + (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id); + (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs); + (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs); + (void) serialize_bool(f, "taint-usr", m->taint_usr); + (void) serialize_bool(f, "ready-sent", m->ready_sent); + (void) serialize_bool(f, "taint-logged", m->taint_logged); + (void) serialize_bool(f, "service-watchdogs", m->service_watchdogs); + + t = show_status_to_string(m->show_status); + if (t) + (void) serialize_item(f, "show-status", t); if (m->log_level_overridden) - fprintf(f, "log-level-override=%i\n", log_get_max_level()); + (void) serialize_item_format(f, "log-level-override", "%i", log_get_max_level()); if (m->log_target_overridden) - fprintf(f, "log-target-override=%s\n", log_target_to_string(log_get_target())); + (void) serialize_item(f, "log-target-override", log_target_to_string(log_get_target())); for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) { - /* The userspace and finish timestamps only apply to the host system, hence only serialize them there */ - if (in_initrd() && IN_SET(q, MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH)) + _cleanup_free_ char *joined = 
NULL; + + if (!manager_timestamp_shall_serialize(q)) continue; - t = manager_timestamp_to_string(q); - { - char field[strlen(t) + STRLEN("-timestamp") + 1]; - strcpy(stpcpy(field, t), "-timestamp"); - dual_timestamp_serialize(f, field, m->timestamps + q); - } + joined = strjoin(manager_timestamp_to_string(q), "-timestamp"); + if (!joined) + return log_oom(); + + (void) serialize_dual_timestamp(f, joined, m->timestamps + q); } if (!switching_root) - (void) serialize_environment(f, m->environment); + (void) serialize_strv(f, "env", m->client_environment); if (m->notify_fd >= 0) { - int copy; - - copy = fdset_put_dup(fds, m->notify_fd); - if (copy < 0) - return copy; + r = serialize_fd(f, fds, "notify-fd", m->notify_fd); + if (r < 0) + return r; - fprintf(f, "notify-fd=%i\n", copy); - fprintf(f, "notify-socket=%s\n", m->notify_socket); + (void) serialize_item(f, "notify-socket", m->notify_socket); } if (m->cgroups_agent_fd >= 0) { - int copy; - - copy = fdset_put_dup(fds, m->cgroups_agent_fd); - if (copy < 0) - return copy; - - fprintf(f, "cgroups-agent-fd=%i\n", copy); + r = serialize_fd(f, fds, "cgroups-agent-fd", m->cgroups_agent_fd); + if (r < 0) + return r; } if (m->user_lookup_fds[0] >= 0) { @@ -3066,13 +3173,13 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]); if (copy0 < 0) - return copy0; + return log_error_errno(copy0, "Failed to add user lookup fd to serialization: %m"); copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]); if (copy1 < 0) - return copy1; + return log_error_errno(copy1, "Failed to add user lookup fd to serialization: %m"); - fprintf(f, "user-lookup=%i %i\n", copy0, copy1); + (void) serialize_item_format(f, "user-lookup", "%i %i", copy0, copy1); } bus_track_serialize(m->subscribed, f, "subscribed"); @@ -3099,22 +3206,66 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { fputc('\n', f); r = unit_serialize(u, f, fds, !switching_root); - 
if (r < 0) { - m->n_reloading--; + if (r < 0) return r; - } } - assert(m->n_reloading > 0); - m->n_reloading--; - r = fflush_and_check(f); if (r < 0) - return r; + return log_error_errno(r, "Failed to flush serialization: %m"); r = bus_fdset_add_all(m, fds); if (r < 0) - return r; + return log_error_errno(r, "Failed to add bus sockets to serialization: %m"); + + return 0; +} + +static int manager_deserialize_one_unit(Manager *m, const char *name, FILE *f, FDSet *fds) { + Unit *u; + int r; + + r = manager_load_unit(m, name, NULL, NULL, &u); + if (r < 0) { + if (r == -ENOMEM) + return r; + return log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", name); + } + + r = unit_deserialize(u, f, fds); + if (r < 0) { + if (r == -ENOMEM) + return r; + return log_notice_errno(r, "Failed to deserialize unit \"%s\", skipping: %m", name); + } + + return 0; +} + +static int manager_deserialize_units(Manager *m, FILE *f, FDSet *fds) { + _cleanup_free_ char *line = NULL; + const char *unit_name; + int r; + + for (;;) { + /* Start marker */ + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read serialization line: %m"); + if (r == 0) + break; + + unit_name = strstrip(line); + + r = manager_deserialize_one_unit(m, unit_name, f, fds); + if (r == -ENOMEM) + return r; + if (r < 0) { + r = unit_deserialize_skip(f); + if (r < 0) + return r; + } + } return 0; } @@ -3127,32 +3278,30 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { log_debug("Deserializing state..."); - m->n_reloading++; + /* If we are not in reload mode yet, enter it now. Not that this is recursive, a caller might already have + * increased it to non-zero, which is why we just increase it by one here and down again at the end of this + * call. 
*/ + _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m); for (;;) { - char line[LINE_MAX]; + _cleanup_free_ char *line = NULL; const char *val, *l; - if (!fgets(line, sizeof(line), f)) { - if (feof(f)) - r = 0; - else - r = -errno; - - goto finish; - } + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read serialization line: %m"); + if (r == 0) + break; - char_array_0(line); l = strstrip(line); - - if (l[0] == 0) + if (isempty(l)) /* end marker */ break; if ((val = startswith(l, "current-job-id="))) { uint32_t id; if (safe_atou32(val, &id) < 0) - log_notice("Failed to parse current job id value %s", val); + log_notice("Failed to parse current job id value '%s', ignoring.", val); else m->current_job_id = MAX(m->current_job_id, id); @@ -3160,7 +3309,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { uint32_t n; if (safe_atou32(val, &n) < 0) - log_notice("Failed to parse installed jobs counter %s", val); + log_notice("Failed to parse installed jobs counter '%s', ignoring.", val); else m->n_installed_jobs += n; @@ -3168,7 +3317,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { uint32_t n; if (safe_atou32(val, &n) < 0) - log_notice("Failed to parse failed jobs counter %s", val); + log_notice("Failed to parse failed jobs counter '%s', ignoring.", val); else m->n_failed_jobs += n; @@ -3177,7 +3326,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { b = parse_boolean(val); if (b < 0) - log_notice("Failed to parse taint /usr flag %s", val); + log_notice("Failed to parse taint /usr flag '%s', ignoring.", val); else m->taint_usr = m->taint_usr || b; @@ -3186,7 +3335,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { b = parse_boolean(val); if (b < 0) - log_notice("Failed to parse ready-sent flag %s", val); + log_notice("Failed to parse ready-sent flag '%s', ignoring.", val); else m->ready_sent = m->ready_sent || b; @@ -3195,7 +3344,7 @@ int 
manager_deserialize(Manager *m, FILE *f, FDSet *fds) { b = parse_boolean(val); if (b < 0) - log_notice("Failed to parse taint-logged flag %s", val); + log_notice("Failed to parse taint-logged flag '%s', ignoring.", val); else m->taint_logged = m->taint_logged || b; @@ -3204,10 +3353,19 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { b = parse_boolean(val); if (b < 0) - log_notice("Failed to parse service-watchdogs flag %s", val); + log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val); else m->service_watchdogs = b; + } else if ((val = startswith(l, "show-status="))) { + ShowStatus s; + + s = show_status_from_string(val); + if (s < 0) + log_notice("Failed to parse show-status flag '%s', ignoring.", val); + else + manager_set_show_status(m, s); + } else if ((val = startswith(l, "log-level-override="))) { int level; @@ -3227,17 +3385,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { manager_override_log_target(m, target); } else if (startswith(l, "env=")) { - r = deserialize_environment(&m->environment, l); - if (r == -ENOMEM) - goto finish; + r = deserialize_environment(l + 4, &m->client_environment); if (r < 0) - log_notice_errno(r, "Failed to parse environment entry: \"%s\": %m", l); + log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", l); } else if ((val = startswith(l, "notify-fd="))) { int fd; if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) - log_notice("Failed to parse notify fd: \"%s\"", val); + log_notice("Failed to parse notify fd, ignoring: \"%s\"", val); else { m->notify_event_source = sd_event_source_unref(m->notify_event_source); safe_close(m->notify_fd); @@ -3245,22 +3401,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { } } else if ((val = startswith(l, "notify-socket="))) { - char *n; - - n = strdup(val); - if (!n) { - r = -ENOMEM; - goto finish; - } - - free(m->notify_socket); - m->notify_socket = n; + r = free_and_strdup(&m->notify_socket, 
val); + if (r < 0) + return r; } else if ((val = startswith(l, "cgroups-agent-fd="))) { int fd; if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) - log_notice("Failed to parse cgroups agent fd: %s", val); + log_notice("Failed to parse cgroups agent fd, ignoring.: %s", val); else { m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source); safe_close(m->cgroups_agent_fd); @@ -3271,7 +3420,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { int fd0, fd1; if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1)) - log_notice("Failed to parse user lookup fd: %s", val); + log_notice("Failed to parse user lookup fd, ignoring: %s", val); else { m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source); safe_close_pair(m->user_lookup_fds); @@ -3290,7 +3439,8 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { else if ((val = startswith(l, "subscribed="))) { if (strv_extend(&m->deserialized_subscribed, val) < 0) - log_oom(); + return -ENOMEM; + } else { ManagerTimestamp q; @@ -3305,100 +3455,50 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { } if (q < _MANAGER_TIMESTAMP_MAX) /* found it */ - dual_timestamp_deserialize(val, m->timestamps + q); + (void) deserialize_dual_timestamp(val, m->timestamps + q); else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */ - log_notice("Unknown serialization item '%s'", l); - } - } - - for (;;) { - Unit *u; - char name[UNIT_NAME_MAX+2]; - const char* unit_name; - - /* Start marker */ - if (!fgets(name, sizeof(name), f)) { - if (feof(f)) - r = 0; - else - r = -errno; - - goto finish; - } - - char_array_0(name); - unit_name = strstrip(name); - - r = manager_load_unit(m, unit_name, NULL, NULL, &u); - if (r < 0) { - log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", unit_name); - if (r == -ENOMEM) - goto finish; - 
unit_deserialize_skip(f); - continue; - } - - r = unit_deserialize(u, f, fds); - if (r < 0) { - log_notice_errno(r, "Failed to deserialize unit \"%s\": %m", unit_name); - if (r == -ENOMEM) - goto finish; + log_notice("Unknown serialization item '%s', ignoring.", l); } } -finish: - if (ferror(f)) - r = -EIO; - - assert(m->n_reloading > 0); - m->n_reloading--; - - return r; -} - -static void manager_flush_finished_jobs(Manager *m) { - Job *j; - - while ((j = set_steal_first(m->pending_finished_jobs))) { - bus_job_send_removed_signal(j); - job_free(j); - } - - m->pending_finished_jobs = set_free(m->pending_finished_jobs); + return manager_deserialize_units(m, f, fds); } int manager_reload(Manager *m) { - int r, q; - _cleanup_fclose_ FILE *f = NULL; + _cleanup_(manager_reloading_stopp) Manager *reloading = NULL; _cleanup_fdset_free_ FDSet *fds = NULL; + _cleanup_fclose_ FILE *f = NULL; + int r; assert(m); r = manager_open_serialization(m, &f); if (r < 0) - return r; - - m->n_reloading++; - bus_manager_send_reloading(m, true); + return log_error_errno(r, "Failed to create serialization file: %m"); fds = fdset_new(); - if (!fds) { - m->n_reloading--; - return -ENOMEM; - } + if (!fds) + return log_oom(); + + /* We are officially in reload mode from here on. */ + reloading = manager_reloading_start(m); r = manager_serialize(m, f, fds, false); - if (r < 0) { - m->n_reloading--; + if (r < 0) return r; - } - if (fseeko(f, 0, SEEK_SET) < 0) { - m->n_reloading--; - return -errno; - } + if (fseeko(f, 0, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to seek to beginning of serialization: %m"); + + /* 💀 This is the point of no return, from here on there is no way back. 💀 */ + reloading = NULL; + + bus_manager_send_reloading(m, true); + + /* Start by flushing out all jobs and units, all generated units, all runtime environments, all dynamic users + * and everything else that is worth flushing out. 
We'll get it all back from the serialization — if we need + * it.*/ - /* From here on there is no way back. */ manager_clear_jobs_and_units(m); lookup_paths_flush_generator(&m->lookup_paths); lookup_paths_free(&m->lookup_paths); @@ -3407,82 +3507,50 @@ int manager_reload(Manager *m) { m->uid_refs = hashmap_free(m->uid_refs); m->gid_refs = hashmap_free(m->gid_refs); - q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL); - if (q < 0 && r >= 0) - r = q; + r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL); + if (r < 0) + log_warning_errno(r, "Failed to initialize path lookup table, ignoring: %m"); - q = manager_run_environment_generators(m); - if (q < 0 && r >= 0) - r = q; + (void) manager_run_environment_generators(m); + (void) manager_run_generators(m); - /* Find new unit paths */ - q = manager_run_generators(m); - if (q < 0 && r >= 0) - r = q; + r = lookup_paths_reduce(&m->lookup_paths); + if (r < 0) + log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m"); - lookup_paths_reduce(&m->lookup_paths); manager_build_unit_path_cache(m); - /* First, enumerate what we can from all config files */ + /* First, enumerate what we can from kernel and suchlike */ + manager_enumerate_perpetual(m); manager_enumerate(m); /* Second, deserialize our stored data */ - q = manager_deserialize(m, f, fds); - if (q < 0) { - log_error_errno(q, "Deserialization failed: %m"); - - if (r >= 0) - r = q; - } + r = manager_deserialize(m, f, fds); + if (r < 0) + log_warning_errno(r, "Deserialization failed, proceeding anyway: %m"); + /* We don't need the serialization anymore */ f = safe_fclose(f); - /* Re-register notify_fd as event source */ - q = manager_setup_notify(m); - if (q < 0 && r >= 0) - r = q; - - q = manager_setup_cgroups_agent(m); - if (q < 0 && r >= 0) - r = q; - - q = manager_setup_user_lookup_fd(m); - if (q < 0 && r >= 0) - r = q; + /* Re-register notify_fd as event source, and set up other sockets/communication channels we might 
need */ + (void) manager_setup_notify(m); + (void) manager_setup_cgroups_agent(m); + (void) manager_setup_user_lookup_fd(m); /* Third, fire things up! */ manager_coldplug(m); - /* Release any dynamic users no longer referenced */ - dynamic_user_vacuum(m, true); - - /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ - manager_vacuum_uid_refs(m); - manager_vacuum_gid_refs(m); - - exec_runtime_vacuum(m); + /* Clean up runtime objects no longer referenced */ + manager_vacuum(m); + /* Consider the reload process complete now. */ assert(m->n_reloading > 0); m->n_reloading--; - /* It might be safe to log to the journal now and connect to dbus */ - manager_recheck_journal(m); - manager_recheck_dbus(m); - - /* Let's finally catch up with any changes that took place while we were reloading/reexecing */ - manager_catchup(m); - - /* Sync current state of bus names with our set of listening units */ - q = manager_enqueue_sync_bus_names(m); - if (q < 0 && r >= 0) - r = q; - - if (!MANAGER_IS_RELOADING(m)) - manager_flush_finished_jobs(m); + manager_ready(m); m->send_reloading_done = true; - - return r; + return 0; } void manager_reset_failed(Manager *m) { @@ -3533,7 +3601,7 @@ static void manager_notify_finished(Manager *m) { char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX]; usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec; - if (m->test_run_flags) + if (MANAGER_IS_TEST_RUN(m)) return; if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) { @@ -3726,9 +3794,14 @@ static const char* user_env_generator_binary_paths[] = { static int manager_run_environment_generators(Manager *m) { char **tmp = NULL; /* this is only used in the forked process, no cleanup here */ const char **paths; - void* args[] = {&tmp, &tmp, &m->environment}; + void* args[] = { + [STDOUT_GENERATE] = &tmp, + [STDOUT_COLLECT] = &tmp, + [STDOUT_CONSUME] = 
&m->transient_environment, + }; + int r; - if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS)) + if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS)) return 0; paths = MANAGER_IS_SYSTEM(m) ? system_env_generator_binary_paths : user_env_generator_binary_paths; @@ -3736,7 +3809,10 @@ static int manager_run_environment_generators(Manager *m) { if (!generator_path_any(paths)) return 0; - return execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL); + RUN_WITH_UMASK(0022) + r = execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, m->transient_environment); + + return r; } static int manager_run_generators(Manager *m) { @@ -3746,7 +3822,7 @@ static int manager_run_generators(Manager *m) { assert(m); - if (m->test_run_flags && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS)) + if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS)) return 0; paths = generator_binary_paths(m->unit_file_scope); @@ -3757,8 +3833,10 @@ static int manager_run_generators(Manager *m) { return 0; r = lookup_paths_mkdir_generator(&m->lookup_paths); - if (r < 0) + if (r < 0) { + log_error_errno(r, "Failed to create generator directories: %m"); goto finish; + } argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */ argv[1] = m->lookup_paths.generator; @@ -3767,19 +3845,46 @@ static int manager_run_generators(Manager *m) { argv[4] = NULL; RUN_WITH_UMASK(0022) - execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC, - NULL, NULL, (char**) argv); + (void) execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC, + NULL, NULL, (char**) argv, m->transient_environment); + + r = 0; finish: lookup_paths_trim_generator(&m->lookup_paths); return r; } -int manager_environment_add(Manager *m, char **minus, char **plus) { +int manager_transient_environment_add(Manager *m, char **plus) { + char **a; + + assert(m); + 
+ if (strv_isempty(plus)) + return 0; + + a = strv_env_merge(2, m->transient_environment, plus); + if (!a) + return log_oom(); + + sanitize_environment(a); + + return strv_free_and_replace(m->transient_environment, a); +} + +int manager_client_environment_modify( + Manager *m, + char **minus, + char **plus) { + char **a = NULL, **b = NULL, **l; + assert(m); - l = m->environment; + if (strv_isempty(minus) && strv_isempty(plus)) + return 0; + + l = m->client_environment; if (!strv_isempty(minus)) { a = strv_env_delete(l, 1, minus); @@ -3799,16 +3904,29 @@ int manager_environment_add(Manager *m, char **minus, char **plus) { l = b; } - if (m->environment != l) - strv_free(m->environment); + if (m->client_environment != l) + strv_free(m->client_environment); + if (a != l) strv_free(a); if (b != l) strv_free(b); - m->environment = l; - manager_sanitize_environment(m); + m->client_environment = sanitize_environment(l); + return 0; +} + +int manager_get_effective_environment(Manager *m, char ***ret) { + char **l; + + assert(m); + assert(ret); + + l = strv_env_merge(2, m->transient_environment, m->client_environment); + if (!l) + return -ENOMEM; + *ret = l; return 0; } @@ -3860,7 +3978,7 @@ static bool manager_journal_is_running(Manager *m) { assert(m); - if (m->test_run_flags != 0) + if (MANAGER_IS_TEST_RUN(m)) return false; /* If we are the user manager we can safely assume that the journal is up */ @@ -3915,7 +4033,7 @@ void manager_set_show_status(Manager *m, ShowStatus mode) { mode == SHOW_STATUS_NO ? 
"Disabling" : "Enabling"); m->show_status = mode; - if (mode > 0) + if (IN_SET(mode, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES)) (void) touch("/run/systemd/show-status"); else (void) unlink("/run/systemd/show-status"); @@ -3937,7 +4055,7 @@ static bool manager_get_show_status(Manager *m, StatusType type) { if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0) return false; - return m->show_status > 0; + return IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES); } const char *manager_get_confirm_spawn(Manager *m) { @@ -4024,7 +4142,7 @@ void manager_status_printf(Manager *m, StatusType type, const char *status, cons return; va_start(ap, format); - status_vprintf(status, true, type == STATUS_TYPE_EPHEMERAL, format, ap); + status_vprintf(status, SHOW_STATUS_ELLIPSIZE|(type == STATUS_TYPE_EPHEMERAL ? SHOW_STATUS_EPHEMERAL : 0), format, ap); va_end(ap); } @@ -4278,7 +4396,7 @@ static void manager_serialize_uid_refs_internal( if (!(c & DESTROY_IPC_FLAG)) continue; - fprintf(f, "%s=" UID_FMT "\n", field_name, uid); + (void) serialize_item_format(f, field_name, UID_FMT, uid); } } @@ -4323,7 +4441,7 @@ static void manager_deserialize_uid_refs_one_internal( r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)); if (r < 0) { - log_debug("Failed to add UID reference entry"); + log_debug_errno(r, "Failed to add UID reference entry: %m"); return; } } @@ -4517,6 +4635,14 @@ void manager_restore_original_log_target(Manager *m) { m->log_target_overridden = false; } +ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s) { + if (in_initrd() && + s >= MANAGER_TIMESTAMP_SECURITY_START && + s <= MANAGER_TIMESTAMP_UNITS_LOAD_FINISH) + return s - MANAGER_TIMESTAMP_SECURITY_START + MANAGER_TIMESTAMP_INITRD_SECURITY_START; + return s; +} + static const char *const manager_state_table[_MANAGER_STATE_MAX] = { [MANAGER_INITIALIZING] = "initializing", [MANAGER_STARTING] = "starting", @@ -4541,6 +4667,12 @@ static const char *const 
manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = { [MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish", [MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start", [MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish", + [MANAGER_TIMESTAMP_INITRD_SECURITY_START] = "initrd-security-start", + [MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH] = "initrd-security-finish", + [MANAGER_TIMESTAMP_INITRD_GENERATORS_START] = "initrd-generators-start", + [MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH] = "initrd-generators-finish", + [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START] = "initrd-units-load-start", + [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH] = "initrd-units-load-finish", }; DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp); diff --git a/src/core/manager.h b/src/core/manager.h index ea5d425030..bce8020cfd 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -5,6 +5,7 @@ #include <stdio.h> #include "sd-bus.h" +#include "sd-device.h" #include "sd-event.h" #include "cgroup-util.h" @@ -22,6 +23,8 @@ typedef struct Unit Unit; typedef struct Manager Manager; +/* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields + * when requested */ typedef enum ManagerState { MANAGER_INITIALIZING, MANAGER_STARTING, @@ -33,7 +36,7 @@ typedef enum ManagerState { _MANAGER_STATE_INVALID = -1 } ManagerState; -typedef enum ManagerExitCode { +typedef enum ManagerObjective { MANAGER_OK, MANAGER_EXIT, MANAGER_RELOAD, @@ -43,9 +46,9 @@ typedef enum ManagerExitCode { MANAGER_HALT, MANAGER_KEXEC, MANAGER_SWITCH_ROOT, - _MANAGER_EXIT_CODE_MAX, - _MANAGER_EXIT_CODE_INVALID = -1 -} ManagerExitCode; + _MANAGER_OBJECTIVE_MAX, + _MANAGER_OBJECTIVE_INVALID = -1 +} ManagerObjective; typedef enum StatusType { STATUS_TYPE_EPHEMERAL, @@ -53,6 +56,27 @@ typedef enum StatusType { STATUS_TYPE_EMERGENCY, } StatusType; +/* Notes: + * 1. 
TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD, + * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when + * the manager is system and not running under container environment. + * + * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero. + * + * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not + * have RTC. + * + * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not + * have RTC, or systemd is built without EFI support. + * + * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as + * negative of the actual value. + * + * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started. + * + * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd. + */ + typedef enum ManagerTimestamp { MANAGER_TIMESTAMP_FIRMWARE, MANAGER_TIMESTAMP_LOADER, @@ -67,6 +91,13 @@ typedef enum ManagerTimestamp { MANAGER_TIMESTAMP_GENERATORS_FINISH, MANAGER_TIMESTAMP_UNITS_LOAD_START, MANAGER_TIMESTAMP_UNITS_LOAD_FINISH, + + MANAGER_TIMESTAMP_INITRD_SECURITY_START, + MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH, + MANAGER_TIMESTAMP_INITRD_GENERATORS_START, + MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH, + MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START, + MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH, _MANAGER_TIMESTAMP_MAX, _MANAGER_TIMESTAMP_INVALID = -1, } ManagerTimestamp; @@ -77,14 +108,15 @@ typedef enum ManagerTimestamp { #include "show-status.h" #include "unit-name.h" -enum { - /* 0 = run normally */ - MANAGER_TEST_RUN_MINIMAL = 1 << 1, /* create basic data structures */ - MANAGER_TEST_RUN_BASIC = 1 << 2, /* interact with the environment */ - MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 3, /* also run env generators */ - MANAGER_TEST_RUN_GENERATORS = 1 << 4, /* also run unit generators */ +typedef enum ManagerTestRunFlags { + MANAGER_TEST_NORMAL = 0, /* run normally */ + MANAGER_TEST_RUN_MINIMAL = 1 << 0, /* create basic data 
structures */ + MANAGER_TEST_RUN_BASIC = 1 << 1, /* interact with the environment */ + MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */ + MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */ MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS, -}; +} ManagerTestRunFlags; + assert_cc((MANAGER_TEST_FULL & UINT8_MAX) == MANAGER_TEST_FULL); struct Manager { @@ -130,6 +162,9 @@ struct Manager { /* Target units whose default target dependencies haven't been set yet */ LIST_HEAD(Unit, target_deps_queue); + /* Units that might be subject to StopWhenUnneeded= clean-up */ + LIST_HEAD(Unit, stop_when_unneeded_queue); + sd_event *event; /* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in @@ -177,18 +212,16 @@ struct Manager { LookupPaths lookup_paths; Set *unit_path_cache; - char **environment; + char **transient_environment; /* The environment, as determined from config files, kernel cmdline and environment generators */ + char **client_environment; /* Environment variables created by clients through the bus API */ usec_t runtime_watchdog; usec_t shutdown_watchdog; dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX]; - struct udev* udev; - /* Data specific to the device subsystem */ - struct udev_monitor* udev_monitor; - sd_event_source *udev_event_source; + sd_device_monitor *device_monitor; Hashmap *devices_by_sysfs; /* Data specific to the mount subsystem */ @@ -215,7 +248,7 @@ struct Manager { /* This is used during reloading: before the reload we queue * the reply message here, and afterwards we send it */ - sd_bus_message *queued_message; + sd_bus_message *pending_reload_message; Hashmap *watch_bus; /* D-Bus names => Unit object n:1 */ @@ -250,11 +283,10 @@ struct Manager { usec_t etc_localtime_mtime; bool etc_localtime_accessible:1; - /* Flags */ - ManagerExitCode exit_code:5; + ManagerObjective objective:5; + /* Flags */ 
bool dispatching_load_queue:1; - bool dispatching_dbus_queue:1; bool taint_usr:1; @@ -267,7 +299,7 @@ struct Manager { /* Have we ever changed the "kernel.pid_max" sysctl? */ bool sysctl_pid_max_changed:1; - unsigned test_run_flags:8; + ManagerTestRunFlags test_run_flags:8; /* If non-zero, exit with the following value when the systemd * process terminate. Useful for containers: systemd-nspawn could get @@ -305,9 +337,6 @@ struct Manager { /* non-zero if we are reloading or reexecuting, */ int n_reloading; - /* A set which contains all jobs that started before reload and finished - * during it */ - Set *pending_finished_jobs; unsigned n_installed_jobs; unsigned n_failed_jobs; @@ -375,10 +404,12 @@ struct Manager { #define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH)) -/* The exit code is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */ -#define MANAGER_IS_RUNNING(m) ((m)->exit_code == MANAGER_OK) +/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */ +#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK) -int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **m); +#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0) + +int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **m); Manager* manager_free(Manager *m); DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); @@ -408,7 +439,11 @@ void manager_clear_jobs(Manager *m); unsigned manager_dispatch_load_queue(Manager *m); -int manager_environment_add(Manager *m, char **minus, char **plus); +int manager_default_environment(Manager *m); +int manager_transient_environment_add(Manager *m, char **plus); +int manager_client_environment_modify(Manager *m, char **minus, char **plus); +int manager_get_effective_environment(Manager *m, char ***ret); + int manager_set_default_rlimits(Manager *m, struct rlimit 
**default_rlimit); int manager_loop(Manager *m); @@ -479,3 +514,4 @@ void manager_disable_confirm_spawn(void); const char *manager_timestamp_to_string(ManagerTimestamp m) _const_; ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_; +ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s); diff --git a/src/core/meson.build b/src/core/meson.build index 3852c5e9d8..450d6f72a9 100644 --- a/src/core/meson.build +++ b/src/core/meson.build @@ -5,6 +5,8 @@ libcore_la_sources = ''' audit-fd.h automount.c automount.h + bpf-devices.c + bpf-devices.h bpf-firewall.c bpf-firewall.h cgroup.c diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 16880e6157..3ce6164b06 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -11,7 +11,9 @@ #include "bus-util.h" #include "cgroup-util.h" #include "dev-setup.h" +#include "dirent-util.h" #include "efivars.h" +#include "fd-util.h" #include "fileio.h" #include "fs-util.h" #include "label.h" @@ -20,7 +22,7 @@ #include "missing.h" #include "mkdir.h" #include "mount-setup.h" -#include "mount-util.h" +#include "mountpoint-util.h" #include "path-util.h" #include "set.h" #include "smack-util.h" @@ -229,76 +231,105 @@ int mount_setup_early(void) { return mount_points_setup(N_EARLY_MOUNT, false); } -int mount_cgroup_controllers(char ***join_controllers) { +static const char *join_with(const char *controller) { + + static const char* const pairs[] = { + "cpu", "cpuacct", + "net_cls", "net_prio", + NULL + }; + + const char *const *x, *const *y; + + assert(controller); + + /* This will lookup which controller to mount another controller with. Input is a controller name, and output + * is the other controller name. The function works both ways: you can input one and get the other, and input + * the other to get the one. 
*/ + + STRV_FOREACH_PAIR(x, y, pairs) { + if (streq(controller, *x)) + return *y; + if (streq(controller, *y)) + return *x; + } + + return NULL; +} + +static int symlink_controller(const char *target, const char *alias) { + const char *a; + int r; + + assert(target); + assert(alias); + + a = strjoina("/sys/fs/cgroup/", alias); + + r = symlink_idempotent(target, a, false); + if (r < 0) + return log_error_errno(r, "Failed to create symlink %s: %m", a); + +#ifdef SMACK_RUN_LABEL + const char *p; + + p = strjoina("/sys/fs/cgroup/", target); + + r = mac_smack_copy(a, p); + if (r < 0 && r != -EOPNOTSUPP) + return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a); +#endif + + return 0; +} + +int mount_cgroup_controllers(void) { _cleanup_set_free_free_ Set *controllers = NULL; - bool has_argument = !!join_controllers; int r; if (!cg_is_legacy_wanted()) return 0; /* Mount all available cgroup controllers that are built into the kernel. */ - - if (!has_argument) - /* The defaults: - * mount "cpu" + "cpuacct" together, and "net_cls" + "net_prio". - * - * We'd like to add "cpuset" to the mix, but "cpuset" doesn't really - * work for groups with no initialized attributes. 
- */ - join_controllers = (char**[]) { - STRV_MAKE("cpu", "cpuacct"), - STRV_MAKE("net_cls", "net_prio"), - NULL, - }; - r = cg_kernel_controllers(&controllers); if (r < 0) return log_error_errno(r, "Failed to enumerate cgroup controllers: %m"); for (;;) { _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL; + const char *other_controller; MountPoint p = { .what = "cgroup", .type = "cgroup", .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, .mode = MNT_IN_CONTAINER, }; - char ***k = NULL; controller = set_steal_first(controllers); if (!controller) break; - for (k = join_controllers; *k; k++) - if (strv_find(*k, controller)) - break; - - if (*k) { - char **i, **j; - - for (i = *k, j = *k; *i; i++) { - - if (!streq(*i, controller)) { - _cleanup_free_ char *t; - - t = set_remove(controllers, *i); - if (!t) { - if (has_argument) - free(*i); - continue; - } - } - - *(j++) = *i; + /* Check if we shall mount this together with another controller */ + other_controller = join_with(controller); + if (other_controller) { + _cleanup_free_ char *c = NULL; + + /* Check if the other controller is actually available in the kernel too */ + c = set_remove(controllers, other_controller); + if (c) { + + /* Join the two controllers into one string, and maintain a stable ordering */ + if (strcmp(controller, other_controller) < 0) + options = strjoin(controller, ",", other_controller); + else + options = strjoin(other_controller, ",", controller); + if (!options) + return log_oom(); } + } - *j = NULL; - - options = strv_join(*k, ","); - if (!options) - return log_oom(); - } else + /* The simple case, where there's only one controller to mount together */ + if (!options) options = TAKE_PTR(controller); where = strappend("/sys/fs/cgroup/", options); @@ -312,35 +343,14 @@ int mount_cgroup_controllers(char ***join_controllers) { if (r < 0) return r; - if (r > 0 && *k) { - char **i; - - for (i = *k; *i; i++) { - _cleanup_free_ char *t = NULL; - - t = strappend("/sys/fs/cgroup/", *i); 
- if (!t) - return log_oom(); - - r = symlink(options, t); - if (r >= 0) { -#ifdef SMACK_RUN_LABEL - _cleanup_free_ char *src; - src = strappend("/sys/fs/cgroup/", options); - if (!src) - return log_oom(); - r = mac_smack_copy(t, src); - if (r < 0 && r != -EOPNOTSUPP) - return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", src, t); -#endif - } else if (errno != EEXIST) - return log_error_errno(errno, "Failed to create symlink %s: %m", t); - } - } + /* Create symlinks from the individual controller names, in case we have a joined mount */ + if (controller) + (void) symlink_controller(options, controller); + if (other_controller) + (void) symlink_controller(options, other_controller); } - /* Now that we mounted everything, let's make the tmpfs the - * cgroup file systems are mounted into read-only. */ + /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */ (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); return 0; @@ -396,6 +406,100 @@ static int relabel_cgroup_filesystems(void) { return 0; } + +static int relabel_extra(void) { + _cleanup_closedir_ DIR *d = NULL; + int r, c = 0; + + /* Support for relabelling additional files or directories after loading the policy. For this, code in the + * initrd simply has to drop in *.relabel files into /run/systemd/relabel-extra.d/. We'll read all such files + * expecting one absolute path by line and will relabel each (and everyone below that in case the path refers + * to a directory). These drop-in files are supposed to be absolutely minimal, and do not understand comments + * and such. After the operation succeeded the files are removed, and the drop-in directory as well, if + * possible. 
+ */ + + d = opendir("/run/systemd/relabel-extra.d/"); + if (!d) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/, ignoring: %m"); + } + + for (;;) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_close_ int fd = -1; + struct dirent *de; + + errno = 0; + de = readdir_no_dot(d); + if (!de) { + if (errno != 0) + return log_error_errno(errno, "Failed read directory /run/systemd/relabel-extra.d/, ignoring: %m"); + break; + } + + if (hidden_or_backup_file(de->d_name)) + continue; + + if (!endswith(de->d_name, ".relabel")) + continue; + + if (!IN_SET(de->d_type, DT_REG, DT_UNKNOWN)) + continue; + + fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) { + log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name); + continue; + } + + f = fdopen(fd, "r"); + if (!f) { + log_warning_errno(errno, "Failed to convert file descriptor into file object, ignoring: %m"); + continue; + } + TAKE_FD(fd); + + for (;;) { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) { + log_warning_errno(r, "Failed to read from /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name); + break; + } + if (r == 0) /* EOF */ + break; + + path_simplify(line, true); + + if (!path_is_normalized(line)) { + log_warning("Path to relabel is not normalized, ignoring: %s", line); + continue; + } + + if (!path_is_absolute(line)) { + log_warning("Path to relabel is not absolute, ignoring: %s", line); + continue; + } + + log_debug("Relabelling additional file/directory '%s'.", line); + (void) nftw(line, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); + c++; + } + + if (unlinkat(dirfd(d), de->d_name, 0) < 0) + log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name); + } + + /* Remove when we completing things. 
*/ + if (rmdir("/run/systemd/relabel-extra.d") < 0) + log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/ directory: %m"); + + return c; +} #endif int mount_setup(bool loaded_policy) { @@ -413,20 +517,22 @@ int mount_setup(bool loaded_policy) { if (loaded_policy) { usec_t before_relabel, after_relabel; char timespan[FORMAT_TIMESPAN_MAX]; + const char *i; + int n_extra; before_relabel = now(CLOCK_MONOTONIC); - (void) nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); - (void) nftw("/dev/shm", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); - (void) nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); + FOREACH_STRING(i, "/dev", "/dev/shm", "/run") + (void) nftw(i, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); - r = relabel_cgroup_filesystems(); - if (r < 0) - return r; + (void) relabel_cgroup_filesystems(); + + n_extra = relabel_extra(); after_relabel = now(CLOCK_MONOTONIC); - log_info("Relabelled /dev, /run and /sys/fs/cgroup in %s.", + log_info("Relabelled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.", + n_extra > 0 ? ", additional files" : "", format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0)); } #endif @@ -452,20 +558,9 @@ int mount_setup(bool loaded_policy) { (void) mkdir_label("/run/systemd", 0755); (void) mkdir_label("/run/systemd/system", 0755); - /* Set up inaccessible (and empty) file nodes of all types */ - (void) mkdir_label("/run/systemd/inaccessible", 0000); - (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); - (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); - (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); - (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); - - /* The following two are likely to fail if we lack the privs for it (for example in an userns environment, if - * CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 device nodes to be - * created). But that's entirely fine. 
Consumers of these files should carry fallback to use a different node - * then, for example /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for most - * uses. */ - (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); - (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes + * from. */ + (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); return 0; } diff --git a/src/core/mount-setup.h b/src/core/mount-setup.h index 43cd8908de..b4ca2cf4b4 100644 --- a/src/core/mount-setup.h +++ b/src/core/mount-setup.h @@ -6,7 +6,7 @@ int mount_setup_early(void); int mount_setup(bool loaded_policy); -int mount_cgroup_controllers(char ***join_controllers); +int mount_cgroup_controllers(void); bool mount_point_is_api(const char *path); bool mount_point_ignore(const char *path); diff --git a/src/core/mount.c b/src/core/mount.c index 21437dad08..ead9bc1f44 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -11,6 +11,7 @@ #include "alloc-util.h" #include "dbus-mount.h" +#include "dbus-unit.h" #include "device.h" #include "escape.h" #include "exit-status.h" @@ -20,11 +21,12 @@ #include "manager.h" #include "mkdir.h" #include "mount-setup.h" -#include "mount-util.h" #include "mount.h" +#include "mountpoint-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" +#include "serialize.h" #include "special.h" #include "string-table.h" #include "string-util.h" @@ -66,22 +68,18 @@ static bool MOUNT_STATE_WITH_PROCESS(MountState state) { MOUNT_UNMOUNTING_SIGKILL); } -static bool mount_needs_network(const char *options, const char *fstype) { - if (fstab_test_option(options, "_netdev\0")) +static bool mount_is_network(const MountParameters *p) { + assert(p); + + if (fstab_test_option(p->options, "_netdev\0")) return true; - if (fstype && fstype_is_network(fstype)) + 
if (p->fstype && fstype_is_network(p->fstype)) return true; return false; } -static bool mount_is_network(const MountParameters *p) { - assert(p); - - return mount_needs_network(p->options, p->fstype); -} - static bool mount_is_loop(const MountParameters *p) { assert(p); @@ -127,11 +125,11 @@ static bool mount_is_bound_to_device(const Mount *m) { return fstab_test_option(p->options, "x-systemd.device-bound\0"); } -static bool needs_quota(const MountParameters *p) { +static bool mount_needs_quota(const MountParameters *p) { assert(p); - /* Quotas are not enabled on network filesystems, - * but we want them, for example, on storage connected via iscsi */ + /* Quotas are not enabled on network filesystems, but we want them, for example, on storage connected via + * iscsi. We hence don't use mount_is_network() here, as that would also return true for _netdev devices. */ if (p->fstype && fstype_is_network(p->fstype)) return false; @@ -209,11 +207,9 @@ static void mount_unwatch_control_pid(Mount *m) { static void mount_parameters_done(MountParameters *p) { assert(p); - free(p->what); - free(p->options); - free(p->fstype); - - p->what = p->options = p->fstype = NULL; + p->what = mfree(p->what); + p->options = mfree(p->options); + p->fstype = mfree(p->fstype); } static void mount_done(Unit *u) { @@ -316,7 +312,7 @@ static int mount_add_mount_dependencies(Mount *m) { } static int mount_add_device_dependencies(Mount *m) { - bool device_wants_mount = false; + bool device_wants_mount; UnitDependencyMask mask; MountParameters *p; UnitDependency dep; @@ -346,8 +342,8 @@ static int mount_add_device_dependencies(Mount *m) { if (path_equal(m->where, "/")) return 0; - if (mount_is_auto(p) && !mount_is_automount(p) && MANAGER_IS_SYSTEM(UNIT(m)->manager)) - device_wants_mount = true; + device_wants_mount = + mount_is_auto(p) && !mount_is_automount(p) && MANAGER_IS_SYSTEM(UNIT(m)->manager); /* Mount units from /proc/self/mountinfo are not bound to devices * by default since they're 
subject to races when devices are @@ -379,16 +375,16 @@ static int mount_add_quota_dependencies(Mount *m) { if (!p) return 0; - if (!needs_quota(p)) + if (!mount_needs_quota(p)) return 0; mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT; - r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, NULL, true, mask); + r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, true, mask); if (r < 0) return r; - r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, NULL, true, mask); + r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, true, mask); if (r < 0) return r; @@ -427,10 +423,10 @@ static bool mount_is_extrinsic(Mount *m) { } static int mount_add_default_dependencies(Mount *m) { + const char *after, *before; UnitDependencyMask mask; - int r; MountParameters *p; - const char *after; + int r; assert(m); @@ -456,7 +452,7 @@ static int mount_add_default_dependencies(Mount *m) { * network.target, so that they are shut down only * after this mount unit is stopped. */ - r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, NULL, true, mask); + r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, true, mask); if (r < 0) return r; @@ -467,25 +463,32 @@ static int mount_add_default_dependencies(Mount *m) { * whose purpose it is to delay this until the network * is "up". 
*/ - r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, NULL, true, mask); + r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, true, mask); if (r < 0) return r; after = SPECIAL_REMOTE_FS_PRE_TARGET; - } else + before = SPECIAL_REMOTE_FS_TARGET; + } else { after = SPECIAL_LOCAL_FS_PRE_TARGET; + before = SPECIAL_LOCAL_FS_TARGET; + } + + r = unit_add_dependency_by_name(UNIT(m), UNIT_BEFORE, before, true, mask); + if (r < 0) + return r; - r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, NULL, true, mask); + r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, true, mask); if (r < 0) return r; - r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, mask); + r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, mask); if (r < 0) return r; /* If this is a tmpfs mount then we have to unmount it before we try to deactivate swaps */ if (streq_ptr(p->fstype, "tmpfs")) { - r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, NULL, true, mask); + r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, true, mask); if (r < 0) return r; } @@ -540,6 +543,10 @@ static int mount_add_extras(Mount *m) { assert(m); + /* Note: this call might be called after we already have been loaded once (and even when it has already been + * activated), in case data from /proc/self/mountinfo has changed. This means all code here needs to be ready + * to run with an already set up unit. 
*/ + if (u->fragment_path) m->from_fragment = true; @@ -609,28 +616,33 @@ static int mount_load_root_mount(Unit *u) { static int mount_load(Unit *u) { Mount *m = MOUNT(u); - int r; + int r, q, w; assert(u); assert(u->load_state == UNIT_STUB); r = mount_load_root_mount(u); - if (r < 0) - return r; if (m->from_proc_self_mountinfo || u->perpetual) - r = unit_load_fragment_and_dropin_optional(u); + q = unit_load_fragment_and_dropin_optional(u); else - r = unit_load_fragment_and_dropin(u); + q = unit_load_fragment_and_dropin(u); + + /* Add in some extras. Note we do this in all cases (even if we failed to load the unit) when announced by the + * kernel, because we need some things to be set up no matter what when the kernel establishes a mount and thus + * we need to update the state in our unit to track it. After all, consider that we don't allow changing the + * 'slice' field for a unit once it is active. */ + if (u->load_state == UNIT_LOADED || m->from_proc_self_mountinfo || u->perpetual) + w = mount_add_extras(m); + else + w = 0; + if (r < 0) return r; - - /* This is a new unit? 
Then let's add in some extras */ - if (u->load_state == UNIT_LOADED) { - r = mount_add_extras(m); - if (r < 0) - return r; - } + if (q < 0) + return q; + if (w < 0) + return w; return mount_verify(m); } @@ -639,6 +651,9 @@ static void mount_set_state(Mount *m, MountState state) { MountState old_state; assert(m); + if (m->state != state) + bus_unit_send_pending_change_signal(UNIT(m), false); + old_state = m->state; m->state = state; @@ -746,11 +761,12 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) { static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { - ExecParameters exec_params = { - .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + _cleanup_(exec_params_clear) ExecParameters exec_params = { + .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, + .exec_fd = -1, }; pid_t pid; int r; @@ -767,7 +783,9 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; - unit_set_exec_params(UNIT(m), &exec_params); + r = unit_set_exec_params(UNIT(m), &exec_params); + if (r < 0) + return r; r = exec_spawn(UNIT(m), c, @@ -795,9 +813,7 @@ static void mount_enter_dead(Mount *m, MountResult f) { if (m->result == MOUNT_SUCCESS) m->result = f; - if (m->result != MOUNT_SUCCESS) - log_unit_warning(UNIT(m), "Failed with result '%s'.", mount_result_to_string(m->result)); - + unit_log_result(UNIT(m), m->result == MOUNT_SUCCESS, mount_result_to_string(m->result)); mount_set_state(m, m->result != MOUNT_SUCCESS ? 
MOUNT_FAILED : MOUNT_DEAD); m->exec_runtime = exec_runtime_unref(m->exec_runtime, true); @@ -938,7 +954,6 @@ static void mount_enter_mounting(Mount *m) { (void) mkdir_p_label(m->where, m->directory_mode); unit_warn_if_dir_nonempty(UNIT(m), m->where); - unit_warn_leftover_processes(UNIT(m)); m->control_command_id = MOUNT_EXEC_MOUNT; @@ -1042,6 +1057,17 @@ fail: mount_enter_dead_or_mounted(m, MOUNT_SUCCESS); } +static void mount_cycle_clear(Mount *m) { + assert(m); + + /* Clear all state we shall forget for this new cycle */ + + m->result = MOUNT_SUCCESS; + m->reload_result = MOUNT_SUCCESS; + exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX); + UNIT(m)->reset_accounting = true; +} + static int mount_start(Unit *u) { Mount *m = MOUNT(u); int r; @@ -1072,12 +1098,9 @@ static int mount_start(Unit *u) { if (r < 0) return r; - m->result = MOUNT_SUCCESS; - m->reload_result = MOUNT_SUCCESS; - - u->reset_accounting = true; - + mount_cycle_clear(m); mount_enter_mounting(m); + return 1; } @@ -1138,21 +1161,23 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", mount_state_to_string(m->state)); - unit_serialize_item(u, f, "result", mount_result_to_string(m->result)); - unit_serialize_item(u, f, "reload-result", mount_result_to_string(m->reload_result)); + (void) serialize_item(f, "state", mount_state_to_string(m->state)); + (void) serialize_item(f, "result", mount_result_to_string(m->result)); + (void) serialize_item(f, "reload-result", mount_result_to_string(m->reload_result)); + (void) serialize_item_format(f, "n-retry-umount", "%u", m->n_retry_umount); if (m->control_pid > 0) - unit_serialize_item_format(u, f, "control-pid", PID_FMT, m->control_pid); + (void) serialize_item_format(f, "control-pid", PID_FMT, m->control_pid); if (m->control_command_id >= 0) - unit_serialize_item(u, f, "control-command", mount_exec_command_to_string(m->control_command_id)); + (void) serialize_item(f, 
"control-command", mount_exec_command_to_string(m->control_command_id)); return 0; } static int mount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) { Mount *m = MOUNT(u); + int r; assert(u); assert(key); @@ -1166,6 +1191,7 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F log_unit_debug(u, "Failed to parse state value: %s", value); else m->deserialized_state = state; + } else if (streq(key, "result")) { MountResult f; @@ -1184,13 +1210,17 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F else if (f != MOUNT_SUCCESS) m->reload_result = f; + } else if (streq(key, "n-retry-umount")) { + + r = safe_atou(value, &m->n_retry_umount); + if (r < 0) + log_unit_debug(u, "Failed to parse n-retry-umount value: %s", value); + } else if (streq(key, "control-pid")) { - pid_t pid; - if (parse_pid(value, &pid) < 0) + if (parse_pid(value, &m->control_pid) < 0) log_unit_debug(u, "Failed to parse control-pid value: %s", value); - else - m->control_pid = pid; + } else if (streq(key, "control-command")) { MountExecCommand id; @@ -1265,8 +1295,11 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID; } - log_unit_full(u, f == MOUNT_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0, - "Mount process exited, code=%s status=%i", sigchld_code_to_string(code), status); + unit_log_process_exit( + u, f == MOUNT_SUCCESS ? LOG_DEBUG : LOG_NOTICE, + "Mount process", + mount_exec_command_to_string(m->control_command_id), + code, status); /* Note that due to the io event priority logic, we can be sure the new mountinfo is loaded * before we process the SIGCHLD for the mount command. 
*/ @@ -1395,59 +1428,77 @@ static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *user return 0; } -typedef struct { - bool is_mounted; - bool just_mounted; - bool just_changed; -} MountSetupFlags; +static int update_parameters_proc_self_mount_info( + Mount *m, + const char *what, + const char *options, + const char *fstype) { + + MountParameters *p; + int r, q, w; + + p = &m->parameters_proc_self_mountinfo; + + r = free_and_strdup(&p->what, what); + if (r < 0) + return r; + + q = free_and_strdup(&p->options, options); + if (q < 0) + return q; + + w = free_and_strdup(&p->fstype, fstype); + if (w < 0) + return w; + + return r > 0 || q > 0 || w > 0; +} static int mount_setup_new_unit( - Unit *u, + Manager *m, + const char *name, const char *what, const char *where, const char *options, const char *fstype, - MountSetupFlags *flags) { - - MountParameters *p; + MountProcFlags *ret_flags, + Unit **ret) { - assert(u); - assert(flags); + _cleanup_(unit_freep) Unit *u = NULL; + int r; - u->source_path = strdup("/proc/self/mountinfo"); - MOUNT(u)->where = strdup(where); - if (!u->source_path || !MOUNT(u)->where) - return -ENOMEM; + assert(m); + assert(name); + assert(ret_flags); + assert(ret); - /* Make sure to initialize those fields before mount_is_extrinsic(). */ - MOUNT(u)->from_proc_self_mountinfo = true; - p = &MOUNT(u)->parameters_proc_self_mountinfo; + r = unit_new_for_name(m, sizeof(Mount), name, &u); + if (r < 0) + return r; - p->what = strdup(what); - p->options = strdup(options); - p->fstype = strdup(fstype); - if (!p->what || !p->options || !p->fstype) - return -ENOMEM; + r = free_and_strdup(&u->source_path, "/proc/self/mountinfo"); + if (r < 0) + return r; - if (!mount_is_extrinsic(MOUNT(u))) { - const char *target; - int r; + r = free_and_strdup(&MOUNT(u)->where, where); + if (r < 0) + return r; - target = mount_is_network(p) ? 
SPECIAL_REMOTE_FS_TARGET : SPECIAL_LOCAL_FS_TARGET; - r = unit_add_dependency_by_name(u, UNIT_BEFORE, target, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT); - if (r < 0) - return r; + r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype); + if (r < 0) + return r; - r = unit_add_dependency_by_name(u, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT); - if (r < 0) - return r; - } + /* This unit was generated because /proc/self/mountinfo reported it. Remember this, so that by the time we load + * the unit file for it (and thus add in extra deps right after) we know what source to attributes the deps + * to.*/ + MOUNT(u)->from_proc_self_mountinfo = true; + /* We have only allocated the stub now, let's enqueue this unit for loading now, so that everything else is + * loaded in now. */ unit_add_to_load_queue(u); - flags->is_mounted = true; - flags->just_mounted = true; - flags->just_changed = true; + *ret_flags = MOUNT_PROC_IS_MOUNTED | MOUNT_PROC_JUST_MOUNTED | MOUNT_PROC_JUST_CHANGED; + *ret = TAKE_PTR(u); return 0; } @@ -1457,11 +1508,10 @@ static int mount_setup_existing_unit( const char *where, const char *options, const char *fstype, - MountSetupFlags *flags) { + MountProcFlags *ret_flags) { - MountParameters *p; - bool load_extras = false; - int r1, r2, r3; + MountProcFlags flags = MOUNT_PROC_IS_MOUNTED; + int r; assert(u); assert(flags); @@ -1472,49 +1522,38 @@ static int mount_setup_existing_unit( return -ENOMEM; } - /* Make sure to initialize those fields before mount_is_extrinsic(). 
*/ - p = &MOUNT(u)->parameters_proc_self_mountinfo; - - r1 = free_and_strdup(&p->what, what); - r2 = free_and_strdup(&p->options, options); - r3 = free_and_strdup(&p->fstype, fstype); - if (r1 < 0 || r2 < 0 || r3 < 0) - return -ENOMEM; - - flags->just_changed = r1 > 0 || r2 > 0 || r3 > 0; - flags->is_mounted = true; - flags->just_mounted = !MOUNT(u)->from_proc_self_mountinfo || MOUNT(u)->just_mounted; - - MOUNT(u)->from_proc_self_mountinfo = true; + r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype); + if (r < 0) + return r; + if (r > 0) + flags |= MOUNT_PROC_JUST_CHANGED; - if (!mount_is_extrinsic(MOUNT(u)) && mount_is_network(p)) { - /* _netdev option may have shown up late, or on a - * remount. Add remote-fs dependencies, even though - * local-fs ones may already be there. - * - * Note: due to a current limitation (we don't track - * in the dependency "Set*" objects who created a - * dependency), we can only add deps, never lose them, - * until the next full daemon-reload. */ - unit_add_dependency_by_name(u, UNIT_BEFORE, SPECIAL_REMOTE_FS_TARGET, NULL, true, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT); - load_extras = true; + if (!MOUNT(u)->from_proc_self_mountinfo) { + flags |= MOUNT_PROC_JUST_MOUNTED; + MOUNT(u)->from_proc_self_mountinfo = true; } - if (u->load_state == UNIT_NOT_FOUND) { + if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) { + /* The unit was previously not found or otherwise not loaded. Now that the unit shows up in + * /proc/self/mountinfo we should reconsider it this, hence set it to UNIT_LOADED. */ u->load_state = UNIT_LOADED; u->load_error = 0; - /* Load in the extras later on, after we - * finished initialization of the unit */ - - /* FIXME: since we're going to load the unit later on, why setting load_extras=true ? 
*/ - load_extras = true; - flags->just_changed = true; + flags |= MOUNT_PROC_JUST_CHANGED; } - if (load_extras) - return mount_add_extras(MOUNT(u)); + if (FLAGS_SET(flags, MOUNT_PROC_JUST_CHANGED)) { + /* If things changed, then make sure that all deps are regenerated. Let's + * first remove all automatic deps, and then add in the new ones. */ + + unit_remove_dependencies(u, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT); + r = mount_add_extras(MOUNT(u)); + if (r < 0) + return r; + } + + *ret_flags = flags; return 0; } @@ -1527,7 +1566,7 @@ static int mount_setup_unit( bool set_flags) { _cleanup_free_ char *e = NULL; - MountSetupFlags flags; + MountProcFlags flags; Unit *u; int r; @@ -1551,45 +1590,32 @@ static int mount_setup_unit( r = unit_name_from_path(where, ".mount", &e); if (r < 0) - return r; + return log_error_errno(r, "Failed to generate unit name from path '%s': %m", where); u = manager_get_unit(m, e); - if (!u) { - /* First time we see this mount point meaning that it's - * not been initiated by a mount unit but rather by the - * sysadmin having called mount(8) directly. */ - r = unit_new_for_name(m, sizeof(Mount), e, &u); - if (r < 0) - goto fail; - - r = mount_setup_new_unit(u, what, where, options, fstype, &flags); - if (r < 0) - unit_free(u); - } else + if (u) r = mount_setup_existing_unit(u, what, where, options, fstype, &flags); - + else + /* First time we see this mount point meaning that it's not been initiated by a mount unit but rather + * by the sysadmin having called mount(8) directly. 
*/ + r = mount_setup_new_unit(m, e, what, where, options, fstype, &flags, &u); if (r < 0) - goto fail; + return log_warning_errno(r, "Failed to set up mount unit: %m"); - if (set_flags) { - MOUNT(u)->is_mounted = flags.is_mounted; - MOUNT(u)->just_mounted = flags.just_mounted; - MOUNT(u)->just_changed = flags.just_changed; - } - - if (flags.just_changed) + /* If the mount changed properties or state, let's notify our clients */ + if (flags & (MOUNT_PROC_JUST_CHANGED|MOUNT_PROC_JUST_MOUNTED)) unit_add_to_dbus_queue(u); + if (set_flags) + MOUNT(u)->proc_flags = flags; + return 0; -fail: - log_warning_errno(r, "Failed to set up mount unit: %m"); - return r; } static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) { _cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL; _cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL; - int r = 0; + int r; assert(m); @@ -1602,7 +1628,6 @@ static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) { if (r < 0) return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m"); - r = 0; for (;;) { struct libmnt_fs *fs; const char *device, *path, *options, *fstype; @@ -1631,12 +1656,10 @@ static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) { device_found_node(m, d, DEVICE_FOUND_MOUNT, DEVICE_FOUND_MOUNT); - k = mount_setup_unit(m, d, p, options, fstype, set_flags); - if (r == 0 && k < 0) - r = k; + (void) mount_setup_unit(m, d, p, options, fstype, set_flags); } - return r; + return 0; } static void mount_shutdown(Manager *m) { @@ -1694,7 +1717,7 @@ static void mount_enumerate_perpetual(Manager *m) { static bool mount_is_mounted(Mount *m) { assert(m); - return UNIT(m)->perpetual || m->is_mounted; + return UNIT(m)->perpetual || FLAGS_SET(m->proc_flags, MOUNT_PROC_IS_MOUNTED); } static void mount_enumerate(Manager *m) { @@ -1758,7 +1781,7 @@ fail: } static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) { - _cleanup_set_free_ Set *around = NULL, 
*gone = NULL; + _cleanup_set_free_free_ Set *around = NULL, *gone = NULL; Manager *m = userdata; const char *what; Iterator i; @@ -1783,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, if (r == 0) rescan = true; else if (r < 0) - return log_error_errno(r, "Failed to drain libmount events"); + return log_error_errno(r, "Failed to drain libmount events: %m"); } while (r == 0); log_debug("libmount event [rescan: %s]", yes_no(rescan)); @@ -1794,11 +1817,8 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, r = mount_load_proc_self_mountinfo(m, true); if (r < 0) { /* Reset flags, just in case, for later calls */ - LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) { - Mount *mount = MOUNT(u); - - mount->is_mounted = mount->just_mounted = mount->just_changed = false; - } + LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) + MOUNT(u)->proc_flags = 0; return 0; } @@ -1819,7 +1839,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, /* Remember that this device might just have disappeared */ if (set_ensure_allocated(&gone, &path_hash_ops) < 0 || - set_put(gone, mount->parameters_proc_self_mountinfo.what) < 0) + set_put_strdup(gone, mount->parameters_proc_self_mountinfo.what) < 0) log_oom(); /* we don't care too much about OOM here... */ } @@ -1828,10 +1848,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, switch (mount->state) { case MOUNT_MOUNTED: - /* This has just been unmounted by - * somebody else, follow the state - * change. */ - mount->result = MOUNT_SUCCESS; /* make sure we forget any earlier umount failures */ + /* This has just been unmounted by somebody else, follow the state change. 
*/ mount_enter_dead(mount, MOUNT_SUCCESS); break; @@ -1839,7 +1856,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, break; } - } else if (mount->just_mounted || mount->just_changed) { + } else if (mount->proc_flags & (MOUNT_PROC_JUST_MOUNTED|MOUNT_PROC_JUST_CHANGED)) { /* A mount point was added or changed */ @@ -1850,7 +1867,8 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, /* This has just been mounted by somebody else, follow the state change, but let's * generate a new invocation ID for this implicitly and automatically. */ - (void) unit_acquire_invocation_id(UNIT(mount)); + (void) unit_acquire_invocation_id(u); + mount_cycle_clear(mount); mount_enter_mounted(mount, MOUNT_SUCCESS); break; @@ -1872,14 +1890,15 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, if (mount_is_mounted(mount) && mount->from_proc_self_mountinfo && mount->parameters_proc_self_mountinfo.what) { + /* Track devices currently used */ if (set_ensure_allocated(&around, &path_hash_ops) < 0 || - set_put(around, mount->parameters_proc_self_mountinfo.what) < 0) + set_put_strdup(around, mount->parameters_proc_self_mountinfo.what) < 0) log_oom(); } /* Reset the flags for later calls */ - mount->is_mounted = mount->just_mounted = mount->just_changed = false; + mount->proc_flags = 0; } SET_FOREACH(what, gone, i) { diff --git a/src/core/mount.h b/src/core/mount.h index 67ab8ecf93..2e59f1fe04 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -34,6 +34,13 @@ typedef struct MountParameters { char *fstype; } MountParameters; +/* Used while looking for mount points that vanished or got added from/to /proc/self/mountinfo */ +typedef enum MountProcFlags { + MOUNT_PROC_IS_MOUNTED = 1 << 0, + MOUNT_PROC_JUST_MOUNTED = 1 << 1, + MOUNT_PROC_JUST_CHANGED = 1 << 2, +} MountProcFlags; + struct Mount { Unit meta; @@ -45,11 +52,7 @@ struct Mount { bool from_proc_self_mountinfo:1; bool from_fragment:1; - /* 
Used while looking for mount points that vanished or got - * added from/to /proc/self/mountinfo */ - bool is_mounted:1; - bool just_mounted:1; - bool just_changed:1; + MountProcFlags proc_flags; bool sloppy_options; diff --git a/src/core/namespace.c b/src/core/namespace.c index e4930db15c..c2ca3e0334 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -20,6 +20,7 @@ #include "missing.h" #include "mkdir.h" #include "mount-util.h" +#include "mountpoint-util.h" #include "namespace.h" #include "path-util.h" #include "selinux-util.h" @@ -236,7 +237,8 @@ static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, boo } if (!path_is_absolute(e)) - return -EINVAL; + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Path is not absolute: %s", e); *((*p)++) = (MountEntry) { .path_const = e, @@ -263,7 +265,6 @@ static int append_empty_dir_mounts(MountEntry **p, char **strv) { .path_const = *i, .mode = EMPTY_DIR, .ignore = false, - .has_prefix = false, .read_only = true, .options_const = "mode=755", .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, @@ -302,35 +303,33 @@ static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, for (i = 0; i < n; i++) { const TemporaryFileSystem *t = tmpfs + i; _cleanup_free_ char *o = NULL, *str = NULL; - unsigned long flags = MS_NODEV|MS_STRICTATIME; + unsigned long flags; bool ro = false; if (!path_is_absolute(t->path)) - return -EINVAL; + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Path is not absolute: %s", + t->path); - if (!isempty(t->options)) { - str = strjoin("mode=0755,", t->options); - if (!str) - return -ENOMEM; + str = strjoin("mode=0755,", t->options); + if (!str) + return -ENOMEM; - r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o); - if (r < 0) - return r; + r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o); + if (r < 0) + return log_debug_errno(r, "Failed to parse mount option '%s': %m", str); - ro = flags & MS_RDONLY; - if (ro) - 
flags ^= MS_RDONLY; - } + ro = flags & MS_RDONLY; + if (ro) + flags ^= MS_RDONLY; *((*p)++) = (MountEntry) { .path_const = t->path, .mode = TMPFS, .read_only = ro, - .options_malloc = o, + .options_malloc = TAKE_PTR(o), .flags = flags, }; - - o = NULL; } return 0; @@ -398,32 +397,22 @@ static int append_protect_system(MountEntry **p, ProtectSystem protect_system, b } } -static int mount_path_compare(const void *a, const void *b) { - const MountEntry *p = a, *q = b; +static int mount_path_compare(const MountEntry *a, const MountEntry *b) { int d; /* If the paths are not equal, then order prefixes first */ - d = path_compare(mount_entry_path(p), mount_entry_path(q)); + d = path_compare(mount_entry_path(a), mount_entry_path(b)); if (d != 0) return d; /* If the paths are equal, check the mode */ - if (p->mode < q->mode) - return -1; - if (p->mode > q->mode) - return 1; - - return 0; + return CMP((int) a->mode, (int) b->mode); } static int prefix_where_needed(MountEntry *m, size_t n, const char *root_directory) { size_t i; - /* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs - * that. */ - - if (!root_directory) - return 0; + /* Prefixes all paths in the bind mount table with the root directory if the entry needs that. 
*/ for (i = 0; i < n; i++) { char *s; @@ -566,36 +555,44 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, size_t *n = t - m; } -static int clone_device_node(const char *d, const char *temporary_mount, bool *make_devnode) { - const char *dn; +static int clone_device_node( + const char *d, + const char *temporary_mount, + bool *make_devnode) { + + _cleanup_free_ char *sl = NULL; + const char *dn, *bn, *t; struct stat st; int r; if (stat(d, &st) < 0) { - if (errno == ENOENT) + if (errno == ENOENT) { + log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d); return -ENXIO; - return -errno; + } + + return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d); } if (!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) - return -EINVAL; - - if (st.st_rdev == 0) - return -ENXIO; + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Device node '%s' to clone is not a device node, ignoring.", + d); dn = strjoina(temporary_mount, d); + /* First, try to create device node properly */ if (*make_devnode) { mac_selinux_create_file_prepare(d, st.st_mode); r = mknod(dn, st.st_mode, st.st_rdev); mac_selinux_create_file_clear(); - - if (r == 0) - return 0; + if (r >= 0) + goto add_symlink; if (errno != EPERM) return log_debug_errno(errno, "mknod failed for %s: %m", d); + /* This didn't work, let's not try this again for the next iterations. 
*/ *make_devnode = false; } @@ -604,9 +601,8 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m mac_selinux_create_file_prepare(d, 0); r = mknod(dn, S_IFREG, 0); mac_selinux_create_file_clear(); - if (r < 0 && errno != EEXIST) - return log_debug_errno(errno, "mknod fallback failed for %s: %m", d); + return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d); /* Fallback to bind-mounting: * The assumption here is that all used device nodes carry standard @@ -614,7 +610,23 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m * either be owned by root:root or root:tty (e.g. /dev/tty, /dev/ptmx) * and should not carry ACLs. */ if (mount(d, dn, NULL, MS_BIND, NULL) < 0) - return log_debug_errno(errno, "mount failed for %s: %m", d); + return log_debug_errno(errno, "Bind mounting failed for '%s': %m", d); + +add_symlink: + bn = path_startswith(d, "/dev/"); + if (!bn) + return 0; + + /* Create symlinks like /dev/char/1:9 → ../urandom */ + if (asprintf(&sl, "%s/dev/%s/%u:%u", temporary_mount, S_ISCHR(st.st_mode) ? 
"char" : "block", major(st.st_rdev), minor(st.st_rdev)) < 0) + return log_oom(); + + (void) mkdir_parents(sl, 0755); + + t = strjoina("../", bn); + + if (symlink(t, sl) < 0) + log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl); return 0; } @@ -639,35 +651,34 @@ static int mount_private_dev(MountEntry *m) { u = umask(0000); if (!mkdtemp(temporary_mount)) - return -errno; + return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount); dev = strjoina(temporary_mount, "/dev"); (void) mkdir(dev, 0755); if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to mount tmpfs on '%s': %m", dev); goto fail; } devpts = strjoina(temporary_mount, "/dev/pts"); (void) mkdir(devpts, 0755); if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to bind mount /dev/pts on '%s': %m", devpts); goto fail; } - /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx - * when /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible - * thus, in that case make a clone - * - * in nspawn and other containers it will be a symlink, in that case make it a symlink - */ + /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx. + * When /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible. + * Thus, in that case make a clone. + * In nspawn and other containers it will be a symlink, in that case make it a symlink. 
*/ r = is_symlink("/dev/ptmx"); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m"); goto fail; - if (r > 0) { + } else if (r > 0) { devptmx = strjoina(temporary_mount, "/dev/ptmx"); if (symlink("pts/ptmx", devptmx) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx); goto fail; } } else { @@ -680,20 +691,23 @@ static int mount_private_dev(MountEntry *m) { (void) mkdir(devshm, 0755); r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL); if (r < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to bind mount /dev/shm on '%s': %m", devshm); goto fail; } devmqueue = strjoina(temporary_mount, "/dev/mqueue"); (void) mkdir(devmqueue, 0755); - (void) mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL); + if (mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL) < 0) + log_debug_errno(errno, "Failed to bind mount /dev/mqueue on '%s', ignoring: %m", devmqueue); devhugepages = strjoina(temporary_mount, "/dev/hugepages"); (void) mkdir(devhugepages, 0755); - (void) mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL); + if (mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL) < 0) + log_debug_errno(errno, "Failed to bind mount /dev/hugepages on '%s', ignoring: %m", devhugepages); devlog = strjoina(temporary_mount, "/dev/log"); - (void) symlink("/run/systemd/journal/dev-log", devlog); + if (symlink("/run/systemd/journal/dev-log", devlog) < 0) + log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog); NULSTR_FOREACH(d, devnodes) { r = clone_device_node(d, temporary_mount, &can_mknod); @@ -702,7 +716,9 @@ static int mount_private_dev(MountEntry *m) { goto fail; } - dev_setup(temporary_mount, UID_INVALID, GID_INVALID); + r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID); + if (r < 0) + log_debug_errno(r, "Failed to setup basic device tree at '%s', ignoring: %m", temporary_mount); /* 
Create the /dev directory if missing. It is more likely to be * missing when the service is started with RootDirectory. This is @@ -711,9 +727,12 @@ static int mount_private_dev(MountEntry *m) { (void) mkdir_p_label(mount_entry_path(m), 0755); /* Unmount everything in old /dev */ - umount_recursive(mount_entry_path(m), 0); + r = umount_recursive(mount_entry_path(m), 0); + if (r < 0) + log_debug_errno(r, "Failed to unmount directories below '%s', ignoring: %m", mount_entry_path(m)); + if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to move mount point '%s' to '%s': %m", dev, mount_entry_path(m)); goto fail; } @@ -836,10 +855,10 @@ static int follow_symlink( if (r > 0) /* Reached the end, nothing more to resolve */ return 1; - if (m->n_followed >= CHASE_SYMLINKS_MAX) { /* put a boundary on things */ - log_debug("Symlink loop on '%s'.", mount_entry_path(m)); - return -ELOOP; - } + if (m->n_followed >= CHASE_SYMLINKS_MAX) /* put a boundary on things */ + return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), + "Symlink loop on '%s'.", + mount_entry_path(m)); log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target); @@ -881,10 +900,9 @@ static int apply_mount( } what = mode_to_inaccessible_node(target.st_mode); - if (!what) { - log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed"); - return -ELOOP; - } + if (!what) + return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), + "File type not supported for inaccessible mounts. Note that symlinks are not allowed"); break; } @@ -999,7 +1017,17 @@ static int apply_mount( return 0; } +/* Change the per-mount readonly flag on an existing mount */ +static int remount_bind_readonly(const char *path, unsigned long orig_flags) { + int r; + + r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL); + + return r < 0 ? 
-errno : 0; +} + static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) { + bool submounts = false; int r = 0; assert(m); @@ -1007,15 +1035,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self if (mount_entry_read_only(m)) { if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) { - /* Make superblock readonly */ - if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0) - r = -errno; - } else + r = remount_bind_readonly(mount_entry_path(m), m->flags); + } else { + submounts = true; r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo); + } } else if (m->mode == PRIVATE_DEV) { - /* Superblock can be readonly but the submounts can't */ - if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + /* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag. + * We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */ + r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS); } else return 0; @@ -1026,27 +1054,28 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self if (r == -ENOENT && m->ignore) r = 0; - return r; + if (r < 0) + return log_debug_errno(r, "Failed to re-mount '%s'%s read-only: %m", mount_entry_path(m), + submounts ? " and its submounts" : ""); + + return 0; } -static bool namespace_info_mount_apivfs(const char *root_directory, const NamespaceInfo *ns_info) { +static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) { assert(ns_info); /* * ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=, * since to protect the API VFS mounts, they need to be around in the - * first place... and RootDirectory= or RootImage= need to be set. + * first place... 
*/ - /* root_directory should point to a mount point */ - return root_directory && - (ns_info->mount_apivfs || - ns_info->protect_control_groups || - ns_info->protect_kernel_tunables); + return ns_info->mount_apivfs || + ns_info->protect_control_groups || + ns_info->protect_kernel_tunables; } static size_t namespace_calculate_mounts( - const char* root_directory, const NamespaceInfo *ns_info, char** read_write_paths, char** read_only_paths, @@ -1088,14 +1117,15 @@ static size_t namespace_calculate_mounts( (ns_info->protect_control_groups ? 1 : 0) + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + protect_home_cnt + protect_system_cnt + - (namespace_info_mount_apivfs(root_directory, ns_info) ? ELEMENTSOF(apivfs_table) : 0); + (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0); } static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) { + assert(root_directory); assert(n_mounts); assert(mounts || *n_mounts == 0); - qsort_safe(mounts, *n_mounts, sizeof(MountEntry), mount_path_compare); + typesafe_qsort(mounts, *n_mounts, mount_path_compare); drop_duplicates(mounts, n_mounts); drop_outside_root(root_directory, mounts, n_mounts); @@ -1127,11 +1157,9 @@ int setup_namespace( _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; _cleanup_free_ void *root_hash = NULL; MountEntry *m, *mounts = NULL; - size_t root_hash_size = 0; - const char *root; - size_t n_mounts; - bool make_slave; + size_t n_mounts, root_hash_size = 0; bool require_prefix = false; + const char *root; int r = 0; assert(ns_info); @@ -1151,19 +1179,19 @@ int setup_namespace( dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? 
O_RDONLY : O_RDWR, &loop_device); if (r < 0) - return r; + return log_debug_errno(r, "Failed to create loop device for root image: %m"); r = root_hash_load(root_image, &root_hash, &root_hash_size); if (r < 0) - return r; + return log_debug_errno(r, "Failed to load root hash: %m"); r = dissect_image(loop_device->fd, root_hash, root_hash_size, dissect_image_flags, &dissected_image); if (r < 0) - return r; + return log_debug_errno(r, "Failed to dissect image: %m"); r = dissected_image_decrypt(dissected_image, NULL, root_hash, root_hash_size, dissect_image_flags, &decrypted_image); if (r < 0) - return r; + return log_debug_errno(r, "Failed to decrypt dissected image: %m"); } if (root_directory) @@ -1181,7 +1209,6 @@ int setup_namespace( } n_mounts = namespace_calculate_mounts( - root, ns_info, read_write_paths, read_only_paths, @@ -1192,9 +1219,6 @@ int setup_namespace( tmp_dir, var_tmp_dir, protect_home, protect_system); - /* Set mount slave mode */ - make_slave = root || n_mounts > 0 || ns_info->private_mounts; - if (n_mounts > 0) { m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry)); r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix); @@ -1271,7 +1295,7 @@ int setup_namespace( if (r < 0) goto finish; - if (namespace_info_mount_apivfs(root, ns_info)) { + if (namespace_info_mount_apivfs(ns_info)) { r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths); if (r < 0) goto finish; @@ -1284,33 +1308,44 @@ int setup_namespace( if (r < 0) goto finish; - normalize_mounts(root_directory, mounts, &n_mounts); + normalize_mounts(root, mounts, &n_mounts); } + /* All above is just preparation, figuring out what to do. Let's now actually start doing something. 
*/ + if (unshare(CLONE_NEWNS) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m"); + if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS)) + /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place + * that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable + * error back, which the caller can use to detect this case (and only this) and optionally + * continue without namespacing applied. */ + r = -ENOANO; + goto finish; } - if (make_slave) { - /* Remount / as SLAVE so that nothing now mounted in the namespace - shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { - r = -errno; - goto finish; - } + /* Remount / as SLAVE so that nothing now mounted in the namespace + * shows up in the parent */ + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = log_debug_errno(errno, "Failed to remount '/' as SLAVE: %m"); + goto finish; } if (root_image) { /* A root image is specified, mount it to the right place */ r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to mount root image: %m"); goto finish; + } if (decrypted_image) { r = decrypted_image_relinquish(decrypted_image); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to relinquish decrypted image: %m"); goto finish; + } } loop_device_relinquish(loop_device); @@ -1319,20 +1354,22 @@ int setup_namespace( /* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. 
*/ r = path_is_mount_point(root, NULL, AT_SYMLINK_FOLLOW); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to detect that %s is a mount point or not: %m", root); goto finish; + } if (r == 0) { if (mount(root, root, NULL, MS_BIND|MS_REC, NULL) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to bind mount '%s': %m", root); goto finish; } } - } else if (root) { + } else { /* Let's mount the main root directory to the root directory to use */ if (mount("/", root, NULL, MS_BIND|MS_REC, NULL) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to bind mount '/' on '%s': %m", root); goto finish; } } @@ -1350,7 +1387,7 @@ int setup_namespace( * For example, this is the case with the option: 'InaccessiblePaths=/proc' */ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); if (!proc_self_mountinfo) { - r = -errno; + r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); goto finish; } @@ -1385,7 +1422,7 @@ int setup_namespace( if (!again) break; - normalize_mounts(root_directory, mounts, &n_mounts); + normalize_mounts(root, mounts, &n_mounts); } /* Create a blacklist we can pass to bind_mount_recursive() */ @@ -1402,18 +1439,18 @@ int setup_namespace( } } - if (root) { - /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ - r = mount_move_root(root); - if (r < 0) - goto finish; + /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ + r = mount_move_root(root); + if (r < 0) { + log_debug_errno(r, "Failed to mount root with MS_MOVE: %m"); + goto finish; } /* Remount / as the desired mode. Note that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. 
*/ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to remount '/' with desired mount flags: %m"); goto finish; } diff --git a/src/core/path.c b/src/core/path.c index 68b13b610a..831e49df29 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -8,12 +8,14 @@ #include "bus-error.h" #include "bus-util.h" #include "dbus-path.h" +#include "dbus-unit.h" #include "fd-util.h" #include "fs-util.h" #include "glob-util.h" #include "macro.h" #include "mkdir.h" #include "path.h" +#include "serialize.h" #include "special.h" #include "stat-util.h" #include "string-table.h" @@ -145,10 +147,9 @@ int path_spec_fd_event(PathSpec *s, uint32_t revents) { ssize_t l; int r = 0; - if (revents != EPOLLIN) { - log_error("Got invalid poll event on inotify."); - return -EINVAL; - } + if (revents != EPOLLIN) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Got invalid poll event on inotify."); l = read(s->inotify_fd, &buffer, sizeof(buffer)); if (l < 0) { @@ -298,17 +299,17 @@ static int path_add_default_dependencies(Path *p) { if (!UNIT(p)->default_dependencies) return 0; - r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; if (MANAGER_IS_SYSTEM(UNIT(p)->manager)) { - r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; } - return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } 
static int path_add_trigger_dependencies(Path *p) { @@ -410,6 +411,9 @@ static void path_set_state(Path *p, PathState state) { PathState old_state; assert(p); + if (p->state != state) + bus_unit_send_pending_change_signal(UNIT(p), false); + old_state = p->state; p->state = state; @@ -448,9 +452,7 @@ static void path_enter_dead(Path *p, PathResult f) { if (p->result == PATH_SUCCESS) p->result = f; - if (p->result != PATH_SUCCESS) - log_unit_warning(UNIT(p), "Failed with result '%s'.", path_result_to_string(p->result)); - + unit_log_result(UNIT(p), p->result == PATH_SUCCESS, path_result_to_string(p->result)); path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD); } @@ -600,8 +602,8 @@ static int path_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", path_state_to_string(p->state)); - unit_serialize_item(u, f, "result", path_result_to_string(p->result)); + (void) serialize_item(f, "state", path_state_to_string(p->state)); + (void) serialize_item(f, "result", path_result_to_string(p->result)); return 0; } diff --git a/src/core/scope.c b/src/core/scope.c index 751556fecf..e478661f94 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -5,9 +5,11 @@ #include "alloc-util.h" #include "dbus-scope.h" +#include "dbus-unit.h" #include "load-dropin.h" #include "log.h" #include "scope.h" +#include "serialize.h" #include "special.h" #include "string-table.h" #include "string-util.h" @@ -81,6 +83,9 @@ static void scope_set_state(Scope *s, ScopeState state) { ScopeState old_state; assert(s); + if (s->state != state) + bus_unit_send_pending_change_signal(UNIT(s), false); + old_state = s->state; s->state = state; @@ -110,7 +115,7 @@ static int scope_add_default_dependencies(Scope *s) { r = unit_add_two_dependencies_by_name( UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, - SPECIAL_SHUTDOWN_TARGET, NULL, true, + SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; @@ -239,9 +244,7 @@ static void 
scope_enter_dead(Scope *s, ScopeResult f) { if (s->result == SCOPE_SUCCESS) s->result = f; - if (s->result != SCOPE_SUCCESS) - log_unit_warning(UNIT(s), "Failed with result '%s'.", scope_result_to_string(s->result)); - + unit_log_result(UNIT(s), s->result == SCOPE_SUCCESS, scope_result_to_string(s->result)); scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD); } @@ -402,11 +405,11 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", scope_state_to_string(s->state)); - unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned)); + (void) serialize_item(f, "state", scope_state_to_string(s->state)); + (void) serialize_bool(f, "was-abandoned", s->was_abandoned); if (s->controller) - unit_serialize_item(u, f, "controller", s->controller); + (void) serialize_item(f, "controller", s->controller); return 0; } @@ -441,7 +444,7 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F r = free_and_strdup(&s->controller, value); if (r < 0) - log_oom(); + return log_oom(); } else log_unit_debug(u, "Unknown serialization key: %s", key); diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c index 39e994afd7..0c6d885b8c 100644 --- a/src/core/selinux-access.c +++ b/src/core/selinux-access.c @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ -/*** - Copyright © 2012 Dan Walsh -***/ #include "selinux-access.h" diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h index 59f2e60c77..1e75930f57 100644 --- a/src/core/selinux-access.h +++ b/src/core/selinux-access.h @@ -1,10 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #pragma once -/*** - Copyright © 2012 Dan Walsh -***/ - #include "sd-bus.h" #include "bus-util.h" diff --git a/src/core/service.c b/src/core/service.c index db1356c417..cfa3271232 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -12,6 +12,7 @@ #include "bus-kernel.h" #include "bus-util.h" #include 
"dbus-service.h" +#include "dbus-unit.h" #include "def.h" #include "env-util.h" #include "escape.h" @@ -27,6 +28,7 @@ #include "parse-util.h" #include "path-util.h" #include "process-util.h" +#include "serialize.h" #include "service.h" #include "signal-util.h" #include "special.h" @@ -48,7 +50,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_EXITED] = UNIT_ACTIVE, [SERVICE_RELOAD] = UNIT_RELOADING, [SERVICE_STOP] = UNIT_DEACTIVATING, - [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING, + [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, @@ -69,7 +71,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_EXITED] = UNIT_ACTIVE, [SERVICE_RELOAD] = UNIT_RELOADING, [SERVICE_STOP] = UNIT_DEACTIVATING, - [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING, + [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, @@ -79,9 +81,10 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING }; -static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata); +static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata); static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata); static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata); +static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata); static void service_enter_signal(Service *s, ServiceState state, ServiceResult f); static void service_enter_reload_by_notify(Service *s); @@ -105,6 +108,8 @@ static void service_init(Unit *u) { s->exec_context.keyring_mode = 
MANAGER_IS_SYSTEM(u->manager) ? EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT; + + s->watchdog_original_usec = USEC_INFINITY; } static void service_unwatch_control_pid(Service *s) { @@ -193,19 +198,21 @@ static usec_t service_get_watchdog_usec(Service *s) { if (s->watchdog_override_enable) return s->watchdog_override_usec; - else - return s->watchdog_usec; + + return s->watchdog_original_usec; } static void service_start_watchdog(Service *s) { - int r; usec_t watchdog_usec; + int r; assert(s); watchdog_usec = service_get_watchdog_usec(s); - if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) + if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) { + service_stop_watchdog(s); return; + } if (s->watchdog_event_source) { r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec)); @@ -233,50 +240,55 @@ static void service_start_watchdog(Service *s) { * of living before we consider a service died. */ r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE); } - if (r < 0) log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m"); } -static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) { - assert(s); +static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) { + usec_t current; + int r; - if (s->timer_event_source) { - uint64_t current = 0, extended = 0; - int r; + assert(s); - if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY)) - return; + /* Extends the specified event source timer to at least the specified time, unless it is already later + * anyway. 
*/ - extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec); + if (!source) + return; - r = sd_event_source_get_time(s->timer_event_source, &current); - if (r < 0) - log_unit_error_errno(UNIT(s), r, "Failed to retrieve timeout timer: %m"); - else if (extended > current) { - r = sd_event_source_set_time(s->timer_event_source, extended); - if (r < 0) - log_unit_warning_errno(UNIT(s), r, "Failed to set timeout timer: %m"); - } + r = sd_event_source_get_time(source, &current); + if (r < 0) { + const char *desc; + (void) sd_event_source_get_description(s->timer_event_source, &desc); + log_unit_warning_errno(UNIT(s), r, "Failed to retrieve timeout time for event source '%s', ignoring: %m", strna(desc)); + return; + } - if (s->watchdog_event_source) { - /* extend watchdog if necessary. We've asked for an extended timeout so we - * shouldn't expect a watchdog timeout in the interval in between */ - r = sd_event_source_get_time(s->watchdog_event_source, &current); - if (r < 0) { - log_unit_error_errno(UNIT(s), r, "Failed to retrieve watchdog timer: %m"); - return; - } + if (current >= extended) /* Current timeout is already longer, ignore this. 
*/ + return; - if (extended > current) { - r = sd_event_source_set_time(s->watchdog_event_source, extended); - if (r < 0) - log_unit_warning_errno(UNIT(s), r, "Failed to set watchdog timer: %m"); - } - } + r = sd_event_source_set_time(source, extended); + if (r < 0) { + const char *desc; + (void) sd_event_source_get_description(s->timer_event_source, &desc); + log_unit_warning_errno(UNIT(s), r, "Failed to set timeout time for even source '%s', ignoring %m", strna(desc)); } } +static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) { + usec_t extended; + + assert(s); + + if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY)) + return; + + extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec); + + service_extend_event_source_timeout(s, s->timer_event_source, extended); + service_extend_event_source_timeout(s, s->watchdog_event_source, extended); +} + static void service_reset_watchdog(Service *s) { assert(s); @@ -284,7 +296,7 @@ static void service_reset_watchdog(Service *s) { service_start_watchdog(s); } -static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) { +static void service_override_watchdog_timeout(Service *s, usec_t watchdog_override_usec) { assert(s); s->watchdog_override_enable = true; @@ -389,6 +401,7 @@ static void service_done(Unit *u) { service_stop_watchdog(s); s->timer_event_source = sd_event_source_unref(s->timer_event_source); + s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source); service_release_resources(u); } @@ -536,8 +549,13 @@ static int service_verify(Service *s) { if (UNIT(s)->load_state != UNIT_LOADED) return 0; - if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP]) { - log_unit_error(UNIT(s), "Service lacks both ExecStart= and ExecStop= setting. 
Refusing."); + if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP] + && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) { + /* FailureAction= only makes sense if one of the start or stop commands is specified. + * SuccessAction= will be executed unconditionally if no commands are specified. Hence, + * either a command or SuccessAction= are required. */ + + log_unit_error(UNIT(s), "Service has no ExecStart=, ExecStop=, or SuccessAction=. Refusing."); return -ENOEXEC; } @@ -546,8 +564,8 @@ static int service_verify(Service *s) { return -ENOEXEC; } - if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START]) { - log_unit_error(UNIT(s), "Service has no ExecStart= setting, which is only allowed for RemainAfterExit=yes services. Refusing."); + if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START] && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) { + log_unit_error(UNIT(s), "Service has no ExecStart= and no SuccessAction= settings and does not have RemainAfterExit=yes set. Refusing."); return -ENOEXEC; } @@ -607,7 +625,7 @@ static int service_add_default_dependencies(Service *s) { * require it, so that we fail if we can't acquire * it. */ - r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; } else { @@ -615,7 +633,7 @@ static int service_add_default_dependencies(Service *s) { /* In the --user instance there's no sysinit.target, * in that case require basic.target instead. 
*/ - r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; } @@ -623,12 +641,12 @@ static int service_add_default_dependencies(Service *s) { /* Second, if the rest of the base system is in the same * transaction, order us after it, but do not pull it in or * even require it. */ - r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; /* Third, add us in for normal shutdown. */ - return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } static void service_fix_output(Service *s) { @@ -659,12 +677,12 @@ static int service_setup_bus_name(Service *s) { if (!s->bus_name) return 0; - r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE); if (r < 0) return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m"); /* We always want to be ordered against dbus.socket if both are in the transaction. 
*/ - r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE); if (r < 0) return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m"); @@ -917,8 +935,8 @@ static int service_load_pid_file(Service *s, bool may_warn) { prio = may_warn ? LOG_INFO : LOG_DEBUG; fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN|CHASE_SAFE, NULL); - if (fd == -EPERM) { - log_unit_full(UNIT(s), LOG_DEBUG, fd, "Permission denied while opening PID file or potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file); + if (fd == -ENOLINK) { + log_unit_full(UNIT(s), LOG_DEBUG, fd, "Potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file); questionable_pid_file = true; @@ -1018,6 +1036,9 @@ static void service_set_state(Service *s, ServiceState state) { assert(s); + if (s->state != state) + bus_unit_send_pending_change_signal(UNIT(s), false); + table = s->type == SERVICE_IDLE ? 
state_translation_table_idle : state_translation_table; old_state = s->state; @@ -1029,7 +1050,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_AUTO_RESTART)) s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -1037,7 +1058,7 @@ static void service_set_state(Service *s, ServiceState state) { if (!IN_SET(state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { service_unwatch_main_pid(s); s->main_command = NULL; @@ -1046,7 +1067,7 @@ static void service_set_state(Service *s, ServiceState state) { if (!IN_SET(state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { service_unwatch_control_pid(s); s->control_command = NULL; @@ -1061,11 +1082,14 @@ static void service_set_state(Service *s, ServiceState state) { if (!IN_SET(state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) 
&& !(state == SERVICE_DEAD && UNIT(s)->job)) service_close_socket_fd(s); + if (state != SERVICE_START) + s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source); + if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) service_stop_watchdog(s); @@ -1097,7 +1121,7 @@ static usec_t service_coldplug_timeout(Service *s) { return usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec); case SERVICE_STOP: - case SERVICE_STOP_SIGABRT: + case SERVICE_STOP_WATCHDOG: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: case SERVICE_STOP_POST: @@ -1132,7 +1156,7 @@ static int service_coldplug(Unit *u) { (IN_SET(s->deserialized_state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) { r = unit_watch_pid(UNIT(s), s->main_pid); if (r < 0) @@ -1144,7 +1168,7 @@ static int service_coldplug(Unit *u) { IN_SET(s->deserialized_state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { r = unit_watch_pid(UNIT(s), s->control_pid); if (r < 0) @@ -1178,21 +1202,23 @@ static int service_coldplug(Unit *u) { return 0; } -static int service_collect_fds(Service *s, - int **fds, - char ***fd_names, - unsigned *n_storage_fds, - unsigned *n_socket_fds) { +static int service_collect_fds( + Service *s, + int **fds, + char ***fd_names, + size_t *n_socket_fds, + size_t *n_storage_fds) { _cleanup_strv_free_ char **rfd_names = NULL; _cleanup_free_ int *rfds = NULL; - unsigned rn_socket_fds = 0, rn_storage_fds = 0; + 
size_t rn_socket_fds = 0, rn_storage_fds = 0; int r; assert(s); assert(fds); assert(fd_names); assert(n_socket_fds); + assert(n_storage_fds); if (s->socket_fd >= 0) { @@ -1203,7 +1229,7 @@ static int service_collect_fds(Service *s, return -ENOMEM; rfds[0] = s->socket_fd; - rfd_names = strv_new("connection", NULL); + rfd_names = strv_new("connection"); if (!rfd_names) return -ENOMEM; @@ -1256,7 +1282,7 @@ static int service_collect_fds(Service *s, if (s->n_fd_store > 0) { ServiceFDStore *fs; - unsigned n_fds; + size_t n_fds; char **nl; int *t; @@ -1294,6 +1320,63 @@ static int service_collect_fds(Service *s, return 0; } +static int service_allocate_exec_fd_event_source( + Service *s, + int fd, + sd_event_source **ret_event_source) { + + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int r; + + assert(s); + assert(fd >= 0); + assert(ret_event_source); + + r = sd_event_add_io(UNIT(s)->manager->event, &source, fd, 0, service_dispatch_exec_io, s); + if (r < 0) + return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m"); + + /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */ + + r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3); + if (r < 0) + return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m"); + + (void) sd_event_source_set_description(source, "service event_fd"); + + r = sd_event_source_set_io_fd_own(source, true); + if (r < 0) + return log_unit_error_errno(UNIT(s), r, "Failed to pass ownership of fd to event source: %m"); + + *ret_event_source = TAKE_PTR(source); + return 0; +} + +static int service_allocate_exec_fd( + Service *s, + sd_event_source **ret_event_source, + int* ret_exec_fd) { + + _cleanup_close_pair_ int p[2] = { -1, -1 }; + int r; + + assert(s); + assert(ret_event_source); + assert(ret_exec_fd); + + if (pipe2(p, O_CLOEXEC|O_NONBLOCK) < 0) + return log_unit_error_errno(UNIT(s), errno, 
"Failed to allocate exec_fd pipe: %m"); + + r = service_allocate_exec_fd_event_source(s, p[0], ret_event_source); + if (r < 0) + return r; + + p[0] = -1; + *ret_exec_fd = TAKE_FD(p[1]); + + return 0; +} + static bool service_exec_needs_notify_socket(Service *s, ExecFlags flags) { assert(s); @@ -1320,14 +1403,17 @@ static int service_spawn( ExecFlags flags, pid_t *_pid) { - ExecParameters exec_params = { + _cleanup_(exec_params_clear) ExecParameters exec_params = { .flags = flags, .stdin_fd = -1, .stdout_fd = -1, .stderr_fd = -1, + .exec_fd = -1, }; _cleanup_strv_free_ char **final_env = NULL, **our_env = NULL, **fd_names = NULL; - unsigned n_storage_fds = 0, n_socket_fds = 0, n_env = 0; + _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL; + size_t n_socket_fds = 0, n_storage_fds = 0, n_env = 0; + _cleanup_close_ int exec_fd = -1; _cleanup_free_ int *fds = NULL; pid_t pid; int r; @@ -1336,7 +1422,7 @@ static int service_spawn( assert(c); assert(_pid); - r = unit_prepare_exec(UNIT(s)); + r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */ if (r < 0) return r; @@ -1353,11 +1439,19 @@ static int service_spawn( s->exec_context.std_output == EXEC_OUTPUT_SOCKET || s->exec_context.std_error == EXEC_OUTPUT_SOCKET) { - r = service_collect_fds(s, &fds, &fd_names, &n_storage_fds, &n_socket_fds); + r = service_collect_fds(s, &fds, &fd_names, &n_socket_fds, &n_storage_fds); if (r < 0) return r; - log_unit_debug(UNIT(s), "Passing %i fds to service", n_storage_fds + n_socket_fds); + log_unit_debug(UNIT(s), "Passing %zu fds to service", n_socket_fds + n_storage_fds); + } + + if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) { + assert(!s->exec_fd_event_source); + + r = service_allocate_exec_fd(s, &exec_fd_source, &exec_fd); + if (r < 0) + return r; } r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout)); @@ -1433,32 +1527,31 @@ static int service_spawn( } } - unit_set_exec_params(UNIT(s), 
&exec_params); + r = unit_set_exec_params(UNIT(s), &exec_params); + if (r < 0) + return r; final_env = strv_env_merge(2, exec_params.environment, our_env, NULL); if (!final_env) return -ENOMEM; - /* System services should get a new keyring by default. */ - SET_FLAG(exec_params.flags, EXEC_NEW_KEYRING, MANAGER_IS_SYSTEM(UNIT(s)->manager)); - /* System D-Bus needs nss-systemd disabled, so that we don't deadlock */ SET_FLAG(exec_params.flags, EXEC_NSS_BYPASS_BUS, MANAGER_IS_SYSTEM(UNIT(s)->manager) && unit_has_name(UNIT(s), SPECIAL_DBUS_SERVICE)); - exec_params.argv = c->argv; - exec_params.environment = final_env; + strv_free_and_replace(exec_params.environment, final_env); exec_params.fds = fds; exec_params.fd_names = fd_names; - exec_params.n_storage_fds = n_storage_fds; exec_params.n_socket_fds = n_socket_fds; - exec_params.watchdog_usec = s->watchdog_usec; + exec_params.n_storage_fds = n_storage_fds; + exec_params.watchdog_usec = service_get_watchdog_usec(s); exec_params.selinux_context_net = s->socket_fd_selinux_context_net; if (s->type == SERVICE_IDLE) exec_params.idle_pipe = UNIT(s)->manager->idle_pipe; exec_params.stdin_fd = s->stdin_fd; exec_params.stdout_fd = s->stdout_fd; exec_params.stderr_fd = s->stderr_fd; + exec_params.exec_fd = exec_fd; r = exec_spawn(UNIT(s), c, @@ -1470,6 +1563,9 @@ static int service_spawn( if (r < 0) return r; + s->exec_fd_event_source = TAKE_PTR(exec_fd_source); + s->exec_fd_hot = false; + r = unit_watch_pid(UNIT(s), pid); if (r < 0) /* FIXME: we need to do something here */ return r; @@ -1604,8 +1700,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) if (s->result == SERVICE_SUCCESS) s->result = f; - if (s->result != SERVICE_SUCCESS) - log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result)); + unit_log_result(UNIT(s), s->result == SERVICE_SUCCESS, service_result_to_string(s->result)); if (allow_restart && service_shall_restart(s)) s->will_auto_restart = true; 
@@ -1674,7 +1769,6 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { s->result = f; service_unwatch_control_pid(s); - (void) unit_enqueue_rewatch_pids(UNIT(s)); s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST]; @@ -1684,7 +1778,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -1703,8 +1797,8 @@ fail: static int state_to_kill_operation(ServiceState state) { switch (state) { - case SERVICE_STOP_SIGABRT: - return KILL_ABORT; + case SERVICE_STOP_WATCHDOG: + return KILL_WATCHDOG; case SERVICE_STOP_SIGTERM: case SERVICE_FINAL_SIGTERM: @@ -1750,9 +1844,9 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f goto fail; service_set_state(s, state); - } else if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill) + } else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill) service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS); - else if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) + else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) service_enter_stop_post(s, SERVICE_SUCCESS); else if (state == SERVICE_FINAL_SIGTERM && s->kill_context.send_sigkill) service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS); @@ -1764,7 +1858,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f fail: log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m"); - if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) + if (IN_SET(state, 
SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES); else service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true); @@ -1799,7 +1893,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -1877,7 +1971,7 @@ static void service_enter_start_post(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -1947,6 +2041,12 @@ static void service_enter_start(Service *s) { goto fail; } + /* We force a fake state transition here. Otherwise, the unit would go directly from + * SERVICE_DEAD to SERVICE_DEAD without SERVICE_ACTIVATING or SERVICE_ACTIVE + * inbetween. This way we can later trigger actions that depend on the state + * transition, including SuccessAction=. */ + service_set_state(s, SERVICE_START); + service_enter_start_post(s); return; } @@ -1981,14 +2081,12 @@ static void service_enter_start(Service *s) { s->control_pid = pid; service_set_state(s, SERVICE_START); - } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY)) { + } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_EXEC)) { - /* For oneshot services we wait until the start - * process exited, too, but it is our main process. */ + /* For oneshot services we wait until the start process exited, too, but it is our main process. */ - /* For D-Bus services we know the main pid right away, - * but wait for the bus name to appear on the - * bus. Notify services are similar. 
*/ + /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the + * bus. 'notify' and 'exec' services are similar. */ service_set_main_pid(s, pid); service_set_state(s, SERVICE_START); @@ -2117,7 +2215,7 @@ static void service_enter_reload(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -2157,7 +2255,8 @@ static void service_run_next_control(Service *s) { timeout, EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL| (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)| - (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0), + (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)| + (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0), &s->control_pid); if (r < 0) goto fail; @@ -2216,7 +2315,7 @@ static int service_start(Unit *u) { /* We cannot fulfill this request right now, try again later * please! 
*/ if (IN_SET(s->state, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) return -EAGAIN; @@ -2253,15 +2352,17 @@ static int service_start(Unit *u) { s->main_pid_alien = false; s->forbid_restart = false; - u->reset_accounting = true; - s->status_text = mfree(s->status_text); s->status_errno = 0; s->notify_state = NOTIFY_UNKNOWN; + s->watchdog_original_usec = s->watchdog_usec; s->watchdog_override_enable = false; - s->watchdog_override_usec = 0; + s->watchdog_override_usec = USEC_INFINITY; + + exec_command_reset_status_list_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX); + exec_status_reset(&s->main_exec_status); /* This is not an automatic restart? Flush the restart counter then */ if (s->flush_n_restarts) { @@ -2269,6 +2370,8 @@ static int service_start(Unit *u) { s->flush_n_restarts = false; } + u->reset_accounting = true; + service_enter_start_pre(s); return 1; } @@ -2283,7 +2386,7 @@ static int service_stop(Unit *u) { /* Already on it */ if (IN_SET(s->state, - SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, + SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) return 0; @@ -2342,13 +2445,13 @@ static unsigned service_exec_command_index(Unit *u, ServiceExecCommand id, ExecC } static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command) { + _cleanup_free_ char *args = NULL, *p = NULL; + size_t allocated = 0, length = 0; Service *s = SERVICE(u); + const char *type, *key; ServiceExecCommand id; unsigned idx; - const char *type; char **arg; - _cleanup_free_ char *args = NULL, *p = NULL; - size_t allocated = 0, length = 0; assert(s); assert(f); @@ -2367,16 +2470,16 @@ static int 
service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command idx = service_exec_command_index(u, id, command); STRV_FOREACH(arg, command->argv) { - size_t n; _cleanup_free_ char *e = NULL; + size_t n; - e = xescape(*arg, WHITESPACE); + e = cescape(*arg); if (!e) - return -ENOMEM; + return log_oom(); n = strlen(e); if (!GREEDY_REALLOC(args, allocated, length + 1 + n + 1)) - return -ENOMEM; + return log_oom(); if (length > 0) args[length++] = ' '; @@ -2386,16 +2489,16 @@ static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command } if (!GREEDY_REALLOC(args, allocated, length + 1)) - return -ENOMEM; + return log_oom(); + args[length++] = 0; - p = xescape(command->path, WHITESPACE); + p = cescape(command->path); if (!p) return -ENOMEM; - fprintf(f, "%s-command=%s %u %s %s\n", type, service_exec_command_to_string(id), idx, p, args); - - return 0; + key = strjoina(type, "-command"); + return serialize_item_format(f, key, "%s %u %s %s", service_exec_command_to_string(id), idx, p, args); } static int service_serialize(Unit *u, FILE *f, FDSet *fds) { @@ -2407,47 +2510,55 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", service_state_to_string(s->state)); - unit_serialize_item(u, f, "result", service_result_to_string(s->result)); - unit_serialize_item(u, f, "reload-result", service_result_to_string(s->reload_result)); + (void) serialize_item(f, "state", service_state_to_string(s->state)); + (void) serialize_item(f, "result", service_result_to_string(s->result)); + (void) serialize_item(f, "reload-result", service_result_to_string(s->reload_result)); if (s->control_pid > 0) - unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid); + (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid); if (s->main_pid_known && s->main_pid > 0) - unit_serialize_item_format(u, f, "main-pid", PID_FMT, s->main_pid); + (void) serialize_item_format(f, 
"main-pid", PID_FMT, s->main_pid); - unit_serialize_item(u, f, "main-pid-known", yes_no(s->main_pid_known)); - unit_serialize_item(u, f, "bus-name-good", yes_no(s->bus_name_good)); - unit_serialize_item(u, f, "bus-name-owner", s->bus_name_owner); + (void) serialize_bool(f, "main-pid-known", s->main_pid_known); + (void) serialize_bool(f, "bus-name-good", s->bus_name_good); + (void) serialize_bool(f, "bus-name-owner", s->bus_name_owner); - unit_serialize_item_format(u, f, "n-restarts", "%u", s->n_restarts); - unit_serialize_item(u, f, "flush-n-restarts", yes_no(s->flush_n_restarts)); + (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts); + (void) serialize_bool(f, "flush-n-restarts", s->flush_n_restarts); - r = unit_serialize_item_escaped(u, f, "status-text", s->status_text); + r = serialize_item_escaped(f, "status-text", s->status_text); if (r < 0) return r; service_serialize_exec_command(u, f, s->control_command); service_serialize_exec_command(u, f, s->main_command); - r = unit_serialize_item_fd(u, f, fds, "stdin-fd", s->stdin_fd); + r = serialize_fd(f, fds, "stdin-fd", s->stdin_fd); if (r < 0) return r; - r = unit_serialize_item_fd(u, f, fds, "stdout-fd", s->stdout_fd); + r = serialize_fd(f, fds, "stdout-fd", s->stdout_fd); if (r < 0) return r; - r = unit_serialize_item_fd(u, f, fds, "stderr-fd", s->stderr_fd); + r = serialize_fd(f, fds, "stderr-fd", s->stderr_fd); if (r < 0) return r; + if (s->exec_fd_event_source) { + r = serialize_fd(f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source)); + if (r < 0) + return r; + + (void) serialize_bool(f, "exec-fd-hot", s->exec_fd_hot); + } + if (UNIT_ISSET(s->accept_socket)) { - r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id); + r = serialize_item(f, "accept-socket", UNIT_DEREF(s->accept_socket)->id); if (r < 0) return r; } - r = unit_serialize_item_fd(u, f, fds, "socket-fd", s->socket_fd); + r = serialize_fd(f, fds, "socket-fd", s->socket_fd); if (r < 0) 
return r; @@ -2457,30 +2568,34 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { copy = fdset_put_dup(fds, fs->fd); if (copy < 0) - return copy; + return log_error_errno(copy, "Failed to copy file descriptor for serialization: %m"); c = cescape(fs->fdname); + if (!c) + return log_oom(); - unit_serialize_item_format(u, f, "fd-store-fd", "%i %s", copy, strempty(c)); + (void) serialize_item_format(f, "fd-store-fd", "%i %s", copy, c); } if (s->main_exec_status.pid > 0) { - unit_serialize_item_format(u, f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid); - dual_timestamp_serialize(f, "main-exec-status-start", &s->main_exec_status.start_timestamp); - dual_timestamp_serialize(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp); + (void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid); + (void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp); + (void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp); if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) { - unit_serialize_item_format(u, f, "main-exec-status-code", "%i", s->main_exec_status.code); - unit_serialize_item_format(u, f, "main-exec-status-status", "%i", s->main_exec_status.status); + (void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code); + (void) serialize_item_format(f, "main-exec-status-status", "%i", s->main_exec_status.status); } } - dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp); - - unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart)); + (void) serialize_dual_timestamp(f, "watchdog-timestamp", &s->watchdog_timestamp); + (void) serialize_bool(f, "forbid-restart", s->forbid_restart); if (s->watchdog_override_enable) - unit_serialize_item_format(u, f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec); + (void) serialize_item_format(f, 
"watchdog-override-usec", USEC_FMT, s->watchdog_override_usec); + + if (s->watchdog_original_usec != USEC_INFINITY) + (void) serialize_item_format(f, "watchdog-original-usec", USEC_FMT, s->watchdog_original_usec); return 0; } @@ -2516,10 +2631,10 @@ static int service_deserialize_exec_command(Unit *u, const char *key, const char _cleanup_free_ char *arg = NULL; r = extract_first_word(&value, &arg, NULL, EXTRACT_CUNESCAPE); + if (r < 0) + return r; if (r == 0) break; - else if (r < 0) - return r; switch (state) { case STATE_EXEC_COMMAND_TYPE: @@ -2658,18 +2773,16 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, r = cunescape(value, 0, &t); if (r < 0) - log_unit_debug_errno(u, r, "Failed to unescape status text: %s", value); - else { - free(s->status_text); - s->status_text = t; - } + log_unit_debug_errno(u, r, "Failed to unescape status text '%s': %m", value); + else + free_and_replace(s->status_text, t); } else if (streq(key, "accept-socket")) { Unit *socket; r = manager_load_unit(u->manager, value, NULL, NULL, &socket); if (r < 0) - log_unit_debug_errno(u, r, "Failed to load accept-socket unit: %s", value); + log_unit_debug_errno(u, r, "Failed to load accept-socket unit '%s': %m", value); else { unit_ref_set(&s->accept_socket, u, socket); SOCKET(socket)->n_connections++; @@ -2731,11 +2844,11 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, else s->main_exec_status.status = i; } else if (streq(key, "main-exec-status-start")) - dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp); + deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp); else if (streq(key, "main-exec-status-exit")) - dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp); + deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp); else if (streq(key, "watchdog-timestamp")) - dual_timestamp_deserialize(value, &s->watchdog_timestamp); + 
deserialize_dual_timestamp(value, &s->watchdog_timestamp); else if (streq(key, "forbid-restart")) { int b; @@ -2774,14 +2887,28 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, s->stderr_fd = fdset_remove(fds, fd); s->exec_context.stdio_as_fds = true; } + } else if (streq(key, "exec-fd")) { + int fd; + + if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) + log_unit_debug(u, "Failed to parse exec-fd value: %s", value); + else { + s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source); + + fd = fdset_remove(fds, fd); + if (service_allocate_exec_fd_event_source(s, fd, &s->exec_fd_event_source) < 0) + safe_close(fd); + } } else if (streq(key, "watchdog-override-usec")) { - usec_t watchdog_override_usec; - if (timestamp_deserialize(value, &watchdog_override_usec) < 0) + if (deserialize_usec(value, &s->watchdog_override_usec) < 0) log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value); - else { + else s->watchdog_override_enable = true; - s->watchdog_override_usec = watchdog_override_usec; - } + + } else if (streq(key, "watchdog-original-usec")) { + if (deserialize_usec(value, &s->watchdog_original_usec) < 0) + log_unit_debug(u, "Failed to parse watchdog_original_usec value: %s", value); + } else if (STR_IN_SET(key, "main-command", "control-command")) { r = service_deserialize_exec_command(u, key, value); if (r < 0) @@ -2857,7 +2984,7 @@ static int service_watch_pid_file(Service *s) { log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path); - r = path_spec_watch(s->pid_file_pathspec, service_dispatch_io); + r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io); if (r < 0) goto fail; @@ -2901,7 +3028,7 @@ static int service_demand_pid_file(Service *s) { return service_watch_pid_file(s); } -static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata) { +static int 
service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) { PathSpec *p = userdata; Service *s; @@ -2934,6 +3061,59 @@ fail: return 0; } +static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) { + Service *s = SERVICE(userdata); + + assert(s); + + log_unit_debug(UNIT(s), "got exec-fd event"); + + /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve() + * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically + * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the + * parent. We need to be careful however, as there are other reasons that we might cause the child's side of + * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless + * the child signalled us first that it is about to call the execve(). It does so by sending us a simple + * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it + * sends a zero byte we'll ignore POLLHUP on the fd again. */ + + for (;;) { + uint8_t x; + ssize_t n; + + n = read(fd, &x, sizeof(x)); + if (n < 0) { + if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */ + return 0; + + return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m"); + } + if (n == 0) { /* EOF → the event we are waiting for */ + + s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source); + + if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */ + log_unit_debug(UNIT(s), "Got EOF on exec-fd"); + + s->exec_fd_hot = false; + + /* Nice! This is what we have been waiting for. Transition to next state. 
*/ + if (s->type == SERVICE_EXEC && s->state == SERVICE_START) + service_enter_start_post(s); + } else + log_unit_debug(UNIT(s), "Got EOF on exec-fd while it was disabled, ignoring."); + + return 0; + } + + /* A byte was read → this turns on/off the exec fd logic */ + assert(n == sizeof(x)); + s->exec_fd_hot = x; + } + + return 0; +} + static void service_notify_cgroup_empty_event(Unit *u) { Service *s = SERVICE(u); @@ -2980,7 +3160,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { service_enter_running(s, SERVICE_SUCCESS); break; - case SERVICE_STOP_SIGABRT: + case SERVICE_STOP_WATCHDOG: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: @@ -3054,21 +3234,13 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption - * that the service already logged the reason at a higher log level on its own. However, if the service - * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise - * our log level to WARNING then. */ - - log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : - (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING), - LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s", - sigchld_code_to_string(code), status, - strna(code == CLD_EXITED - ? exit_status_to_string(status, EXIT_STATUS_FULL) - : signal_to_string(status))), - "EXIT_CODE=%s", sigchld_code_to_string(code), - "EXIT_STATUS=%i", status, - LOG_UNIT_ID(u), - LOG_UNIT_INVOCATION_ID(u)); + * that the service already logged the reason at a higher log level on its own. (Internally, + * unit_log_process_exit() will possibly bump this to WARNING if the service died due to a signal.) */ + unit_log_process_exit( + u, f == SERVICE_SUCCESS ? 
LOG_DEBUG : LOG_NOTICE, + "Main process", + service_exec_command_to_string(SERVICE_EXEC_START), + code, status); if (s->result == SERVICE_SUCCESS) s->result = f; @@ -3124,7 +3296,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_running(s, f); break; - case SERVICE_STOP_SIGABRT: + case SERVICE_STOP_WATCHDOG: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: @@ -3157,9 +3329,11 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { f = SERVICE_SUCCESS; } - log_unit_full(u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0, - "Control process exited, code=%s status=%i", - sigchld_code_to_string(code), status); + unit_log_process_exit( + u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE, + "Control process", + service_exec_command_to_string(s->control_command_id), + code, status); if (s->result == SERVICE_SUCCESS) s->result = f; @@ -3259,7 +3433,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_signal(s, SERVICE_STOP_SIGTERM, f); break; - case SERVICE_STOP_SIGABRT: + case SERVICE_STOP_WATCHDOG: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: if (main_pid_good(s) <= 0) @@ -3330,8 +3504,8 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); break; - case SERVICE_STOP_SIGABRT: - log_unit_warning(UNIT(s), "State 'stop-sigabrt' timed out. Terminating."); + case SERVICE_STOP_WATCHDOG: + log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. 
Terminating."); service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); break; @@ -3410,7 +3584,7 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!", format_timespan(t, sizeof(t), watchdog_usec, 1)); - service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG); + service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG); } else log_unit_warning(UNIT(s), "Watchdog disabled! Ignoring watchdog timeout (limit %s)!", format_timespan(t, sizeof(t), watchdog_usec, 1)); @@ -3498,7 +3672,11 @@ static void service_notify_message( } if (r > 0) { service_set_main_pid(s, new_main_pid); - unit_watch_pid(UNIT(s), new_main_pid); + + r = unit_watch_pid(UNIT(s), new_main_pid); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid); + notify_dbus = true; } } @@ -3549,8 +3727,12 @@ static void service_notify_message( _cleanup_free_ char *t = NULL; if (!isempty(e)) { - if (!utf8_is_valid(e)) - log_unit_warning(u, "Status message in notification message is not UTF-8 clean."); + /* Note that this size limit check is mostly paranoia: since the datagram size we are willing + * to process is already limited to NOTIFY_BUFFER_MAX, this limit here should never be hit. 
*/ + if (strlen(e) > STATUS_TEXT_MAX) + log_unit_warning(u, "Status message overly long (%zu > %u), ignoring.", strlen(e), STATUS_TEXT_MAX); + else if (!utf8_is_valid(e)) + log_unit_warning(u, "Status message in notification message is not UTF-8 clean, ignoring."); else { t = strdup(e); if (!t) @@ -3572,7 +3754,7 @@ static void service_notify_message( status_errno = parse_errno(e); if (status_errno < 0) log_unit_warning_errno(u, status_errno, - "Failed to parse ERRNO= field in notification message: %s", e); + "Failed to parse ERRNO= field value '%s' in notification message: %m", e); else if (s->status_errno != status_errno) { s->status_errno = status_errno; notify_dbus = true; @@ -3599,7 +3781,7 @@ static void service_notify_message( if (safe_atou64(e, &watchdog_override_usec) < 0) log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e); else - service_reset_watchdog_timeout(s, watchdog_override_usec); + service_override_watchdog_timeout(s, watchdog_override_usec); } /* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. 
In both cases, @@ -3817,7 +3999,7 @@ static bool service_needs_console(Unit *u) { SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, - SERVICE_STOP_SIGABRT, + SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, @@ -3825,6 +4007,21 @@ static bool service_needs_console(Unit *u) { SERVICE_FINAL_SIGKILL); } +static int service_exit_status(Unit *u) { + Service *s = SERVICE(u); + + assert(u); + + if (s->main_exec_status.pid <= 0 || + !dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) + return -ENODATA; + + if (s->main_exec_status.code != CLD_EXITED) + return -EBADE; + + return s->main_exec_status.status; +} + static const char* const service_restart_table[_SERVICE_RESTART_MAX] = { [SERVICE_RESTART_NO] = "no", [SERVICE_RESTART_ON_SUCCESS] = "on-success", @@ -3843,7 +4040,8 @@ static const char* const service_type_table[_SERVICE_TYPE_MAX] = { [SERVICE_ONESHOT] = "oneshot", [SERVICE_DBUS] = "dbus", [SERVICE_NOTIFY] = "notify", - [SERVICE_IDLE] = "idle" + [SERVICE_IDLE] = "idle", + [SERVICE_EXEC] = "exec", }; DEFINE_STRING_TABLE_LOOKUP(service_type, ServiceType); @@ -3944,6 +4142,7 @@ const UnitVTable service_vtable = { .get_timeout = service_get_timeout, .needs_console = service_needs_console, + .exit_status = service_exit_status, .status_message_formats = { .starting_stopping = { diff --git a/src/core/service.h b/src/core/service.h index 9c06e91883..9c4340c70e 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -30,6 +30,7 @@ typedef enum ServiceType { SERVICE_DBUS, /* we fork and wait until a specific D-Bus name appears on the bus */ SERVICE_NOTIFY, /* we fork and wait until a daemon sends us a ready message with sd_notify() */ SERVICE_IDLE, /* much like simple, but delay exec() until all jobs are dispatched. 
*/ + SERVICE_EXEC, /* we fork and wait until we execute exec() (this means our own setup is waited for) */ _SERVICE_TYPE_MAX, _SERVICE_TYPE_INVALID = -1 } ServiceType; @@ -98,8 +99,9 @@ struct Service { usec_t runtime_max_usec; dual_timestamp watchdog_timestamp; - usec_t watchdog_usec; - usec_t watchdog_override_usec; + usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */ + usec_t watchdog_original_usec; /* the watchdog timeout that was in effect when the unit was started, i.e. the timeout the forked off processes currently see */ + usec_t watchdog_override_usec; /* the watchdog timeout requested by the service itself through sd_notify() */ bool watchdog_override_enable; sd_event_source *watchdog_event_source; @@ -165,6 +167,8 @@ struct Service { NotifyAccess notify_access; NotifyState notify_state; + sd_event_source *exec_fd_event_source; + ServiceFDStore *fd_store; size_t n_fd_store; unsigned n_fd_store_max; @@ -179,6 +183,7 @@ struct Service { unsigned n_restarts; bool flush_n_restarts; + bool exec_fd_hot; }; extern const UnitVTable service_vtable; @@ -202,3 +207,5 @@ const char* service_result_to_string(ServiceResult i) _const_; ServiceResult service_result_from_string(const char *s) _pure_; DEFINE_CAST(SERVICE, Service); + +#define STATUS_TEXT_MAX (16U*1024U) diff --git a/src/core/show-status.c b/src/core/show-status.c index 63262cc716..f748a82084 100644 --- a/src/core/show-status.c +++ b/src/core/show-status.c @@ -5,34 +5,38 @@ #include "io-util.h" #include "parse-util.h" #include "show-status.h" +#include "string-table.h" #include "string-util.h" #include "terminal-util.h" #include "util.h" +static const char* const show_status_table[_SHOW_STATUS_MAX] = { + [SHOW_STATUS_NO] = "no", + [SHOW_STATUS_AUTO] = "auto", + [SHOW_STATUS_TEMPORARY] = "temporary", + [SHOW_STATUS_YES] = "yes", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(show_status, ShowStatus, SHOW_STATUS_YES); + int parse_show_status(const char *v, ShowStatus *ret) { - int r; 
+ ShowStatus s; - assert(v); assert(ret); - if (streq(v, "auto")) { - *ret = SHOW_STATUS_AUTO; - return 0; - } - - r = parse_boolean(v); - if (r < 0) - return r; + s = show_status_from_string(v); + if (s < 0 || s == SHOW_STATUS_TEMPORARY) + return -EINVAL; - *ret = r ? SHOW_STATUS_YES : SHOW_STATUS_NO; + *ret = s; return 0; } -int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char *format, va_list ap) { +int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) { static const char status_indent[] = " "; /* "[" STATUS "] " */ _cleanup_free_ char *s = NULL; _cleanup_close_ int fd = -1; - struct iovec iovec[6] = {}; + struct iovec iovec[7] = {}; int n = 0; static bool prev_ephemeral; @@ -53,7 +57,7 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char if (fd < 0) return fd; - if (ellipse) { + if (FLAGS_SET(flags, SHOW_STATUS_ELLIPSIZE)) { char *e; size_t emax, sl; int c; @@ -69,15 +73,12 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char emax = 3; e = ellipsize(s, emax, 50); - if (e) { - free(s); - s = e; - } + if (e) + free_and_replace(s, e); } if (prev_ephemeral) - iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE); - prev_ephemeral = ephemeral; + iovec[n++] = IOVEC_MAKE_STRING(ANSI_REVERSE_LINEFEED "\r" ANSI_ERASE_TO_END_OF_LINE); if (status) { if (!isempty(status)) { @@ -89,8 +90,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char } iovec[n++] = IOVEC_MAKE_STRING(s); - if (!ephemeral) - iovec[n++] = IOVEC_MAKE_STRING("\n"); + iovec[n++] = IOVEC_MAKE_STRING("\n"); + + if (prev_ephemeral && !FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL)) + iovec[n++] = IOVEC_MAKE_STRING(ANSI_ERASE_TO_END_OF_LINE); + prev_ephemeral = FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL) ; if (writev(fd, iovec, n) < 0) return -errno; @@ -98,14 +102,14 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char return 
0; } -int status_printf(const char *status, bool ellipse, bool ephemeral, const char *format, ...) { +int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) { va_list ap; int r; assert(format); va_start(ap, format); - r = status_vprintf(status, ellipse, ephemeral, format, ap); + r = status_vprintf(status, flags, format, ap); va_end(ap); return r; diff --git a/src/core/show-status.h b/src/core/show-status.h index 1a80de33d9..f574d92d84 100644 --- a/src/core/show-status.h +++ b/src/core/show-status.h @@ -8,14 +8,22 @@ /* Manager status */ typedef enum ShowStatus { - _SHOW_STATUS_UNSET = -2, - SHOW_STATUS_AUTO = -1, - SHOW_STATUS_NO = 0, - SHOW_STATUS_YES = 1, - SHOW_STATUS_TEMPORARY = 2, + SHOW_STATUS_NO, + SHOW_STATUS_AUTO, + SHOW_STATUS_TEMPORARY, + SHOW_STATUS_YES, + _SHOW_STATUS_MAX, + _SHOW_STATUS_INVALID = -1, } ShowStatus; +typedef enum ShowStatusFlags { + SHOW_STATUS_ELLIPSIZE = 1 << 0, + SHOW_STATUS_EPHEMERAL = 1 << 1, +} ShowStatusFlags; + +ShowStatus show_status_from_string(const char *v) _const_; +const char* show_status_to_string(ShowStatus s) _pure_; int parse_show_status(const char *v, ShowStatus *ret); -int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char *format, va_list ap) _printf_(4,0); -int status_printf(const char *status, bool ellipse, bool ephemeral, const char *format, ...) _printf_(4,5); +int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) _printf_(3,0); +int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) 
_printf_(3,4); diff --git a/src/core/shutdown.c b/src/core/shutdown.c index 038345b752..cb47ee8984 100644 --- a/src/core/shutdown.c +++ b/src/core/shutdown.c @@ -28,6 +28,7 @@ #include "parse-util.h" #include "process-util.h" #include "reboot-util.h" +#include "rlimit-util.h" #include "signal-util.h" #include "string-util.h" #include "switch-root.h" @@ -77,14 +78,14 @@ static int parse_argv(int argc, char *argv[]) { case ARG_LOG_LEVEL: r = log_set_max_level_from_string(optarg); if (r < 0) - log_error_errno(r, "Failed to parse log level %s, ignoring.", optarg); + log_error_errno(r, "Failed to parse log level %s, ignoring: %m", optarg); break; case ARG_LOG_TARGET: r = log_set_target_from_string(optarg); if (r < 0) - log_error_errno(r, "Failed to parse log target %s, ignoring", optarg); + log_error_errno(r, "Failed to parse log target %s, ignoring: %m", optarg); break; @@ -93,7 +94,7 @@ static int parse_argv(int argc, char *argv[]) { if (optarg) { r = log_show_color_from_string(optarg); if (r < 0) - log_error_errno(r, "Failed to parse log color setting %s, ignoring", optarg); + log_error_errno(r, "Failed to parse log color setting %s, ignoring: %m", optarg); } else log_show_color(true); @@ -103,7 +104,7 @@ static int parse_argv(int argc, char *argv[]) { if (optarg) { r = log_show_location_from_string(optarg); if (r < 0) - log_error_errno(r, "Failed to parse log location setting %s, ignoring", optarg); + log_error_errno(r, "Failed to parse log location setting %s, ignoring: %m", optarg); } else log_show_location(true); @@ -112,14 +113,14 @@ static int parse_argv(int argc, char *argv[]) { case ARG_EXIT_CODE: r = safe_atou8(optarg, &arg_exit_code); if (r < 0) - log_error_errno(r, "Failed to parse exit code %s, ignoring", optarg); + log_error_errno(r, "Failed to parse exit code %s, ignoring: %m", optarg); break; case ARG_TIMEOUT: r = parse_sec(optarg, &arg_timeout); if (r < 0) - log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring", optarg); + 
log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring: %m", optarg); break; @@ -137,10 +138,9 @@ static int parse_argv(int argc, char *argv[]) { assert_not_reached("Unhandled option code."); } - if (!arg_verb) { - log_error("Verb argument missing."); - return -EINVAL; - } + if (!arg_verb) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Verb argument missing."); return 0; } @@ -171,16 +171,23 @@ static int switch_root_initramfs(void) { */ static bool sync_making_progress(unsigned long long *prev_dirty) { _cleanup_fclose_ FILE *f = NULL; - char line[LINE_MAX]; - bool r = false; unsigned long long val = 0; + bool r = false; f = fopen("/proc/meminfo", "re"); if (!f) return log_warning_errno(errno, "Failed to open /proc/meminfo: %m"); - FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) { + for (;;) { + _cleanup_free_ char *line = NULL; unsigned long long ull = 0; + int q; + + q = read_line(f, LONG_LINE_MAX, &line); + if (q < 0) + return log_warning_errno(q, "Failed to parse /proc/meminfo: %m"); + if (q == 0) + break; if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:")) continue; @@ -435,15 +442,17 @@ int main(int argc, char *argv[]) { arguments[0] = NULL; arguments[1] = arg_verb; arguments[2] = NULL; - execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments); + execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL); + + (void) rlimit_nofile_safe(); if (can_initrd) { r = switch_root_initramfs(); if (r >= 0) { argv[0] = (char*) "/shutdown"; - setsid(); - make_console_stdio(); + (void) setsid(); + (void) make_console_stdio(); log_info("Successfully changed into root pivot.\n" "Returning to initrd..."); diff --git a/src/core/slice.c b/src/core/slice.c index 58f18a4dad..15b18bcad3 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -4,7 +4,9 @@ #include "alloc-util.h" #include "dbus-slice.h" +#include "dbus-unit.h" #include "log.h" 
+#include "serialize.h" #include "slice.h" #include "special.h" #include "string-util.h" @@ -28,6 +30,9 @@ static void slice_set_state(Slice *t, SliceState state) { SliceState old_state; assert(t); + if (t->state != state) + bus_unit_send_pending_change_signal(UNIT(t), false); + old_state = t->state; t->state = state; @@ -74,7 +79,7 @@ static int slice_add_default_dependencies(Slice *s) { r = unit_add_two_dependencies_by_name( UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, - SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; @@ -123,7 +128,7 @@ static int slice_load_root_slice(Unit *u) { if (!u->description) u->description = strdup("Root Slice"); if (!u->documentation) - u->documentation = strv_new("man:systemd.special(7)", NULL); + u->documentation = strv_new("man:systemd.special(7)"); return 1; } @@ -146,7 +151,7 @@ static int slice_load_system_slice(Unit *u) { if (!u->description) u->description = strdup("System Slice"); if (!u->documentation) - u->documentation = strv_new("man:systemd.special(7)", NULL); + u->documentation = strv_new("man:systemd.special(7)"); return 1; } @@ -256,7 +261,8 @@ static int slice_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", slice_state_to_string(s->state)); + (void) serialize_item(f, "state", slice_state_to_string(s->state)); + return 0; } @@ -328,7 +334,7 @@ static void slice_enumerate_perpetual(Manager *m) { assert(m); r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u); - if (r >= 0 && manager_owns_root_cgroup(m)) { + if (r >= 0 && manager_owns_host_root_cgroup(m)) { Slice *s = SLICE(u); /* If we are managing the root cgroup then this means our root slice covers the whole system, which diff --git a/src/core/smack-setup.c b/src/core/smack-setup.c index 50115c0454..49b37aefc7 100644 --- a/src/core/smack-setup.c +++ b/src/core/smack-setup.c @@ -29,7 +29,6 @@ static int write_access2_rules(const 
char* srcdir) { _cleanup_close_ int load2_fd = -1, change_fd = -1; _cleanup_closedir_ DIR *dir = NULL; struct dirent *entry; - char buf[NAME_MAX]; int dfd = -1; int r = 0; @@ -73,7 +72,7 @@ static int write_access2_rules(const char* srcdir) { continue; } - policy = fdopen(fd, "re"); + policy = fdopen(fd, "r"); if (!policy) { if (r == 0) r = -errno; @@ -83,13 +82,17 @@ static int write_access2_rules(const char* srcdir) { } /* load2 write rules in the kernel require a line buffered stream */ - FOREACH_LINE(buf, policy, - log_error_errno(errno, "Failed to read line from '%s': %m", - entry->d_name)) { + for (;;) { + _cleanup_free_ char *buf = NULL, *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL; + int q; - _cleanup_free_ char *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL; + q = read_line(policy, NAME_MAX, &buf); + if (q < 0) + return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name); + if (q == 0) + break; - if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf)) + if (isempty(buf) || strchr(COMMENTS, buf[0])) continue; /* if 3 args -> load rule : subject object access1 */ @@ -102,7 +105,7 @@ static int write_access2_rules(const char* srcdir) { if (write(isempty(acc2) ? load2_fd : change_fd, buf, strlen(buf)) < 0) { if (r == 0) r = -errno; - log_error_errno(errno, "Failed to write '%s' to '%s' in '%s'", + log_error_errno(errno, "Failed to write '%s' to '%s' in '%s': %m", buf, isempty(acc2) ? 
"/sys/fs/smackfs/load2" : "/sys/fs/smackfs/change-rule", entry->d_name); } } @@ -115,7 +118,6 @@ static int write_cipso2_rules(const char* srcdir) { _cleanup_close_ int cipso2_fd = -1; _cleanup_closedir_ DIR *dir = NULL; struct dirent *entry; - char buf[NAME_MAX]; int dfd = -1; int r = 0; @@ -152,7 +154,7 @@ static int write_cipso2_rules(const char* srcdir) { continue; } - policy = fdopen(fd, "re"); + policy = fdopen(fd, "r"); if (!policy) { if (r == 0) r = -errno; @@ -162,17 +164,23 @@ static int write_cipso2_rules(const char* srcdir) { } /* cipso2 write rules in the kernel require a line buffered stream */ - FOREACH_LINE(buf, policy, - log_error_errno(errno, "Failed to read line from '%s': %m", - entry->d_name)) { + for (;;) { + _cleanup_free_ char *buf = NULL; + int q; + + q = read_line(policy, NAME_MAX, &buf); + if (q < 0) + return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name); + if (q == 0) + break; - if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf)) + if (isempty(buf) || strchr(COMMENTS, buf[0])) continue; if (write(cipso2_fd, buf, strlen(buf)) < 0) { if (r == 0) r = -errno; - log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s'", + log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s': %m", buf, entry->d_name); break; } @@ -186,7 +194,6 @@ static int write_netlabel_rules(const char* srcdir) { _cleanup_fclose_ FILE *dst = NULL; _cleanup_closedir_ DIR *dir = NULL; struct dirent *entry; - char buf[NAME_MAX]; int dfd = -1; int r = 0; @@ -220,7 +227,7 @@ static int write_netlabel_rules(const char* srcdir) { continue; } - policy = fdopen(fd, "re"); + policy = fdopen(fd, "r"); if (!policy) { if (r == 0) r = -errno; @@ -232,15 +239,20 @@ static int write_netlabel_rules(const char* srcdir) { (void) __fsetlocking(policy, FSETLOCKING_BYCALLER); /* load2 write rules in the kernel require a line buffered stream */ - FOREACH_LINE(buf, policy, - log_error_errno(errno, "Failed to read 
line from %s: %m", entry->d_name)) { - + for (;;) { + _cleanup_free_ char *buf = NULL; int q; + q = read_line(policy, NAME_MAX, &buf); + if (q < 0) + return log_error_errno(q, "Failed to read line from %s: %m", entry->d_name); + if (q == 0) + break; + if (!fputs(buf, dst)) { if (r == 0) r = -EINVAL; - log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel"); + log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel: %m"); break; } q = fflush_and_check(dst); @@ -261,20 +273,27 @@ static int write_onlycap_list(void) { _cleanup_free_ char *list = NULL; _cleanup_fclose_ FILE *f = NULL; size_t len = 0, allocated = 0; - char buf[LINE_MAX]; int r; f = fopen("/etc/smack/onlycap", "re"); if (!f) { if (errno != ENOENT) - log_warning_errno(errno, "Failed to read '/etc/smack/onlycap'"); + log_warning_errno(errno, "Failed to read '/etc/smack/onlycap': %m"); + return errno == ENOENT ? ENOENT : -errno; } - FOREACH_LINE(buf, f, return -errno) { + for (;;) { + _cleanup_free_ char *buf = NULL; size_t l; - if (isempty(truncate_nl(buf)) || strchr(COMMENTS, *buf)) + r = read_line(f, LONG_LINE_MAX, &buf); + if (r < 0) + return log_error_errno(r, "Failed to read line from /etc/smack/onlycap: %m"); + if (r == 0) + break; + + if (isempty(buf) || strchr(COMMENTS, *buf)) continue; l = strlen(buf); @@ -285,7 +304,7 @@ static int write_onlycap_list(void) { len += l + 1; } - if (!len) + if (len == 0) return 0; list[len - 1] = 0; @@ -293,13 +312,13 @@ static int write_onlycap_list(void) { onlycap_fd = open("/sys/fs/smackfs/onlycap", O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); if (onlycap_fd < 0) { if (errno != ENOENT) - log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap'"); + log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap': %m"); return -errno; /* negative error */ } r = write(onlycap_fd, list, len); if (r < 0) - return log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap'", list); + return 
log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap': %m", list); return 0; } @@ -331,17 +350,17 @@ int mac_smack_setup(bool *loaded_policy) { } #ifdef SMACK_RUN_LABEL - r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, 0); + r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) log_warning_errno(r, "Failed to set SMACK label \"" SMACK_RUN_LABEL "\" on self: %m"); - r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, 0); + r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) log_warning_errno(r, "Failed to set SMACK ambient label \"" SMACK_RUN_LABEL "\": %m"); r = write_string_file("/sys/fs/smackfs/netlabel", - "0.0.0.0/0 " SMACK_RUN_LABEL, 0); + "0.0.0.0/0 " SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) log_warning_errno(r, "Failed to set SMACK netlabel rule \"0.0.0.0/0 " SMACK_RUN_LABEL "\": %m"); - r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", 0); + r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) log_warning_errno(r, "Failed to set SMACK netlabel rule \"127.0.0.1 -CIPSO\": %m"); #endif @@ -390,7 +409,7 @@ int mac_smack_setup(bool *loaded_policy) { log_info("Successfully wrote Smack onlycap list."); break; default: - log_emergency_errno(r, "Failed to write Smack onlycap list."); + log_emergency_errno(r, "Failed to write Smack onlycap list: %m"); return r; } diff --git a/src/core/socket.c b/src/core/socket.c index 56d32225c4..dd126a7f21 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -17,6 +17,7 @@ #include "bus-util.h" #include "copy.h" #include "dbus-socket.h" +#include "dbus-unit.h" #include "def.h" #include "exit-status.h" #include "fd-util.h" @@ -24,6 +25,7 @@ #include "fs-util.h" #include "in-addr-util.h" #include "io-util.h" +#include "ip-protocol-list.h" #include 
"label.h" #include "log.h" #include "missing.h" @@ -32,10 +34,10 @@ #include "path-util.h" #include "process-util.h" #include "selinux-util.h" +#include "serialize.h" #include "signal-util.h" #include "smack-util.h" #include "socket.h" -#include "socket-protocol-list.h" #include "special.h" #include "string-table.h" #include "string-util.h" @@ -301,17 +303,17 @@ static int socket_add_default_dependencies(Socket *s) { if (!UNIT(s)->default_dependencies) return 0; - r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) { - r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; } - return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } _pure_ static bool socket_has_exec(Socket *s) { @@ -467,9 +469,7 @@ static int socket_verify(Socket *s) { return 0; } -static void peer_address_hash_func(const void *p, struct siphash *state) { - const SocketPeer *s = p; - +static void peer_address_hash_func(const SocketPeer *s, struct siphash *state) { assert(s); if (s->peer.sa.sa_family == AF_INET) @@ -482,13 +482,12 @@ static void peer_address_hash_func(const void *p, struct siphash *state) { assert_not_reached("Unknown address family."); } -static int peer_address_compare_func(const void *a, const void *b) { - const SocketPeer *x = a, *y = b; +static int peer_address_compare_func(const SocketPeer 
*x, const SocketPeer *y) { + int r; - if (x->peer.sa.sa_family < y->peer.sa.sa_family) - return -1; - if (x->peer.sa.sa_family > y->peer.sa.sa_family) - return 1; + r = CMP(x->peer.sa.sa_family, y->peer.sa.sa_family); + if (r != 0) + return r; switch(x->peer.sa.sa_family) { case AF_INET: @@ -496,19 +495,12 @@ static int peer_address_compare_func(const void *a, const void *b) { case AF_INET6: return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr)); case AF_VSOCK: - if (x->peer.vm.svm_cid < y->peer.vm.svm_cid) - return -1; - if (x->peer.vm.svm_cid > y->peer.vm.svm_cid) - return 1; - return 0; + return CMP(x->peer.vm.svm_cid, y->peer.vm.svm_cid); } assert_not_reached("Black sheep in the family!"); } -const struct hash_ops peer_address_hash_ops = { - .hash = peer_address_hash_func, - .compare = peer_address_compare_func -}; +DEFINE_PRIVATE_HASH_OPS(peer_address_hash_ops, SocketPeer, peer_address_hash_func, peer_address_compare_func); static int socket_load(Unit *u) { Socket *s = SOCKET(u); @@ -547,26 +539,8 @@ static SocketPeer *socket_peer_new(void) { return p; } -SocketPeer *socket_peer_ref(SocketPeer *p) { - if (!p) - return NULL; - - assert(p->n_ref > 0); - p->n_ref++; - - return p; -} - -SocketPeer *socket_peer_unref(SocketPeer *p) { - if (!p) - return NULL; - - assert(p->n_ref > 0); - - p->n_ref--; - - if (p->n_ref > 0) - return NULL; +static SocketPeer *socket_peer_free(SocketPeer *p) { + assert(p); if (p->socket) set_remove(p->socket->peers_by_address, p); @@ -574,6 +548,8 @@ SocketPeer *socket_peer_unref(SocketPeer *p) { return mfree(p); } +DEFINE_TRIVIAL_REF_UNREF_FUNC(SocketPeer, socket_peer, socket_peer_free); + int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) { _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL; SocketPeer sa = {}, *i; @@ -833,7 +809,7 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) { prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->trigger_limit.interval, 
USEC_PER_SEC), prefix, s->trigger_limit.burst); - str = socket_protocol_to_name(s->socket_protocol); + str = ip_protocol_to_name(s->socket_protocol); if (str) fprintf(f, "%sSocketProtocol: %s\n", prefix, str); @@ -1034,107 +1010,112 @@ static void socket_apply_socket_options(Socket *s, int fd) { assert(fd >= 0); if (s->keep_alive) { - int one = 1; - if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_KEEPALIVE failed: %m"); + r = setsockopt_int(fd, SOL_SOCKET, SO_KEEPALIVE, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_KEEPALIVE failed: %m"); } if (s->keep_alive_time > 0) { - int value = s->keep_alive_time / USEC_PER_SEC; - if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPIDLE failed: %m"); + r = setsockopt_int(fd, SOL_TCP, TCP_KEEPIDLE, s->keep_alive_time / USEC_PER_SEC); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "TCP_KEEPIDLE failed: %m"); } if (s->keep_alive_interval > 0) { - int value = s->keep_alive_interval / USEC_PER_SEC; - if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPINTVL failed: %m"); + r = setsockopt_int(fd, SOL_TCP, TCP_KEEPINTVL, s->keep_alive_interval / USEC_PER_SEC); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "TCP_KEEPINTVL failed: %m"); } if (s->keep_alive_cnt > 0) { - int value = s->keep_alive_cnt; - if (setsockopt(fd, SOL_TCP, TCP_KEEPCNT, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "TCP_KEEPCNT failed: %m"); + r = setsockopt_int(fd, SOL_TCP, TCP_KEEPCNT, s->keep_alive_cnt); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "TCP_KEEPCNT failed: %m"); } if (s->defer_accept > 0) { - int value = s->defer_accept / USEC_PER_SEC; - if (setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "TCP_DEFER_ACCEPT failed: %m"); + r = 
setsockopt_int(fd, SOL_TCP, TCP_DEFER_ACCEPT, s->defer_accept / USEC_PER_SEC); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "TCP_DEFER_ACCEPT failed: %m"); } if (s->no_delay) { - int one = 1; - if (s->socket_protocol == IPPROTO_SCTP) { - if (setsockopt(fd, SOL_SCTP, SCTP_NODELAY, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SCTP_NODELAY failed: %m"); + r = setsockopt_int(fd, SOL_SCTP, SCTP_NODELAY, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SCTP_NODELAY failed: %m"); } else { - if (setsockopt(fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "TCP_NODELAY failed: %m"); + r = setsockopt_int(fd, SOL_TCP, TCP_NODELAY, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "TCP_NODELAY failed: %m"); } } if (s->broadcast) { - int one = 1; - if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_BROADCAST failed: %m"); + r = setsockopt_int(fd, SOL_SOCKET, SO_BROADCAST, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_BROADCAST failed: %m"); } if (s->pass_cred) { - int one = 1; - if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_PASSCRED failed: %m"); + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_PASSCRED failed: %m"); } if (s->pass_sec) { - int one = 1; - if (setsockopt(fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_PASSSEC failed: %m"); + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSSEC, true); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_PASSSEC failed: %m"); } - if (s->priority >= 0) - if (setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &s->priority, sizeof(s->priority)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_PRIORITY failed: %m"); + if (s->priority >= 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_PRIORITY, s->priority); + if (r < 0) 
+ log_unit_warning_errno(UNIT(s), r, "SO_PRIORITY failed: %m"); + } if (s->receive_buffer > 0) { - int value = (int) s->receive_buffer; - /* We first try with SO_RCVBUFFORCE, in case we have the perms for that */ - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &value, sizeof(value)) < 0) - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_RCVBUF failed: %m"); + if (setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, s->receive_buffer) < 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, s->receive_buffer); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_RCVBUF failed: %m"); + } } if (s->send_buffer > 0) { - int value = (int) s->send_buffer; - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &value, sizeof(value)) < 0) - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, sizeof(value)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_SNDBUF failed: %m"); + if (setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, s->send_buffer) < 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, s->send_buffer); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_SNDBUF failed: %m"); + } } - if (s->mark >= 0) - if (setsockopt(fd, SOL_SOCKET, SO_MARK, &s->mark, sizeof(s->mark)) < 0) - log_unit_warning_errno(UNIT(s), errno, "SO_MARK failed: %m"); + if (s->mark >= 0) { + r = setsockopt_int(fd, SOL_SOCKET, SO_MARK, s->mark); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "SO_MARK failed: %m"); + } - if (s->ip_tos >= 0) - if (setsockopt(fd, IPPROTO_IP, IP_TOS, &s->ip_tos, sizeof(s->ip_tos)) < 0) - log_unit_warning_errno(UNIT(s), errno, "IP_TOS failed: %m"); + if (s->ip_tos >= 0) { + r = setsockopt_int(fd, IPPROTO_IP, IP_TOS, s->ip_tos); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "IP_TOS failed: %m"); + } if (s->ip_ttl >= 0) { int x; - r = setsockopt(fd, IPPROTO_IP, IP_TTL, &s->ip_ttl, sizeof(s->ip_ttl)); + r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, s->ip_ttl); if (socket_ipv6_is_supported()) - x = setsockopt(fd, 
IPPROTO_IPV6, IPV6_UNICAST_HOPS, &s->ip_ttl, sizeof(s->ip_ttl)); - else { - x = -1; - errno = EAFNOSUPPORT; - } + x = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, s->ip_ttl); + else + x = -EAFNOSUPPORT; if (r < 0 && x < 0) - log_unit_warning_errno(UNIT(s), errno, "IP_TTL/IPV6_UNICAST_HOPS failed: %m"); + log_unit_warning_errno(UNIT(s), r, "IP_TTL/IPV6_UNICAST_HOPS failed: %m"); } if (s->tcp_congestion) @@ -1329,7 +1310,7 @@ static int socket_symlink(Socket *s) { STRV_FOREACH(i, s->symlinks) { (void) mkdir_parents_label(*i, s->directory_mode); - r = symlink_idempotent(p, *i); + r = symlink_idempotent(p, *i, false); if (r == -EEXIST && s->remove_on_stop) { /* If there's already something where we want to create the symlink, and the destructive @@ -1337,7 +1318,7 @@ static int socket_symlink(Socket *s) { * again. */ if (unlink(*i) >= 0) - r = symlink_idempotent(p, *i); + r = symlink_idempotent(p, *i, false); } if (r < 0) @@ -1375,8 +1356,10 @@ static int usbffs_dispatch_eps(SocketPort *p) { n = (size_t) r; p->auxiliary_fds = new(int, n); - if (!p->auxiliary_fds) - return -ENOMEM; + if (!p->auxiliary_fds) { + r = -ENOMEM; + goto clear; + } p->n_auxiliary_fds = n; @@ -1385,8 +1368,10 @@ static int usbffs_dispatch_eps(SocketPort *p) { _cleanup_free_ char *ep = NULL; ep = path_make_absolute(ent[i]->d_name, p->path); - if (!ep) - return -ENOMEM; + if (!ep) { + r = -ENOMEM; + goto fail; + } path_simplify(ep, false); @@ -1395,16 +1380,20 @@ static int usbffs_dispatch_eps(SocketPort *p) { goto fail; p->auxiliary_fds[k++] = r; - free(ent[i]); } - return r; + r = 0; + goto clear; fail: close_many(p->auxiliary_fds, k); p->auxiliary_fds = mfree(p->auxiliary_fds); p->n_auxiliary_fds = 0; +clear: + for (i = 0; i < n; ++i) + free(ent[i]); + return r; } @@ -1754,6 +1743,9 @@ static void socket_set_state(Socket *s, SocketState state) { SocketState old_state; assert(s); + if (s->state != state) + bus_unit_send_pending_change_signal(UNIT(s), false); + old_state = s->state; 
s->state = state; @@ -1866,11 +1858,12 @@ static int socket_coldplug(Unit *u) { static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { - ExecParameters exec_params = { - .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + _cleanup_(exec_params_clear) ExecParameters exec_params = { + .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, + .exec_fd = -1, }; pid_t pid; int r; @@ -1887,9 +1880,9 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; - unit_set_exec_params(UNIT(s), &exec_params); - - exec_params.argv = c->argv; + r = unit_set_exec_params(UNIT(s), &exec_params); + if (r < 0) + return r; r = exec_spawn(UNIT(s), c, @@ -1935,7 +1928,7 @@ static int socket_chown(Socket *s, pid_t *_pid) { if (!isempty(s->user)) { const char *user = s->user; - r = get_user_creds(&user, &uid, &gid, NULL, NULL); + r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); if (r < 0) { log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user); _exit(EXIT_USER); @@ -1945,7 +1938,7 @@ static int socket_chown(Socket *s, pid_t *_pid) { if (!isempty(s->group)) { const char *group = s->group; - r = get_group_creds(&group, &gid); + r = get_group_creds(&group, &gid, 0); if (r < 0) { log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group); _exit(EXIT_GROUP); @@ -1990,8 +1983,10 @@ static void socket_enter_dead(Socket *s, SocketResult f) { if (s->result == SOCKET_SUCCESS) s->result = f; - if (s->result != SOCKET_SUCCESS) - log_unit_warning(UNIT(s), "Failed with result '%s'.", socket_result_to_string(s->result)); + if (s->result == SOCKET_SUCCESS) + unit_log_success(UNIT(s)); + else + unit_log_failure(UNIT(s), socket_result_to_string(s->result)); socket_set_state(s, s->result != SOCKET_SUCCESS ? 
SOCKET_FAILED : SOCKET_DEAD); @@ -2459,6 +2454,7 @@ static int socket_start(Unit *u) { return r; s->result = SOCKET_SUCCESS; + exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX); u->reset_accounting = true; @@ -2506,16 +2502,16 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", socket_state_to_string(s->state)); - unit_serialize_item(u, f, "result", socket_result_to_string(s->result)); - unit_serialize_item_format(u, f, "n-accepted", "%u", s->n_accepted); - unit_serialize_item_format(u, f, "n-refused", "%u", s->n_refused); + (void) serialize_item(f, "state", socket_state_to_string(s->state)); + (void) serialize_item(f, "result", socket_result_to_string(s->result)); + (void) serialize_item_format(f, "n-accepted", "%u", s->n_accepted); + (void) serialize_item_format(f, "n-refused", "%u", s->n_refused); if (s->control_pid > 0) - unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid); + (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid); if (s->control_command_id >= 0) - unit_serialize_item(u, f, "control-command", socket_exec_command_to_string(s->control_command_id)); + (void) serialize_item(f, "control-command", socket_exec_command_to_string(s->control_command_id)); LIST_FOREACH(port, p, s->ports) { int copy; @@ -2525,29 +2521,28 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { copy = fdset_put_dup(fds, p->fd); if (copy < 0) - return copy; + return log_warning_errno(copy, "Failed to serialize socket fd: %m"); if (p->type == SOCKET_SOCKET) { _cleanup_free_ char *t = NULL; r = socket_address_print(&p->address, &t); if (r < 0) - return r; + return log_error_errno(r, "Failed to format socket address: %m"); if (socket_address_family(&p->address) == AF_NETLINK) - unit_serialize_item_format(u, f, "netlink", "%i %s", copy, t); + (void) serialize_item_format(f, "netlink", "%i %s", copy, t); else - 
unit_serialize_item_format(u, f, "socket", "%i %i %s", copy, p->address.type, t); - + (void) serialize_item_format(f, "socket", "%i %i %s", copy, p->address.type, t); } else if (p->type == SOCKET_SPECIAL) - unit_serialize_item_format(u, f, "special", "%i %s", copy, p->path); + (void) serialize_item_format(f, "special", "%i %s", copy, p->path); else if (p->type == SOCKET_MQUEUE) - unit_serialize_item_format(u, f, "mqueue", "%i %s", copy, p->path); + (void) serialize_item_format(f, "mqueue", "%i %s", copy, p->path); else if (p->type == SOCKET_USB_FUNCTION) - unit_serialize_item_format(u, f, "ffs", "%i %s", copy, p->path); + (void) serialize_item_format(f, "ffs", "%i %s", copy, p->path); else { assert(p->type == SOCKET_FIFO); - unit_serialize_item_format(u, f, "fifo", "%i %s", copy, p->path); + (void) serialize_item_format(f, "fifo", "%i %s", copy, p->path); } } @@ -2555,6 +2550,8 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { } static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) { + assert(p); + safe_close(p->fd); p->fd = fdset_remove(fds, fd); } @@ -2971,9 +2968,11 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) { f = SOCKET_SUCCESS; } - log_unit_full(u, f == SOCKET_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0, - "Control process exited, code=%s status=%i", - sigchld_code_to_string(code), status); + unit_log_process_exit( + u, f == SOCKET_SUCCESS ? 
LOG_DEBUG : LOG_NOTICE, + "Control process", + socket_exec_command_to_string(s->control_command_id), + code, status); if (s->result == SOCKET_SUCCESS) s->result = f; diff --git a/src/core/swap.c b/src/core/swap.c index b78b1aa266..2d8463b8b1 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -5,10 +5,13 @@ #include <sys/stat.h> #include <unistd.h> -#include "libudev.h" +#include "sd-device.h" #include "alloc-util.h" #include "dbus-swap.h" +#include "dbus-unit.h" +#include "device-private.h" +#include "device-util.h" #include "device.h" #include "escape.h" #include "exit-status.h" @@ -18,11 +21,11 @@ #include "parse-util.h" #include "path-util.h" #include "process-util.h" +#include "serialize.h" #include "special.h" #include "string-table.h" #include "string-util.h" #include "swap.h" -#include "udev-util.h" #include "unit-name.h" #include "unit.h" #include "virt.h" @@ -196,7 +199,7 @@ static int swap_add_device_dependencies(Swap *s) { /* File based swap devices need to be ordered after * systemd-remount-fs.service, since they might need a * writable file system. */ - return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, NULL, true, UNIT_DEPENDENCY_FILE); + return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, true, UNIT_DEPENDENCY_FILE); } static int swap_add_default_dependencies(Swap *s) { @@ -215,11 +218,11 @@ static int swap_add_default_dependencies(Swap *s) { /* swap units generated for the swap dev links are missing the * ordering dep against the swap target. 
*/ - r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; - return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } static int swap_verify(Swap *s) { @@ -247,7 +250,7 @@ static int swap_verify(Swap *s) { } static int swap_load_devnode(Swap *s) { - _cleanup_(udev_device_unrefp) struct udev_device *d = NULL; + _cleanup_(sd_device_unrefp) sd_device *d = NULL; struct stat st; const char *p; int r; @@ -257,91 +260,105 @@ static int swap_load_devnode(Swap *s) { if (stat(s->what, &st) < 0 || !S_ISBLK(st.st_mode)) return 0; - r = udev_device_new_from_stat_rdev(UNIT(s)->manager->udev, &st, &d); + r = device_new_from_stat_rdev(&d, &st); if (r < 0) { log_unit_full(UNIT(s), r == -ENOENT ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to allocate udev device for swap %s: %m", s->what); + "Failed to allocate device for swap %s: %m", s->what); return 0; } - p = udev_device_get_devnode(d); - if (!p) + if (sd_device_get_devname(d, &p) < 0) return 0; return swap_set_devnode(s, p); } -static int swap_load(Unit *u) { +static int swap_add_extras(Swap *s) { int r; - Swap *s = SWAP(u); assert(s); - assert(u->load_state == UNIT_STUB); - /* Load a .swap file */ - if (SWAP(u)->from_proc_swaps) - r = unit_load_fragment_and_dropin_optional(u); - else - r = unit_load_fragment_and_dropin(u); + if (UNIT(s)->fragment_path) + s->from_fragment = true; + + if (!s->what) { + if (s->parameters_fragment.what) + s->what = strdup(s->parameters_fragment.what); + else if (s->parameters_proc_swaps.what) + s->what = strdup(s->parameters_proc_swaps.what); + else { + r = unit_name_to_path(UNIT(s)->id, &s->what); + if (r < 0) + return r; + } + + if (!s->what) + return -ENOMEM; + } + + path_simplify(s->what, false); + + if (!UNIT(s)->description) { + r = unit_set_description(UNIT(s), s->what); + if (r < 0) + return r; + } + + r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT); if (r < 0) return r; - if (u->load_state == UNIT_LOADED) { - - if (UNIT(s)->fragment_path) - s->from_fragment = true; + r = swap_add_device_dependencies(s); + if (r < 0) + return r; - if (!s->what) { - if (s->parameters_fragment.what) - s->what = strdup(s->parameters_fragment.what); - else if (s->parameters_proc_swaps.what) - s->what = strdup(s->parameters_proc_swaps.what); - else { - r = unit_name_to_path(u->id, &s->what); - if (r < 0) - return r; - } + r = swap_load_devnode(s); + if (r < 0) + return r; - if (!s->what) - return -ENOMEM; - } + r = unit_patch_contexts(UNIT(s)); + if (r < 0) + return r; - path_simplify(s->what, false); + r = unit_add_exec_dependencies(UNIT(s), &s->exec_context); + if (r < 0) + return r; - if (!UNIT(s)->description) { - r = unit_set_description(u, s->what); - if (r < 
0) - return r; - } + r = unit_set_default_slice(UNIT(s)); + if (r < 0) + return r; - r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT); - if (r < 0) - return r; + r = swap_add_default_dependencies(s); + if (r < 0) + return r; - r = swap_add_device_dependencies(s); - if (r < 0) - return r; + return 0; +} - r = swap_load_devnode(s); - if (r < 0) - return r; +static int swap_load(Unit *u) { + Swap *s = SWAP(u); + int r, q; - r = unit_patch_contexts(u); - if (r < 0) - return r; + assert(s); + assert(u->load_state == UNIT_STUB); - r = unit_add_exec_dependencies(u, &s->exec_context); - if (r < 0) - return r; + /* Load a .swap file */ + if (SWAP(u)->from_proc_swaps) + r = unit_load_fragment_and_dropin_optional(u); + else + r = unit_load_fragment_and_dropin(u); - r = unit_set_default_slice(u); - if (r < 0) - return r; + /* Add in some extras, and do so either when we successfully loaded something or when /proc/swaps is already + * active. */ + if (u->load_state == UNIT_LOADED || s->from_proc_swaps) + q = swap_add_extras(s); + else + q = 0; - r = swap_add_default_dependencies(s); - if (r < 0) - return r; - } + if (r < 0) + return r; + if (q < 0) + return q; return swap_verify(s); } @@ -368,13 +385,12 @@ static int swap_setup_unit( return log_unit_error_errno(u, r, "Failed to generate unit name from path: %m"); u = manager_get_unit(m, e); - if (u && SWAP(u)->from_proc_swaps && - !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps)) { - log_error("Swap %s appeared twice with different device paths %s and %s", e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps); - return -EEXIST; - } + !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps)) + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), + "Swap %s appeared twice with different device paths %s and %s", + e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps); if (!u) { delete = true; @@ -403,6 +419,13 @@ static int swap_setup_unit( } } + /* The unit is definitely around 
now, mark it as loaded if it was previously referenced but could not be + * loaded. After all we can load it now, from the data in /proc/swaps. */ + if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) { + u->load_state = UNIT_LOADED; + u->load_error = 0; + } + if (set_flags) { SWAP(u)->is_active = true; SWAP(u)->just_activated = !SWAP(u)->from_proc_swaps; @@ -425,10 +448,9 @@ fail: } static int swap_process_new(Manager *m, const char *device, int prio, bool set_flags) { - _cleanup_(udev_device_unrefp) struct udev_device *d = NULL; - struct udev_list_entry *item = NULL, *first = NULL; - const char *dn; - struct stat st; + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + const char *dn, *devlink; + struct stat st, st_link; int r; assert(m); @@ -442,41 +464,36 @@ static int swap_process_new(Manager *m, const char *device, int prio, bool set_f if (stat(device, &st) < 0 || !S_ISBLK(st.st_mode)) return 0; - r = udev_device_new_from_stat_rdev(m->udev, &st, &d); + r = device_new_from_stat_rdev(&d, &st); if (r < 0) { log_full_errno(r == -ENOENT ? 
LOG_DEBUG : LOG_WARNING, r, - "Failed to allocate udev device for swap %s: %m", device); + "Failed to allocate device for swap %s: %m", device); return 0; } /* Add the main device node */ - dn = udev_device_get_devnode(d); - if (dn && !streq(dn, device)) + if (sd_device_get_devname(d, &dn) >= 0 && !streq(dn, device)) swap_setup_unit(m, dn, device, prio, set_flags); /* Add additional units for all symlinks */ - first = udev_device_get_devlinks_list_entry(d); - udev_list_entry_foreach(item, first) { - const char *p; + FOREACH_DEVICE_DEVLINK(d, devlink) { /* Don't bother with the /dev/block links */ - p = udev_list_entry_get_name(item); - - if (streq(p, device)) + if (streq(devlink, device)) continue; - if (path_startswith(p, "/dev/block/")) + if (path_startswith(devlink, "/dev/block/")) continue; - if (stat(p, &st) >= 0) - if (!S_ISBLK(st.st_mode) || - st.st_rdev != udev_device_get_devnum(d)) - continue; + if (stat(devlink, &st_link) >= 0 && + (!S_ISBLK(st_link.st_mode) || + st_link.st_rdev != st.st_rdev)) + continue; - swap_setup_unit(m, p, device, prio, set_flags); + swap_setup_unit(m, devlink, device, prio, set_flags); } - return r; + return 0; } static void swap_set_state(Swap *s, SwapState state) { @@ -485,6 +502,9 @@ static void swap_set_state(Swap *s, SwapState state) { assert(s); + if (s->state != state) + bus_unit_send_pending_change_signal(UNIT(s), false); + old_state = s->state; s->state = state; @@ -601,11 +621,12 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) { static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { - ExecParameters exec_params = { + _cleanup_(exec_params_clear) ExecParameters exec_params = { .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, .stdin_fd = -1, .stdout_fd = -1, .stderr_fd = -1, + .exec_fd = -1, }; pid_t pid; int r; @@ -622,7 +643,9 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { if (r < 0) goto fail; - unit_set_exec_params(UNIT(s), &exec_params); + r = 
unit_set_exec_params(UNIT(s), &exec_params); + if (r < 0) + goto fail; r = exec_spawn(UNIT(s), c, @@ -655,9 +678,7 @@ static void swap_enter_dead(Swap *s, SwapResult f) { if (s->result == SWAP_SUCCESS) s->result = f; - if (s->result != SWAP_SUCCESS) - log_unit_warning(UNIT(s), "Failed with result '%s'.", swap_result_to_string(s->result)); - + unit_log_result(UNIT(s), s->result == SWAP_SUCCESS, swap_result_to_string(s->result)); swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD); s->exec_runtime = exec_runtime_unref(s->exec_runtime, true); @@ -813,6 +834,14 @@ fail: swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES); } +static void swap_cycle_clear(Swap *s) { + assert(s); + + s->result = SWAP_SUCCESS; + exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX); + UNIT(s)->reset_accounting = true; +} + static int swap_start(Unit *u) { Swap *s = SWAP(u), *other; int r; @@ -852,10 +881,7 @@ static int swap_start(Unit *u) { if (r < 0) return r; - s->result = SWAP_SUCCESS; - - u->reset_accounting = true; - + swap_cycle_clear(s); swap_enter_activating(s); return 1; } @@ -898,14 +924,14 @@ static int swap_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", swap_state_to_string(s->state)); - unit_serialize_item(u, f, "result", swap_result_to_string(s->result)); + (void) serialize_item(f, "state", swap_state_to_string(s->state)); + (void) serialize_item(f, "result", swap_result_to_string(s->result)); if (s->control_pid > 0) - unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid); + (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid); if (s->control_command_id >= 0) - unit_serialize_item(u, f, "control-command", swap_exec_command_to_string(s->control_command_id)); + (void) serialize_item(f, "control-command", swap_exec_command_to_string(s->control_command_id)); return 0; } @@ -1012,8 +1038,11 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int 
code, int status) { s->control_command_id = _SWAP_EXEC_COMMAND_INVALID; } - log_unit_full(u, f == SWAP_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0, - "Swap process exited, code=%s status=%i", sigchld_code_to_string(code), status); + unit_log_process_exit( + u, f == SWAP_SUCCESS ? LOG_DEBUG : LOG_NOTICE, + "Swap process", + swap_exec_command_to_string(s->control_command_id), + code, status); switch (s->state) { @@ -1084,7 +1113,6 @@ static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userd static int swap_load_proc_swaps(Manager *m, bool set_flags) { unsigned i; - int r = 0; assert(m); @@ -1116,12 +1144,10 @@ static int swap_load_proc_swaps(Manager *m, bool set_flags) { device_found_node(m, d, DEVICE_FOUND_SWAP, DEVICE_FOUND_SWAP); - k = swap_process_new(m, d, prio, set_flags); - if (k < 0) - r = k; + (void) swap_process_new(m, d, prio, set_flags); } - return r; + return 0; } static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) { @@ -1152,13 +1178,13 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v Swap *swap = SWAP(u); if (!swap->is_active) { - /* This has just been deactivated */ swap_unset_proc_swaps(swap); switch (swap->state) { case SWAP_ACTIVE: + /* This has just been deactivated */ swap_enter_dead(swap, SWAP_SUCCESS); break; @@ -1179,7 +1205,8 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v case SWAP_DEAD: case SWAP_FAILED: - (void) unit_acquire_invocation_id(UNIT(swap)); + (void) unit_acquire_invocation_id(u); + swap_cycle_clear(swap); swap_enter_active(swap, SWAP_SUCCESS); break; @@ -1320,18 +1347,17 @@ fail: swap_shutdown(m); } -int swap_process_device_new(Manager *m, struct udev_device *dev) { - struct udev_list_entry *item = NULL, *first = NULL; +int swap_process_device_new(Manager *m, sd_device *dev) { _cleanup_free_ char *e = NULL; - const char *dn; + const char *dn, *devlink; Unit *u; int r = 0; assert(m); assert(dev); - dn = 
udev_device_get_devnode(dev); - if (!dn) + r = sd_device_get_devname(dev, &dn); + if (r < 0) return 0; r = unit_name_from_path(dn, ".swap", &e); @@ -1342,12 +1368,11 @@ int swap_process_device_new(Manager *m, struct udev_device *dev) { if (u) r = swap_set_devnode(SWAP(u), dn); - first = udev_device_get_devlinks_list_entry(dev); - udev_list_entry_foreach(item, first) { + FOREACH_DEVICE_DEVLINK(dev, devlink) { _cleanup_free_ char *n = NULL; int q; - q = unit_name_from_path(udev_list_entry_get_name(item), ".swap", &n); + q = unit_name_from_path(devlink, ".swap", &n); if (q < 0) return q; @@ -1362,13 +1387,13 @@ int swap_process_device_new(Manager *m, struct udev_device *dev) { return r; } -int swap_process_device_remove(Manager *m, struct udev_device *dev) { +int swap_process_device_remove(Manager *m, sd_device *dev) { const char *dn; int r = 0; Swap *s; - dn = udev_device_get_devnode(dev); - if (!dn) + r = sd_device_get_devname(dev, &dn); + if (r < 0) return 0; while ((s = hashmap_get(m->swaps_by_devnode, dn))) { diff --git a/src/core/swap.h b/src/core/swap.h index 1c0c7fcadc..1a4b60b957 100644 --- a/src/core/swap.h +++ b/src/core/swap.h @@ -5,7 +5,7 @@ Copyright © 2010 Maarten Lankhorst ***/ -#include "libudev.h" +#include "sd-device.h" #include "unit.h" typedef struct Swap Swap; @@ -85,8 +85,8 @@ struct Swap { extern const UnitVTable swap_vtable; -int swap_process_device_new(Manager *m, struct udev_device *dev); -int swap_process_device_remove(Manager *m, struct udev_device *dev); +int swap_process_device_new(Manager *m, sd_device *dev); +int swap_process_device_remove(Manager *m, sd_device *dev); const char* swap_exec_command_to_string(SwapExecCommand i) _const_; SwapExecCommand swap_exec_command_from_string(const char *s) _pure_; diff --git a/src/core/system.conf.in b/src/core/system.conf.in index f0a59a79a5..0a58737b82 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in @@ -23,9 +23,9 @@ #CrashReboot=no #CtrlAltDelBurstAction=reboot-force 
#CPUAffinity=1 2 -#JoinControllers=cpu,cpuacct net_cls,net_prio #RuntimeWatchdogSec=0 #ShutdownWatchdogSec=10min +#WatchdogDevice= #CapabilityBoundingSet= #NoNewPrivileges=no #SystemCallArchitectures= @@ -52,7 +52,7 @@ #DefaultLimitSTACK= #DefaultLimitCORE= #DefaultLimitRSS= -#DefaultLimitNOFILE= +#DefaultLimitNOFILE=1024:@HIGH_RLIMIT_NOFILE@ #DefaultLimitAS= #DefaultLimitNPROC= #DefaultLimitMEMLOCK= @@ -62,5 +62,3 @@ #DefaultLimitNICE= #DefaultLimitRTPRIO= #DefaultLimitRTTIME= -#IPAddressAllow= -#IPAddressDeny= diff --git a/src/core/target.c b/src/core/target.c index 6446767504..421a304c73 100644 --- a/src/core/target.c +++ b/src/core/target.c @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #include "dbus-target.h" +#include "dbus-unit.h" #include "log.h" +#include "serialize.h" #include "special.h" #include "string-util.h" +#include "target.h" #include "unit-name.h" #include "unit.h" -#include "target.h" static const UnitActiveState state_translation_table[_TARGET_STATE_MAX] = { [TARGET_DEAD] = UNIT_INACTIVE, @@ -17,6 +19,9 @@ static void target_set_state(Target *t, TargetState state) { TargetState old_state; assert(t); + if (t->state != state) + bus_unit_send_pending_change_signal(UNIT(t), false); + old_state = t->state; t->state = state; @@ -66,7 +71,7 @@ static int target_add_default_dependencies(Target *t) { return 0; /* Make sure targets are unloaded on shutdown */ - return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } static int target_load(Unit *u) { @@ -144,7 +149,7 @@ static int target_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, "state", target_state_to_string(s->state)); + (void) serialize_item(f, "state", target_state_to_string(s->state)); return 0; } diff --git 
a/src/core/timer.c b/src/core/timer.c index db202971d3..d9ba2f76b3 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -6,9 +6,11 @@ #include "bus-error.h" #include "bus-util.h" #include "dbus-timer.h" +#include "dbus-unit.h" #include "fs-util.h" #include "parse-util.h" #include "random-util.h" +#include "serialize.h" #include "special.h" #include "string-table.h" #include "string-util.h" @@ -88,18 +90,18 @@ static int timer_add_default_dependencies(Timer *t) { if (!UNIT(t)->default_dependencies) return 0; - r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) { - r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; LIST_FOREACH(value, v, t->values) { if (v->base == TIMER_CALENDAR) { - r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, true, UNIT_DEPENDENCY_DEFAULT); if (r < 0) return r; break; @@ -107,7 +109,7 @@ static int timer_add_default_dependencies(Timer *t) { } } - return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, NULL, true, UNIT_DEPENDENCY_DEFAULT); + return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT); } static int timer_add_trigger_dependencies(Timer *t) { @@ -246,6 +248,9 @@ static void timer_set_state(Timer *t, TimerState state) { TimerState old_state; assert(t); + if (t->state != state) + 
bus_unit_send_pending_change_signal(UNIT(t), false); + old_state = t->state; t->state = state; @@ -262,7 +267,7 @@ static void timer_set_state(Timer *t, TimerState state) { unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0); } -static void timer_enter_waiting(Timer *t, bool initial); +static void timer_enter_waiting(Timer *t, bool time_change); static int timer_coldplug(Unit *u) { Timer *t = TIMER(u); @@ -287,9 +292,7 @@ static void timer_enter_dead(Timer *t, TimerResult f) { if (t->result == TIMER_SUCCESS) t->result = f; - if (t->result != TIMER_SUCCESS) - log_unit_warning(UNIT(t), "Failed with result '%s'.", timer_result_to_string(t->result)); - + unit_log_result(UNIT(t), t->result == TIMER_SUCCESS, timer_result_to_string(t->result)); timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD); } @@ -332,7 +335,7 @@ static void add_random(Timer *t, usec_t *v) { log_unit_debug(UNIT(t), "Adding %s random time.", format_timespan(s, sizeof(s), add, 0)); } -static void timer_enter_waiting(Timer *t, bool initial) { +static void timer_enter_waiting(Timer *t, bool time_change) { bool found_monotonic = false, found_realtime = false; bool leave_around = false; triple_timestamp ts; @@ -442,7 +445,8 @@ static void timer_enter_waiting(Timer *t, bool initial) { v->next_elapse = usec_add(usec_shift_clock(base, CLOCK_MONOTONIC, TIMER_MONOTONIC_CLOCK(t)), v->value); - if (!initial && + if (dual_timestamp_is_set(&t->last_trigger) && + !time_change && v->next_elapse < triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)) && IN_SET(v->base, TIMER_ACTIVE, TIMER_BOOT, TIMER_STARTUP)) { /* This is a one time trigger, disable it now */ @@ -640,7 +644,7 @@ static int timer_start(Unit *u) { } t->result = TIMER_SUCCESS; - timer_enter_waiting(t, true); + timer_enter_waiting(t, false); return 1; } @@ -661,21 +665,20 @@ static int timer_serialize(Unit *u, FILE *f, FDSet *fds) { assert(f); assert(fds); - unit_serialize_item(u, f, 
"state", timer_state_to_string(t->state)); - unit_serialize_item(u, f, "result", timer_result_to_string(t->result)); + (void) serialize_item(f, "state", timer_state_to_string(t->state)); + (void) serialize_item(f, "result", timer_result_to_string(t->result)); if (t->last_trigger.realtime > 0) - unit_serialize_item_format(u, f, "last-trigger-realtime", "%" PRIu64, t->last_trigger.realtime); + (void) serialize_usec(f, "last-trigger-realtime", t->last_trigger.realtime); if (t->last_trigger.monotonic > 0) - unit_serialize_item_format(u, f, "last-trigger-monotonic", "%" PRIu64, t->last_trigger.monotonic); + (void) serialize_usec(f, "last-trigger-monotonic", t->last_trigger.monotonic); return 0; } static int timer_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) { Timer *t = TIMER(u); - int r; assert(u); assert(key); @@ -690,6 +693,7 @@ static int timer_deserialize_item(Unit *u, const char *key, const char *value, F log_unit_debug(u, "Failed to parse state value: %s", value); else t->deserialized_state = state; + } else if (streq(key, "result")) { TimerResult f; @@ -698,19 +702,12 @@ static int timer_deserialize_item(Unit *u, const char *key, const char *value, F log_unit_debug(u, "Failed to parse result value: %s", value); else if (f != TIMER_SUCCESS) t->result = f; - } else if (streq(key, "last-trigger-realtime")) { - - r = safe_atou64(value, &t->last_trigger.realtime); - if (r < 0) - log_unit_debug(u, "Failed to parse last-trigger-realtime value: %s", value); - - } else if (streq(key, "last-trigger-monotonic")) { - - r = safe_atou64(value, &t->last_trigger.monotonic); - if (r < 0) - log_unit_debug(u, "Failed to parse last-trigger-monotonic value: %s", value); - } else + } else if (streq(key, "last-trigger-realtime")) + (void) deserialize_usec(value, &t->last_trigger.realtime); + else if (streq(key, "last-trigger-monotonic")) + (void) deserialize_usec(value, &t->last_trigger.monotonic); + else log_unit_debug(u, "Unknown serialization key: %s", 
key); return 0; @@ -811,7 +808,7 @@ static void timer_time_change(Unit *u) { t->last_trigger.realtime = ts; log_unit_debug(u, "Time change, recalculating next elapse."); - timer_enter_waiting(t, false); + timer_enter_waiting(t, true); } static void timer_timezone_change(Unit *u) { diff --git a/src/core/transaction.c b/src/core/transaction.c index 1c7efb207a..486c6a4a05 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -526,7 +526,9 @@ static int transaction_is_destructive(Transaction *tr, JobMode mode, sd_bus_erro if (j->unit->job && (mode == JOB_FAIL || j->unit->job->irreversible) && job_type_is_conflicting(j->unit->job->type, j->type)) return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE, - "Transaction is destructive."); + "Transaction for %s/%s is destructive (%s has '%s' job queued, but '%s' is included in transaction).", + tr->anchor_job->unit->id, job_type_to_string(tr->anchor_job->type), + j->unit->id, job_type_to_string(j->unit->job->type), job_type_to_string(j->type)); } return 0; @@ -695,10 +697,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error if (r >= 0) break; - if (r != -EAGAIN) { - log_warning("Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r)); - return r; - } + if (r != -EAGAIN) + return log_warning_errno(r, "Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r)); /* Let's see if the resulting transaction ordering * graph is still cyclic... 
*/ @@ -712,10 +712,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error if (r >= 0) break; - if (r != -EAGAIN) { - log_warning("Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r)); - return r; - } + if (r != -EAGAIN) + return log_warning_errno(r, "Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r)); /* Seventh step: an entry got dropped, let's garbage * collect its dependencies. */ @@ -731,10 +729,8 @@ int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error /* Ninth step: check whether we can actually apply this */ r = transaction_is_destructive(tr, mode, e); - if (r < 0) { - log_notice("Requested transaction contradicts existing jobs: %s", bus_error_message(e, r)); - return r; - } + if (r < 0) + return log_notice_errno(r, "Requested transaction contradicts existing jobs: %s", bus_error_message(e, r)); /* Tenth step: apply changes */ r = transaction_apply(tr, m, mode); diff --git a/src/core/umount.c b/src/core/umount.c index 241fe6fc62..7af0195aab 100644 --- a/src/core/umount.c +++ b/src/core/umount.c @@ -13,22 +13,24 @@ /* This needs to be after sys/mount.h :( */ #include <libmount.h> -#include "libudev.h" +#include "sd-device.h" #include "alloc-util.h" #include "blockdev-util.h" #include "def.h" +#include "device-util.h" #include "escape.h" #include "fd-util.h" #include "fstab-util.h" #include "linux-3.13/dm-ioctl.h" #include "mount-setup.h" #include "mount-util.h" +#include "mountpoint-util.h" #include "path-util.h" #include "process-util.h" #include "signal-util.h" #include "string-util.h" -#include "udev-util.h" +#include "strv.h" #include "umount.h" #include "util.h" #include "virt.h" @@ -72,7 +74,8 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { for (;;) { struct libmnt_fs *fs; - const char *path, *options, *fstype; + const char *path, *fstype; + _cleanup_free_ char *options = NULL; _cleanup_free_ char *p = NULL; unsigned 
long remount_flags = 0u; _cleanup_free_ char *remount_options = NULL; @@ -92,9 +95,25 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { if (cunescape(path, UNESCAPE_RELAX, &p) < 0) return log_oom(); - options = mnt_fs_get_options(fs); fstype = mnt_fs_get_fstype(fs); + /* Combine the generic VFS options with the FS-specific + * options. Duplicates are not a problem here, because the only + * options that should come up twice are typically ro/rw, which + * are turned into MS_RDONLY or the invertion of it. + * + * Even if there are duplicates later in mount_option_mangle() + * it shouldn't hurt anyways as they override each other. + */ + if (!strextend_with_separator(&options, ",", + mnt_fs_get_vfs_options(fs), + NULL)) + return log_oom(); + if (!strextend_with_separator(&options, ",", + mnt_fs_get_fs_options(fs), + NULL)) + return log_oom(); + /* Ignore mount points we can't unmount because they * are API or because we are keeping them open (like * /dev/console). Also, ignore all mounts below API @@ -104,9 +123,7 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { * unmount these things, hence don't bother. 
*/ if (mount_point_is_api(p) || mount_point_ignore(p) || - path_startswith(p, "/dev") || - path_startswith(p, "/sys") || - path_startswith(p, "/proc")) + PATH_STARTSWITH_SET(p, "/dev", "/sys", "/proc")) continue; /* If we are in a container, don't attempt to @@ -212,121 +229,105 @@ int swap_list_get(const char *swaps, MountPoint **head) { } static int loopback_list_get(MountPoint **head) { - _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL; - struct udev_list_entry *item = NULL, *first = NULL; - _cleanup_(udev_unrefp) struct udev *udev = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *d; int r; assert(head); - udev = udev_new(); - if (!udev) - return -ENOMEM; - - e = udev_enumerate_new(udev); - if (!e) - return -ENOMEM; + r = sd_device_enumerator_new(&e); + if (r < 0) + return r; - r = udev_enumerate_add_match_subsystem(e, "block"); + r = sd_device_enumerator_allow_uninitialized(e); if (r < 0) return r; - r = udev_enumerate_add_match_sysname(e, "loop*"); + r = sd_device_enumerator_add_match_subsystem(e, "block", true); if (r < 0) return r; - r = udev_enumerate_add_match_sysattr(e, "loop/backing_file", NULL); + r = sd_device_enumerator_add_match_sysname(e, "loop*"); if (r < 0) return r; - r = udev_enumerate_scan_devices(e); + r = sd_device_enumerator_add_match_sysattr(e, "loop/backing_file", NULL, true); if (r < 0) return r; - first = udev_enumerate_get_list_entry(e); - udev_list_entry_foreach(item, first) { - _cleanup_(udev_device_unrefp) struct udev_device *d; + FOREACH_DEVICE(e, d) { + _cleanup_free_ char *p = NULL; const char *dn; - _cleanup_free_ MountPoint *lb = NULL; - - d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item)); - if (!d) - return -ENOMEM; + MountPoint *lb; - dn = udev_device_get_devnode(d); - if (!dn) + if (sd_device_get_devname(d, &dn) < 0) continue; - lb = new0(MountPoint, 1); + p = strdup(dn); + if (!p) + return -ENOMEM; + + lb = new(MountPoint, 1); if (!lb) return 
-ENOMEM; - r = free_and_strdup(&lb->path, dn); - if (r < 0) - return r; + *lb = (MountPoint) { + .path = TAKE_PTR(p), + }; LIST_PREPEND(mount_point, *head, lb); - lb = NULL; } return 0; } static int dm_list_get(MountPoint **head) { - _cleanup_(udev_enumerate_unrefp) struct udev_enumerate *e = NULL; - struct udev_list_entry *item = NULL, *first = NULL; - _cleanup_(udev_unrefp) struct udev *udev = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *d; int r; assert(head); - udev = udev_new(); - if (!udev) - return -ENOMEM; - - e = udev_enumerate_new(udev); - if (!e) - return -ENOMEM; + r = sd_device_enumerator_new(&e); + if (r < 0) + return r; - r = udev_enumerate_add_match_subsystem(e, "block"); + r = sd_device_enumerator_allow_uninitialized(e); if (r < 0) return r; - r = udev_enumerate_add_match_sysname(e, "dm-*"); + r = sd_device_enumerator_add_match_subsystem(e, "block", true); if (r < 0) return r; - r = udev_enumerate_scan_devices(e); + r = sd_device_enumerator_add_match_sysname(e, "dm-*"); if (r < 0) return r; - first = udev_enumerate_get_list_entry(e); - udev_list_entry_foreach(item, first) { - _cleanup_(udev_device_unrefp) struct udev_device *d; - dev_t devnum; + FOREACH_DEVICE(e, d) { + _cleanup_free_ char *p = NULL; const char *dn; - _cleanup_free_ MountPoint *m = NULL; - - d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item)); - if (!d) - return -ENOMEM; + MountPoint *m; + dev_t devnum; - devnum = udev_device_get_devnum(d); - dn = udev_device_get_devnode(d); - if (major(devnum) == 0 || !dn) + if (sd_device_get_devnum(d, &devnum) < 0 || + sd_device_get_devname(d, &dn) < 0) continue; - m = new0(MountPoint, 1); + p = strdup(dn); + if (!p) + return -ENOMEM; + + m = new(MountPoint, 1); if (!m) return -ENOMEM; - m->devnum = devnum; - r = free_and_strdup(&m->path, dn); - if (r < 0) - return r; + *m = (MountPoint) { + .path = TAKE_PTR(p), + .devnum = devnum, + }; LIST_PREPEND(mount_point, *head, m); - m = 
NULL; } return 0; diff --git a/src/core/unit-printf.c b/src/core/unit-printf.c index 046e937e92..a8e84ebe80 100644 --- a/src/core/unit-printf.c +++ b/src/core/unit-printf.c @@ -12,25 +12,25 @@ #include "unit.h" #include "user-util.h" -static int specifier_prefix_and_instance(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; assert(u); return unit_name_to_prefix_and_instance(u->id, ret); } -static int specifier_prefix(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; assert(u); return unit_name_to_prefix(u->id, ret); } -static int specifier_prefix_unescaped(char specifier, void *data, void *userdata, char **ret) { +static int specifier_prefix_unescaped(char specifier, const void *data, const void *userdata, char **ret) { _cleanup_free_ char *p = NULL; - Unit *u = userdata; + const Unit *u = userdata; int r; assert(u); @@ -42,16 +42,16 @@ static int specifier_prefix_unescaped(char specifier, void *data, void *userdata return unit_name_unescape(p, ret); } -static int specifier_instance_unescaped(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_instance_unescaped(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; assert(u); return unit_name_unescape(strempty(u->instance), ret); } -static int specifier_last_component(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; _cleanup_free_ char *prefix = NULL; char *dash; int r; @@ -70,7 +70,7 @@ static int specifier_last_component(char specifier, 
void *data, void *userdata, return 0; } -static int specifier_last_component_unescaped(char specifier, void *data, void *userdata, char **ret) { +static int specifier_last_component_unescaped(char specifier, const void *data, const void *userdata, char **ret) { _cleanup_free_ char *p = NULL; int r; @@ -81,8 +81,8 @@ static int specifier_last_component_unescaped(char specifier, void *data, void * return unit_name_unescape(p, ret); } -static int specifier_filename(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_filename(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; assert(u); @@ -92,12 +92,12 @@ static int specifier_filename(char specifier, void *data, void *userdata, char * return unit_name_to_path(u->id, ret); } -static void bad_specifier(Unit *u, char specifier) { +static void bad_specifier(const Unit *u, char specifier) { log_unit_warning(u, "Specifier '%%%c' used in unit configuration, which is deprecated. 
Please update your unit file, as it does not work as intended.", specifier); } -static int specifier_cgroup(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_cgroup(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; char *n; assert(u); @@ -115,8 +115,8 @@ static int specifier_cgroup(char specifier, void *data, void *userdata, char **r return 0; } -static int specifier_cgroup_root(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_cgroup_root(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; char *n; assert(u); @@ -131,8 +131,8 @@ static int specifier_cgroup_root(char specifier, void *data, void *userdata, cha return 0; } -static int specifier_cgroup_slice(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_cgroup_slice(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; char *n; assert(u); @@ -140,7 +140,7 @@ static int specifier_cgroup_slice(char specifier, void *data, void *userdata, ch bad_specifier(u, specifier); if (UNIT_ISSET(u->slice)) { - Unit *slice; + const Unit *slice; slice = UNIT_DEREF(u->slice); @@ -157,8 +157,8 @@ static int specifier_cgroup_slice(char specifier, void *data, void *userdata, ch return 0; } -static int specifier_special_directory(char specifier, void *data, void *userdata, char **ret) { - Unit *u = userdata; +static int specifier_special_directory(char specifier, const void *data, const void *userdata, char **ret) { + const Unit *u = userdata; char *n = NULL; assert(u); @@ -196,6 +196,8 @@ int unit_name_printf(Unit *u, const char* format, char **ret) { { 'p', specifier_prefix, NULL }, { 'i', specifier_string, u->instance }, + { 'g', specifier_group_name, NULL }, + { 'G', specifier_group_id, NULL }, { 'U', specifier_user_id, NULL }, { 'u', 
specifier_user_name, NULL }, @@ -264,6 +266,8 @@ int unit_full_printf(Unit *u, const char *format, char **ret) { { 'T', specifier_tmp_dir, NULL }, { 'V', specifier_var_tmp_dir, NULL }, + { 'g', specifier_group_name, NULL }, + { 'G', specifier_group_id, NULL }, { 'U', specifier_user_id, NULL }, { 'u', specifier_user_name, NULL }, { 'h', specifier_user_home, NULL }, @@ -282,35 +286,3 @@ int unit_full_printf(Unit *u, const char *format, char **ret) { return specifier_printf(format, table, u, ret); } - -int unit_full_printf_strv(Unit *u, char **l, char ***ret) { - size_t n; - char **r, **i, **j; - int q; - - /* Applies unit_full_printf to every entry in l */ - - assert(u); - - n = strv_length(l); - r = new(char*, n+1); - if (!r) - return -ENOMEM; - - for (i = l, j = r; *i; i++, j++) { - q = unit_full_printf(u, *i, j); - if (q < 0) - goto fail; - } - - *j = NULL; - *ret = r; - return 0; - -fail: - for (j--; j >= r; j--) - free(*j); - - free(r); - return q; -} diff --git a/src/core/unit-printf.h b/src/core/unit-printf.h index 5bd1d77bb2..f3dae159d5 100644 --- a/src/core/unit-printf.h +++ b/src/core/unit-printf.h @@ -5,4 +5,3 @@ int unit_name_printf(Unit *u, const char* text, char **ret); int unit_full_printf(Unit *u, const char *text, char **ret); -int unit_full_printf_strv(Unit *u, char **l, char ***ret); diff --git a/src/core/unit.c b/src/core/unit.c index 113205bf25..24b14fbcd6 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -10,8 +10,8 @@ #include "sd-id128.h" #include "sd-messages.h" -#include "alloc-util.h" #include "all-units.h" +#include "alloc-util.h" #include "bus-common-errors.h" #include "bus-util.h" #include "cgroup-util.h" @@ -22,6 +22,7 @@ #include "execute.h" #include "fd-util.h" #include "fileio-label.h" +#include "fileio.h" #include "format-util.h" #include "fs-util.h" #include "id128-util.h" @@ -35,6 +36,7 @@ #include "parse-util.h" #include "path-util.h" #include "process-util.h" +#include "serialize.h" #include "set.h" #include "signal-util.h" 
#include "sparse-endian.h" @@ -45,6 +47,8 @@ #include "string-table.h" #include "string-util.h" #include "strv.h" +#include "terminal-util.h" +#include "tmpfile-util.h" #include "umask-util.h" #include "unit-name.h" #include "unit.h" @@ -93,7 +97,8 @@ Unit *unit_new(Manager *m, size_t size) { u->ref_uid = UID_INVALID; u->ref_gid = GID_INVALID; u->cpu_usage_last = NSEC_INFINITY; - u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED; + u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL; + u->failure_action_exit_status = u->success_action_exit_status = -1; u->ip_accounting_ingress_map_fd = -1; u->ip_accounting_egress_map_fd = -1; @@ -127,7 +132,7 @@ int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) { return r; } -bool unit_has_name(Unit *u, const char *name) { +bool unit_has_name(const Unit *u, const char *name) { assert(u); assert(name); @@ -438,6 +443,22 @@ void unit_add_to_dbus_queue(Unit *u) { u->in_dbus_queue = true; } +void unit_submit_to_stop_when_unneeded_queue(Unit *u) { + assert(u); + + if (u->in_stop_when_unneeded_queue) + return; + + if (!u->stop_when_unneeded) + return; + + if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) + return; + + LIST_PREPEND(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u); + u->in_stop_when_unneeded_queue = true; +} + static void bidi_set_free(Unit *u, Hashmap *h) { Unit *other; Iterator i; @@ -553,6 +574,14 @@ void unit_free(Unit *u) { if (!u) return; + if (UNIT_ISSET(u->slice)) { + /* A unit is being dropped from the tree, make sure our parent slice recalculates the member mask */ + unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice)); + + /* And make sure the parent is realized again, updating cgroup memberships */ + unit_add_to_cgroup_realize_queue(UNIT_DEREF(u->slice)); + } + u->transient_file = safe_fclose(u->transient_file); if (!MANAGER_IS_RELOADING(u->manager)) @@ -634,6 +663,9 @@ void unit_free(Unit *u) { if (u->in_target_deps_queue) 
LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u); + if (u->in_stop_when_unneeded_queue) + LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u); + safe_close(u->ip_accounting_ingress_map_fd); safe_close(u->ip_accounting_egress_map_fd); @@ -647,6 +679,8 @@ void unit_free(Unit *u) { bpf_program_unref(u->ip_bpf_egress); bpf_program_unref(u->ip_bpf_egress_installed); + bpf_program_unref(u->bpf_device_control_installed); + condition_free_list(u->conditions); condition_free_list(u->asserts); @@ -943,7 +977,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { assert(u); assert(c); - if (c->working_directory) { + if (c->working_directory && !c->working_directory_missing_ok) { r = unit_require_mounts_for(u, c->working_directory, UNIT_DEPENDENCY_FILE); if (r < 0) return r; @@ -990,7 +1024,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { return r; } - r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, true, UNIT_DEPENDENCY_FILE); if (r < 0) return r; } @@ -1008,7 +1042,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { /* If syslog or kernel logging is requested, make sure our own * logging daemon is run first. 
*/ - r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, NULL, true, UNIT_DEPENDENCY_FILE); + r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, true, UNIT_DEPENDENCY_FILE); if (r < 0) return r; @@ -1133,17 +1167,20 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { (void) cg_mask_to_string(u->cgroup_realized_mask, &s); fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s)); } + if (u->cgroup_enabled_mask != 0) { _cleanup_free_ char *s = NULL; (void) cg_mask_to_string(u->cgroup_enabled_mask, &s); fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s)); } + m = unit_get_own_mask(u); if (m != 0) { _cleanup_free_ char *s = NULL; (void) cg_mask_to_string(m, &s); fprintf(f, "%s\tCGroup own mask: %s\n", prefix, strnull(s)); } + m = unit_get_members_mask(u); if (m != 0) { _cleanup_free_ char *s = NULL; @@ -1151,6 +1188,13 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { fprintf(f, "%s\tCGroup members mask: %s\n", prefix, strnull(s)); } + m = unit_get_delegate_mask(u); + if (m != 0) { + _cleanup_free_ char *s = NULL; + (void) cg_mask_to_string(m, &s); + fprintf(f, "%s\tCGroup delegate mask: %s\n", prefix, strnull(s)); + } + SET_FOREACH(t, u->names, i) fprintf(f, "%s\tName: %s\n", prefix, t); @@ -1184,8 +1228,12 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { if (u->failure_action != EMERGENCY_ACTION_NONE) fprintf(f, "%s\tFailure Action: %s\n", prefix, emergency_action_to_string(u->failure_action)); + if (u->failure_action_exit_status >= 0) + fprintf(f, "%s\tFailure Action Exit Status: %i\n", prefix, u->failure_action_exit_status); if (u->success_action != EMERGENCY_ACTION_NONE) fprintf(f, "%s\tSuccess Action: %s\n", prefix, emergency_action_to_string(u->success_action)); + if (u->success_action_exit_status >= 0) + fprintf(f, "%s\tSuccess Action Exit Status: %i\n", prefix, u->success_action_exit_status); if (u->job_timeout != USEC_INFINITY) fprintf(f, "%s\tJob Timeout: %s\n", prefix, 
format_timespan(timespan, sizeof(timespan), u->job_timeout, 0)); @@ -1379,7 +1427,7 @@ static int unit_add_slice_dependencies(Unit *u) { if (unit_has_name(u, SPECIAL_ROOT_SLICE)) return 0; - return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, NULL, true, mask); + return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, true, mask); } static int unit_add_mount_dependencies(Unit *u) { @@ -1469,6 +1517,9 @@ int unit_load(Unit *u) { return 0; if (u->transient_file) { + /* Finalize transient file: if this is a transient unit file, as soon as we reach unit_load() the setup + * is complete, hence let's synchronize the unit file we just wrote to disk. */ + r = fflush_and_check(u->transient_file); if (r < 0) goto fail; @@ -1512,7 +1563,8 @@ int unit_load(Unit *u) { if (u->job_running_timeout != USEC_INFINITY && u->job_running_timeout > u->job_timeout) log_unit_warning(u, "JobRunningTimeoutSec= is greater than JobTimeoutSec=, it has no effect."); - unit_update_cgroup_members_masks(u); + /* We finished loading, let's ensure our parents recalculate the members mask */ + unit_invalidate_cgroup_members_masks(u); } assert((u->load_state != UNIT_MERGED) == !u->merged_into); @@ -1587,6 +1639,8 @@ static bool unit_condition_test(Unit *u) { dual_timestamp_get(&u->condition_timestamp); u->condition_result = unit_condition_test_list(u, u->conditions, condition_type_to_string); + unit_add_to_dbus_queue(u); + return u->condition_result; } @@ -1596,103 +1650,26 @@ static bool unit_assert_test(Unit *u) { dual_timestamp_get(&u->assert_timestamp); u->assert_result = unit_condition_test_list(u, u->asserts, assert_type_to_string); + unit_add_to_dbus_queue(u); + return u->assert_result; } void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) { - DISABLE_WARNING_FORMAT_NONLITERAL; - manager_status_printf(u->manager, STATUS_TYPE_NORMAL, status, unit_status_msg_format, unit_description(u)); 
- REENABLE_WARNING; -} - -_pure_ static const char* unit_get_status_message_format(Unit *u, JobType t) { - const char *format; - const UnitStatusMessageFormats *format_table; - - assert(u); - assert(IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD)); - - if (t != JOB_RELOAD) { - format_table = &UNIT_VTABLE(u)->status_message_formats; - if (format_table) { - format = format_table->starting_stopping[t == JOB_STOP]; - if (format) - return format; - } - } - - /* Return generic strings */ - if (t == JOB_START) - return "Starting %s."; - else if (t == JOB_STOP) - return "Stopping %s."; - else - return "Reloading %s."; -} - -static void unit_status_print_starting_stopping(Unit *u, JobType t) { - const char *format; - - assert(u); - - /* Reload status messages have traditionally not been printed to console. */ - if (!IN_SET(t, JOB_START, JOB_STOP)) - return; + const char *d; - format = unit_get_status_message_format(u, t); + d = unit_description(u); + if (log_get_show_color()) + d = strjoina(ANSI_HIGHLIGHT, d, ANSI_NORMAL); DISABLE_WARNING_FORMAT_NONLITERAL; - unit_status_printf(u, "", format); + manager_status_printf(u->manager, STATUS_TYPE_NORMAL, status, unit_status_msg_format, d); REENABLE_WARNING; } -static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) { - const char *format, *mid; - char buf[LINE_MAX]; - - assert(u); - - if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD)) - return; - - if (log_on_console()) - return; - - /* We log status messages for all units and all operations. */ - - format = unit_get_status_message_format(u, t); - - DISABLE_WARNING_FORMAT_NONLITERAL; - (void) snprintf(buf, sizeof buf, format, unit_description(u)); - REENABLE_WARNING; - - mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR : - t == JOB_STOP ? 
"MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR : - "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR; - - /* Note that we deliberately use LOG_MESSAGE() instead of - * LOG_UNIT_MESSAGE() here, since this is supposed to mimic - * closely what is written to screen using the status output, - * which is supposed the highest level, friendliest output - * possible, which means we should avoid the low-level unit - * name. */ - log_struct(LOG_INFO, - LOG_MESSAGE("%s", buf), - LOG_UNIT_ID(u), - LOG_UNIT_INVOCATION_ID(u), - mid); -} - -void unit_status_emit_starting_stopping_reloading(Unit *u, JobType t) { - assert(u); - assert(t >= 0); - assert(t < _JOB_TYPE_MAX); - - unit_status_log_starting_stopping_reloading(u, t); - unit_status_print_starting_stopping(u, t); -} - int unit_start_limit_test(Unit *u) { + const char *reason; + assert(u); if (ratelimit_below(&u->start_limit)) { @@ -1703,7 +1680,11 @@ int unit_start_limit_test(Unit *u) { log_unit_warning(u, "Start request repeated too quickly."); u->start_limit_hit = true; - return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed"); + reason = strjoina("unit ", u->id, " failed"); + + return emergency_action(u->manager, u->start_limit_action, + EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN, + u->reboot_arg, -1, reason); } bool unit_shall_confirm_spawn(Unit *u) { @@ -1784,7 +1765,7 @@ int unit_start(Unit *u) { if (state != UNIT_ACTIVATING && !unit_condition_test(u)) { log_unit_debug(u, "Starting requested but condition failed. 
Not starting unit."); - return -EALREADY; + return -ECOMM; } /* If the asserts failed, fail the entire job */ @@ -1950,55 +1931,71 @@ bool unit_can_reload(Unit *u) { return UNIT_VTABLE(u)->reload; } -static void unit_check_unneeded(Unit *u) { - - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - - static const UnitDependency needed_dependencies[] = { +bool unit_is_unneeded(Unit *u) { + static const UnitDependency deps[] = { UNIT_REQUIRED_BY, UNIT_REQUISITE_OF, UNIT_WANTED_BY, UNIT_BOUND_BY, }; - - unsigned j; - int r; + size_t j; assert(u); - /* If this service shall be shut down when unneeded then do - * so. */ - if (!u->stop_when_unneeded) - return; + return false; - if (!UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) - return; + /* Don't clean up while the unit is transitioning or is even inactive. */ + if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) + return false; + if (u->job) + return false; - for (j = 0; j < ELEMENTSOF(needed_dependencies); j++) { + for (j = 0; j < ELEMENTSOF(deps); j++) { Unit *other; Iterator i; void *v; - HASHMAP_FOREACH_KEY(v, other, u->dependencies[needed_dependencies[j]], i) - if (unit_active_or_pending(other) || unit_will_restart(other)) - return; - } + /* If a dependent unit has a job queued, is active or transitioning, or is marked for + * restart, then don't clean this one up. */ - /* If stopping a unit fails continuously we might enter a stop - * loop here, hence stop acting on the service being - * unnecessary after a while. */ - if (!ratelimit_below(&u->auto_stop_ratelimit)) { - log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently."); - return; + HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i) { + if (other->job) + return false; + + if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) + return false; + + if (unit_will_restart(other)) + return false; + } } - log_unit_info(u, "Unit not needed anymore. 
Stopping."); + return true; +} - /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */ - r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL); - if (r < 0) - log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r)); +static void check_unneeded_dependencies(Unit *u) { + + static const UnitDependency deps[] = { + UNIT_REQUIRES, + UNIT_REQUISITE, + UNIT_WANTS, + UNIT_BINDS_TO, + }; + size_t j; + + assert(u); + + /* Add all units this unit depends on to the queue that processes StopWhenUnneeded= behaviour. */ + + for (j = 0; j < ELEMENTSOF(deps); j++) { + Unit *other; + Iterator i; + void *v; + + HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i) + unit_submit_to_stop_when_unneeded_queue(other); + } } static void unit_check_binds_to(Unit *u) { @@ -2098,29 +2095,6 @@ static void retroactively_stop_dependencies(Unit *u) { manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL); } -static void check_unneeded_dependencies(Unit *u) { - Unit *other; - Iterator i; - void *v; - - assert(u); - assert(UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u))); - - /* Garbage collect services that might not be needed anymore, if enabled */ - HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRES], i) - if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other))) - unit_check_unneeded(other); - HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_WANTS], i) - if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other))) - unit_check_unneeded(other); - HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUISITE], i) - if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other))) - unit_check_unneeded(other); - HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO], i) - if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other))) - unit_check_unneeded(other); -} - void unit_start_on_failure(Unit *u) { Unit *other; Iterator i; @@ -2156,8 +2130,9 @@ void 
unit_trigger_notify(Unit *u) { } static int unit_log_resources(Unit *u) { - struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4]; + bool any_traffic = false, have_ip_accounting = false; + _cleanup_free_ char *igress = NULL, *egress = NULL; size_t n_message_parts = 0, n_iovec = 0; char* message_parts[3 + 1], *t; nsec_t nsec = NSEC_INFINITY; @@ -2190,7 +2165,7 @@ static int unit_log_resources(Unit *u) { /* Format the CPU time for inclusion in the human language message string */ format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC); - t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time"); + t = strjoin("consumed ", buf, " CPU time"); if (!t) { r = log_oom(); goto finish; @@ -2209,6 +2184,10 @@ static int unit_log_resources(Unit *u) { if (value == UINT64_MAX) continue; + have_ip_accounting = true; + if (value > 0) + any_traffic = true; + /* Format IP accounting data for inclusion in the structured log message */ if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) { r = log_oom(); @@ -2218,22 +2197,41 @@ static int unit_log_resources(Unit *u) { /* Format the IP accounting data for inclusion in the human language message string, but only for the * bytes counters (and not for the packets counters) */ - if (m == CGROUP_IP_INGRESS_BYTES) - t = strjoin(n_message_parts > 0 ? "received " : "Received ", - format_bytes(buf, sizeof(buf), value), - " IP traffic"); - else if (m == CGROUP_IP_EGRESS_BYTES) - t = strjoin(n_message_parts > 0 ? 
"sent " : "Sent ", - format_bytes(buf, sizeof(buf), value), - " IP traffic"); - else - continue; - if (!t) { - r = log_oom(); - goto finish; + if (m == CGROUP_IP_INGRESS_BYTES) { + assert(!igress); + igress = strjoin("received ", format_bytes(buf, sizeof(buf), value), " IP traffic"); + if (!igress) { + r = log_oom(); + goto finish; + } + } else if (m == CGROUP_IP_EGRESS_BYTES) { + assert(!egress); + egress = strjoin("sent ", format_bytes(buf, sizeof(buf), value), " IP traffic"); + if (!egress) { + r = log_oom(); + goto finish; + } } + } - message_parts[n_message_parts++] = t; + if (have_ip_accounting) { + if (any_traffic) { + if (igress) + message_parts[n_message_parts++] = TAKE_PTR(igress); + if (egress) + message_parts[n_message_parts++] = TAKE_PTR(egress); + + } else { + char *k; + + k = strdup("no IP traffic"); + if (!k) { + r = log_oom(); + goto finish; + } + + message_parts[n_message_parts++] = k; + } } /* Is there any accounting data available at all? */ @@ -2243,7 +2241,7 @@ static int unit_log_resources(Unit *u) { } if (n_message_parts == 0) - t = strjoina("MESSAGE=", u->id, ": Completed"); + t = strjoina("MESSAGE=", u->id, ": Completed."); else { _cleanup_free_ char *joined; @@ -2255,7 +2253,8 @@ static int unit_log_resources(Unit *u) { goto finish; } - t = strjoina("MESSAGE=", u->id, ": ", joined); + joined[0] = ascii_toupper(joined[0]); + t = strjoina("MESSAGE=", u->id, ": ", joined, "."); } /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them, @@ -2299,8 +2298,105 @@ static void unit_update_on_console(Unit *u) { manager_unref_console(u->manager); } +static void unit_emit_audit_start(Unit *u) { + assert(u); + + if (u->type != UNIT_SERVICE) + return; + + /* Write audit record if we have just finished starting up */ + manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, true); + u->in_audit = true; +} + +static void unit_emit_audit_stop(Unit *u, UnitActiveState state) { + assert(u); + + if 
(u->type != UNIT_SERVICE) + return; + + if (u->in_audit) { + /* Write audit record if we have just finished shutting down */ + manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, state == UNIT_INACTIVE); + u->in_audit = false; + } else { + /* Hmm, if there was no start record written write it now, so that we always have a nice pair */ + manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, state == UNIT_INACTIVE); + + if (state == UNIT_INACTIVE) + manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, true); + } +} + +static bool unit_process_job(Job *j, UnitActiveState ns, UnitNotifyFlags flags) { + bool unexpected = false; + + assert(j); + + if (j->state == JOB_WAITING) + + /* So we reached a different state for this job. Let's see if we can run it now if it failed previously + * due to EAGAIN. */ + job_add_to_run_queue(j); + + /* Let's check whether the unit's new state constitutes a finished job, or maybe contradicts a running job and + * hence needs to invalidate jobs. */ + + switch (j->type) { + + case JOB_START: + case JOB_VERIFY_ACTIVE: + + if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) + job_finish_and_invalidate(j, JOB_DONE, true, false); + else if (j->state == JOB_RUNNING && ns != UNIT_ACTIVATING) { + unexpected = true; + + if (UNIT_IS_INACTIVE_OR_FAILED(ns)) + job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false); + } + + break; + + case JOB_RELOAD: + case JOB_RELOAD_OR_START: + case JOB_TRY_RELOAD: + + if (j->state == JOB_RUNNING) { + if (ns == UNIT_ACTIVE) + job_finish_and_invalidate(j, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false); + else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) { + unexpected = true; + + if (UNIT_IS_INACTIVE_OR_FAILED(ns)) + job_finish_and_invalidate(j, ns == UNIT_FAILED ? 
JOB_FAILED : JOB_DONE, true, false); + } + } + + break; + + case JOB_STOP: + case JOB_RESTART: + case JOB_TRY_RESTART: + + if (UNIT_IS_INACTIVE_OR_FAILED(ns)) + job_finish_and_invalidate(j, JOB_DONE, true, false); + else if (j->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) { + unexpected = true; + job_finish_and_invalidate(j, JOB_FAILED, true, false); + } + + break; + + default: + assert_not_reached("Job type unknown"); + } + + return unexpected; +} + void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags) { - bool unexpected; + const char *reason; Manager *m; assert(u); @@ -2313,6 +2409,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag m = u->manager; + /* Let's enqueue the change signal early. In case this unit has a job associated we want that this unit is in + * the bus queue, so that any job change signal queued will force out the unit change signal first. */ + unit_add_to_dbus_queue(u); + /* Update timestamps for state changes */ if (!MANAGER_IS_RELOADING(m)) { dual_timestamp_get(&u->state_change_timestamp); @@ -2329,7 +2429,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag } /* Keep track of failed units */ - (void) manager_update_failed_units(u->manager, u, ns == UNIT_FAILED); + (void) manager_update_failed_units(m, u, ns == UNIT_FAILED); /* Make sure the cgroup and state files are always removed when we become inactive */ if (UNIT_IS_INACTIVE_OR_FAILED(ns)) { @@ -2339,81 +2439,18 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag unit_update_on_console(u); - if (u->job) { - unexpected = false; - - if (u->job->state == JOB_WAITING) - - /* So we reached a different state for this - * job. Let's see if we can run it now if it - * failed previously due to EAGAIN. 
*/ - job_add_to_run_queue(u->job); - - /* Let's check whether this state change constitutes a - * finished job, or maybe contradicts a running job and - * hence needs to invalidate jobs. */ - - switch (u->job->type) { - - case JOB_START: - case JOB_VERIFY_ACTIVE: - - if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) - job_finish_and_invalidate(u->job, JOB_DONE, true, false); - else if (u->job->state == JOB_RUNNING && ns != UNIT_ACTIVATING) { - unexpected = true; - - if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false); - } - - break; - - case JOB_RELOAD: - case JOB_RELOAD_OR_START: - case JOB_TRY_RELOAD: - - if (u->job->state == JOB_RUNNING) { - if (ns == UNIT_ACTIVE) - job_finish_and_invalidate(u->job, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false); - else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) { - unexpected = true; - - if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false); - } - } - - break; - - case JOB_STOP: - case JOB_RESTART: - case JOB_TRY_RESTART: - - if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, JOB_DONE, true, false); - else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) { - unexpected = true; - job_finish_and_invalidate(u->job, JOB_FAILED, true, false); - } - - break; - - default: - assert_not_reached("Job type unknown"); - } - - } else - unexpected = true; - if (!MANAGER_IS_RELOADING(m)) { + bool unexpected; - /* If this state change happened without being - * requested by a job, then let's retroactively start - * or stop dependencies. We skip that step when - * deserializing, since we don't want to create any - * additional jobs just because something is already - * activated. 
*/ + /* Let's propagate state changes to the job */ + if (u->job) + unexpected = unit_process_job(u->job, ns, flags); + else + unexpected = true; + + /* If this state change happened without being requested by a job, then let's retroactively start or + * stop dependencies. We skip that step when deserializing, since we don't want to create any + * additional jobs just because something is already activated. */ if (unexpected) { if (UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_ACTIVE_OR_ACTIVATING(ns)) @@ -2423,7 +2460,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag } /* stop unneeded units regardless if going down was expected or not */ - if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns)) + if (UNIT_IS_INACTIVE_OR_FAILED(ns)) check_unneeded_dependencies(u); if (ns != os && ns == UNIT_FAILED) { @@ -2432,46 +2469,18 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag if (!(flags & UNIT_NOTIFY_WILL_AUTO_RESTART)) unit_start_on_failure(u); } - } - - if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) { - if (u->type == UNIT_SERVICE && - !UNIT_IS_ACTIVE_OR_RELOADING(os) && - !MANAGER_IS_RELOADING(m)) { - /* Write audit record if we have just finished starting up */ - manager_send_unit_audit(m, u, AUDIT_SERVICE_START, true); - u->in_audit = true; - } + if (UNIT_IS_ACTIVE_OR_RELOADING(ns) && !UNIT_IS_ACTIVE_OR_RELOADING(os)) { + /* This unit just finished starting up */ - if (!UNIT_IS_ACTIVE_OR_RELOADING(os)) + unit_emit_audit_start(u); manager_send_unit_plymouth(m, u); + } - } else { - - if (UNIT_IS_INACTIVE_OR_FAILED(ns) && - !UNIT_IS_INACTIVE_OR_FAILED(os) - && !MANAGER_IS_RELOADING(m)) { - + if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os)) { /* This unit just stopped/failed. 
*/ - if (u->type == UNIT_SERVICE) { - - /* Hmm, if there was no start record written - * write it now, so that we always have a nice - * pair */ - if (!u->in_audit) { - manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE); - if (ns == UNIT_INACTIVE) - manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true); - } else - /* Write audit record if we have just finished shutting down */ - manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE); - - u->in_audit = false; - } - - /* Write a log message about consumed resources */ + unit_emit_audit_stop(u, ns); unit_log_resources(u); } } @@ -2481,22 +2490,24 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag unit_trigger_notify(u); - if (!MANAGER_IS_RELOADING(u->manager)) { + if (!MANAGER_IS_RELOADING(m)) { /* Maybe we finished startup and are now ready for being stopped because unneeded? */ - unit_check_unneeded(u); + unit_submit_to_stop_when_unneeded_queue(u); /* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when * something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive, * without ever entering started.) 
*/ unit_check_binds_to(u); - if (os != UNIT_FAILED && ns == UNIT_FAILED) - (void) emergency_action(u->manager, u->failure_action, u->reboot_arg, "unit failed"); - else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) - (void) emergency_action(u->manager, u->success_action, u->reboot_arg, "unit succeeded"); + if (os != UNIT_FAILED && ns == UNIT_FAILED) { + reason = strjoina("unit ", u->id, " failed"); + (void) emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason); + } else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) { + reason = strjoina("unit ", u->id, " succeeded"); + (void) emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason); + } } - unit_add_to_dbus_queue(u); unit_add_to_gc_queue(u); } @@ -2882,17 +2893,14 @@ int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit return unit_add_dependency(u, e, other, add_reference, mask); } -static int resolve_template(Unit *u, const char *name, const char*path, char **buf, const char **ret) { +static int resolve_template(Unit *u, const char *name, char **buf, const char **ret) { int r; assert(u); - assert(name || path); + assert(name); assert(buf); assert(ret); - if (!name) - name = basename(path); - if (!unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) { *buf = NULL; *ret = name; @@ -2917,38 +2925,38 @@ static int resolve_template(Unit *u, const char *name, const char*path, char **b return 0; } -int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, const char *path, bool add_reference, UnitDependencyMask mask) { +int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask) { _cleanup_free_ char *buf = NULL; Unit *other; int r; assert(u); - assert(name || path); + assert(name); - r = resolve_template(u, name, path, &buf, &name); + r = resolve_template(u, name, &buf, &name); if (r < 0) return r; - r = 
manager_load_unit(u->manager, name, path, NULL, &other); + r = manager_load_unit(u->manager, name, NULL, NULL, &other); if (r < 0) return r; return unit_add_dependency(u, d, other, add_reference, mask); } -int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, const char *path, bool add_reference, UnitDependencyMask mask) { +int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask) { _cleanup_free_ char *buf = NULL; Unit *other; int r; assert(u); - assert(name || path); + assert(name); - r = resolve_template(u, name, path, &buf, &name); + r = resolve_template(u, name, &buf, &name); if (r < 0) return r; - r = manager_load_unit(u->manager, name, path, NULL, &other); + r = manager_load_unit(u->manager, name, NULL, NULL, &other); if (r < 0) return r; @@ -3019,7 +3027,6 @@ int unit_set_slice(Unit *u, Unit *slice) { } int unit_set_default_slice(Unit *u) { - _cleanup_free_ char *b = NULL; const char *slice_name; Unit *slice; int r; @@ -3047,13 +3054,9 @@ int unit_set_default_slice(Unit *u) { return -ENOMEM; if (MANAGER_IS_SYSTEM(u->manager)) - b = strjoin("system-", escaped, ".slice"); + slice_name = strjoina("system-", escaped, ".slice"); else - b = strappend(escaped, ".slice"); - if (!b) - return -ENOMEM; - - slice_name = b; + slice_name = strjoina(escaped, ".slice"); } else slice_name = MANAGER_IS_SYSTEM(u->manager) && !unit_has_name(u, SPECIAL_INIT_SCOPE) @@ -3178,23 +3181,21 @@ bool unit_can_serialize(Unit *u) { return UNIT_VTABLE(u)->serialize && UNIT_VTABLE(u)->deserialize_item; } -static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) { +static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) { _cleanup_free_ char *s = NULL; - int r = 0; + int r; assert(f); assert(key); - if (mask != 0) { - r = cg_mask_to_string(mask, &s); - if (r >= 0) { - fputs(key, f); - fputc('=', f); - fputs(s, f); 
- fputc('\n', f); - } - } - return r; + if (mask == 0) + return 0; + + r = cg_mask_to_string(mask, &s); + if (r < 0) + return log_error_errno(r, "Failed to format cgroup mask: %m"); + + return serialize_item(f, key, s); } static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = { @@ -3218,46 +3219,50 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { return r; } - dual_timestamp_serialize(f, "state-change-timestamp", &u->state_change_timestamp); + (void) serialize_dual_timestamp(f, "state-change-timestamp", &u->state_change_timestamp); - dual_timestamp_serialize(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp); - dual_timestamp_serialize(f, "active-enter-timestamp", &u->active_enter_timestamp); - dual_timestamp_serialize(f, "active-exit-timestamp", &u->active_exit_timestamp); - dual_timestamp_serialize(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp); + (void) serialize_dual_timestamp(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp); + (void) serialize_dual_timestamp(f, "active-enter-timestamp", &u->active_enter_timestamp); + (void) serialize_dual_timestamp(f, "active-exit-timestamp", &u->active_exit_timestamp); + (void) serialize_dual_timestamp(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp); - dual_timestamp_serialize(f, "condition-timestamp", &u->condition_timestamp); - dual_timestamp_serialize(f, "assert-timestamp", &u->assert_timestamp); + (void) serialize_dual_timestamp(f, "condition-timestamp", &u->condition_timestamp); + (void) serialize_dual_timestamp(f, "assert-timestamp", &u->assert_timestamp); if (dual_timestamp_is_set(&u->condition_timestamp)) - unit_serialize_item(u, f, "condition-result", yes_no(u->condition_result)); + (void) serialize_bool(f, "condition-result", u->condition_result); if (dual_timestamp_is_set(&u->assert_timestamp)) - unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result)); + (void) serialize_bool(f, "assert-result", 
u->assert_result); - unit_serialize_item(u, f, "transient", yes_no(u->transient)); + (void) serialize_bool(f, "transient", u->transient); + (void) serialize_bool(f, "in-audit", u->in_audit); - unit_serialize_item(u, f, "exported-invocation-id", yes_no(u->exported_invocation_id)); - unit_serialize_item(u, f, "exported-log-level-max", yes_no(u->exported_log_level_max)); - unit_serialize_item(u, f, "exported-log-extra-fields", yes_no(u->exported_log_extra_fields)); + (void) serialize_bool(f, "exported-invocation-id", u->exported_invocation_id); + (void) serialize_bool(f, "exported-log-level-max", u->exported_log_level_max); + (void) serialize_bool(f, "exported-log-extra-fields", u->exported_log_extra_fields); + (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_rate_limit_interval); + (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_rate_limit_burst); - unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base); + (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base); if (u->cpu_usage_last != NSEC_INFINITY) - unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last); + (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last); if (u->cgroup_path) - unit_serialize_item(u, f, "cgroup", u->cgroup_path); - unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized)); - (void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask); - (void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask); - unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state); + (void) serialize_item(f, "cgroup", u->cgroup_path); + + (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized); + (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask); + (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask); + 
(void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask); if (uid_is_valid(u->ref_uid)) - unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid); + (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid); if (gid_is_valid(u->ref_gid)) - unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid); + (void) serialize_item_format(f, "ref-gid", GID_FMT, u->ref_gid); if (!sd_id128_is_null(u->invocation_id)) - unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)); + (void) serialize_item_format(f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)); bus_track_serialize(u->bus_track, f, "ref"); @@ -3266,17 +3271,17 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { r = unit_get_ip_accounting(u, m, &v); if (r >= 0) - unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v); + (void) serialize_item_format(f, ip_accounting_metric_field[m], "%" PRIu64, v); } if (serialize_jobs) { if (u->job) { - fprintf(f, "job\n"); + fputs("job\n", f); job_serialize(u->job, f); } if (u->nop_job) { - fprintf(f, "job\n"); + fputs("job\n", f); job_serialize(u->nop_job, f); } } @@ -3286,78 +3291,27 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { return 0; } -int unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value) { - assert(u); - assert(f); - assert(key); - - if (!value) - return 0; - - fputs(key, f); - fputc('=', f); - fputs(value, f); - fputc('\n', f); - - return 1; -} - -int unit_serialize_item_escaped(Unit *u, FILE *f, const char *key, const char *value) { - _cleanup_free_ char *c = NULL; - - assert(u); - assert(f); - assert(key); - - if (!value) - return 0; - - c = cescape(value); - if (!c) - return -ENOMEM; - - fputs(key, f); - fputc('=', f); - fputs(c, f); - fputc('\n', f); - - return 1; -} - -int unit_serialize_item_fd(Unit *u, FILE *f, FDSet *fds, const char 
*key, int fd) { - int copy; +static int unit_deserialize_job(Unit *u, FILE *f) { + _cleanup_(job_freep) Job *j = NULL; + int r; assert(u); assert(f); - assert(key); - - if (fd < 0) - return 0; - copy = fdset_put_dup(fds, fd); - if (copy < 0) - return copy; + j = job_new_raw(u); + if (!j) + return log_oom(); - fprintf(f, "%s=%i\n", key, copy); - return 1; -} - -void unit_serialize_item_format(Unit *u, FILE *f, const char *key, const char *format, ...) { - va_list ap; - - assert(u); - assert(f); - assert(key); - assert(format); - - fputs(key, f); - fputc('=', f); + r = job_deserialize(j, f); + if (r < 0) + return r; - va_start(ap, format); - vfprintf(f, format, ap); - va_end(ap); + r = job_install_deserialized(j); + if (r < 0) + return r; - fputc('\n', f); + TAKE_PTR(j); + return 0; } int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { @@ -3368,21 +3322,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { assert(fds); for (;;) { - char line[LINE_MAX], *l, *v; + _cleanup_free_ char *line = NULL; CGroupIPAccountingMetric m; + char *l, *v; size_t k; - if (!fgets(line, sizeof(line), f)) { - if (feof(f)) - return 0; - return -errno; - } + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read serialization line: %m"); + if (r == 0) /* eof */ + break; - char_array_0(line); l = strstrip(line); - - /* End marker */ - if (isempty(l)) + if (isempty(l)) /* End marker */ break; k = strcspn(l, "="); @@ -3395,54 +3347,33 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { if (streq(l, "job")) { if (v[0] == '\0') { - /* new-style serialized job */ - Job *j; - - j = job_new_raw(u); - if (!j) - return log_oom(); - - r = job_deserialize(j, f); - if (r < 0) { - job_free(j); - return r; - } - - r = hashmap_put(u->manager->jobs, UINT32_TO_PTR(j->id), j); - if (r < 0) { - job_free(j); - return r; - } - - r = job_install_deserialized(j); - if (r < 0) { - hashmap_remove(u->manager->jobs, UINT32_TO_PTR(j->id)); - job_free(j); + /* 
New-style serialized job */ + r = unit_deserialize_job(u, f); + if (r < 0) return r; - } - } else /* legacy for pre-44 */ + } else /* Legacy for pre-44 */ log_unit_warning(u, "Update from too old systemd versions are unsupported, cannot deserialize job: %s", v); continue; } else if (streq(l, "state-change-timestamp")) { - dual_timestamp_deserialize(v, &u->state_change_timestamp); + (void) deserialize_dual_timestamp(v, &u->state_change_timestamp); continue; } else if (streq(l, "inactive-exit-timestamp")) { - dual_timestamp_deserialize(v, &u->inactive_exit_timestamp); + (void) deserialize_dual_timestamp(v, &u->inactive_exit_timestamp); continue; } else if (streq(l, "active-enter-timestamp")) { - dual_timestamp_deserialize(v, &u->active_enter_timestamp); + (void) deserialize_dual_timestamp(v, &u->active_enter_timestamp); continue; } else if (streq(l, "active-exit-timestamp")) { - dual_timestamp_deserialize(v, &u->active_exit_timestamp); + (void) deserialize_dual_timestamp(v, &u->active_exit_timestamp); continue; } else if (streq(l, "inactive-enter-timestamp")) { - dual_timestamp_deserialize(v, &u->inactive_enter_timestamp); + (void) deserialize_dual_timestamp(v, &u->inactive_enter_timestamp); continue; } else if (streq(l, "condition-timestamp")) { - dual_timestamp_deserialize(v, &u->condition_timestamp); + (void) deserialize_dual_timestamp(v, &u->condition_timestamp); continue; } else if (streq(l, "assert-timestamp")) { - dual_timestamp_deserialize(v, &u->assert_timestamp); + (void) deserialize_dual_timestamp(v, &u->assert_timestamp); continue; } else if (streq(l, "condition-result")) { @@ -3474,6 +3405,16 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; + } else if (streq(l, "in-audit")) { + + r = parse_boolean(v); + if (r < 0) + log_unit_debug(u, "Failed to parse in-audit bool %s, ignoring.", v); + else + u->in_audit = r; + + continue; + } else if (streq(l, "exported-invocation-id")) { r = parse_boolean(v); @@ -3504,6 +3445,26 @@ int 
unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; + } else if (streq(l, "exported-log-rate-limit-interval")) { + + r = parse_boolean(v); + if (r < 0) + log_unit_debug(u, "Failed to parse exported log rate limit interval %s, ignoring.", v); + else + u->exported_log_rate_limit_interval = r; + + continue; + + } else if (streq(l, "exported-log-rate-limit-burst")) { + + r = parse_boolean(v); + if (r < 0) + log_unit_debug(u, "Failed to parse exported log rate limit burst %s, ignoring.", v); + else + u->exported_log_rate_limit_burst = r; + + continue; + } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) { r = safe_atou64(v, &u->cpu_usage_base); @@ -3554,18 +3515,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v); continue; - } else if (streq(l, "cgroup-bpf-realized")) { - int i; + } else if (streq(l, "cgroup-invalidated-mask")) { - r = safe_atoi(v, &i); + r = cg_mask_from_string(v, &u->cgroup_invalidated_mask); if (r < 0) - log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v); - else - u->cgroup_bpf_state = - i < 0 ? UNIT_CGROUP_BPF_INVALIDATED : - i > 0 ? UNIT_CGROUP_BPF_ON : - UNIT_CGROUP_BPF_OFF; - + log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v); continue; } else if (streq(l, "ref-uid")) { @@ -3588,11 +3542,13 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { else unit_ref_uid_gid(u, UID_INVALID, gid); + continue; + } else if (streq(l, "ref")) { r = strv_extend(&u->deserialized_refs, v); if (r < 0) - log_oom(); + return log_oom(); continue; } else if (streq(l, "invocation-id")) { @@ -3657,23 +3613,27 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { return 0; } -void unit_deserialize_skip(FILE *f) { +int unit_deserialize_skip(FILE *f) { + int r; assert(f); /* Skip serialized data for this unit. We don't know what it is. 
*/ for (;;) { - char line[LINE_MAX], *l; + _cleanup_free_ char *line = NULL; + char *l; - if (!fgets(line, sizeof line, f)) - return; + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read serialization line: %m"); + if (r == 0) + return 0; - char_array_0(line); l = strstrip(line); /* End marker */ if (isempty(l)) - return; + return 1; } } @@ -4143,12 +4103,28 @@ int unit_patch_contexts(Unit *u) { } cc = unit_get_cgroup_context(u); - if (cc) { + if (cc && ec) { - if (ec && - ec->private_devices && + if (ec->private_devices && cc->device_policy == CGROUP_AUTO) cc->device_policy = CGROUP_CLOSED; + + if (ec->root_image && + (cc->device_policy != CGROUP_AUTO || cc->device_allow)) { + + /* When RootImage= is specified, the following devices are touched. */ + r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw"); + if (r < 0) + return r; + + r = cgroup_add_device_allow(cc, "block-loop", "rwm"); + if (r < 0) + return r; + + r = cgroup_add_device_allow(cc, "block-blkext", "rwm"); + if (r < 0) + return r; + } } return 0; @@ -4479,10 +4455,10 @@ static int operation_to_signal(KillContext *c, KillOperation k) { return c->kill_signal; case KILL_KILL: - return SIGKILL; + return c->final_kill_signal; - case KILL_ABORT: - return SIGABRT; + case KILL_WATCHDOG: + return c->watchdog_signal; default: assert_not_reached("KillOperation unknown"); @@ -4609,7 +4585,6 @@ int unit_kill_context( int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) { _cleanup_free_ char *p = NULL; - char *prefix; UnitDependencyInfo di; int r; @@ -4649,7 +4624,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) return r; p = NULL; - prefix = alloca(strlen(path) + 1); + char prefix[strlen(path) + 1]; PATH_FOREACH_PREFIX_MORE(prefix, path) { Set *x; @@ -4766,7 +4741,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) { } int unit_fail_if_noncanonical(Unit *u, const char* where) { - 
_cleanup_free_ char *canonical_where; + _cleanup_free_ char *canonical_where = NULL; int r; assert(u); @@ -4966,7 +4941,7 @@ void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) { r = unit_ref_uid_gid(u, uid, gid); if (r > 0) - bus_unit_send_change_signal(u); + unit_add_to_dbus_queue(u); } int unit_set_invocation_id(Unit *u, sd_id128_t id) { @@ -5020,15 +4995,21 @@ int unit_acquire_invocation_id(Unit *u) { if (r < 0) return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m"); + unit_add_to_dbus_queue(u); return 0; } -void unit_set_exec_params(Unit *u, ExecParameters *p) { +int unit_set_exec_params(Unit *u, ExecParameters *p) { + int r; + assert(u); assert(p); /* Copy parameters from manager */ - p->environment = u->manager->environment; + r = manager_get_effective_environment(u->manager, &p->environment); + if (r < 0) + return r; + p->confirm_spawn = manager_get_confirm_spawn(u->manager); p->cgroup_supported = u->manager->cgroup_supported; p->prefix = u->manager->prefix; @@ -5037,6 +5018,8 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) { /* Copy paramaters from unit */ p->cgroup_path = u->cgroup_path; SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u)); + + return 0; } int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) { @@ -5242,6 +5225,60 @@ fail: return r; } +static int unit_export_log_rate_limit_interval(Unit *u, const ExecContext *c) { + _cleanup_free_ char *buf = NULL; + const char *p; + int r; + + assert(u); + assert(c); + + if (u->exported_log_rate_limit_interval) + return 0; + + if (c->log_rate_limit_interval_usec == 0) + return 0; + + p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id); + + if (asprintf(&buf, "%" PRIu64, c->log_rate_limit_interval_usec) < 0) + return log_oom(); + + r = symlink_atomic(buf, p); + if (r < 0) + return log_unit_debug_errno(u, r, "Failed to create log rate limit interval symlink %s: %m", p); + + u->exported_log_rate_limit_interval = true; + return 
0; +} + +static int unit_export_log_rate_limit_burst(Unit *u, const ExecContext *c) { + _cleanup_free_ char *buf = NULL; + const char *p; + int r; + + assert(u); + assert(c); + + if (u->exported_log_rate_limit_burst) + return 0; + + if (c->log_rate_limit_burst == 0) + return 0; + + p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id); + + if (asprintf(&buf, "%u", c->log_rate_limit_burst) < 0) + return log_oom(); + + r = symlink_atomic(buf, p); + if (r < 0) + return log_unit_debug_errno(u, r, "Failed to create log rate limit burst symlink %s: %m", p); + + u->exported_log_rate_limit_burst = true; + return 0; +} + void unit_export_state_files(Unit *u) { const ExecContext *c; @@ -5253,7 +5290,7 @@ void unit_export_state_files(Unit *u) { if (!MANAGER_IS_SYSTEM(u->manager)) return; - if (u->manager->test_run_flags != 0) + if (MANAGER_IS_TEST_RUN(u->manager)) return; /* Exports a couple of unit properties to /run/systemd/units/, so that journald can quickly query this data @@ -5275,6 +5312,8 @@ void unit_export_state_files(Unit *u) { if (c) { (void) unit_export_log_level_max(u, c); (void) unit_export_log_extra_fields(u, c); + (void) unit_export_log_rate_limit_interval(u, c); + (void) unit_export_log_rate_limit_burst(u, c); } } @@ -5311,6 +5350,20 @@ void unit_unlink_state_files(Unit *u) { u->exported_log_extra_fields = false; } + + if (u->exported_log_rate_limit_interval) { + p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id); + (void) unlink(p); + + u->exported_log_rate_limit_interval = false; + } + + if (u->exported_log_rate_limit_burst) { + p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id); + (void) unlink(p); + + u->exported_log_rate_limit_burst = false; + } } int unit_prepare_exec(Unit *u) { @@ -5433,6 +5486,105 @@ int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) { return 0; } +void unit_log_success(Unit *u) { + assert(u); + + log_struct(LOG_INFO, + "MESSAGE_ID=" SD_MESSAGE_UNIT_SUCCESS_STR, + LOG_UNIT_ID(u), + 
LOG_UNIT_INVOCATION_ID(u), + LOG_UNIT_MESSAGE(u, "Succeeded.")); +} + +void unit_log_failure(Unit *u, const char *result) { + assert(u); + assert(result); + + log_struct(LOG_WARNING, + "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILURE_RESULT_STR, + LOG_UNIT_ID(u), + LOG_UNIT_INVOCATION_ID(u), + LOG_UNIT_MESSAGE(u, "Failed with result '%s'.", result), + "UNIT_RESULT=%s", result); +} + +void unit_log_process_exit( + Unit *u, + int level, + const char *kind, + const char *command, + int code, + int status) { + + assert(u); + assert(kind); + + if (code != CLD_EXITED) + level = LOG_WARNING; + + log_struct(level, + "MESSAGE_ID=" SD_MESSAGE_UNIT_PROCESS_EXIT_STR, + LOG_UNIT_MESSAGE(u, "%s exited, code=%s, status=%i/%s", + kind, + sigchld_code_to_string(code), status, + strna(code == CLD_EXITED + ? exit_status_to_string(status, EXIT_STATUS_FULL) + : signal_to_string(status))), + "EXIT_CODE=%s", sigchld_code_to_string(code), + "EXIT_STATUS=%i", status, + "COMMAND=%s", strna(command), + LOG_UNIT_ID(u), + LOG_UNIT_INVOCATION_ID(u)); +} + +int unit_exit_status(Unit *u) { + assert(u); + + /* Returns the exit status to propagate for the most recent cycle of this unit. Returns a value in the range + * 0…255 if there's something to propagate. EOPNOTSUPP if the concept does not apply to this unit type, ENODATA + * if no data is currently known (for example because the unit hasn't deactivated yet) and EBADE if the main + * service process has exited abnormally (signal/coredump). */ + + if (!UNIT_VTABLE(u)->exit_status) + return -EOPNOTSUPP; + + return UNIT_VTABLE(u)->exit_status(u); +} + +int unit_failure_action_exit_status(Unit *u) { + int r; + + assert(u); + + /* Returns the exit status to propagate on failure, or an error if there's nothing to propagate */ + + if (u->failure_action_exit_status >= 0) + return u->failure_action_exit_status; + + r = unit_exit_status(u); + if (r == -EBADE) /* Exited, but not cleanly (i.e. 
by signal or such) */ + return 255; + + return r; +} + +int unit_success_action_exit_status(Unit *u) { + int r; + + assert(u); + + /* Returns the exit status to propagate on success, or an error if there's nothing to propagate */ + + if (u->success_action_exit_status >= 0) + return u->success_action_exit_status; + + r = unit_exit_status(u); + if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */ + return 255; + + return r; +} + static const char* const collect_mode_table[_COLLECT_MODE_MAX] = { [COLLECT_INACTIVE] = "inactive", [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed", diff --git a/src/core/unit.h b/src/core/unit.h index b3131eba1b..6fd39eaca3 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -19,7 +19,7 @@ typedef enum KillOperation { KILL_TERMINATE, KILL_TERMINATE_AND_LOG, KILL_KILL, - KILL_ABORT, + KILL_WATCHDOG, _KILL_OPERATION_MAX, _KILL_OPERATION_INVALID = -1 } KillOperation; @@ -105,12 +105,6 @@ struct UnitRef { LIST_FIELDS(UnitRef, refs_by_target); }; -typedef enum UnitCGroupBPFState { - UNIT_CGROUP_BPF_OFF = 0, - UNIT_CGROUP_BPF_ON = 1, - UNIT_CGROUP_BPF_INVALIDATED = -1, -} UnitCGroupBPFState; - typedef struct Unit { Manager *manager; @@ -188,9 +182,6 @@ typedef struct Unit { /* Per type list */ LIST_FIELDS(Unit, units_by_type); - /* All units which have requires_mounts_for set */ - LIST_FIELDS(Unit, has_requires_mounts_for); - /* Load queue */ LIST_FIELDS(Unit, load_queue); @@ -212,6 +203,9 @@ typedef struct Unit { /* Target dependencies queue */ LIST_FIELDS(Unit, target_deps_queue); + /* Queue of units with StopWhenUnneeded set that shell be checked for clean-up. */ + LIST_FIELDS(Unit, stop_when_unneeded_queue); + /* PIDs we keep an eye on. 
Note that a unit might have many * more, but these are the ones we care enough about to * process SIGCHLD for */ @@ -232,8 +226,9 @@ typedef struct Unit { RateLimit start_limit; EmergencyAction start_limit_action; - EmergencyAction failure_action; - EmergencyAction success_action; + /* What to do on failure or success */ + EmergencyAction success_action, failure_action; + int success_action_exit_status, failure_action_exit_status; char *reboot_arg; /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */ @@ -253,12 +248,15 @@ typedef struct Unit { /* Counterparts in the cgroup filesystem */ char *cgroup_path; - CGroupMask cgroup_realized_mask; - CGroupMask cgroup_enabled_mask; - CGroupMask cgroup_subtree_mask; - CGroupMask cgroup_members_mask; + CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroupsv1) */ + CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroupsv2) */ + CGroupMask cgroup_invalidated_mask; /* A mask specifiying controllers which shall be considered invalidated, and require re-realization */ + CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */ int cgroup_inotify_wd; + /* Device Controller BPF program */ + BPFProgram *bpf_device_control_installed; + /* IP BPF Firewalling/accounting */ int ip_accounting_ingress_map_fd; int ip_accounting_egress_map_fd; @@ -315,6 +313,7 @@ typedef struct Unit { /* Is this a unit that is always running and cannot be stopped? 
*/ bool perpetual; + /* Booleans indicating membership of this unit in the various queues */ bool in_load_queue:1; bool in_dbus_queue:1; bool in_cleanup_queue:1; @@ -322,6 +321,7 @@ typedef struct Unit { bool in_cgroup_realize_queue:1; bool in_cgroup_empty_queue:1; bool in_target_deps_queue:1; + bool in_stop_when_unneeded_queue:1; bool sent_dbus_new_signal:1; @@ -330,9 +330,6 @@ typedef struct Unit { bool cgroup_realized:1; bool cgroup_members_mask_valid:1; - bool cgroup_subtree_mask_valid:1; - - UnitCGroupBPFState cgroup_bpf_state:2; /* Reset cgroup accounting next time we fork something off */ bool reset_accounting:1; @@ -349,10 +346,12 @@ typedef struct Unit { bool exported_invocation_id:1; bool exported_log_level_max:1; bool exported_log_extra_fields:1; + bool exported_log_rate_limit_interval:1; + bool exported_log_rate_limit_burst:1; /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */ - int last_section_private:2; + signed int last_section_private:2; } Unit; typedef struct UnitStatusMessageFormats { @@ -380,7 +379,9 @@ typedef enum UnitWriteFlags { } UnitWriteFlags; /* Returns true if neither persistent, nor runtime storage is requested, i.e. this is a check invocation only */ -#define UNIT_WRITE_FLAGS_NOOP(flags) (((flags) & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0) +static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) { + return (flags & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0; +} #include "kill.h" @@ -432,11 +433,16 @@ typedef struct UnitVTable { int (*load)(Unit *u); /* During deserialization we only record the intended state to return to. With coldplug() we actually put the - * deserialized state in effect. This is where unit_notify() should be called to start things up. */ + * deserialized state in effect. This is where unit_notify() should be called to start things up. 
Note that + * this callback is invoked *before* we leave the reloading state of the manager, i.e. *before* we consider the + * reloading to be complete. Thus, this callback should just restore the exact same state for any unit that was + * in effect before the reload, i.e. units should not catch up with changes happened during the reload. That's + * what catchup() below is for. */ int (*coldplug)(Unit *u); - /* This is called shortly after all units' coldplug() call was invoked. It's supposed to catch up state changes - * we missed so far (for example because they took place while we were reloading/reexecing) */ + /* This is called shortly after all units' coldplug() call was invoked, and *after* the manager left the + * reloading state. It's supposed to catch up with state changes due to external events we missed so far (for + * example because they took place while we were reloading/reexecing) */ void (*catchup)(Unit *u); void (*dump)(Unit *u, FILE *f, const char *prefix); @@ -529,6 +535,10 @@ typedef struct UnitVTable { /* Returns true if the unit currently needs access to the console */ bool (*needs_console)(Unit *u); + /* Returns the exit status to propagate in case of FailureAction=exit/SuccessAction=exit; usually returns the + * exit code of the "main" process of the service or similar. */ + int (*exit_status)(Unit *u); + /* Like the enumerate() callback further down, but only enumerates the perpetual units, i.e. all units that * unconditionally exist and are always active. 
The main reason to keep both enumeration functions separate is * philosophical: the state of perpetual units should be put in place by coldplug(), while the state of those @@ -568,7 +578,9 @@ typedef struct UnitVTable { extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX]; -#define UNIT_VTABLE(u) unit_vtable[(u)->type] +static inline const UnitVTable* UNIT_VTABLE(Unit *u) { + return unit_vtable[u->type]; +} /* For casting a unit into the various unit types */ #define DEFINE_CAST(UPPERCASE, MixedCase) \ @@ -580,13 +592,20 @@ extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX]; } /* For casting the various unit types into a unit */ -#define UNIT(u) (&(u)->meta) +#define UNIT(u) \ + ({ \ + typeof(u) _u_ = (u); \ + Unit *_w_ = _u_ ? &(_u_)->meta : NULL; \ + _w_; \ + }) #define UNIT_HAS_EXEC_CONTEXT(u) (UNIT_VTABLE(u)->exec_context_offset > 0) #define UNIT_HAS_CGROUP_CONTEXT(u) (UNIT_VTABLE(u)->cgroup_context_offset > 0) #define UNIT_HAS_KILL_CONTEXT(u) (UNIT_VTABLE(u)->kill_context_offset > 0) -#define UNIT_TRIGGER(u) ((Unit*) hashmap_first_key((u)->dependencies[UNIT_TRIGGERS])) +static inline Unit* UNIT_TRIGGER(Unit *u) { + return hashmap_first_key(u->dependencies[UNIT_TRIGGERS]); +} Unit *unit_new(Manager *m, size_t size); void unit_free(Unit *u); @@ -598,8 +617,8 @@ int unit_add_name(Unit *u, const char *name); int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference, UnitDependencyMask mask); int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask); -int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, const char *filename, bool add_reference, UnitDependencyMask mask); -int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, const char *path, bool add_reference, UnitDependencyMask mask); +int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool 
add_reference, UnitDependencyMask mask); +int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask); int unit_add_exec_dependencies(Unit *u, ExecContext *c); @@ -613,6 +632,7 @@ void unit_add_to_dbus_queue(Unit *u); void unit_add_to_cleanup_queue(Unit *u); void unit_add_to_gc_queue(Unit *u); void unit_add_to_target_deps_queue(Unit *u); +void unit_submit_to_stop_when_unneeded_queue(Unit *u); int unit_merge(Unit *u, Unit *other); int unit_merge_by_name(Unit *u, const char *other); @@ -628,7 +648,7 @@ int unit_set_default_slice(Unit *u); const char *unit_description(Unit *u) _pure_; -bool unit_has_name(Unit *u, const char *name); +bool unit_has_name(const Unit *u, const char *name); UnitActiveState unit_active_state(Unit *u); @@ -679,12 +699,7 @@ bool unit_can_serialize(Unit *u) _pure_; int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs); int unit_deserialize(Unit *u, FILE *f, FDSet *fds); -void unit_deserialize_skip(FILE *f); - -int unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value); -int unit_serialize_item_escaped(Unit *u, FILE *f, const char *key, const char *value); -int unit_serialize_item_fd(Unit *u, FILE *f, FDSet *fds, const char *key, int fd); -void unit_serialize_item_format(Unit *u, FILE *f, const char *key, const char *value, ...) 
_printf_(4,5); +int unit_deserialize_skip(FILE *f); int unit_add_node_dependency(Unit *u, const char *what, bool wants, UnitDependency d, UnitDependencyMask mask); @@ -692,7 +707,6 @@ int unit_coldplug(Unit *u); void unit_catchup(Unit *u); void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) _printf_(3, 0); -void unit_status_emit_starting_stopping_reloading(Unit *u, JobType t); bool unit_need_daemon_reload(Unit *u); @@ -749,6 +763,8 @@ bool unit_type_supported(UnitType t); bool unit_is_pristine(Unit *u); +bool unit_is_unneeded(Unit *u); + pid_t unit_control_pid(Unit *u); pid_t unit_main_pid(Unit *u); @@ -777,7 +793,7 @@ int unit_acquire_invocation_id(Unit *u); bool unit_shall_confirm_spawn(Unit *u); -void unit_set_exec_params(Unit *s, ExecParameters *p); +int unit_set_exec_params(Unit *s, ExecParameters *p); int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret); @@ -796,6 +812,21 @@ const char *unit_label_path(Unit *u); int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error); +void unit_log_success(Unit *u); +void unit_log_failure(Unit *u, const char *result); +static inline void unit_log_result(Unit *u, bool success, const char *result) { + if (success) + unit_log_success(u); + else + unit_log_failure(u, result); +} + +void unit_log_process_exit(Unit *u, int level, const char *kind, const char *command, int code, int status); + +int unit_exit_status(Unit *u); +int unit_success_action_exit_status(Unit *u); +int unit_failure_action_exit_status(Unit *u); + /* Macros which append UNIT= or USER_UNIT= to the message */ #define log_unit_full(unit, level, error, ...) \ |