diff options
-rw-r--r-- | src/core/cgroup.c | 145 | ||||
-rw-r--r-- | src/core/cgroup.h | 3 | ||||
-rw-r--r-- | src/core/dbus-manager.c | 21 | ||||
-rw-r--r-- | src/core/dbus-scope.c | 32 | ||||
-rw-r--r-- | src/core/dbus-unit.c | 113 | ||||
-rw-r--r-- | src/core/dbus-unit.h | 1 | ||||
-rw-r--r-- | src/core/org.freedesktop.systemd1.conf | 16 | ||||
-rw-r--r-- | src/core/scope.c | 2 | ||||
-rw-r--r-- | src/core/unit.c | 28 | ||||
-rw-r--r-- | src/core/unit.h | 2 |
10 files changed, 346 insertions, 17 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index d05e338d11..edb702ce48 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -24,6 +24,7 @@ #include "alloc-util.h" #include "blockdev-util.h" #include "bpf-firewall.h" +#include "bus-error.h" #include "cgroup-util.h" #include "cgroup.h" #include "fd-util.h" @@ -1303,13 +1304,12 @@ void unit_update_cgroup_members_masks(Unit *u) { } } -static const char *migrate_callback(CGroupMask mask, void *userdata) { - Unit *u = userdata; +const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) { - assert(mask != 0); - assert(u); + /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */ while (u) { + if (u->cgroup_path && u->cgroup_realized && (u->cgroup_realized_mask & mask) == mask) @@ -1321,6 +1321,10 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) { return NULL; } +static const char *migrate_callback(CGroupMask mask, void *userdata) { + return unit_get_realized_cgroup_path(userdata, mask); +} + char *unit_default_cgroup_path(Unit *u) { _cleanup_free_ char *escaped = NULL, *slice = NULL; int r; @@ -1503,19 +1507,142 @@ static int unit_create_cgroup( return 0; } -int unit_attach_pids_to_cgroup(Unit *u) { +static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + char *pp; int r; + assert(u); - r = unit_realize_cgroup(u); + if (MANAGER_IS_SYSTEM(u->manager)) + return -EINVAL; + + if (!u->manager->system_bus) + return -EIO; + + if (!u->cgroup_path) + return -EINVAL; + + /* Determine this unit's cgroup path relative to our cgroup root */ + pp = path_startswith(u->cgroup_path, u->manager->cgroup_root); + if (!pp) + return -EINVAL; + + pp = strjoina("/", pp, suffix_path); + path_kill_slashes(pp); + + r = sd_bus_call_method(u->manager->system_bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "AttachProcessesToUnit", + &error, NULL, + "ssau", + NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid); if (r < 0) - return r; + return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r)); + + return 0; +} + +int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) { + CGroupMask delegated_mask; + const char *p; + Iterator i; + void *pidp; + int r, q; + + assert(u); + + if (!UNIT_HAS_CGROUP_CONTEXT(u)) + return -EINVAL; + + if (set_isempty(pids)) + return 0; - r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u); + r = unit_realize_cgroup(u); if (r < 0) return r; - return 0; + if (isempty(suffix_path)) + p = u->cgroup_path; + else + p = strjoina(u->cgroup_path, "/", suffix_path); + + delegated_mask = unit_get_delegate_mask(u); + + r = 0; + SET_FOREACH(pidp, pids, i) { + pid_t pid = PTR_TO_PID(pidp); + CGroupController c; + + /* First, attach the PID to the main cgroup hierarchy */ + q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid); + if (q < 0) { + log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p); + + if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) { + int z; + + /* If we are in a user instance, and we can't move the process ourselves due to + * permission problems, let's ask the system instance about it instead. Since it's more + * privileged it might be able to move the process across the leaves of a subtree who's + * top node is not owned by us. */ + + z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path); + if (z < 0) + log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p); + else + continue; /* When the bus thing worked via the bus we are fully done for this PID. */ + } + + if (r >= 0) + r = q; /* Remember first error */ + + continue; + } + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + continue; + + /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the + * innermost realized one */ + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *realized; + + if (!(u->manager->cgroup_supported & bit)) + continue; + + /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */ + if (delegated_mask & u->cgroup_realized_mask & bit) { + q = cg_attach(cgroup_controller_to_string(c), p, pid); + if (q >= 0) + continue; /* Success! */ + + log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m", + pid, p, cgroup_controller_to_string(c)); + } + + /* So this controller is either not delegate or realized, or something else weird happened. In + * that case let's attach the PID at least to the closest cgroup up the tree that is + * realized. */ + realized = unit_get_realized_cgroup_path(u, bit); + if (!realized) + continue; /* Not even realized in the root slice? Then let's not bother */ + + q = cg_attach(cgroup_controller_to_string(c), realized, pid); + if (q < 0) + log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m", + pid, realized, cgroup_controller_to_string(c)); + } + } + + return r; } static void cgroup_xattr_apply(Unit *u) { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 5113313405..e2e875d1c7 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -167,6 +167,7 @@ bool unit_get_needs_bpf(Unit *u); void unit_update_cgroup_members_masks(Unit *u); +const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); char *unit_default_cgroup_path(Unit *u); int unit_set_cgroup_path(Unit *u, const char *path); int unit_pick_cgroup_path(Unit *u); @@ -178,7 +179,7 @@ int unit_watch_cgroup(Unit *u); void unit_add_to_cgroup_empty_queue(Unit *u); -int unit_attach_pids_to_cgroup(Unit *u); +int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path); int manager_setup_cgroup(Manager *m); void manager_shutdown_cgroup(Manager *m, bool delete); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 945645c6b1..889779d548 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -863,6 +863,26 @@ static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd return bus_unit_method_get_processes(message, u, error); } +static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = bus_get_unit_by_name(m, message, name, &u, error); + if (r < 0) + return r; + + return bus_unit_method_attach_processes(message, u, error); +} + static int transient_unit_from_message( Manager *m, sd_bus_message *message, @@ -2504,6 +2524,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index a0c4a65b33..4d9ced81de 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -89,17 +89,39 @@ static int bus_scope_set_transient_property( return bus_set_transient_usec(UNIT(s), name, &s->timeout_stop_usec, message, flags, error); if (streq(name, "PIDs")) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; unsigned n = 0; - uint32_t pid; r = sd_bus_message_enter_container(message, 'a', "u"); if (r < 0) return r; - while ((r = sd_bus_message_read(message, "u", &pid)) > 0) { + for (;;) { + uint32_t upid; + pid_t pid; - if (pid <= 1) - return -EINVAL; + r = sd_bus_message_read(message, "u", &upid); + if (r < 0) + return r; + if (r == 0) + break; + + if (upid == 0) { + if (!creds) { + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + } + + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + } else + pid = (uid_t) upid; + + r = unit_pid_attachable(UNIT(s), pid, error); + if (r < 0) + return r; if (!UNIT_WRITE_FLAGS_NOOP(flags)) { r = unit_watch_pid(UNIT(s), pid); @@ -109,8 +131,6 @@ static int bus_scope_set_transient_property( n++; } - if (r < 0) - return r; r = sd_bus_message_exit_container(message); if (r < 0) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 7085eee930..50a5ab9819 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1127,6 +1127,118 @@ static int property_get_ip_counter( return sd_bus_message_append(reply, "t", value); } +int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) { + + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + _cleanup_(set_freep) Set *pids = NULL; + Unit *u = userdata; + const char *path; + int r; + + assert(message); + + /* This migrates the processes with the specified PIDs into the cgroup of this unit, optionally below a + * specified cgroup path. Obviously this only works for units that actually maintain a cgroup + * representation. If a process is already in the cgroup no operation is executed – in this case the specified + * subcgroup path has no effect! */ + + r = mac_selinux_unit_access_check(u, message, "start", error); + if (r < 0) + return r; + + r = sd_bus_message_read(message, "s", &path); + if (r < 0) + return r; + + path = empty_to_null(path); + if (path) { + if (!path_is_absolute(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not absolute: %s", path); + + if (!path_is_normalized(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not normalized: %s", path); + } + + if (!unit_cgroup_delegate(u)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process migration not available on non-delegated units."); + + if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u))) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not active, refusing."); + + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(message, 'a', "u"); + if (r < 0) + return r; + for (;;) { + uid_t process_uid, sender_uid; + uint32_t upid; + pid_t pid; + + r = sd_bus_message_read(message, "u", &upid); + if (r < 0) + return r; + if (r == 0) + break; + + if (upid == 0) { + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + } else + pid = (uid_t) upid; + + /* Filter out duplicates */ + if (set_contains(pids, PID_TO_PTR(pid))) + continue; + + /* Check if this process is suitable for attaching to this unit */ + r = unit_pid_attachable(u, pid, error); + if (r < 0) + return r; + + /* Let's query the sender's UID, so that we can make our security decisions */ + r = sd_bus_creds_get_euid(creds, &sender_uid); + if (r < 0) + return r; + + /* Let's validate security: if the sender is root, then all is OK. If the sender is is any other unit, + * then the process' UID and the target unit's UID have to match the sender's UID */ + if (sender_uid != 0 && sender_uid != getuid()) { + r = get_process_uid(pid, &process_uid); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to retrieve process UID: %m"); + + if (process_uid != sender_uid) + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pid); + if (process_uid != u->ref_uid) + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pid); + } + + if (!pids) { + pids = set_new(NULL); + if (!pids) + return -ENOMEM; + } + + r = set_put(pids, PID_TO_PTR(pid)); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + r = unit_attach_pids_to_cgroup(u, pids, path); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to attach processes to control group: %m"); + + return sd_bus_reply_method_return(message, NULL); +} + const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0), @@ -1139,6 +1251,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0), SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0), SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("AttachProcesses", "sau", NULL, bus_unit_method_attach_processes, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_VTABLE_END }; diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index d7066eefc8..bb4e43f5ca 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -37,6 +37,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitWriteFlags flags, bool commit, sd_bus_error *error); int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error); diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index a97edac4ac..645c8f1659 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -236,6 +236,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="AttachProcessesToUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="CancelJob"/> <allow send_destination="org.freedesktop.systemd1" @@ -366,6 +370,18 @@ send_interface="org.freedesktop.systemd1.Unit" send_member="Unref"/> + <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Service interface --> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Service" + send_member="AttachProcesses"/> + + <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Scope interface --> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Scope" + send_member="AttachProcesses"/> + <allow receive_sender="org.freedesktop.systemd1"/> </policy> diff --git a/src/core/scope.c b/src/core/scope.c index 47c47bc708..5b9c2bb3c4 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -343,7 +343,7 @@ static int scope_start(Unit *u) { unit_export_state_files(UNIT(s)); - r = unit_attach_pids_to_cgroup(u); + r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL); if (r < 0) { log_unit_warning_errno(UNIT(s), r, "Failed to add PIDs to scope's control group: %m"); scope_enter_dead(s, SCOPE_FAILURE_RESOURCES); diff --git a/src/core/unit.c b/src/core/unit.c index 47a06e4297..26d97fa45e 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -5362,6 +5362,34 @@ const char *unit_label_path(Unit *u) { return p; } +int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) { + int r; + + assert(u); + + /* Checks whether the specified PID is generally good for attaching, i.e. a valid PID, not our manager itself, + * and not a kernel thread either */ + + /* First, a simple range check */ + if (!pid_is_valid(pid)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process identifier " PID_FMT " is not valid.", pid); + + /* Some extra safety check */ + if (pid == 1 || pid == getpid_cached()) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a manager processs, refusing.", pid); + + /* Don't even begin to bother with kernel threads */ + r = is_kernel_thread(pid); + if (r == -ESRCH) + return sd_bus_error_setf(error, SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "Process with ID " PID_FMT " does not exist.", pid); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to determine whether process " PID_FMT " is a kernel thread: %m", pid); + if (r > 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a kernel thread, refusing.", pid); + + return 0; +} + static const char* const collect_mode_table[_COLLECT_MODE_MAX] = { [COLLECT_INACTIVE] = "inactive", [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed", diff --git a/src/core/unit.h b/src/core/unit.h index 6d49c7d5f2..617f044e24 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -806,6 +806,8 @@ bool unit_needs_console(Unit *u); const char *unit_label_path(Unit *u); +int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error); + /* Macros which append UNIT= or USER_UNIT= to the message */ #define log_unit_full(unit, level, error, ...) \ |