diff options
author | Lennart Poettering <lennart@poettering.net> | 2019-07-29 13:40:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-29 13:40:23 +0200 |
commit | a13c64b1d7eb9b73628d61ca7d301f20a7a86c83 (patch) | |
tree | 23fee7c9c2c4457c9575c30b9722e6ab684d99f8 | |
parent | 88bc86fcf895da0d51ddaf93d17b4280f4e60d74 (diff) | |
parent | 6f83d3d149ba2fba2ed7404eb2c44641c3185c99 (diff) | |
download | systemd-a13c64b1d7eb9b73628d61ca7d301f20a7a86c83.tar.gz |
Merge pull request #13184 from poettering/nspawn-usr-root
Allow "systemd-nspawn -D / --volatile=yes" to work
-rw-r--r-- | src/nspawn/nspawn.c | 37 | ||||
-rw-r--r-- | src/shared/machine-image.c | 46 |
2 files changed, 65 insertions, 18 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index fed5c651ce..2aec8041f0 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3264,8 +3264,24 @@ static int outer_child( "Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range); } - if (!dissected_image) { - /* Turn directory into bind mount */ + if (path_equal(directory, "/")) { + /* If the directory we shall boot is the host, let's operate on a bind mount at a different + * place, so that we can make changes to its mount structure (for example, to implement + * --volatile=) without this interfering with our ability to access files such as + * /etc/localtime to copy into the container. Note that we use a fixed place for this + * (instead of a temporary directory, since we are living in our own mount namespace here + * already, and thus don't need to be afraid of colliding with anyone else's mounts).*/ + (void) mkdir_p("/run/systemd/nspawn-root", 0755); + + r = mount_verbose(LOG_ERR, "/", "/run/systemd/nspawn-root", NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + + directory = "/run/systemd/nspawn-root"; + + } else if (!dissected_image) { + /* Turn directory into bind mount (we need that so that we can move the bind mount to root + * later on). */ r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL); if (r < 0) return r; @@ -4018,7 +4034,7 @@ static int load_settings(void) { p = file_in_same_dir(arg_image, fn); if (!p) return log_oom(); - } else if (arg_directory) { + } else if (arg_directory && !path_equal(arg_directory, "/")) { p = file_in_same_dir(arg_directory, fn); if (!p) return log_oom(); @@ -4740,8 +4756,12 @@ static int run(int argc, char *argv[]) { if (arg_directory) { assert(!arg_image); - if (path_equal(arg_directory, "/") && !arg_ephemeral) { - log_error("Spawning container on root directory is not supported. 
Consider using --ephemeral."); + /* Safety precaution: let's not allow running images from the live host OS image, as long as + * /var from the host will propagate into container dynamically (because bad things happen if + * two systems write to the same /var). Let's allow it for the special cases where /var is + * either copied (i.e. --ephemeral) or replaced (i.e. --volatile=yes|state). */ + if (path_equal(arg_directory, "/") && !(arg_ephemeral || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_STATE))) { + log_error("Spawning container on root directory is not supported. Consider using --ephemeral, --volatile=yes or --volatile=state."); r = -EINVAL; goto finish; } @@ -4770,7 +4790,9 @@ static int run(int argc, char *argv[]) { goto finish; } - r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock); + /* We take an exclusive lock on this image, since it's our private, ephemeral copy + * only owned by us and no one else. */ + r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock); if (r < 0) { log_error_errno(r, "Failed to lock %s: %m", np); goto finish; @@ -4890,7 +4912,8 @@ static int run(int argc, char *argv[]) { goto finish; } - r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock); + /* Always take an exclusive lock on our own ephemeral copy. 
*/ + r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock); if (r < 0) { r = log_error_errno(r, "Failed to create image lock: %m"); goto finish; diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index 07744b34b4..7007374192 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -989,28 +989,52 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile _cleanup_free_ char *p = NULL; LockFile t = LOCK_FILE_INIT; struct stat st; + bool exclusive; int r; assert(path); assert(global); assert(local); - /* Locks an image path. This actually creates two locks: one - * "local" one, next to the image path itself, which might be - * shared via NFS. And another "global" one, in /run, that - * uses the device/inode number. This has the benefit that we - * can even lock a tree that is a mount point, correctly. */ + /* Locks an image path. This actually creates two locks: one "local" one, next to the image path + * itself, which might be shared via NFS. And another "global" one, in /run, that uses the + * device/inode number. This has the benefit that we can even lock a tree that is a mount point, + * correctly. */ if (!path_is_absolute(path)) return -EINVAL; + switch (operation & (LOCK_SH|LOCK_EX)) { + case LOCK_SH: + exclusive = false; + break; + case LOCK_EX: + exclusive = true; + break; + default: + return -EINVAL; + } + if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { *local = *global = (LockFile) LOCK_FILE_INIT; return 0; } - if (path_equal(path, "/")) - return -EBUSY; + /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are + * running off it after all, and we don't want any images to manipulate the host image. We make an + * exception for shared locks however: we allow those (and make them NOPs since there's no point in + * taking them if there can't be exclusive locks). 
Strictly speaking these are questionable as well, + * since it means changes made to the host might propagate to the container as they happen (and a + * shared lock kinda suggests that no changes happen at all while it is in place), but it's too + * useful not to allow read-only containers off the host root, hence let's support this, and trust + * the user to do the right thing with this. */ + if (path_equal(path, "/")) { + if (exclusive) + return -EBUSY; + + *local = *global = (LockFile) LOCK_FILE_INIT; + return 0; + } if (stat(path, &st) >= 0) { if (S_ISBLK(st.st_mode)) @@ -1024,12 +1048,12 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile return -ENOMEM; } - /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since - * block devices are device local anyway. */ - if (!path_startswith(path, "/dev")) { + /* For block devices we don't need the "local" lock, as the major/minor lock above should be + * sufficient, since block devices are host local anyway. */ + if (!path_startswith(path, "/dev/")) { r = make_lock_file_for(path, operation, &t); if (r < 0) { - if ((operation & LOCK_SH) && r == -EROFS) + if (!exclusive && r == -EROFS) log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path); else return r; |