summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2019-07-29 13:40:23 +0200
committer GitHub <noreply@github.com> 2019-07-29 13:40:23 +0200
commit a13c64b1d7eb9b73628d61ca7d301f20a7a86c83 (patch)
tree 23fee7c9c2c4457c9575c30b9722e6ab684d99f8
parent 88bc86fcf895da0d51ddaf93d17b4280f4e60d74 (diff)
parent 6f83d3d149ba2fba2ed7404eb2c44641c3185c99 (diff)
download systemd-a13c64b1d7eb9b73628d61ca7d301f20a7a86c83.tar.gz
Merge pull request #13184 from poettering/nspawn-usr-root
Allow "systemd-nspawn -D / --volatile=yes" to work
-rw-r--r-- src/nspawn/nspawn.c 37
-rw-r--r-- src/shared/machine-image.c 46
2 files changed, 65 insertions, 18 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index fed5c651ce..2aec8041f0 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -3264,8 +3264,24 @@ static int outer_child(
"Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
}
- if (!dissected_image) {
- /* Turn directory into bind mount */
+ if (path_equal(directory, "/")) {
+ /* If the directory we shall boot is the host, let's operate on a bind mount at a different
+ * place, so that we can make changes to its mount structure (for example, to implement
+ * --volatile=) without this interfering with our ability to access files such as
+ * /etc/localtime to copy into the container. Note that we use a fixed place for this
+ * (instead of a temporary directory, since we are living in our own mount namespace here
+ * already, and thus don't need to be afraid of colliding with anyone else's mounts). */
+ (void) mkdir_p("/run/systemd/nspawn-root", 0755);
+
+ r = mount_verbose(LOG_ERR, "/", "/run/systemd/nspawn-root", NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ directory = "/run/systemd/nspawn-root";
+
+ } else if (!dissected_image) {
+ /* Turn directory into bind mount (we need that so that we can move the bind mount to root
+ * later on). */
r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return r;
@@ -4018,7 +4034,7 @@ static int load_settings(void) {
p = file_in_same_dir(arg_image, fn);
if (!p)
return log_oom();
- } else if (arg_directory) {
+ } else if (arg_directory && !path_equal(arg_directory, "/")) {
p = file_in_same_dir(arg_directory, fn);
if (!p)
return log_oom();
@@ -4740,8 +4756,12 @@ static int run(int argc, char *argv[]) {
if (arg_directory) {
assert(!arg_image);
- if (path_equal(arg_directory, "/") && !arg_ephemeral) {
- log_error("Spawning container on root directory is not supported. Consider using --ephemeral.");
+ /* Safety precaution: let's not allow running images from the live host OS image, as long as
+ * /var from the host will propagate into container dynamically (because bad things happen if
+ * two systems write to the same /var). Let's allow it for the special cases where /var is
+ * either copied (i.e. --ephemeral) or replaced (i.e. --volatile=yes|state). */
+ if (path_equal(arg_directory, "/") && !(arg_ephemeral || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_STATE))) {
+ log_error("Spawning container on root directory is not supported. Consider using --ephemeral, --volatile=yes or --volatile=state.");
r = -EINVAL;
goto finish;
}
@@ -4770,7 +4790,9 @@ static int run(int argc, char *argv[]) {
goto finish;
}
- r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ /* We take an exclusive lock on this image, since it's our private, ephemeral copy
+ * only owned by us and no one else. */
+ r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock);
if (r < 0) {
log_error_errno(r, "Failed to lock %s: %m", np);
goto finish;
@@ -4890,7 +4912,8 @@ static int run(int argc, char *argv[]) {
goto finish;
}
- r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ /* Always take an exclusive lock on our own ephemeral copy. */
+ r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock);
if (r < 0) {
r = log_error_errno(r, "Failed to create image lock: %m");
goto finish;
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
index 07744b34b4..7007374192 100644
--- a/src/shared/machine-image.c
+++ b/src/shared/machine-image.c
@@ -989,28 +989,52 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile
_cleanup_free_ char *p = NULL;
LockFile t = LOCK_FILE_INIT;
struct stat st;
+ bool exclusive;
int r;
assert(path);
assert(global);
assert(local);
- /* Locks an image path. This actually creates two locks: one
- * "local" one, next to the image path itself, which might be
- * shared via NFS. And another "global" one, in /run, that
- * uses the device/inode number. This has the benefit that we
- * can even lock a tree that is a mount point, correctly. */
+ /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
+ * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
+ * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
+ * correctly. */
if (!path_is_absolute(path))
return -EINVAL;
+ switch (operation & (LOCK_SH|LOCK_EX)) {
+ case LOCK_SH:
+ exclusive = false;
+ break;
+ case LOCK_EX:
+ exclusive = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
*local = *global = (LockFile) LOCK_FILE_INIT;
return 0;
}
- if (path_equal(path, "/"))
- return -EBUSY;
+ /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
+ * running off it after all, and we don't want any images to manipulate the host image. We make an
+ * exception for shared locks however: we allow those (and make them NOPs since there's no point in
+ * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
+ * since it means changes made to the host might propagate to the container as they happen (and a
+ * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
+ * useful not to allow read-only containers off the host root, hence let's support this, and trust
+ * the user to do the right thing with this. */
+ if (path_equal(path, "/")) {
+ if (exclusive)
+ return -EBUSY;
+
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
if (stat(path, &st) >= 0) {
if (S_ISBLK(st.st_mode))
@@ -1024,12 +1048,12 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile
return -ENOMEM;
}
- /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
- * block devices are device local anyway. */
- if (!path_startswith(path, "/dev")) {
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be
+ * sufficient, since block devices are host local anyway. */
+ if (!path_startswith(path, "/dev/")) {
r = make_lock_file_for(path, operation, &t);
if (r < 0) {
- if ((operation & LOCK_SH) && r == -EROFS)
+ if (!exclusive && r == -EROFS)
log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
else
return r;