Add --disable-userns switchdisable-userns

Some usecases of bubblewrap want to ensure that the subprocess can't further re-arrange the filesystem namespace, or do other more complex namespace modification. This can be limited by --disable-userns, which makes the kernel unable to create any new user namespaces for the process hierarchy. This is done by making a cover of the original root, but running the process with the origin root as root anyway. This "non-standard" root means the kernel will not allow creating new user namespaces. This is more typically done using chroot("/theroot") which would also mean the root of the namespace ("/") differes from the process current root ("/theroot)". However, we want to avoid this as in this case symlinks in /proc/$pid/fd would have a "/theroot" prefix when seen outside the namespace, which is something that e.g. flatpak doesn't want. Note, there is a slight cost to this as the covering bind mount duplicates all the regular mounts in namespace. However, they all refer to the same mounts so no actual files are duplicated.
author: Alexander Larsson <alexl@redhat.com> 2021-09-20 10:59:16 +0200
committer: Alexander Larsson <alexl@redhat.com> 2021-09-20 11:41:59 +0200
commit: 53475bc69f4acb1f1cd5a393190f8267fb0663c4 (patch)
tree: 6024295a4e5fa179915c16c289bde9a575a511e5
parent: 47fa284aba23bc28adb3c40ac8f2072dbb581293 (diff)
download: bubblewrap-disable-userns.tar.gz
1 files changed, 42 insertions, 2 deletions
diff --git a/bubblewrap.c b/bubblewrap.c
index 2e13fd0..b9d6fb9 100644
--- a/bubblewrap.c
+++ b/bubblewrap.c
@@ -66,6 +66,7 @@ static const char *opt_file_label = NULL;
 static bool opt_as_pid_1;
 
 const char *opt_chdir_path = NULL;
+bool opt_disable_userns = FALSE;
 bool opt_unshare_user = FALSE;
 bool opt_unshare_user_try = FALSE;
 bool opt_unshare_pid = FALSE;
@@ -240,6 +241,7 @@ usage (int ecode, FILE *out)
            "    --unshare-cgroup-try         Create new cgroup namespace if possible else continue by skipping it\n"
            "    --userns FD                  Use this user namespace (cannot combine with --unshare-user)\n"
            "    --userns2 FD                 After setup switch to this user namespace, only useful with --userns\n"
+           "    --disable-userns             Disable further use of user namespaces inside sandbox\n"
            "    --pidns FD                   Use this user namespace (as parent namespace if using --unshare-pid)\n"
            "    --uid UID                    Custom uid in the sandbox (requires --unshare-user or --userns)\n"
            "    --gid GID                    Custom gid in the sandbox (requires --unshare-user or --userns)\n"
@@ -2068,6 +2070,10 @@ parse_args_recurse (int          *argcp,
           argv += 1;
           argc -= 1;
         }
+      else if (strcmp (arg, "--disable-userns") == 0)
+        {
+          opt_disable_userns = TRUE;
+        }
       else if (strcmp (arg, "--userns2") == 0)
         {
           int the_fd;
@@ -2420,6 +2426,7 @@ main (int    argc,
   struct sock_fprog seccomp_prog;
   cleanup_free char *args_data = NULL;
   int intermediate_pids_sockets[2] = {-1, -1};
+  bool using_userns2 = FALSE;
 
   /* Handle --version early on before we try to acquire/drop
    * any capabilities so it works in a build environment;
@@ -2947,8 +2954,12 @@ main (int    argc,
       die_with_error ("chdir /");
   }
 
-  if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
-    die_with_error ("Setting userns2 failed");
+  if (opt_userns2_fd > 0)
+    {
+      if (setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
+        die_with_error ("Setting userns2 failed");
+      using_userns2 = TRUE;
+    }
 
   if (opt_unshare_user &&
       (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) &&
@@ -2961,6 +2972,8 @@ main (int    argc,
       if (unshare (CLONE_NEWUSER))
         die_with_error ("unshare user ns");
 
+      using_userns2 = TRUE;
+
       /* We're in a new user namespace, we got back the bounding set, clear it again */
       drop_cap_bounding_set (FALSE);
 
@@ -2969,6 +2982,33 @@ main (int    argc,
                          -1, FALSE, FALSE);
     }
 
+  if (opt_disable_userns)
+    {
+      if (using_userns2)
+        {
+          /* If we're not in the main userns, the we don't own the
+             current fs namespace and are not allowed to mount, so
+             create a new NS */
+          if (unshare (CLONE_NEWNS))
+            die_with_error ("unshare fs ns");
+        }
+
+      /* Mount a bind cover of the root fs. This will trigger
+       * current_chrooted() in create_user_ns() in the kernel at:
+       *   https://elixir.bootlin.com/linux/v5.14.4/source/kernel/user_namespace.c#L92
+       * making it impossible for the process to create new user namespaces.
+       *
+       * What happens is that the path "/" in the namespace noew
+       * resolve to the covering bindmount, but the container process
+       * tree root is still the lower mount. Note that it is still
+       * possible for the container to reach the covering bind mount
+       * (as e.g. "/.."), but since its just a copy of the regular
+       * hierarchy it works identically to it.
+       */
+      if (mount ("/", "/", NULL, MS_SILENT | MS_MGC_VAL | MS_BIND | MS_REC, NULL) < 0)
+        die_with_error ("setting up root cover bind");
+    }
+
   /* All privileged ops are done now, so drop caps we don't need */
   drop_privs (!is_privileged, TRUE);
author	Alexander Larsson <alexl@redhat.com>	2021-09-20 10:59:16 +0200
committer	Alexander Larsson <alexl@redhat.com>	2021-09-20 11:41:59 +0200
commit	53475bc69f4acb1f1cd5a393190f8267fb0663c4 (patch)
tree	6024295a4e5fa179915c16c289bde9a575a511e5
parent	47fa284aba23bc28adb3c40ac8f2072dbb581293 (diff)
download	bubblewrap-disable-userns.tar.gz