bunpen: mount /proc in the namespace, if pids are sandboxed

This commit is contained in:
2024-08-29 16:47:02 +00:00
parent 353057af23
commit bc1453f675
2 changed files with 34 additions and 16 deletions

View File

@@ -47,6 +47,8 @@ export fn main() void = {
rtext::no_new_privs()!;
restrict::namespace_restrict(&req.resources);
restrict::landlock_restrict(&req.resources);
// XXX: landlock prevents other sandboxers like `bwrap` from executing,
// because it forbids all future `mount` syscalls. so don't landlock.
// restrict::landlock_restrict(&req.resources);
rtext::check_error("exec <user command>", do_exec(req.exec_bin, req.exec_args));
};

View File

@@ -34,25 +34,11 @@ export fn namespace_restrict(what: *resources) void = {
if (what.pid) {
log::println("[namespace] keeping pid namespace");
what_to_unshare &= ~rtext::CLONE_NEWPID;
} else {
log::println("TODO: namespacing without --bunpen-keep-pid is unsupported");
what_to_unshare &= ~rtext::CLONE_NEWPID;
};
log::printfln("[namespace] unshare {}", what_to_unshare);
rtext::unshare(what_to_unshare)!;
if (!what.pid) {
// fork and become:
// - PID 1 in the namespace, which then launches the sandboxed program
// - a dummy process which waits for the above to exit and propagates its status
// TODO: it may be possible after forking for the parent to `setns` to the
// child, and then have the child exit. this would require only a single
// long-running process instead of two (this process would need to explicitly
// make itself a reaper since it'll be PID 2).
rtext::check_error("[namespace/fork] forking new PID 1", fork_and_propagate());
};
// before mounting anything, set up the uids and gids in this namespace.
// without this, everything shows up as 65534 a.k.a. 'nobody' a.k.a. 'overflow',
// and `mkdir` will return EOVERFLOW.
@@ -67,7 +53,37 @@ export fn namespace_restrict(what: *resources) void = {
// this can fail if it's not within the sandbox.
rtext::swallow_error("namespace: restore $PWD", os::chdir(pwd));
// TODO: CLONE_NEWPID (might not work without forking to also become reaper)
if (!what.pid) {
// fork and become:
// - PID 1 in the namespace, and exec into the sandboxed program
// - a dummy process in the outer namespace which waits for the above and propagates its exit status
//
// N.B.: other containers like to fork *twice*:
// - once to enter the namespace and become PID 1
// - a second time, when exec'ing the sandboxed program.
// that method allows for the sandbox program itself to exit, with its children outliving it.
// (and the wrapper only exits once *all* orphaned children die).
// i don't need children to outlive the main process, so i fork once and let
// the sandboxed program be reaper for all its children.
rtext::check_error("[namespace/fork] forking new PID 1", fork_and_propagate());
// mount /proc. TODO: try and restrict this a bit. `/proc` has a LOT of
// surface area; even though it's namespace aware (so that e.g.
// /proc/sys/net/ipv4/conf lists only the interfaces in the NS), this hasn't
// always been the case, even as recently as 2021:
// - <https://github.com/opencontainers/runc/issues/2826#issuecomment-915683044>
//
// for now, i mount this because pretty much every bwrap program needs to
// read its PID, so to have stackable sandboxing i need /proc/$pid. do this
// here even if toplevel / was bound, because the outer /proc confuses
// sandboxed programs by using untranslated pids.
rtext::swallow_error("[namespace/bind] mkdir /proc",
os::mkdir("/proc", fs::mode::USER_RX | fs::mode::GROUP_RX | fs::mode::OTHER_RX | fs::mode::DIR)
); // failure expected if dir already created
rtext::swallow_error("[namesapace] mount -t proc proc /proc",
rtext::mount("proc", "/proc", "proc", rtext::MS_NOSUID | rtext::MS_NOEXEC | rtext::MS_NODEV, null)
);
};
};
// fork and: