bunpen: mount /proc in the namespace, if pids are sandboxed
This commit is contained in:
@@ -47,6 +47,8 @@ export fn main() void = {
|
|||||||
|
|
||||||
rtext::no_new_privs()!;
|
rtext::no_new_privs()!;
|
||||||
restrict::namespace_restrict(&req.resources);
|
restrict::namespace_restrict(&req.resources);
|
||||||
restrict::landlock_restrict(&req.resources);
|
// XXX: landlock prevents other sandboxers like `bwrap` from executing,
|
||||||
|
// because it forbids all future `mount` syscalls. so don't landlock.
|
||||||
|
// restrict::landlock_restrict(&req.resources);
|
||||||
rtext::check_error("exec <user command>", do_exec(req.exec_bin, req.exec_args));
|
rtext::check_error("exec <user command>", do_exec(req.exec_bin, req.exec_args));
|
||||||
};
|
};
|
||||||
|
@@ -34,25 +34,11 @@ export fn namespace_restrict(what: *resources) void = {
|
|||||||
if (what.pid) {
|
if (what.pid) {
|
||||||
log::println("[namespace] keeping pid namespace");
|
log::println("[namespace] keeping pid namespace");
|
||||||
what_to_unshare &= ~rtext::CLONE_NEWPID;
|
what_to_unshare &= ~rtext::CLONE_NEWPID;
|
||||||
} else {
|
|
||||||
log::println("TODO: namespacing without --bunpen-keep-pid is unsupported");
|
|
||||||
what_to_unshare &= ~rtext::CLONE_NEWPID;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
log::printfln("[namespace] unshare {}", what_to_unshare);
|
log::printfln("[namespace] unshare {}", what_to_unshare);
|
||||||
rtext::unshare(what_to_unshare)!;
|
rtext::unshare(what_to_unshare)!;
|
||||||
|
|
||||||
if (!what.pid) {
|
|
||||||
// fork and become:
|
|
||||||
// - PID 1 in the namespace, which then launches the sandboxed program
|
|
||||||
// - a dummy process which waits for the above to exit and propagates its status
|
|
||||||
// TODO: it may be possible after forking for the parent to `setns` to the
|
|
||||||
// child, and then have the child exit. this would require only a single
|
|
||||||
// long-run process instead of two (this process would need to explicitly
|
|
||||||
// make itself a reaper since it'll be PID 2).
|
|
||||||
rtext::check_error("[namespace/fork] forking new PID 1", fork_and_propagate());
|
|
||||||
};
|
|
||||||
|
|
||||||
// before mounting anything, set up the uids and gids in this namespace.
|
// before mounting anything, set up the uids and gids in this namespace.
|
||||||
// without this, everything shows up as 65534 a.k.a. 'nobody' a.k.a. 'overflow',
|
// without this, everything shows up as 65534 a.k.a. 'nobody' a.k.a. 'overflow',
|
||||||
// and `mkdir` will return EOVERFLOW.
|
// and `mkdir` will return EOVERFLOW.
|
||||||
@@ -67,7 +53,37 @@ export fn namespace_restrict(what: *resources) void = {
|
|||||||
// this can fail if it's not within the sandbox.
|
// this can fail if it's not within the sandbox.
|
||||||
rtext::swallow_error("namespace: restore $PWD", os::chdir(pwd));
|
rtext::swallow_error("namespace: restore $PWD", os::chdir(pwd));
|
||||||
|
|
||||||
// TODO: CLONE_NEWPID (might not work without forking to also become reaper)
|
if (!what.pid) {
|
||||||
|
// fork and become:
|
||||||
|
// - PID 1 in the namespace, and exec into the sandboxed program
|
||||||
|
// - a dummy process in the outer namespace which waits for the above and propagates its exit status
|
||||||
|
//
|
||||||
|
// N.B.: other containers like to fork *twice*:
|
||||||
|
// - once to enter the namespace and become PID 1
|
||||||
|
// - a second time, when exec'ing the sandboxed program.
|
||||||
|
// that method allows for the sandbox program itself to exit, with its children outliving it.
|
||||||
|
// (and the wrapper only exits once *all* orphaned children die).
|
||||||
|
// i don't need children to outlive the main process, so i fork once and let
|
||||||
|
// the sandboxed program be reaper for all its children.
|
||||||
|
rtext::check_error("[namespace/fork] forking new PID 1", fork_and_propagate());
|
||||||
|
|
||||||
|
// mount /proc. TODO: try and restrict this a bit. `/proc` has a LOT of
|
||||||
|
// surface area; even though it's namespace aware (so that e.g.
|
||||||
|
// /proc/sys/net/ipv4/conf lists only the interfaces in the NS), this hasn't
|
||||||
|
// always been the case, even as recently as 2021:
|
||||||
|
// - <https://github.com/opencontainers/runc/issues/2826#issuecomment-915683044>
|
||||||
|
//
|
||||||
|
// for now, i mount this because pretty much every bwrap program needs to
|
||||||
|
// read its PID, so to have stackable sandboxing i need /proc/$pid. do this
|
||||||
|
// here even if toplevel / was bound, because the outer /proc confuses
|
||||||
|
// sandboxed programs by using untranslated pids.
|
||||||
|
rtext::swallow_error("[namespace/bind] mkdir /proc",
|
||||||
|
os::mkdir("/proc", fs::mode::USER_RX | fs::mode::GROUP_RX | fs::mode::OTHER_RX | fs::mode::DIR)
|
||||||
|
); // failure expected if dir already created
|
||||||
|
rtext::swallow_error("[namesapace] mount -t proc proc /proc",
|
||||||
|
rtext::mount("proc", "/proc", "proc", rtext::MS_NOSUID | rtext::MS_NOEXEC | rtext::MS_NODEV, null)
|
||||||
|
);
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// fork and:
|
// fork and:
|
||||||
|
Reference in New Issue
Block a user