bunpen: reap child processes when acting as PID1

2024-10-18 08:23:36 +00:00
parent 34ad1831ff
commit 441e69e708
3 changed files with 29 additions and 3 deletions
--- a/pkgs/by-name/bunpen/integration_test
+++ b/pkgs/by-name/bunpen/integration_test
@@ -103,6 +103,17 @@ test_14_keep_net() {
  test -z "$(bunpen --bunpen-path / ip link show lo up)"
 }
 test_15_reap_children() {
  # in a PID namespace, PID 1 needs to reap children.
  # that is, any process which `fork`s away from the main program being sandboxed,
  # and then exits, becomes a zombie: PID 1 needs to `wait` on it to properly dispose of the process.
  #
  # `setsid -f true` forks `true` away from the sandboxed `bash`; the trailing `sleep 2` keeps
  # the sandbox running (in the background) while we inspect the process table below.
  bunpen --bunpen-path / bash -c "setsid -f true ; sleep 2" &
  # give the sandbox a moment to start up and for `true` to exit
  sleep 0.5
  # check for a line like: `225215 ?        Zs     0:00 [true] <defunct>`
  # if this line exists, then we failed to reap: `grep` succeeds and the test fails (returns 1).
  # otherwise no zombie was found and the test passes (returns 0).
  ps x | grep -E 'Zs +[0-9]+:[0-9]+ \[true\] <defunct>' && return 1 || return 0
 }
 tested=
 rc=0
--- a/pkgs/by-name/bunpen/package.nix
+++ b/pkgs/by-name/bunpen/package.nix
@@ -1,7 +1,9 @@
 {
  hareHook,
  iproute2,
  procps,
  stdenv,
  util-linux,
  which,
 }: stdenv.mkDerivation {
  pname = "bunpen";
@@ -13,6 +15,8 @@
  nativeCheckInputs = [
    iproute2
    procps  # for `ps`
    util-linux  # for `setsid`
    which
  ];
--- a/pkgs/by-name/bunpen/restrict/ns/namespace.ha
+++ b/pkgs/by-name/bunpen/restrict/ns/namespace.ha
@@ -188,18 +188,29 @@ fn wait_and_propagate(child_pid: os::exec::process) (void | os::exec::error) = {
  os::exit(rc);  // propagate exit code
 };
 // block until the provided child exits, and then return its exit status.
 //
 // this function actually waits on *any* child, in a loop, but only returns the
 // exit status of the specific child of interest (`child_pid`); any other child
 // which exits in the meantime is reaped and its status is discarded.
 // this way, we're able to act as a child reaper (as expected of PID 1).
 //
 // if waiting fails for any reason other than being interrupted, that error is
 // returned to the caller.
 fn wait_child(child_pid: os::exec::process) (os::exec::status | os::exec::error) = {
  for (true) {
-    match (os::exec::wait(&child_pid)) {
+    match (os::exec::waitany()) {
      case let e: os::exec::error => match (e) {
        case errors::interrupted =>
          // i guess before the days of `poll`, `wait` had to wait on either the
          // child OR a signal sent to this pid; so we need to retry if the
          // reason we woke isn't because the child died: log it, then fall
          // through to the next loop iteration to wait again.
-          log::printfln("[namespace/fork] wait(child) interrupted (signal?)... will retry");
+          log::printfln("[namespace/fork] waitany() interrupted (signal?)... will retry");
        case => return e;
      };
-      case let status: os::exec::status => return status;
+      case let proc_and_status: (os::exec::process, os::exec::status) => {
        let (proc, status) = proc_and_status;
        if (proc == child_pid) {
          return status;
        };
        // else we've just reaped a zombie child which had forked away from the
        // main program; ignore its status and keep waiting for `child_pid`.
      };
    };
  };
 };