bunpen: proof-of-concept mount namespace, exposing only *some* paths

2024-08-25 11:38:08 +00:00
parent 64948a497d
commit 7a902cabfe
7 changed files with 147 additions and 42 deletions
--- a/pkgs/additional/bunpen/main.ha
+++ b/pkgs/additional/bunpen/main.ha
@@ -2,18 +2,31 @@
 use config;
 use log;
 use restrict;
 use rt;
 use rtext;
 use strings;
 use os;
 use os::exec;
 use types::c;
-fn do_exec(args: []str) never = {
+fn do_exec(args: []str) (os::exec::error | void) = {
  // Logs the full command line, then hands control to the requested program
  // through rtext::execve; a failing execve is reported via rtext::check_error.
  // NOTE(review): args[1] is indexed unconditionally below, so callers are
  // expected to pass at least <argv0> <path>.
  {
    let joined = strings::join(" ", args...);
    defer free(joined);
    log::printfln("exec: {}", joined);
-  free(joined);
+  };
-  let cmd = os::exec::cmd(args[0], args[1..]...)!;
+  // we receive the args as <argv0> <path> <args>,
-  os::exec::exec(&cmd);
+  // and want to invoke the program via `exec(path, argv0, args...)`.
  // TODO: rework `opts.cmd` handling to make this more consistent.
  let path = args[1];
  // remove <path> from the slice so what remains is <argv0> <args...>
  static delete(args[1]);
  rtext::check_error("exec", rtext::execve(path, args));
  // XXX: os::exec::exec offers no way to preserve argv0, but the following
  // works if you don't care about that:
  // let cmd = os::exec::cmd(args[1], args[2..]...)?;
  // os::exec::exec(&cmd);
 };
 export fn main() void = {
@@ -39,8 +52,8 @@ export fn main() void = {
  restrict::namespace_restrict(&what);
  restrict::landlock_restrict(&what);
  if (opts.drop_shell) {
-    do_exec(["/bin/sh"]);
+    rtext::check_error("exec /bin/sh", do_exec(["sh", "/bin/sh"]));
  } else {
-    do_exec(opts.cmd);
+    rtext::check_error("exec <user command>", do_exec(opts.cmd));
  };
 };
--- a/pkgs/additional/bunpen/restrict/namespace.ha
+++ b/pkgs/additional/bunpen/restrict/namespace.ha
@@ -39,32 +39,41 @@ export fn namespace_restrict(what: *resources) void = {
  // mapped to non-ns ops by the same user, and vice-versa
  write_uid_map(uid, gid);
-  rt::mount("tmpfs", "/tmp", &['t': u8, 'm', 'p', 'f', 's', 0]: *const u8, rtext::MS_NODEV | rtext::MS_NOSUID, null)!;
+  // allow new mounts to propagate from the parent namespace into the child
  // namespace, but not vice versa:
  rtext::check_error("reconfigure / as MS_SLAVE", rtext::mount("/", "/", "", rtext::MS_SLAVE | rtext::MS_REC, null));
-  // chroot to `/tmp`, with the old root being placed at `/tmp/oldroot` (i.e. /oldroot)
+  // setup a new root in `/tmp`, mount the desired paths into it, and then pivot into it.
-  check_error("cd /tmp", os::chdir("/tmp"));
+  rtext::check_error("mount -t tmpfs tmpfs /tmp", rtext::mount("tmpfs", "/tmp", "tmpfs", 0, null));
  check_error("mkdir /tmp/oldroot", rt::mkdir("oldroot", 0o755));
  rtext::pivot_root("/tmp", "oldroot")!;
  check_error("cd /", os::chdir("/"));
-  // chroot back into `/oldroot`.
+  // TODO: mount the paths the user asks for, but until then hardcode stuff:
-  // TODO: we should rather chroot into `/newroot`, after mounting everything
+  rtext::check_error("mkdir /tmp/bin", rt::mkdir("/tmp/bin", 0o755));
-  // there. this is just a proof-of-concept
+  rtext::check_error("mount /bin /tmp/bin", rtext::mount("/bin", "/tmp/bin", "", rtext::MS_BIND | rtext::MS_REC, null));
-  check_error("cd /oldroot", os::chdir("/oldroot"));
+  // rtext::check_error("mkdir /tmp/dev", rt::mkdir("/tmp/dev", 0o755));
-  rtext::pivot_root("/oldroot", ".")!;
+  // rtext::check_error("mount /dev /tmp/dev", rtext::mount("/dev", "/tmp/dev", "", rtext::MS_BIND | rtext::MS_REC, null));
-  check_error("cd /", os::chdir("/"));
+  // rtext::check_error("mkdir /tmp/etc", rt::mkdir("/tmp/etc", 0o755));
  // rtext::check_error("mount /etc /tmp/etc", rtext::mount("/etc", "/tmp/etc", "", rtext::MS_BIND | rtext::MS_REC, null));
  rtext::check_error("mkdir /tmp/nix", rt::mkdir("/tmp/nix", 0o755));
  rtext::check_error("mount /nix /tmp/nix", rtext::mount("/nix", "/tmp/nix", "", rtext::MS_BIND | rtext::MS_REC, null));
  // rtext::check_error("mkdir /tmp/proc", rt::mkdir("/tmp/proc", 0o755));
  // rtext::check_error("mount /proc /tmp/proc", rtext::mount("/proc", "/tmp/proc", "", rtext::MS_BIND | rtext::MS_REC, null));
  // rtext::check_error("mkdir /tmp/run", rt::mkdir("/tmp/run", 0o755));
  // rtext::check_error("mount /run /tmp/run", rtext::mount("/run", "/tmp/run", "", rtext::MS_BIND | rtext::MS_REC, null));
  // rtext::check_error("mkdir /tmp/sys", rt::mkdir("/tmp/sys", 0o755));
  // rtext::check_error("mount /sys /tmp/sys", rtext::mount("/sys", "/tmp/sys", "", rtext::MS_BIND | rtext::MS_REC, null));
  // rtext::check_error("mkdir /tmp/usr", rt::mkdir("/tmp/usr", 0o755));
  // rtext::check_error("mount /usr /tmp/usr", rtext::mount("/usr", "/tmp/usr", "", rtext::MS_BIND | rtext::MS_REC, null));
  // rtext::check_error("mkdir /tmp/var", rt::mkdir("/tmp/var", 0o755));
  // rtext::check_error("mount /var /tmp/var", rtext::mount("/var", "/tmp/var", "", rtext::MS_BIND | rtext::MS_REC, null));
  rtext::check_error("cd /tmp", os::chdir("/tmp"));
  rtext::check_error("pivot_root . .", rtext::pivot_root(".", "."));
  rtext::check_error("umount .", rt::umount2(".", rtext::MNT_DETACH));
  rtext::check_error("cd /", os::chdir("/"));
  // TODO: CLONE_NEWPID (might not work without forking to also become reaper)
 };
 // Examines the result `c` of the operation described by `op`. When `c` holds
 // an error it is reported through log::fatalf, prefixed with `op`; a void
 // result is a no-op.
 fn check_error(op: str, c: (void | fs::error | rt::errno)) void = {
  match (c) {
    case let err: fs::error =>
      log::fatalf("{}: {}", op, fs::strerror(err));
    case let code: rt::errno =>
      // raw errno: print both its symbolic name and its description
      log::fatalf("{}: {}: {}", op, rt::errname(code), rt::strerror(code));
    case void =>
      void;
  };
 };
 fn write_uid_map(uid: unix::uid, gid: unix::gid) void = {
  let uid_fd = rt::open("/proc/self/uid_map", rt::O_RDWR | rt::O_CLOEXEC, 0)!;
  let uid_buf: [4096]u8 = [0...];
--- a/pkgs/additional/bunpen/rtext/cstr.ha
+++ b/pkgs/additional/bunpen/rtext/cstr.ha
@@ -17,7 +17,7 @@ fn make_cstr(scratch: []c::char, s: str) *c::char = {
      case null => void;
      case let data: *[*]u8 =>
        data[s_repr.length] = 0;
-        return c::nulstr(s);
+        return data: *c::char;
    };
  };
  // XXX: will `abort` if the string is larger than the buffer!
--- a/pkgs/additional/bunpen/rtext/error.ha
+++ b/pkgs/additional/bunpen/rtext/error.ha
@@ -0,0 +1,15 @@
 // vim: set shiftwidth=2 :
 use fs;
 use log;
 use os::exec;
 use rt;
 // Examines `what`, the result of the operation described by `context`. An
 // fs, os::exec or raw errno failure is reported through log::fatalf with
 // `context` as the message prefix; anything else (i.e. void) is ignored.
 export fn check_error(context: str, what: (void | fs::error | os::exec::error | rt::errno)) void = {
  match (what) {
    case let err: fs::error =>
      log::fatalf("{}: {}", context, fs::strerror(err));
    case let err: os::exec::error =>
      log::fatalf("{}: {}", context, os::exec::strerror(err));
    case let code: rt::errno =>
      // raw errno: print both its symbolic name and its description
      log::fatalf("{}: {}: {}", context, rt::errname(code), rt::strerror(code));
    case =>
      void;
  };
 };
--- a/pkgs/additional/bunpen/rtext/exec.ha
+++ b/pkgs/additional/bunpen/rtext/exec.ha
@@ -0,0 +1,29 @@
 // vim: set shiftwidth=2 :
 use path;
 use rt;
 use types::c;
 // Issues the execve syscall for the program at `path`, using `argv` as its
 // argument vector. A failing syscall is returned to the caller as rt::errno.
 // NOTE: `envp` is accepted but not forwarded; the syscall is given 0 in its
 // place.
 export fn execve(path: str, argv: []str, envp: []str = []) (rt::errno | void) = {
  let path_buf: [path::MAX]c::char = [0...];
  let path_arg = make_cstr(&path_buf, path): uintptr: u64;
  // XXX: this "leaks" the c arrays, but not much can be done about that
  let argv_arg = to_cstr_array(argv): *[*]nullable *const c::char: uintptr: u64;
  // forwarding the environment is disabled for now; it would be:
  // to_cstr_array(envp): *[*]nullable *const c::char: uintptr: u64
  syscall(rt::SYS_execve, path_arg, argv_arg, 0)?;
 };
 // Builds a newly allocated, NULL-terminated array holding one C-string
 // pointer per element of `strs`, in the same order.
 // Ownership passes to the caller, who must free the array AND each string.
 fn to_cstr_array(strs: []str) []nullable *const c::char = {
  // reserve room for every string plus the terminating NULL
  let out: []nullable *const c::char = alloc([], len(strs) + 1z);
  for (let i = 0z; i < len(strs); i += 1) {
    append(out, c::fromstr(strs[i]));
  };
  append(out, null);
  return out;
 };
--- a/pkgs/additional/bunpen/rtext/mount.ha
+++ b/pkgs/additional/bunpen/rtext/mount.ha
@@ -1,4 +1,7 @@
 // vim: set shiftwidth=2 :
 use path;
 use rt;
 use types::c;
 export const MS_RDONLY: u64 = 1;
 export const MS_NOSUID: u64 = 2;
@@ -26,3 +29,39 @@ export const MS_KERNMOUNT: u64 =(1<<22);
 export const MS_I_VERSION: u64 = (1<<23);
 export const MS_STRICTATIME: u64 = (1<<24);
 export const MS_LAZYTIME: u64 = (1<<25);
 // XXX: hare is weird about these, and declares the flags parameter to `umount2`
 // as `int` instead of `u64`.
 // attempt to forcibly umount
 export const MNT_FORCE: int = 0x00000001;
 // just detach from the tree
 export const MNT_DETACH: int = 0x00000002;
 // mark for expiry
 export const MNT_EXPIRE: int = 0x00000004;
 // don't follow symlink on umount
 export const UMOUNT_NOFOLLOW: int = 0x00000008;
 // // flag guaranteed to be unused
 // export const UMOUNT_UNUSED: int = 0x80000000;
 // old magic mount flag (as in: no longer necessary, does nothing!)
 export const MS_MGC_VAL: u64 = 0xC0ED0000;
 // old magic mount mask (as in: no longer necessary, does nothing!)
 export const MS_MGC_MSK: u64 = 0xffff0000;
 // Issues the mount syscall, attaching `source` at `target` with filesystem
 // type `fstype`, flags `mountflags` and the opaque `data` pointer. A failing
 // syscall is returned to the caller as rt::errno.
 // XXX(2024-08-24): hare stdlib `mount` syscall has a bug where it mounts
 // `target` to `target`, not `source` to `target`.
 // TODO: fix upstream
 export fn mount(source: str, target: str, fstype: str, mountflags: u64, data: nullable *opaque) (rt::errno | void) = {
  // scratch space handed to make_cstr for each string argument
  let source_buf: [path::MAX]c::char = [0...];
  let target_buf: [path::MAX]c::char = [0...];
  let fstype_buf: [256]c::char = [0...];
  let source_arg = make_cstr(&source_buf, source): uintptr: u64;
  let target_arg = make_cstr(&target_buf, target): uintptr: u64;
  let fstype_arg = make_cstr(&fstype_buf, fstype): uintptr: u64;
  syscall(rt::SYS_mount, source_arg, target_arg, fstype_arg, mountflags, data: uintptr)?;
 };
--- a/pkgs/additional/bunpen/rtext/pivot_root.ha
+++ b/pkgs/additional/bunpen/rtext/pivot_root.ha
@@ -3,13 +3,13 @@ use path;
 use rt;
 use types::c;
-export fn pivot_root(new_root: str, put_old: str) (rt::errno | u64) = {
+export fn pivot_root(new_root: str, put_old: str) (rt::errno | void) = {
  // Issues the pivot_root syscall (rt::SYS_pivot_root) with `new_root` and
  // `put_old`, each passed through make_cstr with its own scratch buffer.
  // In the new (+) version the syscall's numeric result is discarded and a
  // failure is propagated to the caller as rt::errno.
  let new_root_buf: [path::MAX]c::char = [0...];
  let put_old_buf: [path::MAX]c::char = [0...];
-  return syscall(
+  syscall(
    rt::SYS_pivot_root,
    make_cstr(&new_root_buf, new_root): uintptr: u64,
    make_cstr(&put_old_buf, put_old): uintptr: u64,
-  );
+  )?;
 };