From 23913c9cd2dba9e164dc5834fee77a8cdcb26bea Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 11 Nov 2024 02:19:00 +0000 Subject: [PATCH] netns: configure the device in a way that should allow named endpoints to be resolved outside the netns --- modules/netns.nix | 36 ++++++++++++++----- modules/vpn.nix | 88 ++++++++++++++++++++++++----------------------- 2 files changed, 72 insertions(+), 52 deletions(-) diff --git a/modules/netns.nix b/modules/netns.nix index 84624919b..544a51a51 100644 --- a/modules/netns.nix +++ b/modules/netns.nix @@ -163,22 +163,40 @@ let systemd.services."netns-${name}-wg" = { description = "configure the wireguard device which provides ${name} with an IP"; wantedBy = [ "netns-${name}.target" ]; - before = [ "netns-${name}.target" ]; - after = [ "netns-${name}.service" ]; partOf = [ "netns-${name}.service" ]; + before = [ "netns-${name}.target" ]; + after = [ + "netns-${name}.service" + # in case the endpoint is a domain or host name, wait for the DNS resolver to be available + # before even trying to configure the device. not strictly necessary, just avoids wasting resources/retries. + "nss-lookup.target" + ]; serviceConfig.Type = "oneshot"; serviceConfig.RemainAfterExit = true; + serviceConfig.Restart = "on-failure"; + serviceConfig.RestartSec = "10s"; + serviceConfig.RestartMaxDelaySec = "180s"; + serviceConfig.RestartSteps = 9; # roughly geometric backoff: 10s, 14s, 19s, 26s, ... 
180s, then keep the 180s retry script = '' ${ip} link add wg-${name} type wireguard - ${ip} link set wg-${name} netns ${name} - ${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name} - ${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile} - ${in-ns} ${ip} link set up dev wg-${name} - - # TODO: endpoint configuration should be split out into a retryable thing, capable of DNS lookups - ${in-ns} ${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \ + # resolve the endpoint *now*, from a namespace which can do DNS lookups, before moving it into its destination netns + # at this point, our wg device can neither send nor receive traffic, because we haven't given it a private key. + # hence, it's 100% safe to configure peers even inside the root ns at this point. + # + # N.B.: `wg` resolves the endpoint _immediately_; it doesn't save DNS info into the device at all, + # so the possibility of any code not visible here trying to re-resolve the endpoint at a later time + # (i.e. from within the namespace) is 0. + ${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \ persistent-keepalive 25 \ allowed-ips 0.0.0.0/0 + + ${ip} link set wg-${name} netns ${name} + + ${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile} + ${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name} + ${in-ns} ${ip} link set up dev wg-${name} + + # in the namespace, make this device the default route ${in-ns} ${ip} route replace 0.0.0.0/0 dev wg-${name} table main ''; serviceConfig.ExecStopPost = [ diff --git a/modules/vpn.nix b/modules/vpn.nix index 088d95ad4..31a7c9547 100644 --- a/modules/vpn.nix +++ b/modules/vpn.nix @@ -188,49 +188,51 @@ let # but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose". 
networking.firewall.checkReversePath = "loose"; - systemd.services."${name}-refresh" = { - # periodically re-apply peers, to ensure DNS mappings stay fresh - # borrowed from - wantedBy = [ "network.target" ]; - path = [ config.sane.programs.wireguard-tools.package ]; - serviceConfig.Restart = "always"; - serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network) - serviceConfig.Type = "simple"; - unitConfig.StartLimitIntervalSec = 0; - script = '' - while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do - echo "${name} set to:" "$(wg show ${name} endpoints)" - # in the normal case that DNS resolves, and whatnot, sleep before the next attempt - sleep 180 - done - ''; - # systemd hardening (systemd-analyze security wg-home-refresh.service) - serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN"; - serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN"; - serviceConfig.LockPersonality = true; - serviceConfig.MemoryDenyWriteExecute = true; - serviceConfig.NoNewPrivileges = true; - serviceConfig.ProtectClock = true; - serviceConfig.ProtectHostname = true; - serviceConfig.RemoveIPC = true; - serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK"; - #VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/... - # see `systemd-analyze filesystems` for a full list - serviceConfig.RestrictFileSystems = "@common-block @basic-api"; - serviceConfig.RestrictRealtime = true; - serviceConfig.RestrictSUIDSGID = true; - serviceConfig.SystemCallArchitectures = "native"; - serviceConfig.SystemCallFilter = [ - "@system-service" - "@sandbox" - "~@chown" - "~@cpu-emulation" - "~@keyring" - ]; - serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom} - # serviceConfig.DeviceAllow = "/dev/..."; - serviceConfig.RestrictNamespaces = true; - }; + # XXX: all my wireguard DNS endpoints are static at the moment, so refresh logic isn't needed. + # re-enable this should that ever change. 
+ # systemd.services."${name}-refresh" = { + # # periodically re-apply peers, to ensure DNS mappings stay fresh + # # borrowed from + # wantedBy = [ "network.target" ]; + # path = [ config.sane.programs.wireguard-tools.package ]; + # serviceConfig.Restart = "always"; + # serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network) + # serviceConfig.Type = "simple"; + # unitConfig.StartLimitIntervalSec = 0; + # script = '' + # while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do + # echo "${name} set to:" "$(wg show ${name} endpoints)" + # # in the normal case that DNS resolves, and whatnot, sleep before the next attempt + # sleep 180 + # done + # ''; + # # systemd hardening (systemd-analyze security wg-home-refresh.service) + # serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN"; + # serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN"; + # serviceConfig.LockPersonality = true; + # serviceConfig.MemoryDenyWriteExecute = true; + # serviceConfig.NoNewPrivileges = true; + # serviceConfig.ProtectClock = true; + # serviceConfig.ProtectHostname = true; + # serviceConfig.RemoveIPC = true; + # serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK"; + # #VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/... + # # see `systemd-analyze filesystems` for a full list + # serviceConfig.RestrictFileSystems = "@common-block @basic-api"; + # serviceConfig.RestrictRealtime = true; + # serviceConfig.RestrictSUIDSGID = true; + # serviceConfig.SystemCallArchitectures = "native"; + # serviceConfig.SystemCallFilter = [ + # "@system-service" + # "@sandbox" + # "~@chown" + # "~@cpu-emulation" + # "~@keyring" + # ]; + # serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom} + # # serviceConfig.DeviceAllow = "/dev/..."; + # serviceConfig.RestrictNamespaces = true; + # }; # networking.firewall.extraCommands = with pkgs; '' # # wireguard packet marking. 
without this, rpfilter drops responses from a wireguard VPN