netns: configure the device in a way that should allow named endpoints to be resolved outside the netns

This commit is contained in:
2024-11-11 02:19:00 +00:00
parent 95d9db3973
commit 23913c9cd2
2 changed files with 72 additions and 52 deletions

View File

@@ -163,22 +163,40 @@ let
systemd.services."netns-${name}-wg" = { systemd.services."netns-${name}-wg" = {
description = "configure the wireguard device which provides ${name} with an IP"; description = "configure the wireguard device which provides ${name} with an IP";
wantedBy = [ "netns-${name}.target" ]; wantedBy = [ "netns-${name}.target" ];
before = [ "netns-${name}.target" ];
after = [ "netns-${name}.service" ];
partOf = [ "netns-${name}.service" ]; partOf = [ "netns-${name}.service" ];
before = [ "netns-${name}.target" ];
after = [
"netns-${name}.service"
# in case the endpoint is a domain or host name, wait for the DNS resolver to be available
# before even trying to configure the device. not strictly necessary, just avoids wasting resources/retries.
"nss-lookup.target"
];
serviceConfig.Type = "oneshot"; serviceConfig.Type = "oneshot";
serviceConfig.RemainAfterExit = true; serviceConfig.RemainAfterExit = true;
serviceConfig.Restart = "on-failure";
serviceConfig.RestartSec = "10s";
serviceConfig.RestartMaxDelaySec = "180s";
serviceConfig.RestartSteps = 9; # delay grows geometrically from 10s to 180s over 9 steps (roughly: 10s, 14s, 19s, 26s, ... 130s, 180s), then keeps retrying every 180s
script = '' script = ''
${ip} link add wg-${name} type wireguard ${ip} link add wg-${name} type wireguard
${ip} link set wg-${name} netns ${name} # resolve the endpoint *now*, from a namespace which can do DNS lookups, before moving it into its destination netns
${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name} # at this point, our wg device can neither send nor receive traffic, because we haven't given it a private key.
${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile} # hence, it's 100% safe to configure peers even inside the root ns at this point.
${in-ns} ${ip} link set up dev wg-${name} #
# N.B.: `wg` resolves the endpoint _immediately_; it doesn't save DNS info into the device at all,
# TODO: endpoint configuration should be split out into a retryable thing, capable of DNS lookups # so the possibility of any code not visible here trying to re-resolve the endpoint at a later time
${in-ns} ${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \ # (i.e. from within the namespace) is 0.
${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \
persistent-keepalive 25 \ persistent-keepalive 25 \
allowed-ips 0.0.0.0/0 allowed-ips 0.0.0.0/0
${ip} link set wg-${name} netns ${name}
${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile}
${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name}
${in-ns} ${ip} link set up dev wg-${name}
# in the namespace, make this device the default route
${in-ns} ${ip} route replace 0.0.0.0/0 dev wg-${name} table main ${in-ns} ${ip} route replace 0.0.0.0/0 dev wg-${name} table main
''; '';
serviceConfig.ExecStopPost = [ serviceConfig.ExecStopPost = [

View File

@@ -188,49 +188,51 @@ let
# but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose". # but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose".
networking.firewall.checkReversePath = "loose"; networking.firewall.checkReversePath = "loose";
systemd.services."${name}-refresh" = { # XXX: all my wireguard DNS endpoints are static at the moment, so refresh logic isn't needed.
# periodically re-apply peers, to ensure DNS mappings stay fresh # re-enable this should that ever change.
# borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix> # systemd.services."${name}-refresh" = {
wantedBy = [ "network.target" ]; # # periodically re-apply peers, to ensure DNS mappings stay fresh
path = [ config.sane.programs.wireguard-tools.package ]; # # borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix>
serviceConfig.Restart = "always"; # wantedBy = [ "network.target" ];
serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network) # path = [ config.sane.programs.wireguard-tools.package ];
serviceConfig.Type = "simple"; # serviceConfig.Restart = "always";
unitConfig.StartLimitIntervalSec = 0; # serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network)
script = '' # serviceConfig.Type = "simple";
while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do # unitConfig.StartLimitIntervalSec = 0;
echo "${name} set to:" "$(wg show ${name} endpoints)" # script = ''
# in the normal case that DNS resolves, and whatnot, sleep before the next attempt # while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do
sleep 180 # echo "${name} set to:" "$(wg show ${name} endpoints)"
done # # in the normal case that DNS resolves, and whatnot, sleep before the next attempt
''; # sleep 180
# systemd hardening (systemd-analyze security wg-home-refresh.service) # done
serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN"; # '';
serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN"; # # systemd hardening (systemd-analyze security wg-home-refresh.service)
serviceConfig.LockPersonality = true; # serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN";
serviceConfig.MemoryDenyWriteExecute = true; # serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN";
serviceConfig.NoNewPrivileges = true; # serviceConfig.LockPersonality = true;
serviceConfig.ProtectClock = true; # serviceConfig.MemoryDenyWriteExecute = true;
serviceConfig.ProtectHostname = true; # serviceConfig.NoNewPrivileges = true;
serviceConfig.RemoveIPC = true; # serviceConfig.ProtectClock = true;
serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK"; # serviceConfig.ProtectHostname = true;
#VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/... # serviceConfig.RemoveIPC = true;
# see `systemd-analyze filesystems` for a full list # serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK";
serviceConfig.RestrictFileSystems = "@common-block @basic-api"; # #VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/...
serviceConfig.RestrictRealtime = true; # # see `systemd-analyze filesystems` for a full list
serviceConfig.RestrictSUIDSGID = true; # serviceConfig.RestrictFileSystems = "@common-block @basic-api";
serviceConfig.SystemCallArchitectures = "native"; # serviceConfig.RestrictRealtime = true;
serviceConfig.SystemCallFilter = [ # serviceConfig.RestrictSUIDSGID = true;
"@system-service" # serviceConfig.SystemCallArchitectures = "native";
"@sandbox" # serviceConfig.SystemCallFilter = [
"~@chown" # "@system-service"
"~@cpu-emulation" # "@sandbox"
"~@keyring" # "~@chown"
]; # "~@cpu-emulation"
serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom} # "~@keyring"
# serviceConfig.DeviceAllow = "/dev/..."; # ];
serviceConfig.RestrictNamespaces = true; # serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom}
}; # # serviceConfig.DeviceAllow = "/dev/...";
# serviceConfig.RestrictNamespaces = true;
# };
# networking.firewall.extraCommands = with pkgs; '' # networking.firewall.extraCommands = with pkgs; ''
# # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN # # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN