netns: configure the device in a way that should allow named endpoints to be resolved outside the netns

This commit is contained in:
2024-11-11 02:19:00 +00:00
parent 95d9db3973
commit 23913c9cd2
2 changed files with 72 additions and 52 deletions

View File

@@ -163,22 +163,40 @@ let
systemd.services."netns-${name}-wg" = {
description = "configure the wireguard device which provides ${name} with an IP";
wantedBy = [ "netns-${name}.target" ];
before = [ "netns-${name}.target" ];
after = [ "netns-${name}.service" ];
partOf = [ "netns-${name}.service" ];
before = [ "netns-${name}.target" ];
after = [
"netns-${name}.service"
# in case the endpoint is a domain or host name, wait for the DNS resolver to be available
# before even trying to configure the device. not strictly necessary, just avoids wasting resources/retries.
"nss-lookup.target"
];
serviceConfig.Type = "oneshot";
serviceConfig.RemainAfterExit = true;
serviceConfig.Restart = "on-failure";
serviceConfig.RestartSec = "10s";
serviceConfig.RestartMaxDelaySec = "180s";
serviceConfig.RestartSteps = 9; # delay grows geometrically from 10s to 180s over 9 steps (roughly: 10s, 14s, 19s, 26s, ... 180s), then keep the 180s retry
script = ''
${ip} link add wg-${name} type wireguard
${ip} link set wg-${name} netns ${name}
${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name}
${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile}
${in-ns} ${ip} link set up dev wg-${name}
# TODO: endpoint configuration should be split out into a retryable thing, capable of DNS lookups
${in-ns} ${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \
# resolve the endpoint *now*, from a namespace which can do DNS lookups, before moving the device into its destination netns
# at this point, our wg device can neither send nor receive traffic, because we haven't given it a private key.
# hence, it's 100% safe to configure peers even inside the root ns at this point.
#
# N.B.: `wg` resolves the endpoint _immediately_; it doesn't save DNS info into the device at all,
# so the possibility of any code not visible here trying to re-resolve the endpoint at a later time
# (i.e. from within the namespace) is 0.
${wg'} set wg-${name} peer ${wg.peer.publicKey} endpoint ${wg.peer.endpoint} \
persistent-keepalive 25 \
allowed-ips 0.0.0.0/0
${ip} link set wg-${name} netns ${name}
${in-ns} ${wg'} set wg-${name} private-key ${wg.privateKeyFile}
${in-ns} ${ip} address add ${netnsPubIpv4} dev wg-${name}
${in-ns} ${ip} link set up dev wg-${name}
# in the namespace, make this device the default route
${in-ns} ${ip} route replace 0.0.0.0/0 dev wg-${name} table main
'';
serviceConfig.ExecStopPost = [

View File

@@ -188,49 +188,51 @@ let
# but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose".
networking.firewall.checkReversePath = "loose";
systemd.services."${name}-refresh" = {
# periodically re-apply peers, to ensure DNS mappings stay fresh
# borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix>
wantedBy = [ "network.target" ];
path = [ config.sane.programs.wireguard-tools.package ];
serviceConfig.Restart = "always";
serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network)
serviceConfig.Type = "simple";
unitConfig.StartLimitIntervalSec = 0;
script = ''
while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do
echo "${name} set to:" "$(wg show ${name} endpoints)"
# in the normal case that DNS resolves, and whatnot, sleep before the next attempt
sleep 180
done
'';
# systemd hardening (systemd-analyze security wg-home-refresh.service)
serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN";
serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN";
serviceConfig.LockPersonality = true;
serviceConfig.MemoryDenyWriteExecute = true;
serviceConfig.NoNewPrivileges = true;
serviceConfig.ProtectClock = true;
serviceConfig.ProtectHostname = true;
serviceConfig.RemoveIPC = true;
serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK";
#VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/...
# see `systemd-analyze filesystems` for a full list
serviceConfig.RestrictFileSystems = "@common-block @basic-api";
serviceConfig.RestrictRealtime = true;
serviceConfig.RestrictSUIDSGID = true;
serviceConfig.SystemCallArchitectures = "native";
serviceConfig.SystemCallFilter = [
"@system-service"
"@sandbox"
"~@chown"
"~@cpu-emulation"
"~@keyring"
];
serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom}
# serviceConfig.DeviceAllow = "/dev/...";
serviceConfig.RestrictNamespaces = true;
};
# XXX: all my wireguard DNS endpoints are static at the moment, so refresh logic isn't needed.
# re-enable this should that ever change.
# systemd.services."${name}-refresh" = {
# # periodically re-apply peers, to ensure DNS mappings stay fresh
# # borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix>
# wantedBy = [ "network.target" ];
# path = [ config.sane.programs.wireguard-tools.package ];
# serviceConfig.Restart = "always";
# serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network)
# serviceConfig.Type = "simple";
# unitConfig.StartLimitIntervalSec = 0;
# script = ''
# while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do
# echo "${name} set to:" "$(wg show ${name} endpoints)"
# # in the normal case that DNS resolves, and whatnot, sleep before the next attempt
# sleep 180
# done
# '';
# # systemd hardening (systemd-analyze security wg-home-refresh.service)
# serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN";
# serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN";
# serviceConfig.LockPersonality = true;
# serviceConfig.MemoryDenyWriteExecute = true;
# serviceConfig.NoNewPrivileges = true;
# serviceConfig.ProtectClock = true;
# serviceConfig.ProtectHostname = true;
# serviceConfig.RemoveIPC = true;
# serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK";
# #VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/...
# # see `systemd-analyze filesystems` for a full list
# serviceConfig.RestrictFileSystems = "@common-block @basic-api";
# serviceConfig.RestrictRealtime = true;
# serviceConfig.RestrictSUIDSGID = true;
# serviceConfig.SystemCallArchitectures = "native";
# serviceConfig.SystemCallFilter = [
# "@system-service"
# "@sandbox"
# "~@chown"
# "~@cpu-emulation"
# "~@keyring"
# ];
# serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom}
# # serviceConfig.DeviceAllow = "/dev/...";
# serviceConfig.RestrictNamespaces = true;
# };
# networking.firewall.extraCommands = with pkgs; ''
# # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN