Files
nix-files/modules/vpn.nix
2024-12-08 23:12:50 +00:00

273 lines
11 KiB
Nix

# debugging:
# - `journalctl -u systemd-networkd`
# - `networkctl --help`
#
# docs:
# - wireguard (nixos): <https://nixos.wiki/wiki/WireGuard#Setting_up_WireGuard_with_systemd-networkd>
# - wireguard (arch): <https://wiki.archlinux.org/title/WireGuard>
#
# to route all internet traffic through a VPN endpoint, run `sane-vpn up ${vpnName}`
# to route an application's traffic through a VPN: `sane-vpn do ${vpnName} ${command[@]}`
# to show the routing table: `ip rule`
# to show the NAT rules used for bridging: `sudo iptables -t nat --list-rules -v`
# to force a peer address change (e.g. DNS change): `wg set "${interface}" peer "${publicKey}" endpoint "${endpoint}"`
#
# the rough idea here is:
# 1. each VPN has an IP address: if we originate a packet, and the source address is the VPN's address, then it gets routed over the VPN trivially.
# 2a. create a separate routing table for each VPN, with table id = ID.
# 2b. if a packet enters the VPN's table then it will be routed via the VPN.
# 2c. to apply a VPN to all internet traffic, system-wide, a rule is added that forces each packet to enter that VPN's routing table.
# - that's done with `systemctl start vpn-$VPN`.
# - the VPN acts as the default route. so traffic destined to e.g. a LAN device does not traverse the VPN in this case. only internet traffic is VPN'd.
# 3. to apply a VPN to internet traffic selectively, just proxy an application's traffic into the VPN device
# 3a. use a network namespace and a userspace TCP stack (e.g. pasta/slirp4netns).
# 3b. attach the VPN device to a bridge device, then connect that to a network namespace by using a veth pair.
# 3c. just use `bunpen`, which abstracts the above options.
{ config, lib, sane-lib, ... }:
let
# shorthand for the user's `sane.vpn` attrset: maps each VPN name to its evaluated `vpnOpts` submodule.
cfg = config.sane.vpn;
# vpnOpts: submodule type describing a single entry of `sane.vpn`.
# the user supplies the connection details (id, endpoint, keys, addresses);
# the remaining options are derived from those in the `config` section at the bottom.
vpnOpts = let
  inherit (lib) mkOption types;
in types.submodule ({ name, config, ... }: {
  options = {
    # --- identity ---
    name = mkOption {
      description = ''
        read-only value: must match the attrName of this vpn.
      '';
      type = types.str;
    };
    id = mkOption {
      description = ''
        unique integer identifier for this VPN.
        lower number = higher priority, in many senses.
        lowest number = default VPN to use when no other is specified, or when multiple are enabled in the same circumstance.
      '';
      type = types.ints.between 1 99;
    };

    # --- remote peer ---
    endpoint = mkOption {
      description = ''
        host:port which hosts the other end of the VPN.
        e.g. "vpn.example.com:55280"
      '';
      type = types.str;
    };
    publicKey = mkOption {
      description = ''
        pubkey of the remote peer.
      '';
      type = types.str;
    };
    keepalive = mkOption {
      description = ''
        whether to send periodic packets to keep the NAT alive.
        this should only be needed if you want to receive unprompted inbound packets.
      '';
      default = false;
      type = types.bool;
    };

    # --- local end ---
    privateKeyFile = mkOption {
      description = ''
        path to the private key for my end of the VPN.
        e.g. "/run/secrets/wg-home.priv"
      '';
      type = types.either types.str types.path;
    };
    addrV4 = mkOption {
      description = ''
        IP address of my end of the VPN.
        e.g. "172.27.12.34"
      '';
      type = types.str;
    };
    subnetV4 = mkOption {
      description = ''
        subnet dictating the range of IPs which should ALWAYS be routed through this VPN, no matter the system-wide settings.
      '';
      default = null;
      example = "24";
      type = types.nullOr types.str;
    };
    dns = mkOption {
      description = ''
        dns servers to use for traffic associated with this VPN.
      '';
      default = [ "46.227.67.134" "192.165.9.158" ];
      type = types.listOf types.str;
    };

    # --- derived values (all computed from `id` below; not meant to be set by hand) ---
    isDefault = mkOption {
      description = ''
        read-only value: set based on whichever VPN has the lowest id.
      '';
      internal = true;
      type = types.bool;
    };
    fwmark = mkOption {
      internal = true;
      type = types.int;
    };
    # priority*: used externally, by e.g. `sane-vpn`
    priorityMain = mkOption {
      internal = true;
      type = types.int;
    };
    priorityFwMark = mkOption {
      internal = true;
      type = types.int;
    };
  };

  config = {
    # the attribute name under `sane.vpn` doubles as the VPN's name.
    inherit name;
    # each derived number is a fixed offset from the VPN's id.
    priorityMain = config.id + 100;
    priorityFwMark = config.id + 300;
    fwmark = config.id + 10000;
    # this VPN is the default when no configured VPN has a smaller id.
    isDefault = lib.all (peer: peer.id >= config.id) (lib.attrValues cfg);
  };
});
# mkVpnConfig: build the NixOS config fragment for one `sane.vpn.<name>` entry.
#
# arguments:
# - `name`: the VPN's attribute name; reused as the wireguard interface name.
# - the VPN's evaluated option set (see `vpnOpts`); only the destructured fields are used.
#
# result: an attrset contributing
# - `assertions`: a check that no other VPN uses the same `id`.
# - `systemd.network.netdevs`: the wireguard device and its single peer.
# - `systemd.network.networks`: address, DNS and routes for that device.
# - `systemd.network.config` / `networking.firewall`: settings shared by every VPN.
mkVpnConfig = name: { addrV4, dns, endpoint, fwmark, id, keepalive, privateKeyFile, publicKey, subnetV4, ... }: {
  assertions = [
    {
      # exactly one VPN (this one) may carry this id: it doubles as the routing table number below.
      assertion = (lib.count (c: c.id == id) (builtins.attrValues cfg)) == 1;
      # `id` is an integer, which Nix refuses to interpolate directly ("cannot coerce an integer to a string"):
      # convert it explicitly, else a failing assertion aborts evaluation instead of showing this message.
      message = "multiple VPNs share id ${builtins.toString id}";
    }
  ];
  systemd.network.netdevs."98-${name}" = {
    # see: `man 5 systemd.netdev`
    netdevConfig = {
      Kind = "wireguard";
      Name = name;
    };
    wireguardConfig = {
      FirewallMark = fwmark;
      PrivateKeyFile = privateKeyFile;
    };
    wireguardPeers = [
      ({
        # accept any destination on this peer; which traffic actually enters the device is decided by the routes/rules.
        AllowedIPs = [
          "0.0.0.0/0"
          "::/0"
        ];
        Endpoint = endpoint;
        PublicKey = publicKey;
      } // lib.optionalAttrs keepalive {
        # only emitted when the VPN opts in via `keepalive = true`.
        PersistentKeepalive = 25;
      })
    ];
  };
  systemd.network.networks."50-${name}" = {
    # see: `man 5 systemd.network`
    matchConfig.Name = name;
    networkConfig.Address = [ "${addrV4}/32" ];
    networkConfig.DNS = dns;
    # TODO: `sane-vpn up <vpn>` should configure DNS to be sent over the VPN
    # DNSDefaultRoute: system DNS queries are sent to this link's DNS server
    # networkConfig.DNSDefaultRoute = true;
    # Domains = ~.: system DNS queries are sent to this link's DNS server
    # networkConfig.Domains = "~.";
    routes = [{
      # default route inside this VPN's own table (table number = VPN id).
      Table = id;
      Scope = "link";
      Destination = "0.0.0.0/0";
      Source = addrV4;
    }] ++ lib.optionals (subnetV4 != null) [{
      # optional extra route for the VPN's subnet; no `Table` is given here.
      Scope = "link";
      Destination = "${addrV4}/${subnetV4}";
      Source = addrV4;
    }];
    # RequiredForOnline => should `systemd-networkd-wait-online` fail if this network can't come up?
    linkConfig.RequiredForOnline = false;
  };
  systemd.network.config.networkConfig.ManageForeignRoutingPolicyRules = false;
  # linux will drop inbound packets if it thinks a reply to that packet wouldn't exit via the same interface (rpfilter).
  # wg-quick has a solution via `iptables -j CONNMARK`, and that does work for system-wide VPNs,
  # but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose".
  networking.firewall.checkReversePath = "loose";
  # XXX: all my wireguard DNS endpoints are static at the moment, so refresh logic isn't needed.
  # re-enable this should that ever change.
  # N.B.: systemd will still bring up the device and even the peer if it fails to resolve the endpoint.
  # but it seems that it'll try to re-resolve the endpoint again later (unclear how to configure this better).
  # systemd.services."${name}-refresh" = {
  # # periodically re-apply peers, to ensure DNS mappings stay fresh
  # # borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix>
  # wantedBy = [ "network.target" ];
  # path = [ config.sane.programs.wireguard-tools.package ];
  # serviceConfig.Restart = "always";
  # serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network)
  # serviceConfig.Type = "simple";
  # unitConfig.StartLimitIntervalSec = 0;
  # script = ''
  # while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do
  # echo "${name} set to:" "$(wg show ${name} endpoints)"
  # # in the normal case that DNS resolves, and whatnot, sleep before the next attempt
  # sleep 180
  # done
  # '';
  # # systemd hardening (systemd-analyze security wg-home-refresh.service)
  # serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN";
  # serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN";
  # serviceConfig.LockPersonality = true;
  # serviceConfig.MemoryDenyWriteExecute = true;
  # serviceConfig.NoNewPrivileges = true;
  # serviceConfig.ProtectClock = true;
  # serviceConfig.ProtectHostname = true;
  # serviceConfig.RemoveIPC = true;
  # serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK";
  # #VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/...
  # # see `systemd-analyze filesystems` for a full list
  # serviceConfig.RestrictFileSystems = "@common-block @basic-api";
  # serviceConfig.RestrictRealtime = true;
  # serviceConfig.RestrictSUIDSGID = true;
  # serviceConfig.SystemCallArchitectures = "native";
  # serviceConfig.SystemCallFilter = [
  # "@system-service"
  # "@sandbox"
  # "~@chown"
  # "~@cpu-emulation"
  # "~@keyring"
  # ];
  # serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom}
  # # serviceConfig.DeviceAllow = "/dev/...";
  # serviceConfig.RestrictNamespaces = true;
  # };
  # networking.firewall.extraCommands = with pkgs; ''
  # # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN
  # # because the "reverse path check" fails (i.e. it thinks a response to the packet would go out via a different interface than what the wireguard packet arrived at).
  # # debug with e.g. `iptables --list -v -n -t mangle`
  # # - and `networking.firewall.logReversePathDrops = true;`, `networking.firewall.logRefusedPackets = true;`
  # # - and `journalctl -k` to see dropped packets
  # #
  # # note that wg-quick also adds a rule to reject non-local traffic from all interfaces EXCEPT the tunnel.
  # # that may protect against actors trying to probe us: actors we connect to via wireguard who send their response packets (speculatively) to our plaintext IP to see if we accept them.
  # # but that's fairly low concern, and firewalling by the gateway/NAT helps protect against that already.
  # ${iptables}/bin/iptables -t mangle -I PREROUTING 1 -i ${name} -m mark --mark 0 -j CONNMARK --restore-mark
  # ${iptables}/bin/iptables -t mangle -A POSTROUTING -o ${name} -m mark --mark ${builtins.toString id} -j CONNMARK --save-mark
  # '';
};
in
{
  # `sane.vpn.<name>`: one entry per VPN, each typed by `vpnOpts`. no VPNs are defined by default.
  options.sane.vpn = lib.mkOption {
    default = {};
    type = lib.types.attrsOf vpnOpts;
  };

  config = let
    # one config fragment per declared VPN, in the shape returned by `mkVpnConfig`.
    perVpn = lib.mapAttrsToList mkVpnConfig cfg;
    # `take` selects the option paths this module contributes to.
    # it is handed to `sane-lib.mkTypedMerge` and also applied to that call's result.
    # NOTE(review): `mkVpnConfig` currently sets neither `networking.localCommands` nor `systemd.services`;
    # presumably `mkTypedMerge` tolerates paths that no fragment defines -- confirm before pruning them.
    take = merged: {
      inherit (merged) assertions;
      networking = {
        firewall.checkReversePath = merged.networking.firewall.checkReversePath;
        localCommands = merged.networking.localCommands;
      };
      systemd = {
        network = merged.systemd.network;
        services = merged.systemd.services;
      };
    };
  in
    take (sane-lib.mkTypedMerge take perVpn);
}