Files
nix-files/modules/vpn.nix

262 lines
10 KiB
Nix

# debugging:
# - `journalctl -u systemd-networkd`
# - `networkctl --help`
#
# docs:
# - wireguard (nixos): <https://nixos.wiki/wiki/WireGuard#Setting_up_WireGuard_with_systemd-networkd>
# - wireguard (arch): <https://wiki.archlinux.org/title/WireGuard>
#
# to route all internet traffic through a VPN endpoint, run `sane-vpn up ${vpnName}`
# to route an application's traffic through a VPN: `sane-vpn do ${vpnName} ${command[@]}`
# to show the routing table: `ip rule`
# to show the NAT rules used for bridging: `sudo iptables -t nat --list-rules -v`
# to force a peer address change (e.g. DNS change): `wg set "${interface}" peer "${publicKey}" endpoint "${endpoint}"`
#
# the rough idea here is:
# 1. each VPN has an IP address: if we originate a packet, and the source address is the VPN's address, then it gets routed over the VPN trivially.
# 2a. create a separate routing table for each VPN, with table id = ID.
# 2b. if a packet enters the VPN's table then it will be routed via the VPN.
# 2c. to apply a VPN to all internet traffic, system-wide, a rule is added that forces each packet to enter that VPN's routing table.
# - that's done with `systemctl start vpn-$VPN`.
# - the VPN acts as the default route. so traffic destined to e.g. a LAN device do not traverse the VPN in this case. only internet traffic is VPN'd.
# 3. to apply a VPN to internet traffic selectively, just proxy an applications traffic into the VPN device
# 3a. use a network namespace and a userspace TCP stack (e.g. pasta/slirp4netns).
# 3b. attach the VPN device to a bridge device, then connect that to a network namespace by using a veth pair.
# 3c. juse use `sanebox`, which abstracts the above options.
{ config, lib, pkgs, sane-lib, ... }:
let
cfg = config.sane.vpn;
vpnOpts = with lib; types.submodule ({ name, config, ... }: {
options = {
name = mkOption {
type = types.str;
description = ''
read-only value: must match the attrName of this vpn.
'';
};
id = mkOption {
type = types.ints.between 1 99;
description = ''
unique integer identifier for this VPN.
lower number = higher priority, in many senses.
lowest number = default VPN to use when no other is specified, or when multiple are enabled in the same circumstance.
'';
};
fwmark = mkOption {
type = types.int;
internal = true;
};
# priority*: used externally, by e.g. `sane-vpn`
priorityMain = mkOption {
type = types.int;
internal = true;
};
priorityWgTable = mkOption {
type = types.int;
internal = true;
};
priorityFwMark = mkOption {
type = types.int;
internal = true;
};
isDefault = mkOption {
type = types.bool;
description = ''
read-only value: set based on whichever VPN has the lowest id.
'';
internal = true;
};
endpoint = mkOption {
type = types.str;
description = ''
host:port which hosts the other end of the VPN.
e.g. "vpn.example.com:55280"
'';
};
publicKey = mkOption {
type = types.str;
description = ''
pubkey of the remote peer.
'';
};
addrV4 = mkOption {
type = types.str;
description = ''
IP address of my end of the VPN.
e.g. "172.27.12.34"
'';
};
subnetV4 = mkOption {
type = types.nullOr types.str;
description = ''
subnet dictating the range of IPs which should ALWAYS be routed through this VPN, no matter the system-wide settings.
'';
example = "24";
default = null;
};
dns = mkOption {
type = types.listOf types.str;
default = [
"46.227.67.134"
"192.165.9.158"
];
description = ''
dns servers to use for traffic associated with this VPN.
'';
};
privateKeyFile = mkOption {
type = types.either types.str types.path;
description = ''
path to the private key for my end of the VPN.
e.g. "/run/secrets/wg-home.priv"
'';
};
};
config = {
inherit name;
isDefault = builtins.all (other: config.id <= other.id) (builtins.attrValues cfg);
fwmark = config.id + 10000;
priorityMain = config.id + 100;
priorityWgTable = config.id + 200;
priorityFwMark = config.id + 300;
};
});
mkVpnConfig = name: { addrV4, dns, endpoint, fwmark, id, priorityMain, priorityWgTable, priorityFwMark, privateKeyFile, publicKey, subnetV4, ... }: {
assertions = [
{
assertion = (lib.count (c: c.id == id) (builtins.attrValues cfg)) == 1;
message = "multiple VPNs share id ${id}";
}
];
systemd.network.netdevs."98-${name}" = {
# see: `man 5 systemd.netdev`
netdevConfig = {
Kind = "wireguard";
Name = name;
};
wireguardConfig = {
PrivateKeyFile = privateKeyFile;
FirewallMark = fwmark;
};
wireguardPeers = [{
AllowedIPs = [
"0.0.0.0/0"
"::/0"
];
Endpoint = endpoint;
PublicKey = publicKey;
}];
};
systemd.network.networks."50-${name}" = {
# see: `man 5 systemd.network`
matchConfig.Name = name;
networkConfig.Address = [ "${addrV4}/32" ];
networkConfig.DNS = dns;
# TODO: `sane-vpn up <vpn>` should configure DNS to be sent over the VPN
# DNSDefaultRoute: system DNS queries are sent to this link's DNS server
# networkConfig.DNSDefaultRoute = true;
# Domains = ~.: system DNS queries are sent to this link's DNS server
# networkConfig.Domains = "~.";
routes = [{
Table = id;
Scope = "link";
Destination = "0.0.0.0/0";
Source = addrV4;
}] ++ lib.optionals (subnetV4 != null) [{
Scope = "link";
Destination = "${addrV4}/${subnetV4}";
Source = addrV4;
}];
# RequiredForOnline => should `systemd-networkd-wait-online` fail if this network can't come up?
linkConfig.RequiredForOnline = false;
};
systemd.network.config.networkConfig.ManageForeignRoutingPolicyRules = false;
# linux will drop inbound packets if it thinks a reply to that packet wouldn't exit via the same interface (rpfilter).
# wg-quick has a solution via `iptables -j CONNMARK`, and that does work for system-wide VPNs,
# but i couldn't get that to work for netns with SNAT, so set rpfilter to "loose".
networking.firewall.checkReversePath = "loose";
systemd.services."${name}-refresh" = {
# periodically re-apply peers, to ensure DNS mappings stay fresh
# borrowed from <repo:nixos/nixpkgs:nixos/modules/services/networking/wireguard.nix>
wantedBy = [ "network.target" ];
path = [ config.sane.programs.wireguard-tools.package ];
serviceConfig.Restart = "always";
serviceConfig.RestartSec = "60"; #< retry delay when we fail (because e.g. there's no network)
serviceConfig.Type = "simple";
unitConfig.StartLimitIntervalSec = 0;
script = ''
while wg set ${name} peer ${publicKey} endpoint ${endpoint}; do
echo "${name} set to:" "$(wg show ${name} endpoints)"
# in the normal case that DNS resolves, and whatnot, sleep before the next attempt
sleep 180
done
'';
# systemd hardening (systemd-analyze security wg-home-refresh.service)
serviceConfig.AmbientCapabilities = "CAP_NET_ADMIN";
serviceConfig.CapabilityBoundingSet = "CAP_NET_ADMIN";
serviceConfig.LockPersonality = true;
serviceConfig.MemoryDenyWriteExecute = true;
serviceConfig.NoNewPrivileges = true;
serviceConfig.ProtectClock = true;
serviceConfig.ProtectHostname = true;
serviceConfig.RemoveIPC = true;
serviceConfig.RestrictAddressFamilies = "AF_INET AF_INET6 AF_NETLINK";
#VVV this includes anything it reads from, e.g. /bin/sh; /nix/store/...
# see `systemd-analyze filesystems` for a full list
serviceConfig.RestrictFileSystems = "@common-block @basic-api";
serviceConfig.RestrictRealtime = true;
serviceConfig.RestrictSUIDSGID = true;
serviceConfig.SystemCallArchitectures = "native";
serviceConfig.SystemCallFilter = [
"@system-service"
"@sandbox"
"~@chown"
"~@cpu-emulation"
"~@keyring"
];
serviceConfig.DevicePolicy = "closed"; # only allow /dev/{null,zero,full,random,urandom}
# serviceConfig.DeviceAllow = "/dev/...";
serviceConfig.RestrictNamespaces = true;
};
# networking.firewall.extraCommands = with pkgs; ''
# # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN
# # because the "reverse path check" fails (i.e. it thinks a response to the packet would go out via a different interface than what the wireguard packet arrived at).
# # debug with e.g. `iptables --list -v -n -t mangle`
# # - and `networking.firewall.logReversePathDrops = true;`, `networking.firewall.logRefusedPackets = true;`
# # - and `journalctl -k` to see dropped packets
# #
# # note that wg-quick also adds a rule to reject non-local traffic from all interfaces EXCEPT the tunnel.
# # that may protect against actors trying to probe us: actors we connect to via wireguard who send their response packets (speculatively) to our plaintext IP to see if we accept them.
# # but that's fairly low concern, and firewalling by the gateway/NAT helps protect against that already.
# ${iptables}/bin/iptables -t mangle -I PREROUTING 1 -i ${name} -m mark --mark 0 -j CONNMARK --restore-mark
# ${iptables}/bin/iptables -t mangle -A POSTROUTING -o ${name} -m mark --mark ${builtins.toString id} -j CONNMARK --save-mark
# '';
};
in
{
options = with lib; {
sane.vpn = mkOption {
type = types.attrsOf vpnOpts;
default = {};
};
};
config = let
configs = lib.mapAttrsToList mkVpnConfig cfg;
take = f: {
assertions = f.assertions;
networking.firewall.checkReversePath = f.networking.firewall.checkReversePath;
networking.localCommands = f.networking.localCommands;
systemd.network = f.systemd.network;
systemd.services = f.systemd.services;
};
in take (sane-lib.mkTypedMerge take configs);
}