2024-01-21 00:49:34 +00:00
# debugging:
# - `journalctl -u systemd-networkd`
#
# docs:
# - wireguard (nixos): <https://nixos.wiki/wiki/WireGuard#Setting_up_WireGuard_with_systemd-networkd>
# - wireguard (arch): <https://wiki.archlinux.org/title/WireGuard>
2024-02-20 20:40:46 +00:00
#
# to route all internet traffic through a VPN endpoint, run `systemctl start vpn-${vpnName}`
# to show the routing table: `ip rule`
# to show the NAT rules used for bridging: `sudo iptables -t nat --list-rules -v`
#
# the rough idea here is:
# 1. each VPN has an IP address: if we originate a packet, and the source address is the VPN's address, then it gets routed over the VPN trivially.
# 2a. create a bridge device for each VPN. traffic exiting that bridge is source-NAT'd to the VPN's address.
# 2b. to route all traffic for a specific application (or container), give it access to only the bridge device.
# 3a. create a separate routing table for each VPN, with table id = ID.
# 3b. if a packet enters the VPN's table then it will be routed via the VPN.
# 3c. to apply a VPN to all internet traffic, system-wide, a rule is added that forces each packet to enter that VPN's routing table.
# - that's done with `systemctl start vpn-$VPN`.
# - the VPN acts as the default route. so traffic destined to e.g. a LAN device do not traverse the VPN in this case. only internet traffic is VPN'd.
2024-01-21 00:49:34 +00:00
{ config , lib , pkgs , sane-lib , . . . }:
let
cfg = config . sane . vpn ;
2024-01-21 01:04:31 +00:00
vpnOpts = with lib ; types . submodule ( { name , config , . . . }: {
2024-01-21 00:49:34 +00:00
options = {
2024-01-21 01:04:31 +00:00
name = mkOption {
type = types . str ;
description = ''
read-only value : must match the attrName of this vpn .
'' ;
} ;
2024-01-21 00:49:34 +00:00
id = mkOption {
2024-01-21 00:57:46 +00:00
type = types . ints . between 1 99 ;
2024-01-21 00:49:34 +00:00
description = ''
unique integer identifier for this VPN .
lower number = higher priority , in many senses .
lowest number = default VPN to use when no other is specified , or when multiple are enabled in the same circumstance .
2024-01-21 00:57:46 +00:00
'' ;
} ;
2024-02-20 22:16:23 +00:00
fwmark = mkOption {
type = types . int ;
} ;
priorityMain = mkOption {
type = types . int ;
} ;
priorityWgTable = mkOption {
type = types . int ;
} ;
priorityFwMark = mkOption {
type = types . int ;
} ;
isDefault = mkOption {
2024-01-21 00:57:46 +00:00
type = types . bool ;
description = ''
read-only value : set based on whichever VPN has the lowest id .
2024-01-21 00:49:34 +00:00
'' ;
} ;
endpoint = mkOption {
type = types . str ;
description = ''
host:port which hosts the other end of the VPN .
e . g . " v p n . e x a m p l e . c o m : 5 5 2 8 0 "
'' ;
} ;
publicKey = mkOption {
type = types . str ;
description = ''
pubkey of the remote peer .
'' ;
} ;
addrV4 = mkOption {
type = types . str ;
description = ''
IP address of my end of the VPN .
e . g . " 1 7 2 . 2 7 . 1 2 . 3 4 "
'' ;
} ;
dns = mkOption {
type = types . listOf types . str ;
default = [
" 4 6 . 2 2 7 . 6 7 . 1 3 4 "
" 1 9 2 . 1 6 5 . 9 . 1 5 8 "
] ;
description = ''
dns servers to use for traffic associated with this VPN .
'' ;
} ;
2024-01-21 03:45:39 +00:00
bridgeDevice = mkOption {
type = types . str ;
default = " b r - ${ name } " ;
description = ''
name of the bridge net device which will be created and configured so as to route all its outbound traffic over the VPN .
'' ;
} ;
2024-01-21 00:49:34 +00:00
privateKeyFile = mkOption {
type = types . either types . str types . path ;
description = ''
path to the private key for my end of the VPN .
e . g . " / r u n / s e c r e t s / w g - h o m e . p r i v "
'' ;
} ;
} ;
2024-01-21 00:57:46 +00:00
config = {
2024-01-21 01:04:31 +00:00
inherit name ;
2024-02-20 22:16:23 +00:00
isDefault = builtins . all ( other : config . id <= other . id ) ( builtins . attrValues cfg ) ;
fwmark = config . id + 10000 ;
priorityMain = config . id + 100 ;
priorityWgTable = config . id + 200 ;
priorityFwMark = config . id + 300 ;
2024-01-21 00:57:46 +00:00
} ;
2024-01-21 01:04:31 +00:00
} ) ;
2024-02-20 22:16:23 +00:00
mkVpnConfig = name : { id , dns , endpoint , publicKey , addrV4 , privateKeyFile , bridgeDevice , priorityMain , priorityWgTable , priorityFwMark , fwmark , . . . }: let
2024-01-21 00:49:34 +00:00
bridgeAddrV4 = " 1 0 . 2 0 . ${ builtins . toString id } . 1 / 2 4 " ;
in {
2024-01-21 00:57:46 +00:00
assertions = [
{
assertion = ( lib . count ( c : c . id == id ) ( builtins . attrValues cfg ) ) == 1 ;
message = " m u l t i p l e V P N s s h a r e i d ${ id } " ;
}
] ;
2024-02-20 20:40:46 +00:00
2024-01-21 00:49:34 +00:00
systemd . network . netdevs . " 9 8 - ${ name } " = {
# see: `man 5 systemd.netdev`
netdevConfig = {
Kind = " w i r e g u a r d " ;
Name = name ;
} ;
wireguardConfig = {
PrivateKeyFile = privateKeyFile ;
2024-02-20 20:40:46 +00:00
FirewallMark = fwmark ;
2024-01-21 00:49:34 +00:00
} ;
wireguardPeers = [ {
wireguardPeerConfig = {
AllowedIPs = [
" 0 . 0 . 0 . 0 / 0 "
" : : / 0 "
] ;
Endpoint = endpoint ;
PublicKey = publicKey ;
} ;
} ] ;
} ;
2024-02-20 20:40:46 +00:00
2024-01-21 00:49:34 +00:00
systemd . network . networks . " 5 0 - ${ name } " = {
# see: `man 5 systemd.network`
matchConfig . Name = name ;
networkConfig . Address = [ addrV4 ] ;
networkConfig . DNS = dns ;
# TODO: `sane-vpn up <vpn>` should configure DNS to be sent over the VPN
# DNSDefaultRoute: system DNS queries are sent to this link's DNS server
# networkConfig.DNSDefaultRoute = true;
# Domains = ~.: system DNS queries are sent to this link's DNS server
# networkConfig.Domains = "~.";
routingPolicyRules = [
{
routingPolicyRuleConfig = {
2024-02-20 20:40:46 +00:00
# send traffic from the container bridge out over the VPN.
2024-01-21 00:49:34 +00:00
# the bridge itself does source nat (SNAT) to rewrite the packet source address to that of the VPNs
# -- but that happens in POSTROUTING: *after* the kernel decides which interface to give the packet to next.
# therefore, we have to route here based on the packet's address as it is in PREROUTING, i.e. the bridge address. weird!
2024-02-20 22:16:23 +00:00
Priority = priorityWgTable ;
2024-01-21 00:49:34 +00:00
From = bridgeAddrV4 ;
Table = id ;
} ;
}
] ;
routes = [ {
routeConfig . Table = id ;
routeConfig . Scope = " l i n k " ;
routeConfig . Destination = " 0 . 0 . 0 . 0 / 0 " ;
routeConfig . Source = addrV4 ;
} ] ;
# RequiredForOnline => should `systemd-networkd-wait-online` fail if this network can't come up?
linkConfig . RequiredForOnline = false ;
} ;
2024-01-21 03:45:39 +00:00
systemd . network . netdevs . " 9 9 - ${ bridgeDevice } " = {
2024-01-21 00:49:34 +00:00
netdevConfig . Kind = " b r i d g e " ;
2024-01-21 03:45:39 +00:00
netdevConfig . Name = bridgeDevice ;
2024-01-21 00:49:34 +00:00
} ;
2024-02-20 20:40:46 +00:00
2024-01-21 03:45:39 +00:00
systemd . network . networks . " 5 1 - ${ bridgeDevice } " = {
matchConfig . Name = bridgeDevice ;
2024-01-21 00:49:34 +00:00
networkConfig . Description = " N A T s i n b o u n d t r a f f i c t o ${ name } , i n t e n d e d f o r c o n t a i n e r i s o l a t i o n " ;
networkConfig . Address = [ bridgeAddrV4 ] ;
networkConfig . DNS = dns ;
# ConfigureWithoutCarrier: a bridge with no attached interface has no carrier (this is to be expected).
# systemd ordinarily waits for a carrier to be present before "configuring" the bridge.
# i tell it to not do that, so that i can assign an IP address to the bridge before i connect it to anything.
# alternative may be to issue the bridge a static MACAddress?
# see: <https://github.com/systemd/systemd/issues/9252#issuecomment-808710185>
networkConfig . ConfigureWithoutCarrier = true ;
# networkConfig.DHCPServer = true;
linkConfig . RequiredForOnline = false ;
} ;
2024-02-20 20:40:46 +00:00
2024-01-21 00:49:34 +00:00
networking . localCommands = with pkgs ; ''
# view rules with:
# - `sudo iptables -t nat --list-rules -v`
# rewrite the source address of every packet leaving the container so that it's routable by the wg tunnel.
# note that this alone doesn't get it routed *to* the wg device. we can't, since SNAT is POSTROUTING (not PREROUTING).
# that part's done by an `ip rule` entry.
$ { iptables } /bin/iptables - A POSTROUTING - t nat - s $ { bridgeAddrV4 } - j SNAT - - to-source $ { addrV4 }
'' ;
# linux will drop inbound packets if it thinks a reply to that packet wouldn't exit via the same interface (rpfilter).
# wg-quick has a solution via `iptables -j CONNMARK`, and that does work for system-wide VPNs,
# but i couldn't get that to work for firejail/netns with SNAT, so set rpfilter to "loose".
networking . firewall . checkReversePath = " l o o s e " ;
# networking.firewall.extraCommands = with pkgs; ''
# # wireguard packet marking. without this, rpfilter drops responses from a wireguard VPN
# # because the "reverse path check" fails (i.e. it thinks a response to the packet would go out via a different interface than what the wireguard packet arrived at).
# # debug with e.g. `iptables --list -v -n -t mangle`
# # - and `networking.firewall.logReversePathDrops = true;`, `networking.firewall.logRefusedPackets = true;`
# # - and `journalctl -k` to see dropped packets
# #
# # note that wg-quick also adds a rule to reject non-local traffic from all interfaces EXCEPT the tunnel.
# # that may protect against actors trying to probe us: actors we connect to via wireguard who send their response packets (speculatively) to our plaintext IP to see if we accept them.
# # but that's fairly low concern, and firewalling by the gateway/NAT helps protect against that already.
# ${iptables}/bin/iptables -t mangle -I PREROUTING 1 -i ${name} -m mark --mark 0 -j CONNMARK --restore-mark
# ${iptables}/bin/iptables -t mangle -A POSTROUTING -o ${name} -m mark --mark ${builtins.toString id} -j CONNMARK --save-mark
# '';
} ;
in
{
options = with lib ; {
sane . vpn = mkOption {
type = types . attrsOf vpnOpts ;
default = { } ;
} ;
} ;
config = let
configs = lib . mapAttrsToList mkVpnConfig cfg ;
take = f : {
2024-01-21 00:57:46 +00:00
assertions = f . assertions ;
2024-01-21 00:49:34 +00:00
networking . firewall . checkReversePath = f . networking . firewall . checkReversePath ;
networking . localCommands = f . networking . localCommands ;
systemd . network = f . systemd . network ;
systemd . services = f . systemd . services ;
} ;
in take ( sane-lib . mkTypedMerge take configs ) ;
}