trust-dns: hook NetworkManager for state changes

there may be some edge cases to sort out (e.g. around first run),
but so far it seems to be importing the DHCP search zones :)
This commit is contained in:
Colin 2024-05-14 00:02:46 +00:00
parent bee3eea040
commit 53198128e8
5 changed files with 202 additions and 4 deletions

View File

@ -50,6 +50,8 @@
sane.services.trust-dns.instances.localhost = {
listenAddrs = [ "127.0.0.1" ];
enableRecursiveResolver = true;
# append zones discovered via DHCP to the resolver config.
includes = [ "/var/lib/trust-dns/dhcp-zones.toml" ];
};
networking.nameservers = [
"127.0.0.1"

View File

@ -26,10 +26,21 @@ in
};
};
systemd.services.NetworkManager-wait-online = lib.mkIf cfg.enabled{
systemd.services.NetworkManager-wait-online = lib.mkIf cfg.enabled {
wantedBy = [ "network-online.target" ];
};
systemd.services.NetworkManager-dispatcher = lib.mkIf cfg.enabled {
wantedBy = [ "NetworkManager.service" ];
# to debug, add NM_DISPATCHER_DEBUG_LOG=1
serviceConfig.ExecStart = [
"" # first blank line is to clear the upstream `ExecStart` field.
"${cfg.package}/libexec/nm-dispatcher --persist" # --persist is needed for it to actually run as a daemon
];
serviceConfig.Restart = "always";
serviceConfig.RestartSec = "1s";
};
environment.etc = lib.mkIf cfg.enabled {
"NetworkManager/system-connections".source = "/var/lib/NetworkManager/system-connections";
"NetworkManager/NetworkManager.conf".text = ''

View File

@ -6,6 +6,6 @@
./eg25-manager.nix
./kiwix-serve.nix
./nixserve.nix
./trust-dns.nix
./trust-dns
];
}

View File

@ -1,5 +1,9 @@
{ config, lib, pkgs, ... }:
let
trust-dns-nmhook = pkgs.static-nix-shell.mkPython3Bin {
pname = "trust-dns-nmhook";
srcRoot = ./.;
};
cfg = config.sane.services.trust-dns;
dns = config.sane.dns;
toml = pkgs.formats.toml { };
@ -27,6 +31,14 @@ let
"%AWAN%" = ''"$(cat /var/www/wan.txt)"'';
};
};
includes = mkOption {
type = types.listOf types.str;
default = [];
description = ''
list of paths to cat into the final config.
non-existent paths are skipped.
'';
};
enableRecursiveResolver = mkOption {
type = types.bool;
default = false;
@ -65,7 +77,7 @@ let
};
});
mkSystemdService = flavor: { port, listenAddrs, substitutions, extraConfig, ... }: let
mkSystemdService = flavor: { includes, listenAddrs, port, substitutions, extraConfig, ... }: let
sed = "${pkgs.gnused}/bin/sed";
configTemplate = toml.generate "trust-dns-${flavor}.toml" (
(
@ -86,7 +98,10 @@ let
preStart = lib.concatStringsSep "\n" (
[''
mkdir -p "/var/lib/trust-dns/${flavor}"
${sed} ${subs} -e "" "${configTemplate}" > "${configPath}"
${sed} ${subs} -e "" "${configTemplate}" \
| cat - \
${lib.escapeShellArgs includes} \
> "${configPath}" || true
''] ++ lib.mapAttrsToList (zone: { rendered, ... }: ''
${sed} ${subs} -e "" ${pkgs.writeText "${zone}.zone.in" rendered} \
> "/var/lib/trust-dns/${flavor}/${zone}.zone"
@ -181,5 +196,11 @@ in
cfg.instances
)
];
environment.etc."NetworkManager/dispatcher.d/60-trust-dns-nmhook" = lib.mkIf
(lib.any (c: c.enableRecursiveResolver) (builtins.attrValues cfg.instances))
{
source = "${trust-dns-nmhook}/bin/trust-dns-nmhook";
};
};
}

View File

@ -0,0 +1,164 @@
#!/usr/bin/env nix-shell
#!nix-shell -i python3 -p "python3.withPackages (ps: [ ])" -p systemd
# vim: set filetype=python :
# /etc/NetworkManager/dispatcher.d/60-trust-dns-nmhook:
# NetworkManager-dispatcher.service calls this script whenever any network changes state.
# this includes when we activate a new network and receive DHCP info.
# specifically, this script propagates DHCP info to my DNS setup,
# ensuring things like "search domains" work (sorta) with my recursive resolver.
#
# NetworkManager-dispatcher invokes this with env vars related to the action/device/connection. notably:
# - DEVICE_IFACE (e.g. "wlp3s0")
# - DHCP4_DOMAIN_NAME_SERVERS (e.g. "1.1.1.1 4.4.4.4")
# - DHCP4_DOMAIN_SEARCH (e.g. "home.lan uninsane.org")
# - IP4_NAMESERVERS (e.g. "1.1.1.1")
# - CONNECTION_ID (e.g. "my-ssid-name")
# - CONNECTION_FILENAME (e.g. "/etc/NetworkManager/system-connections/XfinityWifi.nmconnection")
import argparse
import logging
import os
import subprocess
# module-level logger; its level is inherited from the root logger, which `main` configures.
logger = logging.getLogger(__name__)
# default base directory for everything this script reads/writes; overridable with `--dns-dir`.
DNS_DIR = "/var/lib/trust-dns"
class Ops:
    """
    the side-effecting operations this script performs (file I/O, spawning processes),
    gathered in one place so that a subclass can stub them out.

    every `path` argument is interpreted relative to `base_dir`.
    """
    def __init__(self, base_dir: str) -> None:
        self.base_dir = base_dir

    def _resolve(self, path: str) -> str:
        """ map a `base_dir`-relative path onto the real filesystem. """
        return os.path.join(self.base_dir, path)

    def read_file(self, path: str) -> str:
        """ return the full text contents of `path`. raises OSError if it can't be opened. """
        # use a context manager so the file handle is closed promptly instead of leaked.
        with open(self._resolve(path)) as f:
            return f.read()

    def write_file(self, path: str, contents: str) -> None:
        """ create or truncate `path` and fill it with `contents`. """
        with open(self._resolve(path), "w") as f:
            f.write(contents)

    def makedirs(self, path: str) -> None:
        """ create directory `path` and any missing parents; a no-op if it already exists. """
        os.makedirs(self._resolve(path), exist_ok=True)

    def copy_file(self, from_: str, to_: str) -> None:
        """ copy the text contents of `from_` over `to_`, via `read_file` + `write_file`. """
        contents = self.read_file(from_)
        self.write_file(to_, contents)

    def exec_(self, cli: list[str]) -> None:
        """
        run the command `cli` and wait for it to finish.
        its stdout is captured and discarded; a non-zero exit raises `subprocess.CalledProcessError`.
        """
        subprocess.check_output(cli)
class DryRunOps(Ops):
    """
    variant of `Ops` which logs each mutating operation instead of performing it.
    `read_file` is inherited unchanged, so reads still hit the real filesystem.
    """
    def write_file(self, path: str, contents: str) -> None:
        logger.info(f"dry-run: not writing '{path}'")
        logger.debug(contents)

    def makedirs(self, path: str) -> None:
        logger.info(f"dry-run: not making dirs '{path}'")

    def copy_file(self, from_: str, to_: str) -> None:
        # the inherited `copy_file` reads `from_` off disk, but in a dry run the
        # preceding `write_file` never created it: that would raise FileNotFoundError
        # (or copy stale contents) and cut the dry run short. so just log the intent.
        logger.info(f"dry-run: not copying '{from_}' to '{to_}'")

    def exec_(self, cli: list[str]) -> None:
        logger.info(f"dry-run: not `exec`ing: {' '.join(cli)}")
class NmConfig:
    """
    snapshot of the connection details found in this process's environment variables.

    attributes:
    - nameservers:    non-empty entries of the space-separated DHCP4_DOMAIN_NAME_SERVERS
    - conn_id:        CONNECTION_ID (or "unknown" if unset), passed through `sanitizeName`
    - search_domains: non-empty entries of the space-separated DHCP4_DOMAIN_SEARCH
    """
    def __init__(self) -> None:
        env = os.environ
        # splitting on a single space yields "" for an unset/empty variable or for
        # doubled spaces; those empty entries are filtered out.
        dhcp_servers = [tok for tok in env.get("DHCP4_DOMAIN_NAME_SERVERS", "").split(" ") if tok]
        connection = sanitizeName(env.get("CONNECTION_ID", "unknown"))
        dhcp_search = [tok for tok in env.get("DHCP4_DOMAIN_SEARCH", "").split(" ") if tok]

        self.nameservers = dhcp_servers
        self.conn_id = connection
        self.search_domains = dhcp_search
def sanitizeName(name: str) -> str:
    """
    strip `name` down to ASCII letters, digits, `_` and `-`, preserving case and order.
    every other character (spaces, `/`, `.`, non-ASCII, ...) is dropped.
    may return "" if no character of `name` is allowed.
    """
    # compare each character against an explicit whitelist rather than lowercasing it
    # first: some non-ASCII characters lowercase *to* ASCII (e.g. U+212A KELVIN SIGN -> "k")
    # and would otherwise slip through.
    allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"
    return "".join(c for c in name if c in allowed)
def isValidSearchDomain(domain: str) -> bool:
    """
    decide whether `domain` is acceptable as a search domain.

    returns True when `domain` has at least two non-empty dot-separated components
    (i.e. it's not a bare TLD); otherwise logs a warning and returns False.
    empty components, such as from a trailing dot, are not counted.
    """
    comps = [c for c in domain.split(".") if c]
    if len(comps) >= 2:
        # allow any search domain that's not a TLD.
        return True
    # `Logger.warn` is a deprecated alias (removed in Python 3.13); use `Logger.warning`.
    logger.warning(f"invalid search domain {domain}")  # if you trigger this, then whitelist the search domain here
    # it's ok to have a search domain of any length -- i'm just hesitant to allow hijacking of very large domain spaces.
    return False
def formatZone(domain: str, nameservers: list[str]) -> str:
    """
    render a `[[zones]]` TOML stanza which forwards `domain` to `nameservers`.

    pre-requisites: nameservers is non-empty and no nameserver is "".
    domain is the human-friendly domain, trailing dot is optional
    (one is appended when missing).

    each nameserver becomes a `name_servers` entry using port 53 over udp.
    raises AssertionError if a pre-requisite on `nameservers` is violated.
    """
    assert nameservers, f"no nameservers for zone {domain}"
    if domain[-1] != ".":
        domain += "."

    header = (
        "\n"
        "[[zones]]\n"
        f'zone = "{domain}"\n'
        'zone_type = "Forward"\n'
        'stores = { type = "forward", name_servers = [\n'
    )

    entries = []
    for ns in nameservers:
        assert ns, "empty nameserver"
        entries.append(
            f' {{ socket_addr = "{ns}:53", protocol = "udp", trust_nx_responses = false }}'
        )

    # entries are comma-separated, one per line, with no trailing comma before the closer.
    return header + ",\n".join(entries) + "\n]}"
def main():
    """
    entry point: react to one NetworkManager-dispatcher event.

    CLI: `[--dns-dir DIR] [--verbose] [--dry-run] interface action`.
    connection details are read from the environment (see `NmConfig`).

    for the actions "dhcp4-change" and "dns-change", this:
    1. renders one forward zone per acceptable search domain (none if there are no nameservers),
    2. writes the result to `nmhook/<conn_id>-dhcp.toml` under the dns dir,
    3. copies that file to `dhcp-zones.toml` under the dns dir,
    4. restarts the `trust-dns-localhost` unit via `systemctl`.
    any other action is logged and ignored.
    """
    logging.basicConfig()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    logger.info('invoked')

    parser = argparse.ArgumentParser(description='update trust-dns config in response to NetworkManager event')
    parser.add_argument('--dns-dir', default=DNS_DIR)
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true')
    parser.add_argument('interface')
    parser.add_argument('action', help='name of the NetworkManager action this script is responding to')
    args = parser.parse_args()

    if args.verbose:
        root_logger.setLevel(logging.DEBUG)

    ops_cls = DryRunOps if args.dry_run else Ops
    ops = ops_cls(args.dns_dir)

    nm_config = NmConfig()
    logger.info(f"dhcp nameservers: '{' '.join(nm_config.nameservers)}'")
    logger.info(f"sanitized connection id: '{nm_config.conn_id}'")
    logger.info(f"search domains: '{' '.join(nm_config.search_domains)}'")

    handled_actions = ("dhcp4-change", "dns-change")
    if args.action not in handled_actions:
        logger.info(f"action ({args.action}): no handler")
        return

    # one stanza per search domain which passes validation; without any
    # nameservers there's nothing to forward to, so the config stays empty.
    specializedConfig = "".join(
        "\n" + formatZone(domain, nm_config.nameservers)
        for domain in nm_config.search_domains
        if isValidSearchDomain(domain) and nm_config.nameservers
    )

    # TODO: i'm not sure how this behaves in the presence of multiple interfaces.
    # do i want to persist config-per-interface, and then merge them, on every change?
    hook_dir = "nmhook"
    connConfigPath = f"{hook_dir}/{nm_config.conn_id}-dhcp.toml"
    ops.makedirs(hook_dir)
    ops.write_file(connConfigPath, specializedConfig)
    ops.copy_file(connConfigPath, "dhcp-zones.toml")

    restart_cmd = ["systemctl", "restart", "trust-dns-localhost"]
    ops.exec_(restart_cmd)
if __name__ == "__main__":
    # run `main`, but never let an ordinary exception escape: log it (message first,
    # then with traceback) so the script still exits `0` and NetworkManager-dispatcher doesn't abort.
    try:
        main()
    except Exception as err:
        logger.info(f"caught exception: {err}")
        logging.exception(err)