From 53198128e85737cabe4436ff172d229368fbfda4 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 14 May 2024 00:02:46 +0000 Subject: [PATCH] trust-dns: hook NetworkManager for state changes there may be some edgecases to sort out around e.g. first-run, but so far it seems to be importing the DHCP search zones :) --- hosts/common/net/dns.nix | 2 + hosts/common/programs/networkmanager.nix | 13 +- modules/services/default.nix | 2 +- .../{trust-dns.nix => trust-dns/default.nix} | 25 ++- modules/services/trust-dns/trust-dns-nmhook | 164 ++++++++++++++++++ 5 files changed, 202 insertions(+), 4 deletions(-) rename modules/services/{trust-dns.nix => trust-dns/default.nix} (87%) create mode 100755 modules/services/trust-dns/trust-dns-nmhook diff --git a/hosts/common/net/dns.nix b/hosts/common/net/dns.nix index 98082f18..f569e6bd 100644 --- a/hosts/common/net/dns.nix +++ b/hosts/common/net/dns.nix @@ -50,6 +50,8 @@ sane.services.trust-dns.instances.localhost = { listenAddrs = [ "127.0.0.1" ]; enableRecursiveResolver = true; + # append zones discovered via DHCP to the resolver config. + includes = [ "/var/lib/trust-dns/dhcp-zones.toml" ]; }; networking.nameservers = [ "127.0.0.1" diff --git a/hosts/common/programs/networkmanager.nix b/hosts/common/programs/networkmanager.nix index e93b157f..6d2e94d4 100644 --- a/hosts/common/programs/networkmanager.nix +++ b/hosts/common/programs/networkmanager.nix @@ -26,10 +26,21 @@ in }; }; - systemd.services.NetworkManager-wait-online = lib.mkIf cfg.enabled{ + systemd.services.NetworkManager-wait-online = lib.mkIf cfg.enabled { wantedBy = [ "network-online.target" ]; }; + systemd.services.NetworkManager-dispatcher = lib.mkIf cfg.enabled { + wantedBy = [ "NetworkManager.service" ]; + # to debug, add NM_DISPATCHER_DEBUG_LOG=1 + serviceConfig.ExecStart = [ + "" # first blank line is to clear the upstream `ExecStart` field. 
+ "${cfg.package}/libexec/nm-dispatcher --persist" # --persist is needed for it to actually run as a daemon + ]; + serviceConfig.Restart = "always"; + serviceConfig.RestartSec = "1s"; + }; + environment.etc = lib.mkIf cfg.enabled { "NetworkManager/system-connections".source = "/var/lib/NetworkManager/system-connections"; "NetworkManager/NetworkManager.conf".text = '' diff --git a/modules/services/default.nix b/modules/services/default.nix index b6b7b9a0..b8b16dba 100644 --- a/modules/services/default.nix +++ b/modules/services/default.nix @@ -6,6 +6,6 @@ ./eg25-manager.nix ./kiwix-serve.nix ./nixserve.nix - ./trust-dns.nix + ./trust-dns ]; } diff --git a/modules/services/trust-dns.nix b/modules/services/trust-dns/default.nix similarity index 87% rename from modules/services/trust-dns.nix rename to modules/services/trust-dns/default.nix index 90cddc03..3cb0a55b 100644 --- a/modules/services/trust-dns.nix +++ b/modules/services/trust-dns/default.nix @@ -1,5 +1,9 @@ { config, lib, pkgs, ... }: let + trust-dns-nmhook = pkgs.static-nix-shell.mkPython3Bin { + pname = "trust-dns-nmhook"; + srcRoot = ./.; + }; cfg = config.sane.services.trust-dns; dns = config.sane.dns; toml = pkgs.formats.toml { }; @@ -27,6 +31,14 @@ let "%AWAN%" = ''"$(cat /var/www/wan.txt)"''; }; }; + includes = mkOption { + type = types.listOf types.str; + default = []; + description = '' + list of paths to cat into the final config. + non-existent paths are skipped. + ''; + }; enableRecursiveResolver = mkOption { type = types.bool; default = false; @@ -65,7 +77,7 @@ let }; }); - mkSystemdService = flavor: { port, listenAddrs, substitutions, extraConfig, ... }: let + mkSystemdService = flavor: { includes, listenAddrs, port, substitutions, extraConfig, ... 
}: let sed = "${pkgs.gnused}/bin/sed"; configTemplate = toml.generate "trust-dns-${flavor}.toml" ( ( @@ -86,7 +98,10 @@ let preStart = lib.concatStringsSep "\n" ( ['' mkdir -p "/var/lib/trust-dns/${flavor}" - ${sed} ${subs} -e "" "${configTemplate}" > "${configPath}" + ${sed} ${subs} -e "" "${configTemplate}" \ + | cat - \ + ${lib.escapeShellArgs includes} \ + > "${configPath}" || true ''] ++ lib.mapAttrsToList (zone: { rendered, ... }: '' ${sed} ${subs} -e "" ${pkgs.writeText "${zone}.zone.in" rendered} \ > "/var/lib/trust-dns/${flavor}/${zone}.zone" @@ -181,5 +196,11 @@ in cfg.instances ) ]; + + environment.etc."NetworkManager/dispatcher.d/60-trust-dns-nmhook" = lib.mkIf + (lib.any (c: c.enableRecursiveResolver) (builtins.attrValues cfg.instances)) + { + source = "${trust-dns-nmhook}/bin/trust-dns-nmhook"; + }; }; } diff --git a/modules/services/trust-dns/trust-dns-nmhook b/modules/services/trust-dns/trust-dns-nmhook new file mode 100755 index 00000000..0fdcd270 --- /dev/null +++ b/modules/services/trust-dns/trust-dns-nmhook @@ -0,0 +1,164 @@ +#!/usr/bin/env nix-shell +#!nix-shell -i python3 -p "python3.withPackages (ps: [ ])" -p systemd +# vim: set filetype=python : + +# /etc/NetworkManager/dispatcher.d/trust-dns-nmhook: +# NetworkManager-dispatcher.service calls this script whenever any network changes state. +# this includes when we activate a new network and receive DHCP info. +# specifically, this script propagates DHCP info to my DNS setup, +# ensuring things like "search domains" work (sorta) with my recursive resolver. +# +# NetworkManager-dispatcher invokes this with env vars related to the action/device/connection. notably: +# - DEVICE_IFACE (e.g. "wlp3s0") +# - DHCP4_DOMAIN_NAME_SERVERS (e.g. "1.1.1.1 4.4.4.4") +# - DHCP4_DOMAIN_SEARCH (e.g. "home.lan uninsane.org") +# - IP4_NAMESERVERS (e.g. "1.1.1.1") +# - CONNECTION_ID (e.g. "my-ssid-name") +# - CONNECTION_FILENAME (e.g. 
#     "/etc/NetworkManager/system-connections/XfinityWifi.nmconnection")

import argparse
import ipaddress
import logging
import os
import string
import subprocess

logger = logging.getLogger(__name__)

DNS_DIR = "/var/lib/trust-dns"

# plain-ASCII allowlist for names that end up as path components.
_SAFE_NAME_CHARS = frozenset(string.ascii_letters + string.digits + "_-")
# characters a search domain may contain before it is interpolated into TOML.
_DOMAIN_CHARS = frozenset(string.ascii_letters + string.digits + "_-.")


class Ops:
    """
    filesystem/process operations used by the hook.
    every path argument is resolved relative to `base_dir`.
    """

    def __init__(self, base_dir: str) -> None:
        self.base_dir = base_dir

    def read_file(self, path: str) -> str:
        """return the text contents of `path`."""
        # `with` guarantees the handle is closed even if `read` raises.
        with open(os.path.join(self.base_dir, path), encoding="utf-8") as f:
            return f.read()

    def write_file(self, path: str, contents: str) -> None:
        """replace the contents of `path` with `contents`."""
        with open(os.path.join(self.base_dir, path), "w", encoding="utf-8") as f:
            f.write(contents)

    def makedirs(self, path: str) -> None:
        """create directory `path` (and parents); a no-op if it already exists."""
        os.makedirs(os.path.join(self.base_dir, path), exist_ok=True)

    def copy_file(self, from_: str, to_: str) -> None:
        """copy the contents of `from_` over `to_`."""
        self.write_file(to_, self.read_file(from_))

    def exec_(self, cli: list[str]) -> None:
        """
        run `cli` (argv list, no shell) and wait for it.
        raises subprocess.CalledProcessError on a non-zero exit status.
        """
        # check_call rather than check_output: nothing consumes the output,
        # so let it flow to our own stdout instead of capturing and discarding it.
        subprocess.check_call(cli)


class DryRunOps(Ops):
    """
    variant of `Ops` which logs every mutating operation instead of performing it.
    `read_file` is inherited unchanged, since reading has no side effects.
    """

    def write_file(self, path: str, contents: str) -> None:
        logger.info(f"dry-run: not writing '{path}'")
        logger.debug(contents)

    def makedirs(self, path: str) -> None:
        logger.info(f"dry-run: not making dirs '{path}'")

    def copy_file(self, from_: str, to_: str) -> None:
        # the inherited copy_file would try to *read* `from_`, but in a dry run
        # that file was never written, so the read would raise FileNotFoundError.
        logger.info(f"dry-run: not copying '{from_}' to '{to_}'")

    def exec_(self, cli: list[str]) -> None:
        logger.info(f"dry-run: not `exec`ing: {' '.join(cli)}")


class NmConfig:
    """
    the subset of NetworkManager-dispatcher's environment this hook cares about.

    attributes:
    - nameservers:    well-formed IPv4 addresses from DHCP4_DOMAIN_NAME_SERVERS
    - conn_id:        CONNECTION_ID, sanitized for use in a file name ("unknown" if nothing survives)
    - search_domains: entries of DHCP4_DOMAIN_SEARCH (not yet validated)
    """

    def __init__(self) -> None:
        env = os.environ
        # str.split() with no argument also discards empty fields.
        self.nameservers = [
            ns for ns in env.get("DHCP4_DOMAIN_NAME_SERVERS", "").split()
            if _isValidNameserver(ns)
        ]
        # an id made purely of stripped characters would otherwise yield "-dhcp.toml".
        self.conn_id = sanitizeName(env.get("CONNECTION_ID", "unknown")) or "unknown"
        self.search_domains = env.get("DHCP4_DOMAIN_SEARCH", "").split()


def _isValidNameserver(ns: str) -> bool:
    """
    true if `ns` is a well-formed IPv4 address.
    the value gets pasted into the resolver's TOML as "<ns>:53", so anything
    else (stray quotes, hostnames, bare IPv6) would corrupt that config.
    """
    try:
        ipaddress.IPv4Address(ns)
    except ValueError:
        logger.warning(f"ignoring malformed nameserver {ns!r}")
        return False
    return True


def sanitizeName(name: str) -> str:
    """
    drop every character of `name` that isn't an ASCII letter, digit, `_` or `-`.
    """
    # compare against an explicit ASCII set: `c.lower() in "abc..."` would also
    # admit non-ASCII characters whose lowercase form is ASCII (e.g. U+212A KELVIN SIGN).
    return "".join(c for c in name if c in _SAFE_NAME_CHARS)


def isValidSearchDomain(domain: str) -> bool:
    """
    true if zones should be forwarded for `domain`:
    it must be safe to embed in the config and must not be a bare TLD.
    """
    if not _DOMAIN_CHARS.issuperset(domain):
        # the domain comes from the DHCP server and is interpolated into TOML verbatim.
        logger.warning(f"invalid search domain {domain!r}: unexpected characters")
        return False
    labels = [label for label in domain.split(".") if label]
    if len(labels) >= 2:
        # allow any search domain that's not a TLD.
        return True
    # if you trigger this, then whitelist the search domain here.
    # it's ok to have a search domain of any length -- i'm just hesitant to
    # allow hijacking of very large domain spaces.
    logger.warning(f"invalid search domain {domain}")
    return False


def formatZone(domain: str, nameservers: list[str]) -> str:
    """
    render a TOML `[[zones]]` entry which forwards `domain` to `nameservers` over UDP port 53.

    pre-requisites: nameservers is non-empty and no nameserver is "".
    domain is the human-friendly domain, trailing dot is optional.
    """
    assert domain, "empty domain"
    assert nameservers, f"no nameservers for zone {domain}"
    assert all(nameservers), "empty nameserver"
    if not domain.endswith("."):
        domain += "."
    servers = ",\n".join(
        f' {{ socket_addr = "{ns}:53", protocol = "udp", trust_nx_responses = false }}'
        for ns in nameservers
    )
    return (
        "\n"
        "[[zones]]\n"
        f'zone = "{domain}"\n'
        'zone_type = "Forward"\n'
        'stores = { type = "forward", name_servers = [\n'
        f"{servers}\n"
        "]}"
    )


def _renderZones(nm_config: NmConfig) -> str:
    """concatenate one forward zone per acceptable search domain ("" if there are none)."""
    return "".join(
        "\n" + formatZone(domain, nm_config.nameservers)
        for domain in nm_config.search_domains
        if isValidSearchDomain(domain) and nm_config.nameservers
    )


def main(argv: "list[str] | None" = None) -> None:
    """
    entry point: regenerate the DHCP-derived zones and restart the resolver.

    argv: command-line arguments excluding the program name; defaults to sys.argv[1:].
    everything else (nameservers, search domains, connection id) is read from the environment.
    """
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    logger.info('invoked')

    parser = argparse.ArgumentParser(description='update trust-dns config in response to NetworkManager event')
    parser.add_argument('--dns-dir', default=DNS_DIR)
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true')
    parser.add_argument('interface')
    parser.add_argument('action', help='name of the NetworkManager action this script is responding to')

    args = parser.parse_args(argv)

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    ops = DryRunOps(args.dns_dir) if args.dry_run else Ops(args.dns_dir)

    nm_config = NmConfig()
    logger.info(f"dhcp nameservers: '{' '.join(nm_config.nameservers)}'")
    logger.info(f"sanitized connection id: '{nm_config.conn_id}'")
    logger.info(f"search domains: '{' '.join(nm_config.search_domains)}'")

    if args.action not in ["dhcp4-change", "dns-change"]:
        logger.info(f"action ({args.action}): no handler")
        return

    specializedConfig = _renderZones(nm_config)

    # TODO: i'm not sure how this behaves in the presence of multiple interfaces.
    # do i want to persist config-per-interface, and then merge them, on every change?
    connConfigPath = f"nmhook/{nm_config.conn_id}-dhcp.toml"
    ops.makedirs("nmhook")
    ops.write_file(connConfigPath, specializedConfig)
    ops.copy_file(connConfigPath, "dhcp-zones.toml")
    ops.exec_([
        "systemctl",
        "restart",
        "trust-dns-localhost",
    ])


if __name__ == '__main__':
    try:
        main()
    except Exception:
        # catch exceptions here and always return `0`, so NetworkManager-dispatcher doesn't abort.
        # logger.exception records the message and the traceback in one entry.
        logger.exception("caught exception")