refactor: hosts/common/dns: split into separate files

This commit is contained in:
2024-12-03 21:13:15 +00:00
parent 5c69765759
commit 4de9fcc09a
5 changed files with 168 additions and 157 deletions

View File

@@ -2,7 +2,7 @@
{
imports = [
./dns.nix
./dns
./hostnames.nix
./modemmanager.nix
./networkmanager.nix

View File

@@ -1,156 +0,0 @@
# things to consider when changing these parameters:
# - temporary VPN access (`sane-vpn up ...`)
# - servo `ovpns` namespace (it *relies* on /etc/resolv.conf mentioning 127.0.0.53)
#
# components:
# - /etc/nsswitch.conf:
# - glibc uses this to provide `getaddrinfo`, i.e. host -> ip address lookup
# call directly with `getent ahostsv4 www.google.com`
# - `nss` (a component of glibc) is modular: names mentioned in that file are `dlopen`'d (i think that's the mechanism)
# in NixOS, that means _they have to be on LDPATH_.
# - `nscd` is used by NixOS simply to proxy nss requests.
# here, /etc/nsswitch.conf consumers contact nscd via /var/run/nscd/socket.
# in this way, only `nscd` needs to have the nss modules on LDPATH.
# - /etc/resolv.conf
# - contains the DNS servers for a system.
# - historically, NetworkManager would update this file as you switch networks.
# - modern implementations hardcode `127.0.0.53` and then systemd-resolved proxies everything (and caches).
#
# namespacing:
# - each namespace may use a different /etc/resolv.conf to specify different DNS servers
# - nscd breaks namespacing: the host nscd is unaware of the guest's /etc/resolv.conf, and so directs the guest's DNS requests to the host's servers.
# - this is fixed by either removing `/var/run/nscd/socket` from the namespace, or disabling nscd altogether.
# DNS configuration module: a list of config fragments combined with `lib.mkMerge`.
# - fragment 1: default for `sane.services.hickory-dns.enable`
# - fragment 2: Unbound as the local recursive resolver (only when hickory-dns is NOT the system resolver)
# - fragment 3: (commented out) systemd-resolved stub resolver alongside hickory-dns
# - fragment 4: nscd disabled; NSS modules exposed through LD_LIBRARY_PATH (unconditional)
{ config, lib, pkgs, ... }:
lib.mkMerge [
{
# hickory-dns is enabled by default exactly when it is selected as the system resolver.
sane.services.hickory-dns.enable = lib.mkDefault config.sane.services.hickory-dns.asSystemResolver;
# sane.services.hickory-dns.asSystemResolver = lib.mkDefault true;
}
(lib.mkIf (!config.sane.services.hickory-dns.asSystemResolver) {
# systemd-resolved is force-disabled here; Unbound (below) serves DNS on 127.0.0.1 instead.
services.resolved.enable = lib.mkForce false;
# resolve DNS recursively with Unbound.
services.unbound.enable = lib.mkDefault true;
services.unbound.resolveLocalQueries = false; #< disable, so that i can manage networking.nameservers manually
services.unbound.settings.server.interface = [ "127.0.0.1" ];
services.unbound.settings.server.access-control = [ "127.0.0.0/8 allow" ];
# effectively disable DNSSEC, to avoid a circular dependency between DNS resolution and NTP.
# without this, if the RTC fails, then both time and DNS are unrecoverable.
# if you enable this, make sure to persist the stateful data.
# alternatively, use services.unbound.settings.trust-anchor = ... (or trusted-keys-file)
services.unbound.enableRootTrustAnchor = false;
# root hints: are compiled-in (iterator/iter_hints.c), but sometimes `dig m.root-servers.net.` can *fail*.
# idk what that is about; hopefully manually specifying the hint triggers a less broken code path?
# services.unbound.settings.server.root-hints = "${pkgs.dns-root-data}/root.hints";
# scenario: net blip; unbound caches that a bunch of NS are unreachable; future queries fail
# - <https://forum.opnsense.org/index.php?topic=32852.0>
# infra settings described here:
# - <https://unbound.docs.nlnetlabs.nl/en/latest/reference/history/info-timeout-server-selection.html>
# - unbound keeps RTT estimates for each server.
# - if it sends a query and doesn't hear back based on when it expected, then it *resends* the query, with exponential backoff (i.e. doubling on each attempt)
# - at this point, responses from the old query are *ignored*
# - for new/unknown hosts, a RTT timeout of 376ms is assumed
services.unbound.settings.server.infra-keep-probing = true; #< if unbound fails to reach a host (NS), it by default *does not try again* for 900s. keep-probing tells it to keep trying, with a backoff.
# services.unbound.settings.server.infra-cache-min-rtt = 1000;
# services.unbound.settings.server.infra-cache-max-rtt = 1000;
# services.unbound.settings.server.infra-host-ttl = 30;
# services.unbound.settings.server.disable-dnssec-lame-check = true;
# perf tuning; see: <https://unbound.docs.nlnetlabs.nl/en/latest/topics/core/performance.html>
services.unbound.settings.server.num-threads = 4;
services.unbound.settings.server.so-rcvbuf = "4m"; #< higher value means less likely to drop client queries
services.unbound.settings.server.so-sndbuf = "4m";
# services.unbound.settings.remote-control.control-enable = true; # enable `unbound-control` tool
services.unbound.localControlSocketPath = "/run/unbound/unbound.ctl";
# if a resolution fails, or takes excessively long, reply with expired cache entries
# see: <https://unbound.docs.nlnetlabs.nl/en/latest/topics/core/serve-stale.html#rfc-8767>
# services.unbound.settings.server.serve-expired = true;
# services.unbound.settings.server.serve-expired-ttl = 86400; #< don't serve any records more outdated than this
# services.unbound.settings.server.serve-expired-client-timeout = 2800; #< only serve expired records if the client has been waiting this long, ms
# services.unbound.settings.server.cache-max-negative-ttl = 60; #< intended to limit damage during networking flakes, but instead this seems to cause unbound to cache error responses it *wouldn't* otherwise cache
# services.unbound.settings.server.use-caps-for-id = true; #< TODO: randomizes casing to avoid spoofing
# services.unbound.settings.server.prefetch = true; # prefetch RRs which are about to expire from the cache, to keep them primed
# the system's name server list: only the local Unbound instance.
networking.nameservers = [
# be compatible with systemd-resolved
# "127.0.0.53"
# or don't be compatible with systemd-resolved, but with libc and pasta instead
# see <pkgs/by-name/sane-scripts/src/sane-vpn>
"127.0.0.1"
# enable IPv6, or don't, because having just a single name server makes monkey-patching it easier
# "::1"
];
# `resolveLocalQueries` is disabled above, so the resolvconf name server is set by hand here.
networking.resolvconf.extraConfig = ''
# DNS serviced by `unbound` recursive resolver
name_servers='127.0.0.1'
'';
})
# (lib.mkIf (!config.sane.services.hickory-dns.asSystemResolver && config.sane.services.hickory-dns.enable) {
# # use systemd's stub resolver.
# # /etc/resolv.conf isn't sophisticated enough to use different servers per net namespace (or link).
# # instead, running the stub resolver on a known address in the root ns lets us rewrite packets
# # in servo's ovpns namespace to use the provider's DNS resolvers.
# # a weakness is we can only query 1 NS at a time (unless we were to clone the packets?)
# # TODO: improve hickory-dns recursive resolver and then remove this
# services.resolved.enable = true; #< to disable, set ` = lib.mkForce false`, as other systemd features default to enabling `resolved`.
# # without DNSSEC:
# # - dig matrix.org => works
# # - curl https://matrix.org => works
# # with default DNSSEC:
# # - dig matrix.org => works
# # - curl https://matrix.org => fails
# # i don't know why. this might somehow be interfering with the DNS run on this device (hickory-dns)
# services.resolved.dnssec = "false";
# networking.nameservers = [
# # use systemd-resolved resolver
# # full resolver (which understands /etc/hosts) lives on 127.0.0.53
# # stub resolver (just forwards upstream) lives on 127.0.0.54
# "127.0.0.53"
# ];
# })
{
# nscd -- the Name Service Caching Daemon -- caches DNS query responses
# in a way that's unaware of my VPN routing, so routes are frequently poor against
# services which advertise different IPs based on geolocation.
# nscd claims to be usable without a cache, but in practice i can't get it to not cache!
# nsncd is the Name Service NON-Caching Daemon. it's a drop-in that doesn't cache;
# this is OK on the host -- because systemd-resolved caches. it's probably sub-optimal
# in the netns and we query upstream DNS more often than needed. hm.
# services.nscd.enableNsncd = true;
# disabling nscd LOSES US SOME FUNCTIONALITY. in particular, only the glibc-builtin modules are accessible via /etc/resolv.conf (er, did i mean /etc/nsswitch.conf?).
# - dns: glibc-builtin
# - files: glibc-builtin
# - myhostname: systemd
# - mymachines: systemd
# - resolve: systemd
# in practice, i see no difference with nscd disabled.
# - the exception is when the system dns resolver doesn't do everything.
# for example, systemd-resolved does mDNS. hickory-dns does not. a hickory-dns system won't be mDNS-capable.
# disabling nscd VASTLY simplifies netns and process isolation. see explainer at top of file.
services.nscd.enable = false;
# system.nssModules = lib.mkForce [];
# the assertion matched by this pattern is silenced, since nscd is disabled on purpose above.
sane.silencedAssertions = [''.*Loading NSS modules from system.nssModules.*requires services.nscd.enable being set to true.*''];
# add NSS modules into their own subdirectory.
# then i can add just the NSS modules library path to the global LD_LIBRARY_PATH, rather than ALL of /run/current-system/sw/lib.
# TODO: i'm doing this so as to achieve mdns DNS resolution (avahi). it would be better to just have hickory-dns delegate .local to avahi
# (except avahi doesn't act as a local resolver over DNS protocol -- only dbus).
environment.systemPackages = [(pkgs.symlinkJoin {
name = "nss-modules";
paths = config.system.nssModules.list;
# postBuild: move the joined `lib/libnss_*` entries aside, delete the rest of $out,
# then recreate $out so that it contains only `lib/nss/`.
postBuild = ''
mkdir nss
mv $out/lib/libnss_* nss
rm -rf $out
mkdir -p $out/lib
mv nss $out/lib
'';
})];
environment.variables.LD_LIBRARY_PATH = [ "/run/current-system/sw/lib/nss" ];
systemd.globalEnvironment.LD_LIBRARY_PATH = "/run/current-system/sw/lib/nss"; #< specifically for `geoclue.service`
}
]

View File

@@ -0,0 +1,69 @@
# things to consider when changing these parameters:
# - temporary VPN access (`sane-vpn up ...`)
# - servo `ovpns` namespace (it *relies* on /etc/resolv.conf mentioning 127.0.0.53)
#
# components:
# - /etc/nsswitch.conf:
# - glibc uses this to provide `getaddrinfo`, i.e. host -> ip address lookup
# call directly with `getent ahostsv4 www.google.com`
# - `nss` (a component of glibc) is modular: names mentioned in that file are `dlopen`'d (i think that's the mechanism)
# in NixOS, that means _they have to be on LDPATH_.
# - `nscd` is used by NixOS simply to proxy nss requests.
# here, /etc/nsswitch.conf consumers contact nscd via /var/run/nscd/socket.
# in this way, only `nscd` needs to have the nss modules on LDPATH.
# - /etc/resolv.conf
# - contains the DNS servers for a system.
# - historically, NetworkManager would update this file as you switch networks.
# - modern implementations hardcode `127.0.0.53` and then systemd-resolved proxies everything (and caches).
#
# namespacing:
# - each namespace may use a different /etc/resolv.conf to specify different DNS servers
# - nscd breaks namespacing: the host nscd is unaware of the guest's /etc/resolv.conf, and so directs the guest's DNS requests to the host's servers.
# - this is fixed by either removing `/var/run/nscd/socket` from the namespace, or disabling nscd altogether.
{ config, pkgs, ... }:
let
  # location, inside the system profile, of the directory holding only the NSS plugin libraries.
  nssLibDir = "/run/current-system/sw/lib/nss";

  # every configured NSS module, repackaged so that the `libnss_*` libraries sit
  # in their own `lib/nss` subdirectory and nothing else from those packages is kept.
  # this lets just the NSS library path go onto the global LD_LIBRARY_PATH,
  # rather than ALL of /run/current-system/sw/lib.
  # TODO: this exists to get mDNS resolution (avahi). it would be nicer for hickory-dns
  # to delegate `.local` to avahi (except avahi only speaks dbus, not the DNS protocol).
  nssModulesOnly = pkgs.symlinkJoin {
    name = "nss-modules";
    paths = config.system.nssModules.list;
    postBuild = ''
      mkdir nss
      mv $out/lib/libnss_* nss
      rm -rf $out
      mkdir -p $out/lib
      mv nss $out/lib
    '';
  };
in
{
  imports = [
    # NOTE(review): with ./hickory-dns.nix left un-imported, the
    # `sane.services.hickory-dns.enable` default it defines is not applied from here -- confirm that's intended.
    # ./hickory-dns.nix
    ./unbound.nix
  ];

  # why nscd is off:
  # - nscd (the Name Service Caching Daemon) caches DNS responses without any awareness
  #   of my VPN routing, so services which advertise geolocation-dependent IPs often get poor routes.
  # - nscd claims it can run without a cache, but in practice i can't get it to stop caching.
  # - nsncd (the Name Service NON-Caching Daemon) is a drop-in that doesn't cache.
  #   that's OK on the host, because systemd-resolved caches; inside the netns it's probably
  #   sub-optimal, querying upstream DNS more often than needed. hm.
  #   services.nscd.enableNsncd = true;
  #
  # what disabling nscd costs: only the glibc-builtin NSS modules stay reachable
  # via /etc/resolv.conf (er, did i mean /etc/nsswitch.conf?):
  # - dns: glibc-builtin
  # - files: glibc-builtin
  # - myhostname: systemd
  # - mymachines: systemd
  # - resolve: systemd
  # in practice i see no difference with nscd disabled, except when the system DNS resolver
  # doesn't do everything: e.g. systemd-resolved does mDNS, hickory-dns does not,
  # so a hickory-dns system won't be mDNS-capable.
  #
  # what it buys: netns and process isolation become VASTLY simpler. see explainer at top of file.
  services.nscd.enable = false;
  # system.nssModules = lib.mkForce [];
  sane.silencedAssertions = [
    ''.*Loading NSS modules from system.nssModules.*requires services.nscd.enable being set to true.*''
  ];

  environment = {
    systemPackages = [ nssModulesOnly ];
    variables.LD_LIBRARY_PATH = [ nssLibDir ];
  };
  systemd.globalEnvironment.LD_LIBRARY_PATH = nssLibDir; #< specifically for `geoclue.service`
}

View File

@@ -0,0 +1,32 @@
# hickory-dns integration for the host's DNS setup.
# - enables `sane.services.hickory-dns` by default whenever it is selected as the system resolver.
# - when hickory-dns is enabled but is NOT the system resolver, runs systemd-resolved
#   and points the system at it (127.0.0.53).
#
# the trailing `...` in the argument set is required: the module system calls each module
# with more arguments than just `config` and `lib` (e.g. `options`), and a closed
# argument set makes evaluation fail with "called with unexpected argument".
#
# NOTE(review): ./unbound.nix sets `services.resolved.enable = lib.mkForce false` and its own
# `networking.nameservers` under `!asSystemResolver`; reconcile the two before importing both.
{ config, lib, ... }:
{
  config = lib.mkMerge [
    {
      sane.services.hickory-dns.enable = lib.mkDefault config.sane.services.hickory-dns.asSystemResolver;
      # sane.services.hickory-dns.asSystemResolver = lib.mkDefault true;
    }
    (lib.mkIf (!config.sane.services.hickory-dns.asSystemResolver && config.sane.services.hickory-dns.enable) {
      # use systemd's stub resolver.
      # /etc/resolv.conf isn't sophisticated enough to use different servers per net namespace (or link).
      # instead, running the stub resolver on a known address in the root ns lets us rewrite packets
      # in servo's ovpns namespace to use the provider's DNS resolvers.
      # a weakness is we can only query 1 NS at a time (unless we were to clone the packets?)
      # TODO: improve hickory-dns recursive resolver and then remove this
      services.resolved.enable = true; #< to disable, set ` = lib.mkForce false`, as other systemd features default to enabling `resolved`.
      # without DNSSEC:
      # - dig matrix.org => works
      # - curl https://matrix.org => works
      # with default DNSSEC:
      # - dig matrix.org => works
      # - curl https://matrix.org => fails
      # i don't know why. this might somehow be interfering with the DNS run on this device (hickory-dns)
      services.resolved.dnssec = "false";
      networking.nameservers = [
        # use systemd-resolved resolver
        # full resolver (which understands /etc/hosts) lives on 127.0.0.53
        # stub resolver (just forwards upstream) lives on 127.0.0.54
        "127.0.0.53"
      ];
    })
  ];
}

View File

@@ -0,0 +1,66 @@
# Unbound as the local recursive DNS resolver.
# active only while hickory-dns is not configured as the system resolver.
{ config, lib, ... }:
let
  unboundIsSystemResolver = !config.sane.services.hickory-dns.asSystemResolver;
in
{
  config = lib.mkIf unboundIsSystemResolver {
    services.resolved.enable = lib.mkForce false;

    # resolve DNS recursively with Unbound.
    services.unbound = {
      enable = lib.mkDefault true;
      resolveLocalQueries = false; #< disable, so that i can manage networking.nameservers manually

      # effectively disable DNSSEC, to avoid a circular dependency between DNS resolution and NTP:
      # without this, if the RTC fails, then both time and DNS are unrecoverable.
      # if you enable this, make sure to persist the stateful data.
      # alternatively, use services.unbound.settings.trust-anchor = ... (or trusted-keys-file)
      enableRootTrustAnchor = false;

      # settings.remote-control.control-enable = true; # enable `unbound-control` tool
      localControlSocketPath = "/run/unbound/unbound.ctl";

      settings.server = {
        interface = [ "127.0.0.1" ];
        access-control = [ "127.0.0.0/8 allow" ];

        # root hints: are compiled-in (iterator/iter_hints.c), but sometimes `dig m.root-servers.net.` can *fail*.
        # idk what that is about; hopefully manually specifying the hint triggers a less broken code path?
        # (uncommenting this also requires adding `pkgs` to the module arguments.)
        # root-hints = "${pkgs.dns-root-data}/root.hints";

        # scenario: net blip; unbound caches that a bunch of NS are unreachable; future queries fail
        # - <https://forum.opnsense.org/index.php?topic=32852.0>
        # infra settings described here:
        # - <https://unbound.docs.nlnetlabs.nl/en/latest/reference/history/info-timeout-server-selection.html>
        #   - unbound keeps RTT estimates for each server.
        #   - if it sends a query and doesn't hear back based on when it expected, then it *resends* the query,
        #     with exponential backoff (i.e. doubling on each attempt)
        #     - at this point, responses from the old query are *ignored*
        #   - for new/unknown hosts, a RTT timeout of 376ms is assumed
        # if unbound fails to reach a host (NS), it by default *does not try again* for 900s.
        # keep-probing tells it to keep trying, with a backoff.
        infra-keep-probing = true;
        # infra-cache-min-rtt = 1000;
        # infra-cache-max-rtt = 1000;
        # infra-host-ttl = 30;
        # disable-dnssec-lame-check = true;

        # perf tuning; see: <https://unbound.docs.nlnetlabs.nl/en/latest/topics/core/performance.html>
        num-threads = 4;
        so-rcvbuf = "4m"; #< higher value means less likely to drop client queries
        so-sndbuf = "4m";

        # if a resolution fails, or takes excessively long, reply with expired cache entries
        # see: <https://unbound.docs.nlnetlabs.nl/en/latest/topics/core/serve-stale.html#rfc-8767>
        # serve-expired = true;
        # serve-expired-ttl = 86400; #< don't serve any records more outdated than this
        # serve-expired-client-timeout = 2800; #< only serve expired records if the client has been waiting this long, ms
        # cache-max-negative-ttl = 60; #< intended to limit damage during networking flakes, but instead this seems to cause unbound to cache error responses it *wouldn't* otherwise cache
        # use-caps-for-id = true; #< TODO: randomizes casing to avoid spoofing
        # prefetch = true; # prefetch RRs which are about to expire from the cache, to keep them primed
      };
    };

    networking = {
      nameservers = [
        # be compatible with systemd-resolved
        # "127.0.0.53"
        # or don't be compatible with systemd-resolved, but with libc and pasta instead
        # see <pkgs/by-name/sane-scripts/src/sane-vpn>
        "127.0.0.1"
        # enable IPv6, or don't, because having just a single name server makes monkey-patching it easier
        # "::1"
      ];
      resolvconf.extraConfig = ''
        # DNS serviced by `unbound` recursive resolver
        name_servers='127.0.0.1'
      '';
    };
  };
}