From 604b7c139f4d44d9fb0e84d812efdfcf5dda3448 Mon Sep 17 00:00:00 2001 From: Arian van Putten Date: Thu, 29 Aug 2019 16:32:59 +0200 Subject: [PATCH] Fix letsencrypt (#60219) * nixos/acme: Fix ordering of cert requests When subsequent certificates would be added, they would not wake up nginx correctly due to target units only being triggered once. We now added more fine-grained systemd dependencies to make sure nginx always is aware of new certificates and doesn't restart too early resulting in a crash. Furthermore, the acme module has been refactored. Mostly to get rid of the deprecated PermissionStartOnly systemd options which were deprecated. Below is a summary of changes made. * Use SERVICE_RESULT to determine status This was added in systemd v232. we don't have to keep track of the EXITCODE ourselves anymore. * Add regression test for requesting mutliple domains * Deprecate 'directory' option We now use systemd's StateDirectory option to manage create and permissions of the acme state directory. * The webroot is created using a systemd.tmpfiles.rules rule instead of the preStart script. * Depend on certs directly By getting rid of the target units, we make sure ordering is correct in the case that you add new certs after already having deployed some. Reason it broke before: acme-certificates.target would be in active state, and if you then add a new cert, it would still be active and hence nginx would restart without even requesting a new cert. Not good! We make the dependencies more fine-grained now. this should fix that * Remove activationDelay option It complicated the code a lot, and is rather arbitrary. What if your activation script takes more than activationDelay seconds? Instead, one should use systemd dependencies to make sure some action happens before setting the certificate live. e.g. If you want to wait until your cert is published in DNS DANE / TLSA, you could create a unit that blocks until it appears in DNS: ``` RequiredBy=acme-${cert}.service After=acme-${cert}.service ExecStart=publish-wait-for-dns-script ``` --- nixos/doc/manual/release-notes/rl-1909.xml | 23 ++- nixos/modules/rename.nix | 5 + nixos/modules/security/acme.nix | 152 ++++-------------- nixos/modules/security/acme.xml | 4 +- .../services/web-servers/nginx/default.nix | 21 +-- nixos/tests/acme.nix | 98 +++++++++-- 6 files changed, 161 insertions(+), 142 deletions(-) diff --git a/nixos/doc/manual/release-notes/rl-1909.xml b/nixos/doc/manual/release-notes/rl-1909.xml index 36bea28530be..60f756b78c6d 100644 --- a/nixos/doc/manual/release-notes/rl-1909.xml +++ b/nixos/doc/manual/release-notes/rl-1909.xml @@ -318,7 +318,28 @@ services.strongswan-swanctl services.httpd - + + + + The option has been replaced by a read-only option for each certificate you define. This will be + a subdirectory of /var/lib/acme. You can use this read-only option to figure out where the certificates are stored for a specific certificate. For example, + the option will use this directory option to find the certs for the virtual host. + + + and options have been removed. To execute a service before certificates + are provisioned or renewed add a RequiredBy=acme-${cert}.service to any service. + + + Furthermore, the acme module will not automatically add a dependency on lighttpd.service anymore. If you are using certficates provided by letsencrypt + for lighttpd, then you should depend on the certificate service acme-${cert}.service> manually. + + + For nginx, the dependencies are still automatically managed when is enabled just like before. What changed is that nginx now directly depends on the specific certificates that it needs, + instead of depending on the catch-all acme-certificates.target. This target unit was also removed from the codebase. + This will mean nginx will no longer depend on certificates it isn't explicitly managing and fixes a bug with certificate renewal + ordering racing with nginx restarting which could lead to nginx getting in a broken state as described at + NixOS/nixpkgs#60180. + diff --git a/nixos/modules/rename.nix b/nixos/modules/rename.nix index 348ad094e5ad..1048c2af2ea8 100644 --- a/nixos/modules/rename.nix +++ b/nixos/modules/rename.nix @@ -256,6 +256,11 @@ with lib; # binfmt (mkRenamedOptionModule [ "boot" "binfmtMiscRegistrations" ] [ "boot" "binfmt" "registrations" ]) + + # ACME + (mkRemovedOptionModule [ "security" "acme" "directory"] "ACME Directory is now hardcoded to /var/lib/acme and its permisisons are managed by systemd. See https://github.com/NixOS/nixpkgs/issues/53852 for more info.") + (mkRemovedOptionModule [ "security" "acme" "preDelay"] "This option has been removed. If you want to make sure that something executes before certificates are provisioned, add a RequiredBy=acme-\${cert}.service to the service you want to execute before the cert renewal") + (mkRemovedOptionModule [ "security" "acme" "activationDelay"] "This option has been removed. If you want to make sure that something executes before certificates are provisioned, add a RequiredBy=acme-\${cert}.service to the service you want to execute before the cert renewal") # KSM (mkRenamedOptionModule [ "hardware" "enableKSM" ] [ "hardware" "ksm" "enable" ]) diff --git a/nixos/modules/security/acme.nix b/nixos/modules/security/acme.nix index 092704c6fc3f..feb54affbf83 100644 --- a/nixos/modules/security/acme.nix +++ b/nixos/modules/security/acme.nix @@ -80,25 +80,11 @@ let ''; }; - activationDelay = mkOption { - type = types.nullOr types.str; - default = null; - description = '' - Systemd time span expression to delay copying new certificates to main - state directory. See systemd.time - 7. - ''; - }; - - preDelay = mkOption { - type = types.lines; - default = ""; - description = '' - Commands to run after certificates are re-issued but before they are - activated. Typically the new certificate is published to DNS. - - Executed in the same directory with the new certificate. - ''; + directory = mkOption { + type = types.str; + readOnly = true; + default = "/var/lib/acme/${name}"; + description = "Directory where certificate and other state is stored."; }; extraDomains = mkOption { @@ -126,13 +112,6 @@ in options = { security.acme = { - directory = mkOption { - default = "/var/lib/acme"; - type = types.str; - description = '' - Directory where certs and other state will be stored by default. - ''; - }; validMin = mkOption { type = types.int; @@ -181,7 +160,11 @@ in default = { }; type = with types; attrsOf (submodule certOpts); description = '' - Attribute set of certificates to get signed and renewed. + Attribute set of certificates to get signed and renewed. Creates + acme-''${cert}.{service,timer} systemd units for + each certificate defined here. Other services can add dependencies + to those units if they rely on the certificates being present, + or trigger restarts of the service if certificates get renewed. ''; example = literalExample '' { @@ -209,8 +192,7 @@ in servicesLists = mapAttrsToList certToServices cfg.certs; certToServices = cert: data: let - cpath = lpath + optionalString (data.activationDelay != null) ".staging"; - lpath = "${cfg.directory}/${cert}"; + lpath = "acme/${cert}"; rights = if data.allowKeysForGroup then "750" else "700"; cmdline = [ "-v" "-d" data.domain "--default_root" data.webroot "--valid_min" cfg.validMin ] ++ optionals (data.email != null) [ "--email" data.email ] @@ -224,79 +206,27 @@ in serviceConfig = { Type = "oneshot"; SuccessExitStatus = [ "0" "1" ]; - PermissionsStartOnly = true; User = data.user; Group = data.group; PrivateTmp = true; + StateDirectory = lpath; + StateDirectoryMode = rights; + WorkingDirectory = "/var/lib/${lpath}"; + ExecStart = "${pkgs.simp_le}/bin/simp_le ${escapeShellArgs cmdline}"; + ExecStopPost = + let + script = pkgs.writeScript "acme-post-stop" '' + #!${pkgs.runtimeShell} -e + ${data.postRun} + ''; + in + "+${script}"; }; - path = with pkgs; [ simp_le systemd ]; - preStart = '' - mkdir -p '${cfg.directory}' - chown 'root:root' '${cfg.directory}' - chmod 755 '${cfg.directory}' - if [ ! -d '${cpath}' ]; then - mkdir '${cpath}' - fi - chmod ${rights} '${cpath}' - chown -R '${data.user}:${data.group}' '${cpath}' - mkdir -p '${data.webroot}/.well-known/acme-challenge' - chown -R '${data.user}:${data.group}' '${data.webroot}/.well-known/acme-challenge' - ''; - script = '' - cd '${cpath}' - set +e - simp_le ${escapeShellArgs cmdline} - EXITCODE=$? - set -e - echo "$EXITCODE" > /tmp/lastExitCode - exit "$EXITCODE" - ''; - postStop = '' - cd '${cpath}' - if [ -e /tmp/lastExitCode ] && [ "$(cat /tmp/lastExitCode)" = "0" ]; then - ${if data.activationDelay != null then '' - - ${data.preDelay} - - if [ -d '${lpath}' ]; then - systemd-run --no-block --on-active='${data.activationDelay}' --unit acme-setlive-${cert}.service - else - systemctl --wait start acme-setlive-${cert}.service - fi - '' else data.postRun} - - # noop ensuring that the "if" block is non-empty even if - # activationDelay == null and postRun == "" - true - fi - ''; - - before = [ "acme-certificates.target" ]; - wantedBy = [ "acme-certificates.target" ]; - }; - delayService = { - description = "Set certificate for ${cert} live"; - path = with pkgs; [ rsync ]; - serviceConfig = { - Type = "oneshot"; - }; - script = '' - rsync -a --delete-after '${cpath}/' '${lpath}' - ''; - postStop = data.postRun; }; selfsignedService = { description = "Create preliminary self-signed certificate for ${cert}"; path = [ pkgs.openssl ]; - preStart = '' - if [ ! -d '${cpath}' ] - then - mkdir -p '${cpath}' - chmod ${rights} '${cpath}' - chown '${data.user}:${data.group}' '${cpath}' - fi - ''; script = '' workdir="$(mktemp -d)" @@ -318,50 +248,41 @@ in -out $workdir/server.crt # Copy key to destination - cp $workdir/server.key ${cpath}/key.pem + cp $workdir/server.key /var/lib/${lpath}/key.pem # Create fullchain.pem (same format as "simp_le ... -f fullchain.pem" creates) - cat $workdir/{server.crt,ca.crt} > "${cpath}/fullchain.pem" + cat $workdir/{server.crt,ca.crt} > "/var/lib/${lpath}/fullchain.pem" # Create full.pem for e.g. lighttpd - cat $workdir/{server.key,server.crt,ca.crt} > "${cpath}/full.pem" + cat $workdir/{server.key,server.crt,ca.crt} > "/var/lib/${lpath}/full.pem" # Give key acme permissions - chown '${data.user}:${data.group}' "${cpath}/"{key,fullchain,full}.pem - chmod ${rights} "${cpath}/"{key,fullchain,full}.pem + chown '${data.user}:${data.group}' "/var/lib/${lpath}/"{key,fullchain,full}.pem + chmod ${rights} "/var/lib/${lpath}/"{key,fullchain,full}.pem ''; serviceConfig = { Type = "oneshot"; - PermissionsStartOnly = true; PrivateTmp = true; + StateDirectory = lpath; User = data.user; Group = data.group; }; unitConfig = { # Do not create self-signed key when key already exists - ConditionPathExists = "!${cpath}/key.pem"; + ConditionPathExists = "!/var/lib/${lpath}/key.pem"; }; - before = [ - "acme-selfsigned-certificates.target" - ]; - wantedBy = [ - "acme-selfsigned-certificates.target" - ]; }; in ( [ { name = "acme-${cert}"; value = acmeService; } ] ++ optional cfg.preliminarySelfsigned { name = "acme-selfsigned-${cert}"; value = selfsignedService; } - ++ optional (data.activationDelay != null) { name = "acme-setlive-${cert}"; value = delayService; } ); servicesAttr = listToAttrs services; - injectServiceDep = { - after = [ "acme-selfsigned-certificates.target" ]; - wants = [ "acme-selfsigned-certificates.target" "acme-certificates.target" ]; - }; in - servicesAttr // - (if config.services.nginx.enable then { nginx = injectServiceDep; } else {}) // - (if config.services.lighttpd.enable then { lighttpd = injectServiceDep; } else {}); + servicesAttr; + + systemd.tmpfiles.rules = + flip mapAttrsToList cfg.certs + (cert: data: "d ${data.webroot}/.well-known/acme-challenge - ${data.user} ${data.group}"); systemd.timers = flip mapAttrs' cfg.certs (cert: data: nameValuePair ("acme-${cert}") @@ -377,9 +298,6 @@ in }; }) ); - - systemd.targets."acme-selfsigned-certificates" = mkIf cfg.preliminarySelfsigned {}; - systemd.targets."acme-certificates" = {}; }) ]; diff --git a/nixos/modules/security/acme.xml b/nixos/modules/security/acme.xml index ef71fe53d0c7..9d0a1995e0ff 100644 --- a/nixos/modules/security/acme.xml +++ b/nixos/modules/security/acme.xml @@ -59,10 +59,8 @@ http { The private key key.pem and certificate fullchain.pem will be put into - /var/lib/acme/foo.example.com. The target directory can - be configured with the option . + /var/lib/acme/foo.example.com. - Refer to for all available configuration options for the security.acme diff --git a/nixos/modules/services/web-servers/nginx/default.nix b/nixos/modules/services/web-servers/nginx/default.nix index c1a51fbf8b42..5c65a2388d6f 100644 --- a/nixos/modules/services/web-servers/nginx/default.nix +++ b/nixos/modules/services/web-servers/nginx/default.nix @@ -4,23 +4,25 @@ with lib; let cfg = config.services.nginx; + certs = config.security.acme.certs; + vhostsConfigs = mapAttrsToList (vhostName: vhostConfig: vhostConfig) virtualHosts; + acmeEnabledVhosts = filter (vhostConfig: vhostConfig.enableACME && vhostConfig.useACMEHost == null) vhostsConfigs; virtualHosts = mapAttrs (vhostName: vhostConfig: let serverName = if vhostConfig.serverName != null then vhostConfig.serverName else vhostName; - acmeDirectory = config.security.acme.directory; in vhostConfig // { inherit serverName; } // (optionalAttrs vhostConfig.enableACME { - sslCertificate = "${acmeDirectory}/${serverName}/fullchain.pem"; - sslCertificateKey = "${acmeDirectory}/${serverName}/key.pem"; - sslTrustedCertificate = "${acmeDirectory}/${serverName}/fullchain.pem"; + sslCertificate = "${certs.${serverName}.directory}/fullchain.pem"; + sslCertificateKey = "${certs.${serverName}.directory}/key.pem"; + sslTrustedCertificate = "${certs.${serverName}.directory}/full.pem"; }) // (optionalAttrs (vhostConfig.useACMEHost != null) { - sslCertificate = "${acmeDirectory}/${vhostConfig.useACMEHost}/fullchain.pem"; - sslCertificateKey = "${acmeDirectory}/${vhostConfig.useACMEHost}/key.pem"; - sslTrustedCertificate = "${acmeDirectory}/${vhostConfig.useACMEHost}/fullchain.pem"; + sslCertificate = "${certs.${vhostConfig.useACMEHost}.directory}/fullchain.pem"; + sslCertificateKey = "${certs.${vhostConfig.useACMEHost}.directory}/key.pem"; + sslTrustedCertificate = "${certs.${vhostConfig.useACMEHost}.directory}/fullchain.pem"; }) ) cfg.virtualHosts; enableIPv6 = config.networking.enableIPv6; @@ -646,8 +648,9 @@ in systemd.services.nginx = { description = "Nginx Web Server"; - after = [ "network.target" ]; wantedBy = [ "multi-user.target" ]; + wants = concatLists (map (vhostConfig: ["acme-${vhostConfig.serverName}.service" "acme-selfsigned-${vhostConfig.serverName}.service"]) acmeEnabledVhosts); + after = [ "network.target" ] ++ map (vhostConfig: "acme-selfsigned-${vhostConfig.serverName}.service") acmeEnabledVhosts; stopIfChanged = false; preStart = '' @@ -680,8 +683,6 @@ in security.acme.certs = filterAttrs (n: v: v != {}) ( let - vhostsConfigs = mapAttrsToList (vhostName: vhostConfig: vhostConfig) virtualHosts; - acmeEnabledVhosts = filter (vhostConfig: vhostConfig.enableACME && vhostConfig.useACMEHost == null) vhostsConfigs; acmePairs = map (vhostConfig: { name = vhostConfig.serverName; value = { user = cfg.user; group = lib.mkDefault cfg.group; diff --git a/nixos/tests/acme.nix b/nixos/tests/acme.nix index 4669a092433e..8cfdea4a16ef 100644 --- a/nixos/tests/acme.nix +++ b/nixos/tests/acme.nix @@ -3,19 +3,49 @@ let in import ./make-test.nix { name = "acme"; - nodes = { + nodes = rec { letsencrypt = ./common/letsencrypt; + acmeStandalone = { config, pkgs, ... }: { + imports = [ commonConfig ]; + networking.firewall.allowedTCPPorts = [ 80 ]; + networking.extraHosts = '' + ${config.networking.primaryIPAddress} standalone.com + ''; + security.acme.certs."standalone.com" = { + webroot = "/var/lib/acme/acme-challenges"; + }; + systemd.targets."acme-finished-standalone.com" = {}; + systemd.services."acme-standalone.com" = { + wants = [ "acme-finished-standalone.com.target" ]; + before = [ "acme-finished-standalone.com.target" ]; + }; + services.nginx.enable = true; + services.nginx.virtualHosts."standalone.com" = { + locations."/.well-known/acme-challenge".root = "/var/lib/acme/acme-challenges"; + }; + }; + webserver = { config, pkgs, ... }: { imports = [ commonConfig ]; networking.firewall.allowedTCPPorts = [ 80 443 ]; networking.extraHosts = '' - ${config.networking.primaryIPAddress} example.com + ${config.networking.primaryIPAddress} a.example.com + ${config.networking.primaryIPAddress} b.example.com ''; + # A target remains active. Use this to probe the fact that + # a service fired eventhough it is not RemainAfterExit + systemd.targets."acme-finished-a.example.com" = {}; + systemd.services."acme-a.example.com" = { + wants = [ "acme-finished-a.example.com.target" ]; + before = [ "acme-finished-a.example.com.target" ]; + }; + services.nginx.enable = true; - services.nginx.virtualHosts."example.com" = { + + services.nginx.virtualHosts."a.example.com" = { enableACME = true; forceSSL = true; locations."/".root = pkgs.runCommand "docroot" {} '' @@ -23,17 +53,63 @@ in import ./make-test.nix { echo hello world > "$out/index.html" ''; }; + + nesting.clone = [ + ({pkgs, ...}: { + + networking.extraHosts = '' + ${config.networking.primaryIPAddress} b.example.com + ''; + systemd.targets."acme-finished-b.example.com" = {}; + systemd.services."acme-b.example.com" = { + wants = [ "acme-finished-b.example.com.target" ]; + before = [ "acme-finished-b.example.com.target" ]; + }; + services.nginx.virtualHosts."b.example.com" = { + enableACME = true; + forceSSL = true; + locations."/".root = pkgs.runCommand "docroot" {} '' + mkdir -p "$out" + echo hello world > "$out/index.html" + ''; + }; + }) + ]; }; client = commonConfig; }; - testScript = '' - $letsencrypt->waitForUnit("default.target"); - $letsencrypt->waitForUnit("boulder.service"); - $webserver->waitForUnit("default.target"); - $webserver->waitForUnit("acme-certificates.target"); - $client->waitForUnit("default.target"); - $client->succeed('curl https://example.com/ | grep -qF "hello world"'); - ''; + testScript = {nodes, ...}: + let + newServerSystem = nodes.webserver2.config.system.build.toplevel; + switchToNewServer = "${newServerSystem}/bin/switch-to-configuration test"; + in + # Note, waitForUnit does not work for oneshot services that do not have RemainAfterExit=true, + # this is because a oneshot goes from inactive => activating => inactive, and never + # reaches the active state. To work around this, we create some mock target units which + # get pulled in by the oneshot units. The target units linger after activation, and hence we + # can use them to probe that a oneshot fired. It is a bit ugly, but it is the best we can do + '' + $client->waitForUnit("default.target"); + $letsencrypt->waitForUnit("default.target"); + $letsencrypt->waitForUnit("boulder.service"); + + subtest "can request certificate with HTTPS-01 challenge", sub { + $acmeStandalone->waitForUnit("default.target"); + $acmeStandalone->succeed("systemctl start acme-standalone.com.service"); + $acmeStandalone->waitForUnit("acme-finished-standalone.com.target"); + }; + + subtest "Can request certificate for nginx service", sub { + $webserver->waitForUnit("acme-finished-a.example.com.target"); + $client->succeed('curl https://a.example.com/ | grep -qF "hello world"'); + }; + + subtest "Can add another certificate for nginx service", sub { + $webserver->succeed("/run/current-system/fine-tune/child-1/bin/switch-to-configuration test"); + $webserver->waitForUnit("acme-finished-b.example.com.target"); + $client->succeed('curl https://b.example.com/ | grep -qF "hello world"'); + }; + ''; }