nixos/acme: Fix race condition, dont be smart with keys

Attempting to reuse keys on a basis different to the cert (AKA,
storing the key in a directory with a hashed name different to
the cert it is associated with) was ineffective since when
"lego run" is used it will ALWAYS generate a new key. This causes
issues when you revert changes since your "reused" key will not
be the one associated with the old cert. As such, I tore out the
whole keyDir implementation.

As for the race condition, checking the mtime of the cert file
was not sufficient to detect changes. In testing, selfsigned
and full certs could be generated/installed within 1 second of
each other. cmp is now used instead.

Also, I removed the nginx/httpd reload waiters in favour of
simple retry logic for the curl-based tests
This commit is contained in:
Lucas Savva 2020-09-03 15:31:06 +01:00
parent 61dbf4bf89
commit 1b6cfd9796
No known key found for this signature in database
GPG Key ID: F9CE6D3DCDC78F2D
2 changed files with 56 additions and 76 deletions

View File

@ -106,7 +106,6 @@ let
mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); mkHash = with builtins; val: substring 0 20 (hashString "sha256" val);
certDir = mkHash hashData; certDir = mkHash hashData;
othersHash = mkHash "${toString acmeServer} ${data.keyType}"; othersHash = mkHash "${toString acmeServer} ${data.keyType}";
keyDir = "key-" + othersHash;
accountDir = "/var/lib/acme/.lego/accounts/" + othersHash; accountDir = "/var/lib/acme/.lego/accounts/" + othersHash;
protocolOpts = if useDns then ( protocolOpts = if useDns then (
@ -215,7 +214,7 @@ let
# https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099 # https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099
wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ]; wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ];
path = with pkgs; [ lego coreutils ]; path = with pkgs; [ lego coreutils diffutils ];
serviceConfig = commonServiceConfig // { serviceConfig = commonServiceConfig // {
Group = data.group; Group = data.group;
@ -223,14 +222,13 @@ let
# AccountDir dir will be created by tmpfiles to ensure correct permissions # AccountDir dir will be created by tmpfiles to ensure correct permissions
# And to avoid deletion during systemctl clean # And to avoid deletion during systemctl clean
# acme/.lego/${cert} is listed so that it is deleted during systemctl clean # acme/.lego/${cert} is listed so that it is deleted during systemctl clean
StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir} acme/.lego/${cert}/${keyDir}"; StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}";
# Needs to be space separated, but can't use a multiline string because that'll include newlines # Needs to be space separated, but can't use a multiline string because that'll include newlines
BindPaths = BindPaths =
"${accountDir}:/tmp/accounts " + "${accountDir}:/tmp/accounts " +
"/var/lib/acme/${cert}:/tmp/out " + "/var/lib/acme/${cert}:/tmp/out " +
"/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates " + "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates ";
"/var/lib/acme/.lego/${cert}/${keyDir}:/tmp/keys";
# Only try loading the credentialsFile if the dns challenge is enabled # Only try loading the credentialsFile if the dns challenge is enabled
EnvironmentFile = mkIf useDns data.credentialsFile; EnvironmentFile = mkIf useDns data.credentialsFile;
@ -240,9 +238,6 @@ let
script = '' script = ''
set -euo pipefail set -euo pipefail
# Safely copy keyDir contents into certificates (it might be empty).
cp -af keys/. certificates/
# Check if we can renew # Check if we can renew
if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' ]; then if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' ]; then
lego ${renewOpts} lego ${renewOpts}
@ -258,17 +253,15 @@ let
# Group might change between runs, re-apply it # Group might change between runs, re-apply it
chown 'acme:${data.group}' certificates/* chown 'acme:${data.group}' certificates/*
# Copy the key to keyDir
cp -pf 'certificates/${keyName}.key' 'keys/'
# Copy all certs to the "real" certs directory # Copy all certs to the "real" certs directory
CERT='certificates/${keyName}.crt' CERT='certificates/${keyName}.crt'
CERT_CHANGED=no CERT_CHANGED=no
if [ -e "$CERT" -a "$CERT" -nt out/fullchain.pem ]; then if [ -e "$CERT" ] && ! cmp -s "$CERT" out/fullchain.pem; then
CERT_CHANGED=yes CERT_CHANGED=yes
cp -p 'certificates/${keyName}.crt' out/fullchain.pem echo Installing new certificate
cp -p 'certificates/${keyName}.key' out/key.pem cp -vp 'certificates/${keyName}.crt' out/fullchain.pem
cp -p 'certificates/${keyName}.issuer.crt' out/chain.pem cp -vp 'certificates/${keyName}.key' out/key.pem
cp -vp 'certificates/${keyName}.issuer.crt' out/chain.pem
ln -sf fullchain.pem out/cert.pem ln -sf fullchain.pem out/cert.pem
cat out/key.pem out/fullchain.pem > out/full.pem cat out/key.pem out/fullchain.pem > out/full.pem
fi fi

View File

@ -164,6 +164,9 @@ in import ./make-test-python.nix ({ lib, ... }: {
# reaches the active state. Targets do not have this issue. # reaches the active state. Targets do not have this issue.
'' ''
import time
has_switched = False has_switched = False
@ -175,70 +178,68 @@ in import ./make-test-python.nix ({ lib, ... }: {
) )
has_switched = True has_switched = True
node.succeed( node.succeed(
"/run/current-system/specialisation/{}/bin/switch-to-configuration test".format( f"/run/current-system/specialisation/{name}/bin/switch-to-configuration test"
name
)
) )
# In order to determine if a config reload has finished, we need to watch
# the log files for the relevant lines
def wait_httpd_reload(node):
# Check for SIGUSER received
node.succeed("( tail -n3 -f /var/log/httpd/error.log & ) | grep -q AH00493")
# Check for service restart. This line also occurs when the service is started,
# hence the above check is necessary too.
node.succeed("( tail -n1 -f /var/log/httpd/error.log & ) | grep -q AH00094")
def wait_nginx_reload(node):
# Check for SIGHUP received
node.succeed("( journalctl -fu nginx -n18 & ) | grep -q SIGHUP")
# Check for SIGCHLD from killed worker processes
node.succeed("( journalctl -fu nginx -n10 & ) | grep -q SIGCHLD")
# Ensures the issuer of our cert matches the chain # Ensures the issuer of our cert matches the chain
# and matches the issuer we expect it to be. # and matches the issuer we expect it to be.
# It's a good validation to ensure the cert.pem and fullchain.pem # It's a good validation to ensure the cert.pem and fullchain.pem
# are not still selfsigned afer verification # are not still selfsigned afer verification
def check_issuer(node, cert_name, issuer): def check_issuer(node, cert_name, issuer):
for fname in ("cert.pem", "fullchain.pem"): for fname in ("cert.pem", "fullchain.pem"):
node.succeed( actual_issuer = node.succeed(
( f"openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname}"
"""openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname} \ ).partition("=")[2]
| tee /proc/self/fd/2 \ print(f"{fname} issuer: {actual_issuer}")
| cut -d'=' -f2- \ assert issuer.lower() in actual_issuer.lower()
| grep "$(openssl x509 -noout -subject -in /var/lib/acme/{cert_name}/chain.pem \
| cut -d'=' -f2-)\" \
| grep -i '{issuer}'
"""
).format(cert_name=cert_name, issuer=issuer, fname=fname)
)
# Ensure cert comes before chain in fullchain.pem # Ensure cert comes before chain in fullchain.pem
def check_fullchain(node, cert_name): def check_fullchain(node, cert_name):
node.succeed( subject_data = node.succeed(
( f"openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem"
"""openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem \ " | openssl pkcs7 -print_certs -noout"
| tee /proc/self/fd/2 \
| openssl pkcs7 -print_certs -noout | head -1 | grep {cert_name}
"""
).format(cert_name=cert_name)
) )
for line in subject_data.lower().split("\n"):
if "subject" in line:
print(f"First subject in fullchain.pem: ", line)
assert cert_name.lower() in line
return
assert False
def check_connection(node, domain): def check_connection(node, domain, retries=3):
node.succeed( if retries == 0:
( assert False
"""openssl s_client -brief -verify 2 -verify_return_error -CAfile /tmp/ca.crt \
-servername {domain} -connect {domain}:443 < /dev/null 2>&1 \ result = node.succeed(
| tee /proc/self/fd/2 "openssl s_client -brief -verify 2 -CAfile /tmp/ca.crt"
""" f" -servername {domain} -connect {domain}:443 < /dev/null 2>&1"
).format(domain=domain)
) )
for line in result.lower().split("\n"):
if "verification" in line and "error" in line:
time.sleep(1)
return check_connection(node, domain, retries - 1)
def check_connection_key_bits(node, domain, bits, retries=3):
if retries == 0:
assert False
result = node.succeed(
"openssl s_client -CAfile /tmp/ca.crt"
f" -servername {domain} -connect {domain}:443 < /dev/null"
" | openssl x509 -noout -text | grep -i Public-Key"
)
print("Key type:", result)
if bits not in result:
time.sleep(1)
return check_connection_key_bits(node, domain, bits, retries - 1)
client.start() client.start()
dnsserver.start() dnsserver.start()
@ -261,7 +262,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
with subtest("Can request certificate with HTTPS-01 challenge"): with subtest("Can request certificate with HTTPS-01 challenge"):
webserver.wait_for_unit("acme-finished-a.example.test.target") webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver)
check_fullchain(webserver, "a.example.test") check_fullchain(webserver, "a.example.test")
check_issuer(webserver, "a.example.test", "pebble") check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test") check_connection(client, "a.example.test")
@ -273,35 +273,26 @@ in import ./make-test-python.nix ({ lib, ... }: {
check_issuer(webserver, "a.example.test", "minica") check_issuer(webserver, "a.example.test", "minica")
# Will succeed if nginx can load the certs # Will succeed if nginx can load the certs
webserver.succeed("systemctl start nginx-config-reload.service") webserver.succeed("systemctl start nginx-config-reload.service")
wait_nginx_reload(webserver)
with subtest("Can reload nginx when timer triggers renewal"): with subtest("Can reload nginx when timer triggers renewal"):
webserver.succeed("systemctl start test-renew-nginx.target") webserver.succeed("systemctl start test-renew-nginx.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "a.example.test", "pebble") check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test") check_connection(client, "a.example.test")
with subtest("Can reload web server when cert configuration changes"): with subtest("Can reload web server when cert configuration changes"):
switch_to(webserver, "cert-change") switch_to(webserver, "cert-change")
webserver.wait_for_unit("acme-finished-a.example.test.target") webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver) check_connection_key_bits(client, "a.example.test", "384")
client.succeed(
"""openssl s_client -CAfile /tmp/ca.crt -connect a.example.test:443 < /dev/null \
| openssl x509 -noout -text | grep -i Public-Key | grep 384
"""
)
with subtest("Can request certificate with HTTPS-01 when nginx startup is delayed"): with subtest("Can request certificate with HTTPS-01 when nginx startup is delayed"):
switch_to(webserver, "slow-startup") switch_to(webserver, "slow-startup")
webserver.wait_for_unit("acme-finished-slow.example.com.target") webserver.wait_for_unit("acme-finished-slow.example.com.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "slow.example.com", "pebble") check_issuer(webserver, "slow.example.com", "pebble")
check_connection(client, "slow.example.com") check_connection(client, "slow.example.com")
with subtest("Can request certificate for vhost + aliases (nginx)"): with subtest("Can request certificate for vhost + aliases (nginx)"):
switch_to(webserver, "nginx-aliases") switch_to(webserver, "nginx-aliases")
webserver.wait_for_unit("acme-finished-a.example.test.target") webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "a.example.test", "pebble") check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test") check_connection(client, "a.example.test")
check_connection(client, "b.example.test") check_connection(client, "b.example.test")
@ -309,7 +300,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
with subtest("Can request certificates for vhost + aliases (apache-httpd)"): with subtest("Can request certificates for vhost + aliases (apache-httpd)"):
switch_to(webserver, "httpd-aliases") switch_to(webserver, "httpd-aliases")
webserver.wait_for_unit("acme-finished-c.example.test.target") webserver.wait_for_unit("acme-finished-c.example.test.target")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "pebble") check_issuer(webserver, "c.example.test", "pebble")
check_connection(client, "c.example.test") check_connection(client, "c.example.test")
check_connection(client, "d.example.test") check_connection(client, "d.example.test")
@ -318,18 +308,15 @@ in import ./make-test-python.nix ({ lib, ... }: {
# Switch to selfsigned first # Switch to selfsigned first
webserver.succeed("systemctl clean acme-c.example.test.service --what=state") webserver.succeed("systemctl clean acme-c.example.test.service --what=state")
webserver.succeed("systemctl start acme-selfsigned-c.example.test.service") webserver.succeed("systemctl start acme-selfsigned-c.example.test.service")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "minica") check_issuer(webserver, "c.example.test", "minica")
webserver.succeed("systemctl start httpd-config-reload.service") webserver.succeed("systemctl start httpd-config-reload.service")
webserver.succeed("systemctl start test-renew-httpd.target") webserver.succeed("systemctl start test-renew-httpd.target")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "pebble") check_issuer(webserver, "c.example.test", "pebble")
check_connection(client, "c.example.test") check_connection(client, "c.example.test")
with subtest("Can request wildcard certificates using DNS-01 challenge"): with subtest("Can request wildcard certificates using DNS-01 challenge"):
switch_to(webserver, "dns-01") switch_to(webserver, "dns-01")
webserver.wait_for_unit("acme-finished-example.test.target") webserver.wait_for_unit("acme-finished-example.test.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "example.test", "pebble") check_issuer(webserver, "example.test", "pebble")
check_connection(client, "dns.example.test") check_connection(client, "dns.example.test")
''; '';