nixos/switch-to-configuration: Clean up lower part of the script

- Fully get rid of `parseKeyValues` and use systemctl features for that
- Add some regex modifiers recommended by perlcritic
- Get rid of a postfix if
- Sort units when showing their status
- Clean the logic for showing what failed from `elif` to `next`
- Switch from `state` to `substate` for `auto-restart` because that's
  actually where the value is stored
- Show status of units with one single systemctl call and get rid of
  COLUMNS in favor of --full
- Add a test for failing units
This commit is contained in:
Janne Heß 2022-01-20 15:32:32 +01:00
parent 96d36b0c2e
commit b52372675d
No known key found for this signature in database
GPG Key ID: 69165158F05265DF
2 changed files with 94 additions and 24 deletions

View File

@ -171,14 +171,6 @@ sub parseUnit {
return %unitData;
}
sub parseKeyValues {
my $info = shift;
foreach my $line (@_) {
$line =~ /^([^=]+)=(.*)$/ or next;
$info->{$1} = $2;
}
}
# Checks whether a specified boolean in a systemd unit is true
# or false, with a default that is applied when the value is not set.
sub parseSystemdBool {
@ -606,33 +598,36 @@ my $activeNew = getActiveUnits;
while (my ($unit, $state) = each %{$activeNew}) {
if ($state->{state} eq "failed") {
push @failed, $unit;
next;
}
elsif ($state->{state} eq "auto-restart") {
# A unit in auto-restart state is a failure *if* it previously failed to start
my $lines = `@systemd@/bin/systemctl show '$unit'`;
my $info = {};
parseKeyValues($info, split("\n", $lines));
if ($info->{ExecMainStatus} ne '0') {
if ($state->{substate} eq "auto-restart") {
# A unit in auto-restart substate is a failure *if* it previously failed to start
my $main_status = `@systemd@/bin/systemctl show --value --property=ExecMainStatus '$unit'`;
chomp($main_status);
if ($main_status ne "0") {
push @failed, $unit;
next;
}
}
# Ignore scopes since they are not managed by this script but rather
# created and managed by third-party services via the systemd dbus API.
elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/) {
# This only lists units that are not failed (including ones that are in auto-restart but have not failed previously)
if ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/msx) {
push @new, $unit;
}
}
print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
if scalar @new > 0;
if (scalar @new > 0) {
print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
}
if (scalar @failed > 0) {
print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n";
foreach my $unit (@failed) {
print STDERR "\n";
system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2");
}
my @failed_sorted = sort @failed;
print STDERR "warning: the following units failed: ", join(", ", @failed_sorted), "\n\n";
system "@systemd@/bin/systemctl status --no-pager --full '" . join("' '", @failed_sorted) . "' >&2";
$res = 4;
}

View File

@ -45,6 +45,31 @@ import ./make-test-python.nix ({ pkgs, ...} : {
systemd.services.test.restartIfChanged = false;
};
simpleServiceFailing.configuration = {
imports = [ simpleServiceModified.configuration ];
systemd.services.test.serviceConfig.ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
};
autorestartService.configuration = {
# A service that immediately goes into restarting (but without failing)
systemd.services.autorestart = {
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "simple";
Restart = "always";
RestartSec = "20y"; # Should be long enough
ExecStart = "${pkgs.coreutils}/bin/true";
};
};
};
autorestartServiceFailing.configuration = {
imports = [ autorestartService.configuration ];
systemd.services.autorestart.serviceConfig = {
ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
};
};
restart-and-reload-by-activation-script.configuration = {
systemd.services = rec {
simple-service = {
@ -189,12 +214,13 @@ import ./make-test-python.nix ({ pkgs, ...} : {
exec env -i "$@" | tee /dev/stderr
'';
in /* python */ ''
def switch_to_specialisation(system, name, action="test"):
def switch_to_specialisation(system, name, action="test", fail=False):
if name == "":
stc = f"{system}/bin/switch-to-configuration"
else:
stc = f"{system}/specialisation/{name}/bin/switch-to-configuration"
out = machine.succeed(f"{stc} {action} 2>&1")
out = machine.fail(f"{stc} {action} 2>&1") if fail \
else machine.succeed(f"{stc} {action} 2>&1")
assert_lacks(out, "switch-to-configuration line") # Perl warnings
return out
@ -305,7 +331,56 @@ import ./make-test-python.nix ({ pkgs, ...} : {
assert_lacks(out, "as well:")
assert_contains(out, "would start the following units: test.service\n")
with subtest("failing units"):
# Let the simple service fail
switch_to_specialisation("${machine}", "simpleServiceModified")
out = switch_to_specialisation("${machine}", "simpleServiceFailing", fail=True)
assert_contains(out, "stopping the following units: test.service\n")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_contains(out, "\nstarting the following units: test.service\n")
assert_lacks(out, "the following new units were started:")
assert_contains(out, "warning: the following units failed: test.service\n")
assert_contains(out, "Main PID:") # output of systemctl
assert_lacks(out, "as well:")
# A unit that gets into autorestart without failing is not treated as failed
out = switch_to_specialisation("${machine}", "autorestartService")
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_contains(out, "the following new units were started: autorestart.service\n")
assert_lacks(out, "as well:")
machine.systemctl('stop autorestart.service') # cancel the 20y timer
# Switching to the same system should do nothing (especially not treat the unit as failed)
out = switch_to_specialisation("${machine}", "autorestartService")
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_contains(out, "the following new units were started: autorestart.service\n")
assert_lacks(out, "as well:")
machine.systemctl('stop autorestart.service') # cancel the 20y timer
# If systemd thinks the unit has failed and is in autorestart, we should show it as failed
out = switch_to_specialisation("${machine}", "autorestartServiceFailing", fail=True)
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_lacks(out, "the following new units were started:")
assert_contains(out, "warning: the following units failed: autorestart.service\n")
assert_contains(out, "Main PID:") # output of systemctl
assert_lacks(out, "as well:")
with subtest("restart and reload by activation script"):
switch_to_specialisation("${machine}", "simpleServiceNorestart")
out = switch_to_specialisation("${machine}", "restart-and-reload-by-activation-script")
assert_contains(out, "stopping the following units: test.service\n")
assert_lacks(out, "NOT restarting the following changed units:")