diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix
index 656a2d21b868..543a7b25d5d6 100644
--- a/nixos/modules/services/network-filesystems/ceph.nix
+++ b/nixos/modules/services/network-filesystems/ceph.nix
@@ -9,12 +9,14 @@ let
   expandCamelCase = replaceStrings upperChars (map (s: " ${s}") lowerChars);
   expandCamelCaseAttrs = mapAttrs' (name: value: nameValuePair (expandCamelCase name) value);
 
-  makeServices = (daemonType: daemonIds: extraServiceConfig:
+  makeServices = (daemonType: daemonIds:
     mkMerge (map (daemonId:
-      { "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph extraServiceConfig; })
+      { "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph; })
       daemonIds));
 
-  makeService = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: {
+  makeService = (daemonType: daemonId: clusterName: ceph:
+    let
+      stateDirectory = "ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"; in {
     enable = true;
     description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}";
     after = [ "network-online.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target";
@@ -22,6 +24,11 @@ let
     partOf = [ "ceph-${daemonType}.target" ];
     wantedBy = [ "ceph-${daemonType}.target" ];
 
+    path = [ pkgs.getopt ];
+
+    # Don't start services that are not yet initialized
+    unitConfig.ConditionPathExists = "/var/lib/${stateDirectory}/keyring";
+
     serviceConfig = {
       LimitNOFILE = 1048576;
       LimitNPROC = 1048576;
@@ -34,22 +41,22 @@ let
       Restart = "on-failure";
       StartLimitBurst = "5";
       StartLimitInterval = "30min";
+      StateDirectory = stateDirectory;
+      User = "ceph";
+      Group = if daemonType == "osd" then "disk" else "ceph";
       ExecStart = ''${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} \
-        -f --cluster ${clusterName} --id ${daemonId} --setuser ceph \
-        --setgroup ${if daemonType == "osd" then "disk" else "ceph"}'';
-    } // extraServiceConfig
-      // optionalAttrs (daemonType == "osd") { ExecStartPre = ''${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh \
-        --id ${daemonId} --cluster ${clusterName}''; };
-  } // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) {
-    preStart = ''
-        daemonPath="/var/lib/ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"
-        if [ ! -d $daemonPath ]; then
-          mkdir -m 755 -p $daemonPath
-          chown -R ceph:ceph $daemonPath
-        fi
-      '';
-  } // optionalAttrs (daemonType == "osd") { path = [ pkgs.getopt ]; }
-  );
+        -f --cluster ${clusterName} --id ${daemonId}'';
+    } // optionalAttrs (daemonType == "osd") {
+      ExecStartPre = ''${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}'';
+      StartLimitBurst = "30";
+      RestartSec = "20s";
+      PrivateDevices = "no"; # osd needs disk access
+    } // optionalAttrs ( daemonType == "mon") {
+      RestartSec = "10";
+    } // optionalAttrs (lib.elem daemonType ["mgr" "mds"]) {
+      StartLimitBurst = "3";
+    };
+  });
 
   makeTarget = (daemonType:
     {
@@ -58,6 +65,7 @@ let
         partOf = [ "ceph.target" ];
         wantedBy = [ "ceph.target" ];
         before = [ "ceph.target" ];
+        unitConfig.StopWhenUnneeded = true;
       };
     }
   );
@@ -377,22 +385,22 @@ in
 
     systemd.services = let
      services = []
-        ++ optional cfg.mon.enable (makeServices "mon" cfg.mon.daemons { RestartSec = "10"; })
-        ++ optional cfg.mds.enable (makeServices "mds" cfg.mds.daemons { StartLimitBurst = "3"; })
-        ++ optional cfg.osd.enable (makeServices "osd" cfg.osd.daemons { StartLimitBurst = "30";
-                                                                         RestartSec = "20s";
-                                                                         PrivateDevices = "no"; # osd needs disk access
-                                                                       })
-        ++ optional cfg.rgw.enable (makeServices "rgw" cfg.rgw.daemons { })
-        ++ optional cfg.mgr.enable (makeServices "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; });
+        ++ optional cfg.mon.enable (makeServices "mon" cfg.mon.daemons)
+        ++ optional cfg.mds.enable (makeServices "mds" cfg.mds.daemons)
+        ++ optional cfg.osd.enable (makeServices "osd" cfg.osd.daemons)
+        ++ optional cfg.rgw.enable (makeServices "rgw" cfg.rgw.daemons)
+        ++ optional cfg.mgr.enable (makeServices "mgr" cfg.mgr.daemons);
       in
         mkMerge services;
 
     systemd.targets = let
       targets = [
-        { ceph = { description = "Ceph target allowing to start/stop all ceph service instances at once";
-                   wantedBy = [ "multi-user.target" ]; }; }
-      ] ++ optional cfg.mon.enable (makeTarget "mon")
+        { ceph = {
+          description = "Ceph target allowing to start/stop all ceph service instances at once";
+          wantedBy = [ "multi-user.target" ];
+          unitConfig.StopWhenUnneeded = true;
+        }; } ]
+        ++ optional cfg.mon.enable (makeTarget "mon")
         ++ optional cfg.mds.enable (makeTarget "mds")
         ++ optional cfg.osd.enable (makeTarget "osd")
         ++ optional cfg.rgw.enable (makeTarget "rgw")
@@ -401,7 +409,11 @@ in
         mkMerge targets;
 
     systemd.tmpfiles.rules = [
+      "d /etc/ceph - ceph ceph - -"
       "d /run/ceph 0770 ceph ceph -"
-    ];
+      "d /var/lib/ceph - ceph ceph - -"]
+      ++ optionals cfg.mgr.enable [ "d /var/lib/ceph/mgr - ceph ceph - -"]
+      ++ optionals cfg.mon.enable [ "d /var/lib/ceph/mon - ceph ceph - -"]
+      ++ optionals cfg.osd.enable [ "d /var/lib/ceph/osd - ceph ceph - -"];
   };
 }
diff --git a/nixos/tests/ceph-multi-node.nix b/nixos/tests/ceph-multi-node.nix
index 6698aac3f271..ed493d6a1b34 100644
--- a/nixos/tests/ceph-multi-node.nix
+++ b/nixos/tests/ceph-multi-node.nix
@@ -49,9 +49,6 @@ let
     boot.kernelModules = [ "xfs" ];
 
     services.ceph = cephConfig;
-
-    # So that we don't have to battle systemd when bootstraping
-    systemd.targets.ceph.wantedBy = lib.mkForce [];
   };
 
   networkMonA = {
@@ -107,6 +104,10 @@ let
     };
   }; };
 
+  # Following deployment is based on the manual deployment described here:
+  # https://docs.ceph.com/docs/master/install/manual-deployment/
+  # For other ways to deploy a ceph cluster, look at the documentation at
+  # https://docs.ceph.com/docs/master/
   testscript = { ... }: ''
     startAll;
 
@@ -114,27 +115,6 @@ let
     $monA->waitForUnit("network.target");
     $osd0->waitForUnit("network.target");
     $osd1->waitForUnit("network.target");
-    # Create the ceph-related directories
-    $monA->mustSucceed(
-      "mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}",
-      "mkdir -p /var/lib/ceph/mon/ceph-${cfg.monA.name}",
-      "chown ceph:ceph -R /var/lib/ceph/",
-      "mkdir -p /etc/ceph",
-      "chown ceph:ceph -R /etc/ceph"
-    );
-    $osd0->mustSucceed(
-      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
-      "chown ceph:ceph -R /var/lib/ceph/",
-      "mkdir -p /etc/ceph",
-      "chown ceph:ceph -R /etc/ceph"
-    );
-    $osd1->mustSucceed(
-      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
-      "chown ceph:ceph -R /var/lib/ceph/",
-      "mkdir -p /etc/ceph",
-      "chown ceph:ceph -R /etc/ceph"
-    );
-
     # Bootstrap ceph-mon daemon
     $monA->mustSucceed(
       "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'",
@@ -142,6 +122,7 @@ let
       "sudo -u ceph ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring",
       "monmaptool --create --add ${cfg.monA.name} ${cfg.monA.ip} --fsid ${cfg.clusterId} /tmp/monmap",
      "sudo -u ceph ceph-mon --mkfs -i ${cfg.monA.name} --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring",
+      "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/",
       "sudo -u ceph touch /var/lib/ceph/mon/ceph-${cfg.monA.name}/done",
       "systemctl start ceph-mon-${cfg.monA.name}"
     );
@@ -168,12 +149,14 @@ let
     # Bootstrap both OSDs
     $osd0->mustSucceed(
       "mkfs.xfs /dev/vdb",
+      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
       "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
       "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}",
       "echo '{\"cephx_secret\": \"${cfg.osd0.key}\"}' | ceph osd new ${cfg.osd0.uuid} -i -",
     );
     $osd1->mustSucceed(
       "mkfs.xfs /dev/vdb",
+      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
       "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
       "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}",
       "echo '{\"cephx_secret\": \"${cfg.osd1.key}\"}' | ceph osd new ${cfg.osd1.uuid} -i -"
@@ -209,22 +192,17 @@ let
       "ceph osd pool delete multi-node-other-test multi-node-other-test --yes-i-really-really-mean-it"
     );
 
-    # As we disable the target in the config, we still want to test that it works as intended
-    $osd0->mustSucceed("systemctl stop ceph-osd-${cfg.osd0.name}");
-    $osd1->mustSucceed("systemctl stop ceph-osd-${cfg.osd1.name}");
-    $monA->mustSucceed(
-      "systemctl stop ceph-mgr-${cfg.monA.name}",
-      "systemctl stop ceph-mon-${cfg.monA.name}"
-    );
-
-    $monA->succeed("systemctl start ceph.target");
-    $monA->waitForUnit("ceph-mon-${cfg.monA.name}");
-    $monA->waitForUnit("ceph-mgr-${cfg.monA.name}");
-    $osd0->succeed("systemctl start ceph.target");
-    $osd0->waitForUnit("ceph-osd-${cfg.osd0.name}");
-    $osd1->succeed("systemctl start ceph.target");
-    $osd1->waitForUnit("ceph-osd-${cfg.osd1.name}");
-
+    # Shut down ceph on all machines in a very unpolite way
+    $monA->crash;
+    $osd0->crash;
+    $osd1->crash;
+
+    # Start it up
+    $osd0->start;
+    $osd1->start;
+    $monA->start;
+
+    # Ensure the cluster comes back up again
     $monA->succeed("ceph -s | grep 'mon: 1 daemons'");
     $monA->waitUntilSucceeds("ceph -s | grep 'quorum ${cfg.monA.name}'");
     $monA->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'");
diff --git a/nixos/tests/ceph-single-node.nix b/nixos/tests/ceph-single-node.nix
index 10b77cff5a31..041fbd7e8e64 100644
--- a/nixos/tests/ceph-single-node.nix
+++ b/nixos/tests/ceph-single-node.nix
@@ -46,9 +46,6 @@ let
     boot.kernelModules = [ "xfs" ];
 
     services.ceph = cephConfig;
-
-    # So that we don't have to battle systemd when bootstraping
-    systemd.targets.ceph.wantedBy = lib.mkForce [];
   };
 
   networkMonA = {
@@ -72,22 +69,15 @@ let
     };
   }; };
 
+  # Following deployment is based on the manual deployment described here:
+  # https://docs.ceph.com/docs/master/install/manual-deployment/
+  # For other ways to deploy a ceph cluster, look at the documentation at
+  # https://docs.ceph.com/docs/master/
   testscript = { ... }: ''
     startAll;
 
     $monA->waitForUnit("network.target");
 
-    # Create the ceph-related directories
-    $monA->mustSucceed(
-      "mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}",
-      "mkdir -p /var/lib/ceph/mon/ceph-${cfg.monA.name}",
-      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
-      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
-      "mkdir -p /etc/ceph",
-      "chown ceph:ceph -R /etc/ceph",
-      "chown ceph:ceph -R /var/lib/ceph/",
-    );
-
     # Bootstrap ceph-mon daemon
     $monA->mustSucceed(
       "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'",
@@ -104,8 +94,9 @@ let
     # Can't check ceph status until a mon is up
     $monA->succeed("ceph -s | grep 'mon: 1 daemons'");
 
-    # Start the ceph-mgr daemon, it has no deps and hardly any setup
+    # Start the ceph-mgr daemon, after copying in the keyring
     $monA->mustSucceed(
+      "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/",
       "ceph auth get-or-create mgr.${cfg.monA.name} mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-${cfg.monA.name}/keyring",
       "systemctl start ceph-mgr-${cfg.monA.name}"
     );
@@ -117,7 +108,9 @@ let
     $monA->mustSucceed(
       "mkfs.xfs /dev/vdb",
       "mkfs.xfs /dev/vdc",
+      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
       "mount /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
+      "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
       "mount /dev/vdc /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
       "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}",
       "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}",
@@ -159,20 +152,17 @@ let
       "ceph osd pool delete single-node-other-test single-node-other-test --yes-i-really-really-mean-it"
     );
 
-    # As we disable the target in the config, we still want to test that it works as intended
-    $monA->mustSucceed(
-      "systemctl stop ceph-osd-${cfg.osd0.name}",
-      "systemctl stop ceph-osd-${cfg.osd1.name}",
-      "systemctl stop ceph-mgr-${cfg.monA.name}",
-      "systemctl stop ceph-mon-${cfg.monA.name}"
-    );
-
+    # Shut down ceph by stopping ceph.target.
+    $monA->mustSucceed("systemctl stop ceph.target");
+
+    # Start it up
     $monA->succeed("systemctl start ceph.target");
     $monA->waitForUnit("ceph-mon-${cfg.monA.name}");
     $monA->waitForUnit("ceph-mgr-${cfg.monA.name}");
     $monA->waitForUnit("ceph-osd-${cfg.osd0.name}");
     $monA->waitForUnit("ceph-osd-${cfg.osd1.name}");
-
+
+    # Ensure the cluster comes back up again
     $monA->succeed("ceph -s | grep 'mon: 1 daemons'");
     $monA->waitUntilSucceeds("ceph -s | grep 'quorum ${cfg.monA.name}'");
     $monA->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'");