Merge pull request #290979 from ereslibre/cdi-add-nvidia-docker-1-directories

CDI: Add `mount-nvidia-binaries` and `mount-nvidia-docker-1-directories` options
Someone 2024-04-23 19:55:41 +00:00 committed by GitHub
commit 7035968845
7 changed files with 160 additions and 151 deletions

View File

@@ -46,9 +46,7 @@ Use `services.pipewire.extraConfig` or `services.pipewire.configPackages` for Pi
- The default dbus implementation has transitioned to dbus-broker from the classic dbus daemon for better performance and reliability. Users can revert to the classic dbus daemon by setting `services.dbus.implementation = "dbus";`. For detailed deviations, refer to [dbus-broker's deviations page](https://github.com/bus1/dbus-broker/wiki/Deviations).
- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).
- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `virtualisation.containers.cdi.dynamic.nvidia.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `hardware.nvidia-container-toolkit.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
- `system.etc.overlay.enable` option was added. If enabled, `/etc` is
mounted via an overlayfs instead of being created by a custom perl script.
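
As a rough usage sketch for the replacement option mentioned above (not part of this diff; the runtime choice and device name are illustrative, and exact CDI device names depend on the generated spec):

  {
    # Replaces virtualisation.docker.enableNvidia / virtualisation.podman.enableNvidia:
    hardware.nvidia-container-toolkit.enable = true;
    # Any CDI-aware runtime can consume the generated spec; Docker is used here as an example.
    virtualisation.docker.enable = true;
  }

A container can then request GPUs by CDI device name, e.g. `docker run --device nvidia.com/gpu=all …`; the available device names are listed in the spec that the toolkit generates.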

View File

@@ -559,7 +559,7 @@
./services/hardware/kanata.nix
./services/hardware/lcd.nix
./services/hardware/lirc.nix
./services/hardware/nvidia-container-toolkit-cdi-generator
./services/hardware/nvidia-container-toolkit
./services/hardware/monado.nix
./services/hardware/nvidia-optimus.nix
./services/hardware/openrgb.nix

View File

@@ -1,60 +0,0 @@
{
addDriverRunpath,
glibc,
jq,
lib,
nvidia-container-toolkit,
nvidia-driver,
runtimeShell,
writeScriptBin,
}:
let
mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; };
mounts = [
# FIXME: Making /usr mounts optional
{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
containerPath = "/usr/bin/nvidia-debugdump"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
containerPath = "/usr/bin/nvidia-powerd"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-smi";
containerPath = "/usr/bin/nvidia-smi"; }
{ hostPath = lib.getExe' nvidia-container-toolkit "nvidia-ctk";
containerPath = "/usr/bin/nvidia-ctk"; }
{ hostPath = "${lib.getLib glibc}/lib";
containerPath = "${lib.getLib glibc}/lib"; }
# FIXME: use closureinfo
{
hostPath = addDriverRunpath.driverLink;
containerPath = addDriverRunpath.driverLink;
}
{ hostPath = "${lib.getLib glibc}/lib";
containerPath = "${lib.getLib glibc}/lib"; }
{ hostPath = "${lib.getLib glibc}/lib64";
containerPath = "${lib.getLib glibc}/lib64"; }
];
jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
mountsToJq = lib.concatMap
(mount:
["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"])
mounts;
in
writeScriptBin "nvidia-cdi-generator"
''
#! ${runtimeShell}
function cdiGenerate {
${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
--format json \
--ldconfig-path ${lib.getExe' glibc "ldconfig"} \
--library-search-path ${lib.getLib nvidia-driver}/lib \
--nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
}
cdiGenerate | \
${lib.concatStringsSep " | " mountsToJq} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
''

View File

@@ -1,40 +0,0 @@
{ config, lib, pkgs, ... }:
{
options = {
hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption {
default = false;
internal = true;
visible = false;
type = lib.types.bool;
description = ''
Enable dynamic CDI configuration for NVidia devices by running
nvidia-container-toolkit on boot.
'';
};
};
config = {
systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable {
description = "Container Device Interface (CDI) for Nvidia generator";
wantedBy = [ "multi-user.target" ];
after = [ "systemd-udev-settle.service" ];
serviceConfig = {
RuntimeDirectory = "cdi";
RemainAfterExit = true;
ExecStart =
let
script = pkgs.callPackage ./cdi-generate.nix { nvidia-driver = config.hardware.nvidia.package; };
in
lib.getExe script;
Type = "oneshot";
};
};
};
}

View File

@@ -0,0 +1,35 @@
{
glibc,
jq,
lib,
mounts,
nvidia-container-toolkit,
nvidia-driver,
runtimeShell,
writeScriptBin,
}: let
mkMount = {hostPath, containerPath, mountOptions}: {
inherit hostPath containerPath;
options = mountOptions;
};
jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
allJqMounts = lib.concatMap
(mount:
["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'"])
mounts;
in
writeScriptBin "nvidia-cdi-generator"
''
#! ${runtimeShell}
function cdiGenerate {
${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
--format json \
--ldconfig-path ${lib.getExe' glibc "ldconfig"} \
--library-search-path ${lib.getLib nvidia-driver}/lib \
--nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
}
cdiGenerate | \
${lib.concatStringsSep " | " allJqMounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
''
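
To make the jq pipeline above more concrete: `mkMount` turns each entry of the `mounts` argument into the shape the CDI spec expects, `builtins.toJSON` serialises it, and each `jq` invocation appends that object to `.containerEdits.mounts` of the spec emitted by `nvidia-ctk cdi generate`. A sketch of one such entry (the store path is illustrative):

  mkMount {
    hostPath = "/nix/store/…-nvidia-x11/bin/nvidia-smi";
    containerPath = "/usr/bin/nvidia-smi";
    mountOptions = [ "ro" "nosuid" "nodev" "bind" ];
  }
  # => { hostPath = "/nix/store/…-nvidia-x11/bin/nvidia-smi";
  #      containerPath = "/usr/bin/nvidia-smi";
  #      options = [ "ro" "nosuid" "nodev" "bind" ]; }

which jq then splices into the generated spec as one more element of the mounts array.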

View File

@@ -0,0 +1,121 @@
{ config, lib, pkgs, ... }:
{
imports = [
(lib.mkRenamedOptionModule
[ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
[ "hardware" "nvidia-container-toolkit" "enable" ])
];
options = let
mountType = {
options = {
hostPath = lib.mkOption {
type = lib.types.str;
description = "Host path.";
};
containerPath = lib.mkOption {
type = lib.types.str;
description = "Container path.";
};
mountOptions = lib.mkOption {
default = [ "ro" "nosuid" "nodev" "bind" ];
type = lib.types.listOf lib.types.str;
description = "Mount options.";
};
};
};
in {
hardware.nvidia-container-toolkit = {
enable = lib.mkOption {
default = false;
type = lib.types.bool;
description = ''
Enable dynamic CDI configuration for NVidia devices by running
nvidia-container-toolkit on boot.
'';
};
mounts = lib.mkOption {
type = lib.types.listOf (lib.types.submodule mountType);
default = [];
description = "Mounts to be added to every container under the Nvidia CDI profile.";
};
mount-nvidia-executables = lib.mkOption {
default = true;
type = lib.types.bool;
description = ''
Mount executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
nvidia-debugdump, nvidia-powerd and nvidia-ctk on containers.
'';
};
mount-nvidia-docker-1-directories = lib.mkOption {
default = true;
type = lib.types.bool;
description = ''
Mount nvidia-docker-1 directories on containers: /usr/local/nvidia/lib and
/usr/local/nvidia/lib64.
'';
};
};
};
config = {
hardware.nvidia-container-toolkit.mounts = let
nvidia-driver = config.hardware.nvidia.package;
in (lib.mkMerge [
[{ hostPath = pkgs.addDriverRunpath.driverLink;
containerPath = pkgs.addDriverRunpath.driverLink; }
{ hostPath = "${lib.getLib pkgs.glibc}/lib";
containerPath = "${lib.getLib pkgs.glibc}/lib"; }
{ hostPath = "${lib.getLib pkgs.glibc}/lib64";
containerPath = "${lib.getLib pkgs.glibc}/lib64"; }]
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables
[{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
containerPath = "/usr/bin/nvidia-debugdump"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
containerPath = "/usr/bin/nvidia-powerd"; }
{ hostPath = lib.getExe' nvidia-driver "nvidia-smi";
containerPath = "/usr/bin/nvidia-smi"; }])
# nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
# e.g.
# - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
# - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories
[{ hostPath = "${lib.getLib nvidia-driver}/lib";
containerPath = "/usr/local/nvidia/lib"; }
{ hostPath = "${lib.getLib nvidia-driver}/lib";
containerPath = "/usr/local/nvidia/lib64"; }])
]);
systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
description = "Container Device Interface (CDI) for Nvidia generator";
wantedBy = [ "multi-user.target" ];
after = [ "systemd-udev-settle.service" ];
serviceConfig = {
RuntimeDirectory = "cdi";
RemainAfterExit = true;
ExecStart =
let
script = pkgs.callPackage ./cdi-generate.nix {
inherit (config.hardware.nvidia-container-toolkit) mounts;
nvidia-driver = config.hardware.nvidia.package;
};
in
lib.getExe script;
Type = "oneshot";
};
};
};
}
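
A hedged configuration sketch against the options this module defines (the extra mount is hypothetical; `mountOptions` falls back to the `[ "ro" "nosuid" "nodev" "bind" ]` default):

  {
    hardware.nvidia-container-toolkit = {
      enable = true;
      # Drop the nvidia-docker 1.0 compatibility mounts when no image relies on
      # /usr/local/nvidia/lib{,64}:
      mount-nvidia-docker-1-directories = false;
      # Extra host paths to expose in every container using the NVIDIA CDI profile
      # (hypothetical example):
      mounts = [
        { hostPath = "/etc/vulkan/icd.d";
          containerPath = "/etc/vulkan/icd.d"; }
      ];
    };
  }

Because the generator service sets `RuntimeDirectory = "cdi"`, the resulting spec is written to `/run/cdi/nvidia-container-toolkit.json`, i.e. the dynamic half of the CDI layout described in the release notes.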

View File

@@ -28,43 +28,6 @@ in
description = "Enable the OCI seccomp BPF hook";
};
cdi = {
dynamic.nvidia.enable = mkOption {
type = types.bool;
default = false;
description = ''
Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
'';
};
static = mkOption {
type = types.attrs;
default = { };
description = ''
Declarative CDI specification. Each key of the attribute set
will be mapped to a file in /etc/cdi. It is required for every
key to be provided in JSON format.
'';
example = {
some-vendor = builtins.fromJSON ''
{
"cdiVersion": "0.5.0",
"kind": "some-vendor.com/foo",
"devices": [],
"containerEdits": []
}
'';
some-other-vendor = {
cdiVersion = "0.5.0";
kind = "some-other-vendor.com/bar";
devices = [];
containerEdits = [];
};
};
};
};
containersConf.settings = mkOption {
type = toml.type;
default = { };
@@ -150,8 +113,6 @@ in
config = lib.mkIf cfg.enable {
hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];
virtualisation.containers.containersConf.settings = {
@@ -163,13 +124,7 @@ in
};
};
environment.etc = let
cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
(name: value:
lib.attrsets.nameValuePair "cdi/${name}.json"
{ text = builtins.toJSON value; })
cfg.cdi.static);
in {
environment.etc = {
"containers/containers.conf".source =
toml.generate "containers.conf" cfg.containersConf.settings;
@@ -183,7 +138,7 @@ in
"containers/policy.json".source =
if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
else "${pkgs.skopeo.policy}/default-policy.json";
} // cdiStaticConfigurationFiles;
};
};
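
With `virtualisation.containers.cdi.static` removed here, a static spec under `/etc/cdi` can still be declared directly through `environment.etc`; a possible sketch reusing the example values from the removed option (not something this diff adds):

  {
    environment.etc."cdi/some-vendor.json".text = builtins.toJSON {
      cdiVersion = "0.5.0";
      kind = "some-vendor.com/foo";
      devices = [];
      containerEdits = [];
    };
  }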