cudaPackages: add cudaFlags (#205351)

* cudaPackages: add cudaFlags

* cudaNames -> cudaMicroarchitectureNames

* update documentation, remove config for static library removal

* doc link added to flags

* fix whitespace in assignment
Jason Miller 2022-12-17 01:00:36 -05:00 committed by GitHub
parent 08b5fc6d8c
commit 8392158289
9 changed files with 114 additions and 107 deletions

View File

@@ -32,3 +32,22 @@ mypkg = let
}});
in callPackage { inherit cudaPackages; };
```
The CUDA NVCC compiler requires flags to determine which hardware you
want to target, in terms of SASS (real hardware) or PTX (JIT kernels).
Nixpkgs tries to pick sensible real-architecture defaults based on the
CUDA toolkit version, with PTX support for future hardware. Experienced
users may tune this configuration for a variety of reasons, such as
reducing binary size and compile time, supporting legacy hardware, or
optimizing for specific hardware.
You may provide capabilities to add support or reduce binary size through
`config`, using `cudaCapabilities = [ "6.0" "7.0" ];`, and set
`cudaForwardCompat = true;` if you want PTX support for future hardware
(see the sketch below).
Please consult [GPUs supported](https://en.wikipedia.org/wiki/CUDA#GPUs_supported)
for your specific card(s).
Library maintainers should consult [NVCC Docs](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/)
and release notes for their software package.
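A minimal sketch of setting these options when importing Nixpkgs (the
capability values are illustrative, not a recommendation):

```nix
# Illustration only: pick the capabilities that match your GPUs.
let
  pkgs = import <nixpkgs> {
    config = {
      cudaSupport = true;                  # enable CUDA in packages that honor it
      cudaCapabilities = [ "6.0" "7.0" ];  # real architectures to emit SASS for
      cudaForwardCompat = true;            # also emit PTX for the newest capability
    };
  };
in
pkgs.cudaPackages.cudaFlags.cudaGencode   # e.g. inspect the resulting flags
```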

View File

@@ -2,11 +2,10 @@
, opencv3, gtest, blas, gomp, llvmPackages, perl
, cudaSupport ? config.cudaSupport or false, cudaPackages ? {}, nvidia_x11
, cudnnSupport ? cudaSupport
, cudaCapabilities ? [ "3.7" "5.0" "6.0" "7.0" "7.5" "8.0" "8.6" ]
}:
let
inherit (cudaPackages) cudatoolkit cudnn;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn;
in
assert cudnnSupport -> cudaSupport;
@@ -51,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${lib.concatStringsSep ";" cudaCapabilities}"
"-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

View File

@@ -10,6 +10,8 @@ final: prev: let
### Add classic cudatoolkit package
cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
cudaFlags = final.callPackage ./flags.nix {};
in {
inherit cudatoolkit;
inherit cudatoolkit cudaFlags;
}

View File

@@ -0,0 +1,78 @@
{ config
, lib
, cudatoolkit
}:
let
# Flags are determined based on your CUDA toolkit by default. You may benefit
# from improved performance, reduced file size, or greater hardware support by
# passing a configuration based on your specific GPU environment.
#
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
# config.cudaForwardCompat: bool for compatibility with future GPU generations
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
defaultCudaCapabilities = rec {
cuda9 = [
"3.0"
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
];
cuda10 = cuda9 ++ [
"7.5"
];
cuda11 = [
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
"7.5"
"8.0"
"8.6"
];
};
cudaMicroarchitectureNames = {
"3" = "Kepler";
"5" = "Maxwell";
"6" = "Pascal";
"7" = "Volta";
"8" = "Ampere";
"9" = "Hopper";
};
defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList;
capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX";
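# Helpers: dropDot turns "8.6" into "86"; archMapper prefixes a feature name,
# e.g. archMapper "sm" [ "8.6" ] => [ "sm_86" ]; gencodeMapper builds NVCC
# -gencode flags, e.g. gencodeMapper "sm" [ "8.6" ] => [ "-gencode=arch=compute_86,code=sm_86" ].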
dropDot = ver: builtins.replaceStrings ["."] [""] ver;
archMapper = feat: map (ver: "${feat}_${dropDot ver}");
gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}");
cudaRealArchs = archMapper "sm" cudaRealCapabilities;
cudaPTXArchs = archMapper "compute" cudaRealCapabilities;
cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ];
cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities);
cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward;
# Forward compatibility embeds PTX for the newest real capability. Pass the
# dotted capability (e.g. "8.6") so dropDot yields "compute_86"; passing
# "compute_86" itself would produce the invalid "compute_compute_86".
cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaRealCapabilities) ]);
cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities;
cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities;
cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities;
in
{
inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString
cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs;
}
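To make the mapping concrete, here is an illustrative evaluation of the
definitions above under an assumed configuration (not part of the commit):

```nix
# Assumed config (illustration only): cudaCapabilities = [ "8.0" "8.6" ],
# cudaForwardCompat = true. The definitions above then evaluate to:
#   cudaRealArchs = [ "sm_80" "sm_86" ]
#   cudaPTXArchs  = [ "compute_80" "compute_86" ]
#   cudaArchs     = [ "sm_80" "sm_86" "compute_86" ]
#   cudaGencode   = [
#     "-gencode=arch=compute_80,code=sm_80"
#     "-gencode=arch=compute_86,code=sm_86"
#     "-gencode=arch=compute_86,code=compute_86"
#   ]
#   cudaArchNames = [ "Ampere" ]
#   cudaCapabilitiesSemiColonString = "8.0;8.6;8.6+PTX"
```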

View File

@@ -1,7 +1,7 @@
{ lib, stdenv, fetchurl, cmake, gfortran, ninja, cudaPackages, libpthreadstubs, lapack, blas }:
let
inherit (cudaPackages) cudatoolkit;
inherit (cudaPackages) cudatoolkit cudaFlags;
in
assert let majorIs = lib.versions.major cudatoolkit.version;
@@ -10,36 +10,6 @@ assert let majorIs = lib.versions.major cudatoolkit.version;
let
version = "2.6.2";
# We define a specific set of CUDA compute capabilities here,
# because CUDA 11 does not support compute capability 3.0. Also,
# we use it to enable newer capabilities that are not enabled
# by magma by default. The list of supported architectures
# can be found in magma's top-level CMakeLists.txt.
cudaCapabilities = rec {
cuda9 = [
"Kepler" # 3.0, 3.5
"Maxwell" # 5.0
"Pascal" # 6.0
"Volta" # 7.0
];
cuda10 = [
"Turing" # 7.5
] ++ cuda9;
cuda11 = [
"sm_35" # sm_30 is not supported by CUDA 11
"Maxwell" # 5.0
"Pascal" # 6.0
"Volta" # 7.0
"Turing" # 7.5
"Ampere" # 8.0
];
};
capabilityString = lib.strings.concatStringsSep ","
cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
in stdenv.mkDerivation {
pname = "magma";
inherit version;
@@ -53,7 +23,9 @@ in stdenv.mkDerivation {
buildInputs = [ cudatoolkit libpthreadstubs lapack blas ];
cmakeFlags = [ "-DGPU_TARGET=${capabilityString}" ];
cmakeFlags = [
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}"
];
doCheck = false;

View File

@@ -41,7 +41,6 @@
, zlib
# CUDA flags:
, cudaCapabilities ? [ "sm_35" "sm_50" "sm_60" "sm_70" "sm_75" "compute_80" ]
, cudaSupport ? false
, cudaPackages ? {}
@@ -50,7 +49,7 @@
}:
let
inherit (cudaPackages) cudatoolkit cudnn nccl;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
pname = "jaxlib";
version = "0.3.22";
@@ -165,7 +164,7 @@ let
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${lib.concatStringsSep "," cudaCapabilities}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}"
'' + ''
CFG
'';

View File

@@ -22,8 +22,6 @@
, tensorboardSupport ? true
# XLA without CUDA is broken
, xlaSupport ? cudaSupport
# Default from ./configure script
, cudaCapabilities ? [ "sm_35" "sm_50" "sm_60" "sm_70" "sm_75" "compute_80" ]
, sse42Support ? stdenv.hostPlatform.sse4_2Support
, avx2Support ? stdenv.hostPlatform.avx2Support
, fmaSupport ? stdenv.hostPlatform.fmaSupport
@@ -32,7 +30,7 @@
}:
let
inherit (cudaPackages) cudatoolkit cudnn nccl;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in
assert cudaSupport -> cudatoolkit != null
@@ -305,7 +303,7 @@ let
TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArchs;
postPatch = ''
# bazel 3.3 should work just as well as bazel 3.1

View File

@@ -3,7 +3,6 @@
mklDnnSupport ? true, useSystemNccl ? true,
MPISupport ? false, mpi,
buildDocs ? false,
cudaArchList ? null,
# Native build inputs
cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
@@ -33,7 +32,7 @@
isPy3k, pythonOlder }:
let
inherit (cudaPackages) cudatoolkit cudnn nccl;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in
# assert that everything needed for cuda is present and that the correct cuda versions are used
@@ -52,64 +51,6 @@ let
paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
};
# Give an explicit list of supported architectures for the build. See:
# - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
# - pytorch-1.2.0 build on nixpkgs: https://github.com/NixOS/nixpkgs/pull/65041
#
# This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
# observing the fallback option (which selected all architectures known
# from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
# searching to find offending architectures.
#
# NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
# cuda architecture, so there is also now a problem around new architectures
# not being supported until explicitly added to this derivation.
#
# FIXME: CMake is throwing the following warning on pytorch-1.2:
#
# ```
# CMake Warning at cmake/public/utils.cmake:172 (message):
# In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
# to cmake instead of implicitly setting it as an env variable. This will
# become a FATAL_ERROR in future version of pytorch.
# ```
# If this is causing problems for your build, this derivation may have to strip
# away the standard `buildPythonPackage` and use the
# [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
# instructions. This will also add more flexibility around configurations
# (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
# derivation.
brokenArchs = [ "3.0" ]; # this variable is only used as documentation.
cudaCapabilities = rec {
cuda9 = [
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
"7.0+PTX" # I am getting a "undefined architecture compute_75" on cuda 9
# which leads me to believe this is the final cuda-9-compatible architecture.
];
cuda10 = cuda9 ++ [
"7.5"
"7.5+PTX" # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
];
cuda11 = cuda10 ++ [
"8.0"
"8.0+PTX" # < CUDA toolkit 11.0
"8.6"
"8.6+PTX" # < CUDA toolkit 11.1
];
};
final_cudaArchList =
if !cudaSupport || cudaArchList != null
then cudaArchList
else cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
# Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
# LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
# libcuda.so from cudatoolkit for running tests, so that we don't have
@@ -153,7 +94,7 @@ in buildPythonPackage rec {
];
preConfigure = lib.optionalString cudaSupport ''
export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}"
export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
'' + lib.optionalString (cudaSupport && cudnn != null) ''
export CUDNN_INCLUDE_DIR=${cudnn}/include
@@ -308,7 +249,6 @@ in buildPythonPackage rec {
passthru = {
inherit cudaSupport cudaPackages;
cudaArchList = final_cudaArchList;
# At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
blasProvider = blas.provider;
};

View File

@@ -15,7 +15,7 @@
}:
let
inherit (torch.cudaPackages) cudatoolkit cudnn;
inherit (torch.cudaPackages) cudatoolkit cudaFlags cudnn;
cudatoolkit_joined = symlinkJoin {
name = "${cudatoolkit.name}-unsplit";
@@ -45,7 +45,7 @@ in buildPythonPackage rec {
propagatedBuildInputs = [ numpy pillow torch scipy ];
preBuild = lib.optionalString cudaSupport ''
export TORCH_CUDA_ARCH_LIST="${cudaArchStr}"
export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}"
export FORCE_CUDA=1
'';