From e380b5303715016fca18df860d97340345b52aef Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Sun, 7 Apr 2024 16:54:05 +0200 Subject: [PATCH 1/2] rocmPackages.llvm: compress outputs of clang-offload-bundler This patches the clang-offload-bundler tool to add a compression option from a more recent version of clang. This compression option reduces the size of ROCm's fat binaries. Those binaries contain .hip_fatbin sections with GPU-specific code, for each target. Compression is automatically turned on for all produced outputs via a wrapper, because it's difficult to identify all the places where the -compression argument would be needed. Once upsteam introduces handeling for this argument, we should drop the wrapper again. This transistion will create inconsistsency, but I do not think that it will impact any actual users and it's what's practical to implement. --- ...compression-to-clang-offload-bundler.patch | 1191 +++++++++++++++++ pkgs/development/rocm-modules/6/llvm/base.nix | 13 +- .../6/llvm/stage-1/clang-unwrapped.nix | 9 +- .../0000-mlir-fix-debugtranslation.patch | 36 - .../rocm-modules/6/llvm/stage-3/mlir.nix | 10 +- 5 files changed, 1217 insertions(+), 42 deletions(-) create mode 100644 pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/0000-mlir-fix-debugtranslation.patch diff --git a/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch b/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch new file mode 100644 index 000000000000..8589ac527dcb --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch @@ -0,0 +1,1191 @@ +From 058d9ba2f54a8c7c47b6522db808db87fed82e78 Mon Sep 17 00:00:00 2001 +From: "Yaxun (Sam) Liu" +Date: Thu, 4 Apr 2024 03:25:28 +0200 +Subject: [PATCH] backport 7e2823438e920d25364ff92b62ad90020c31bb59 + +Reland "[HIP] Support compressing device binary" + +Original PR: https://github.com/llvm/llvm-project/pull/67162 + +The commit was reverted due to UB detected by santizer: + +https://lab.llvm.org/buildbot/#/builders/238/builds/5955 + +clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error: + load of misaligned address 0xaaaae2d90e7c for type + 'const uint64_t' (aka 'const unsigned long'), which + requires 8 byte alignment + +It was fixed by using memcpy instead of dereferencing int* +casted from unaligned char*. + +Co-Authored-By: Martin Schwaighofer +(only did the backport) +--- + clang/docs/ClangOffloadBundler.rst | 27 ++ + clang/include/clang/Driver/OffloadBundler.h | 37 ++ + clang/include/clang/Driver/Options.td | 5 + + clang/lib/Driver/OffloadBundler.cpp | 346 +++++++++++++++--- + clang/lib/Driver/ToolChains/Clang.cpp | 7 + + clang/lib/Driver/ToolChains/HIPUtility.cpp | 6 + + .../test/Driver/clang-offload-bundler-zlib.c | 75 ++++ + .../test/Driver/clang-offload-bundler-zstd.c | 72 ++++ + .../test/Driver/hip-offload-compress-zlib.hip | 45 +++ + .../test/Driver/hip-offload-compress-zstd.hip | 45 +++ + .../clang-offload-bundler/CMakeLists.txt | 1 + + .../ClangOffloadBundler.cpp | 10 + + llvm/include/llvm/BinaryFormat/Magic.h | 28 +- + llvm/lib/BinaryFormat/Magic.cpp | 11 + + llvm/lib/Object/Binary.cpp | 2 + + llvm/lib/Object/ObjectFile.cpp | 2 + + 16 files changed, 659 insertions(+), 60 deletions(-) + create mode 100644 clang/test/Driver/clang-offload-bundler-zlib.c + create mode 100644 clang/test/Driver/clang-offload-bundler-zstd.c + create mode 100644 clang/test/Driver/hip-offload-compress-zlib.hip + create mode 100644 clang/test/Driver/hip-offload-compress-zstd.hip + +diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst +index 432da787249b..d47997bf718d 100644 +--- a/clang/docs/ClangOffloadBundler.rst ++++ b/clang/docs/ClangOffloadBundler.rst +@@ -498,3 +498,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if: + Verbose printing of matched/unmatched comparisons between bundle entry id of + a device binary from HDA and bundle entry ID of a given target processor + (see :ref:`compatibility-bundle-entry-id`). ++ ++Compression and Decompression ++============================= ++ ++``clang-offload-bundler`` provides features to compress and decompress the full ++bundle, leveraging inherent redundancies within the bundle entries. Use the ++`-compress` command-line option to enable this compression capability. ++ ++The compressed offload bundle begins with a header followed by the compressed binary data: ++ ++- **Magic Number (4 bytes)**: ++ This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle). ++ ++- **Version Number (16-bit unsigned int)**: ++ This denotes the version of the compressed offload bundle format. The current version is `1`. ++ ++- **Compression Method (16-bit unsigned int)**: ++ This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration. ++ ++- **Uncompressed Binary Size (32-bit unsigned int)**: ++ This is the size (in bytes) of the binary data before it was compressed. ++ ++- **Hash (64-bit unsigned int)**: ++ This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes. ++ ++- **Compressed Data**: ++ The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size. +diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h +index fe263f0540b9..fc96f200414d 100644 +--- a/clang/include/clang/Driver/OffloadBundler.h ++++ b/clang/include/clang/Driver/OffloadBundler.h +@@ -19,6 +19,7 @@ + + #include "llvm/Support/Error.h" + #include "llvm/TargetParser/Triple.h" ++#include + #include + #include + +@@ -26,11 +27,15 @@ namespace clang { + + class OffloadBundlerConfig { + public: ++ OffloadBundlerConfig(); ++ + bool AllowNoHost = false; + bool AllowMissingBundles = false; + bool CheckInputArchive = false; + bool PrintExternalCommands = false; + bool HipOpenmpCompatible = false; ++ bool Compress = false; ++ bool Verbose = false; + + unsigned BundleAlignment = 1; + unsigned HostInputIndex = ~0u; +@@ -82,6 +87,38 @@ struct OffloadTargetInfo { + std::string str() const; + }; + ++// CompressedOffloadBundle represents the format for the compressed offload ++// bundles. ++// ++// The format is as follows: ++// - Magic Number (4 bytes) - A constant "CCOB". ++// - Version (2 bytes) ++// - Compression Method (2 bytes) - Uses the values from ++// llvm::compression::Format. ++// - Uncompressed Size (4 bytes). ++// - Truncated MD5 Hash (8 bytes). ++// - Compressed Data (variable length). ++ ++class CompressedOffloadBundle { ++private: ++ static inline const size_t MagicSize = 4; ++ static inline const size_t VersionFieldSize = sizeof(uint16_t); ++ static inline const size_t MethodFieldSize = sizeof(uint16_t); ++ static inline const size_t SizeFieldSize = sizeof(uint32_t); ++ static inline const size_t HashFieldSize = 8; ++ static inline const size_t HeaderSize = MagicSize + VersionFieldSize + ++ MethodFieldSize + SizeFieldSize + ++ HashFieldSize; ++ static inline const llvm::StringRef MagicNumber = "CCOB"; ++ static inline const uint16_t Version = 1; ++ ++public: ++ static llvm::Expected> ++ compress(const llvm::MemoryBuffer &Input, bool Verbose = false); ++ static llvm::Expected> ++ decompress(const llvm::MemoryBuffer &Input, bool Verbose = false); ++}; ++ + } // namespace clang + + #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index c206ab84508f..f8df98678396 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -984,6 +984,11 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group + def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">, + InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating " + "'-aux-target-cpu' and '-aux-target-feature'.">; ++ ++def offload_compress : Flag<["--"], "offload-compress">, ++ HelpText<"Compress offload device binaries (HIP only)">; ++def no_offload_compress : Flag<["--"], "no-offload-compress">; ++ + def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>, + HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; + def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, +diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp +index 29250c49eb0c..69e14679da1c 100644 +--- a/clang/lib/Driver/OffloadBundler.cpp ++++ b/clang/lib/Driver/OffloadBundler.cpp +@@ -21,24 +21,29 @@ + #include "llvm/ADT/ArrayRef.h" + #include "llvm/ADT/SmallString.h" + #include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/StringMap.h" + #include "llvm/ADT/StringRef.h" ++#include "llvm/BinaryFormat/Magic.h" + #include "llvm/Object/Archive.h" + #include "llvm/Object/ArchiveWriter.h" + #include "llvm/Object/Binary.h" + #include "llvm/Object/ObjectFile.h" + #include "llvm/Support/Casting.h" ++#include "llvm/Support/Compression.h" + #include "llvm/Support/Debug.h" + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/Errc.h" + #include "llvm/Support/Error.h" + #include "llvm/Support/ErrorOr.h" + #include "llvm/Support/FileSystem.h" ++#include "llvm/Support/MD5.h" + #include "llvm/Support/MemoryBuffer.h" + #include "llvm/Support/Path.h" + #include "llvm/Support/Program.h" + #include "llvm/Support/Signals.h" + #include "llvm/Support/StringSaver.h" ++#include "llvm/Support/Timer.h" + #include "llvm/Support/WithColor.h" + #include "llvm/Support/raw_ostream.h" + #include "llvm/TargetParser/Host.h" +@@ -48,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -58,6 +64,10 @@ using namespace llvm; + using namespace llvm::object; + using namespace clang; + ++static llvm::TimerGroup ++ ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group", ++ "Timer group for clang offload bundler"); ++ + /// Magic string that marks the existence of offloading data. + #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" + +@@ -229,20 +239,22 @@ public: + + /// Write the header of the bundled file to \a OS based on the information + /// gathered from \a Inputs. +- virtual Error WriteHeader(raw_fd_ostream &OS, ++ virtual Error WriteHeader(raw_ostream &OS, + ArrayRef> Inputs) = 0; + + /// Write the marker that initiates a bundle for the triple \a TargetTriple to + /// \a OS. +- virtual Error WriteBundleStart(raw_fd_ostream &OS, +- StringRef TargetTriple) = 0; ++ virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0; + + /// Write the marker that closes a bundle for the triple \a TargetTriple to \a + /// OS. +- virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0; ++ virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0; + + /// Write the bundle from \a Input into \a OS. +- virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0; ++ virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0; ++ ++ /// Finalize output file. ++ virtual Error finalizeOutputFile() { return Error::success(); } + + /// List bundle IDs in \a Input. + virtual Error listBundleIDs(MemoryBuffer &Input) { +@@ -330,7 +342,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) { + } + + /// Write 8-byte integers to a buffer in little-endian format. +-static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) { ++static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) { + llvm::support::endian::write(OS, Val, llvm::support::little); + } + +@@ -378,8 +390,7 @@ public: + return Error::success(); + + // Check if no magic was found. +- StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1); +- if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR)) ++ if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle) + return Error::success(); + + // Read number of bundles. +@@ -454,7 +465,7 @@ public: + return Error::success(); + } + +- Error WriteHeader(raw_fd_ostream &OS, ++ Error WriteHeader(raw_ostream &OS, + ArrayRef> Inputs) final { + + // Compute size of the header. +@@ -491,19 +502,27 @@ public: + return Error::success(); + } + +- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { + CurWriteBundleTarget = TargetTriple.str(); + return Error::success(); + } + +- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { + return Error::success(); + } + +- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { ++ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { + auto BI = BundlesInfo[CurWriteBundleTarget]; +- OS.seek(BI.Offset); ++ ++ // Pad with 0 to reach specified offset. ++ size_t CurrentPos = OS.tell(); ++ size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0; ++ for (size_t I = 0; I < PaddingSize; ++I) ++ OS.write('\0'); ++ assert(OS.tell() == BI.Offset); ++ + OS.write(Input.getBufferStart(), Input.getBufferSize()); ++ + return Error::success(); + } + }; +@@ -560,7 +579,7 @@ class ObjectFileHandler final : public FileHandler { + return NameOrErr.takeError(); + + // If it does not start with the reserved suffix, just skip this section. +- if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR)) ++ if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle) + return std::nullopt; + + // Return the triple that is right after the reserved prefix. +@@ -625,7 +644,7 @@ public: + return Error::success(); + } + +- Error WriteHeader(raw_fd_ostream &OS, ++ Error WriteHeader(raw_ostream &OS, + ArrayRef> Inputs) final { + assert(BundlerConfig.HostInputIndex != ~0u && + "Host input index not defined."); +@@ -635,12 +654,16 @@ public: + return Error::success(); + } + +- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { + ++NumberOfProcessedInputs; + return Error::success(); + } + +- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { ++ return Error::success(); ++ } ++ ++ Error finalizeOutputFile() final { + assert(NumberOfProcessedInputs <= NumberOfInputs && + "Processing more inputs that actually exist!"); + assert(BundlerConfig.HostInputIndex != ~0u && +@@ -658,10 +681,6 @@ public: + assert(BundlerConfig.ObjcopyPath != "" && + "llvm-objcopy path not specified"); + +- // We write to the output file directly. So, we close it and use the name +- // to pass down to llvm-objcopy. +- OS.close(); +- + // Temporary files that need to be removed. + TempFileHandlerRAII TempFiles; + +@@ -702,7 +721,7 @@ public: + return Error::success(); + } + +- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { ++ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { + return Error::success(); + } + +@@ -799,22 +818,22 @@ protected: + return Error::success(); + } + +- Error WriteHeader(raw_fd_ostream &OS, ++ Error WriteHeader(raw_ostream &OS, + ArrayRef> Inputs) final { + return Error::success(); + } + +- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { + OS << BundleStartString << TargetTriple << "\n"; + return Error::success(); + } + +- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { ++ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { + OS << BundleEndString << TargetTriple << "\n"; + return Error::success(); + } + +- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { ++ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { + OS << Input.getBuffer(); + return Error::success(); + } +@@ -899,6 +918,187 @@ CreateFileHandler(MemoryBuffer &FirstInput, + "'" + FilesType + "': invalid file type specified"); + } + ++OffloadBundlerConfig::OffloadBundlerConfig() { ++ auto IgnoreEnvVarOpt = ++ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR"); ++ if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1") ++ return; ++ ++ auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE"); ++ if (VerboseEnvVarOpt.has_value()) ++ Verbose = VerboseEnvVarOpt.value() == "1"; ++ ++ auto CompressEnvVarOpt = ++ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS"); ++ if (CompressEnvVarOpt.has_value()) ++ Compress = CompressEnvVarOpt.value() == "1"; ++} ++ ++llvm::Expected> ++CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input, ++ bool Verbose) { ++ llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time", ++ ClangOffloadBundlerTimerGroup); ++ if (Verbose) ++ HashTimer.startTimer(); ++ llvm::MD5 Hash; ++ llvm::MD5::MD5Result Result; ++ Hash.update(Input.getBuffer()); ++ Hash.final(Result); ++ uint64_t TruncatedHash = Result.low(); ++ if (Verbose) ++ HashTimer.stopTimer(); ++ ++ SmallVector CompressedBuffer; ++ auto BufferUint8 = llvm::ArrayRef( ++ reinterpret_cast(Input.getBuffer().data()), ++ Input.getBuffer().size()); ++ ++ llvm::compression::Format CompressionFormat; ++ ++ if (llvm::compression::zstd::isAvailable()) ++ CompressionFormat = llvm::compression::Format::Zstd; ++ else if (llvm::compression::zlib::isAvailable()) ++ CompressionFormat = llvm::compression::Format::Zlib; ++ else ++ return createStringError(llvm::inconvertibleErrorCode(), ++ "Compression not supported"); ++ ++ llvm::Timer CompressTimer("Compression Timer", "Compression time", ++ ClangOffloadBundlerTimerGroup); ++ if (Verbose) ++ CompressTimer.startTimer(); ++ llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer); ++ if (Verbose) ++ CompressTimer.stopTimer(); ++ ++ uint16_t CompressionMethod = static_cast(CompressionFormat); ++ uint32_t UncompressedSize = Input.getBuffer().size(); ++ ++ SmallVector FinalBuffer; ++ llvm::raw_svector_ostream OS(FinalBuffer); ++ OS << MagicNumber; ++ OS.write(reinterpret_cast(&Version), sizeof(Version)); ++ OS.write(reinterpret_cast(&CompressionMethod), ++ sizeof(CompressionMethod)); ++ OS.write(reinterpret_cast(&UncompressedSize), ++ sizeof(UncompressedSize)); ++ OS.write(reinterpret_cast(&TruncatedHash), ++ sizeof(TruncatedHash)); ++ OS.write(reinterpret_cast(CompressedBuffer.data()), ++ CompressedBuffer.size()); ++ ++ if (Verbose) { ++ auto MethodUsed = ++ CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib"; ++ llvm::errs() << "Compressed bundle format version: " << Version << "\n" ++ << "Compression method used: " << MethodUsed << "\n" ++ << "Binary size before compression: " << UncompressedSize ++ << " bytes\n" ++ << "Binary size after compression: " << CompressedBuffer.size() ++ << " bytes\n" ++ << "Truncated MD5 hash: " ++ << llvm::format_hex(TruncatedHash, 16) << "\n"; ++ } ++ ++ return llvm::MemoryBuffer::getMemBufferCopy( ++ llvm::StringRef(FinalBuffer.data(), FinalBuffer.size())); ++} ++ ++llvm::Expected> ++CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, ++ bool Verbose) { ++ ++ StringRef Blob = Input.getBuffer(); ++ ++ if (Blob.size() < HeaderSize) { ++ return llvm::MemoryBuffer::getMemBufferCopy(Blob); ++ } ++ if (llvm::identify_magic(Blob) != ++ llvm::file_magic::offload_bundle_compressed) { ++ if (Verbose) ++ llvm::errs() << "Uncompressed bundle.\n"; ++ return llvm::MemoryBuffer::getMemBufferCopy(Blob); ++ } ++ ++ uint16_t ThisVersion; ++ uint16_t CompressionMethod; ++ uint32_t UncompressedSize; ++ uint64_t StoredHash; ++ memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(), ++ sizeof(uint16_t)); ++ memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize, ++ sizeof(uint16_t)); ++ memcpy(&UncompressedSize, ++ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize, ++ sizeof(uint32_t)); ++ memcpy(&StoredHash, ++ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize + ++ SizeFieldSize, ++ sizeof(uint64_t)); ++ ++ llvm::compression::Format CompressionFormat; ++ if (CompressionMethod == ++ static_cast(llvm::compression::Format::Zlib)) ++ CompressionFormat = llvm::compression::Format::Zlib; ++ else if (CompressionMethod == ++ static_cast(llvm::compression::Format::Zstd)) ++ CompressionFormat = llvm::compression::Format::Zstd; ++ else ++ return createStringError(inconvertibleErrorCode(), ++ "Unknown compressing method"); ++ ++ llvm::Timer DecompressTimer("Decompression Timer", "Decompression time", ++ ClangOffloadBundlerTimerGroup); ++ if (Verbose) ++ DecompressTimer.startTimer(); ++ ++ SmallVector DecompressedData; ++ StringRef CompressedData = Blob.substr(HeaderSize); ++ if (llvm::Error DecompressionError = llvm::compression::decompress( ++ CompressionFormat, llvm::arrayRefFromStringRef(CompressedData), ++ DecompressedData, UncompressedSize)) ++ return createStringError(inconvertibleErrorCode(), ++ "Could not decompress embedded file contents: " + ++ llvm::toString(std::move(DecompressionError))); ++ ++ if (Verbose) { ++ DecompressTimer.stopTimer(); ++ ++ // Recalculate MD5 hash ++ llvm::Timer HashRecalcTimer("Hash Recalculation Timer", ++ "Hash recalculation time", ++ ClangOffloadBundlerTimerGroup); ++ HashRecalcTimer.startTimer(); ++ llvm::MD5 Hash; ++ llvm::MD5::MD5Result Result; ++ Hash.update(llvm::ArrayRef(DecompressedData.data(), ++ DecompressedData.size())); ++ Hash.final(Result); ++ uint64_t RecalculatedHash = Result.low(); ++ HashRecalcTimer.stopTimer(); ++ bool HashMatch = (StoredHash == RecalculatedHash); ++ ++ llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n" ++ << "Decompression method: " ++ << (CompressionFormat == llvm::compression::Format::Zlib ++ ? "zlib" ++ : "zstd") ++ << "\n" ++ << "Size before decompression: " << CompressedData.size() ++ << " bytes\n" ++ << "Size after decompression: " << UncompressedSize ++ << " bytes\n" ++ << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n" ++ << "Recalculated hash: " ++ << llvm::format_hex(RecalculatedHash, 16) << "\n" ++ << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n"; ++ } ++ ++ return llvm::MemoryBuffer::getMemBufferCopy( ++ llvm::toStringRef(DecompressedData)); ++} ++ + // List bundle IDs. Return true if an error was found. + Error OffloadBundler::ListBundleIDsInFile( + StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { +@@ -908,28 +1108,35 @@ Error OffloadBundler::ListBundleIDsInFile( + if (std::error_code EC = CodeOrErr.getError()) + return createFileError(InputFileName, EC); + +- MemoryBuffer &Input = **CodeOrErr; ++ // Decompress the input if necessary. ++ Expected> DecompressedBufferOrErr = ++ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose); ++ if (!DecompressedBufferOrErr) ++ return createStringError( ++ inconvertibleErrorCode(), ++ "Failed to decompress input: " + ++ llvm::toString(DecompressedBufferOrErr.takeError())); ++ ++ MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr; + + // Select the right files handler. + Expected> FileHandlerOrErr = +- CreateFileHandler(Input, BundlerConfig); ++ CreateFileHandler(DecompressedInput, BundlerConfig); + if (!FileHandlerOrErr) + return FileHandlerOrErr.takeError(); + + std::unique_ptr &FH = *FileHandlerOrErr; + assert(FH); +- return FH->listBundleIDs(Input); ++ return FH->listBundleIDs(DecompressedInput); + } + + /// Bundle the files. Return true if an error was found. + Error OffloadBundler::BundleFiles() { + std::error_code EC; + +- // Create output file. +- raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC, +- sys::fs::OF_None); +- if (EC) +- return createFileError(BundlerConfig.OutputFileNames.front(), EC); ++ // Create a buffer to hold the content before compressing. ++ SmallVector Buffer; ++ llvm::raw_svector_ostream BufferStream(Buffer); + + // Open input files. + SmallVector, 8u> InputBuffers; +@@ -956,22 +1163,46 @@ Error OffloadBundler::BundleFiles() { + assert(FH); + + // Write header. +- if (Error Err = FH->WriteHeader(OutputFile, InputBuffers)) ++ if (Error Err = FH->WriteHeader(BufferStream, InputBuffers)) + return Err; + + // Write all bundles along with the start/end markers. If an error was found + // writing the end of the bundle component, abort the bundle writing. + auto Input = InputBuffers.begin(); + for (auto &Triple : BundlerConfig.TargetNames) { +- if (Error Err = FH->WriteBundleStart(OutputFile, Triple)) ++ if (Error Err = FH->WriteBundleStart(BufferStream, Triple)) + return Err; +- if (Error Err = FH->WriteBundle(OutputFile, **Input)) ++ if (Error Err = FH->WriteBundle(BufferStream, **Input)) + return Err; +- if (Error Err = FH->WriteBundleEnd(OutputFile, Triple)) ++ if (Error Err = FH->WriteBundleEnd(BufferStream, Triple)) + return Err; + ++Input; + } +- return Error::success(); ++ ++ raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC, ++ sys::fs::OF_None); ++ if (EC) ++ return createFileError(BundlerConfig.OutputFileNames.front(), EC); ++ ++ SmallVector CompressedBuffer; ++ if (BundlerConfig.Compress) { ++ std::unique_ptr BufferMemory = ++ llvm::MemoryBuffer::getMemBufferCopy( ++ llvm::StringRef(Buffer.data(), Buffer.size())); ++ auto CompressionResult = ++ CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose); ++ if (auto Error = CompressionResult.takeError()) ++ return Error; ++ ++ auto CompressedMemBuffer = std::move(CompressionResult.get()); ++ CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(), ++ CompressedMemBuffer->getBufferEnd()); ++ } else ++ CompressedBuffer = Buffer; ++ ++ OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size()); ++ ++ return FH->finalizeOutputFile(); + } + + // Unbundle the files. Return true if an error was found. +@@ -982,7 +1213,16 @@ Error OffloadBundler::UnbundleFiles() { + if (std::error_code EC = CodeOrErr.getError()) + return createFileError(BundlerConfig.InputFileNames.front(), EC); + +- MemoryBuffer &Input = **CodeOrErr; ++ // Decompress the input if necessary. ++ Expected> DecompressedBufferOrErr = ++ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose); ++ if (!DecompressedBufferOrErr) ++ return createStringError( ++ inconvertibleErrorCode(), ++ "Failed to decompress input: " + ++ llvm::toString(DecompressedBufferOrErr.takeError())); ++ ++ MemoryBuffer &Input = **DecompressedBufferOrErr; + + // Select the right files handler. + Expected> FileHandlerOrErr = +@@ -1357,22 +1597,34 @@ Error OffloadBundler::UnbundleArchive() { + if (!CodeObjectBufferRefOrErr) + return CodeObjectBufferRefOrErr.takeError(); + +- auto CodeObjectBuffer = ++ auto TempCodeObjectBuffer = + MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false); + ++ // Decompress the buffer if necessary. ++ Expected> DecompressedBufferOrErr = ++ CompressedOffloadBundle::decompress(*TempCodeObjectBuffer, ++ BundlerConfig.Verbose); ++ if (!DecompressedBufferOrErr) ++ return createStringError( ++ inconvertibleErrorCode(), ++ "Failed to decompress code object: " + ++ llvm::toString(DecompressedBufferOrErr.takeError())); ++ ++ MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr; ++ + Expected> FileHandlerOrErr = +- CreateFileHandler(*CodeObjectBuffer, BundlerConfig); ++ CreateFileHandler(CodeObjectBuffer, BundlerConfig); + if (!FileHandlerOrErr) + return FileHandlerOrErr.takeError(); + + std::unique_ptr &FileHandler = *FileHandlerOrErr; + assert(FileHandler); + +- if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer)) ++ if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer)) + return ReadErr; + + Expected> CurBundleIDOrErr = +- FileHandler->ReadBundleStart(*CodeObjectBuffer); ++ FileHandler->ReadBundleStart(CodeObjectBuffer); + if (!CurBundleIDOrErr) + return CurBundleIDOrErr.takeError(); + +@@ -1393,7 +1645,7 @@ Error OffloadBundler::UnbundleArchive() { + BundlerConfig)) { + std::string BundleData; + raw_string_ostream DataStream(BundleData); +- if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer)) ++ if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer)) + return Err; + + for (auto &CompatibleTarget : CompatibleTargets) { +@@ -1431,11 +1683,11 @@ Error OffloadBundler::UnbundleArchive() { + } + } + +- if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer)) ++ if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer)) + return Err; + + Expected> NextTripleOrErr = +- FileHandler->ReadBundleStart(*CodeObjectBuffer); ++ FileHandler->ReadBundleStart(CodeObjectBuffer); + if (!NextTripleOrErr) + return NextTripleOrErr.takeError(); + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 9e1f07d78bf1..274875c631c5 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -8400,6 +8400,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, + } + CmdArgs.push_back(TCArgs.MakeArgString(UB)); + } ++ if (TCArgs.hasFlag(options::OPT_offload_compress, ++ options::OPT_no_offload_compress, false)) ++ CmdArgs.push_back("-compress"); ++ if (TCArgs.hasArg(options::OPT_v)) ++ CmdArgs.push_back("-verbose"); + // All the inputs are encoded as commands. + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), +@@ -8494,6 +8499,8 @@ void OffloadBundler::ConstructJobMultipleOutputs( + } + CmdArgs.push_back("-unbundle"); + CmdArgs.push_back("-allow-missing-bundles"); ++ if (TCArgs.hasArg(options::OPT_v)) ++ CmdArgs.push_back("-verbose"); + + // All the inputs are encoded as commands. + C.addCommand(std::make_unique( +diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp +index 8b9d8db90ffa..04efdcba20ea 100644 +--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp ++++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp +@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, + Args.MakeArgString(std::string("-output=").append(Output)); + BundlerArgs.push_back(BundlerOutputArg); + ++ if (Args.hasFlag(options::OPT_offload_compress, ++ options::OPT_no_offload_compress, false)) ++ BundlerArgs.push_back("-compress"); ++ if (Args.hasArg(options::OPT_v)) ++ BundlerArgs.push_back("-verbose"); ++ + const char *Bundler = Args.MakeArgString( + T.getToolChain().GetProgramPath("clang-offload-bundler")); + C.addCommand(std::make_unique( +diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c +new file mode 100644 +index 000000000000..c46c32a4a053 +--- /dev/null ++++ b/clang/test/Driver/clang-offload-bundler-zlib.c +@@ -0,0 +1,75 @@ ++// REQUIRES: zlib ++// REQUIRES: x86-registered-target ++// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} ++ ++// ++// Generate the host binary to be bundled. ++// ++// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc ++ ++// ++// Generate an empty file to help with the checks of empty files. ++// ++// RUN: touch %t.empty ++ ++// ++// Generate device binaries to be bundled. ++// ++// RUN: echo 'Content of device file 1' > %t.tgt1 ++// RUN: echo 'Content of device file 2' > %t.tgt2 ++ ++// ++// Check compression/decompression of offload bundle. ++// ++// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \ ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \ ++// RUN: FileCheck -check-prefix=COMPRESS %s ++// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s ++// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \ ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \ ++// RUN: FileCheck -check-prefix=DECOMPRESS %s ++// RUN: diff %t.tgt1 %t.res.tgt1 ++// RUN: diff %t.tgt2 %t.res.tgt2 ++ ++// ++// COMPRESS: Compression method used: ++// DECOMPRESS: Decompression method: ++// NOHOST-NOT: host- ++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 ++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 ++// ++ ++// ++// Check -bundle-align option. ++// ++ ++// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress ++// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle ++// RUN: diff %t.bc %t.res.bc ++// RUN: diff %t.tgt1 %t.res.tgt1 ++// RUN: diff %t.tgt2 %t.res.tgt2 ++ ++// ++// Check unbundling archive. ++// ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress ++// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc ++// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a ++// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s ++// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s ++// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900 ++// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900 ++// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 ++// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 ++ ++// Some code so that we can create a binary out of this file. ++int A = 0; ++void test_func(void) { ++ ++A; ++} +diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c +new file mode 100644 +index 000000000000..b2b588b72d4d +--- /dev/null ++++ b/clang/test/Driver/clang-offload-bundler-zstd.c +@@ -0,0 +1,72 @@ ++// REQUIRES: zstd ++// REQUIRES: x86-registered-target ++// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} ++ ++// ++// Generate the host binary to be bundled. ++// ++// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc ++ ++// ++// Generate an empty file to help with the checks of empty files. ++// ++// RUN: touch %t.empty ++ ++// ++// Generate device binaries to be bundled. ++// ++// RUN: echo 'Content of device file 1' > %t.tgt1 ++// RUN: echo 'Content of device file 2' > %t.tgt2 ++ ++// ++// Check compression/decompression of offload bundle. ++// ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \ ++// RUN: FileCheck -check-prefix=COMPRESS %s ++// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \ ++// RUN: FileCheck -check-prefix=DECOMPRESS %s ++// RUN: diff %t.tgt1 %t.res.tgt1 ++// RUN: diff %t.tgt2 %t.res.tgt2 ++// ++// COMPRESS: Compression method used ++// DECOMPRESS: Decompression method ++// NOHOST-NOT: host- ++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 ++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 ++// ++ ++// ++// Check -bundle-align option. ++// ++ ++// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress ++// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle ++// RUN: diff %t.bc %t.res.bc ++// RUN: diff %t.tgt1 %t.res.tgt1 ++// RUN: diff %t.tgt2 %t.res.tgt2 ++ ++// ++// Check unbundling archive. ++// ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress ++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress ++// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc ++// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ ++// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a ++// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s ++// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s ++// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900 ++// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900 ++// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 ++// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 ++ ++// Some code so that we can create a binary out of this file. ++int A = 0; ++void test_func(void) { ++ ++A; ++} +diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip +new file mode 100644 +index 000000000000..a29b6d037350 +--- /dev/null ++++ b/clang/test/Driver/hip-offload-compress-zlib.hip +@@ -0,0 +1,45 @@ ++// REQUIRES: zlib ++// REQUIRES: x86-registered-target ++// REQUIRES: amdgpu-registered-target ++ ++// Test compress bundled bitcode. ++ ++// RUN: rm -rf %T/a.bc ++// RUN: %clang -c -v --target=x86_64-linux-gnu \ ++// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -fgpu-rdc -nogpuinc -nogpulib \ ++// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ ++// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ ++// RUN: -o %T/a.bc \ ++// RUN: 2>&1 | FileCheck %s ++ ++// CHECK: clang-offload-bundler{{.*}} -type=bc ++// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 ++// CHECK-SAME: -compress -verbose ++// CHECK: Compressed bundle format ++ ++// Test uncompress of bundled bitcode. ++ ++// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \ ++// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -fgpu-rdc -nogpulib \ ++// RUN: %T/a.bc --offload-device-only \ ++// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s ++ ++// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc" ++// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 ++// UNBUNDLE-SAME: -unbundle ++// UNBUNDLE-SAME: -verbose ++ ++// Test compress bundled code objects. ++ ++// RUN: %clang -c -### -v --target=x86_64-linux-gnu \ ++// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -nogpuinc -nogpulib \ ++// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ ++// RUN: --offload-compress \ ++// RUN: 2>&1 | FileCheck -check-prefix=CO %s ++ ++// CO: clang-offload-bundler{{.*}} "-type=o" ++// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 ++// CO-SAME: "-compress" "-verbose" +diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip +new file mode 100644 +index 000000000000..688c2c85329c +--- /dev/null ++++ b/clang/test/Driver/hip-offload-compress-zstd.hip +@@ -0,0 +1,45 @@ ++// REQUIRES: zstd ++// REQUIRES: x86-registered-target ++// REQUIRES: amdgpu-registered-target ++ ++// Test compress bundled bitcode. ++ ++// RUN: rm -rf %T/a.bc ++// RUN: %clang -c -v --target=x86_64-linux-gnu \ ++// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -fgpu-rdc -nogpuinc -nogpulib \ ++// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ ++// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ ++// RUN: -o %T/a.bc \ ++// RUN: 2>&1 | FileCheck %s ++ ++// CHECK: clang-offload-bundler{{.*}} -type=bc ++// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 ++// CHECK-SAME: -compress -verbose ++// CHECK: Compressed bundle format ++ ++// Test uncompress of bundled bitcode. ++ ++// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \ ++// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -fgpu-rdc -nogpulib \ ++// RUN: %T/a.bc --offload-device-only \ ++// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s ++ ++// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc" ++// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 ++// UNBUNDLE-SAME: -unbundle ++// UNBUNDLE-SAME: -verbose ++ ++// Test compress bundled code objects. ++ ++// RUN: %clang -c -### -v --target=x86_64-linux-gnu \ ++// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ ++// RUN: -nogpuinc -nogpulib \ ++// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ ++// RUN: --offload-compress \ ++// RUN: 2>&1 | FileCheck -check-prefix=CO %s ++ ++// CO: clang-offload-bundler{{.*}} "-type=o" ++// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 ++// CO-SAME: "-compress" "-verbose" +diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt +index dabd82382cdf..dec2881589a5 100644 +--- a/clang/tools/clang-offload-bundler/CMakeLists.txt ++++ b/clang/tools/clang-offload-bundler/CMakeLists.txt +@@ -1,4 +1,5 @@ + set(LLVM_LINK_COMPONENTS ++ BinaryFormat + Object + Support + TargetParser +diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +index c02b5854bded..68f29807b219 100644 +--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp ++++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +@@ -141,6 +141,11 @@ int main(int argc, const char **argv) { + cl::desc("Treat hip and hipv4 offload kinds as " + "compatible with openmp kind, and vice versa.\n"), + cl::init(false), cl::cat(ClangOffloadBundlerCategory)); ++ cl::opt Compress("compress", ++ cl::desc("Compress output file when bundling.\n"), ++ cl::init(false), cl::cat(ClangOffloadBundlerCategory)); ++ cl::opt Verbose("verbose", cl::desc("Print debug information.\n"), ++ cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + + // Process commandline options and report errors + sys::PrintStackTraceOnErrorSignal(argv[0]); +@@ -169,6 +174,11 @@ int main(int argc, const char **argv) { + BundlerConfig.BundleAlignment = BundleAlignment; + BundlerConfig.FilesType = FilesType; + BundlerConfig.ObjcopyPath = ""; ++ // Do not override the default value Compress and Verbose in BundlerConfig. ++ if (Compress.getNumOccurrences() > 0) ++ BundlerConfig.Compress = Compress; ++ if (Verbose.getNumOccurrences() > 0) ++ BundlerConfig.Verbose = Verbose; + + BundlerConfig.TargetNames = TargetNames; + BundlerConfig.InputFileNames = InputFileNames; +diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h +index 329c96f5c14c..a28710dcdfaf 100644 +--- a/llvm/include/llvm/BinaryFormat/Magic.h ++++ b/llvm/include/llvm/BinaryFormat/Magic.h +@@ -42,19 +42,21 @@ struct file_magic { + macho_universal_binary, ///< Mach-O universal binary + macho_file_set, ///< Mach-O file set binary + minidump, ///< Windows minidump file +- coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file +- coff_object, ///< COFF object file +- coff_import_library, ///< COFF import library +- pecoff_executable, ///< PECOFF executable file +- windows_resource, ///< Windows compiled resource file (.res) +- xcoff_object_32, ///< 32-bit XCOFF object file +- xcoff_object_64, ///< 64-bit XCOFF object file +- wasm_object, ///< WebAssembly Object file +- pdb, ///< Windows PDB debug info file +- tapi_file, ///< Text-based Dynamic Library Stub file +- cuda_fatbinary, ///< CUDA Fatbinary object file +- offload_binary, ///< LLVM offload object file +- dxcontainer_object, ///< DirectX container file ++ coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file ++ coff_object, ///< COFF object file ++ coff_import_library, ///< COFF import library ++ pecoff_executable, ///< PECOFF executable file ++ windows_resource, ///< Windows compiled resource file (.res) ++ xcoff_object_32, ///< 32-bit XCOFF object file ++ xcoff_object_64, ///< 64-bit XCOFF object file ++ wasm_object, ///< WebAssembly Object file ++ pdb, ///< Windows PDB debug info file ++ tapi_file, ///< Text-based Dynamic Library Stub file ++ cuda_fatbinary, ///< CUDA Fatbinary object file ++ offload_binary, ///< LLVM offload object file ++ dxcontainer_object, ///< DirectX container file ++ offload_bundle, ///< Clang offload bundle file ++ offload_bundle_compressed, ///< Compressed clang offload bundle file + }; + + bool is_object() const { return V != unknown; } +diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp +index aa84bc36bfde..76aa1a602aa8 100644 +--- a/llvm/lib/BinaryFormat/Magic.cpp ++++ b/llvm/lib/BinaryFormat/Magic.cpp +@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) { + if (startswith(Magic, "BC\xC0\xDE")) + return file_magic::bitcode; + break; ++ case 'C': ++ if (startswith(Magic, "CCOB")) ++ return file_magic::offload_bundle_compressed; ++ break; + case '!': + if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) + return file_magic::archive; +@@ -246,6 +250,13 @@ file_magic llvm::identify_magic(StringRef Magic) { + return file_magic::coff_object; + break; + ++ case '_': { ++ const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__"; ++ if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic)) ++ return file_magic::offload_bundle; ++ break; ++ } ++ + default: + break; + } +diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp +index d18aed8b3b8c..0ee9f7fac448 100644 +--- a/llvm/lib/Object/Binary.cpp ++++ b/llvm/lib/Object/Binary.cpp +@@ -87,6 +87,8 @@ Expected> object::createBinary(MemoryBufferRef Buffer, + case file_magic::cuda_fatbinary: + case file_magic::coff_cl_gl_object: + case file_magic::dxcontainer_object: ++ case file_magic::offload_bundle: ++ case file_magic::offload_bundle_compressed: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); + case file_magic::offload_binary: +diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp +index 56a1d09097d4..4cc95ea32f60 100644 +--- a/llvm/lib/Object/ObjectFile.cpp ++++ b/llvm/lib/Object/ObjectFile.cpp +@@ -154,6 +154,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type, + case file_magic::cuda_fatbinary: + case file_magic::offload_binary: + case file_magic::dxcontainer_object: ++ case file_magic::offload_bundle: ++ case file_magic::offload_bundle_compressed: + return errorCodeToError(object_error::invalid_file_type); + case file_magic::tapi_file: + return errorCodeToError(object_error::invalid_file_type); +-- +2.43.0 + diff --git a/pkgs/development/rocm-modules/6/llvm/base.nix b/pkgs/development/rocm-modules/6/llvm/base.nix index 95f57b052045..e49f28fe976b 100644 --- a/pkgs/development/rocm-modules/6/llvm/base.nix +++ b/pkgs/development/rocm-modules/6/llvm/base.nix @@ -72,7 +72,9 @@ in stdenv.mkDerivation (finalAttrs: { "info" # Avoid `attribute 'info' missing` when using with wrapCC ]; - patches = extraPatches; + patches = [ + ./add-compression-to-clang-offload-bundler.patch + ] ++ extraPatches; src = fetchFromGitHub { owner = "ROCm"; @@ -133,7 +135,14 @@ in stdenv.mkDerivation (finalAttrs: { "-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped" ] ++ extraCMakeFlags; - postPatch = lib.optionalString finalAttrs.passthru.isLLVM '' + prePatch = '' + cd ../ + chmod -R u+w . + ''; + + postPatch = '' + cd ${targetDir} + '' + lib.optionalString finalAttrs.passthru.isLLVM '' patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh '' + lib.optionalString (buildTests && finalAttrs.passthru.isLLVM) '' # FileSystem permissions tests fail with various special bits diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix index 5a61732ffd2d..8ae4e0e0abb8 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix @@ -2,13 +2,14 @@ , callPackage , rocmUpdateScript , llvm +, makeWrapper }: callPackage ../base.nix rec { inherit stdenv rocmUpdateScript; targetName = "clang-unwrapped"; targetDir = "clang"; - extraBuildInputs = [ llvm ]; + extraBuildInputs = [ llvm makeWrapper ]; extraCMakeFlags = [ "-DCLANG_INCLUDE_DOCS=ON" @@ -41,6 +42,12 @@ callPackage ../base.nix rec { extraPostInstall = '' mv bin/clang-tblgen $out/bin + # add wrapper to compress embedded accelerator-specific code + # this makes the output of composable_kernel significantly smaller right now + # TODO: remove this once ROCm does it out of the box + mv $out/bin/clang-offload-bundler $out/bin/clang-offload-bundler-unwrapped + makeWrapper $out/bin/clang-offload-bundler-unwrapped $out/bin/clang-offload-bundler \ + --add-flags '-compress' ''; requiredSystemFeatures = [ "big-parallel" ]; diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/0000-mlir-fix-debugtranslation.patch b/pkgs/development/rocm-modules/6/llvm/stage-3/0000-mlir-fix-debugtranslation.patch deleted file mode 100644 index f4221a088136..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/0000-mlir-fix-debugtranslation.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74 Mon Sep 17 00:00:00 2001 -From: Scott Linder -Date: Mon, 11 Sep 2023 18:37:37 +0000 -Subject: [PATCH] [HeterogeneousDWARF] Update MLIR DI Metadata handling - -Pass a default DW_MSPACE_LLVM_none to satisfy new API - -Change-Id: I50df461f00b5510a715f55f61107122318102d22 ---- - lib/Target/LLVMIR/DebugTranslation.cpp | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/lib/Target/LLVMIR/DebugTranslation.cpp b/lib/Target/LLVMIR/DebugTranslation.cpp -index 2053f5bcef06aa6..635ee5d7e5fefdc 100644 ---- a/lib/Target/LLVMIR/DebugTranslation.cpp -+++ b/lib/Target/LLVMIR/DebugTranslation.cpp -@@ -148,7 +148,8 @@ llvm::DIDerivedType *DebugTranslation::translateImpl(DIDerivedTypeAttr attr) { - /*File=*/nullptr, /*Line=*/0, - /*Scope=*/nullptr, translate(attr.getBaseType()), attr.getSizeInBits(), - attr.getAlignInBits(), attr.getOffsetInBits(), -- /*DWARFAddressSpace=*/std::nullopt, /*Flags=*/llvm::DINode::FlagZero); -+ /*DWARFAddressSpace=*/std::nullopt, llvm::dwarf::DW_MSPACE_LLVM_none, -+ /*Flags=*/llvm::DINode::FlagZero); - } - - llvm::DIFile *DebugTranslation::translateImpl(DIFileAttr attr) { -@@ -185,7 +186,8 @@ DebugTranslation::translateImpl(DILocalVariableAttr attr) { - llvmCtx, translate(attr.getScope()), getMDStringOrNull(attr.getName()), - translate(attr.getFile()), attr.getLine(), translate(attr.getType()), - attr.getArg(), -- /*Flags=*/llvm::DINode::FlagZero, attr.getAlignInBits(), -+ /*Flags=*/llvm::DINode::FlagZero, llvm::dwarf::DW_MSPACE_LLVM_none, -+ attr.getAlignInBits(), - /*Annotations=*/nullptr); - } - diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix index 6de685ea2771..265d994a27f8 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix @@ -7,6 +7,7 @@ , glslang , shaderc , lit +, fetchpatch }: callPackage ../base.nix rec { @@ -17,9 +18,12 @@ callPackage ../base.nix rec { targetDir = targetName; # Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` - # We patch at a different source root, so we modify the patch and include it locally - # https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch - extraPatches = [ ./0000-mlir-fix-debugtranslation.patch ]; + extraPatches = [ + (fetchpatch { + url = "https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch"; + hash = "sha256-3c91A9InMKxm+JcnWxoUeOU68y5I6w1AAXx6T9UByqI="; + }) + ]; extraNativeBuildInputs = [ clr ]; extraBuildInputs = [ From 772dbad3d41932f08d44aaa63a571d4e26c5d143 Mon Sep 17 00:00:00 2001 From: Martin Schwaighofer Date: Sun, 7 Apr 2024 00:31:40 +0200 Subject: [PATCH 2/2] rocmPackages.llvm: replace --replace with --replace-fail (cleanup) --- pkgs/development/rocm-modules/6/llvm/base.nix | 2 +- .../rocm-modules/6/llvm/stage-1/clang-unwrapped.nix | 2 +- pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix | 10 +++++----- pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix | 2 +- .../development/rocm-modules/6/llvm/stage-3/libclc.nix | 10 +++++----- pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix | 2 +- pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pkgs/development/rocm-modules/6/llvm/base.nix b/pkgs/development/rocm-modules/6/llvm/base.nix index e49f28fe976b..88b384b43d8e 100644 --- a/pkgs/development/rocm-modules/6/llvm/base.nix +++ b/pkgs/development/rocm-modules/6/llvm/base.nix @@ -150,7 +150,7 @@ in stdenv.mkDerivation (finalAttrs: { rm unittests/Support/Path.cpp substituteInPlace unittests/Support/CMakeLists.txt \ - --replace "Path.cpp" "" + --replace-fail "Path.cpp" "" '' + extraPostPatch; doCheck = buildTests; diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix index 8ae4e0e0abb8..0d982299ec6e 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix @@ -21,7 +21,7 @@ callPackage ../base.nix rec { ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules substituteInPlace CMakeLists.txt \ - --replace "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" + --replace-fail "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'` rm test/Analysis/scan-build/*.test diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix index 7e7cf9c2a608..2446723ef5fe 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix @@ -14,11 +14,11 @@ callPackage ../base.nix rec { # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... substituteInPlace ../libc/test/src/math/log10_test.cpp \ - --replace "i < N" "i < 0" \ - --replace "test(mpfr::RoundingMode::Nearest);" "" \ - --replace "test(mpfr::RoundingMode::Downward);" "" \ - --replace "test(mpfr::RoundingMode::Upward);" "" \ - --replace "test(mpfr::RoundingMode::TowardZero);" "" + --replace-fail "i < N" "i < 0" \ + --replace-fail "test(mpfr::RoundingMode::Nearest);" "" \ + --replace-fail "test(mpfr::RoundingMode::Downward);" "" \ + --replace-fail "test(mpfr::RoundingMode::Upward);" "" \ + --replace-fail "test(mpfr::RoundingMode::TowardZero);" "" ''; checkTargets = [ "check-${targetName}" ]; diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix index 91f34265f85f..a0e1935d3c5f 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix @@ -68,6 +68,6 @@ wrapCCWith rec { # GPU compilation uses builtin `lld` substituteInPlace $out/bin/{clang,clang++} \ - --replace "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" + --replace-fail "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" ''; } diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix index 1fd72ee67188..c395dd792db0 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix @@ -21,13 +21,13 @@ in callPackage ../base.nix rec { # `clspv` tests fail, unresolved calls extraPostPatch = '' substituteInPlace CMakeLists.txt \ - --replace "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ + --replace-fail "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ "find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \ - --replace "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ + --replace-fail "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ "find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \ - --replace " spirv-mesa3d-" "" \ - --replace " spirv64-mesa3d-" "" \ - --replace "NOT \''${t} MATCHES" \ + --replace-fail " spirv-mesa3d-" "" \ + --replace-fail " spirv64-mesa3d-" "" \ + --replace-fail "NOT \''${t} MATCHES" \ "NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES" ''; diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix index 265d994a27f8..8b71b3fb2977 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix @@ -45,7 +45,7 @@ callPackage ../base.nix rec { extraPostPatch = '' # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` substituteInPlace CMakeLists.txt \ - --replace "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" + --replace-fail "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" # Mainly `No such file or directory` cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix index e001f33dfd43..da5c2e16f5d3 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix @@ -11,7 +11,7 @@ callPackage ../base.nix rec { extraPostPatch = '' # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` substituteInPlace CMakeLists.txt \ - --replace "NOT TARGET gtest" "FALSE" + --replace-fail "NOT TARGET gtest" "FALSE" ''; checkTargets = [ "check-${targetName}" ];