From 92a9b54db14e3dbbad0085e37a75fc9f5823d20b Mon Sep 17 00:00:00 2001 From: Weijia Wang <9713184+wegank@users.noreply.github.com> Date: Wed, 17 Apr 2024 02:41:57 +0200 Subject: [PATCH] Revert "rocmPackages.composable_kernel: compress output" --- ...compression-to-clang-offload-bundler.patch | 1191 ----------------- pkgs/development/rocm-modules/6/llvm/base.nix | 4 +- .../6/llvm/stage-1/clang-unwrapped.nix | 9 +- 3 files changed, 2 insertions(+), 1202 deletions(-) delete mode 100644 pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch diff --git a/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch b/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch deleted file mode 100644 index 8589ac527dcb..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch +++ /dev/null @@ -1,1191 +0,0 @@ -From 058d9ba2f54a8c7c47b6522db808db87fed82e78 Mon Sep 17 00:00:00 2001 -From: "Yaxun (Sam) Liu" -Date: Thu, 4 Apr 2024 03:25:28 +0200 -Subject: [PATCH] backport 7e2823438e920d25364ff92b62ad90020c31bb59 - -Reland "[HIP] Support compressing device binary" - -Original PR: https://github.com/llvm/llvm-project/pull/67162 - -The commit was reverted due to UB detected by santizer: - -https://lab.llvm.org/buildbot/#/builders/238/builds/5955 - -clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error: - load of misaligned address 0xaaaae2d90e7c for type - 'const uint64_t' (aka 'const unsigned long'), which - requires 8 byte alignment - -It was fixed by using memcpy instead of dereferencing int* -casted from unaligned char*. - -Co-Authored-By: Martin Schwaighofer -(only did the backport) ---- - clang/docs/ClangOffloadBundler.rst | 27 ++ - clang/include/clang/Driver/OffloadBundler.h | 37 ++ - clang/include/clang/Driver/Options.td | 5 + - clang/lib/Driver/OffloadBundler.cpp | 346 +++++++++++++++--- - clang/lib/Driver/ToolChains/Clang.cpp | 7 + - clang/lib/Driver/ToolChains/HIPUtility.cpp | 6 + - .../test/Driver/clang-offload-bundler-zlib.c | 75 ++++ - .../test/Driver/clang-offload-bundler-zstd.c | 72 ++++ - .../test/Driver/hip-offload-compress-zlib.hip | 45 +++ - .../test/Driver/hip-offload-compress-zstd.hip | 45 +++ - .../clang-offload-bundler/CMakeLists.txt | 1 + - .../ClangOffloadBundler.cpp | 10 + - llvm/include/llvm/BinaryFormat/Magic.h | 28 +- - llvm/lib/BinaryFormat/Magic.cpp | 11 + - llvm/lib/Object/Binary.cpp | 2 + - llvm/lib/Object/ObjectFile.cpp | 2 + - 16 files changed, 659 insertions(+), 60 deletions(-) - create mode 100644 clang/test/Driver/clang-offload-bundler-zlib.c - create mode 100644 clang/test/Driver/clang-offload-bundler-zstd.c - create mode 100644 clang/test/Driver/hip-offload-compress-zlib.hip - create mode 100644 clang/test/Driver/hip-offload-compress-zstd.hip - -diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst -index 432da787249b..d47997bf718d 100644 ---- a/clang/docs/ClangOffloadBundler.rst -+++ b/clang/docs/ClangOffloadBundler.rst -@@ -498,3 +498,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if: - Verbose printing of matched/unmatched comparisons between bundle entry id of - a device binary from HDA and bundle entry ID of a given target processor - (see :ref:`compatibility-bundle-entry-id`). -+ -+Compression and Decompression -+============================= -+ -+``clang-offload-bundler`` provides features to compress and decompress the full -+bundle, leveraging inherent redundancies within the bundle entries. Use the -+`-compress` command-line option to enable this compression capability. -+ -+The compressed offload bundle begins with a header followed by the compressed binary data: -+ -+- **Magic Number (4 bytes)**: -+ This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle). -+ -+- **Version Number (16-bit unsigned int)**: -+ This denotes the version of the compressed offload bundle format. The current version is `1`. -+ -+- **Compression Method (16-bit unsigned int)**: -+ This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration. -+ -+- **Uncompressed Binary Size (32-bit unsigned int)**: -+ This is the size (in bytes) of the binary data before it was compressed. -+ -+- **Hash (64-bit unsigned int)**: -+ This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes. -+ -+- **Compressed Data**: -+ The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size. -diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h -index fe263f0540b9..fc96f200414d 100644 ---- a/clang/include/clang/Driver/OffloadBundler.h -+++ b/clang/include/clang/Driver/OffloadBundler.h -@@ -19,6 +19,7 @@ - - #include "llvm/Support/Error.h" - #include "llvm/TargetParser/Triple.h" -+#include - #include - #include - -@@ -26,11 +27,15 @@ namespace clang { - - class OffloadBundlerConfig { - public: -+ OffloadBundlerConfig(); -+ - bool AllowNoHost = false; - bool AllowMissingBundles = false; - bool CheckInputArchive = false; - bool PrintExternalCommands = false; - bool HipOpenmpCompatible = false; -+ bool Compress = false; -+ bool Verbose = false; - - unsigned BundleAlignment = 1; - unsigned HostInputIndex = ~0u; -@@ -82,6 +87,38 @@ struct OffloadTargetInfo { - std::string str() const; - }; - -+// CompressedOffloadBundle represents the format for the compressed offload -+// bundles. -+// -+// The format is as follows: -+// - Magic Number (4 bytes) - A constant "CCOB". -+// - Version (2 bytes) -+// - Compression Method (2 bytes) - Uses the values from -+// llvm::compression::Format. -+// - Uncompressed Size (4 bytes). -+// - Truncated MD5 Hash (8 bytes). -+// - Compressed Data (variable length). -+ -+class CompressedOffloadBundle { -+private: -+ static inline const size_t MagicSize = 4; -+ static inline const size_t VersionFieldSize = sizeof(uint16_t); -+ static inline const size_t MethodFieldSize = sizeof(uint16_t); -+ static inline const size_t SizeFieldSize = sizeof(uint32_t); -+ static inline const size_t HashFieldSize = 8; -+ static inline const size_t HeaderSize = MagicSize + VersionFieldSize + -+ MethodFieldSize + SizeFieldSize + -+ HashFieldSize; -+ static inline const llvm::StringRef MagicNumber = "CCOB"; -+ static inline const uint16_t Version = 1; -+ -+public: -+ static llvm::Expected> -+ compress(const llvm::MemoryBuffer &Input, bool Verbose = false); -+ static llvm::Expected> -+ decompress(const llvm::MemoryBuffer &Input, bool Verbose = false); -+}; -+ - } // namespace clang - - #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index c206ab84508f..f8df98678396 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -984,6 +984,11 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group - def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">, - InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating " - "'-aux-target-cpu' and '-aux-target-feature'.">; -+ -+def offload_compress : Flag<["--"], "offload-compress">, -+ HelpText<"Compress offload device binaries (HIP only)">; -+def no_offload_compress : Flag<["--"], "no-offload-compress">; -+ - def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>, - HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; - def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, -diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp -index 29250c49eb0c..69e14679da1c 100644 ---- a/clang/lib/Driver/OffloadBundler.cpp -+++ b/clang/lib/Driver/OffloadBundler.cpp -@@ -21,24 +21,29 @@ - #include "llvm/ADT/ArrayRef.h" - #include "llvm/ADT/SmallString.h" - #include "llvm/ADT/SmallVector.h" -+#include "llvm/ADT/StringExtras.h" - #include "llvm/ADT/StringMap.h" - #include "llvm/ADT/StringRef.h" -+#include "llvm/BinaryFormat/Magic.h" - #include "llvm/Object/Archive.h" - #include "llvm/Object/ArchiveWriter.h" - #include "llvm/Object/Binary.h" - #include "llvm/Object/ObjectFile.h" - #include "llvm/Support/Casting.h" -+#include "llvm/Support/Compression.h" - #include "llvm/Support/Debug.h" - #include "llvm/Support/EndianStream.h" - #include "llvm/Support/Errc.h" - #include "llvm/Support/Error.h" - #include "llvm/Support/ErrorOr.h" - #include "llvm/Support/FileSystem.h" -+#include "llvm/Support/MD5.h" - #include "llvm/Support/MemoryBuffer.h" - #include "llvm/Support/Path.h" - #include "llvm/Support/Program.h" - #include "llvm/Support/Signals.h" - #include "llvm/Support/StringSaver.h" -+#include "llvm/Support/Timer.h" - #include "llvm/Support/WithColor.h" - #include "llvm/Support/raw_ostream.h" - #include "llvm/TargetParser/Host.h" -@@ -48,6 +53,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -58,6 +64,10 @@ using namespace llvm; - using namespace llvm::object; - using namespace clang; - -+static llvm::TimerGroup -+ ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group", -+ "Timer group for clang offload bundler"); -+ - /// Magic string that marks the existence of offloading data. - #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" - -@@ -229,20 +239,22 @@ public: - - /// Write the header of the bundled file to \a OS based on the information - /// gathered from \a Inputs. -- virtual Error WriteHeader(raw_fd_ostream &OS, -+ virtual Error WriteHeader(raw_ostream &OS, - ArrayRef> Inputs) = 0; - - /// Write the marker that initiates a bundle for the triple \a TargetTriple to - /// \a OS. -- virtual Error WriteBundleStart(raw_fd_ostream &OS, -- StringRef TargetTriple) = 0; -+ virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0; - - /// Write the marker that closes a bundle for the triple \a TargetTriple to \a - /// OS. -- virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0; -+ virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0; - - /// Write the bundle from \a Input into \a OS. -- virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0; -+ virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0; -+ -+ /// Finalize output file. -+ virtual Error finalizeOutputFile() { return Error::success(); } - - /// List bundle IDs in \a Input. - virtual Error listBundleIDs(MemoryBuffer &Input) { -@@ -330,7 +342,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) { - } - - /// Write 8-byte integers to a buffer in little-endian format. --static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) { -+static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) { - llvm::support::endian::write(OS, Val, llvm::support::little); - } - -@@ -378,8 +390,7 @@ public: - return Error::success(); - - // Check if no magic was found. -- StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1); -- if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR)) -+ if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle) - return Error::success(); - - // Read number of bundles. -@@ -454,7 +465,7 @@ public: - return Error::success(); - } - -- Error WriteHeader(raw_fd_ostream &OS, -+ Error WriteHeader(raw_ostream &OS, - ArrayRef> Inputs) final { - - // Compute size of the header. -@@ -491,19 +502,27 @@ public: - return Error::success(); - } - -- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { - CurWriteBundleTarget = TargetTriple.str(); - return Error::success(); - } - -- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { - return Error::success(); - } - -- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { -+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { - auto BI = BundlesInfo[CurWriteBundleTarget]; -- OS.seek(BI.Offset); -+ -+ // Pad with 0 to reach specified offset. -+ size_t CurrentPos = OS.tell(); -+ size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0; -+ for (size_t I = 0; I < PaddingSize; ++I) -+ OS.write('\0'); -+ assert(OS.tell() == BI.Offset); -+ - OS.write(Input.getBufferStart(), Input.getBufferSize()); -+ - return Error::success(); - } - }; -@@ -560,7 +579,7 @@ class ObjectFileHandler final : public FileHandler { - return NameOrErr.takeError(); - - // If it does not start with the reserved suffix, just skip this section. -- if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR)) -+ if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle) - return std::nullopt; - - // Return the triple that is right after the reserved prefix. -@@ -625,7 +644,7 @@ public: - return Error::success(); - } - -- Error WriteHeader(raw_fd_ostream &OS, -+ Error WriteHeader(raw_ostream &OS, - ArrayRef> Inputs) final { - assert(BundlerConfig.HostInputIndex != ~0u && - "Host input index not defined."); -@@ -635,12 +654,16 @@ public: - return Error::success(); - } - -- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { - ++NumberOfProcessedInputs; - return Error::success(); - } - -- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { -+ return Error::success(); -+ } -+ -+ Error finalizeOutputFile() final { - assert(NumberOfProcessedInputs <= NumberOfInputs && - "Processing more inputs that actually exist!"); - assert(BundlerConfig.HostInputIndex != ~0u && -@@ -658,10 +681,6 @@ public: - assert(BundlerConfig.ObjcopyPath != "" && - "llvm-objcopy path not specified"); - -- // We write to the output file directly. So, we close it and use the name -- // to pass down to llvm-objcopy. -- OS.close(); -- - // Temporary files that need to be removed. - TempFileHandlerRAII TempFiles; - -@@ -702,7 +721,7 @@ public: - return Error::success(); - } - -- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { -+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { - return Error::success(); - } - -@@ -799,22 +818,22 @@ protected: - return Error::success(); - } - -- Error WriteHeader(raw_fd_ostream &OS, -+ Error WriteHeader(raw_ostream &OS, - ArrayRef> Inputs) final { - return Error::success(); - } - -- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final { - OS << BundleStartString << TargetTriple << "\n"; - return Error::success(); - } - -- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final { -+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final { - OS << BundleEndString << TargetTriple << "\n"; - return Error::success(); - } - -- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { -+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final { - OS << Input.getBuffer(); - return Error::success(); - } -@@ -899,6 +918,187 @@ CreateFileHandler(MemoryBuffer &FirstInput, - "'" + FilesType + "': invalid file type specified"); - } - -+OffloadBundlerConfig::OffloadBundlerConfig() { -+ auto IgnoreEnvVarOpt = -+ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR"); -+ if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1") -+ return; -+ -+ auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE"); -+ if (VerboseEnvVarOpt.has_value()) -+ Verbose = VerboseEnvVarOpt.value() == "1"; -+ -+ auto CompressEnvVarOpt = -+ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS"); -+ if (CompressEnvVarOpt.has_value()) -+ Compress = CompressEnvVarOpt.value() == "1"; -+} -+ -+llvm::Expected> -+CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input, -+ bool Verbose) { -+ llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time", -+ ClangOffloadBundlerTimerGroup); -+ if (Verbose) -+ HashTimer.startTimer(); -+ llvm::MD5 Hash; -+ llvm::MD5::MD5Result Result; -+ Hash.update(Input.getBuffer()); -+ Hash.final(Result); -+ uint64_t TruncatedHash = Result.low(); -+ if (Verbose) -+ HashTimer.stopTimer(); -+ -+ SmallVector CompressedBuffer; -+ auto BufferUint8 = llvm::ArrayRef( -+ reinterpret_cast(Input.getBuffer().data()), -+ Input.getBuffer().size()); -+ -+ llvm::compression::Format CompressionFormat; -+ -+ if (llvm::compression::zstd::isAvailable()) -+ CompressionFormat = llvm::compression::Format::Zstd; -+ else if (llvm::compression::zlib::isAvailable()) -+ CompressionFormat = llvm::compression::Format::Zlib; -+ else -+ return createStringError(llvm::inconvertibleErrorCode(), -+ "Compression not supported"); -+ -+ llvm::Timer CompressTimer("Compression Timer", "Compression time", -+ ClangOffloadBundlerTimerGroup); -+ if (Verbose) -+ CompressTimer.startTimer(); -+ llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer); -+ if (Verbose) -+ CompressTimer.stopTimer(); -+ -+ uint16_t CompressionMethod = static_cast(CompressionFormat); -+ uint32_t UncompressedSize = Input.getBuffer().size(); -+ -+ SmallVector FinalBuffer; -+ llvm::raw_svector_ostream OS(FinalBuffer); -+ OS << MagicNumber; -+ OS.write(reinterpret_cast(&Version), sizeof(Version)); -+ OS.write(reinterpret_cast(&CompressionMethod), -+ sizeof(CompressionMethod)); -+ OS.write(reinterpret_cast(&UncompressedSize), -+ sizeof(UncompressedSize)); -+ OS.write(reinterpret_cast(&TruncatedHash), -+ sizeof(TruncatedHash)); -+ OS.write(reinterpret_cast(CompressedBuffer.data()), -+ CompressedBuffer.size()); -+ -+ if (Verbose) { -+ auto MethodUsed = -+ CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib"; -+ llvm::errs() << "Compressed bundle format version: " << Version << "\n" -+ << "Compression method used: " << MethodUsed << "\n" -+ << "Binary size before compression: " << UncompressedSize -+ << " bytes\n" -+ << "Binary size after compression: " << CompressedBuffer.size() -+ << " bytes\n" -+ << "Truncated MD5 hash: " -+ << llvm::format_hex(TruncatedHash, 16) << "\n"; -+ } -+ -+ return llvm::MemoryBuffer::getMemBufferCopy( -+ llvm::StringRef(FinalBuffer.data(), FinalBuffer.size())); -+} -+ -+llvm::Expected> -+CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, -+ bool Verbose) { -+ -+ StringRef Blob = Input.getBuffer(); -+ -+ if (Blob.size() < HeaderSize) { -+ return llvm::MemoryBuffer::getMemBufferCopy(Blob); -+ } -+ if (llvm::identify_magic(Blob) != -+ llvm::file_magic::offload_bundle_compressed) { -+ if (Verbose) -+ llvm::errs() << "Uncompressed bundle.\n"; -+ return llvm::MemoryBuffer::getMemBufferCopy(Blob); -+ } -+ -+ uint16_t ThisVersion; -+ uint16_t CompressionMethod; -+ uint32_t UncompressedSize; -+ uint64_t StoredHash; -+ memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(), -+ sizeof(uint16_t)); -+ memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize, -+ sizeof(uint16_t)); -+ memcpy(&UncompressedSize, -+ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize, -+ sizeof(uint32_t)); -+ memcpy(&StoredHash, -+ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize + -+ SizeFieldSize, -+ sizeof(uint64_t)); -+ -+ llvm::compression::Format CompressionFormat; -+ if (CompressionMethod == -+ static_cast(llvm::compression::Format::Zlib)) -+ CompressionFormat = llvm::compression::Format::Zlib; -+ else if (CompressionMethod == -+ static_cast(llvm::compression::Format::Zstd)) -+ CompressionFormat = llvm::compression::Format::Zstd; -+ else -+ return createStringError(inconvertibleErrorCode(), -+ "Unknown compressing method"); -+ -+ llvm::Timer DecompressTimer("Decompression Timer", "Decompression time", -+ ClangOffloadBundlerTimerGroup); -+ if (Verbose) -+ DecompressTimer.startTimer(); -+ -+ SmallVector DecompressedData; -+ StringRef CompressedData = Blob.substr(HeaderSize); -+ if (llvm::Error DecompressionError = llvm::compression::decompress( -+ CompressionFormat, llvm::arrayRefFromStringRef(CompressedData), -+ DecompressedData, UncompressedSize)) -+ return createStringError(inconvertibleErrorCode(), -+ "Could not decompress embedded file contents: " + -+ llvm::toString(std::move(DecompressionError))); -+ -+ if (Verbose) { -+ DecompressTimer.stopTimer(); -+ -+ // Recalculate MD5 hash -+ llvm::Timer HashRecalcTimer("Hash Recalculation Timer", -+ "Hash recalculation time", -+ ClangOffloadBundlerTimerGroup); -+ HashRecalcTimer.startTimer(); -+ llvm::MD5 Hash; -+ llvm::MD5::MD5Result Result; -+ Hash.update(llvm::ArrayRef(DecompressedData.data(), -+ DecompressedData.size())); -+ Hash.final(Result); -+ uint64_t RecalculatedHash = Result.low(); -+ HashRecalcTimer.stopTimer(); -+ bool HashMatch = (StoredHash == RecalculatedHash); -+ -+ llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n" -+ << "Decompression method: " -+ << (CompressionFormat == llvm::compression::Format::Zlib -+ ? "zlib" -+ : "zstd") -+ << "\n" -+ << "Size before decompression: " << CompressedData.size() -+ << " bytes\n" -+ << "Size after decompression: " << UncompressedSize -+ << " bytes\n" -+ << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n" -+ << "Recalculated hash: " -+ << llvm::format_hex(RecalculatedHash, 16) << "\n" -+ << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n"; -+ } -+ -+ return llvm::MemoryBuffer::getMemBufferCopy( -+ llvm::toStringRef(DecompressedData)); -+} -+ - // List bundle IDs. Return true if an error was found. - Error OffloadBundler::ListBundleIDsInFile( - StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { -@@ -908,28 +1108,35 @@ Error OffloadBundler::ListBundleIDsInFile( - if (std::error_code EC = CodeOrErr.getError()) - return createFileError(InputFileName, EC); - -- MemoryBuffer &Input = **CodeOrErr; -+ // Decompress the input if necessary. -+ Expected> DecompressedBufferOrErr = -+ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose); -+ if (!DecompressedBufferOrErr) -+ return createStringError( -+ inconvertibleErrorCode(), -+ "Failed to decompress input: " + -+ llvm::toString(DecompressedBufferOrErr.takeError())); -+ -+ MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr; - - // Select the right files handler. - Expected> FileHandlerOrErr = -- CreateFileHandler(Input, BundlerConfig); -+ CreateFileHandler(DecompressedInput, BundlerConfig); - if (!FileHandlerOrErr) - return FileHandlerOrErr.takeError(); - - std::unique_ptr &FH = *FileHandlerOrErr; - assert(FH); -- return FH->listBundleIDs(Input); -+ return FH->listBundleIDs(DecompressedInput); - } - - /// Bundle the files. Return true if an error was found. - Error OffloadBundler::BundleFiles() { - std::error_code EC; - -- // Create output file. -- raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC, -- sys::fs::OF_None); -- if (EC) -- return createFileError(BundlerConfig.OutputFileNames.front(), EC); -+ // Create a buffer to hold the content before compressing. -+ SmallVector Buffer; -+ llvm::raw_svector_ostream BufferStream(Buffer); - - // Open input files. - SmallVector, 8u> InputBuffers; -@@ -956,22 +1163,46 @@ Error OffloadBundler::BundleFiles() { - assert(FH); - - // Write header. -- if (Error Err = FH->WriteHeader(OutputFile, InputBuffers)) -+ if (Error Err = FH->WriteHeader(BufferStream, InputBuffers)) - return Err; - - // Write all bundles along with the start/end markers. If an error was found - // writing the end of the bundle component, abort the bundle writing. - auto Input = InputBuffers.begin(); - for (auto &Triple : BundlerConfig.TargetNames) { -- if (Error Err = FH->WriteBundleStart(OutputFile, Triple)) -+ if (Error Err = FH->WriteBundleStart(BufferStream, Triple)) - return Err; -- if (Error Err = FH->WriteBundle(OutputFile, **Input)) -+ if (Error Err = FH->WriteBundle(BufferStream, **Input)) - return Err; -- if (Error Err = FH->WriteBundleEnd(OutputFile, Triple)) -+ if (Error Err = FH->WriteBundleEnd(BufferStream, Triple)) - return Err; - ++Input; - } -- return Error::success(); -+ -+ raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC, -+ sys::fs::OF_None); -+ if (EC) -+ return createFileError(BundlerConfig.OutputFileNames.front(), EC); -+ -+ SmallVector CompressedBuffer; -+ if (BundlerConfig.Compress) { -+ std::unique_ptr BufferMemory = -+ llvm::MemoryBuffer::getMemBufferCopy( -+ llvm::StringRef(Buffer.data(), Buffer.size())); -+ auto CompressionResult = -+ CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose); -+ if (auto Error = CompressionResult.takeError()) -+ return Error; -+ -+ auto CompressedMemBuffer = std::move(CompressionResult.get()); -+ CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(), -+ CompressedMemBuffer->getBufferEnd()); -+ } else -+ CompressedBuffer = Buffer; -+ -+ OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size()); -+ -+ return FH->finalizeOutputFile(); - } - - // Unbundle the files. Return true if an error was found. -@@ -982,7 +1213,16 @@ Error OffloadBundler::UnbundleFiles() { - if (std::error_code EC = CodeOrErr.getError()) - return createFileError(BundlerConfig.InputFileNames.front(), EC); - -- MemoryBuffer &Input = **CodeOrErr; -+ // Decompress the input if necessary. -+ Expected> DecompressedBufferOrErr = -+ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose); -+ if (!DecompressedBufferOrErr) -+ return createStringError( -+ inconvertibleErrorCode(), -+ "Failed to decompress input: " + -+ llvm::toString(DecompressedBufferOrErr.takeError())); -+ -+ MemoryBuffer &Input = **DecompressedBufferOrErr; - - // Select the right files handler. - Expected> FileHandlerOrErr = -@@ -1357,22 +1597,34 @@ Error OffloadBundler::UnbundleArchive() { - if (!CodeObjectBufferRefOrErr) - return CodeObjectBufferRefOrErr.takeError(); - -- auto CodeObjectBuffer = -+ auto TempCodeObjectBuffer = - MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false); - -+ // Decompress the buffer if necessary. -+ Expected> DecompressedBufferOrErr = -+ CompressedOffloadBundle::decompress(*TempCodeObjectBuffer, -+ BundlerConfig.Verbose); -+ if (!DecompressedBufferOrErr) -+ return createStringError( -+ inconvertibleErrorCode(), -+ "Failed to decompress code object: " + -+ llvm::toString(DecompressedBufferOrErr.takeError())); -+ -+ MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr; -+ - Expected> FileHandlerOrErr = -- CreateFileHandler(*CodeObjectBuffer, BundlerConfig); -+ CreateFileHandler(CodeObjectBuffer, BundlerConfig); - if (!FileHandlerOrErr) - return FileHandlerOrErr.takeError(); - - std::unique_ptr &FileHandler = *FileHandlerOrErr; - assert(FileHandler); - -- if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer)) -+ if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer)) - return ReadErr; - - Expected> CurBundleIDOrErr = -- FileHandler->ReadBundleStart(*CodeObjectBuffer); -+ FileHandler->ReadBundleStart(CodeObjectBuffer); - if (!CurBundleIDOrErr) - return CurBundleIDOrErr.takeError(); - -@@ -1393,7 +1645,7 @@ Error OffloadBundler::UnbundleArchive() { - BundlerConfig)) { - std::string BundleData; - raw_string_ostream DataStream(BundleData); -- if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer)) -+ if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer)) - return Err; - - for (auto &CompatibleTarget : CompatibleTargets) { -@@ -1431,11 +1683,11 @@ Error OffloadBundler::UnbundleArchive() { - } - } - -- if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer)) -+ if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer)) - return Err; - - Expected> NextTripleOrErr = -- FileHandler->ReadBundleStart(*CodeObjectBuffer); -+ FileHandler->ReadBundleStart(CodeObjectBuffer); - if (!NextTripleOrErr) - return NextTripleOrErr.takeError(); - -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 9e1f07d78bf1..274875c631c5 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -8400,6 +8400,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, - } - CmdArgs.push_back(TCArgs.MakeArgString(UB)); - } -+ if (TCArgs.hasFlag(options::OPT_offload_compress, -+ options::OPT_no_offload_compress, false)) -+ CmdArgs.push_back("-compress"); -+ if (TCArgs.hasArg(options::OPT_v)) -+ CmdArgs.push_back("-verbose"); - // All the inputs are encoded as commands. - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::None(), -@@ -8494,6 +8499,8 @@ void OffloadBundler::ConstructJobMultipleOutputs( - } - CmdArgs.push_back("-unbundle"); - CmdArgs.push_back("-allow-missing-bundles"); -+ if (TCArgs.hasArg(options::OPT_v)) -+ CmdArgs.push_back("-verbose"); - - // All the inputs are encoded as commands. - C.addCommand(std::make_unique( -diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp -index 8b9d8db90ffa..04efdcba20ea 100644 ---- a/clang/lib/Driver/ToolChains/HIPUtility.cpp -+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp -@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, - Args.MakeArgString(std::string("-output=").append(Output)); - BundlerArgs.push_back(BundlerOutputArg); - -+ if (Args.hasFlag(options::OPT_offload_compress, -+ options::OPT_no_offload_compress, false)) -+ BundlerArgs.push_back("-compress"); -+ if (Args.hasArg(options::OPT_v)) -+ BundlerArgs.push_back("-verbose"); -+ - const char *Bundler = Args.MakeArgString( - T.getToolChain().GetProgramPath("clang-offload-bundler")); - C.addCommand(std::make_unique( -diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c -new file mode 100644 -index 000000000000..c46c32a4a053 ---- /dev/null -+++ b/clang/test/Driver/clang-offload-bundler-zlib.c -@@ -0,0 +1,75 @@ -+// REQUIRES: zlib -+// REQUIRES: x86-registered-target -+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} -+ -+// -+// Generate the host binary to be bundled. -+// -+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc -+ -+// -+// Generate an empty file to help with the checks of empty files. -+// -+// RUN: touch %t.empty -+ -+// -+// Generate device binaries to be bundled. -+// -+// RUN: echo 'Content of device file 1' > %t.tgt1 -+// RUN: echo 'Content of device file 2' > %t.tgt2 -+ -+// -+// Check compression/decompression of offload bundle. -+// -+// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \ -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \ -+// RUN: FileCheck -check-prefix=COMPRESS %s -+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s -+// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \ -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \ -+// RUN: FileCheck -check-prefix=DECOMPRESS %s -+// RUN: diff %t.tgt1 %t.res.tgt1 -+// RUN: diff %t.tgt2 %t.res.tgt2 -+ -+// -+// COMPRESS: Compression method used: -+// DECOMPRESS: Decompression method: -+// NOHOST-NOT: host- -+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 -+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 -+// -+ -+// -+// Check -bundle-align option. -+// -+ -+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress -+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle -+// RUN: diff %t.bc %t.res.bc -+// RUN: diff %t.tgt1 %t.res.tgt1 -+// RUN: diff %t.tgt2 %t.res.tgt2 -+ -+// -+// Check unbundling archive. -+// -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress -+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc -+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a -+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s -+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s -+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900 -+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900 -+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 -+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 -+ -+// Some code so that we can create a binary out of this file. -+int A = 0; -+void test_func(void) { -+ ++A; -+} -diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c -new file mode 100644 -index 000000000000..b2b588b72d4d ---- /dev/null -+++ b/clang/test/Driver/clang-offload-bundler-zstd.c -@@ -0,0 +1,72 @@ -+// REQUIRES: zstd -+// REQUIRES: x86-registered-target -+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} -+ -+// -+// Generate the host binary to be bundled. -+// -+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc -+ -+// -+// Generate an empty file to help with the checks of empty files. -+// -+// RUN: touch %t.empty -+ -+// -+// Generate device binaries to be bundled. -+// -+// RUN: echo 'Content of device file 1' > %t.tgt1 -+// RUN: echo 'Content of device file 2' > %t.tgt2 -+ -+// -+// Check compression/decompression of offload bundle. -+// -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \ -+// RUN: FileCheck -check-prefix=COMPRESS %s -+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \ -+// RUN: FileCheck -check-prefix=DECOMPRESS %s -+// RUN: diff %t.tgt1 %t.res.tgt1 -+// RUN: diff %t.tgt2 %t.res.tgt2 -+// -+// COMPRESS: Compression method used -+// DECOMPRESS: Decompression method -+// NOHOST-NOT: host- -+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 -+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 -+// -+ -+// -+// Check -bundle-align option. -+// -+ -+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress -+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle -+// RUN: diff %t.bc %t.res.bc -+// RUN: diff %t.tgt1 %t.res.tgt1 -+// RUN: diff %t.tgt2 %t.res.tgt2 -+ -+// -+// Check unbundling archive. -+// -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress -+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress -+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc -+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ -+// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a -+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s -+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s -+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900 -+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900 -+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906 -+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906 -+ -+// Some code so that we can create a binary out of this file. -+int A = 0; -+void test_func(void) { -+ ++A; -+} -diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip -new file mode 100644 -index 000000000000..a29b6d037350 ---- /dev/null -+++ b/clang/test/Driver/hip-offload-compress-zlib.hip -@@ -0,0 +1,45 @@ -+// REQUIRES: zlib -+// REQUIRES: x86-registered-target -+// REQUIRES: amdgpu-registered-target -+ -+// Test compress bundled bitcode. -+ -+// RUN: rm -rf %T/a.bc -+// RUN: %clang -c -v --target=x86_64-linux-gnu \ -+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -fgpu-rdc -nogpuinc -nogpulib \ -+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -+// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ -+// RUN: -o %T/a.bc \ -+// RUN: 2>&1 | FileCheck %s -+ -+// CHECK: clang-offload-bundler{{.*}} -type=bc -+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -+// CHECK-SAME: -compress -verbose -+// CHECK: Compressed bundle format -+ -+// Test uncompress of bundled bitcode. -+ -+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \ -+// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -fgpu-rdc -nogpulib \ -+// RUN: %T/a.bc --offload-device-only \ -+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s -+ -+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc" -+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -+// UNBUNDLE-SAME: -unbundle -+// UNBUNDLE-SAME: -verbose -+ -+// Test compress bundled code objects. -+ -+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \ -+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -nogpuinc -nogpulib \ -+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -+// RUN: --offload-compress \ -+// RUN: 2>&1 | FileCheck -check-prefix=CO %s -+ -+// CO: clang-offload-bundler{{.*}} "-type=o" -+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 -+// CO-SAME: "-compress" "-verbose" -diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip -new file mode 100644 -index 000000000000..688c2c85329c ---- /dev/null -+++ b/clang/test/Driver/hip-offload-compress-zstd.hip -@@ -0,0 +1,45 @@ -+// REQUIRES: zstd -+// REQUIRES: x86-registered-target -+// REQUIRES: amdgpu-registered-target -+ -+// Test compress bundled bitcode. -+ -+// RUN: rm -rf %T/a.bc -+// RUN: %clang -c -v --target=x86_64-linux-gnu \ -+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -fgpu-rdc -nogpuinc -nogpulib \ -+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -+// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ -+// RUN: -o %T/a.bc \ -+// RUN: 2>&1 | FileCheck %s -+ -+// CHECK: clang-offload-bundler{{.*}} -type=bc -+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -+// CHECK-SAME: -compress -verbose -+// CHECK: Compressed bundle format -+ -+// Test uncompress of bundled bitcode. -+ -+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \ -+// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -fgpu-rdc -nogpulib \ -+// RUN: %T/a.bc --offload-device-only \ -+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s -+ -+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc" -+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -+// UNBUNDLE-SAME: -unbundle -+// UNBUNDLE-SAME: -verbose -+ -+// Test compress bundled code objects. -+ -+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \ -+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ -+// RUN: -nogpuinc -nogpulib \ -+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -+// RUN: --offload-compress \ -+// RUN: 2>&1 | FileCheck -check-prefix=CO %s -+ -+// CO: clang-offload-bundler{{.*}} "-type=o" -+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 -+// CO-SAME: "-compress" "-verbose" -diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt -index dabd82382cdf..dec2881589a5 100644 ---- a/clang/tools/clang-offload-bundler/CMakeLists.txt -+++ b/clang/tools/clang-offload-bundler/CMakeLists.txt -@@ -1,4 +1,5 @@ - set(LLVM_LINK_COMPONENTS -+ BinaryFormat - Object - Support - TargetParser -diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp -index c02b5854bded..68f29807b219 100644 ---- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp -+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp -@@ -141,6 +141,11 @@ int main(int argc, const char **argv) { - cl::desc("Treat hip and hipv4 offload kinds as " - "compatible with openmp kind, and vice versa.\n"), - cl::init(false), cl::cat(ClangOffloadBundlerCategory)); -+ cl::opt Compress("compress", -+ cl::desc("Compress output file when bundling.\n"), -+ cl::init(false), cl::cat(ClangOffloadBundlerCategory)); -+ cl::opt Verbose("verbose", cl::desc("Print debug information.\n"), -+ cl::init(false), cl::cat(ClangOffloadBundlerCategory)); - - // Process commandline options and report errors - sys::PrintStackTraceOnErrorSignal(argv[0]); -@@ -169,6 +174,11 @@ int main(int argc, const char **argv) { - BundlerConfig.BundleAlignment = BundleAlignment; - BundlerConfig.FilesType = FilesType; - BundlerConfig.ObjcopyPath = ""; -+ // Do not override the default value Compress and Verbose in BundlerConfig. -+ if (Compress.getNumOccurrences() > 0) -+ BundlerConfig.Compress = Compress; -+ if (Verbose.getNumOccurrences() > 0) -+ BundlerConfig.Verbose = Verbose; - - BundlerConfig.TargetNames = TargetNames; - BundlerConfig.InputFileNames = InputFileNames; -diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h -index 329c96f5c14c..a28710dcdfaf 100644 ---- a/llvm/include/llvm/BinaryFormat/Magic.h -+++ b/llvm/include/llvm/BinaryFormat/Magic.h -@@ -42,19 +42,21 @@ struct file_magic { - macho_universal_binary, ///< Mach-O universal binary - macho_file_set, ///< Mach-O file set binary - minidump, ///< Windows minidump file -- coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file -- coff_object, ///< COFF object file -- coff_import_library, ///< COFF import library -- pecoff_executable, ///< PECOFF executable file -- windows_resource, ///< Windows compiled resource file (.res) -- xcoff_object_32, ///< 32-bit XCOFF object file -- xcoff_object_64, ///< 64-bit XCOFF object file -- wasm_object, ///< WebAssembly Object file -- pdb, ///< Windows PDB debug info file -- tapi_file, ///< Text-based Dynamic Library Stub file -- cuda_fatbinary, ///< CUDA Fatbinary object file -- offload_binary, ///< LLVM offload object file -- dxcontainer_object, ///< DirectX container file -+ coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file -+ coff_object, ///< COFF object file -+ coff_import_library, ///< COFF import library -+ pecoff_executable, ///< PECOFF executable file -+ windows_resource, ///< Windows compiled resource file (.res) -+ xcoff_object_32, ///< 32-bit XCOFF object file -+ xcoff_object_64, ///< 64-bit XCOFF object file -+ wasm_object, ///< WebAssembly Object file -+ pdb, ///< Windows PDB debug info file -+ tapi_file, ///< Text-based Dynamic Library Stub file -+ cuda_fatbinary, ///< CUDA Fatbinary object file -+ offload_binary, ///< LLVM offload object file -+ dxcontainer_object, ///< DirectX container file -+ offload_bundle, ///< Clang offload bundle file -+ offload_bundle_compressed, ///< Compressed clang offload bundle file - }; - - bool is_object() const { return V != unknown; } -diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp -index aa84bc36bfde..76aa1a602aa8 100644 ---- a/llvm/lib/BinaryFormat/Magic.cpp -+++ b/llvm/lib/BinaryFormat/Magic.cpp -@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) { - if (startswith(Magic, "BC\xC0\xDE")) - return file_magic::bitcode; - break; -+ case 'C': -+ if (startswith(Magic, "CCOB")) -+ return file_magic::offload_bundle_compressed; -+ break; - case '!': - if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) - return file_magic::archive; -@@ -246,6 +250,13 @@ file_magic llvm::identify_magic(StringRef Magic) { - return file_magic::coff_object; - break; - -+ case '_': { -+ const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__"; -+ if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic)) -+ return file_magic::offload_bundle; -+ break; -+ } -+ - default: - break; - } -diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp -index d18aed8b3b8c..0ee9f7fac448 100644 ---- a/llvm/lib/Object/Binary.cpp -+++ b/llvm/lib/Object/Binary.cpp -@@ -87,6 +87,8 @@ Expected> object::createBinary(MemoryBufferRef Buffer, - case file_magic::cuda_fatbinary: - case file_magic::coff_cl_gl_object: - case file_magic::dxcontainer_object: -+ case file_magic::offload_bundle: -+ case file_magic::offload_bundle_compressed: - // Unrecognized object file format. - return errorCodeToError(object_error::invalid_file_type); - case file_magic::offload_binary: -diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp -index 56a1d09097d4..4cc95ea32f60 100644 ---- a/llvm/lib/Object/ObjectFile.cpp -+++ b/llvm/lib/Object/ObjectFile.cpp -@@ -154,6 +154,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type, - case file_magic::cuda_fatbinary: - case file_magic::offload_binary: - case file_magic::dxcontainer_object: -+ case file_magic::offload_bundle: -+ case file_magic::offload_bundle_compressed: - return errorCodeToError(object_error::invalid_file_type); - case file_magic::tapi_file: - return errorCodeToError(object_error::invalid_file_type); --- -2.43.0 - diff --git a/pkgs/development/rocm-modules/6/llvm/base.nix b/pkgs/development/rocm-modules/6/llvm/base.nix index 88b384b43d8e..caa2ebe42823 100644 --- a/pkgs/development/rocm-modules/6/llvm/base.nix +++ b/pkgs/development/rocm-modules/6/llvm/base.nix @@ -72,9 +72,7 @@ in stdenv.mkDerivation (finalAttrs: { "info" # Avoid `attribute 'info' missing` when using with wrapCC ]; - patches = [ - ./add-compression-to-clang-offload-bundler.patch - ] ++ extraPatches; + patches = extraPatches; src = fetchFromGitHub { owner = "ROCm"; diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix index 0d982299ec6e..1cf60223def6 100644 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix +++ b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix @@ -2,14 +2,13 @@ , callPackage , rocmUpdateScript , llvm -, makeWrapper }: callPackage ../base.nix rec { inherit stdenv rocmUpdateScript; targetName = "clang-unwrapped"; targetDir = "clang"; - extraBuildInputs = [ llvm makeWrapper ]; + extraBuildInputs = [ llvm ]; extraCMakeFlags = [ "-DCLANG_INCLUDE_DOCS=ON" @@ -42,12 +41,6 @@ callPackage ../base.nix rec { extraPostInstall = '' mv bin/clang-tblgen $out/bin - # add wrapper to compress embedded accelerator-specific code - # this makes the output of composable_kernel significantly smaller right now - # TODO: remove this once ROCm does it out of the box - mv $out/bin/clang-offload-bundler $out/bin/clang-offload-bundler-unwrapped - makeWrapper $out/bin/clang-offload-bundler-unwrapped $out/bin/clang-offload-bundler \ - --add-flags '-compress' ''; requiredSystemFeatures = [ "big-parallel" ];