nixpkgs/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch
Martin Schwaighofer e380b53037 rocmPackages.llvm: compress outputs of clang-offload-bundler
This patches the clang-offload-bundler tool to add a compression option
from a more recent version of clang.
This compression option reduces the size of ROCm's fat binaries.
Those binaries contain .hip_fatbin sections with GPU-specific code,
for each target.
Compression is automatically turned on for all produced outputs
via a wrapper, because it's difficult to identify all the places
where the -compression argument would be needed.

Once upsteam introduces handeling for this argument,
we should drop the wrapper again.
This transistion will create inconsistsency, but I do not think that
it will impact any actual users and it's what's practical to implement.
2024-04-07 17:09:56 +02:00

1192 lines
48 KiB
Diff

From 058d9ba2f54a8c7c47b6522db808db87fed82e78 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Thu, 4 Apr 2024 03:25:28 +0200
Subject: [PATCH] backport 7e2823438e920d25364ff92b62ad90020c31bb59
Reland "[HIP] Support compressing device binary"
Original PR: https://github.com/llvm/llvm-project/pull/67162
The commit was reverted due to UB detected by santizer:
https://lab.llvm.org/buildbot/#/builders/238/builds/5955
clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error:
load of misaligned address 0xaaaae2d90e7c for type
'const uint64_t' (aka 'const unsigned long'), which
requires 8 byte alignment
It was fixed by using memcpy instead of dereferencing int*
casted from unaligned char*.
Co-Authored-By: Martin Schwaighofer <mschwaig@users.noreply.github.com>
(only did the backport)
---
clang/docs/ClangOffloadBundler.rst | 27 ++
clang/include/clang/Driver/OffloadBundler.h | 37 ++
clang/include/clang/Driver/Options.td | 5 +
clang/lib/Driver/OffloadBundler.cpp | 346 +++++++++++++++---
clang/lib/Driver/ToolChains/Clang.cpp | 7 +
clang/lib/Driver/ToolChains/HIPUtility.cpp | 6 +
.../test/Driver/clang-offload-bundler-zlib.c | 75 ++++
.../test/Driver/clang-offload-bundler-zstd.c | 72 ++++
.../test/Driver/hip-offload-compress-zlib.hip | 45 +++
.../test/Driver/hip-offload-compress-zstd.hip | 45 +++
.../clang-offload-bundler/CMakeLists.txt | 1 +
.../ClangOffloadBundler.cpp | 10 +
llvm/include/llvm/BinaryFormat/Magic.h | 28 +-
llvm/lib/BinaryFormat/Magic.cpp | 11 +
llvm/lib/Object/Binary.cpp | 2 +
llvm/lib/Object/ObjectFile.cpp | 2 +
16 files changed, 659 insertions(+), 60 deletions(-)
create mode 100644 clang/test/Driver/clang-offload-bundler-zlib.c
create mode 100644 clang/test/Driver/clang-offload-bundler-zstd.c
create mode 100644 clang/test/Driver/hip-offload-compress-zlib.hip
create mode 100644 clang/test/Driver/hip-offload-compress-zstd.hip
diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
index 432da787249b..d47997bf718d 100644
--- a/clang/docs/ClangOffloadBundler.rst
+++ b/clang/docs/ClangOffloadBundler.rst
@@ -498,3 +498,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
Verbose printing of matched/unmatched comparisons between bundle entry id of
a device binary from HDA and bundle entry ID of a given target processor
(see :ref:`compatibility-bundle-entry-id`).
+
+Compression and Decompression
+=============================
+
+``clang-offload-bundler`` provides features to compress and decompress the full
+bundle, leveraging inherent redundancies within the bundle entries. Use the
+`-compress` command-line option to enable this compression capability.
+
+The compressed offload bundle begins with a header followed by the compressed binary data:
+
+- **Magic Number (4 bytes)**:
+ This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
+
+- **Version Number (16-bit unsigned int)**:
+ This denotes the version of the compressed offload bundle format. The current version is `1`.
+
+- **Compression Method (16-bit unsigned int)**:
+ This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
+
+- **Uncompressed Binary Size (32-bit unsigned int)**:
+ This is the size (in bytes) of the binary data before it was compressed.
+
+- **Hash (64-bit unsigned int)**:
+ This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
+
+- **Compressed Data**:
+ The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index fe263f0540b9..fc96f200414d 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -19,6 +19,7 @@
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
+#include <llvm/Support/MemoryBuffer.h>
#include <string>
#include <vector>
@@ -26,11 +27,15 @@ namespace clang {
class OffloadBundlerConfig {
public:
+ OffloadBundlerConfig();
+
bool AllowNoHost = false;
bool AllowMissingBundles = false;
bool CheckInputArchive = false;
bool PrintExternalCommands = false;
bool HipOpenmpCompatible = false;
+ bool Compress = false;
+ bool Verbose = false;
unsigned BundleAlignment = 1;
unsigned HostInputIndex = ~0u;
@@ -82,6 +87,38 @@ struct OffloadTargetInfo {
std::string str() const;
};
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Uncompressed Size (4 bytes).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
+
+class CompressedOffloadBundle {
+private:
+ static inline const size_t MagicSize = 4;
+ static inline const size_t VersionFieldSize = sizeof(uint16_t);
+ static inline const size_t MethodFieldSize = sizeof(uint16_t);
+ static inline const size_t SizeFieldSize = sizeof(uint32_t);
+ static inline const size_t HashFieldSize = 8;
+ static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
+ MethodFieldSize + SizeFieldSize +
+ HashFieldSize;
+ static inline const llvm::StringRef MagicNumber = "CCOB";
+ static inline const uint16_t Version = 1;
+
+public:
+ static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+ static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+};
+
} // namespace clang
#endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c206ab84508f..f8df98678396 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -984,6 +984,11 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>
def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
"'-aux-target-cpu' and '-aux-target-feature'.">;
+
+def offload_compress : Flag<["--"], "offload-compress">,
+ HelpText<"Compress offload device binaries (HIP only)">;
+def no_offload_compress : Flag<["--"], "no-offload-compress">;
+
def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>,
HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 29250c49eb0c..69e14679da1c 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -21,24 +21,29 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
@@ -48,6 +53,7 @@
#include <cstddef>
#include <cstdint>
#include <forward_list>
+#include <llvm/Support/Process.h>
#include <memory>
#include <set>
#include <string>
@@ -58,6 +64,10 @@ using namespace llvm;
using namespace llvm::object;
using namespace clang;
+static llvm::TimerGroup
+ ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
+ "Timer group for clang offload bundler");
+
/// Magic string that marks the existence of offloading data.
#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
@@ -229,20 +239,22 @@ public:
/// Write the header of the bundled file to \a OS based on the information
/// gathered from \a Inputs.
- virtual Error WriteHeader(raw_fd_ostream &OS,
+ virtual Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
/// Write the marker that initiates a bundle for the triple \a TargetTriple to
/// \a OS.
- virtual Error WriteBundleStart(raw_fd_ostream &OS,
- StringRef TargetTriple) = 0;
+ virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
/// Write the marker that closes a bundle for the triple \a TargetTriple to \a
/// OS.
- virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
+ virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
/// Write the bundle from \a Input into \a OS.
- virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
+ virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
+
+ /// Finalize output file.
+ virtual Error finalizeOutputFile() { return Error::success(); }
/// List bundle IDs in \a Input.
virtual Error listBundleIDs(MemoryBuffer &Input) {
@@ -330,7 +342,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
}
/// Write 8-byte integers to a buffer in little-endian format.
-static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
+static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
llvm::support::endian::write(OS, Val, llvm::support::little);
}
@@ -378,8 +390,7 @@ public:
return Error::success();
// Check if no magic was found.
- StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
- if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
+ if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
return Error::success();
// Read number of bundles.
@@ -454,7 +465,7 @@ public:
return Error::success();
}
- Error WriteHeader(raw_fd_ostream &OS,
+ Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
// Compute size of the header.
@@ -491,19 +502,27 @@ public:
return Error::success();
}
- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
CurWriteBundleTarget = TargetTriple.str();
return Error::success();
}
- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
return Error::success();
}
- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
auto BI = BundlesInfo[CurWriteBundleTarget];
- OS.seek(BI.Offset);
+
+ // Pad with 0 to reach specified offset.
+ size_t CurrentPos = OS.tell();
+ size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
+ for (size_t I = 0; I < PaddingSize; ++I)
+ OS.write('\0');
+ assert(OS.tell() == BI.Offset);
+
OS.write(Input.getBufferStart(), Input.getBufferSize());
+
return Error::success();
}
};
@@ -560,7 +579,7 @@ class ObjectFileHandler final : public FileHandler {
return NameOrErr.takeError();
// If it does not start with the reserved suffix, just skip this section.
- if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
+ if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
return std::nullopt;
// Return the triple that is right after the reserved prefix.
@@ -625,7 +644,7 @@ public:
return Error::success();
}
- Error WriteHeader(raw_fd_ostream &OS,
+ Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
assert(BundlerConfig.HostInputIndex != ~0u &&
"Host input index not defined.");
@@ -635,12 +654,16 @@ public:
return Error::success();
}
- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
++NumberOfProcessedInputs;
return Error::success();
}
- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
+ return Error::success();
+ }
+
+ Error finalizeOutputFile() final {
assert(NumberOfProcessedInputs <= NumberOfInputs &&
"Processing more inputs that actually exist!");
assert(BundlerConfig.HostInputIndex != ~0u &&
@@ -658,10 +681,6 @@ public:
assert(BundlerConfig.ObjcopyPath != "" &&
"llvm-objcopy path not specified");
- // We write to the output file directly. So, we close it and use the name
- // to pass down to llvm-objcopy.
- OS.close();
-
// Temporary files that need to be removed.
TempFileHandlerRAII TempFiles;
@@ -702,7 +721,7 @@ public:
return Error::success();
}
- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
return Error::success();
}
@@ -799,22 +818,22 @@ protected:
return Error::success();
}
- Error WriteHeader(raw_fd_ostream &OS,
+ Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
return Error::success();
}
- Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
OS << BundleStartString << TargetTriple << "\n";
return Error::success();
}
- Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+ Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
OS << BundleEndString << TargetTriple << "\n";
return Error::success();
}
- Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+ Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
OS << Input.getBuffer();
return Error::success();
}
@@ -899,6 +918,187 @@ CreateFileHandler(MemoryBuffer &FirstInput,
"'" + FilesType + "': invalid file type specified");
}
+OffloadBundlerConfig::OffloadBundlerConfig() {
+ auto IgnoreEnvVarOpt =
+ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
+ if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
+ return;
+
+ auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
+ if (VerboseEnvVarOpt.has_value())
+ Verbose = VerboseEnvVarOpt.value() == "1";
+
+ auto CompressEnvVarOpt =
+ llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
+ if (CompressEnvVarOpt.has_value())
+ Compress = CompressEnvVarOpt.value() == "1";
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
+ bool Verbose) {
+ llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+ ClangOffloadBundlerTimerGroup);
+ if (Verbose)
+ HashTimer.startTimer();
+ llvm::MD5 Hash;
+ llvm::MD5::MD5Result Result;
+ Hash.update(Input.getBuffer());
+ Hash.final(Result);
+ uint64_t TruncatedHash = Result.low();
+ if (Verbose)
+ HashTimer.stopTimer();
+
+ SmallVector<uint8_t, 0> CompressedBuffer;
+ auto BufferUint8 = llvm::ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+ Input.getBuffer().size());
+
+ llvm::compression::Format CompressionFormat;
+
+ if (llvm::compression::zstd::isAvailable())
+ CompressionFormat = llvm::compression::Format::Zstd;
+ else if (llvm::compression::zlib::isAvailable())
+ CompressionFormat = llvm::compression::Format::Zlib;
+ else
+ return createStringError(llvm::inconvertibleErrorCode(),
+ "Compression not supported");
+
+ llvm::Timer CompressTimer("Compression Timer", "Compression time",
+ ClangOffloadBundlerTimerGroup);
+ if (Verbose)
+ CompressTimer.startTimer();
+ llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
+ if (Verbose)
+ CompressTimer.stopTimer();
+
+ uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
+ uint32_t UncompressedSize = Input.getBuffer().size();
+
+ SmallVector<char, 0> FinalBuffer;
+ llvm::raw_svector_ostream OS(FinalBuffer);
+ OS << MagicNumber;
+ OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
+ OS.write(reinterpret_cast<const char *>(&CompressionMethod),
+ sizeof(CompressionMethod));
+ OS.write(reinterpret_cast<const char *>(&UncompressedSize),
+ sizeof(UncompressedSize));
+ OS.write(reinterpret_cast<const char *>(&TruncatedHash),
+ sizeof(TruncatedHash));
+ OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
+ CompressedBuffer.size());
+
+ if (Verbose) {
+ auto MethodUsed =
+ CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+ llvm::errs() << "Compressed bundle format version: " << Version << "\n"
+ << "Compression method used: " << MethodUsed << "\n"
+ << "Binary size before compression: " << UncompressedSize
+ << " bytes\n"
+ << "Binary size after compression: " << CompressedBuffer.size()
+ << " bytes\n"
+ << "Truncated MD5 hash: "
+ << llvm::format_hex(TruncatedHash, 16) << "\n";
+ }
+
+ return llvm::MemoryBuffer::getMemBufferCopy(
+ llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
+ bool Verbose) {
+
+ StringRef Blob = Input.getBuffer();
+
+ if (Blob.size() < HeaderSize) {
+ return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+ }
+ if (llvm::identify_magic(Blob) !=
+ llvm::file_magic::offload_bundle_compressed) {
+ if (Verbose)
+ llvm::errs() << "Uncompressed bundle.\n";
+ return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+ }
+
+ uint16_t ThisVersion;
+ uint16_t CompressionMethod;
+ uint32_t UncompressedSize;
+ uint64_t StoredHash;
+ memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
+ sizeof(uint16_t));
+ memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
+ sizeof(uint16_t));
+ memcpy(&UncompressedSize,
+ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
+ sizeof(uint32_t));
+ memcpy(&StoredHash,
+ Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
+ SizeFieldSize,
+ sizeof(uint64_t));
+
+ llvm::compression::Format CompressionFormat;
+ if (CompressionMethod ==
+ static_cast<uint16_t>(llvm::compression::Format::Zlib))
+ CompressionFormat = llvm::compression::Format::Zlib;
+ else if (CompressionMethod ==
+ static_cast<uint16_t>(llvm::compression::Format::Zstd))
+ CompressionFormat = llvm::compression::Format::Zstd;
+ else
+ return createStringError(inconvertibleErrorCode(),
+ "Unknown compressing method");
+
+ llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
+ ClangOffloadBundlerTimerGroup);
+ if (Verbose)
+ DecompressTimer.startTimer();
+
+ SmallVector<uint8_t, 0> DecompressedData;
+ StringRef CompressedData = Blob.substr(HeaderSize);
+ if (llvm::Error DecompressionError = llvm::compression::decompress(
+ CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+ DecompressedData, UncompressedSize))
+ return createStringError(inconvertibleErrorCode(),
+ "Could not decompress embedded file contents: " +
+ llvm::toString(std::move(DecompressionError)));
+
+ if (Verbose) {
+ DecompressTimer.stopTimer();
+
+ // Recalculate MD5 hash
+ llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
+ "Hash recalculation time",
+ ClangOffloadBundlerTimerGroup);
+ HashRecalcTimer.startTimer();
+ llvm::MD5 Hash;
+ llvm::MD5::MD5Result Result;
+ Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
+ DecompressedData.size()));
+ Hash.final(Result);
+ uint64_t RecalculatedHash = Result.low();
+ HashRecalcTimer.stopTimer();
+ bool HashMatch = (StoredHash == RecalculatedHash);
+
+ llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
+ << "Decompression method: "
+ << (CompressionFormat == llvm::compression::Format::Zlib
+ ? "zlib"
+ : "zstd")
+ << "\n"
+ << "Size before decompression: " << CompressedData.size()
+ << " bytes\n"
+ << "Size after decompression: " << UncompressedSize
+ << " bytes\n"
+ << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
+ << "Recalculated hash: "
+ << llvm::format_hex(RecalculatedHash, 16) << "\n"
+ << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
+ }
+
+ return llvm::MemoryBuffer::getMemBufferCopy(
+ llvm::toStringRef(DecompressedData));
+}
+
// List bundle IDs. Return true if an error was found.
Error OffloadBundler::ListBundleIDsInFile(
StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@@ -908,28 +1108,35 @@ Error OffloadBundler::ListBundleIDsInFile(
if (std::error_code EC = CodeOrErr.getError())
return createFileError(InputFileName, EC);
- MemoryBuffer &Input = **CodeOrErr;
+ // Decompress the input if necessary.
+ Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+ if (!DecompressedBufferOrErr)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Failed to decompress input: " +
+ llvm::toString(DecompressedBufferOrErr.takeError()));
+
+ MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
// Select the right files handler.
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
- CreateFileHandler(Input, BundlerConfig);
+ CreateFileHandler(DecompressedInput, BundlerConfig);
if (!FileHandlerOrErr)
return FileHandlerOrErr.takeError();
std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
assert(FH);
- return FH->listBundleIDs(Input);
+ return FH->listBundleIDs(DecompressedInput);
}
/// Bundle the files. Return true if an error was found.
Error OffloadBundler::BundleFiles() {
std::error_code EC;
- // Create output file.
- raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
- sys::fs::OF_None);
- if (EC)
- return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+ // Create a buffer to hold the content before compressing.
+ SmallVector<char, 0> Buffer;
+ llvm::raw_svector_ostream BufferStream(Buffer);
// Open input files.
SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
@@ -956,22 +1163,46 @@ Error OffloadBundler::BundleFiles() {
assert(FH);
// Write header.
- if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
+ if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
return Err;
// Write all bundles along with the start/end markers. If an error was found
// writing the end of the bundle component, abort the bundle writing.
auto Input = InputBuffers.begin();
for (auto &Triple : BundlerConfig.TargetNames) {
- if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
+ if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
return Err;
- if (Error Err = FH->WriteBundle(OutputFile, **Input))
+ if (Error Err = FH->WriteBundle(BufferStream, **Input))
return Err;
- if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
+ if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
return Err;
++Input;
}
- return Error::success();
+
+ raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
+ sys::fs::OF_None);
+ if (EC)
+ return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+
+ SmallVector<char, 0> CompressedBuffer;
+ if (BundlerConfig.Compress) {
+ std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
+ llvm::MemoryBuffer::getMemBufferCopy(
+ llvm::StringRef(Buffer.data(), Buffer.size()));
+ auto CompressionResult =
+ CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
+ if (auto Error = CompressionResult.takeError())
+ return Error;
+
+ auto CompressedMemBuffer = std::move(CompressionResult.get());
+ CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
+ CompressedMemBuffer->getBufferEnd());
+ } else
+ CompressedBuffer = Buffer;
+
+ OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
+
+ return FH->finalizeOutputFile();
}
// Unbundle the files. Return true if an error was found.
@@ -982,7 +1213,16 @@ Error OffloadBundler::UnbundleFiles() {
if (std::error_code EC = CodeOrErr.getError())
return createFileError(BundlerConfig.InputFileNames.front(), EC);
- MemoryBuffer &Input = **CodeOrErr;
+ // Decompress the input if necessary.
+ Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+ CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+ if (!DecompressedBufferOrErr)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Failed to decompress input: " +
+ llvm::toString(DecompressedBufferOrErr.takeError()));
+
+ MemoryBuffer &Input = **DecompressedBufferOrErr;
// Select the right files handler.
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
@@ -1357,22 +1597,34 @@ Error OffloadBundler::UnbundleArchive() {
if (!CodeObjectBufferRefOrErr)
return CodeObjectBufferRefOrErr.takeError();
- auto CodeObjectBuffer =
+ auto TempCodeObjectBuffer =
MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
+ // Decompress the buffer if necessary.
+ Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+ CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
+ BundlerConfig.Verbose);
+ if (!DecompressedBufferOrErr)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Failed to decompress code object: " +
+ llvm::toString(DecompressedBufferOrErr.takeError()));
+
+ MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
+
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
- CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
+ CreateFileHandler(CodeObjectBuffer, BundlerConfig);
if (!FileHandlerOrErr)
return FileHandlerOrErr.takeError();
std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
assert(FileHandler);
- if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer))
+ if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
return ReadErr;
Expected<std::optional<StringRef>> CurBundleIDOrErr =
- FileHandler->ReadBundleStart(*CodeObjectBuffer);
+ FileHandler->ReadBundleStart(CodeObjectBuffer);
if (!CurBundleIDOrErr)
return CurBundleIDOrErr.takeError();
@@ -1393,7 +1645,7 @@ Error OffloadBundler::UnbundleArchive() {
BundlerConfig)) {
std::string BundleData;
raw_string_ostream DataStream(BundleData);
- if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer))
+ if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
return Err;
for (auto &CompatibleTarget : CompatibleTargets) {
@@ -1431,11 +1683,11 @@ Error OffloadBundler::UnbundleArchive() {
}
}
- if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer))
+ if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
return Err;
Expected<std::optional<StringRef>> NextTripleOrErr =
- FileHandler->ReadBundleStart(*CodeObjectBuffer);
+ FileHandler->ReadBundleStart(CodeObjectBuffer);
if (!NextTripleOrErr)
return NextTripleOrErr.takeError();
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9e1f07d78bf1..274875c631c5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8400,6 +8400,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
+ if (TCArgs.hasFlag(options::OPT_offload_compress,
+ options::OPT_no_offload_compress, false))
+ CmdArgs.push_back("-compress");
+ if (TCArgs.hasArg(options::OPT_v))
+ CmdArgs.push_back("-verbose");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
@@ -8494,6 +8499,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
}
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
+ if (TCArgs.hasArg(options::OPT_v))
+ CmdArgs.push_back("-verbose");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 8b9d8db90ffa..04efdcba20ea 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
Args.MakeArgString(std::string("-output=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);
+ if (Args.hasFlag(options::OPT_offload_compress,
+ options::OPT_no_offload_compress, false))
+ BundlerArgs.push_back("-compress");
+ if (Args.hasArg(options::OPT_v))
+ BundlerArgs.push_back("-verbose");
+
const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
C.addCommand(std::make_unique<Command>(
diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c
new file mode 100644
index 000000000000..c46c32a4a053
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zlib.c
@@ -0,0 +1,75 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
+// RUN: FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
+// RUN: FileCheck -check-prefix=DECOMPRESS %s
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// COMPRESS: Compression method used:
+// DECOMPRESS: Decompression method:
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: diff %t.bc %t.res.bc
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+ ++A;
+}
diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
new file mode 100644
index 000000000000..b2b588b72d4d
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zstd.c
@@ -0,0 +1,72 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
+// RUN: FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
+// RUN: FileCheck -check-prefix=DECOMPRESS %s
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+//
+// COMPRESS: Compression method used
+// DECOMPRESS: Decompression method
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: diff %t.bc %t.res.bc
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+ ++A;
+}
diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip
new file mode 100644
index 000000000000..a29b6d037350
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zlib.hip
@@ -0,0 +1,45 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -fgpu-rdc -nogpuinc -nogpulib \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN: -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
+// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -fgpu-rdc -nogpulib \
+// RUN: %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -nogpuinc -nogpulib \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} "-type=o"
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: "-compress" "-verbose"
diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip
new file mode 100644
index 000000000000..688c2c85329c
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zstd.hip
@@ -0,0 +1,45 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -fgpu-rdc -nogpuinc -nogpulib \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN: -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
+// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -fgpu-rdc -nogpulib \
+// RUN: %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN: -nogpuinc -nogpulib \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} "-type=o"
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: "-compress" "-verbose"
diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt
index dabd82382cdf..dec2881589a5 100644
--- a/clang/tools/clang-offload-bundler/CMakeLists.txt
+++ b/clang/tools/clang-offload-bundler/CMakeLists.txt
@@ -1,4 +1,5 @@
set(LLVM_LINK_COMPONENTS
+ BinaryFormat
Object
Support
TargetParser
diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index c02b5854bded..68f29807b219 100644
--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -141,6 +141,11 @@ int main(int argc, const char **argv) {
cl::desc("Treat hip and hipv4 offload kinds as "
"compatible with openmp kind, and vice versa.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+ cl::opt<bool> Compress("compress",
+ cl::desc("Compress output file when bundling.\n"),
+ cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+ cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
+ cl::init(false), cl::cat(ClangOffloadBundlerCategory));
// Process commandline options and report errors
sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -169,6 +174,11 @@ int main(int argc, const char **argv) {
BundlerConfig.BundleAlignment = BundleAlignment;
BundlerConfig.FilesType = FilesType;
BundlerConfig.ObjcopyPath = "";
+ // Do not override the default value Compress and Verbose in BundlerConfig.
+ if (Compress.getNumOccurrences() > 0)
+ BundlerConfig.Compress = Compress;
+ if (Verbose.getNumOccurrences() > 0)
+ BundlerConfig.Verbose = Verbose;
BundlerConfig.TargetNames = TargetNames;
BundlerConfig.InputFileNames = InputFileNames;
diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
index 329c96f5c14c..a28710dcdfaf 100644
--- a/llvm/include/llvm/BinaryFormat/Magic.h
+++ b/llvm/include/llvm/BinaryFormat/Magic.h
@@ -42,19 +42,21 @@ struct file_magic {
macho_universal_binary, ///< Mach-O universal binary
macho_file_set, ///< Mach-O file set binary
minidump, ///< Windows minidump file
- coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
- coff_object, ///< COFF object file
- coff_import_library, ///< COFF import library
- pecoff_executable, ///< PECOFF executable file
- windows_resource, ///< Windows compiled resource file (.res)
- xcoff_object_32, ///< 32-bit XCOFF object file
- xcoff_object_64, ///< 64-bit XCOFF object file
- wasm_object, ///< WebAssembly Object file
- pdb, ///< Windows PDB debug info file
- tapi_file, ///< Text-based Dynamic Library Stub file
- cuda_fatbinary, ///< CUDA Fatbinary object file
- offload_binary, ///< LLVM offload object file
- dxcontainer_object, ///< DirectX container file
+ coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
+ coff_object, ///< COFF object file
+ coff_import_library, ///< COFF import library
+ pecoff_executable, ///< PECOFF executable file
+ windows_resource, ///< Windows compiled resource file (.res)
+ xcoff_object_32, ///< 32-bit XCOFF object file
+ xcoff_object_64, ///< 64-bit XCOFF object file
+ wasm_object, ///< WebAssembly Object file
+ pdb, ///< Windows PDB debug info file
+ tapi_file, ///< Text-based Dynamic Library Stub file
+ cuda_fatbinary, ///< CUDA Fatbinary object file
+ offload_binary, ///< LLVM offload object file
+ dxcontainer_object, ///< DirectX container file
+ offload_bundle, ///< Clang offload bundle file
+ offload_bundle_compressed, ///< Compressed clang offload bundle file
};
bool is_object() const { return V != unknown; }
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index aa84bc36bfde..76aa1a602aa8 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "BC\xC0\xDE"))
return file_magic::bitcode;
break;
+ case 'C':
+ if (startswith(Magic, "CCOB"))
+ return file_magic::offload_bundle_compressed;
+ break;
case '!':
if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
return file_magic::archive;
@@ -246,6 +250,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
+ case '_': {
+ const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
+ if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
+ return file_magic::offload_bundle;
+ break;
+ }
+
default:
break;
}
diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index d18aed8b3b8c..0ee9f7fac448 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
case file_magic::dxcontainer_object:
+ case file_magic::offload_bundle:
+ case file_magic::offload_bundle_compressed:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
case file_magic::offload_binary:
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 56a1d09097d4..4cc95ea32f60 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -154,6 +154,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::cuda_fatbinary:
case file_magic::offload_binary:
case file_magic::dxcontainer_object:
+ case file_magic::offload_bundle:
+ case file_magic::offload_bundle_compressed:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);
--
2.43.0