[HIP] add --offload-compression-level= option (#83605)
Add the --offload-compression-level= option to clang and the -compression-level= option to clang-offload-bundler for controlling the compression level. Add support for long distance matching (LDM) to llvm::compression::zstd; it is off by default, but it is enabled for clang-offload-bundler since it generally improves the compression rate. Change the default zstd compression level for clang-offload-bundler to 3, since it works well for bundle entry sizes from 1KB to 32MB, which should cover most clang-offload-bundler usage. Users can still specify the compression level with the -compression-level= option if necessary.
This commit is contained in:
parent
83fe0b1382
commit
124d0b787b
@ -17,6 +17,7 @@
|
||||
#ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
|
||||
#define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
|
||||
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/TargetParser/Triple.h"
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
@ -36,6 +37,8 @@ public:
|
||||
bool HipOpenmpCompatible = false;
|
||||
bool Compress = false;
|
||||
bool Verbose = false;
|
||||
llvm::compression::Format CompressionFormat;
|
||||
int CompressionLevel;
|
||||
|
||||
unsigned BundleAlignment = 1;
|
||||
unsigned HostInputIndex = ~0u;
|
||||
@ -116,7 +119,8 @@ private:
|
||||
|
||||
public:
|
||||
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
|
||||
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
|
||||
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
|
||||
bool Verbose = false);
|
||||
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
|
||||
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
|
||||
};
|
||||
|
@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
|
||||
def offload_compress : Flag<["--"], "offload-compress">,
|
||||
HelpText<"Compress offload device binaries (HIP only)">;
|
||||
def no_offload_compress : Flag<["--"], "no-offload-compress">;
|
||||
|
||||
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
|
||||
Flags<[HelpHidden]>,
|
||||
HelpText<"Compression level for offload device binaries (HIP only)">;
|
||||
}
|
||||
|
||||
// CUDA options
|
||||
|
@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
|
||||
}
|
||||
|
||||
OffloadBundlerConfig::OffloadBundlerConfig() {
|
||||
if (llvm::compression::zstd::isAvailable()) {
|
||||
CompressionFormat = llvm::compression::Format::Zstd;
|
||||
// Compression level 3 is usually sufficient for zstd since long distance
|
||||
// matching is enabled.
|
||||
CompressionLevel = 3;
|
||||
} else if (llvm::compression::zlib::isAvailable()) {
|
||||
CompressionFormat = llvm::compression::Format::Zlib;
|
||||
// Use default level for zlib since higher level does not have significant
|
||||
// improvement.
|
||||
CompressionLevel = llvm::compression::zlib::DefaultCompression;
|
||||
}
|
||||
auto IgnoreEnvVarOpt =
|
||||
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
|
||||
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
|
||||
@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
|
||||
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
|
||||
if (CompressEnvVarOpt.has_value())
|
||||
Compress = CompressEnvVarOpt.value() == "1";
|
||||
|
||||
auto CompressionLevelEnvVarOpt =
|
||||
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
|
||||
if (CompressionLevelEnvVarOpt.has_value()) {
|
||||
llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
|
||||
int Level;
|
||||
if (!CompressionLevelStr.getAsInteger(10, Level))
|
||||
CompressionLevel = Level;
|
||||
else
|
||||
llvm::errs()
|
||||
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
|
||||
<< CompressionLevelStr.str() << ". Ignoring it.\n";
|
||||
}
|
||||
}
|
||||
|
||||
/// Render \p Value in decimal with a comma between every group of three
/// digits, counting from the right (e.g. 1234567 becomes "1,234,567").
/// Values with three or fewer digits are returned without any separator.
static std::string formatWithCommas(unsigned long long Value) {
  std::string Num = std::to_string(Value);
  // Do the subtraction in a signed type: for inputs shorter than three digits
  // `Num.length() - 3` would wrap around in size_t before being narrowed.
  int InsertPosition = static_cast<int>(Num.length()) - 3;
  // Walk from the least significant group towards the front; stopping at
  // position 0 guarantees the result never starts with a comma.
  while (InsertPosition > 0) {
    Num.insert(InsertPosition, ",");
    InsertPosition -= 3;
  }
  return Num;
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
|
||||
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
|
||||
CompressedOffloadBundle::compress(llvm::compression::Params P,
|
||||
const llvm::MemoryBuffer &Input,
|
||||
bool Verbose) {
|
||||
if (!llvm::compression::zstd::isAvailable() &&
|
||||
!llvm::compression::zlib::isAvailable())
|
||||
return createStringError(llvm::inconvertibleErrorCode(),
|
||||
"Compression not supported");
|
||||
|
||||
llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
|
||||
ClangOffloadBundlerTimerGroup);
|
||||
if (Verbose)
|
||||
@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
|
||||
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
|
||||
Input.getBuffer().size());
|
||||
|
||||
llvm::compression::Format CompressionFormat;
|
||||
|
||||
if (llvm::compression::zstd::isAvailable())
|
||||
CompressionFormat = llvm::compression::Format::Zstd;
|
||||
else if (llvm::compression::zlib::isAvailable())
|
||||
CompressionFormat = llvm::compression::Format::Zlib;
|
||||
else
|
||||
return createStringError(llvm::inconvertibleErrorCode(),
|
||||
"Compression not supported");
|
||||
|
||||
llvm::Timer CompressTimer("Compression Timer", "Compression time",
|
||||
ClangOffloadBundlerTimerGroup);
|
||||
if (Verbose)
|
||||
CompressTimer.startTimer();
|
||||
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
|
||||
llvm::compression::compress(P, BufferUint8, CompressedBuffer);
|
||||
if (Verbose)
|
||||
CompressTimer.stopTimer();
|
||||
|
||||
uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
|
||||
uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
|
||||
uint32_t UncompressedSize = Input.getBuffer().size();
|
||||
|
||||
SmallVector<char, 0> FinalBuffer;
|
||||
@ -995,17 +1026,29 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
|
||||
|
||||
if (Verbose) {
|
||||
auto MethodUsed =
|
||||
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
|
||||
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
|
||||
double CompressionRate =
|
||||
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
|
||||
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
|
||||
double CompressionSpeedMBs =
|
||||
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
|
||||
|
||||
llvm::errs() << "Compressed bundle format version: " << Version << "\n"
|
||||
<< "Compression method used: " << MethodUsed << "\n"
|
||||
<< "Binary size before compression: " << UncompressedSize
|
||||
<< " bytes\n"
|
||||
<< "Binary size after compression: " << CompressedBuffer.size()
|
||||
<< " bytes\n"
|
||||
<< "Compression level: " << P.level << "\n"
|
||||
<< "Binary size before compression: "
|
||||
<< formatWithCommas(UncompressedSize) << " bytes\n"
|
||||
<< "Binary size after compression: "
|
||||
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
|
||||
<< "Compression rate: "
|
||||
<< llvm::format("%.2lf", CompressionRate) << "\n"
|
||||
<< "Compression ratio: "
|
||||
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
|
||||
<< "Compression speed: "
|
||||
<< llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
|
||||
<< "Truncated MD5 hash: "
|
||||
<< llvm::format_hex(TruncatedHash, 16) << "\n";
|
||||
}
|
||||
|
||||
return llvm::MemoryBuffer::getMemBufferCopy(
|
||||
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
|
||||
}
|
||||
@ -1070,7 +1113,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
|
||||
if (Verbose) {
|
||||
DecompressTimer.stopTimer();
|
||||
|
||||
// Recalculate MD5 hash
|
||||
double DecompressionTimeSeconds =
|
||||
DecompressTimer.getTotalTime().getWallTime();
|
||||
|
||||
// Recalculate MD5 hash for integrity check
|
||||
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
|
||||
"Hash recalculation time",
|
||||
ClangOffloadBundlerTimerGroup);
|
||||
@ -1084,16 +1130,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
|
||||
HashRecalcTimer.stopTimer();
|
||||
bool HashMatch = (StoredHash == RecalculatedHash);
|
||||
|
||||
double CompressionRate =
|
||||
static_cast<double>(UncompressedSize) / CompressedData.size();
|
||||
double DecompressionSpeedMBs =
|
||||
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
|
||||
|
||||
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
|
||||
<< "Decompression method: "
|
||||
<< (CompressionFormat == llvm::compression::Format::Zlib
|
||||
? "zlib"
|
||||
: "zstd")
|
||||
<< "\n"
|
||||
<< "Size before decompression: " << CompressedData.size()
|
||||
<< " bytes\n"
|
||||
<< "Size after decompression: " << UncompressedSize
|
||||
<< " bytes\n"
|
||||
<< "Size before decompression: "
|
||||
<< formatWithCommas(CompressedData.size()) << " bytes\n"
|
||||
<< "Size after decompression: "
|
||||
<< formatWithCommas(UncompressedSize) << " bytes\n"
|
||||
<< "Compression rate: "
|
||||
<< llvm::format("%.2lf", CompressionRate) << "\n"
|
||||
<< "Compression ratio: "
|
||||
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
|
||||
<< "Decompression speed: "
|
||||
<< llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
|
||||
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
|
||||
<< "Recalculated hash: "
|
||||
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
|
||||
@ -1287,8 +1344,10 @@ Error OffloadBundler::BundleFiles() {
|
||||
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
|
||||
llvm::MemoryBuffer::getMemBufferCopy(
|
||||
llvm::StringRef(Buffer.data(), Buffer.size()));
|
||||
auto CompressionResult =
|
||||
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
|
||||
auto CompressionResult = CompressedOffloadBundle::compress(
|
||||
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
|
||||
/*zstdEnableLdm=*/true},
|
||||
*BufferMemory, BundlerConfig.Verbose);
|
||||
if (auto Error = CompressionResult.takeError())
|
||||
return Error;
|
||||
|
||||
|
@ -8529,7 +8529,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
}
|
||||
|
||||
// Begin OffloadBundler
|
||||
|
||||
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
@ -8627,11 +8626,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
}
|
||||
CmdArgs.push_back(TCArgs.MakeArgString(UB));
|
||||
}
|
||||
if (TCArgs.hasFlag(options::OPT_offload_compress,
|
||||
options::OPT_no_offload_compress, false))
|
||||
CmdArgs.push_back("-compress");
|
||||
if (TCArgs.hasArg(options::OPT_v))
|
||||
CmdArgs.push_back("-verbose");
|
||||
addOffloadCompressArgs(TCArgs, CmdArgs);
|
||||
// All the inputs are encoded as commands.
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::None(),
|
||||
@ -8900,9 +8895,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
for (const char *LinkArg : LinkCommand->getArguments())
|
||||
CmdArgs.push_back(LinkArg);
|
||||
|
||||
if (Args.hasFlag(options::OPT_offload_compress,
|
||||
options::OPT_no_offload_compress, false))
|
||||
CmdArgs.push_back("--compress");
|
||||
addOffloadCompressArgs(Args, CmdArgs);
|
||||
|
||||
const char *Exec =
|
||||
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
|
||||
|
@ -2863,3 +2863,15 @@ void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
|
||||
CmdArgs.push_back("+outline-atomics");
|
||||
}
|
||||
}
|
||||
|
||||
/// Append the offload-compression arguments derived from the driver options
/// in \p TCArgs to \p CmdArgs:
///   - "-compress" when hasFlag() reports --offload-compress as enabled
///     against --no-offload-compress (default: disabled);
///   - "-verbose" when -v is present;
///   - "-compression-level=<value>" when --offload-compression-level= is
///     present, forwarding the value returned by getLastArg() unchanged.
///
/// NOTE(review): all spellings emitted here use a single dash. The
/// linker-wrapper call site emitted "--compress" before it was switched to
/// this helper -- confirm clang-linker-wrapper accepts the single-dash forms.
void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
                                   llvm::opt::ArgStringList &CmdArgs) {
  const bool CompressRequested =
      TCArgs.hasFlag(options::OPT_offload_compress,
                     options::OPT_no_offload_compress, false);
  if (CompressRequested) {
    CmdArgs.push_back("-compress");
  }

  const bool VerboseRequested = TCArgs.hasArg(options::OPT_v);
  if (VerboseRequested) {
    CmdArgs.push_back("-verbose");
  }

  auto *LevelArg = TCArgs.getLastArg(options::OPT_offload_compression_level_EQ);
  if (!LevelArg)
    return;

  // The level is forwarded verbatim; no validation is done here.
  const char *LevelOption = TCArgs.MakeArgString(Twine("-compression-level=") +
                                                 LevelArg->getValue());
  CmdArgs.push_back(LevelOption);
}
|
||||
|
@ -221,6 +221,8 @@ void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
|
||||
const llvm::opt::ArgList &Args,
|
||||
llvm::opt::ArgStringList &CmdArgs,
|
||||
const llvm::Triple &Triple);
|
||||
void addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
|
||||
llvm::opt::ArgStringList &CmdArgs);
|
||||
|
||||
} // end namespace tools
|
||||
} // end namespace driver
|
||||
|
@ -7,6 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "HIPUtility.h"
|
||||
#include "Clang.h"
|
||||
#include "CommonArgs.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/Options.h"
|
||||
@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
|
||||
Args.MakeArgString(std::string("-output=").append(Output));
|
||||
BundlerArgs.push_back(BundlerOutputArg);
|
||||
|
||||
if (Args.hasFlag(options::OPT_offload_compress,
|
||||
options::OPT_no_offload_compress, false))
|
||||
BundlerArgs.push_back("-compress");
|
||||
if (Args.hasArg(options::OPT_v))
|
||||
BundlerArgs.push_back("-verbose");
|
||||
addOffloadCompressArgs(Args, BundlerArgs);
|
||||
|
||||
const char *Bundler = Args.MakeArgString(
|
||||
T.getToolChain().GetProgramPath("clang-offload-bundler"));
|
||||
|
@ -1,4 +1,4 @@
|
||||
// REQUIRES: zlib
|
||||
// REQUIRES: zlib && !zstd
|
||||
// REQUIRES: x86-registered-target
|
||||
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
|
||||
|
||||
@ -34,13 +34,28 @@
|
||||
// RUN: diff %t.tgt2 %t.res.tgt2
|
||||
|
||||
//
|
||||
// COMPRESS: Compression method used:
|
||||
// DECOMPRESS: Decompression method:
|
||||
// COMPRESS: Compression method used: zlib
|
||||
// COMPRESS: Compression level: 6
|
||||
// DECOMPRESS: Decompression method: zlib
|
||||
// DECOMPRESS: Hashes match: Yes
|
||||
// NOHOST-NOT: host-
|
||||
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
|
||||
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
|
||||
//
|
||||
|
||||
// Check -compression-level= option
|
||||
|
||||
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
|
||||
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
|
||||
// RUN: FileCheck -check-prefix=LEVEL %s
|
||||
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
|
||||
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
|
||||
// RUN: diff %t.tgt1 %t.res.tgt1
|
||||
// RUN: diff %t.tgt2 %t.res.tgt2
|
||||
//
|
||||
// LEVEL: Compression method used: zlib
|
||||
// LEVEL: Compression level: 9
|
||||
|
||||
//
|
||||
// Check -bundle-align option.
|
||||
//
|
||||
|
@ -31,13 +31,28 @@
|
||||
// RUN: diff %t.tgt1 %t.res.tgt1
|
||||
// RUN: diff %t.tgt2 %t.res.tgt2
|
||||
//
|
||||
// COMPRESS: Compression method used
|
||||
// DECOMPRESS: Decompression method
|
||||
// COMPRESS: Compression method used: zstd
|
||||
// COMPRESS: Compression level: 20
|
||||
// DECOMPRESS: Decompression method: zstd
|
||||
// DECOMPRESS: Hashes match: Yes
|
||||
// NOHOST-NOT: host-
|
||||
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
|
||||
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
|
||||
//
|
||||
|
||||
// Check -compression-level= option
|
||||
|
||||
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
|
||||
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
|
||||
// RUN: FileCheck -check-prefix=LEVEL %s
|
||||
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
|
||||
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
|
||||
// RUN: diff %t.tgt1 %t.res.tgt1
|
||||
// RUN: diff %t.tgt2 %t.res.tgt2
|
||||
//
|
||||
// LEVEL: Compression method used: zstd
|
||||
// LEVEL: Compression level: 9
|
||||
|
||||
//
|
||||
// Check -bundle-align option.
|
||||
//
|
||||
|
@ -1,4 +1,4 @@
|
||||
// REQUIRES: zlib
|
||||
// REQUIRES: zlib && !zstd
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
@ -9,13 +9,14 @@
|
||||
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
|
||||
// RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
|
||||
// RUN: --offload-compress --offload-compression-level=9 \
|
||||
// RUN: --offload-device-only --gpu-bundle-output \
|
||||
// RUN: -o %t.bc \
|
||||
// RUN: 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: clang-offload-bundler{{.*}} -type=bc
|
||||
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
|
||||
// CHECK-SAME: -compress -verbose
|
||||
// CHECK-SAME: -compress -verbose -compression-level=9
|
||||
// CHECK: Compressed bundle format
|
||||
|
||||
// Test uncompress of bundled bitcode.
|
||||
|
@ -9,13 +9,14 @@
|
||||
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
|
||||
// RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
|
||||
// RUN: --offload-compress --offload-compression-level=9 \
|
||||
// RUN: --offload-device-only --gpu-bundle-output \
|
||||
// RUN: -o %t.bc \
|
||||
// RUN: 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: clang-offload-bundler{{.*}} -type=bc
|
||||
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
|
||||
// CHECK-SAME: -compress -verbose
|
||||
// CHECK-SAME: -compress -verbose -compression-level=9
|
||||
// CHECK: Compressed bundle format
|
||||
|
||||
// Test uncompress of bundled bitcode.
|
||||
|
@ -114,12 +114,13 @@ __attribute__((visibility("protected"), used)) int x;
|
||||
// RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
|
||||
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
|
||||
// RUN: -fembed-offload-object=%t.out
|
||||
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --compress \
|
||||
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \
|
||||
// RUN: --compress --compression-level=6 \
|
||||
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
|
||||
|
||||
// HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908
|
||||
// HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a
|
||||
// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb
|
||||
// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -compression-level=6 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb
|
||||
|
||||
// RUN: clang-offload-packager -o %t.out \
|
||||
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
|
||||
|
@ -407,6 +407,9 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
|
||||
|
||||
if (Args.hasArg(OPT_compress))
|
||||
CmdArgs.push_back("-compress");
|
||||
if (auto *Arg = Args.getLastArg(OPT_compression_level_eq))
|
||||
CmdArgs.push_back(
|
||||
Args.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
|
||||
|
||||
SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
|
||||
for (const auto &[File, Arch] : InputFiles)
|
||||
|
@ -60,6 +60,8 @@ def save_temps : Flag<["--"], "save-temps">,
|
||||
Flags<[WrapperOnlyOption]>, HelpText<"Save intermediate results">;
|
||||
def compress : Flag<["--"], "compress">,
|
||||
Flags<[WrapperOnlyOption]>, HelpText<"Compress bundled files">;
|
||||
def compression_level_eq : Joined<["--"], "compression-level=">,
|
||||
Flags<[WrapperOnlyOption]>, HelpText<"Specify the compression level (integer)">;
|
||||
|
||||
def wrapper_time_trace_eq : Joined<["--"], "wrapper-time-trace=">,
|
||||
Flags<[WrapperOnlyOption]>, MetaVarName<"<file>">,
|
||||
|
@ -145,6 +145,9 @@ int main(int argc, const char **argv) {
|
||||
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
|
||||
cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
|
||||
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
|
||||
cl::opt<int> CompressionLevel(
|
||||
"compression-level", cl::desc("Specify the compression level (integer)"),
|
||||
cl::value_desc("n"), cl::Optional, cl::cat(ClangOffloadBundlerCategory));
|
||||
|
||||
// Process commandline options and report errors
|
||||
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
||||
@ -178,6 +181,8 @@ int main(int argc, const char **argv) {
|
||||
BundlerConfig.Compress = Compress;
|
||||
if (Verbose.getNumOccurrences() > 0)
|
||||
BundlerConfig.Verbose = Verbose;
|
||||
if (CompressionLevel.getNumOccurrences() > 0)
|
||||
BundlerConfig.CompressionLevel = CompressionLevel;
|
||||
|
||||
BundlerConfig.TargetNames = TargetNames;
|
||||
BundlerConfig.InputFileNames = InputFileNames;
|
||||
|
@ -63,7 +63,7 @@ bool isAvailable();
|
||||
|
||||
void compress(ArrayRef<uint8_t> Input,
|
||||
SmallVectorImpl<uint8_t> &CompressedBuffer,
|
||||
int Level = DefaultCompression);
|
||||
int Level = DefaultCompression, bool EnableLdm = false);
|
||||
|
||||
Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
|
||||
size_t &UncompressedSize);
|
||||
@ -94,10 +94,13 @@ struct Params {
|
||||
constexpr Params(Format F)
|
||||
: format(F), level(F == Format::Zlib ? zlib::DefaultCompression
|
||||
: zstd::DefaultCompression) {}
|
||||
constexpr Params(Format F, int L, bool Ldm = false)
|
||||
: format(F), level(L), zstdEnableLdm(Ldm) {}
|
||||
Params(DebugCompressionType Type) : Params(formatFor(Type)) {}
|
||||
|
||||
Format format;
|
||||
int level;
|
||||
bool zstdEnableLdm = false; // Enable zstd long distance matching
|
||||
// This may support multi-threading for zstd in the future. Note that
|
||||
// different threads may produce different output, so be careful if certain
|
||||
// output determinism is desired.
|
||||
|
@ -50,7 +50,7 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
|
||||
zlib::compress(Input, Output, P.level);
|
||||
break;
|
||||
case compression::Format::Zstd:
|
||||
zstd::compress(Input, Output, P.level);
|
||||
zstd::compress(Input, Output, P.level, P.zstdEnableLdm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -163,17 +163,39 @@ Error zlib::decompress(ArrayRef<uint8_t> Input,
|
||||
|
||||
bool zstd::isAvailable() { return true; }
|
||||
|
||||
#include <zstd.h> // Ensure ZSTD library is included
|
||||
|
||||
void zstd::compress(ArrayRef<uint8_t> Input,
|
||||
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
|
||||
unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size());
|
||||
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
|
||||
bool EnableLdm) {
|
||||
ZSTD_CCtx *Cctx = ZSTD_createCCtx();
|
||||
if (!Cctx)
|
||||
report_bad_alloc_error("Failed to create ZSTD_CCtx");
|
||||
|
||||
if (ZSTD_isError(ZSTD_CCtx_setParameter(
|
||||
Cctx, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
|
||||
ZSTD_freeCCtx(Cctx);
|
||||
report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
|
||||
}
|
||||
|
||||
if (ZSTD_isError(
|
||||
ZSTD_CCtx_setParameter(Cctx, ZSTD_c_compressionLevel, Level))) {
|
||||
ZSTD_freeCCtx(Cctx);
|
||||
report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
|
||||
}
|
||||
|
||||
unsigned long CompressedBufferSize = ZSTD_compressBound(Input.size());
|
||||
CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
|
||||
unsigned long CompressedSize =
|
||||
::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize,
|
||||
(const char *)Input.data(), Input.size(), Level);
|
||||
|
||||
size_t const CompressedSize =
|
||||
ZSTD_compress2(Cctx, CompressedBuffer.data(), CompressedBufferSize,
|
||||
Input.data(), Input.size());
|
||||
|
||||
ZSTD_freeCCtx(Cctx);
|
||||
|
||||
if (ZSTD_isError(CompressedSize))
|
||||
report_bad_alloc_error("Allocation failed");
|
||||
// Tell MemorySanitizer that zstd output buffer is fully initialized.
|
||||
// This avoids a false report when running LLVM with uninstrumented ZLib.
|
||||
report_bad_alloc_error("Compression failed");
|
||||
|
||||
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
|
||||
if (CompressedSize < CompressedBuffer.size())
|
||||
CompressedBuffer.truncate(CompressedSize);
|
||||
@ -205,7 +227,8 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
|
||||
#else
|
||||
bool zstd::isAvailable() { return false; }
|
||||
void zstd::compress(ArrayRef<uint8_t> Input,
|
||||
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
|
||||
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
|
||||
bool EnableLdm) {
|
||||
llvm_unreachable("zstd::compress is unavailable");
|
||||
}
|
||||
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
|
||||
|
Loading…
x
Reference in New Issue
Block a user