[CUDA][NFC] CudaArch to OffloadArch rename (#97028)
Rename `CudaArch` to `OffloadArch` to better reflect its contents and use, and apply a similar rename to the helpers handling the enum.
parent 6b737c4446
commit ab20086422
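
For API consumers this is a pure spelling change; a minimal migration sketch (hypothetical caller, assuming only the helpers renamed in this diff):

```cpp
#include "clang/Basic/Cuda.h"

// Illustrative helper, not part of the patch: canonicalize a user-supplied
// GPU name via the renamed API.
const char *canonicalizeGpuName(llvm::StringRef Name) {
  // Before: CudaArch A = StringToCudaArch(Name);
  clang::OffloadArch A = clang::StringToOffloadArch(Name);
  if (A == clang::OffloadArch::UNKNOWN)
    return nullptr; // unrecognized arch string
  // Before: return CudaArchToString(A);
  return clang::OffloadArchToString(A); // e.g. "sm_70" round-trips to "sm_70"
}
```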
@@ -52,7 +52,7 @@ const char *CudaVersionToString(CudaVersion V);
 // Input is "Major.Minor"
 CudaVersion CudaStringToVersion(const llvm::Twine &S);
 
-enum class CudaArch {
+enum class OffloadArch {
   UNUSED,
   UNKNOWN,
   // TODO: Deprecate and remove GPU architectures older than sm_52.
@@ -133,8 +133,8 @@ enum class CudaArch {
   // public one.
   LAST,
 
-  CudaDefault = CudaArch::SM_52,
-  HIPDefault = CudaArch::GFX906,
+  CudaDefault = OffloadArch::SM_52,
+  HIPDefault = OffloadArch::GFX906,
 };
 
 enum class CUDAFunctionTarget {
@@ -145,26 +145,26 @@ enum class CUDAFunctionTarget {
   InvalidTarget
 };
 
-static inline bool IsNVIDIAGpuArch(CudaArch A) {
-  return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
+static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
+  return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600;
 }
 
-static inline bool IsAMDGpuArch(CudaArch A) {
+static inline bool IsAMDOffloadArch(OffloadArch A) {
   // Generic processor model is for testing only.
-  return A >= CudaArch::GFX600 && A < CudaArch::Generic;
+  return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
 }
 
-const char *CudaArchToString(CudaArch A);
-const char *CudaArchToVirtualArchString(CudaArch A);
+const char *OffloadArchToString(OffloadArch A);
+const char *OffloadArchToVirtualArchString(OffloadArch A);
 
 // The input should have the form "sm_20".
-CudaArch StringToCudaArch(llvm::StringRef S);
+OffloadArch StringToOffloadArch(llvm::StringRef S);
 
-/// Get the earliest CudaVersion that supports the given CudaArch.
-CudaVersion MinVersionForCudaArch(CudaArch A);
+/// Get the earliest CudaVersion that supports the given OffloadArch.
+CudaVersion MinVersionForOffloadArch(OffloadArch A);
 
-/// Get the latest CudaVersion that supports the given CudaArch.
-CudaVersion MaxVersionForCudaArch(CudaArch A);
+/// Get the latest CudaVersion that supports the given OffloadArch.
+CudaVersion MaxVersionForOffloadArch(OffloadArch A);
 
 // Various SDK-dependent features that affect CUDA compilation
 enum class CudaFeature {
@@ -72,23 +72,21 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
 }
 
 namespace {
-struct CudaArchToStringMap {
-  CudaArch arch;
+struct OffloadArchToStringMap {
+  OffloadArch arch;
   const char *arch_name;
   const char *virtual_arch_name;
 };
 } // namespace
 
-#define SM2(sm, ca) \
-  { CudaArch::SM_##sm, "sm_" #sm, ca }
+#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
 #define SM(sm) SM2(sm, "compute_" #sm)
-#define GFX(gpu) \
-  { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
-static const CudaArchToStringMap arch_names[] = {
+#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
+static const OffloadArchToStringMap arch_names[] = {
    // clang-format off
-    {CudaArch::UNUSED, "", ""},
+    {OffloadArch::UNUSED, "", ""},
    SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
-    SM(30), {CudaArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
+    SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
    SM(50), SM(52), SM(53), // Maxwell
    SM(60), SM(61), SM(62), // Pascal
    SM(70), SM(72), // Volta
@@ -112,7 +110,7 @@ static const CudaArchToStringMap arch_names[] = {
    GFX(803), // gfx803
    GFX(805), // gfx805
    GFX(810), // gfx810
-    {CudaArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
+    {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
    GFX(900), // gfx900
    GFX(902), // gfx902
    GFX(904), // gfx904
@@ -124,12 +122,12 @@ static const CudaArchToStringMap arch_names[] = {
    GFX(940), // gfx940
    GFX(941), // gfx941
    GFX(942), // gfx942
-    {CudaArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
+    {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
    GFX(1010), // gfx1010
    GFX(1011), // gfx1011
    GFX(1012), // gfx1012
    GFX(1013), // gfx1013
-    {CudaArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
+    {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
    GFX(1030), // gfx1030
    GFX(1031), // gfx1031
    GFX(1032), // gfx1032
@@ -137,7 +135,7 @@ static const CudaArchToStringMap arch_names[] = {
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
    GFX(1036), // gfx1036
-    {CudaArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
+    {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
    GFX(1100), // gfx1100
    GFX(1101), // gfx1101
    GFX(1102), // gfx1102
@@ -145,105 +143,105 @@ static const CudaArchToStringMap arch_names[] = {
    GFX(1150), // gfx1150
    GFX(1151), // gfx1151
    GFX(1152), // gfx1152
-    {CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
+    {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
    GFX(1200), // gfx1200
    GFX(1201), // gfx1201
-    {CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
-    {CudaArch::Generic, "generic", ""},
+    {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
+    {OffloadArch::Generic, "generic", ""},
    // clang-format on
 };
 #undef SM
 #undef SM2
 #undef GFX
 
-const char *CudaArchToString(CudaArch A) {
+const char *OffloadArchToString(OffloadArch A) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
   if (result == std::end(arch_names))
     return "unknown";
   return result->arch_name;
 }
 
-const char *CudaArchToVirtualArchString(CudaArch A) {
+const char *OffloadArchToVirtualArchString(OffloadArch A) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
   if (result == std::end(arch_names))
     return "unknown";
   return result->virtual_arch_name;
 }
 
-CudaArch StringToCudaArch(llvm::StringRef S) {
+OffloadArch StringToOffloadArch(llvm::StringRef S) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [S](const CudaArchToStringMap &map) { return S == map.arch_name; });
+      [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
   if (result == std::end(arch_names))
-    return CudaArch::UNKNOWN;
+    return OffloadArch::UNKNOWN;
   return result->arch;
 }
 
-CudaVersion MinVersionForCudaArch(CudaArch A) {
-  if (A == CudaArch::UNKNOWN)
+CudaVersion MinVersionForOffloadArch(OffloadArch A) {
+  if (A == OffloadArch::UNKNOWN)
     return CudaVersion::UNKNOWN;
 
   // AMD GPUs do not depend on CUDA versions.
-  if (IsAMDGpuArch(A))
+  if (IsAMDOffloadArch(A))
     return CudaVersion::CUDA_70;
 
   switch (A) {
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
-  case CudaArch::SM_30:
-  case CudaArch::SM_32_:
-  case CudaArch::SM_35:
-  case CudaArch::SM_37:
-  case CudaArch::SM_50:
-  case CudaArch::SM_52:
-  case CudaArch::SM_53:
+  case OffloadArch::SM_20:
+  case OffloadArch::SM_21:
+  case OffloadArch::SM_30:
+  case OffloadArch::SM_32_:
+  case OffloadArch::SM_35:
+  case OffloadArch::SM_37:
+  case OffloadArch::SM_50:
+  case OffloadArch::SM_52:
+  case OffloadArch::SM_53:
    return CudaVersion::CUDA_70;
-  case CudaArch::SM_60:
-  case CudaArch::SM_61:
-  case CudaArch::SM_62:
+  case OffloadArch::SM_60:
+  case OffloadArch::SM_61:
+  case OffloadArch::SM_62:
    return CudaVersion::CUDA_80;
-  case CudaArch::SM_70:
+  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
-  case CudaArch::SM_72:
+  case OffloadArch::SM_72:
    return CudaVersion::CUDA_91;
-  case CudaArch::SM_75:
+  case OffloadArch::SM_75:
    return CudaVersion::CUDA_100;
-  case CudaArch::SM_80:
+  case OffloadArch::SM_80:
    return CudaVersion::CUDA_110;
-  case CudaArch::SM_86:
+  case OffloadArch::SM_86:
    return CudaVersion::CUDA_111;
-  case CudaArch::SM_87:
+  case OffloadArch::SM_87:
    return CudaVersion::CUDA_114;
-  case CudaArch::SM_89:
-  case CudaArch::SM_90:
+  case OffloadArch::SM_89:
+  case OffloadArch::SM_90:
    return CudaVersion::CUDA_118;
-  case CudaArch::SM_90a:
+  case OffloadArch::SM_90a:
    return CudaVersion::CUDA_120;
  default:
    llvm_unreachable("invalid enum");
  }
 }
 
-CudaVersion MaxVersionForCudaArch(CudaArch A) {
+CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
   // AMD GPUs do not depend on CUDA versions.
-  if (IsAMDGpuArch(A))
+  if (IsAMDOffloadArch(A))
     return CudaVersion::NEW;
 
   switch (A) {
-  case CudaArch::UNKNOWN:
+  case OffloadArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
+  case OffloadArch::SM_20:
+  case OffloadArch::SM_21:
    return CudaVersion::CUDA_80;
-  case CudaArch::SM_30:
-  case CudaArch::SM_32_:
+  case OffloadArch::SM_30:
+  case OffloadArch::SM_32_:
    return CudaVersion::CUDA_102;
-  case CudaArch::SM_35:
-  case CudaArch::SM_37:
+  case OffloadArch::SM_35:
+  case OffloadArch::SM_37:
    return CudaVersion::CUDA_118;
  default:
    return CudaVersion::NEW;
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::UNUSED;
+  GPU = OffloadArch::UNUSED;
 
   // PTX supports f16 as a fundamental type.
   HasLegalHalfType = true;
@@ -175,117 +175,117 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__NVPTX__");
 
   // Skip setting architecture dependent macros if undefined.
-  if (GPU == CudaArch::UNUSED && !HostTarget)
+  if (GPU == OffloadArch::UNUSED && !HostTarget)
     return;
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
     // Set __CUDA_ARCH__ for the GPU specified.
     std::string CUDAArchCode = [this] {
       switch (GPU) {
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX602:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX705:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX805:
-      case CudaArch::GFX810:
-      case CudaArch::GFX9_GENERIC:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX908:
-      case CudaArch::GFX909:
-      case CudaArch::GFX90a:
-      case CudaArch::GFX90c:
-      case CudaArch::GFX940:
-      case CudaArch::GFX941:
-      case CudaArch::GFX942:
-      case CudaArch::GFX10_1_GENERIC:
-      case CudaArch::GFX1010:
-      case CudaArch::GFX1011:
-      case CudaArch::GFX1012:
-      case CudaArch::GFX1013:
-      case CudaArch::GFX10_3_GENERIC:
-      case CudaArch::GFX1030:
-      case CudaArch::GFX1031:
-      case CudaArch::GFX1032:
-      case CudaArch::GFX1033:
-      case CudaArch::GFX1034:
-      case CudaArch::GFX1035:
-      case CudaArch::GFX1036:
-      case CudaArch::GFX11_GENERIC:
-      case CudaArch::GFX1100:
-      case CudaArch::GFX1101:
-      case CudaArch::GFX1102:
-      case CudaArch::GFX1103:
-      case CudaArch::GFX1150:
-      case CudaArch::GFX1151:
-      case CudaArch::GFX1152:
-      case CudaArch::GFX12_GENERIC:
-      case CudaArch::GFX1200:
-      case CudaArch::GFX1201:
-      case CudaArch::AMDGCNSPIRV:
-      case CudaArch::Generic:
-      case CudaArch::LAST:
+      case OffloadArch::GFX600:
+      case OffloadArch::GFX601:
+      case OffloadArch::GFX602:
+      case OffloadArch::GFX700:
+      case OffloadArch::GFX701:
+      case OffloadArch::GFX702:
+      case OffloadArch::GFX703:
+      case OffloadArch::GFX704:
+      case OffloadArch::GFX705:
+      case OffloadArch::GFX801:
+      case OffloadArch::GFX802:
+      case OffloadArch::GFX803:
+      case OffloadArch::GFX805:
+      case OffloadArch::GFX810:
+      case OffloadArch::GFX9_GENERIC:
+      case OffloadArch::GFX900:
+      case OffloadArch::GFX902:
+      case OffloadArch::GFX904:
+      case OffloadArch::GFX906:
+      case OffloadArch::GFX908:
+      case OffloadArch::GFX909:
+      case OffloadArch::GFX90a:
+      case OffloadArch::GFX90c:
+      case OffloadArch::GFX940:
+      case OffloadArch::GFX941:
+      case OffloadArch::GFX942:
+      case OffloadArch::GFX10_1_GENERIC:
+      case OffloadArch::GFX1010:
+      case OffloadArch::GFX1011:
+      case OffloadArch::GFX1012:
+      case OffloadArch::GFX1013:
+      case OffloadArch::GFX10_3_GENERIC:
+      case OffloadArch::GFX1030:
+      case OffloadArch::GFX1031:
+      case OffloadArch::GFX1032:
+      case OffloadArch::GFX1033:
+      case OffloadArch::GFX1034:
+      case OffloadArch::GFX1035:
+      case OffloadArch::GFX1036:
+      case OffloadArch::GFX11_GENERIC:
+      case OffloadArch::GFX1100:
+      case OffloadArch::GFX1101:
+      case OffloadArch::GFX1102:
+      case OffloadArch::GFX1103:
+      case OffloadArch::GFX1150:
+      case OffloadArch::GFX1151:
+      case OffloadArch::GFX1152:
+      case OffloadArch::GFX12_GENERIC:
+      case OffloadArch::GFX1200:
+      case OffloadArch::GFX1201:
+      case OffloadArch::AMDGCNSPIRV:
+      case OffloadArch::Generic:
+      case OffloadArch::LAST:
        break;
-      case CudaArch::UNKNOWN:
+      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
-      case CudaArch::UNUSED:
-      case CudaArch::SM_20:
+      case OffloadArch::UNUSED:
+      case OffloadArch::SM_20:
        return "200";
-      case CudaArch::SM_21:
+      case OffloadArch::SM_21:
        return "210";
-      case CudaArch::SM_30:
+      case OffloadArch::SM_30:
        return "300";
-      case CudaArch::SM_32_:
+      case OffloadArch::SM_32_:
        return "320";
-      case CudaArch::SM_35:
+      case OffloadArch::SM_35:
        return "350";
-      case CudaArch::SM_37:
+      case OffloadArch::SM_37:
        return "370";
-      case CudaArch::SM_50:
+      case OffloadArch::SM_50:
        return "500";
-      case CudaArch::SM_52:
+      case OffloadArch::SM_52:
        return "520";
-      case CudaArch::SM_53:
+      case OffloadArch::SM_53:
        return "530";
-      case CudaArch::SM_60:
+      case OffloadArch::SM_60:
        return "600";
-      case CudaArch::SM_61:
+      case OffloadArch::SM_61:
        return "610";
-      case CudaArch::SM_62:
+      case OffloadArch::SM_62:
        return "620";
-      case CudaArch::SM_70:
+      case OffloadArch::SM_70:
        return "700";
-      case CudaArch::SM_72:
+      case OffloadArch::SM_72:
        return "720";
-      case CudaArch::SM_75:
+      case OffloadArch::SM_75:
        return "750";
-      case CudaArch::SM_80:
+      case OffloadArch::SM_80:
        return "800";
-      case CudaArch::SM_86:
+      case OffloadArch::SM_86:
        return "860";
-      case CudaArch::SM_87:
+      case OffloadArch::SM_87:
        return "870";
-      case CudaArch::SM_89:
+      case OffloadArch::SM_89:
        return "890";
-      case CudaArch::SM_90:
-      case CudaArch::SM_90a:
+      case OffloadArch::SM_90:
+      case OffloadArch::SM_90a:
        return "900";
      }
-      llvm_unreachable("unhandled CudaArch");
+      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-    if (GPU == CudaArch::SM_90a)
+    if (GPU == OffloadArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
  }
 }
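
The `__CUDA_ARCH__` value computed by the lambda above ("700" for sm_70, and so on) is what device code keys on; a short reminder of the consuming side (ordinary CUDA usage, not part of this patch):

```cpp
// Device-side compilation for sm_70 sees __CUDA_ARCH__ == 700, so the same
// source can branch per architecture at preprocessing time.
__device__ unsigned activeMask() {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
  return __activemask();  // Volta+ exposes the real active-lane mask
#else
  return 0xffffffffu;     // older targets: whole warp assumed active
#endif
}
```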
@@ -62,7 +62,7 @@ static const int NVPTXDWARFAddrSpaceMap[] = {
 
 class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   static const char *const GCCRegNames[];
-  CudaArch GPU;
+  OffloadArch GPU;
   uint32_t PTXVersion;
   std::unique_ptr<TargetInfo> HostTarget;
 
@@ -79,8 +79,8 @@ public:
   initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                  StringRef CPU,
                  const std::vector<std::string> &FeaturesVec) const override {
-    if (GPU != CudaArch::UNUSED)
-      Features[CudaArchToString(GPU)] = true;
+    if (GPU != OffloadArch::UNUSED)
+      Features[OffloadArchToString(GPU)] = true;
    Features["ptx" + std::to_string(PTXVersion)] = true;
    return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
  }
@@ -121,18 +121,18 @@ public:
   }
 
   bool isValidCPUName(StringRef Name) const override {
-    return StringToCudaArch(Name) != CudaArch::UNKNOWN;
+    return StringToOffloadArch(Name) != OffloadArch::UNKNOWN;
   }
 
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
-    for (int i = static_cast<int>(CudaArch::SM_20);
-         i < static_cast<int>(CudaArch::Generic); ++i)
-      Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));
+    for (int i = static_cast<int>(OffloadArch::SM_20);
+         i < static_cast<int>(OffloadArch::Generic); ++i)
+      Values.emplace_back(OffloadArchToString(static_cast<OffloadArch>(i)));
   }
 
   bool setCPU(const std::string &Name) override {
-    GPU = StringToCudaArch(Name);
-    return GPU != CudaArch::UNKNOWN;
+    GPU = StringToOffloadArch(Name);
+    return GPU != OffloadArch::UNKNOWN;
   }
 
   void setSupportedOpenCLOpts() override {
@@ -183,7 +183,7 @@ public:
   bool hasBitIntType() const override { return true; }
   bool hasBFloat16Type() const override { return true; }
 
-  CudaArch getGPU() const { return GPU; }
+  OffloadArch getGPU() const { return GPU; }
 };
 } // namespace targets
 } // namespace clang
@@ -2227,113 +2227,112 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
   return false;
 }
 
-// Get current CudaArch and ignore any unknown values
-static CudaArch getCudaArch(CodeGenModule &CGM) {
+// Get current OffloadArch and ignore any unknown values
+static OffloadArch getOffloadArch(CodeGenModule &CGM) {
   if (!CGM.getTarget().hasFeature("ptx"))
-    return CudaArch::UNKNOWN;
+    return OffloadArch::UNKNOWN;
   for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) {
     if (Feature.getValue()) {
-      CudaArch Arch = StringToCudaArch(Feature.getKey());
-      if (Arch != CudaArch::UNKNOWN)
+      OffloadArch Arch = StringToOffloadArch(Feature.getKey());
+      if (Arch != OffloadArch::UNKNOWN)
        return Arch;
    }
  }
-  return CudaArch::UNKNOWN;
+  return OffloadArch::UNKNOWN;
 }
 
 /// Check to see if target architecture supports unified addressing which is
 /// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeGPU::processRequiresDirective(
-    const OMPRequiresDecl *D) {
+void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
   for (const OMPClause *Clause : D->clauselists()) {
     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
-      CudaArch Arch = getCudaArch(CGM);
+      OffloadArch Arch = getOffloadArch(CGM);
       switch (Arch) {
-      case CudaArch::SM_20:
-      case CudaArch::SM_21:
-      case CudaArch::SM_30:
-      case CudaArch::SM_32_:
-      case CudaArch::SM_35:
-      case CudaArch::SM_37:
-      case CudaArch::SM_50:
-      case CudaArch::SM_52:
-      case CudaArch::SM_53: {
+      case OffloadArch::SM_20:
+      case OffloadArch::SM_21:
+      case OffloadArch::SM_30:
+      case OffloadArch::SM_32_:
+      case OffloadArch::SM_35:
+      case OffloadArch::SM_37:
+      case OffloadArch::SM_50:
+      case OffloadArch::SM_52:
+      case OffloadArch::SM_53: {
        SmallString<256> Buffer;
        llvm::raw_svector_ostream Out(Buffer);
-        Out << "Target architecture " << CudaArchToString(Arch)
+        Out << "Target architecture " << OffloadArchToString(Arch)
            << " does not support unified addressing";
        CGM.Error(Clause->getBeginLoc(), Out.str());
        return;
      }
-      case CudaArch::SM_60:
-      case CudaArch::SM_61:
-      case CudaArch::SM_62:
-      case CudaArch::SM_70:
-      case CudaArch::SM_72:
-      case CudaArch::SM_75:
-      case CudaArch::SM_80:
-      case CudaArch::SM_86:
-      case CudaArch::SM_87:
-      case CudaArch::SM_89:
-      case CudaArch::SM_90:
-      case CudaArch::SM_90a:
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX602:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX705:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX805:
-      case CudaArch::GFX810:
-      case CudaArch::GFX9_GENERIC:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX908:
-      case CudaArch::GFX909:
-      case CudaArch::GFX90a:
-      case CudaArch::GFX90c:
-      case CudaArch::GFX940:
-      case CudaArch::GFX941:
-      case CudaArch::GFX942:
-      case CudaArch::GFX10_1_GENERIC:
-      case CudaArch::GFX1010:
-      case CudaArch::GFX1011:
-      case CudaArch::GFX1012:
-      case CudaArch::GFX1013:
-      case CudaArch::GFX10_3_GENERIC:
-      case CudaArch::GFX1030:
-      case CudaArch::GFX1031:
-      case CudaArch::GFX1032:
-      case CudaArch::GFX1033:
-      case CudaArch::GFX1034:
-      case CudaArch::GFX1035:
-      case CudaArch::GFX1036:
-      case CudaArch::GFX11_GENERIC:
-      case CudaArch::GFX1100:
-      case CudaArch::GFX1101:
-      case CudaArch::GFX1102:
-      case CudaArch::GFX1103:
-      case CudaArch::GFX1150:
-      case CudaArch::GFX1151:
-      case CudaArch::GFX1152:
-      case CudaArch::GFX12_GENERIC:
-      case CudaArch::GFX1200:
-      case CudaArch::GFX1201:
-      case CudaArch::AMDGCNSPIRV:
-      case CudaArch::Generic:
-      case CudaArch::UNUSED:
-      case CudaArch::UNKNOWN:
+      case OffloadArch::SM_60:
+      case OffloadArch::SM_61:
+      case OffloadArch::SM_62:
+      case OffloadArch::SM_70:
+      case OffloadArch::SM_72:
+      case OffloadArch::SM_75:
+      case OffloadArch::SM_80:
+      case OffloadArch::SM_86:
+      case OffloadArch::SM_87:
+      case OffloadArch::SM_89:
+      case OffloadArch::SM_90:
+      case OffloadArch::SM_90a:
+      case OffloadArch::GFX600:
+      case OffloadArch::GFX601:
+      case OffloadArch::GFX602:
+      case OffloadArch::GFX700:
+      case OffloadArch::GFX701:
+      case OffloadArch::GFX702:
+      case OffloadArch::GFX703:
+      case OffloadArch::GFX704:
+      case OffloadArch::GFX705:
+      case OffloadArch::GFX801:
+      case OffloadArch::GFX802:
+      case OffloadArch::GFX803:
+      case OffloadArch::GFX805:
+      case OffloadArch::GFX810:
+      case OffloadArch::GFX9_GENERIC:
+      case OffloadArch::GFX900:
+      case OffloadArch::GFX902:
+      case OffloadArch::GFX904:
+      case OffloadArch::GFX906:
+      case OffloadArch::GFX908:
+      case OffloadArch::GFX909:
+      case OffloadArch::GFX90a:
+      case OffloadArch::GFX90c:
+      case OffloadArch::GFX940:
+      case OffloadArch::GFX941:
+      case OffloadArch::GFX942:
+      case OffloadArch::GFX10_1_GENERIC:
+      case OffloadArch::GFX1010:
+      case OffloadArch::GFX1011:
+      case OffloadArch::GFX1012:
+      case OffloadArch::GFX1013:
+      case OffloadArch::GFX10_3_GENERIC:
+      case OffloadArch::GFX1030:
+      case OffloadArch::GFX1031:
+      case OffloadArch::GFX1032:
+      case OffloadArch::GFX1033:
+      case OffloadArch::GFX1034:
+      case OffloadArch::GFX1035:
+      case OffloadArch::GFX1036:
+      case OffloadArch::GFX11_GENERIC:
+      case OffloadArch::GFX1100:
+      case OffloadArch::GFX1101:
+      case OffloadArch::GFX1102:
+      case OffloadArch::GFX1103:
+      case OffloadArch::GFX1150:
+      case OffloadArch::GFX1151:
+      case OffloadArch::GFX1152:
+      case OffloadArch::GFX12_GENERIC:
+      case OffloadArch::GFX1200:
+      case OffloadArch::GFX1201:
+      case OffloadArch::AMDGCNSPIRV:
+      case OffloadArch::Generic:
+      case OffloadArch::UNUSED:
+      case OffloadArch::UNKNOWN:
        break;
-      case CudaArch::LAST:
-        llvm_unreachable("Unexpected Cuda arch.");
+      case OffloadArch::LAST:
+        llvm_unreachable("Unexpected GPU arch.");
      }
    }
  }
@@ -899,11 +899,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
   }
 
   for (StringRef Arch : Archs) {
-    if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch(
+    if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
                            getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
       DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
     } else if (AMDTriple &&
-               IsAMDGpuArch(StringToCudaArch(
+               IsAMDOffloadArch(StringToOffloadArch(
                    getProcessorFromTargetID(*AMDTriple, Arch)))) {
       DerivedArchs[AMDTriple->getTriple()].insert(Arch);
     } else {
@@ -2948,7 +2948,7 @@ class OffloadingActionBuilder final {
     struct TargetID {
       /// Target ID string which is persistent throughout the compilation.
       const char *ID;
-      TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
+      TargetID(OffloadArch Arch) { ID = OffloadArchToString(Arch); }
       TargetID(const char *ID) : ID(ID) {}
       operator const char *() { return ID; }
       operator StringRef() { return StringRef(ID); }
@@ -2969,7 +2969,7 @@ class OffloadingActionBuilder final {
     bool Relocatable = false;
 
     /// Default GPU architecture if there's no one specified.
-    CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+    OffloadArch DefaultOffloadArch = OffloadArch::UNKNOWN;
 
     /// Method to generate compilation unit ID specified by option
     /// '-fuse-cuid='.
@@ -3098,7 +3098,7 @@ class OffloadingActionBuilder final {
 
       // If we have a fat binary, add it to the list.
       if (CudaFatBinary) {
-        AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
+        AddTopLevel(CudaFatBinary, OffloadArch::UNUSED);
         CudaDeviceActions.clear();
         CudaFatBinary = nullptr;
         return;
@@ -3243,11 +3243,11 @@ class OffloadingActionBuilder final {
       if (GpuArchList.empty()) {
         if (ToolChains.front()->getTriple().isSPIRV()) {
           if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
-            GpuArchList.push_back(CudaArch::AMDGCNSPIRV);
+            GpuArchList.push_back(OffloadArch::AMDGCNSPIRV);
           else
-            GpuArchList.push_back(CudaArch::Generic);
+            GpuArchList.push_back(OffloadArch::Generic);
         } else {
-          GpuArchList.push_back(DefaultCudaArch);
+          GpuArchList.push_back(DefaultOffloadArch);
         }
       }
 
@@ -3262,16 +3262,16 @@ class OffloadingActionBuilder final {
     CudaActionBuilder(Compilation &C, DerivedArgList &Args,
                       const Driver::InputList &Inputs)
         : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
-      DefaultCudaArch = CudaArch::CudaDefault;
+      DefaultOffloadArch = OffloadArch::CudaDefault;
     }
 
     StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
-      CudaArch Arch = StringToCudaArch(ArchStr);
-      if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+      OffloadArch Arch = StringToOffloadArch(ArchStr);
+      if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) {
        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
        return StringRef();
      }
-      return CudaArchToString(Arch);
+      return OffloadArchToString(Arch);
    }
 
    std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
@@ -3401,7 +3401,7 @@ class OffloadingActionBuilder final {
                   const Driver::InputList &Inputs)
         : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
 
-      DefaultCudaArch = CudaArch::HIPDefault;
+      DefaultOffloadArch = OffloadArch::HIPDefault;
 
       if (Args.hasArg(options::OPT_fhip_emit_relocatable,
                       options::OPT_fno_hip_emit_relocatable)) {
@@ -4408,23 +4408,24 @@ static StringRef getCanonicalArchString(Compilation &C,
                                         bool SuppressError = false) {
   // Lookup the CUDA / HIP architecture string. Only report an error if we were
   // expecting the triple to be only NVPTX / AMDGPU.
-  CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr));
+  OffloadArch Arch =
+      StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
   if (!SuppressError && Triple.isNVPTX() &&
-      (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) {
+      (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
     C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
         << "CUDA" << ArchStr;
     return StringRef();
   } else if (!SuppressError && Triple.isAMDGPU() &&
-             (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) {
+             (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
     C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
         << "HIP" << ArchStr;
     return StringRef();
   }
 
-  if (IsNVIDIAGpuArch(Arch))
-    return Args.MakeArgStringRef(CudaArchToString(Arch));
+  if (IsNVIDIAOffloadArch(Arch))
+    return Args.MakeArgStringRef(OffloadArchToString(Arch));
 
-  if (IsAMDGpuArch(Arch)) {
+  if (IsAMDOffloadArch(Arch)) {
     llvm::StringMap<bool> Features;
     auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
     if (!HIPTriple)
@@ -4545,9 +4546,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
 
   if (Archs.empty()) {
     if (Kind == Action::OFK_Cuda)
-      Archs.insert(CudaArchToString(CudaArch::CudaDefault));
+      Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
     else if (Kind == Action::OFK_HIP)
-      Archs.insert(CudaArchToString(CudaArch::HIPDefault));
+      Archs.insert(OffloadArchToString(OffloadArch::HIPDefault));
     else if (Kind == Action::OFK_OpenMP)
       Archs.insert(StringRef());
   } else {
@@ -79,7 +79,8 @@ OffloadTargetInfo::OffloadTargetInfo(const StringRef Target,
   auto TargetFeatures = Target.split(':');
   auto TripleOrGPU = TargetFeatures.first.rsplit('-');
 
-  if (clang::StringToCudaArch(TripleOrGPU.second) != clang::CudaArch::UNKNOWN) {
+  if (clang::StringToOffloadArch(TripleOrGPU.second) !=
+      clang::OffloadArch::UNKNOWN) {
     auto KindTriple = TripleOrGPU.first.split('-');
     this->OffloadKind = KindTriple.first;
 
@@ -87,7 +87,7 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
         llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
     getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
         << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
-    Arch = CudaArchToString(CudaArch::HIPDefault);
+    Arch = OffloadArchToString(OffloadArch::HIPDefault);
   } else {
     Arch = Args.MakeArgString(ArchsOrErr->front());
   }
@@ -223,13 +223,13 @@ CudaInstallationDetector::CudaInstallationDetector(
       // CUDA-9+ uses single libdevice file for all GPU variants.
       std::string FilePath = LibDevicePath + "/libdevice.10.bc";
       if (FS.exists(FilePath)) {
-        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
-             ++Arch) {
-          CudaArch GpuArch = static_cast<CudaArch>(Arch);
-          if (!IsNVIDIAGpuArch(GpuArch))
+        for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
+             Arch < E; ++Arch) {
+          OffloadArch OA = static_cast<OffloadArch>(Arch);
+          if (!IsNVIDIAOffloadArch(OA))
             continue;
-          std::string GpuArchName(CudaArchToString(GpuArch));
-          LibDeviceMap[GpuArchName] = FilePath;
+          std::string OffloadArchName(OffloadArchToString(OA));
+          LibDeviceMap[OffloadArchName] = FilePath;
         }
       }
     } else {
@@ -312,17 +312,17 @@ void CudaInstallationDetector::AddCudaIncludeArgs(
 }
 
 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
-    CudaArch Arch) const {
-  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
+    OffloadArch Arch) const {
+  if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
       ArchsWithBadVersion[(int)Arch])
     return;
 
-  auto MinVersion = MinVersionForCudaArch(Arch);
-  auto MaxVersion = MaxVersionForCudaArch(Arch);
+  auto MinVersion = MinVersionForOffloadArch(Arch);
+  auto MaxVersion = MaxVersionForOffloadArch(Arch);
   if (Version < MinVersion || Version > MaxVersion) {
     ArchsWithBadVersion[(int)Arch] = true;
     D.Diag(diag::err_drv_cuda_version_unsupported)
-        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
+        << OffloadArchToString(Arch) << CudaVersionToString(MinVersion)
         << CudaVersionToString(MaxVersion) << InstallPath
         << CudaVersionToString(Version);
   }
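
The gate above reduces to a per-arch version window; a condensed sketch of the same check in isolation (hypothetical standalone predicate, assuming only the helpers and comparisons visible in this hunk):

```cpp
// An arch is acceptable when the detected CUDA version falls inside
// [MinVersionForOffloadArch(A), MaxVersionForOffloadArch(A)]; unknown
// inputs are deliberately not diagnosed, mirroring the early return above.
static bool versionSupportsArch(clang::CudaVersion V, clang::OffloadArch A) {
  if (A == clang::OffloadArch::UNKNOWN || V == clang::CudaVersion::UNKNOWN)
    return true; // nothing meaningful to check
  return V >= clang::MinVersionForOffloadArch(A) &&
         V <= clang::MaxVersionForOffloadArch(A);
}
```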
@@ -401,8 +401,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
-  assert(gpu_arch != CudaArch::UNKNOWN &&
+  OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
+  assert(gpu_arch != OffloadArch::UNKNOWN &&
          "Device action expected to have an architecture.");
 
   // Check that our installation's ptxas supports gpu_arch.
@@ -457,7 +457,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-v");
 
   CmdArgs.push_back("--gpu-name");
-  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
+  CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
   CmdArgs.push_back("--output-file");
   std::string OutputFileName = TC.getInputFilename(Output);
 
@@ -553,7 +553,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
     const char *gpu_arch_str = A->getOffloadingArch();
     assert(gpu_arch_str &&
            "Device action expected to have associated a GPU architecture!");
-    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+    OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
 
     if (II.getType() == types::TY_PP_Asm &&
         !shouldIncludePTX(Args, gpu_arch_str))
@@ -561,7 +561,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
     // We need to pass an Arch of the form "sm_XX" for cubin files and
     // "compute_XX" for ptx.
     const char *Arch = (II.getType() == types::TY_PP_Asm)
-                           ? CudaArchToVirtualArchString(gpu_arch)
+                           ? OffloadArchToVirtualArchString(gpu_arch)
                            : gpu_arch_str;
     CmdArgs.push_back(
         Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
@@ -758,7 +758,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
 
   if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
-                      CudaArchToString(CudaArch::CudaDefault));
+                      OffloadArchToString(OffloadArch::CudaDefault));
   } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
              OffloadKind == Action::OFK_None) {
     DAL->eraseArg(options::OPT_march_EQ);
@@ -938,7 +938,7 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
       !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
     StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
     assert(!Arch.empty() && "Must have an explicit GPU arch.");
-    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
+    CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
   }
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
@@ -984,7 +984,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
         llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
     getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
         << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
-    Arch = CudaArchToString(CudaArch::CudaDefault);
+    Arch = OffloadArchToString(OffloadArch::CudaDefault);
   } else {
     Arch = Args.MakeArgString(ArchsOrErr->front());
   }
@@ -37,7 +37,7 @@ private:
 
   // CUDA architectures for which we have raised an error in
   // CheckCudaVersionSupportsArch.
-  mutable std::bitset<(int)CudaArch::LAST> ArchsWithBadVersion;
+  mutable std::bitset<(int)OffloadArch::LAST> ArchsWithBadVersion;
 
 public:
   CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
@@ -50,7 +50,7 @@ public:
   ///
   /// If either Version or Arch is unknown, does not emit an error. Emits at
   /// most one error per Arch.
-  void CheckCudaVersionSupportsArch(CudaArch Arch) const;
+  void CheckCudaVersionSupportsArch(OffloadArch Arch) const;
 
   /// Check whether we detected a valid Cuda install.
   bool isValid() const { return IsValid; }
@@ -5116,12 +5116,12 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
   return false;
 }
 
-// Helper to get CudaArch.
-static CudaArch getCudaArch(const TargetInfo &TI) {
+// Helper to get OffloadArch.
+static OffloadArch getOffloadArch(const TargetInfo &TI) {
   if (!TI.getTriple().isNVPTX())
-    llvm_unreachable("getCudaArch is only valid for NVPTX triple");
+    llvm_unreachable("getOffloadArch is only valid for NVPTX triple");
   auto &TO = TI.getTargetOpts();
-  return StringToCudaArch(TO.CPU);
+  return StringToOffloadArch(TO.CPU);
 }
 
 // Checks whether an argument of launch_bounds attribute is
@@ -5181,10 +5181,10 @@ Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
 
   if (MaxBlocks) {
     // '.maxclusterrank' ptx directive requires .target sm_90 or higher.
-    auto SM = getCudaArch(Context.getTargetInfo());
-    if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
+    auto SM = getOffloadArch(Context.getTargetInfo());
+    if (SM == OffloadArch::UNKNOWN || SM < OffloadArch::SM_90) {
       Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
-          << CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
+          << OffloadArchToString(SM) << CI << MaxBlocks->getSourceRange();
       // Ignore it by setting MaxBlocks to null;
       MaxBlocks = nullptr;
     } else {