[CUDA][NFC] CudaArch to OffloadArch rename (#97028)

Rename `CudaArch` to `OffloadArch` to better reflect its content and use.
Apply a similar rename to the helpers that handle the enum.
Jakub Chlanda 2024-06-30 07:56:07 +02:00 committed by GitHub
parent 6b737c4446
commit ab20086422
11 changed files with 297 additions and 298 deletions
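
For orientation, here is a minimal sketch (not part of the patch) of what a call site looks like after the rename. The enum and helper names are taken from the diff below, and the declarations live in namespace `clang` (as the `clang::StringToOffloadArch` call further down confirms); the include path and the `sm_80` example value are assumptions made for illustration.

```cpp
// Sketch only: exercising the renamed enum and helpers.
// Assumes the declarations come from clang/Basic/Cuda.h.
#include "clang/Basic/Cuda.h"

using namespace clang;

void demoRename() {
  // Old spelling:           CudaArch / StringToCudaArch / CudaArchToString
  // New spelling (this PR): OffloadArch / StringToOffloadArch / OffloadArchToString
  OffloadArch Arch = StringToOffloadArch("sm_80");
  if (IsNVIDIAOffloadArch(Arch)) {
    const char *Name = OffloadArchToString(Arch);            // "sm_80"
    const char *Virt = OffloadArchToVirtualArchString(Arch); // "compute_80"
    (void)Name;
    (void)Virt;
  }
}
```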


@@ -52,7 +52,7 @@ const char *CudaVersionToString(CudaVersion V);
// Input is "Major.Minor"
CudaVersion CudaStringToVersion(const llvm::Twine &S);
enum class CudaArch {
enum class OffloadArch {
UNUSED,
UNKNOWN,
// TODO: Deprecate and remove GPU architectures older than sm_52.
@@ -133,8 +133,8 @@ enum class CudaArch {
// public one.
LAST,
CudaDefault = CudaArch::SM_52,
HIPDefault = CudaArch::GFX906,
CudaDefault = OffloadArch::SM_52,
HIPDefault = OffloadArch::GFX906,
};
enum class CUDAFunctionTarget {
@@ -145,26 +145,26 @@ enum class CUDAFunctionTarget {
InvalidTarget
};
static inline bool IsNVIDIAGpuArch(CudaArch A) {
return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600;
}
static inline bool IsAMDGpuArch(CudaArch A) {
static inline bool IsAMDOffloadArch(OffloadArch A) {
// Generic processor model is for testing only.
return A >= CudaArch::GFX600 && A < CudaArch::Generic;
return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
}
const char *CudaArchToString(CudaArch A);
const char *CudaArchToVirtualArchString(CudaArch A);
const char *OffloadArchToString(OffloadArch A);
const char *OffloadArchToVirtualArchString(OffloadArch A);
// The input should have the form "sm_20".
CudaArch StringToCudaArch(llvm::StringRef S);
OffloadArch StringToOffloadArch(llvm::StringRef S);
/// Get the earliest CudaVersion that supports the given CudaArch.
CudaVersion MinVersionForCudaArch(CudaArch A);
/// Get the earliest CudaVersion that supports the given OffloadArch.
CudaVersion MinVersionForOffloadArch(OffloadArch A);
/// Get the latest CudaVersion that supports the given CudaArch.
CudaVersion MaxVersionForCudaArch(CudaArch A);
/// Get the latest CudaVersion that supports the given OffloadArch.
CudaVersion MaxVersionForOffloadArch(OffloadArch A);
// Various SDK-dependent features that affect CUDA compilation
enum class CudaFeature {


@@ -72,23 +72,21 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
}
namespace {
struct CudaArchToStringMap {
CudaArch arch;
struct OffloadArchToStringMap {
OffloadArch arch;
const char *arch_name;
const char *virtual_arch_name;
};
} // namespace
#define SM2(sm, ca) \
{ CudaArch::SM_##sm, "sm_" #sm, ca }
#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu) \
{ CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
static const CudaArchToStringMap arch_names[] = {
#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
static const OffloadArchToStringMap arch_names[] = {
// clang-format off
{CudaArch::UNUSED, "", ""},
{OffloadArch::UNUSED, "", ""},
SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
SM(30), {CudaArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
SM(50), SM(52), SM(53), // Maxwell
SM(60), SM(61), SM(62), // Pascal
SM(70), SM(72), // Volta
@@ -112,7 +110,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(803), // gfx803
GFX(805), // gfx805
GFX(810), // gfx810
{CudaArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
{OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
GFX(900), // gfx900
GFX(902), // gfx902
GFX(904), // gfx904
@@ -124,12 +122,12 @@ static const CudaArchToStringMap arch_names[] = {
GFX(940), // gfx940
GFX(941), // gfx941
GFX(942), // gfx942
{CudaArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012
GFX(1013), // gfx1013
{CudaArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
{OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
GFX(1030), // gfx1030
GFX(1031), // gfx1031
GFX(1032), // gfx1032
@@ -137,7 +135,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(1034), // gfx1034
GFX(1035), // gfx1035
GFX(1036), // gfx1036
{CudaArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
{OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
GFX(1100), // gfx1100
GFX(1101), // gfx1101
GFX(1102), // gfx1102
@@ -145,105 +143,105 @@ static const CudaArchToStringMap arch_names[] = {
GFX(1150), // gfx1150
GFX(1151), // gfx1151
GFX(1152), // gfx1152
{CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
{OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
GFX(1200), // gfx1200
GFX(1201), // gfx1201
{CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
{CudaArch::Generic, "generic", ""},
{OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
{OffloadArch::Generic, "generic", ""},
// clang-format on
};
#undef SM
#undef SM2
#undef GFX
const char *CudaArchToString(CudaArch A) {
const char *OffloadArchToString(OffloadArch A) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[A](const CudaArchToStringMap &map) { return A == map.arch; });
[A](const OffloadArchToStringMap &map) { return A == map.arch; });
if (result == std::end(arch_names))
return "unknown";
return result->arch_name;
}
const char *CudaArchToVirtualArchString(CudaArch A) {
const char *OffloadArchToVirtualArchString(OffloadArch A) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[A](const CudaArchToStringMap &map) { return A == map.arch; });
[A](const OffloadArchToStringMap &map) { return A == map.arch; });
if (result == std::end(arch_names))
return "unknown";
return result->virtual_arch_name;
}
CudaArch StringToCudaArch(llvm::StringRef S) {
OffloadArch StringToOffloadArch(llvm::StringRef S) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[S](const CudaArchToStringMap &map) { return S == map.arch_name; });
[S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
if (result == std::end(arch_names))
return CudaArch::UNKNOWN;
return OffloadArch::UNKNOWN;
return result->arch;
}
CudaVersion MinVersionForCudaArch(CudaArch A) {
if (A == CudaArch::UNKNOWN)
CudaVersion MinVersionForOffloadArch(OffloadArch A) {
if (A == OffloadArch::UNKNOWN)
return CudaVersion::UNKNOWN;
// AMD GPUs do not depend on CUDA versions.
if (IsAMDGpuArch(A))
if (IsAMDOffloadArch(A))
return CudaVersion::CUDA_70;
switch (A) {
case CudaArch::SM_20:
case CudaArch::SM_21:
case CudaArch::SM_30:
case CudaArch::SM_32_:
case CudaArch::SM_35:
case CudaArch::SM_37:
case CudaArch::SM_50:
case CudaArch::SM_52:
case CudaArch::SM_53:
case OffloadArch::SM_20:
case OffloadArch::SM_21:
case OffloadArch::SM_30:
case OffloadArch::SM_32_:
case OffloadArch::SM_35:
case OffloadArch::SM_37:
case OffloadArch::SM_50:
case OffloadArch::SM_52:
case OffloadArch::SM_53:
return CudaVersion::CUDA_70;
case CudaArch::SM_60:
case CudaArch::SM_61:
case CudaArch::SM_62:
case OffloadArch::SM_60:
case OffloadArch::SM_61:
case OffloadArch::SM_62:
return CudaVersion::CUDA_80;
case CudaArch::SM_70:
case OffloadArch::SM_70:
return CudaVersion::CUDA_90;
case CudaArch::SM_72:
case OffloadArch::SM_72:
return CudaVersion::CUDA_91;
case CudaArch::SM_75:
case OffloadArch::SM_75:
return CudaVersion::CUDA_100;
case CudaArch::SM_80:
case OffloadArch::SM_80:
return CudaVersion::CUDA_110;
case CudaArch::SM_86:
case OffloadArch::SM_86:
return CudaVersion::CUDA_111;
case CudaArch::SM_87:
case OffloadArch::SM_87:
return CudaVersion::CUDA_114;
case CudaArch::SM_89:
case CudaArch::SM_90:
case OffloadArch::SM_89:
case OffloadArch::SM_90:
return CudaVersion::CUDA_118;
case CudaArch::SM_90a:
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
default:
llvm_unreachable("invalid enum");
}
}
CudaVersion MaxVersionForCudaArch(CudaArch A) {
CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
// AMD GPUs do not depend on CUDA versions.
if (IsAMDGpuArch(A))
if (IsAMDOffloadArch(A))
return CudaVersion::NEW;
switch (A) {
case CudaArch::UNKNOWN:
case OffloadArch::UNKNOWN:
return CudaVersion::UNKNOWN;
case CudaArch::SM_20:
case CudaArch::SM_21:
case OffloadArch::SM_20:
case OffloadArch::SM_21:
return CudaVersion::CUDA_80;
case CudaArch::SM_30:
case CudaArch::SM_32_:
case OffloadArch::SM_30:
case OffloadArch::SM_32_:
return CudaVersion::CUDA_102;
case CudaArch::SM_35:
case CudaArch::SM_37:
case OffloadArch::SM_35:
case OffloadArch::SM_37:
return CudaVersion::CUDA_118;
default:
return CudaVersion::NEW;
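
As a small, hypothetical illustration of how the two renamed version helpers compose (the function name below is made up; the logic mirrors the `CheckCudaVersionSupportsArch` check that appears later in this commit):

```cpp
// Sketch: does a detected CUDA toolkit version support a given offload arch?
#include "clang/Basic/Cuda.h"

static bool toolkitSupportsArch(clang::OffloadArch A, clang::CudaVersion V) {
  // Unknown arch or unknown version: nothing meaningful to check, as in the driver.
  if (A == clang::OffloadArch::UNKNOWN || V == clang::CudaVersion::UNKNOWN)
    return true;
  return V >= clang::MinVersionForOffloadArch(A) &&
         V <= clang::MaxVersionForOffloadArch(A);
}
```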


@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
GPU = CudaArch::UNUSED;
GPU = OffloadArch::UNUSED;
// PTX supports f16 as a fundamental type.
HasLegalHalfType = true;
@@ -175,117 +175,117 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__NVPTX__");
// Skip setting architecture dependent macros if undefined.
if (GPU == CudaArch::UNUSED && !HostTarget)
if (GPU == OffloadArch::UNUSED && !HostTarget)
return;
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
switch (GPU) {
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX705:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX805:
case CudaArch::GFX810:
case CudaArch::GFX9_GENERIC:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
case CudaArch::GFX942:
case CudaArch::GFX10_1_GENERIC:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
case CudaArch::GFX1013:
case CudaArch::GFX10_3_GENERIC:
case CudaArch::GFX1030:
case CudaArch::GFX1031:
case CudaArch::GFX1032:
case CudaArch::GFX1033:
case CudaArch::GFX1034:
case CudaArch::GFX1035:
case CudaArch::GFX1036:
case CudaArch::GFX11_GENERIC:
case CudaArch::GFX1100:
case CudaArch::GFX1101:
case CudaArch::GFX1102:
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
case CudaArch::AMDGCNSPIRV:
case CudaArch::Generic:
case CudaArch::LAST:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
case OffloadArch::GFX700:
case OffloadArch::GFX701:
case OffloadArch::GFX702:
case OffloadArch::GFX703:
case OffloadArch::GFX704:
case OffloadArch::GFX705:
case OffloadArch::GFX801:
case OffloadArch::GFX802:
case OffloadArch::GFX803:
case OffloadArch::GFX805:
case OffloadArch::GFX810:
case OffloadArch::GFX9_GENERIC:
case OffloadArch::GFX900:
case OffloadArch::GFX902:
case OffloadArch::GFX904:
case OffloadArch::GFX906:
case OffloadArch::GFX908:
case OffloadArch::GFX909:
case OffloadArch::GFX90a:
case OffloadArch::GFX90c:
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
case OffloadArch::GFX1012:
case OffloadArch::GFX1013:
case OffloadArch::GFX10_3_GENERIC:
case OffloadArch::GFX1030:
case OffloadArch::GFX1031:
case OffloadArch::GFX1032:
case OffloadArch::GFX1033:
case OffloadArch::GFX1034:
case OffloadArch::GFX1035:
case OffloadArch::GFX1036:
case OffloadArch::GFX11_GENERIC:
case OffloadArch::GFX1100:
case OffloadArch::GFX1101:
case OffloadArch::GFX1102:
case OffloadArch::GFX1103:
case OffloadArch::GFX1150:
case OffloadArch::GFX1151:
case OffloadArch::GFX1152:
case OffloadArch::GFX12_GENERIC:
case OffloadArch::GFX1200:
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
case OffloadArch::LAST:
break;
case CudaArch::UNKNOWN:
case OffloadArch::UNKNOWN:
assert(false && "No GPU arch when compiling CUDA device code.");
return "";
case CudaArch::UNUSED:
case CudaArch::SM_20:
case OffloadArch::UNUSED:
case OffloadArch::SM_20:
return "200";
case CudaArch::SM_21:
case OffloadArch::SM_21:
return "210";
case CudaArch::SM_30:
case OffloadArch::SM_30:
return "300";
case CudaArch::SM_32_:
case OffloadArch::SM_32_:
return "320";
case CudaArch::SM_35:
case OffloadArch::SM_35:
return "350";
case CudaArch::SM_37:
case OffloadArch::SM_37:
return "370";
case CudaArch::SM_50:
case OffloadArch::SM_50:
return "500";
case CudaArch::SM_52:
case OffloadArch::SM_52:
return "520";
case CudaArch::SM_53:
case OffloadArch::SM_53:
return "530";
case CudaArch::SM_60:
case OffloadArch::SM_60:
return "600";
case CudaArch::SM_61:
case OffloadArch::SM_61:
return "610";
case CudaArch::SM_62:
case OffloadArch::SM_62:
return "620";
case CudaArch::SM_70:
case OffloadArch::SM_70:
return "700";
case CudaArch::SM_72:
case OffloadArch::SM_72:
return "720";
case CudaArch::SM_75:
case OffloadArch::SM_75:
return "750";
case CudaArch::SM_80:
case OffloadArch::SM_80:
return "800";
case CudaArch::SM_86:
case OffloadArch::SM_86:
return "860";
case CudaArch::SM_87:
case OffloadArch::SM_87:
return "870";
case CudaArch::SM_89:
case OffloadArch::SM_89:
return "890";
case CudaArch::SM_90:
case CudaArch::SM_90a:
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
return "900";
}
llvm_unreachable("unhandled CudaArch");
llvm_unreachable("unhandled OffloadArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == CudaArch::SM_90a)
if (GPU == OffloadArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
}
}
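
The macro defined above is what device code ultimately keys on. A purely illustrative preprocessor sketch (not from the patch) of the values this hunk produces, e.g. `__CUDA_ARCH__=800` for sm_80 and `900` plus `__CUDA_ARCH_FEAT_SM90_ALL=1` for sm_90a:

```cpp
#if defined(__CUDA_ARCH_FEAT_SM90_ALL)
  // Device pass for sm_90a.
#elif defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
  // Device pass for Ampere or newer.
#elif defined(__CUDA_ARCH__)
  // Device pass for older architectures.
#else
  // Host pass: __CUDA_ARCH__ is not defined.
#endif
```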


@@ -62,7 +62,7 @@ static const int NVPTXDWARFAddrSpaceMap[] = {
class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
static const char *const GCCRegNames[];
CudaArch GPU;
OffloadArch GPU;
uint32_t PTXVersion;
std::unique_ptr<TargetInfo> HostTarget;
@@ -79,8 +79,8 @@ public:
initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
StringRef CPU,
const std::vector<std::string> &FeaturesVec) const override {
if (GPU != CudaArch::UNUSED)
Features[CudaArchToString(GPU)] = true;
if (GPU != OffloadArch::UNUSED)
Features[OffloadArchToString(GPU)] = true;
Features["ptx" + std::to_string(PTXVersion)] = true;
return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
}
@@ -121,18 +121,18 @@ public:
}
bool isValidCPUName(StringRef Name) const override {
return StringToCudaArch(Name) != CudaArch::UNKNOWN;
return StringToOffloadArch(Name) != OffloadArch::UNKNOWN;
}
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
for (int i = static_cast<int>(CudaArch::SM_20);
i < static_cast<int>(CudaArch::Generic); ++i)
Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));
for (int i = static_cast<int>(OffloadArch::SM_20);
i < static_cast<int>(OffloadArch::Generic); ++i)
Values.emplace_back(OffloadArchToString(static_cast<OffloadArch>(i)));
}
bool setCPU(const std::string &Name) override {
GPU = StringToCudaArch(Name);
return GPU != CudaArch::UNKNOWN;
GPU = StringToOffloadArch(Name);
return GPU != OffloadArch::UNKNOWN;
}
void setSupportedOpenCLOpts() override {
@@ -183,7 +183,7 @@ public:
bool hasBitIntType() const override { return true; }
bool hasBFloat16Type() const override { return true; }
CudaArch getGPU() const { return GPU; }
OffloadArch getGPU() const { return GPU; }
};
} // namespace targets
} // namespace clang


@@ -2227,113 +2227,112 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
return false;
}
// Get current CudaArch and ignore any unknown values
static CudaArch getCudaArch(CodeGenModule &CGM) {
// Get current OffloadArch and ignore any unknown values
static OffloadArch getOffloadArch(CodeGenModule &CGM) {
if (!CGM.getTarget().hasFeature("ptx"))
return CudaArch::UNKNOWN;
return OffloadArch::UNKNOWN;
for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) {
if (Feature.getValue()) {
CudaArch Arch = StringToCudaArch(Feature.getKey());
if (Arch != CudaArch::UNKNOWN)
OffloadArch Arch = StringToOffloadArch(Feature.getKey());
if (Arch != OffloadArch::UNKNOWN)
return Arch;
}
}
return CudaArch::UNKNOWN;
return OffloadArch::UNKNOWN;
}
/// Check to see if target architecture supports unified addressing which is
/// a restriction for OpenMP requires clause "unified_shared_memory".
void CGOpenMPRuntimeGPU::processRequiresDirective(
const OMPRequiresDecl *D) {
void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
CudaArch Arch = getCudaArch(CGM);
OffloadArch Arch = getOffloadArch(CGM);
switch (Arch) {
case CudaArch::SM_20:
case CudaArch::SM_21:
case CudaArch::SM_30:
case CudaArch::SM_32_:
case CudaArch::SM_35:
case CudaArch::SM_37:
case CudaArch::SM_50:
case CudaArch::SM_52:
case CudaArch::SM_53: {
case OffloadArch::SM_20:
case OffloadArch::SM_21:
case OffloadArch::SM_30:
case OffloadArch::SM_32_:
case OffloadArch::SM_35:
case OffloadArch::SM_37:
case OffloadArch::SM_50:
case OffloadArch::SM_52:
case OffloadArch::SM_53: {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << "Target architecture " << CudaArchToString(Arch)
Out << "Target architecture " << OffloadArchToString(Arch)
<< " does not support unified addressing";
CGM.Error(Clause->getBeginLoc(), Out.str());
return;
}
case CudaArch::SM_60:
case CudaArch::SM_61:
case CudaArch::SM_62:
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
case CudaArch::SM_80:
case CudaArch::SM_86:
case CudaArch::SM_87:
case CudaArch::SM_89:
case CudaArch::SM_90:
case CudaArch::SM_90a:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX705:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX805:
case CudaArch::GFX810:
case CudaArch::GFX9_GENERIC:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
case CudaArch::GFX942:
case CudaArch::GFX10_1_GENERIC:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
case CudaArch::GFX1013:
case CudaArch::GFX10_3_GENERIC:
case CudaArch::GFX1030:
case CudaArch::GFX1031:
case CudaArch::GFX1032:
case CudaArch::GFX1033:
case CudaArch::GFX1034:
case CudaArch::GFX1035:
case CudaArch::GFX1036:
case CudaArch::GFX11_GENERIC:
case CudaArch::GFX1100:
case CudaArch::GFX1101:
case CudaArch::GFX1102:
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
case CudaArch::GFX1152:
case CudaArch::GFX12_GENERIC:
case CudaArch::GFX1200:
case CudaArch::GFX1201:
case CudaArch::AMDGCNSPIRV:
case CudaArch::Generic:
case CudaArch::UNUSED:
case CudaArch::UNKNOWN:
case OffloadArch::SM_60:
case OffloadArch::SM_61:
case OffloadArch::SM_62:
case OffloadArch::SM_70:
case OffloadArch::SM_72:
case OffloadArch::SM_75:
case OffloadArch::SM_80:
case OffloadArch::SM_86:
case OffloadArch::SM_87:
case OffloadArch::SM_89:
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
case OffloadArch::GFX700:
case OffloadArch::GFX701:
case OffloadArch::GFX702:
case OffloadArch::GFX703:
case OffloadArch::GFX704:
case OffloadArch::GFX705:
case OffloadArch::GFX801:
case OffloadArch::GFX802:
case OffloadArch::GFX803:
case OffloadArch::GFX805:
case OffloadArch::GFX810:
case OffloadArch::GFX9_GENERIC:
case OffloadArch::GFX900:
case OffloadArch::GFX902:
case OffloadArch::GFX904:
case OffloadArch::GFX906:
case OffloadArch::GFX908:
case OffloadArch::GFX909:
case OffloadArch::GFX90a:
case OffloadArch::GFX90c:
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:
case OffloadArch::GFX1012:
case OffloadArch::GFX1013:
case OffloadArch::GFX10_3_GENERIC:
case OffloadArch::GFX1030:
case OffloadArch::GFX1031:
case OffloadArch::GFX1032:
case OffloadArch::GFX1033:
case OffloadArch::GFX1034:
case OffloadArch::GFX1035:
case OffloadArch::GFX1036:
case OffloadArch::GFX11_GENERIC:
case OffloadArch::GFX1100:
case OffloadArch::GFX1101:
case OffloadArch::GFX1102:
case OffloadArch::GFX1103:
case OffloadArch::GFX1150:
case OffloadArch::GFX1151:
case OffloadArch::GFX1152:
case OffloadArch::GFX12_GENERIC:
case OffloadArch::GFX1200:
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
case OffloadArch::UNUSED:
case OffloadArch::UNKNOWN:
break;
case CudaArch::LAST:
llvm_unreachable("Unexpected Cuda arch.");
case OffloadArch::LAST:
llvm_unreachable("Unexpected GPU arch.");
}
}
}


@@ -899,11 +899,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
}
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch(
if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
} else if (AMDTriple &&
IsAMDGpuArch(StringToCudaArch(
IsAMDOffloadArch(StringToOffloadArch(
getProcessorFromTargetID(*AMDTriple, Arch)))) {
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
} else {
@@ -2948,7 +2948,7 @@ class OffloadingActionBuilder final {
struct TargetID {
/// Target ID string which is persistent throughout the compilation.
const char *ID;
TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
TargetID(OffloadArch Arch) { ID = OffloadArchToString(Arch); }
TargetID(const char *ID) : ID(ID) {}
operator const char *() { return ID; }
operator StringRef() { return StringRef(ID); }
@@ -2969,7 +2969,7 @@ class OffloadingActionBuilder final {
bool Relocatable = false;
/// Default GPU architecture if there's no one specified.
CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
OffloadArch DefaultOffloadArch = OffloadArch::UNKNOWN;
/// Method to generate compilation unit ID specified by option
/// '-fuse-cuid='.
@@ -3098,7 +3098,7 @@ class OffloadingActionBuilder final {
// If we have a fat binary, add it to the list.
if (CudaFatBinary) {
AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
AddTopLevel(CudaFatBinary, OffloadArch::UNUSED);
CudaDeviceActions.clear();
CudaFatBinary = nullptr;
return;
@@ -3243,11 +3243,11 @@ class OffloadingActionBuilder final {
if (GpuArchList.empty()) {
if (ToolChains.front()->getTriple().isSPIRV()) {
if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
GpuArchList.push_back(CudaArch::AMDGCNSPIRV);
GpuArchList.push_back(OffloadArch::AMDGCNSPIRV);
else
GpuArchList.push_back(CudaArch::Generic);
GpuArchList.push_back(OffloadArch::Generic);
} else {
GpuArchList.push_back(DefaultCudaArch);
GpuArchList.push_back(DefaultOffloadArch);
}
}
@@ -3262,16 +3262,16 @@ class OffloadingActionBuilder final {
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
DefaultCudaArch = CudaArch::CudaDefault;
DefaultOffloadArch = OffloadArch::CudaDefault;
}
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
OffloadArch Arch = StringToOffloadArch(ArchStr);
if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
return StringRef();
}
return CudaArchToString(Arch);
return OffloadArchToString(Arch);
}
std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
@@ -3401,7 +3401,7 @@ class OffloadingActionBuilder final {
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
DefaultCudaArch = CudaArch::HIPDefault;
DefaultOffloadArch = OffloadArch::HIPDefault;
if (Args.hasArg(options::OPT_fhip_emit_relocatable,
options::OPT_fno_hip_emit_relocatable)) {
@@ -4408,23 +4408,24 @@ static StringRef getCanonicalArchString(Compilation &C,
bool SuppressError = false) {
// Lookup the CUDA / HIP architecture string. Only report an error if we were
// expecting the triple to be only NVPTX / AMDGPU.
CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr));
OffloadArch Arch =
StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
if (!SuppressError && Triple.isNVPTX() &&
(Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) {
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
<< "CUDA" << ArchStr;
return StringRef();
} else if (!SuppressError && Triple.isAMDGPU() &&
(Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) {
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
<< "HIP" << ArchStr;
return StringRef();
}
if (IsNVIDIAGpuArch(Arch))
return Args.MakeArgStringRef(CudaArchToString(Arch));
if (IsNVIDIAOffloadArch(Arch))
return Args.MakeArgStringRef(OffloadArchToString(Arch));
if (IsAMDGpuArch(Arch)) {
if (IsAMDOffloadArch(Arch)) {
llvm::StringMap<bool> Features;
auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
if (!HIPTriple)
@@ -4545,9 +4546,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
if (Archs.empty()) {
if (Kind == Action::OFK_Cuda)
Archs.insert(CudaArchToString(CudaArch::CudaDefault));
Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
else if (Kind == Action::OFK_HIP)
Archs.insert(CudaArchToString(CudaArch::HIPDefault));
Archs.insert(OffloadArchToString(OffloadArch::HIPDefault));
else if (Kind == Action::OFK_OpenMP)
Archs.insert(StringRef());
} else {


@@ -79,7 +79,8 @@ OffloadTargetInfo::OffloadTargetInfo(const StringRef Target,
auto TargetFeatures = Target.split(':');
auto TripleOrGPU = TargetFeatures.first.rsplit('-');
if (clang::StringToCudaArch(TripleOrGPU.second) != clang::CudaArch::UNKNOWN) {
if (clang::StringToOffloadArch(TripleOrGPU.second) !=
clang::OffloadArch::UNKNOWN) {
auto KindTriple = TripleOrGPU.first.split('-');
this->OffloadKind = KindTriple.first;


@@ -87,7 +87,7 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
<< llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
Arch = CudaArchToString(CudaArch::HIPDefault);
Arch = OffloadArchToString(OffloadArch::HIPDefault);
} else {
Arch = Args.MakeArgString(ArchsOrErr->front());
}


@@ -223,13 +223,13 @@ CudaInstallationDetector::CudaInstallationDetector(
// CUDA-9+ uses single libdevice file for all GPU variants.
std::string FilePath = LibDevicePath + "/libdevice.10.bc";
if (FS.exists(FilePath)) {
for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
++Arch) {
CudaArch GpuArch = static_cast<CudaArch>(Arch);
if (!IsNVIDIAGpuArch(GpuArch))
for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
Arch < E; ++Arch) {
OffloadArch OA = static_cast<OffloadArch>(Arch);
if (!IsNVIDIAOffloadArch(OA))
continue;
std::string GpuArchName(CudaArchToString(GpuArch));
LibDeviceMap[GpuArchName] = FilePath;
std::string OffloadArchName(OffloadArchToString(OA));
LibDeviceMap[OffloadArchName] = FilePath;
}
}
} else {
@@ -312,17 +312,17 @@ void CudaInstallationDetector::AddCudaIncludeArgs(
}
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
CudaArch Arch) const {
if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
OffloadArch Arch) const {
if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
ArchsWithBadVersion[(int)Arch])
return;
auto MinVersion = MinVersionForCudaArch(Arch);
auto MaxVersion = MaxVersionForCudaArch(Arch);
auto MinVersion = MinVersionForOffloadArch(Arch);
auto MaxVersion = MaxVersionForOffloadArch(Arch);
if (Version < MinVersion || Version > MaxVersion) {
ArchsWithBadVersion[(int)Arch] = true;
D.Diag(diag::err_drv_cuda_version_unsupported)
<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
<< OffloadArchToString(Arch) << CudaVersionToString(MinVersion)
<< CudaVersionToString(MaxVersion) << InstallPath
<< CudaVersionToString(Version);
}
@@ -401,8 +401,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
}
// Obtain architecture from the action.
CudaArch gpu_arch = StringToCudaArch(GPUArchName);
assert(gpu_arch != CudaArch::UNKNOWN &&
OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
assert(gpu_arch != OffloadArch::UNKNOWN &&
"Device action expected to have an architecture.");
// Check that our installation's ptxas supports gpu_arch.
@@ -457,7 +457,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-v");
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
std::string OutputFileName = TC.getInputFilename(Output);
@@ -553,7 +553,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
const char *gpu_arch_str = A->getOffloadingArch();
assert(gpu_arch_str &&
"Device action expected to have associated a GPU architecture!");
CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
if (II.getType() == types::TY_PP_Asm &&
!shouldIncludePTX(Args, gpu_arch_str))
@@ -561,7 +561,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
const char *Arch = (II.getType() == types::TY_PP_Asm)
? CudaArchToVirtualArchString(gpu_arch)
? OffloadArchToVirtualArchString(gpu_arch)
: gpu_arch_str;
CmdArgs.push_back(
Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
@@ -758,7 +758,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CudaArchToString(CudaArch::CudaDefault));
OffloadArchToString(OffloadArch::CudaDefault));
} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
OffloadKind == Action::OFK_None) {
DAL->eraseArg(options::OPT_march_EQ);
@@ -938,7 +938,7 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
assert(!Arch.empty() && "Must have an explicit GPU arch.");
CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
}
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
@@ -984,7 +984,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
<< llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
Arch = CudaArchToString(CudaArch::CudaDefault);
Arch = OffloadArchToString(OffloadArch::CudaDefault);
} else {
Arch = Args.MakeArgString(ArchsOrErr->front());
}


@@ -37,7 +37,7 @@ private:
// CUDA architectures for which we have raised an error in
// CheckCudaVersionSupportsArch.
mutable std::bitset<(int)CudaArch::LAST> ArchsWithBadVersion;
mutable std::bitset<(int)OffloadArch::LAST> ArchsWithBadVersion;
public:
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
@@ -50,7 +50,7 @@ public:
///
/// If either Version or Arch is unknown, does not emit an error. Emits at
/// most one error per Arch.
void CheckCudaVersionSupportsArch(CudaArch Arch) const;
void CheckCudaVersionSupportsArch(OffloadArch Arch) const;
/// Check whether we detected a valid Cuda install.
bool isValid() const { return IsValid; }


@@ -5116,12 +5116,12 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
return false;
}
// Helper to get CudaArch.
static CudaArch getCudaArch(const TargetInfo &TI) {
// Helper to get OffloadArch.
static OffloadArch getOffloadArch(const TargetInfo &TI) {
if (!TI.getTriple().isNVPTX())
llvm_unreachable("getCudaArch is only valid for NVPTX triple");
llvm_unreachable("getOffloadArch is only valid for NVPTX triple");
auto &TO = TI.getTargetOpts();
return StringToCudaArch(TO.CPU);
return StringToOffloadArch(TO.CPU);
}
// Checks whether an argument of launch_bounds attribute is
@@ -5181,10 +5181,10 @@ Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
if (MaxBlocks) {
// '.maxclusterrank' ptx directive requires .target sm_90 or higher.
auto SM = getCudaArch(Context.getTargetInfo());
if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
auto SM = getOffloadArch(Context.getTargetInfo());
if (SM == OffloadArch::UNKNOWN || SM < OffloadArch::SM_90) {
Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
<< CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
<< OffloadArchToString(SM) << CI << MaxBlocks->getSourceRange();
// Ignore it by setting MaxBlocks to null;
MaxBlocks = nullptr;
} else {