[flang][runtime] Added noinline for some functions in device build. (#93128)
This helps reduce the compilation time spent by the device compiler's optimizer and then by the code generator. Since the F18 RT is going to be distributed as LLVM BC for some targets (the same way the LLVM liboffload device library is distributed) and linked into the user offload code, the compilation time of the produced LLVM BC will be critical.
This commit is contained in:
parent
42b5daba50
commit
208544fc70
@ -156,4 +156,26 @@
|
||||
#define RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
|
||||
#endif /* !defined(__CUDACC__) */
|
||||
|
||||
/*
|
||||
* RT_DEVICE_NOINLINE may be used for non-performance critical
|
||||
* functions that should not be inlined to minimize the amount
|
||||
* of code that needs to be processed by the device compiler's
|
||||
* optimizer.
|
||||
*/
|
||||
/* Fallback for compilers that do not provide __has_attribute: every query evaluates to 0. */
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
/* RT_NOINLINE_ATTR expands to __attribute__((noinline)) when __has_attribute reports support for it, and to nothing otherwise. */
#if __has_attribute(noinline)
|
||||
#define RT_NOINLINE_ATTR __attribute__((noinline))
|
||||
#else
|
||||
#define RT_NOINLINE_ATTR
|
||||
#endif
|
||||
/* The first branch is taken when a CUDA compiler is in use (__CUDACC__ or __CUDA__ is defined) and __CUDA_ARCH__ is also defined; every other build takes the #else branch. */
#if (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
|
||||
/* CUDA device branch: RT_DEVICE_NOINLINE applies RT_NOINLINE_ATTR (which may itself be empty). */
#define RT_DEVICE_NOINLINE RT_NOINLINE_ATTR
|
||||
/* CUDA device branch: expands to nothing, so it only drops the 'inline' keyword; it does not add a noinline attribute. */
#define RT_DEVICE_NOINLINE_HOST_INLINE
|
||||
#else
|
||||
/* Other builds: RT_DEVICE_NOINLINE expands to nothing and has no effect. */
#define RT_DEVICE_NOINLINE
|
||||
/* Other builds: RT_DEVICE_NOINLINE_HOST_INLINE is the plain 'inline' specifier. */
#define RT_DEVICE_NOINLINE_HOST_INLINE inline
|
||||
#endif
|
||||
|
||||
#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */
|
||||
|
@ -30,7 +30,7 @@ namespace log2visit {
|
||||
|
||||
template <std::size_t LOW, std::size_t HIGH, typename RESULT, typename VISITOR,
|
||||
typename... VARIANT>
|
||||
inline RT_API_ATTRS RESULT Log2VisitHelper(
|
||||
RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS RESULT Log2VisitHelper(
|
||||
VISITOR &&visitor, std::size_t which, VARIANT &&...u) {
|
||||
if constexpr (LOW + 7 >= HIGH) {
|
||||
switch (which - LOW) {
|
||||
@ -68,8 +68,9 @@ inline RT_API_ATTRS RESULT Log2VisitHelper(
|
||||
}
|
||||
|
||||
template <typename VISITOR, typename... VARIANT>
|
||||
inline RT_API_ATTRS auto visit(VISITOR &&visitor, VARIANT &&...u)
|
||||
-> decltype(visitor(std::get<0>(std::forward<VARIANT>(u))...)) {
|
||||
RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS auto
|
||||
visit(VISITOR &&visitor, VARIANT &&...u) -> decltype(visitor(std::get<0>(
|
||||
std::forward<VARIANT>(u))...)) {
|
||||
using Result = decltype(visitor(std::get<0>(std::forward<VARIANT>(u))...));
|
||||
if constexpr (sizeof...(u) == 1) {
|
||||
static constexpr std::size_t high{
|
||||
|
@ -54,7 +54,7 @@ public:
|
||||
// to regular printf for the device compilation.
|
||||
// Try to keep the inline implementations as small as possible.
|
||||
template <typename... Args>
|
||||
[[noreturn]] RT_API_ATTRS const char *Crash(
|
||||
[[noreturn]] RT_DEVICE_NOINLINE RT_API_ATTRS const char *Crash(
|
||||
const char *message, Args... args) const {
|
||||
#if !defined(RT_DEVICE_COMPILATION)
|
||||
// Invoke handler set up by the test harness.
|
||||
|
Loading…
x
Reference in New Issue
Block a user