diff --git a/flang/include/flang/Common/api-attrs.h b/flang/include/flang/Common/api-attrs.h index 04ee307326ac..d73e60996bc8 100644 --- a/flang/include/flang/Common/api-attrs.h +++ b/flang/include/flang/Common/api-attrs.h @@ -156,4 +156,26 @@ #define RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN #endif /* !defined(__CUDACC__) */ +/* + * RT_DEVICE_NOINLINE marks non-performance-critical functions as noinline in + * CUDA device compilation (when the attribute is supported) to reduce the code + * the device optimizer must process; it expands to nothing elsewhere. + * RT_DEVICE_NOINLINE_HOST_INLINE: empty on device, inline keyword elsewhere. + */ +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif +#if __has_attribute(noinline) +#define RT_NOINLINE_ATTR __attribute__((noinline)) +#else +#define RT_NOINLINE_ATTR +#endif +#if (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) +#define RT_DEVICE_NOINLINE RT_NOINLINE_ATTR +#define RT_DEVICE_NOINLINE_HOST_INLINE +#else +#define RT_DEVICE_NOINLINE +#define RT_DEVICE_NOINLINE_HOST_INLINE inline +#endif + #endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */ diff --git a/flang/include/flang/Common/visit.h b/flang/include/flang/Common/visit.h index d867338be7e0..ad66297650b0 100644 --- a/flang/include/flang/Common/visit.h +++ b/flang/include/flang/Common/visit.h @@ -30,7 +30,7 @@ namespace log2visit { template -inline RT_API_ATTRS RESULT Log2VisitHelper( +RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS RESULT Log2VisitHelper( VISITOR &&visitor, std::size_t which, VARIANT &&...u) { if constexpr (LOW + 7 >= HIGH) { switch (which - LOW) { @@ -68,8 +68,9 @@ inline RT_API_ATTRS RESULT Log2VisitHelper( } template -inline RT_API_ATTRS auto visit(VISITOR &&visitor, VARIANT &&...u) - -> decltype(visitor(std::get<0>(std::forward(u))...)) { +RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS auto +visit(VISITOR &&visitor, VARIANT &&...u) -> decltype(visitor(std::get<0>( + std::forward(u))...)) { using Result = decltype(visitor(std::get<0>(std::forward(u))...)); if constexpr (sizeof...(u) == 1) { static constexpr std::size_t high{ diff 
--git a/flang/runtime/terminator.h b/flang/runtime/terminator.h index 59a47ce93e7c..609f059d6e09 100644 --- a/flang/runtime/terminator.h +++ b/flang/runtime/terminator.h @@ -54,7 +54,7 @@ public: // to regular printf for the device compilation. // Try to keep the inline implementations as small as possible. template - [[noreturn]] RT_API_ATTRS const char *Crash( + [[noreturn]] RT_DEVICE_NOINLINE RT_API_ATTRS const char *Crash( const char *message, Args... args) const { #if !defined(RT_DEVICE_COMPILATION) // Invoke handler set up by the test harness.