amdgcn: Enable SIMD vectorization of math functions
Calls to vectorized versions of routines in the math library will now be inserted when vectorizing code containing supported math functions.

2023-03-02  Kwok Cheung Yeung  <kcy@codesourcery.com>
            Paul-Antoine Arras  <pa@codesourcery.com>

gcc/
	* builtins.cc (mathfn_built_in_explicit): New.
	* config/gcn/gcn.cc: Include case-cfn-macros.h.
	(mathfn_built_in_explicit): Add prototype.
	(gcn_vectorize_builtin_vectorized_function): New.
	(gcn_libc_has_function): New.
	(TARGET_LIBC_HAS_FUNCTION): Define.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.

gcc/testsuite/
	* gcc.target/gcn/simd-math-1.c: New testcase.
	* gcc.target/gcn/simd-math-2.c: New testcase.

libgomp/
	* testsuite/libgomp.c/simd-math-1.c: New testcase.
This commit is contained in:
@@ -2089,6 +2089,14 @@ mathfn_built_in (tree type, combined_fn fn)
|
||||
return mathfn_built_in_1 (type, fn, /*implicit=*/ 1);
|
||||
}
|
||||
|
||||
/* Like mathfn_built_in_1, but always use the explicit array. */
|
||||
|
||||
tree
|
||||
mathfn_built_in_explicit (tree type, combined_fn fn)
|
||||
{
|
||||
return mathfn_built_in_1 (type, fn, /*implicit=*/ 0);
|
||||
}
|
||||
|
||||
/* Like mathfn_built_in_1, but take a built_in_function and
|
||||
always use the implicit array. */
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "dwarf2.h"
|
||||
#include "gimple.h"
|
||||
#include "cgraph.h"
|
||||
#include "case-cfn-macros.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
@@ -5240,6 +5241,110 @@ gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED (node))
|
||||
return 0;
|
||||
}
|
||||
|
||||
tree mathfn_built_in_explicit (tree, combined_fn);
|
||||
|
||||
/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
|
||||
Return the function declaration of the vectorized version of the builtin
|
||||
in the math library if available. */
|
||||
|
||||
tree
|
||||
gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
|
||||
tree type_in)
|
||||
{
|
||||
if (TREE_CODE (type_out) != VECTOR_TYPE
|
||||
|| TREE_CODE (type_in) != VECTOR_TYPE)
|
||||
return NULL_TREE;
|
||||
|
||||
machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
|
||||
int out_n = TYPE_VECTOR_SUBPARTS (type_out);
|
||||
machine_mode in_mode = TYPE_MODE (TREE_TYPE (type_in));
|
||||
int in_n = TYPE_VECTOR_SUBPARTS (type_in);
|
||||
combined_fn cfn = combined_fn (fn);
|
||||
|
||||
/* Keep this consistent with the list of vectorized math routines. */
|
||||
int implicit_p;
|
||||
switch (fn)
|
||||
{
|
||||
CASE_CFN_ACOS:
|
||||
CASE_CFN_ACOSH:
|
||||
CASE_CFN_ASIN:
|
||||
CASE_CFN_ASINH:
|
||||
CASE_CFN_ATAN:
|
||||
CASE_CFN_ATAN2:
|
||||
CASE_CFN_ATANH:
|
||||
CASE_CFN_COPYSIGN:
|
||||
CASE_CFN_COS:
|
||||
CASE_CFN_COSH:
|
||||
CASE_CFN_ERF:
|
||||
CASE_CFN_EXP:
|
||||
CASE_CFN_EXP2:
|
||||
CASE_CFN_FINITE:
|
||||
CASE_CFN_FMOD:
|
||||
CASE_CFN_GAMMA:
|
||||
CASE_CFN_HYPOT:
|
||||
CASE_CFN_ISNAN:
|
||||
CASE_CFN_LGAMMA:
|
||||
CASE_CFN_LOG:
|
||||
CASE_CFN_LOG10:
|
||||
CASE_CFN_LOG2:
|
||||
CASE_CFN_POW:
|
||||
CASE_CFN_REMAINDER:
|
||||
CASE_CFN_RINT:
|
||||
CASE_CFN_SIN:
|
||||
CASE_CFN_SINH:
|
||||
CASE_CFN_SQRT:
|
||||
CASE_CFN_TAN:
|
||||
CASE_CFN_TANH:
|
||||
CASE_CFN_TGAMMA:
|
||||
implicit_p = 1;
|
||||
break;
|
||||
|
||||
CASE_CFN_SCALB:
|
||||
CASE_CFN_SIGNIFICAND:
|
||||
implicit_p = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
tree out_t_node = (out_mode == DFmode) ? double_type_node : float_type_node;
|
||||
tree fndecl = implicit_p ? mathfn_built_in (out_t_node, cfn)
|
||||
: mathfn_built_in_explicit (out_t_node, cfn);
|
||||
|
||||
const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
|
||||
char name[20];
|
||||
sprintf (name, out_mode == DFmode ? "v%ddf_%s" : "v%dsf_%s",
|
||||
out_n, bname + 10);
|
||||
|
||||
unsigned arity = 0;
|
||||
for (tree args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
|
||||
arity++;
|
||||
|
||||
tree fntype = (arity == 1)
|
||||
? build_function_type_list (type_out, type_in, NULL)
|
||||
: build_function_type_list (type_out, type_in, type_in, NULL);
|
||||
|
||||
/* Build a function declaration for the vectorized function. */
|
||||
tree new_fndecl = build_decl (BUILTINS_LOCATION,
|
||||
FUNCTION_DECL, get_identifier (name), fntype);
|
||||
TREE_PUBLIC (new_fndecl) = 1;
|
||||
DECL_EXTERNAL (new_fndecl) = 1;
|
||||
DECL_IS_NOVOPS (new_fndecl) = 1;
|
||||
TREE_READONLY (new_fndecl) = 1;
|
||||
|
||||
return new_fndecl;
|
||||
}
|
||||
|
||||
/* Implement TARGET_LIBC_HAS_FUNCTION. */
|
||||
|
||||
bool
|
||||
gcn_libc_has_function (enum function_class fn_class,
|
||||
tree type)
|
||||
{
|
||||
return bsd_libc_has_function (fn_class, type);
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
/* {{{ md_reorg pass. */
|
||||
|
||||
@@ -7290,6 +7395,8 @@ gcn_dwarf_register_span (rtx rtl)
|
||||
gcn_ira_change_pseudo_allocno_class
|
||||
#undef TARGET_LEGITIMATE_CONSTANT_P
|
||||
#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
|
||||
#undef TARGET_LIBC_HAS_FUNCTION
|
||||
#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
|
||||
#undef TARGET_LRA_P
|
||||
#define TARGET_LRA_P hook_bool_void_true
|
||||
#undef TARGET_MACHINE_DEPENDENT_REORG
|
||||
@@ -7337,6 +7444,9 @@ gcn_dwarf_register_span (rtx rtl)
|
||||
#define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
|
||||
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
|
||||
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
|
||||
gcn_vectorize_builtin_vectorized_function
|
||||
#undef TARGET_VECTORIZE_GET_MASK_MODE
|
||||
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
|
||||
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
/* Check that calls to the vectorized math functions are actually emitted. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno -mstack-size=3000000 -fdump-tree-vect" } */
|
||||
|
||||
|
||||
#undef PRINT_RESULT
|
||||
#define VERBOSE 0
|
||||
#define EARLY_EXIT 1
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef PRINT_RESULT
|
||||
#include <stdio.h>
|
||||
#define PRINTF printf
|
||||
#else
|
||||
/* No-op stand-in for printf, selected when PRINT_RESULT is not defined.
   The format string and any further arguments are ignored.  */
static void
null_printf (const char *f, ...)
{
  (void) f;
}
|
||||
|
||||
#define PRINTF null_printf
|
||||
#endif
|
||||
|
||||
#define N 512
|
||||
#define EPSILON_float 1e-5
|
||||
#define EPSILON_double 1e-10
|
||||
|
||||
static int failed = 0;
|
||||
|
||||
/* Compare the bit patterns of X and Y and return the 1-based position
   (32 = top bit, 1 = lowest bit) of the most significant bit in which they
   differ, or 0 when the two patterns are identical.  */
int deviation_float (float x, float y)
{
  union {
    float f;
    unsigned u;
  } lhs, rhs;

  lhs.f = x;
  rhs.f = y;

  /* A set bit here marks a position where the two encodings disagree.  */
  unsigned differing = lhs.u ^ rhs.u;
  int pos = 32;

  /* Walk a single-bit probe down from the top bit until it hits a
     differing position or runs off the bottom.  */
  for (unsigned probe = 0x80000000U; pos > 0; probe >>= 1, pos--)
    if (differing & probe)
      break;

  return pos;
}
|
||||
|
||||
/* Compare the bit patterns of X and Y and return the 1-based position
   (64 = top bit, 1 = lowest bit) of the most significant bit in which they
   differ, or 0 when the two patterns are identical.  */
int deviation_double (double x, double y)
{
  union {
    double d;
    unsigned long long u;
  } lhs, rhs;

  lhs.d = x;
  rhs.d = y;

  /* A set bit here marks a position where the two encodings disagree.  */
  unsigned long long differing = lhs.u ^ rhs.u;
  int pos = 64;

  /* Walk a single-bit probe down from the top bit until it hits a
     differing position or runs off the bottom.  */
  for (unsigned long long probe = 0x8000000000000000ULL; pos > 0;
       probe >>= 1, pos--)
    if (differing & probe)
      break;

  return pos;
}
|
||||
|
||||
/* Define and immediately run a test of the one-argument routine FUN on
   element type TFLOAT.  The expansion consists of two function definitions
   followed by a call, so it is meant to be used inside a function body (as
   main does).

   test_FUN fills a[] with N inputs starting at LOW with a step of
   (HIGH - LOW) / N, stores FUN (a[i]) into res[], and hands both arrays to
   check_FUN.  check_FUN, which carries attributes disabling vectorization
   and unsafe math optimizations, recomputes FUN element by element and
   treats an element as failing when the NaN or infinity status differs, or
   when the absolute difference exceeds EPSILON_<TFLOAT> and
   deviation_<TFLOAT> reports more than 10.

   Failures are accumulated in the file-scope FAILED flag (no local
   shadow), so main's return value reflects them even when EARLY_EXIT is
   0.  The step is computed with a double divisor so that integer bounds
   such as "-10, 10" do not collapse to a step of zero.  */
#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
                 || isinf (res[i]) != isinf (expected) \
                 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      failed |= fail; \
      if (VERBOSE || fail) \
        PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
                a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      if (EARLY_EXIT && fail) \
        exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N]; \
    for (int i = 0; i < N; i++) \
      a[i] = (LOW) + (((HIGH) - (LOW)) / (double) N) * i; \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i]); \
    check_##FUN (res, a); \
  } \
  test_##FUN ();
|
||||
|
||||
/* Two-argument counterpart of the single-argument test macro: define and
   immediately run a test of FUN (x, y) on element type TFLOAT.  The
   expansion consists of two function definitions followed by a call, so
   it is meant to be used inside a function body (as main does).

   test_FUN fills a[] from LOW1 in steps of (HIGH1 - LOW1) / N and b[] from
   LOW2 in steps of (HIGH2 - LOW2) / N, stores FUN (a[i], b[i]) into res[],
   and hands the arrays to check_FUN.  check_FUN, which carries attributes
   disabling vectorization and unsafe math optimizations, recomputes FUN
   element by element and treats an element as failing when the NaN or
   infinity status differs, or when the absolute difference exceeds
   EPSILON_<TFLOAT> and deviation_<TFLOAT> reports more than 10.

   Failures are accumulated in the file-scope FAILED flag (no local
   shadow), so main's return value reflects them even when EARLY_EXIT is
   0.  The steps are computed with a double divisor so that integer bounds
   do not collapse to a step of zero.  */
#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i], b[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
                 || isinf (res[i]) != isinf (expected) \
                 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      failed |= fail; \
      if (VERBOSE || fail) \
        PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
                a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      if (EARLY_EXIT && fail) \
        exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N], b[N]; \
    for (int i = 0; i < N; i++) { \
      a[i] = (LOW1) + (((HIGH1) - (LOW1)) / (double) N) * i; \
      b[i] = (LOW2) + (((HIGH2) - (LOW2)) / (double) N) * i; \
    } \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i], b[i]); \
    check_##FUN (res, a, b); \
  } \
  test_##FUN ();
|
||||
|
||||
int main (void)
|
||||
{
|
||||
TEST_FUN (float, -1.1, 1.1, acosf); /* { dg-final { scan-tree-dump "v64sf_acosf" "vect" } }*/
|
||||
TEST_FUN (float, -10, 10, acoshf); /* { dg-final { scan-tree-dump "v64sf_acoshf" "vect" } }*/
|
||||
TEST_FUN (float, -1.1, 1.1, asinf); /* { dg-final { scan-tree-dump "v64sf_asinf" "vect" } }*/
|
||||
TEST_FUN (float, -10, 10, asinhf); /* { dg-final { scan-tree-dump "v64sf_asinhf" "vect" } }*/
|
||||
TEST_FUN (float, -1.1, 1.1, atanf); /* { dg-final { scan-tree-dump "v64sf_atanf" "vect" } }*/
|
||||
TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f); /* { dg-final { scan-tree-dump "v64sf_atan2f" "vect" } }*/
|
||||
TEST_FUN (float, -2.0, 2.0, atanhf); /* { dg-final { scan-tree-dump "v64sf_atanhf" "vect" } }*/
|
||||
TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf); /* { dg-final { scan-tree-dump "v64sf_copysignf" "vect" } }*/
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, cosf); /* { dg-final { scan-tree-dump "v64sf_cosf" "vect" } }*/
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, coshf); /* { dg-final { scan-tree-dump "v64sf_coshf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, erff); /* { dg-final { scan-tree-dump "v64sf_erff" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, expf); /* { dg-final { scan-tree-dump "v64sf_expf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, exp2f); /* { dg-final { scan-tree-dump "v64sf_exp2f" "vect" } }*/
|
||||
TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf); /* { dg-final { scan-tree-dump "v64sf_fmodf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, gammaf); /* { dg-final { scan-tree-dump "v64sf_gammaf" "vect" { xfail *-*-*} } }*/
|
||||
TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf); /* { dg-final { scan-tree-dump "v64sf_hypotf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, lgammaf); /* { dg-final { scan-tree-dump "v64sf_lgammaf" "vect" { xfail *-*-*} } }*/
|
||||
TEST_FUN (float, -1.0, 50.0, logf); /* { dg-final { scan-tree-dump "v64sf_logf" "vect" } }*/
|
||||
TEST_FUN (float, -1.0, 500.0, log10f); /* { dg-final { scan-tree-dump "v64sf_log10f" "vect" } }*/
|
||||
TEST_FUN (float, -1.0, 64.0, log2f); /* { dg-final { scan-tree-dump "v64sf_log2f" "vect" } }*/
|
||||
TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf); /* { dg-final { scan-tree-dump "v64sf_powf" "vect" } }*/
|
||||
TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf); /* { dg-final { scan-tree-dump "v64sf_remainderf" "vect" } }*/
|
||||
TEST_FUN (float, -50.0, 50.0, rintf); /* { dg-final { scan-tree-dump "v64sf_rintf" "vect" } }*/
|
||||
TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf); /* { dg-final { scan-tree-dump "v64sf_scalbf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, __builtin_significandf); /* { dg-final { scan-tree-dump "v64sf_significandf" "vect" } }*/
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, sinf); /* { dg-final { scan-tree-dump "v64sf_sinf" "vect" } }*/
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf); /* { dg-final { scan-tree-dump "v64sf_sinhf" "vect" } }*/
|
||||
TEST_FUN (float, -0.1, 10000.0, sqrtf); /* { dg-final { scan-tree-dump "v64sf_sqrtf" "vect" } }*/
|
||||
TEST_FUN (float, -5.0, 5.0, tanf); /* { dg-final { scan-tree-dump "v64sf_tanf" "vect" } }*/
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf); /* { dg-final { scan-tree-dump "v64sf_tanhf" "vect" } }*/
|
||||
TEST_FUN (float, -10.0, 10.0, tgammaf); /* { dg-final { scan-tree-dump "v64sf_tgammaf" "vect" } }*/
|
||||
|
||||
TEST_FUN (double, -1.1, 1.1, acos); /* { dg-final { scan-tree-dump "v64df_acos" "vect" } }*/
|
||||
TEST_FUN (double, -10, 10, acosh); /* { dg-final { scan-tree-dump "v64df_acosh" "vect" } }*/
|
||||
TEST_FUN (double, -1.1, 1.1, asin); /* { dg-final { scan-tree-dump "v64df_asin" "vect" } }*/
|
||||
TEST_FUN (double, -10, 10, asinh); /* { dg-final { scan-tree-dump "v64df_asinh" "vect" } }*/
|
||||
TEST_FUN (double, -1.1, 1.1, atan); /* { dg-final { scan-tree-dump "v64df_atan" "vect" } }*/
|
||||
TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2); /* { dg-final { scan-tree-dump "v64df_atan2" "vect" } }*/
|
||||
TEST_FUN (double, -2.0, 2.0, atanh); /* { dg-final { scan-tree-dump "v64df_atanh" "vect" } }*/
|
||||
TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign); /* { dg-final { scan-tree-dump "v64df_copysign" "vect" } }*/
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, cos); /* { dg-final { scan-tree-dump "v64df_cos" "vect" } }*/
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, cosh); /* { dg-final { scan-tree-dump "v64df_cosh" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, erf); /* { dg-final { scan-tree-dump "v64df_erf" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, exp); /* { dg-final { scan-tree-dump "v64df_exp" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, exp2); /* { dg-final { scan-tree-dump "v64df_exp2" "vect" } }*/
|
||||
TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod); /* { dg-final { scan-tree-dump "v64df_fmod" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, gamma); /* { dg-final { scan-tree-dump "v64df_gamma" "vect" { xfail *-*-*} } }*/
|
||||
TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot); /* { dg-final { scan-tree-dump "v64df_hypot" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, lgamma); /* { dg-final { scan-tree-dump "v64df_lgamma" "vect" { xfail *-*-*} } }*/
|
||||
TEST_FUN (double, -1.0, 50.0, log); /* { dg-final { scan-tree-dump "v64df_log" "vect" } }*/
|
||||
TEST_FUN (double, -1.0, 500.0, log10); /* { dg-final { scan-tree-dump "v64df_log10" "vect" } }*/
|
||||
TEST_FUN (double, -1.0, 64.0, log2); /* { dg-final { scan-tree-dump "v64df_log2" "vect" { xfail *-*-*} } }*/
|
||||
TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow); /* { dg-final { scan-tree-dump "v64df_pow" "vect" } }*/
|
||||
TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder); /* { dg-final { scan-tree-dump "v64df_remainder" "vect" } }*/
|
||||
TEST_FUN (double, -50.0, 50.0, rint); /* { dg-final { scan-tree-dump "v64df_rint" "vect" } }*/
|
||||
TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb); /* { dg-final { scan-tree-dump "v64df_scalb" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, __builtin_significand); /* { dg-final { scan-tree-dump "v64df_significand" "vect" } }*/
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, sin); /* { dg-final { scan-tree-dump "v64df_sin" "vect" } }*/
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, sinh); /* { dg-final { scan-tree-dump "v64df_sinh" "vect" } }*/
|
||||
TEST_FUN (double, -0.1, 10000.0, sqrt); /* { dg-final { scan-tree-dump "v64df_sqrt" "vect" } }*/
|
||||
TEST_FUN (double, -5.0, 5.0, tan); /* { dg-final { scan-tree-dump "v64df_tan" "vect" } }*/
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, tanh); /* { dg-final { scan-tree-dump "v64df_tanh" "vect" } }*/
|
||||
TEST_FUN (double, -10.0, 10.0, tgamma); /* { dg-final { scan-tree-dump "v64df_tgamma" "vect" } }*/
|
||||
|
||||
return failed;
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
/* Check that the SIMD versions of math routines give the same (or
|
||||
sufficiently close) results as their scalar equivalents. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
|
||||
/* { dg-set-target-env-var "GCN_STACK_SIZE" "3000000" } */
|
||||
|
||||
#include "simd-math-1.c"
|
||||
@@ -0,0 +1,217 @@
|
||||
/* Check that the SIMD versions of math routines give the same (or
|
||||
sufficiently close) results as their scalar equivalents. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
|
||||
/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mstack-size=3000000 { target offload_target_amdgcn } } */
|
||||
/* { dg-additional-options -foffload-options=-lm } */
|
||||
|
||||
#undef PRINT_RESULT
|
||||
#define VERBOSE 0
|
||||
#define EARLY_EXIT 1
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef PRINT_RESULT
|
||||
#include <stdio.h>
|
||||
#define PRINTF printf
|
||||
#else
|
||||
/* No-op stand-in for printf, selected when PRINT_RESULT is not defined.
   The format string and any further arguments are ignored.  */
static void
null_printf (const char *f, ...)
{
  (void) f;
}
|
||||
|
||||
#define PRINTF null_printf
|
||||
#endif
|
||||
|
||||
#define N 512
|
||||
#define EPSILON_float 1e-5
|
||||
#define EPSILON_double 1e-10
|
||||
|
||||
static int xfail = 0;
|
||||
static int failed = 0;
|
||||
|
||||
/* Return the 1-based position (32 = top bit, 1 = lowest bit) of the most
   significant bit in which the encodings of X and Y differ, or 0 when the
   two encodings are identical.  */
int deviation_float (float x, float y)
{
  union {
    float f;
    unsigned u;
  } first, second;

  first.f = x;
  second.f = y;

  /* Bits set here are the positions where the two values disagree.  */
  unsigned mismatch = first.u ^ second.u;
  unsigned probe = 0x80000000U;
  int pos = 32;

  /* Slide the probe downwards until it lands on a mismatching bit or
     every position has been examined.  */
  while (pos > 0 && !(mismatch & probe))
    {
      probe >>= 1;
      pos--;
    }

  return pos;
}
|
||||
|
||||
/* Return the 1-based position (64 = top bit, 1 = lowest bit) of the most
   significant bit in which the encodings of X and Y differ, or 0 when the
   two encodings are identical.  */
int deviation_double (double x, double y)
{
  union {
    double d;
    unsigned long long u;
  } first, second;

  first.d = x;
  second.d = y;

  /* Bits set here are the positions where the two values disagree.  */
  unsigned long long mismatch = first.u ^ second.u;
  unsigned long long probe = 0x8000000000000000ULL;
  int pos = 64;

  /* Slide the probe downwards until it lands on a mismatching bit or
     every position has been examined.  */
  while (pos > 0 && !(mismatch & probe))
    {
      probe >>= 1;
      pos--;
    }

  return pos;
}
|
||||
|
||||
/* Run TEST_FUN for FUN with the file-scope XFAIL flag raised for the
   duration of the test, then lower the flag again.  Expands to several
   statements, so it must be used where a statement sequence is valid.  */
#define TEST_FUN_XFAIL(TFLOAT, LOW, HIGH, FUN) \
  xfail = 1; \
  TEST_FUN (TFLOAT, LOW, HIGH, FUN); \
  xfail = 0;
|
||||
|
||||
/* Define and immediately run an offloaded test of the one-argument routine
   FUN on element type TFLOAT.  The expansion consists of two function
   definitions followed by a call, so it is meant to be used inside a
   function body (as main does).

   test_FUN fills a[] with N inputs starting at LOW with a step of
   (HIGH - LOW) / N, computes res[i] = FUN (a[i]) inside an
   "omp target parallel for simd" region, and hands both arrays to
   check_FUN.  check_FUN, which carries attributes disabling vectorization
   and unsafe math optimizations, recomputes FUN element by element and
   treats an element as failing when the NaN or infinity status differs, or
   when the absolute difference exceeds EPSILON_<TFLOAT> and
   deviation_<TFLOAT> reports more than 10.  A failing element sets the
   file-scope FAILED flag unless XFAIL is set.

   The step is computed with a double divisor so that integer bounds such
   as "-10, 10" do not collapse to a step of zero.  */
#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
                 || isinf (res[i]) != isinf (expected) \
                 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      if (VERBOSE || fail) \
        PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
                a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      failed |= (fail && !xfail); \
      if (EARLY_EXIT && failed) \
        exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N]; \
    for (int i = 0; i < N; i++) \
      a[i] = (LOW) + (((HIGH) - (LOW)) / (double) N) * i; \
    _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i]); \
    check_##FUN (res, a); \
  } \
  test_##FUN ();
|
||||
|
||||
/* Two-argument counterpart of TEST_FUN: define and immediately run an
   offloaded test of FUN (x, y) on element type TFLOAT.  The expansion
   consists of two function definitions followed by a call, so it is meant
   to be used inside a function body (as main does).

   test_FUN fills a[] from LOW1 in steps of (HIGH1 - LOW1) / N and b[] from
   LOW2 in steps of (HIGH2 - LOW2) / N, computes res[i] = FUN (a[i], b[i])
   inside an "omp target parallel for simd" region with both inputs mapped
   to the device, and hands the arrays to check_FUN.  check_FUN, which
   carries attributes disabling vectorization and unsafe math
   optimizations, recomputes FUN element by element and treats an element
   as failing when the NaN or infinity status differs, or when the absolute
   difference exceeds EPSILON_<TFLOAT> and deviation_<TFLOAT> reports more
   than 10.

   As in TEST_FUN, a failing element sets the file-scope FAILED flag unless
   XFAIL is set; there is no local shadow of FAILED, so main's return value
   reflects failures.  The steps are computed with a double divisor so that
   integer bounds do not collapse to a step of zero.  */
#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
  __attribute__((optimize("no-tree-vectorize"))) \
  __attribute__((optimize("no-unsafe-math-optimizations"))) \
  void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
  { \
    for (int i = 0; i < N; i++) { \
      TFLOAT expected = FUN (a[i], b[i]); \
      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      int deviation = deviation_##TFLOAT (expected, res[i]); \
      int fail = isnan (res[i]) != isnan (expected) \
                 || isinf (res[i]) != isinf (expected) \
                 || (diff > EPSILON_##TFLOAT && deviation > 10); \
      if (VERBOSE || fail) \
        PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
                a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      failed |= (fail && !xfail); \
      if (EARLY_EXIT && failed) \
        exit (1); \
    } \
  } \
  void test_##FUN (void) \
  { \
    TFLOAT res[N], a[N], b[N]; \
    for (int i = 0; i < N; i++) { \
      a[i] = (LOW1) + (((HIGH1) - (LOW1)) / (double) N) * i; \
      b[i] = (LOW2) + (((HIGH2) - (LOW2)) / (double) N) * i; \
    } \
    _Pragma ("omp target parallel for simd map(to:a, b) map(from:res)") \
    for (int i = 0; i < N; i++) \
      res[i] = FUN (a[i], b[i]); \
    check_##FUN (res, a, b); \
  } \
  test_##FUN ();
|
||||
|
||||
int main (void)
|
||||
{
|
||||
TEST_FUN (float, -1.1, 1.1, acosf);
|
||||
TEST_FUN (float, -10, 10, acoshf);
|
||||
TEST_FUN (float, -1.1, 1.1, asinf);
|
||||
TEST_FUN (float, -10, 10, asinhf);
|
||||
TEST_FUN (float, -1.1, 1.1, atanf);
|
||||
TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f);
|
||||
TEST_FUN (float, -2.0, 2.0, atanhf);
|
||||
TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf);
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, cosf);
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, coshf);
|
||||
TEST_FUN (float, -10.0, 10.0, erff);
|
||||
TEST_FUN (float, -10.0, 10.0, expf);
|
||||
TEST_FUN (float, -10.0, 10.0, exp2f);
|
||||
TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf);
|
||||
TEST_FUN (float, -10.0, 10.0, gammaf);
|
||||
TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf);
|
||||
TEST_FUN (float, -10.0, 10.0, lgammaf);
|
||||
TEST_FUN (float, -1.0, 50.0, logf);
|
||||
TEST_FUN (float, -1.0, 500.0, log10f);
|
||||
TEST_FUN (float, -1.0, 64.0, log2f);
|
||||
TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf);
|
||||
TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf);
|
||||
TEST_FUN (float, -50.0, 50.0, rintf);
|
||||
TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf);
|
||||
TEST_FUN (float, -10.0, 10.0, __builtin_significandf);
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, sinf);
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf);
|
||||
TEST_FUN (float, -0.1, 10000.0, sqrtf);
|
||||
TEST_FUN (float, -5.0, 5.0, tanf);
|
||||
TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf);
|
||||
/* Newlib's version of tgammaf is known to have poor accuracy. */
|
||||
TEST_FUN_XFAIL (float, -10.0, 10.0, tgammaf);
|
||||
|
||||
TEST_FUN (double, -1.1, 1.1, acos);
|
||||
TEST_FUN (double, -10, 10, acosh);
|
||||
TEST_FUN (double, -1.1, 1.1, asin);
|
||||
TEST_FUN (double, -10, 10, asinh);
|
||||
TEST_FUN (double, -1.1, 1.1, atan);
|
||||
TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2);
|
||||
TEST_FUN (double, -2.0, 2.0, atanh);
|
||||
TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign);
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, cos);
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, cosh);
|
||||
TEST_FUN (double, -10.0, 10.0, erf);
|
||||
TEST_FUN (double, -10.0, 10.0, exp);
|
||||
TEST_FUN (double, -10.0, 10.0, exp2);
|
||||
TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod);
|
||||
TEST_FUN (double, -10.0, 10.0, gamma);
|
||||
TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot);
|
||||
TEST_FUN (double, -10.0, 10.0, lgamma);
|
||||
TEST_FUN (double, -1.0, 50.0, log);
|
||||
TEST_FUN (double, -1.0, 500.0, log10);
|
||||
TEST_FUN (double, -1.0, 64.0, log2);
|
||||
TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow);
|
||||
TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder);
|
||||
TEST_FUN (double, -50.0, 50.0, rint);
|
||||
TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb);
|
||||
TEST_FUN (double, -10.0, 10.0, __builtin_significand);
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, sin);
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, sinh);
|
||||
TEST_FUN (double, -0.1, 10000.0, sqrt);
|
||||
TEST_FUN (double, -5.0, 5.0, tan);
|
||||
TEST_FUN (double, -3.14159265359, 3.14159265359, tanh);
|
||||
/* Newlib's version of tgamma is known to have poor accuracy. */
|
||||
TEST_FUN_XFAIL (double, -10.0, 10.0, tgamma);
|
||||
|
||||
return failed;
|
||||
}
|
||||
Reference in New Issue
Block a user