tree-optimization/110381 - preserve SLP permutation with in-order reductions
The following fixes a bug that manifests itself during fold-left reduction transform in picking not the last scalar def to replace and thus double-counting some elements. But the underlying issue is that we merge a load permutation into the in-order reduction which is of course wrong. Now, reduction analysis has not yet been performed when optimizing permutations so we have to resort to checking that ourselves. PR tree-optimization/110381 * tree-vect-slp.cc (vect_optimize_slp_pass::start_choosing_layouts): Materialize permutes before fold-left reductions. * gcc.dg/vect/pr110381.c: New testcase. (cherry picked from commit 53d6f57c1b20c6da52aefce737fb7d5263686ba3)
This commit is contained in:
parent
857d763ed1
commit
32c7f05f8b
45
gcc/testsuite/gcc.dg/vect/pr110381.c
Normal file
45
gcc/testsuite/gcc.dg/vect/pr110381.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target vect_float_strict } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Element type of the array summed by sum_8_foos: three doubles.
   sum_8_foos reads the members in the order a, c, b, which differs
   from the declaration order below.  */
struct FOO {
|
||||
double a;
|
||||
double b;
|
||||
double c;
|
||||
};
|
||||
|
||||
/* Return the sum of the a, c and b members of FOOS[0] .. FOOS[7].

   The accumulator starts at 0 and the members are added one at a time,
   for each element in the order a, then c, then b.  Floating-point
   addition is not associative, so the result depends on that order being
   kept.

   NOTE(review): noipa is presumably there so the call from main is not
   inlined or otherwise optimized interprocedurally -- confirm against the
   GCC attribute documentation.  */
double __attribute__((noipa))
|
||||
sum_8_foos(const struct FOO* foos)
|
||||
{
|
||||
double sum = 0;
|
||||
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
/* Copy the whole element, then read its members from the copy.  */
struct FOO foo = foos[i];
|
||||
|
||||
/* Need to use an in-order reduction here, preserving
|
||||
the load permutation. */
|
||||
sum += foo.a;
|
||||
sum += foo.c;
|
||||
sum += foo.b;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Test driver: build an input for which the order of the additions in
   sum_8_foos changes the result, and abort if the result is not the one
   obtained by adding in source order.  Returns 0 on success.  */
int main()
|
||||
{
|
||||
struct FOO foos[8];
|
||||
|
||||
/* Declared via tree-vect.h; presumably checks that the target can
   execute vector code -- its definition is not visible here.  */
check_vect ();
|
||||
|
||||
/* All elements are zero except foos[0], set up below.  */
__builtin_memset (foos, 0, sizeof (foos));
|
||||
/* Added in the order a, c, b this gives DBL_MAX + -DBL_MAX + 5 == 5.
   If b were added before c, the 5 would be absorbed when added to
   DBL_MAX and the total would be 0 instead.  */
foos[0].a = __DBL_MAX__;
|
||||
foos[0].b = 5;
|
||||
foos[0].c = -__DBL_MAX__;
|
||||
|
||||
/* Any result other than 5 means the additions were reordered or an
   element was counted more than once.  */
if (sum_8_foos (foos) != 5)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
@ -4670,14 +4670,28 @@ vect_optimize_slp_pass::start_choosing_layouts ()
|
||||
m_partition_layout_costs.safe_grow_cleared (m_partitions.length ()
|
||||
* m_perms.length ());
|
||||
|
||||
/* We have to mark outgoing permutations facing non-reduction graph
|
||||
entries that are not represented as to be materialized. */
|
||||
/* We have to mark outgoing permutations facing non-associating-reduction
|
||||
graph entries that are not represented as to be materialized.
|
||||
slp_inst_kind_bb_reduc currently only covers associatable reductions. */
|
||||
for (slp_instance instance : m_vinfo->slp_instances)
|
||||
if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_ctor)
|
||||
{
|
||||
unsigned int node_i = SLP_INSTANCE_TREE (instance)->vertex;
|
||||
m_partitions[m_vertices[node_i].partition].layout = 0;
|
||||
}
|
||||
else if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_reduc_chain)
|
||||
{
|
||||
stmt_vec_info stmt_info
|
||||
= SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (instance));
|
||||
stmt_vec_info reduc_info = info_for_reduction (m_vinfo, stmt_info);
|
||||
if (needs_fold_left_reduction_p (TREE_TYPE
|
||||
(gimple_get_lhs (stmt_info->stmt)),
|
||||
STMT_VINFO_REDUC_CODE (reduc_info)))
|
||||
{
|
||||
unsigned int node_i = SLP_INSTANCE_TREE (instance)->vertex;
|
||||
m_partitions[m_vertices[node_i].partition].layout = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check which layouts each node and partition can handle. Calculate the
|
||||
weights associated with inserting layout changes on edges. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user