Makefile.in (tree-loop-linear.o): Added.
2004-09-08 Daniel Berlin <dberlin@dberlin.org> * Makefile.in (tree-loop-linear.o): Added. (OBJS-common): Add tree-loop-linear.o * common.opt: New flag, ftree-loop-linear. * timevar.def: New timevar, TV_TREE_LOOP_LINEAR. * tree-flow.h: Add prototype for linear_transform_loops. * tree-optimize.c: Add linear transform after vectorization. * tree-pass.h: Add struct pass_linear_transform. * tree-ssa-loop.c: Add pass_linear_transform. * tree-loop-linear.c: New file. * lambda-code.c: gcc_assertify. (gcc_loop_to_lambda_loop): Handle all exit tests. Handle case where we have (invariant >= induction var). (find_induction_var_from_exit_cond): Ditto. From-SVN: r87190
This commit is contained in:
parent
6e45f57bf3
commit
599eabdb42
@ -1,3 +1,19 @@
|
||||
2004-09-08 Daniel Berlin <dberlin@dberlin.org>
|
||||
|
||||
* Makefile.in (tree-loop-linear.o): Added.
|
||||
(OBJS-common): Add tree-loop-linear.o
|
||||
* common.opt: New flag, ftree-loop-linear.
|
||||
* timevar.def: New timevar, TV_TREE_LOOP_LINEAR.
|
||||
* tree-flow.h: Add prototype for linear_transform_loops.
|
||||
* tree-optimize.c: Add linear transform after vectorization.
|
||||
* tree-pass.h: Add struct pass_linear_transform.
|
||||
* tree-ssa-loop.c: Add pass_linear_transform.
|
||||
* tree-loop-linear.c: New file.
|
||||
* lambda-code.c: gcc_assertify.
|
||||
(gcc_loop_to_lambda_loop): Handle all exit tests.
|
||||
Handle case where we have (invariant >= induction var).
|
||||
(find_induction_var_from_exit_cond): Ditto.
|
||||
|
||||
2004-09-08 Jie Zhang <zhangjie@magima.com.cn>
|
||||
|
||||
* tree-ssa-alias.c (compute_flow_insensitive_aliasing): If type
|
||||
|
@ -927,7 +927,7 @@ OBJS-common = \
|
||||
varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \
|
||||
et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \
|
||||
rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \
|
||||
lambda-trans.o lambda-code.o
|
||||
lambda-trans.o lambda-code.o tree-loop-linear.o
|
||||
|
||||
OBJS-md = $(out_object_file)
|
||||
OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \
|
||||
@ -1774,6 +1774,10 @@ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
|
||||
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
|
||||
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
|
||||
tree-vectorizer.h tree-data-ref.h $(SCEV_H)
|
||||
tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
|
||||
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
|
||||
$(TREE_DATA_REF_H) $(SCEV_H)
|
||||
tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \
|
||||
$(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H)
|
||||
tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \
|
||||
|
@ -864,6 +864,10 @@ ftree-loop-im
|
||||
Common Report Var(flag_tree_loop_im) Init(1)
|
||||
Enable loop invariant motion on trees
|
||||
|
||||
ftree-loop-linear
|
||||
Common Report Var(flag_tree_loop_linear)
|
||||
Enable linear loop transforms on trees
|
||||
|
||||
ftree-loop-ivcanon
|
||||
Common Report Var(flag_tree_loop_ivcanon)
|
||||
Create canonical induction variables in loops
|
||||
|
@ -316,7 +316,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-funroll-all-loops -funroll-loops -fpeel-loops @gol
|
||||
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
|
||||
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
|
||||
-ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
|
||||
-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
|
||||
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
|
||||
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
|
||||
--param @var{name}=@var{value}
|
||||
@ -4610,6 +4610,10 @@ usually increases code size.
|
||||
Perform loop optimizations on trees. This flag is enabled by default at -O
|
||||
and higher.
|
||||
|
||||
@item -ftree-loop-linear
|
||||
Perform linear loop transformations on tree. This flag can improve cache
|
||||
performance and allow further loop optimizations to take place.
|
||||
|
||||
@item -ftree-lim
|
||||
Perform loop invariant motion on trees. This pass moves only invartiants that
|
||||
would be hard to handle on rtl level (function calls, operations that expand to
|
||||
|
@ -169,8 +169,7 @@ lambda_body_vector_compute_new (lambda_trans_matrix transform,
|
||||
int depth;
|
||||
|
||||
/* Make sure the matrix is square. */
|
||||
if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform))
|
||||
abort ();
|
||||
gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
|
||||
|
||||
depth = LTM_ROWSIZE (transform);
|
||||
|
||||
@ -297,8 +296,7 @@ print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
|
||||
int step;
|
||||
lambda_linear_expression expr;
|
||||
|
||||
if (!loop)
|
||||
abort ();
|
||||
gcc_assert (loop);
|
||||
|
||||
expr = LL_LINEAR_OFFSET (loop);
|
||||
step = LL_STEP (loop);
|
||||
@ -393,8 +391,7 @@ lambda_lattice_compute_base (lambda_loopnest nest)
|
||||
for (i = 0; i < depth; i++)
|
||||
{
|
||||
loop = LN_LOOPS (nest)[i];
|
||||
if (!loop)
|
||||
abort ();
|
||||
gcc_assert (loop);
|
||||
step = LL_STEP (loop);
|
||||
/* If we have a step of 1, then the base is one, and the
|
||||
origin and invariant coefficients are 0. */
|
||||
@ -412,9 +409,8 @@ lambda_lattice_compute_base (lambda_loopnest nest)
|
||||
/* Otherwise, we need the lower bound expression (which must
|
||||
be an affine function) to determine the base. */
|
||||
expression = LL_LOWER_BOUND (loop);
|
||||
if (!expression
|
||||
|| LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1)
|
||||
abort ();
|
||||
gcc_assert (expression && LLE_NEXT (expression)
|
||||
&& LLE_DENOMINATOR (expression) == 1);
|
||||
|
||||
/* The lower triangular portion of the base is going to be the
|
||||
coefficient times the step */
|
||||
@ -556,8 +552,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest,
|
||||
|
||||
size++;
|
||||
/* Need to increase matrix sizes above. */
|
||||
if (size > 127)
|
||||
abort ();
|
||||
gcc_assert (size <= 127);
|
||||
|
||||
}
|
||||
|
||||
/* Then do the exact same thing for the upper bounds. */
|
||||
@ -585,8 +581,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest,
|
||||
A[size][i] = LLE_DENOMINATOR (expression);
|
||||
size++;
|
||||
/* Need to increase matrix sizes above. */
|
||||
if (size > 127)
|
||||
abort ();
|
||||
gcc_assert (size <= 127);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1205,7 +1201,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
||||
tree test;
|
||||
int stepint;
|
||||
int extra = 0;
|
||||
|
||||
tree uboundvar;
|
||||
use_optype uses;
|
||||
|
||||
/* Find out induction var and set the pointer so that the caller can
|
||||
@ -1225,19 +1221,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
||||
}
|
||||
|
||||
test = TREE_OPERAND (exit_cond, 0);
|
||||
if (TREE_CODE (test) != LE_EXPR
|
||||
&& TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR)
|
||||
{
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file,
|
||||
"Unable to convert loop: Loop exit test uses unhandled test condition:");
|
||||
print_generic_stmt (dump_file, test, 0);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
|
||||
{
|
||||
|
||||
@ -1349,25 +1333,39 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
||||
|
||||
return NULL;
|
||||
}
|
||||
if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME)
|
||||
if (invariant_in_loop (loop, TREE_OPERAND (test, 1)))
|
||||
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
|
||||
/* One part of the test may be a loop invariant tree. */
|
||||
if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
|
||||
&& invariant_in_loop (loop, TREE_OPERAND (test, 1)))
|
||||
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
|
||||
else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
|
||||
&& invariant_in_loop (loop, TREE_OPERAND (test, 0)))
|
||||
VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
|
||||
|
||||
/* The non-induction variable part of the test is the upper bound variable.
|
||||
*/
|
||||
if (TREE_OPERAND (test, 0) == inductionvar)
|
||||
uboundvar = TREE_OPERAND (test, 1);
|
||||
else
|
||||
uboundvar = TREE_OPERAND (test, 0);
|
||||
|
||||
|
||||
/* We only size the vectors assuming we have, at max, 2 times as many
|
||||
invariants as we do loops (one for each bound).
|
||||
This is just an arbitrary number, but it has to be matched against the
|
||||
code below. */
|
||||
if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth))
|
||||
abort ();
|
||||
gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
|
||||
|
||||
|
||||
/* We might have some leftover. */
|
||||
if (TREE_CODE (test) == LT_EXPR)
|
||||
extra = -1 * stepint;
|
||||
else if (TREE_CODE (test) == NE_EXPR)
|
||||
extra = -1 * stepint;
|
||||
else if (TREE_CODE (test) == GT_EXPR)
|
||||
extra = -1 * stepint;
|
||||
|
||||
ubound = gcc_tree_to_linear_expression (depth,
|
||||
TREE_OPERAND (test, 1),
|
||||
uboundvar,
|
||||
outerinductionvars,
|
||||
*invariants, extra);
|
||||
if (!ubound)
|
||||
@ -1393,6 +1391,7 @@ static tree
|
||||
find_induction_var_from_exit_cond (struct loop *loop)
|
||||
{
|
||||
tree expr = get_loop_exit_condition (loop);
|
||||
tree ivarop;
|
||||
tree test;
|
||||
if (expr == NULL_TREE)
|
||||
return NULL_TREE;
|
||||
@ -1401,9 +1400,28 @@ find_induction_var_from_exit_cond (struct loop *loop)
|
||||
test = TREE_OPERAND (expr, 0);
|
||||
if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
|
||||
return NULL_TREE;
|
||||
if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME)
|
||||
/* This is a guess. We say that for a <,!=,<= b, a is the induction
|
||||
variable.
|
||||
For >, >=, we guess b is the induction variable.
|
||||
If we are wrong, it'll fail the rest of the induction variable tests, and
|
||||
everything will be fine anyway. */
|
||||
switch (TREE_CODE (test))
|
||||
{
|
||||
case LT_EXPR:
|
||||
case LE_EXPR:
|
||||
case NE_EXPR:
|
||||
ivarop = TREE_OPERAND (test, 0);
|
||||
break;
|
||||
case GT_EXPR:
|
||||
case GE_EXPR:
|
||||
ivarop = TREE_OPERAND (test, 1);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable();
|
||||
}
|
||||
if (TREE_CODE (ivarop) != SSA_NAME)
|
||||
return NULL_TREE;
|
||||
return TREE_OPERAND (test, 0);
|
||||
return ivarop;
|
||||
}
|
||||
|
||||
DEF_VEC_GC_P(lambda_loop);
|
||||
@ -1693,7 +1711,7 @@ lle_to_gcc_expression (lambda_linear_expression lle,
|
||||
name, build_int_cst (integer_type_node,
|
||||
LLE_DENOMINATOR (lle))));
|
||||
else
|
||||
abort ();
|
||||
gcc_unreachable();
|
||||
|
||||
/* name = {ceil, floor}(name/denominator) */
|
||||
name = make_ssa_name (resvar, stmt);
|
||||
@ -1706,9 +1724,8 @@ lle_to_gcc_expression (lambda_linear_expression lle,
|
||||
|
||||
/* Again, out of laziness, we don't handle this case yet. It's not
|
||||
hard, it just hasn't occurred. */
|
||||
if (VEC_length (tree, results) > 2)
|
||||
abort ();
|
||||
|
||||
gcc_assert (VEC_length (tree, results) <= 2);
|
||||
|
||||
/* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
|
||||
if (VEC_length (tree, results) > 1)
|
||||
{
|
||||
@ -1788,10 +1805,9 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
|
||||
cases for now. */
|
||||
offset = LL_LINEAR_OFFSET (newloop);
|
||||
|
||||
if (LLE_DENOMINATOR (offset) != 1
|
||||
|| !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
|
||||
abort ();
|
||||
|
||||
gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
|
||||
lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
|
||||
|
||||
/* Now build the new lower bounds, and insert the statements
|
||||
necessary to generate it on the loop preheader. */
|
||||
newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
|
||||
@ -1929,8 +1945,8 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
|
||||
struct data_dependence_relation *ddr;
|
||||
|
||||
#if defined ENABLE_CHECKING
|
||||
if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops)
|
||||
abort ();
|
||||
gcc_assert (LTM_COLSIZE (trans) == nb_loops
|
||||
&& LTM_ROWSIZE (trans) == nb_loops);
|
||||
#endif
|
||||
|
||||
/* When there is an unknown relation in the dependence_relations, we
|
||||
|
@ -86,6 +86,7 @@ DEFTIMEVAR (TV_LIM , "loop invariant motion")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation")
|
||||
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization")
|
||||
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
|
||||
DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
|
||||
DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal")
|
||||
|
@ -719,6 +719,9 @@ void insert_edge_copies (tree stmt, basic_block bb);
|
||||
extern void build_ssa_operands (tree, stmt_ann_t, stmt_operands_p,
|
||||
stmt_operands_p);
|
||||
|
||||
/* In tree-loop-linear.c */
|
||||
extern void linear_transform_loops (struct loops *);
|
||||
|
||||
/* In gimplify.c */
|
||||
|
||||
tree force_gimple_operand (tree, tree *, bool, tree);
|
||||
|
282
gcc/tree-loop-linear.c
Normal file
282
gcc/tree-loop-linear.c
Normal file
@ -0,0 +1,282 @@
|
||||
/* Linear Loop transforms
|
||||
Copyright (C) 2003, 2004 Free Software Foundation, Inc.
|
||||
Contributed by Daniel Berlin <dberlin@dberlin.org>.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 2, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to the Free
|
||||
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
02111-1307, USA. */
|
||||
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "errors.h"
|
||||
#include "ggc.h"
|
||||
#include "tree.h"
|
||||
#include "target.h"
|
||||
|
||||
#include "rtl.h"
|
||||
#include "basic-block.h"
|
||||
#include "diagnostic.h"
|
||||
#include "tree-flow.h"
|
||||
#include "tree-dump.h"
|
||||
#include "timevar.h"
|
||||
#include "cfgloop.h"
|
||||
#include "expr.h"
|
||||
#include "optabs.h"
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-data-ref.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-pass.h"
|
||||
#include "varray.h"
|
||||
#include "lambda.h"
|
||||
|
||||
/* Linear loop transforms include any composition of interchange,
|
||||
scaling, skewing, and reversal. They are used to change the
|
||||
iteration order of loop nests in order to optimize data locality of
|
||||
traversals, or remove dependences that prevent
|
||||
parallelization/vectorization/etc.
|
||||
|
||||
TODO: Determine reuse vectors/matrix and use it to determine optimal
|
||||
transform matrix for locality purposes.
|
||||
TODO: Completion of partial transforms. */
|
||||
|
||||
/* Gather statistics for loop interchange. Initializes SUM the sum of
|
||||
all the data dependence distances carried by loop LOOP_NUMBER.
|
||||
NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
|
||||
dependence relations for which the loop LOOP_NUMBER is not carrying
|
||||
any dependence. */
|
||||
|
||||
static void
|
||||
gather_interchange_stats (varray_type dependence_relations,
|
||||
unsigned int loop_number,
|
||||
unsigned int *sum,
|
||||
unsigned int *nb_deps_not_carried_by_loop)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
*sum = 0;
|
||||
*nb_deps_not_carried_by_loop = 0;
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
|
||||
{
|
||||
int dist;
|
||||
struct data_dependence_relation *ddr =
|
||||
(struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, i);
|
||||
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||
{
|
||||
/* Some constants will need tweaking, but not something that should
|
||||
be user-accessible. Thus, no --param. */
|
||||
*sum += 100;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* When we know that there is no dependence, we know that there
|
||||
is no reuse of the data. */
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
||||
{
|
||||
/* Ditto on the no --param here */
|
||||
*sum += 1000;
|
||||
continue;
|
||||
}
|
||||
|
||||
dist = DDR_DIST_VECT (ddr)[loop_number];
|
||||
if (dist == 0)
|
||||
*nb_deps_not_carried_by_loop++;
|
||||
else if (dist < 0)
|
||||
*sum += -dist;
|
||||
else
|
||||
*sum += dist;
|
||||
}
|
||||
}
|
||||
|
||||
/* Apply to TRANS any loop interchange that minimize inner loop steps.
|
||||
DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
|
||||
of dependence relations.
|
||||
Returns the new transform matrix. The smaller the reuse vector
|
||||
distances in the inner loops, the fewer the cache misses. */
|
||||
|
||||
static lambda_trans_matrix
|
||||
try_interchange_loops (lambda_trans_matrix trans,
|
||||
unsigned int depth,
|
||||
varray_type dependence_relations)
|
||||
{
|
||||
unsigned int loop_i, loop_j;
|
||||
unsigned int steps_i, steps_j;
|
||||
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
|
||||
struct data_dependence_relation *ddr;
|
||||
|
||||
/* When there is an unknown relation in the dependence_relations, we
|
||||
know that it is no worth looking at this loop nest: give up. */
|
||||
ddr = (struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, 0);
|
||||
if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||
return trans;
|
||||
|
||||
/* LOOP_I is always the outer loop. */
|
||||
for (loop_j = 1; loop_j < depth; loop_j++)
|
||||
for (loop_i = 0; loop_i < loop_j; loop_i++)
|
||||
{
|
||||
gather_interchange_stats (dependence_relations, loop_i, &steps_i,
|
||||
&nb_deps_not_carried_by_i);
|
||||
gather_interchange_stats (dependence_relations, loop_j, &steps_j,
|
||||
&nb_deps_not_carried_by_j);
|
||||
|
||||
/* Heuristics for loop interchange profitability:
|
||||
1. Inner loops should have smallest steps.
|
||||
2. Inner loops should contain more dependence relations not
|
||||
carried by the loop.
|
||||
*/
|
||||
if (steps_i < steps_j
|
||||
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
|
||||
{
|
||||
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
|
||||
|
||||
/* Validate the resulting matrix. When the transformation
|
||||
is not valid, reverse to the previous matrix.
|
||||
|
||||
FIXME: In this case of transformation it could be
|
||||
faster to verify the validity of the interchange
|
||||
without applying the transform to the matrix. But for
|
||||
the moment do it cleanly: this is just a prototype. */
|
||||
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
|
||||
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
|
||||
}
|
||||
}
|
||||
|
||||
return trans;
|
||||
}
|
||||
|
||||
/* Perform a set of linear transforms on LOOPS. */
|
||||
|
||||
void
|
||||
linear_transform_loops (struct loops *loops)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 1; i < loops->num; i++)
|
||||
{
|
||||
unsigned int depth = 0;
|
||||
varray_type datarefs;
|
||||
varray_type dependence_relations;
|
||||
struct loop *loop_nest = loops->parray[i];
|
||||
struct loop *temp;
|
||||
VEC (tree) *oldivs;
|
||||
VEC (tree) *invariants;
|
||||
lambda_loopnest before, after;
|
||||
lambda_trans_matrix trans;
|
||||
bool problem = false;
|
||||
/* If it's not a loop nest, we don't want it.
|
||||
We also don't handle sibling loops properly,
|
||||
which are loops of the following form:
|
||||
for (i = 0; i < 50; i++)
|
||||
{
|
||||
for (j = 0; j < 50; j++)
|
||||
{
|
||||
...
|
||||
}
|
||||
for (j = 0; j < 50; j++)
|
||||
{
|
||||
...
|
||||
}
|
||||
} */
|
||||
if (!loop_nest->inner)
|
||||
continue;
|
||||
for (temp = loop_nest; temp; temp = temp->inner)
|
||||
{
|
||||
flow_loop_scan (temp, LOOP_ALL);
|
||||
/* If we have a sibling loop or multiple exit edges, jump ship. */
|
||||
if (temp->next || temp->num_exits != 1)
|
||||
{
|
||||
problem = true;
|
||||
break;
|
||||
}
|
||||
depth ++;
|
||||
}
|
||||
if (problem)
|
||||
continue;
|
||||
|
||||
/* Analyze data references and dependence relations using scev. */
|
||||
|
||||
VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
|
||||
VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
|
||||
"dependence_relations");
|
||||
|
||||
|
||||
compute_data_dependences_for_loop (depth, loop_nest,
|
||||
&datarefs, &dependence_relations);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
unsigned int j;
|
||||
for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
|
||||
{
|
||||
struct data_dependence_relation *ddr =
|
||||
(struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, j);
|
||||
|
||||
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
|
||||
{
|
||||
fprintf (dump_file, "DISTANCE_V (");
|
||||
print_lambda_vector (dump_file, DDR_DIST_VECT (ddr),
|
||||
loops->num);
|
||||
fprintf (dump_file, ")\n");
|
||||
fprintf (dump_file, "DIRECTION_V (");
|
||||
print_lambda_vector (dump_file, DDR_DIR_VECT (ddr),
|
||||
loops->num);
|
||||
fprintf (dump_file, ")\n");
|
||||
}
|
||||
}
|
||||
fprintf (dump_file, "\n\n");
|
||||
}
|
||||
/* Build the transformation matrix. */
|
||||
trans = lambda_trans_matrix_new (depth, depth);
|
||||
lambda_matrix_id (LTM_MATRIX (trans), depth);
|
||||
trans = try_interchange_loops (trans, depth, dependence_relations);
|
||||
|
||||
/* Check whether the transformation is legal. */
|
||||
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
|
||||
continue;
|
||||
}
|
||||
before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
|
||||
&invariants);
|
||||
if (!before)
|
||||
continue;
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Before:\n");
|
||||
print_lambda_loopnest (dump_file, before, 'i');
|
||||
}
|
||||
|
||||
after = lambda_loopnest_transform (before, trans);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "After:\n");
|
||||
print_lambda_loopnest (dump_file, after, 'u');
|
||||
}
|
||||
lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
|
||||
after, trans);
|
||||
oldivs = NULL;
|
||||
invariants = NULL;
|
||||
free_dependence_relations (dependence_relations);
|
||||
free_data_refs (datarefs);
|
||||
}
|
||||
}
|
@ -397,6 +397,7 @@ init_tree_optimization_passes (void)
|
||||
NEXT_PASS (pass_iv_canon);
|
||||
NEXT_PASS (pass_if_conversion);
|
||||
NEXT_PASS (pass_vectorize);
|
||||
NEXT_PASS (pass_linear_transform);
|
||||
NEXT_PASS (pass_complete_unroll);
|
||||
NEXT_PASS (pass_iv_optimize);
|
||||
NEXT_PASS (pass_loop_done);
|
||||
|
@ -160,6 +160,6 @@ extern struct tree_opt_pass pass_rename_ssa_copies;
|
||||
extern struct tree_opt_pass pass_expand;
|
||||
extern struct tree_opt_pass pass_rest_of_compilation;
|
||||
extern struct tree_opt_pass pass_fre;
|
||||
|
||||
extern struct tree_opt_pass pass_linear_transform;
|
||||
|
||||
#endif /* GCC_TREE_PASS_H */
|
||||
|
@ -194,6 +194,41 @@ struct tree_opt_pass pass_vectorize =
|
||||
0 /* letter */
|
||||
};
|
||||
|
||||
|
||||
/* Loop nest optimizations. */
|
||||
|
||||
static void
|
||||
tree_linear_transform (void)
|
||||
{
|
||||
if (!current_loops)
|
||||
return;
|
||||
|
||||
linear_transform_loops (current_loops);
|
||||
}
|
||||
|
||||
static bool
|
||||
gate_tree_linear_transform (void)
|
||||
{
|
||||
return flag_tree_loop_linear != 0;
|
||||
}
|
||||
|
||||
struct tree_opt_pass pass_linear_transform =
|
||||
{
|
||||
"ltrans", /* name */
|
||||
gate_tree_linear_transform, /* gate */
|
||||
tree_linear_transform, /* execute */
|
||||
NULL, /* sub */
|
||||
NULL, /* next */
|
||||
0, /* static_pass_number */
|
||||
TV_TREE_LINEAR_TRANSFORM, /* tv_id */
|
||||
PROP_cfg | PROP_ssa, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
TODO_dump_func, /* todo_flags_finish */
|
||||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Canonical induction variable creation pass. */
|
||||
|
||||
static void
|
||||
|
Loading…
x
Reference in New Issue
Block a user