1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/patches/gcc/4.3.2/360-fix-expensive-optimize.patch Sat Oct 03 18:49:23 2009 +0200
1.3 @@ -0,0 +1,207 @@
1.4 +PR tree-optimization/32044
1.5 +
1.6 +From: rakdver
1.7 +Date: 2008-12-12 21:32:47 +0100
1.8 +
1.9 +* tree-scalar-evolution.h (expression_expensive_p): Declare.
1.10 +* tree-scalar-evolution.c (expression_expensive_p): New function.
1.11 +(scev_const_prop): Avoid introducing expensive expressions.
1.12 +* tree-ssa-loop-ivopts.c (may_eliminate_iv): Ditto.
1.13 +
1.14 +* gcc.dg/pr34027-1.c: Change outcome.
1.15 +* gcc.dg/tree-ssa/pr32044.c: New test.
1.16 +
1.17 +Cherry-picked from svn://gcc.gnu.org/svn/gcc/trunk, rev 142719, and adapted to
1.18 +apply to gcc 4.3.2.
1.19 +
1.20 +------------------------------------------------------------------------
1.21 +Index: gcc-4.3.2/gcc/tree-scalar-evolution.c
1.22 +===================================================================
1.23 +--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.c 2009-01-28 10:14:37.000000000 +0100
1.24 ++++ gcc-4.3.2/gcc/tree-scalar-evolution.c 2009-01-28 10:17:50.000000000 +0100
1.25 +@@ -2716,6 +2716,50 @@
1.26 + scalar_evolution_info = NULL;
1.27 + }
1.28 +
1.29 ++/* Returns true if the expression EXPR is considered to be too expensive
1.30 ++ for scev_const_prop. */
1.31 ++
1.32 ++bool
1.33 ++expression_expensive_p (tree expr)
1.34 ++{
1.35 ++ enum tree_code code;
1.36 ++
1.37 ++ if (is_gimple_val (expr))
1.38 ++ return false;
1.39 ++
1.40 ++ code = TREE_CODE (expr);
1.41 ++ if (code == TRUNC_DIV_EXPR
1.42 ++ || code == CEIL_DIV_EXPR
1.43 ++ || code == FLOOR_DIV_EXPR
1.44 ++ || code == ROUND_DIV_EXPR
1.45 ++ || code == TRUNC_MOD_EXPR
1.46 ++ || code == CEIL_MOD_EXPR
1.47 ++ || code == FLOOR_MOD_EXPR
1.48 ++ || code == ROUND_MOD_EXPR
1.49 ++ || code == EXACT_DIV_EXPR)
1.50 ++ {
1.51 ++ /* Division by power of two is usually cheap, so we allow it.
1.52 ++ Forbid anything else. */
1.53 ++ if (!integer_pow2p (TREE_OPERAND (expr, 1)))
1.54 ++ return true;
1.55 ++ }
1.56 ++
1.57 ++ switch (TREE_CODE_CLASS (code))
1.58 ++ {
1.59 ++ case tcc_binary:
1.60 ++ case tcc_comparison:
1.61 ++ if (expression_expensive_p (TREE_OPERAND (expr, 1)))
1.62 ++ return true;
1.63 ++
1.64 ++ /* Fallthru. */
1.65 ++ case tcc_unary:
1.66 ++ return expression_expensive_p (TREE_OPERAND (expr, 0));
1.67 ++
1.68 ++ default:
1.69 ++ return true;
1.70 ++ }
1.71 ++}
1.72 ++
1.73 + /* Replace ssa names for that scev can prove they are constant by the
1.74 + appropriate constants. Also perform final value replacement in loops,
1.75 + in case the replacement expressions are cheap.
1.76 +@@ -2802,12 +2846,6 @@
1.77 + continue;
1.78 +
1.79 + niter = number_of_latch_executions (loop);
1.80 +- /* We used to check here whether the computation of NITER is expensive,
1.81 +- and avoided final value elimination if that is the case. The problem
1.82 +- is that it is hard to evaluate whether the expression is too
1.83 +- expensive, as we do not know what optimization opportunities the
1.84 +- the elimination of the final value may reveal. Therefore, we now
1.85 +- eliminate the final values of induction variables unconditionally. */
1.86 + if (niter == chrec_dont_know)
1.87 + continue;
1.88 +
1.89 +@@ -2838,7 +2876,15 @@
1.90 + /* Moving the computation from the loop may prolong life range
1.91 + of some ssa names, which may cause problems if they appear
1.92 + on abnormal edges. */
1.93 +- || contains_abnormal_ssa_name_p (def))
1.94 ++ || contains_abnormal_ssa_name_p (def)
1.95 ++ /* Do not emit expensive expressions. The rationale is that
1.96 ++ when someone writes code like
1.97 ++
1.98 ++ while (n >= 45) n -= 45;
1.99 ++
1.100 ++ they probably know that n is not large, and do not want it
1.101 ++ to be turned into n %= 45. */
1.102 ++ || expression_expensive_p (def))
1.103 + continue;
1.104 +
1.105 + /* Eliminate the PHI node and replace it by a computation outside
1.106 +Index: gcc-4.3.2/gcc/tree-scalar-evolution.h
1.107 +===================================================================
1.108 +--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.h 2009-01-28 10:22:47.000000000 +0100
1.109 ++++ gcc-4.3.2/gcc/tree-scalar-evolution.h 2009-01-28 10:23:10.000000000 +0100
1.110 +@@ -35,6 +35,7 @@
1.111 + extern void scev_analysis (void);
1.112 + unsigned int scev_const_prop (void);
1.113 +
1.114 ++bool expression_expensive_p (tree);
1.115 + extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
1.116 +
1.117 + /* Returns the loop of the polynomial chrec CHREC. */
1.118 +Index: gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c
1.119 +===================================================================
1.120 +--- gcc-4.3.2.orig/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:09.000000000 +0100
1.121 ++++ gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:43.000000000 +0100
1.122 +@@ -8,5 +8,9 @@
1.123 + return ns;
1.124 + }
1.125 +
1.126 +-/* { dg-final { scan-tree-dump "ns % 10000" "optimized" } } */
1.127 ++/* This test was originally introduced to test that we transform
1.128 ++ to ns % 10000. See the discussion of PR 32044 for why we no
1.129 ++ longer do that. */
1.130 ++/* { dg-final { scan-tree-dump-times "%" 0 "optimized" } } */
1.131 ++/* { dg-final { scan-tree-dump-times "/" 0 "optimized" } } */
1.132 + /* { dg-final { cleanup-tree-dump "optimized" } } */
1.133 +Index: gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c
1.134 +===================================================================
1.135 +--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1.136 ++++ gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c 2009-01-28 10:25:50.000000000 +0100
1.137 +@@ -0,0 +1,55 @@
1.138 ++/* { dg-do compile } */
1.139 ++/* { dg-options "-O2 -fdump-tree-empty -fdump-tree-final_cleanup" } */
1.140 ++
1.141 ++int foo (int n)
1.142 ++{
1.143 ++ while (n >= 45)
1.144 ++ n -= 45;
1.145 ++
1.146 ++ return n;
1.147 ++}
1.148 ++
1.149 ++int bar (int n)
1.150 ++{
1.151 ++ while (n >= 64)
1.152 ++ n -= 64;
1.153 ++
1.154 ++ return n;
1.155 ++}
1.156 ++
1.157 ++int bla (int n)
1.158 ++{
1.159 ++ int i = 0;
1.160 ++
1.161 ++ while (n >= 45)
1.162 ++ {
1.163 ++ i++;
1.164 ++ n -= 45;
1.165 ++ }
1.166 ++
1.167 ++ return i;
1.168 ++}
1.169 ++
1.170 ++int baz (int n)
1.171 ++{
1.172 ++ int i = 0;
1.173 ++
1.174 ++ while (n >= 64)
1.175 ++ {
1.176 ++ i++;
1.177 ++ n -= 64;
1.178 ++ }
1.179 ++
1.180 ++ return i;
1.181 ++}
1.182 ++
1.183 ++/* The loops computing division/modulo by 64 should be eliminated. */
1.184 ++/* { dg-final { scan-tree-dump-times "Removing empty loop" 2 "empty" } } */
1.185 ++
1.186 ++/* There should be no division/modulo in the final dump (division and modulo
1.187 ++ by 64 are done using bit operations). */
1.188 ++/* { dg-final { scan-tree-dump-times "/" 0 "final_cleanup" } } */
1.189 ++/* { dg-final { scan-tree-dump-times "%" 0 "final_cleanup" } } */
1.190 ++
1.191 ++/* { dg-final { cleanup-tree-dump "empty" } } */
1.192 ++/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
1.193 +Index: gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c
1.194 +===================================================================
1.195 +--- gcc-4.3.2.orig/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:26:04.000000000 +0100
1.196 ++++ gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:27:09.000000000 +0100
1.197 +@@ -3778,7 +3778,12 @@
1.198 + return false;
1.199 +
1.200 + cand_value_at (loop, cand, use->stmt, nit, &bnd);
1.201 ++
1.202 + *bound = aff_combination_to_tree (&bnd);
1.203 ++ /* It is unlikely that computing the number of iterations using division
1.204 ++ would be more profitable than keeping the original induction variable. */
1.205 ++ if (expression_expensive_p (*bound))
1.206 ++ return false;
1.207 + return true;
1.208 + }
1.209 +
1.210 +