diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-29 12:37:05 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-29 12:37:05 +0000 |
commit | 12cb78d1cca1387a092ec0bd49c250340bff4afc (patch) | |
tree | 1eab97da96906e0a2786d51d9f25f20de02befcf /gcc/expmed.c | |
parent | 31879e18aea3222fe3e56f2c0319c9f230645ff3 (diff) | |
download | gcc-12cb78d1cca1387a092ec0bd49c250340bff4afc.tar.gz |
2012-08-29 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 190745 using svnmerge, notably
C++ conversion.
[gcc/]
2012-08-29 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with trunk, converted to C++}}
* melt-runtime.h (MELT_FLEXIBLE_DIM): Set when C++.
* melt-runtime.c (melt_tempdir_path): Don't use choose_tmpdir from
libiberty.
(meltgc_start_module_by_index): Use address-of & on VEC_index.
(melt_really_initialize): When printing builtin settings, handle
GCC 4.8 as with implicit ENABLE_BUILD_WITH_CXX.
(meltgc_out_edge): Provide additional flag TDF_DETAILS for dump_edge_info.
(melt_val2passflag): Handle PROP_referenced_vars only when defined.
* melt-module.mk: Use GCCMELT_COMPILER instead of GCCMELT_CC.
* melt-build-script.tpl: Transmit GCCMELT_COMPILER on every make
using melt-module.mk and improve the error message.
* melt-build-script.sh: Regenerate.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@190778 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/expmed.c')
-rw-r--r-- | gcc/expmed.c | 344 |
1 file changed, 210 insertions, 134 deletions
diff --git a/gcc/expmed.c b/gcc/expmed.c index 989f86cbe7a..6c94a90fb84 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -101,7 +101,6 @@ struct init_expmed_rtl struct rtx_def mult; rtunion mult_fld1; struct rtx_def sdiv; rtunion sdiv_fld1; struct rtx_def udiv; rtunion udiv_fld1; - struct rtx_def zext; struct rtx_def sdiv_32; rtunion sdiv_32_fld1; struct rtx_def smod_32; rtunion smod_32_fld1; struct rtx_def wide_mult; rtunion wide_mult_fld1; @@ -112,16 +111,41 @@ struct init_expmed_rtl struct rtx_def shift_add; rtunion shift_add_fld1; struct rtx_def shift_sub0; rtunion shift_sub0_fld1; struct rtx_def shift_sub1; rtunion shift_sub1_fld1; + struct rtx_def zext; + struct rtx_def trunc; rtx pow2[MAX_BITS_PER_WORD]; rtx cint[MAX_BITS_PER_WORD]; }; static void +init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode, + enum machine_mode from_mode, bool speed) +{ + int to_size, from_size; + rtx which; + + /* We're given no information about the true size of a partial integer, + only the size of the "full" integer it requires for storage. For + comparison purposes here, reduce the bit size by one in that case. */ + to_size = (GET_MODE_BITSIZE (to_mode) + - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT)); + from_size = (GET_MODE_BITSIZE (from_mode) + - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT)); + + /* Assume cost of zero-extend and sign-extend is the same. */ + which = (to_size < from_size ? 
&all->trunc : &all->zext); + + PUT_MODE (&all->reg, from_mode); + set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed)); +} + +static void init_expmed_one_mode (struct init_expmed_rtl *all, enum machine_mode mode, int speed) { int m, n, mode_bitsize; + enum machine_mode mode_from; mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); @@ -139,21 +163,27 @@ init_expmed_one_mode (struct init_expmed_rtl *all, PUT_MODE (&all->shift_add, mode); PUT_MODE (&all->shift_sub0, mode); PUT_MODE (&all->shift_sub1, mode); + PUT_MODE (&all->zext, mode); + PUT_MODE (&all->trunc, mode); - add_cost[speed][mode] = set_src_cost (&all->plus, speed); - neg_cost[speed][mode] = set_src_cost (&all->neg, speed); - mul_cost[speed][mode] = set_src_cost (&all->mult, speed); - sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed); - udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed); + set_add_cost (speed, mode, set_src_cost (&all->plus, speed)); + set_neg_cost (speed, mode, set_src_cost (&all->neg, speed)); + set_mul_cost (speed, mode, set_src_cost (&all->mult, speed)); + set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed)); + set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed)); - sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed) - <= 2 * add_cost[speed][mode]); - smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed) - <= 4 * add_cost[speed][mode]); + set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed) + <= 2 * add_cost (speed, mode))); + set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed) + <= 4 * add_cost (speed, mode))); - shift_cost[speed][mode][0] = 0; - shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0] - = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode]; + set_shift_cost (speed, mode, 0, 0); + { + int cost = add_cost (speed, mode); + set_shiftadd_cost (speed, mode, 0, cost); + set_shiftsub0_cost (speed, mode, 0, cost); + set_shiftsub1_cost (speed, mode, 0, 
cost); + } n = MIN (MAX_BITS_PER_WORD, mode_bitsize); for (m = 1; m < n; m++) @@ -161,16 +191,21 @@ init_expmed_one_mode (struct init_expmed_rtl *all, XEXP (&all->shift, 1) = all->cint[m]; XEXP (&all->shift_mult, 1) = all->pow2[m]; - shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed); - shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed); - shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed); - shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed); + set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed)); + set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed)); + set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed)); + set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed)); } if (SCALAR_INT_MODE_P (mode)) { - enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); - + for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; + mode_from = (enum machine_mode)(mode_from + 1)) + init_expmed_one_conv (all, mode, mode_from, speed); + } + if (GET_MODE_CLASS (mode) == MODE_INT) + { + enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); if (wider_mode != VOIDmode) { PUT_MODE (&all->zext, wider_mode); @@ -178,10 +213,10 @@ init_expmed_one_mode (struct init_expmed_rtl *all, PUT_MODE (&all->wide_lshr, wider_mode); XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize); - mul_widen_cost[speed][wider_mode] - = set_src_cost (&all->wide_mult, speed); - mul_highpart_cost[speed][mode] - = set_src_cost (&all->wide_trunc, speed); + set_mul_widen_cost (speed, wider_mode, + set_src_cost (&all->wide_mult, speed)); + set_mul_highpart_cost (speed, mode, + set_src_cost (&all->wide_trunc, speed)); } } } @@ -262,26 +297,36 @@ init_expmed (void) XEXP (&all.shift_sub1, 0) = &all.reg; XEXP (&all.shift_sub1, 1) = &all.shift_mult; + PUT_CODE (&all.trunc, TRUNCATE); + XEXP (&all.trunc, 0) = &all.reg; + for (speed = 0; speed < 2; speed++) { 
crtl->maybe_hot_insn_p = speed; - zero_cost[speed] = set_src_cost (const0_rtx, speed); + set_zero_cost (speed, set_src_cost (const0_rtx, speed)); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) + for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; + mode = (enum machine_mode)(mode + 1)) init_expmed_one_mode (&all, mode, speed); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) - init_expmed_one_mode (&all, mode, speed); + if (MIN_MODE_PARTIAL_INT != VOIDmode) + for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; + mode = (enum machine_mode)(mode + 1)) + init_expmed_one_mode (&all, mode, speed); + + if (MIN_MODE_VECTOR_INT != VOIDmode) + for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; + mode = (enum machine_mode)(mode + 1)) + init_expmed_one_mode (&all, mode, speed); } - if (alg_hash_used_p) - memset (alg_hash, 0, sizeof (alg_hash)); + if (alg_hash_used_p ()) + { + struct alg_hash_entry *p = alg_hash_entry_ptr (0); + memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); + } else - alg_hash_used_p = true; + set_alg_hash_used_p (true); default_rtl_profile (); } @@ -2229,8 +2274,9 @@ expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted, && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_PRECISION (mode) && INTVAL (op1) < MAX_BITS_PER_WORD - && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] - && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) + && (shift_cost (speed, mode, INTVAL (op1)) + > INTVAL (op1) * add_cost (speed, mode)) + && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) { int i; for (i = 0; i < INTVAL (op1); i++) @@ -2381,8 +2427,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, const struct algorithm *, enum mult_variant); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static rtx extract_high_half (enum 
machine_mode, rtx); -static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); -static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, +static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); +static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, int, int); /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit @@ -2406,6 +2452,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, enum alg_code cache_alg = alg_zero; bool speed = optimize_insn_for_speed_p (); enum machine_mode imode; + struct alg_hash_entry *entry_ptr; /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2440,13 +2487,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, fail now. */ if (t == 0) { - if (MULT_COST_LESS (cost_limit, zero_cost[speed])) + if (MULT_COST_LESS (cost_limit, zero_cost (speed))) return; else { alg_out->ops = 1; - alg_out->cost.cost = zero_cost[speed]; - alg_out->cost.latency = zero_cost[speed]; + alg_out->cost.cost = zero_cost (speed); + alg_out->cost.latency = zero_cost (speed); alg_out->op[0] = alg_zero; return; } @@ -2462,19 +2509,20 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; /* See if we already know what to do for T. 
*/ - if (alg_hash[hash_index].t == t - && alg_hash[hash_index].mode == mode - && alg_hash[hash_index].mode == mode - && alg_hash[hash_index].speed == speed - && alg_hash[hash_index].alg != alg_unknown) + entry_ptr = alg_hash_entry_ptr (hash_index); + if (entry_ptr->t == t + && entry_ptr->mode == mode + && entry_ptr->mode == mode + && entry_ptr->speed == speed + && entry_ptr->alg != alg_unknown) { - cache_alg = alg_hash[hash_index].alg; + cache_alg = entry_ptr->alg; if (cache_alg == alg_impossible) { /* The cache tells us that it's impossible to synthesize - multiplication by T within alg_hash[hash_index].cost. */ - if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) + multiplication by T within entry_ptr->cost. */ + if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) /* COST_LIMIT is at least as restrictive as the one recorded in the hash table, in which case we have no hope of synthesizing a multiplication. Just @@ -2488,7 +2536,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, } else { - if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) + if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) /* The cached algorithm shows that this multiplication requires more cost than COST_LIMIT. Just return. This way, we don't clobber this cache entry with @@ -2534,10 +2582,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = t >> m; /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is given as - MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ - op_cost = m * add_cost[speed][mode]; - if (shift_cost[speed][mode][m] < op_cost) - op_cost = shift_cost[speed][mode][m]; + MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). 
*/ + op_cost = m * add_cost (speed, mode); + if (shift_cost (speed, mode, m) < op_cost) + op_cost = shift_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2564,11 +2612,11 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = ~(~orig_t >> m); /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is - given as MIN (m * add_cost[speed][mode], - shift_cost[speed][mode][m]). */ - op_cost = m * add_cost[speed][mode]; - if (shift_cost[speed][mode][m] < op_cost) - op_cost = shift_cost[speed][mode][m]; + given as MIN (m * add_cost(speed, mode), + shift_cost(speed, mode, m)). */ + op_cost = m * add_cost (speed, mode); + if (shift_cost (speed, mode, m) < op_cost) + op_cost = shift_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2610,7 +2658,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t + 1, &new_limit, mode); @@ -2630,7 +2678,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. 
*/ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t - 1, &new_limit, mode); @@ -2652,7 +2700,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (-orig_t + 1); if (m >= 0 && m < maxm) { - op_cost = shiftsub1_cost[speed][mode][m]; + op_cost = shiftsub1_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, @@ -2699,14 +2747,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to its cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. */ - op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; - if (shiftadd_cost[speed][mode][m] < op_cost) + op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); + if (shiftadd_cost (speed, mode, m) < op_cost) { - op_cost = shiftadd_cost[speed][mode][m]; + op_cost = shiftadd_cost (speed, mode, m); op_latency = op_cost; } else - op_latency = add_cost[speed][mode]; + op_latency = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2738,14 +2786,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to it's cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. 
*/ - op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; - if (shiftsub0_cost[speed][mode][m] < op_cost) + op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); + if (shiftsub0_cost (speed, mode, m) < op_cost) { - op_cost = shiftsub0_cost[speed][mode][m]; + op_cost = shiftsub0_cost (speed, mode, m); op_latency = op_cost; } else - op_latency = add_cost[speed][mode]; + op_latency = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2779,7 +2827,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftadd_cost[speed][mode][m]; + op_cost = shiftadd_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); @@ -2804,7 +2852,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftsub0_cost[speed][mode][m]; + op_cost = shiftsub0_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); @@ -2833,23 +2881,23 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, we are asked to find an algorithm for T within the same or lower COST_LIMIT, we can immediately return to the caller. */ - alg_hash[hash_index].t = t; - alg_hash[hash_index].mode = mode; - alg_hash[hash_index].speed = speed; - alg_hash[hash_index].alg = alg_impossible; - alg_hash[hash_index].cost = *cost_limit; + entry_ptr->t = t; + entry_ptr->mode = mode; + entry_ptr->speed = speed; + entry_ptr->alg = alg_impossible; + entry_ptr->cost = *cost_limit; return; } /* Cache the result. 
*/ if (!cache_hit) { - alg_hash[hash_index].t = t; - alg_hash[hash_index].mode = mode; - alg_hash[hash_index].speed = speed; - alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; - alg_hash[hash_index].cost.cost = best_cost.cost; - alg_hash[hash_index].cost.latency = best_cost.latency; + entry_ptr->t = t; + entry_ptr->mode = mode; + entry_ptr->speed = speed; + entry_ptr->alg = best_alg->op[best_alg->ops]; + entry_ptr->cost.cost = best_cost.cost; + entry_ptr->cost.latency = best_cost.latency; } /* If we are getting a too long sequence for `struct algorithm' @@ -2895,7 +2943,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, /* Ensure that mult_cost provides a reasonable upper bound. Any constant multiplication can be performed with less than 2 * bits additions. */ - op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode]; + op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); if (mult_cost > op_cost) mult_cost = op_cost; @@ -2908,7 +2956,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, `unsigned int' */ if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) { - op_cost = neg_cost[speed][mode]; + op_cost = neg_cost(speed, mode); if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2928,7 +2976,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, } /* This proves very useful for division-by-constant. */ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -3169,7 +3217,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, coeff = INTVAL (scalar_op1); is_neg = coeff < 0; } - else if (CONST_DOUBLE_P (scalar_op1)) + else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) { /* If we are multiplying in DImode, it may still be a win to try to work with shifts and adds. 
*/ @@ -3192,6 +3240,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, } goto skip_synth; } + else + goto skip_synth; } else goto skip_synth; @@ -3211,7 +3261,6 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Attempt to handle multiplication of DImode values by negative coefficients, by performing the multiplication by a positive multiplier and then inverting the result. */ - /* ??? How is this not slightly redundant with the neg variant? */ if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) { /* Its safe to use -coeff even for INT_MIN, as the @@ -3219,7 +3268,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed) - - neg_cost[speed][mode]); + - neg_cost(speed, mode)); if (max_cost > 0 && choose_mult_variant (mode, -coeff, &algorithm, &variant, max_cost)) @@ -3228,6 +3277,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, &algorithm, variant); return expand_unop (mode, neg_optab, temp, target, 0); } + goto skip_synth; } /* Exclude cost of op0 from max_cost to match the cost @@ -3240,7 +3290,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, skip_synth: /* Expand x*2.0 as x+x. */ - if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode)) + if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)) { REAL_VALUE_TYPE d; REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1); @@ -3262,6 +3312,24 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, return op0; } +/* Return a cost estimate for multiplying a register by the given + COEFFicient in the given MODE and SPEED. 
*/ + +int +mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed) +{ + int max_cost; + struct algorithm algorithm; + enum mult_variant variant; + + rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); + max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); + if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) + return algorithm.cost.cost; + else + return max_cost; +} + /* Perform a widening multiplication and return an rtx for the result. MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); TARGET is a suggestion for where to store the result (an rtx). @@ -3302,7 +3370,7 @@ expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = mul_widen_cost[speed][mode]; + max_cost = mul_widen_cost (speed, mode); if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) { @@ -3477,7 +3545,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, return target; } -/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ +/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ static rtx extract_high_half (enum machine_mode mode, rtx op) @@ -3495,11 +3563,11 @@ extract_high_half (enum machine_mode mode, rtx op) return convert_modes (mode, wider_mode, op, 0); } -/* Like expand_mult_highpart, but only consider using a multiplication +/* Like expmed_mult_highpart, but only consider using a multiplication optab. OP1 is an rtx for the constant operand. 
*/ static rtx -expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); @@ -3516,7 +3584,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[speed][mode] < max_cost) + if (mul_highpart_cost (speed, mode) < max_cost) { moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3528,8 +3596,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1] - + 4 * add_cost[speed][mode] < max_cost)) + && (mul_highpart_cost (speed, mode) + + 2 * shift_cost (speed, mode, size-1) + + 4 * add_cost (speed, mode) < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3543,7 +3612,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing - && mul_widen_cost[speed][wider_mode] < max_cost) + && mul_widen_cost (speed, wider_mode) < max_cost) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, unsignedp, OPTAB_WIDEN); @@ -3554,7 +3623,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening the mode and perform a non-widening multiplication. 
*/ if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost) + && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) + < max_cost)) { rtx insns, wop0, wop1; @@ -3581,8 +3651,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, moptab = unsignedp ? smul_widen_optab : umul_widen_optab; if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1] - + 4 * add_cost[speed][mode] < max_cost)) + && (mul_widen_cost (speed, wider_mode) + + 2 * shift_cost (speed, mode, size-1) + + 4 * add_cost (speed, mode) < max_cost)) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, NULL_RTX, ! unsignedp, OPTAB_WIDEN); @@ -3610,7 +3681,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, MAX_COST is the total allowed cost for the expanded RTL. */ static rtx -expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); @@ -3633,16 +3704,16 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, mode == word_mode, however all the cost calculations in synth_mult etc. assume single-word operations. */ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); - extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; + extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); /* Check whether we try to multiply by a negative constant. 
*/ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { sign_adjust = true; - extra_cost += add_cost[speed][mode]; + extra_cost += add_cost (speed, mode); } /* See whether shift/add multiplication is cheap enough. */ @@ -3651,7 +3722,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, { /* See whether the specialized multiplication optabs are cheaper than the shift/add version. */ - tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, + tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, alg.cost.cost + extra_cost); if (tem) return tem; @@ -3666,7 +3737,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, return tem; } - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); } @@ -3832,7 +3903,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) temp = gen_reg_rtx (mode); temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); - if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1)) + if (shift_cost (optimize_insn_for_speed_p (), mode, ushift) + > COSTS_N_INSNS (1)) temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); else @@ -3940,7 +4012,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, In all cases but EXACT_DIV_EXPR, this multiplication requires the upper half of the product. Different strategies for generating the product are - implemented in expand_mult_highpart. + implemented in expmed_mult_highpart. If what we actually want is the remainder, we generate that by another by-constant multiplication and a subtraction. */ @@ -3990,7 +4062,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, mode for which we can do the operation with a library call. */ /* We might want to refine this now that we have division-by-constant - optimization. 
Since expand_mult_highpart tries so many variants, it is + optimization. Since expmed_mult_highpart tries so many variants, it is not straightforward to generalize this. Maybe we should make an array of possible modes in init_expmed? Save this for GCC 2.7. */ @@ -4035,10 +4107,13 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; + max_cost = (unsignedp + ? udiv_cost (speed, compute_mode) + : sdiv_cost (speed, compute_mode)); if (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const)) - max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; + max_cost -= (mul_cost (speed, compute_mode) + + add_cost (speed, compute_mode)); last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; @@ -4152,10 +4227,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost - = (shift_cost[speed][compute_mode][post_shift - 1] - + shift_cost[speed][compute_mode][1] - + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + = (shift_cost (speed, compute_mode, post_shift - 1) + + shift_cost (speed, compute_mode, 1) + + 2 * add_cost (speed, compute_mode)); + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4185,9 +4260,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, (RSHIFT_EXPR, compute_mode, op0, pre_shift, NULL_RTX, 1); extra_cost - = (shift_cost[speed][compute_mode][pre_shift] - + shift_cost[speed][compute_mode][post_shift]); - t2 = expand_mult_highpart (compute_mode, t1, + = (shift_cost (speed, compute_mode, pre_shift) + + shift_cost (speed, compute_mode, post_shift)); + t2 = expmed_mult_highpart (compute_mode, t1, 
GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4245,8 +4320,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap[speed][compute_mode] - : sdiv_pow2_cheap[speed][compute_mode]) + && (rem_flag + ? smod_pow2_cheap (speed, compute_mode) + : sdiv_pow2_cheap (speed, compute_mode)) /* We assume that cheap metric is true if the optab has an expander for this mode. */ && ((optab_handler ((rem_flag ? smod_optab @@ -4266,7 +4342,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, return gen_lowpart (mode, remainder); } - if (sdiv_pow2_cheap[speed][compute_mode] + if (sdiv_pow2_cheap (speed, compute_mode) && ((optab_handler (sdiv_optab, compute_mode) != CODE_FOR_nothing) || (optab_handler (sdivmod_optab, compute_mode) @@ -4310,10 +4386,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + add_cost (speed, compute_mode)); + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4345,10 +4421,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); mlr = gen_int_mode (ml, compute_mode); - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, mlr, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + 2 * add_cost (speed, compute_mode)); + t1 = 
expmed_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4433,10 +4509,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, size - 1, NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + 2 * add_cost[speed][compute_mode]); - t3 = expand_mult_highpart (compute_mode, t2, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + 2 * add_cost (speed, compute_mode)); + t3 = expmed_mult_highpart (compute_mode, t2, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t3 != 0) |