diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2012-04-21 00:51:14 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2012-04-21 00:51:14 -0400 |
commit | 33e99153e93b9accfa51ac036828144e1c2507b7 (patch) | |
tree | 9d0d57a0ec28d5b47d7cea5c3e18df60a6f368a7 | |
parent | 09ff76fcdb275769ac4d1a45a67416735613d04b (diff) | |
download | postgresql-33e99153e93b9accfa51ac036828144e1c2507b7.tar.gz |
Use fuzzy not exact cost comparison for the final tie-breaker in add_path.
Instead of an exact cost comparison, use a fuzzy comparison with 1e-10
delta after all other path metrics have proved equal. This is to avoid
having platform-specific roundoff behaviors determine the choice when
two paths are really the same to our cost estimators. Adjust the
recently-added test case that made it obvious we had a problem here.
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 44 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 16 |
2 files changed, 38 insertions, 22 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ef1ce2a0b4..61502aa642 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -125,8 +125,11 @@ compare_fractional_path_costs(Path *path1, Path *path2, * * We use fuzzy comparisons so that add_path() can avoid keeping both of * a pair of paths that really have insignificantly different cost. - * The fuzz factor is 1% of the smaller cost. (XXX does this percentage - * need to be user-configurable?) + * + * The fuzz_factor argument must be 1.0 plus delta, where delta is the + * fraction of the smaller cost that is considered to be a significant + * difference. For example, fuzz_factor = 1.01 makes the fuzziness limit + * be 1% of the smaller cost. * * The two paths are said to have "equal" costs if both startup and total * costs are fuzzily the same. Path1 is said to be better than path2 if @@ -138,16 +141,16 @@ compare_fractional_path_costs(Path *path1, Path *path2, * dominates the other across the whole performance spectrum. */ static PathCostComparison -compare_path_costs_fuzzily(Path *path1, Path *path2) +compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) { /* * Check total cost first since it's more likely to be different; many * paths have zero startup cost. */ - if (path1->total_cost > path2->total_cost * 1.01) + if (path1->total_cost > path2->total_cost * fuzz_factor) { /* path1 fuzzily worse on total cost */ - if (path2->startup_cost > path1->startup_cost * 1.01) + if (path2->startup_cost > path1->startup_cost * fuzz_factor) { /* ... but path2 fuzzily worse on startup, so DIFFERENT */ return COSTS_DIFFERENT; @@ -155,10 +158,10 @@ compare_path_costs_fuzzily(Path *path1, Path *path2) /* else path2 dominates */ return COSTS_BETTER2; } - if (path2->total_cost > path1->total_cost * 1.01) + if (path2->total_cost > path1->total_cost * fuzz_factor) { /* path2 fuzzily worse on total cost */ - if (path1->startup_cost > path2->startup_cost * 1.01) + if (path1->startup_cost > path2->startup_cost * fuzz_factor) { /* ... but path1 fuzzily worse on startup, so DIFFERENT */ return COSTS_DIFFERENT; @@ -167,12 +170,12 @@ compare_path_costs_fuzzily(Path *path1, Path *path2) return COSTS_BETTER1; } /* fuzzily the same on total cost */ - if (path1->startup_cost > path2->startup_cost * 1.01) + if (path1->startup_cost > path2->startup_cost * fuzz_factor) { /* ... but path1 fuzzily worse on startup, so path2 wins */ return COSTS_BETTER2; } - if (path2->startup_cost > path1->startup_cost * 1.01) + if (path2->startup_cost > path1->startup_cost * fuzz_factor) { /* ... but path2 fuzzily worse on startup, so path1 wins */ return COSTS_BETTER1; @@ -354,7 +357,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path) p1_next = lnext(p1); - costcmp = compare_path_costs_fuzzily(new_path, old_path); + /* + * Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this + * percentage need to be user-configurable?) + */ + costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01); /* * If the two paths compare differently for startup and total cost, @@ -403,15 +410,24 @@ add_path(RelOptInfo *parent_rel, Path *new_path) /* * Same pathkeys and outer rels, and fuzzily * the same cost, so keep just one; to decide - * which, first check rows and then do an - * exact cost comparison. + * which, first check rows and then do a fuzzy + * cost comparison with very small fuzz limit. + * (We used to do an exact cost comparison, + * but that results in annoying + * platform-specific plan variations due to + * roundoff in the cost estimates.) If things + * are still tied, arbitrarily keep only the + * old path. Notice that we will keep only + * the old path even if the less-fuzzy + * comparison decides the startup and total + * costs compare differently. */ if (new_path->rows < old_path->rows) remove_old = true; /* new dominates old */ else if (new_path->rows > old_path->rows) accept_new = false; /* old dominates new */ - else if (compare_path_costs(new_path, old_path, - TOTAL_COST) < 0) + else if (compare_path_costs_fuzzily(new_path, old_path, + 1.0000000001) == COSTS_BETTER1) remove_old = true; /* new dominates old */ else accept_new = false; /* old equals or dominates new */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 045ddf5030..b8b069b455 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2798,17 +2798,17 @@ select b.unique1 from Sort Key: b.unique1 -> Nested Loop Left Join -> Seq Scan on int4_tbl i2 - -> Nested Loop Left Join - Join Filter: (b.unique1 = 42) - -> Nested Loop + -> Nested Loop + -> Seq Scan on int4_tbl i1 + -> Nested Loop Left Join + Join Filter: (b.unique1 = 42) -> Nested Loop - -> Seq Scan on int4_tbl i1 -> Index Scan using tenk1_thous_tenthous on tenk1 b Index Cond: ((thousand = i1.f1) AND (i2.f1 = tenthous)) - -> Index Scan using tenk1_unique1 on tenk1 a - Index Cond: (unique1 = b.unique2) - -> Index Only Scan using tenk1_thous_tenthous on tenk1 c - Index Cond: (thousand = a.thousand) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.unique2) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 c + Index Cond: (thousand = a.thousand) (15 rows) select b.unique1 from |