summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    Tom Lane <tgl@sss.pgh.pa.us> 2009-10-30 20:58:51 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2009-10-30 20:58:51 +0000
commit    3f5a4828f9b04811d25ded71ed452e9b97e1df75 (patch)
tree      266e283d8fc67bdc9db0d06915ea398d27f1de3e
parent    4b53f16be9599b4d3f1f067d5070eefd8ab2ce85 (diff)
download  postgresql-3f5a4828f9b04811d25ded71ed452e9b97e1df75.tar.gz
Make the overflow guards in ExecChooseHashTableSize be more protective.
The original coding ensured nbuckets and nbatch didn't exceed INT_MAX, which while not insane on its own terms did nothing to protect subsequent code like "palloc(nbatch * sizeof(BufFile *))". Since enormous join size estimates might well be planner error rather than reality, it seems best to constrain the initial sizes to be not more than work_mem/sizeof(pointer), thus ensuring the allocated arrays don't exceed work_mem. We will allow nbatch to get bigger than that during subsequent ExecHashIncreaseNumBatches calls, but we should still guard against integer overflow in those palloc requests. Per bug #5145 from Bernt Marius Johnsen. Although the given test case only seems to fail back to 8.2, previous releases have variants of this issue, so patch all supported branches.
-rw-r--r--  src/backend/executor/nodeHash.c | 43
1 file changed, 25 insertions(+), 18 deletions(-)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 471534538a..c49e4da633 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.121 2009/06/11 14:48:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.121.2.1 2009/10/30 20:58:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -404,6 +404,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
double inner_rel_bytes;
long hash_table_bytes;
long skew_table_bytes;
+ long max_pointers;
int nbatch;
int nbuckets;
int i;
@@ -445,17 +446,18 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
{
skew_table_bytes = hash_table_bytes * SKEW_WORK_MEM_PERCENT / 100;
- *num_skew_mcvs = skew_table_bytes / (
- /* size of a hash tuple */
- tupsize +
- /* worst-case size of skewBucket[] per MCV */
+ /*----------
+ * Divisor is:
+ * size of a hash tuple +
+ * worst-case size of skewBucket[] per MCV +
+ * size of skewBucketNums[] entry +
+ * size of skew bucket struct itself
+ *----------
+ */
+ *num_skew_mcvs = skew_table_bytes / (tupsize +
(8 * sizeof(HashSkewBucket *)) +
- /* size of skewBucketNums[] entry */
sizeof(int) +
- /* size of skew bucket struct itself */
- SKEW_BUCKET_OVERHEAD
- );
-
+ SKEW_BUCKET_OVERHEAD);
if (*num_skew_mcvs > 0)
hash_table_bytes -= skew_table_bytes;
}
@@ -465,8 +467,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
/*
* Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
* memory is filled. Set nbatch to the smallest power of 2 that appears
- * sufficient.
+ * sufficient. The Min() steps limit the results so that the pointer
+ * arrays we'll try to allocate do not exceed work_mem.
*/
+ max_pointers = (work_mem * 1024L) / sizeof(void *);
+ /* also ensure we avoid integer overflow in nbatch and nbuckets */
+ max_pointers = Min(max_pointers, INT_MAX / 2);
+
if (inner_rel_bytes > hash_table_bytes)
{
/* We'll need multiple batches */
@@ -475,11 +482,11 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
int minbatch;
lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
- lbuckets = Min(lbuckets, INT_MAX / 2);
+ lbuckets = Min(lbuckets, max_pointers);
nbuckets = (int) lbuckets;
dbatch = ceil(inner_rel_bytes / hash_table_bytes);
- dbatch = Min(dbatch, INT_MAX / 2);
+ dbatch = Min(dbatch, max_pointers);
minbatch = (int) dbatch;
nbatch = 2;
while (nbatch < minbatch)
@@ -491,7 +498,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
double dbuckets;
dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
- dbuckets = Min(dbuckets, INT_MAX / 2);
+ dbuckets = Min(dbuckets, max_pointers);
nbuckets = (int) dbuckets;
nbatch = 1;
@@ -565,7 +572,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
return;
/* safety check to avoid overflow */
- if (oldnbatch > INT_MAX / 2)
+ if (oldnbatch > Min(INT_MAX / 2, MaxAllocSize / (sizeof(void *) * 2)))
return;
nbatch = oldnbatch * 2;
@@ -1043,9 +1050,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
* will be at least one null entry, so searches will always
* terminate.)
*
- * Note: this code could fail if mcvsToUse exceeds INT_MAX/8, but that
- * is not currently possible since we limit pg_statistic entries to
- * much less than that.
+ * Note: this code could fail if mcvsToUse exceeds INT_MAX/8 or
+ * MaxAllocSize/sizeof(void *)/8, but that is not currently possible
+ * since we limit pg_statistic entries to much less than that.
*/
nbuckets = 2;
while (nbuckets <= mcvsToUse)