Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch().  Fixes core dump on large hash joins, as in
example from Rae Stiening.

author     Tom Lane <[email protected]>
           Sun, 29 Dec 2002 22:29:03 +0000 (22:29 +0000)
committer  Tom Lane <[email protected]>
           Sun, 29 Dec 2002 22:29:03 +0000 (22:29 +0000)
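
For context on where the overflow bites: ExecHashJoinGetBatch() maps a
virtual bucket number to a batch number with int arithmetic, roughly as
in the sketch below.  This is a simplified illustration, not the
verbatim 7.3 source; the helper name sketch_get_batch is made up, and
the real function works on the hashtable struct rather than bare ints.

    /*
     * Sketch of the batch-number computation that can overflow.  Buckets
     * below nbuckets are processed in memory (batch 0); the remaining
     * virtual buckets are spread evenly over batches 1..nbatch.  Callers
     * are assumed to ensure totalbuckets > nbuckets whenever nbatch > 0.
     */
    int
    sketch_get_batch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
    {
        int     b;

        if (bucketno < nbuckets || nbatch == 0)
            return 0;

        /*
         * The int product below can reach about nbatch * totalbuckets;
         * if that exceeds INT_MAX it wraps around and yields a bogus
         * (possibly negative) batch number.  Capping nbatch at
         * INT_MAX / totalbuckets, as this patch does, keeps it in range.
         */
        b = (nbatch * (bucketno - nbuckets)) / (totalbuckets - nbuckets);
        return b + 1;
    }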

src/backend/executor/nodeHash.c

index 8bb5bde84c07a8afb96fbb5aebfd3f4eaa8834c2..4ac8aecd2d0cbd59355ef715ed7e00ca578f195d 100644
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
- * $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
+ * $Id: nodeHash.c,v 1.66.2.1 2002/12/29 22:29:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -342,7 +343,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
    int         tupsize;
    double      inner_rel_bytes;
-   double      hash_table_bytes;
+   long        hash_table_bytes;
+   double      dtmp;
    int         nbatch;
    int         nbuckets;
    int         totalbuckets;
@@ -360,20 +362,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
    inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
    /*
-    * Target hashtable size is SortMem kilobytes, but not less than
-    * sqrt(estimated inner rel size), so as to avoid horrible
-    * performance.
+    * Target in-memory hashtable size is SortMem kilobytes.
     */
-   hash_table_bytes = sqrt(inner_rel_bytes);
-   if (hash_table_bytes < (SortMem * 1024L))
-       hash_table_bytes = SortMem * 1024L;
+   hash_table_bytes = SortMem * 1024L;
 
    /*
     * Count the number of hash buckets we want for the whole relation,
     * for an average bucket load of NTUP_PER_BUCKET (per virtual
-    * bucket!).
+    * bucket!).  It has to fit in an int, however.
     */
-   totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+   dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+   if (dtmp < INT_MAX)
+       totalbuckets = (int) dtmp;
+   else
+       totalbuckets = INT_MAX;
+   if (totalbuckets <= 0)
+       totalbuckets = 1;
 
    /*
     * Count the number of buckets we think will actually fit in the
@@ -407,10 +411,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
         * that nbatch doesn't have to have anything to do with the ratio
         * totalbuckets/nbuckets; in fact, it is the number of groups we
         * will use for the part of the data that doesn't fall into the
-        * first nbuckets hash buckets.
+        * first nbuckets hash buckets.  We try to set it to make all the
+        * batches the same size.  But we have to keep nbatch small
+        * enough to avoid integer overflow in ExecHashJoinGetBatch().
         */
-       nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                           hash_table_bytes);
+       dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+                   hash_table_bytes);
+       if (dtmp < INT_MAX / totalbuckets)
+           nbatch = (int) dtmp;
+       else
+           nbatch = INT_MAX / totalbuckets;
        if (nbatch <= 0)
            nbatch = 1;
    }
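
To see the new cap at work with made-up numbers: suppose
ExecChooseHashTableSize() arrives at totalbuckets = 50,000,000 virtual
buckets and the unclamped batch count comes out at 1,000.  The per-tuple
int product in ExecHashJoinGetBatch() could then reach roughly
1,000 * 50,000,000 = 5e10, far beyond INT_MAX (2,147,483,647), which is
the sort of overflow behind the reported core dump.  With the clamp,
nbatch becomes INT_MAX / 50,000,000 = 42, and 42 * 50,000,000 =
2,100,000,000 still fits in an int.  A minimal standalone check of that
arithmetic (values are hypothetical, not taken from Rae Stiening's case):

    #include <assert.h>
    #include <limits.h>

    int
    main(void)
    {
        int     totalbuckets = 50000000;    /* hypothetical estimate */
        double  dtmp = 1000.0;              /* unclamped nbatch estimate */
        int     nbatch;

        /* same clamp the patch applies in ExecChooseHashTableSize() */
        if (dtmp < INT_MAX / totalbuckets)
            nbatch = (int) dtmp;
        else
            nbatch = INT_MAX / totalbuckets;

        /* worst-case product used when assigning tuples to batches */
        assert((long) nbatch * (long) totalbuckets <= (long) INT_MAX);
        return 0;
    }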