Ensure that the statistics about all-visible pages are fetched from remote
authorPavan Deolasee <[email protected]>
Tue, 2 Aug 2016 03:20:38 +0000 (08:50 +0530)
committerPavan Deolasee <[email protected]>
Tue, 18 Oct 2016 10:07:39 +0000 (15:37 +0530)
datanodes and updated at the coordinator for better planning.

We seem to have missed fetching relallvisible all along, and that can lead to
some bad planning at the coordinator. In particular, the coordinator may not
choose Index Only scans where they are possible. This is a long-standing bug,
but some other stats collection bug (such as 8ca720f90f7) might have masked
this problem until now.

Per report by Sergio Hernández Martínez, with investigation and patch by me.

src/backend/commands/vacuum.c
src/test/regress/expected/create_index.out
src/test/regress/expected/join.out
src/test/regress/expected/rowtypes.out
src/test/regress/expected/subselect.out

index bc76a5d67aaa20d23437423c62f8865888ad22fc..ed2f22c63a735747cb75feae320192b4fa4dcff8 100644 (file)
@@ -755,7 +755,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
  */
 void
 vac_update_relstats(Relation relation,
-                                       BlockNumber num_pages, double num_tuples,
+                                       BlockNumber num_pages,
+                                       double num_tuples,
                                        BlockNumber num_all_visible_pages,
                                        bool hasindex, TransactionId frozenxid,
                                        MultiXactId minmulti,
@@ -1600,7 +1601,8 @@ make_relation_tle(Oid reloid, const char *relname, const char *column)
  */
 static int
 get_remote_relstat(char *nspname, char *relname, bool replicated,
-                                  int32 *pages, float4 *tuples, TransactionId *frozenXid)
+                                  int32 *pages, int32 *allvisiblepages,
+                                  float4 *tuples, TransactionId *frozenXid)
 {
        StringInfoData query;
        EState     *estate;
@@ -1610,12 +1612,14 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
        TupleTableSlot *result;
        int                     validpages,
                                validtuples,
-                               validfrozenxids;
+                               validfrozenxids,
+                               validallvisiblepages;
 
        /* Make up query string */
        initStringInfo(&query);
        appendStringInfo(&query, "SELECT c.relpages, "
                                                                        "c.reltuples, "
+                                                                       "c.relallvisible, "
                                                                        "c.relfrozenxid "
                                                         "FROM pg_class c JOIN pg_namespace n "
                                                         "ON c.relnamespace = n.oid "
@@ -1641,6 +1645,10 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
                                                                                 make_relation_tle(RelationRelationId,
                                                                                                                   "pg_class",
                                                                                                                   "reltuples"));
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(RelationRelationId,
+                                                                                                                  "pg_class",
+                                                                                                                  "relallvisible"));
        step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
                                                                                 make_relation_tle(RelationRelationId,
                                                                                                                   "pg_class",
@@ -1657,9 +1665,11 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
        MemoryContextSwitchTo(oldcontext);
        /* get ready to combine results */
        *pages = 0;
+       *allvisiblepages = 0;
        *tuples = 0.0;
        *frozenXid = InvalidTransactionId;
        validpages = 0;
+       validallvisiblepages = 0;
        validtuples = 0;
        validfrozenxids = 0;
        result = ExecRemoteQuery(node);
@@ -1680,7 +1690,13 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
                        validtuples++;
                        *tuples += DatumGetFloat4(value);
                }
-               value = slot_getattr(result, 3, &isnull); /* relfrozenxid */
+               value = slot_getattr(result, 3, &isnull); /* relallvisible */
+               if (!isnull)
+               {
+                       validallvisiblepages++;
+                       *allvisiblepages += DatumGetInt32(value);
+               }
+               value = slot_getattr(result, 4, &isnull); /* relfrozenxid */
                if (!isnull)
                {
                        /*
@@ -1715,6 +1731,9 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
 
                if (validtuples > 0)
                        *tuples /= validtuples;
+
+               if (validallvisiblepages > 0)
+                       *allvisiblepages /= validallvisiblepages;
        }
 
        if (validfrozenxids < validpages || validfrozenxids < validtuples)
@@ -1748,6 +1767,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
        char       *relname;
        /* fields to combine relation statistics */
        int32           num_pages;
+       int32           num_allvisible_pages;
        float4          num_tuples;
        TransactionId min_frozenxid;
        bool            hasindex;
@@ -1766,7 +1786,8 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
         * returning correct stats.
         */
        rel_nodes = get_remote_relstat(nspname, relname, replicated,
-                                                                  &num_pages, &num_tuples, &min_frozenxid);
+                                                                  &num_pages, &num_allvisible_pages,
+                                                                  &num_tuples, &min_frozenxid);
        if (rel_nodes > 0)
        {
                int                     nindexes;
@@ -1783,7 +1804,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                        /* Fetch index stats */
                        for (i = 0; i < nindexes; i++)
                        {
-                               int32   idx_pages;
+                               int32   idx_pages, idx_allvisible_pages;
                                float4  idx_tuples;
                                TransactionId idx_frozenxid;
                                int idx_nodes;
@@ -1793,7 +1814,8 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                                nspname = get_namespace_name(RelationGetNamespace(Irel[i]));
                                /* Index is replicated if parent relation is replicated */
                                idx_nodes = get_remote_relstat(nspname, relname, replicated,
-                                                                               &idx_pages, &idx_tuples, &idx_frozenxid);
+                                                                               &idx_pages, &idx_allvisible_pages,
+                                                                               &idx_tuples, &idx_frozenxid);
                                if (idx_nodes > 0)
                                {
                                        /*
@@ -1834,7 +1856,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                vac_update_relstats(onerel,
                                                        (BlockNumber) num_pages,
                                                        (double) num_tuples,
-                                                       visibilitymap_count(onerel),
+                                                       num_allvisible_pages,
                                                        hasindex,
                                                        min_frozenxid,
                                                        InvalidMultiXactId,
index 8f01cf502b68a42e5eb39f87a1d99cf40765bb1d..3a88eb783f30e308e5e72450924c5cd3a44ca3d6 100644 (file)
@@ -2947,16 +2947,12 @@ explain (costs off)
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
 ORDER BY unique1;
-                               QUERY PLAN                                
--------------------------------------------------------------------------
+                         QUERY PLAN                          
+-------------------------------------------------------------
  Remote Subquery Scan on all (datanode_1,datanode_2)
-   ->  Sort
-         Sort Key: unique1
-         ->  Bitmap Heap Scan on tenk1
-               Recheck Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
-(7 rows)
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+         Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
+(3 rows)
 
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
index 97e2dd567a76751c82fc0ae014b6ef07a71ccf4d..ad614eb119cd3b8de94521ca2ffd86aa503eb88b 100644 (file)
@@ -2374,6 +2374,40 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol
 reset enable_hashjoin;
 reset enable_nestloop;
 --
+-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
+--
+set work_mem to '64kB';
+set enable_mergejoin to off;
+explain (costs off)
+select count(*) from tenk1 a, tenk1 b
+  where a.hundred = b.thousand and (b.fivethous % 10) < 10;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Aggregate
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Aggregate
+               ->  Hash Join
+                     Hash Cond: (a.hundred = b.thousand)
+                     ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+                           Distribute results by H: hundred
+                           ->  Index Only Scan using tenk1_hundred on tenk1 a
+                     ->  Hash
+                           ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+                                 Distribute results by H: thousand
+                                 ->  Seq Scan on tenk1 b
+                                       Filter: ((fivethous % 10) < 10)
+(13 rows)
+
+select count(*) from tenk1 a, tenk1 b
+  where a.hundred = b.thousand and (b.fivethous % 10) < 10;
+ count  
+--------
+ 100000
+(1 row)
+
+reset work_mem;
+reset enable_mergejoin;
+--
 -- regression test for 8.2 bug with improper re-ordering of left joins
 --
 create temp table tt3(f1 int, f2 text);
@@ -2546,7 +2580,7 @@ prepare foo(bool) as
 execute foo(true);
  count 
 -------
- 10998
+ 10000
 (1 row)
 
 execute foo(false);
@@ -4607,19 +4641,18 @@ select * from generate_series(100,200) g,
 explain (num_nodes off, nodes off, costs off)
   select count(*) from tenk1 a,
     tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Aggregate
    ->  Remote Subquery Scan on all
          ->  Aggregate
-               ->  Hash Join
-                     Hash Cond: (a.unique1 = b.unique2)
-                     ->  Seq Scan on tenk1 a
-                     ->  Hash
-                           ->  Remote Subquery Scan on all
-                                 Distribute results by H: unique2
-                                 ->  Seq Scan on tenk1 b
-(10 rows)
+               ->  Merge Join
+                     Merge Cond: (b.unique2 = a.unique1)
+                     ->  Remote Subquery Scan on all
+                           Distribute results by H: unique2
+                           ->  Index Only Scan using tenk1_unique2 on tenk1 b
+                     ->  Index Only Scan using tenk1_unique1 on tenk1 a
+(9 rows)
 
 select count(*) from tenk1 a,
   tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
@@ -4632,18 +4665,18 @@ select count(*) from tenk1 a,
 explain (num_nodes off, nodes off, costs off)
   select count(*) from tenk1 a,
     tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
-                     QUERY PLAN                      
------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Hash Join
          Hash Cond: ("*VALUES*".column1 = b.unique2)
          ->  Nested Loop
                ->  Remote Subquery Scan on all
-                     ->  Seq Scan on tenk1 a
+                     ->  Index Only Scan using tenk1_unique1 on tenk1 a
                ->  Values Scan on "*VALUES*"
          ->  Hash
                ->  Remote Subquery Scan on all
-                     ->  Seq Scan on tenk1 b
+                     ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (10 rows)
 
 select count(*) from tenk1 a,
index 19a0e7700ec45f441a414760d97c78cf5805ecf8..a135231cf5787cd66bbb13bba8d2087d4bddb59b 100644 (file)
@@ -240,16 +240,12 @@ explain (costs off)
 select thousand, tenthous from tenk1
 where (thousand, tenthous) >= (997, 5000)
 order by thousand, tenthous;
-                                 QUERY PLAN                                  
------------------------------------------------------------------------------
+                           QUERY PLAN                            
+-----------------------------------------------------------------
  Remote Subquery Scan on all (datanode_1,datanode_2)
-   ->  Sort
-         Sort Key: thousand, tenthous
-         ->  Bitmap Heap Scan on tenk1
-               Recheck Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
-               ->  Bitmap Index Scan on tenk1_thous_tenthous
-                     Index Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
-(7 rows)
+   ->  Index Only Scan using tenk1_thous_tenthous on tenk1
+         Index Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
+(3 rows)
 
 select thousand, tenthous from tenk1
 where (thousand, tenthous) >= (997, 5000)
index b767b48973a2a1133c5de01ea5d97a54e401e068..40b1577b87eae8523df43e7ad2eb071a1b8a832b 100644 (file)
@@ -798,7 +798,7 @@ select * from int4_tbl where
                      SubPlan 1
                        ->  Remote Subquery Scan on all (datanode_1,datanode_2)
                              Output: a.unique1
-                             ->  Seq Scan on public.tenk1 a
+                             ->  Index Only Scan using tenk1_unique1 on public.tenk1 a
                                    Output: a.unique1
          ->  Materialize
                Output: b.ten