Ensure that the statistics about all-visible pages are fetched from remote
author: Pavan Deolasee <[email protected]>
Tue, 2 Aug 2016 03:20:38 +0000 (08:50 +0530)
committer: Pavan Deolasee <[email protected]>
Tue, 2 Aug 2016 03:24:47 +0000 (08:54 +0530)
datanodes and updated at the coordinator for better planning.

We seem to have missed fetching relallvisible all along, and that can lead to
some bad planning at the coordinator. In particular, it may not use Index Only
scans where it's possible. This is a long-standing bug, but some other stats
collection bug (such as 8ca720f90f7) might have masked this problem until now.

Per report by Sergio Hernández Martínez, with investigation and patch by me.

src/backend/commands/vacuum.c
src/test/regress/expected/create_index.out
src/test/regress/expected/join.out
src/test/regress/expected/rowtypes.out
src/test/regress/expected/subselect.out

index aa5bf9799a12eb92fdfd1b5be304631e286763a7..999696cd41fddd753dc4998351b66659b9d92eb5 100644 (file)
@@ -758,7 +758,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
  */
 void
 vac_update_relstats(Relation relation,
-                                       BlockNumber num_pages, double num_tuples,
+                                       BlockNumber num_pages,
+                                       double num_tuples,
                                        BlockNumber num_all_visible_pages,
                                        bool hasindex, TransactionId frozenxid,
                                        MultiXactId minmulti,
@@ -1603,7 +1604,8 @@ make_relation_tle(Oid reloid, const char *relname, const char *column)
  */
 static int
 get_remote_relstat(char *nspname, char *relname, bool replicated,
-                                  int32 *pages, float4 *tuples, TransactionId *frozenXid)
+                                  int32 *pages, int32 *allvisiblepages,
+                                  float4 *tuples, TransactionId *frozenXid)
 {
        StringInfoData query;
        EState     *estate;
@@ -1613,12 +1615,14 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
        TupleTableSlot *result;
        int                     validpages,
                                validtuples,
-                               validfrozenxids;
+                               validfrozenxids,
+                               validallvisiblepages;
 
        /* Make up query string */
        initStringInfo(&query);
        appendStringInfo(&query, "SELECT c.relpages, "
                                                                        "c.reltuples, "
+                                                                       "c.relallvisible, "
                                                                        "c.relfrozenxid "
                                                         "FROM pg_class c JOIN pg_namespace n "
                                                         "ON c.relnamespace = n.oid "
@@ -1644,6 +1648,10 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
                                                                                 make_relation_tle(RelationRelationId,
                                                                                                                   "pg_class",
                                                                                                                   "reltuples"));
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(RelationRelationId,
+                                                                                                                  "pg_class",
+                                                                                                                  "relallvisible"));
        step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
                                                                                 make_relation_tle(RelationRelationId,
                                                                                                                   "pg_class",
@@ -1660,9 +1668,11 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
        MemoryContextSwitchTo(oldcontext);
        /* get ready to combine results */
        *pages = 0;
+       *allvisiblepages = 0;
        *tuples = 0.0;
        *frozenXid = InvalidTransactionId;
        validpages = 0;
+       validallvisiblepages = 0;
        validtuples = 0;
        validfrozenxids = 0;
        result = ExecRemoteQuery(node);
@@ -1683,7 +1693,13 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
                        validtuples++;
                        *tuples += DatumGetFloat4(value);
                }
-               value = slot_getattr(result, 3, &isnull); /* relfrozenxid */
+               value = slot_getattr(result, 3, &isnull); /* relallvisible */
+               if (!isnull)
+               {
+                       validallvisiblepages++;
+                       *allvisiblepages += DatumGetInt32(value);
+               }
+               value = slot_getattr(result, 4, &isnull); /* relfrozenxid */
                if (!isnull)
                {
                        /*
@@ -1718,6 +1734,9 @@ get_remote_relstat(char *nspname, char *relname, bool replicated,
 
                if (validtuples > 0)
                        *tuples /= validtuples;
+
+               if (validallvisiblepages > 0)
+                       *allvisiblepages /= validallvisiblepages;
        }
 
        if (validfrozenxids < validpages || validfrozenxids < validtuples)
@@ -1751,6 +1770,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
        char       *relname;
        /* fields to combine relation statistics */
        int32           num_pages;
+       int32           num_allvisible_pages;
        float4          num_tuples;
        TransactionId min_frozenxid;
        bool            hasindex;
@@ -1769,7 +1789,8 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
         * returning correct stats.
         */
        rel_nodes = get_remote_relstat(nspname, relname, replicated,
-                                                                  &num_pages, &num_tuples, &min_frozenxid);
+                                                                  &num_pages, &num_allvisible_pages,
+                                                                  &num_tuples, &min_frozenxid);
        if (rel_nodes > 0)
        {
                int                     nindexes;
@@ -1786,7 +1807,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                        /* Fetch index stats */
                        for (i = 0; i < nindexes; i++)
                        {
-                               int32   idx_pages;
+                               int32   idx_pages, idx_allvisible_pages;
                                float4  idx_tuples;
                                TransactionId idx_frozenxid;
                                int idx_nodes;
@@ -1796,7 +1817,8 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                                nspname = get_namespace_name(RelationGetNamespace(Irel[i]));
                                /* Index is replicated if parent relation is replicated */
                                idx_nodes = get_remote_relstat(nspname, relname, replicated,
-                                                                               &idx_pages, &idx_tuples, &idx_frozenxid);
+                                                                               &idx_pages, &idx_allvisible_pages,
+                                                                               &idx_tuples, &idx_frozenxid);
                                if (idx_nodes > 0)
                                {
                                        /*
@@ -1837,7 +1859,7 @@ vacuum_rel_coordinator(Relation onerel, bool is_outer)
                vac_update_relstats(onerel,
                                                        (BlockNumber) num_pages,
                                                        (double) num_tuples,
-                                                       visibilitymap_count(onerel),
+                                                       num_allvisible_pages,
                                                        hasindex,
                                                        min_frozenxid,
                                                        InvalidMultiXactId,
index 8f01cf502b68a42e5eb39f87a1d99cf40765bb1d..3a88eb783f30e308e5e72450924c5cd3a44ca3d6 100644 (file)
@@ -2947,16 +2947,12 @@ explain (costs off)
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
 ORDER BY unique1;
-                               QUERY PLAN                                
--------------------------------------------------------------------------
+                         QUERY PLAN                          
+-------------------------------------------------------------
  Remote Subquery Scan on all (datanode_1,datanode_2)
-   ->  Sort
-         Sort Key: unique1
-         ->  Bitmap Heap Scan on tenk1
-               Recheck Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
-(7 rows)
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+         Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
+(3 rows)
 
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
index 0740f4411c8a3dd8a8b8e30852ec21da39f0a7b4..db3e0580192149f3aa9ff43b6c43c093eb698912 100644 (file)
@@ -2423,7 +2423,7 @@ select count(*) from tenk1 a, tenk1 b
                      Hash Cond: (a.hundred = b.thousand)
                      ->  Remote Subquery Scan on all (datanode_1,datanode_2)
                            Distribute results by H: hundred
-                           ->  Seq Scan on tenk1 a
+                           ->  Index Only Scan using tenk1_hundred on tenk1 a
                      ->  Hash
                            ->  Remote Subquery Scan on all (datanode_1,datanode_2)
                                  Distribute results by H: thousand
@@ -2613,7 +2613,7 @@ prepare foo(bool) as
 execute foo(true);
  count 
 -------
- 10998
+ 10000
 (1 row)
 
 execute foo(false);
@@ -4706,19 +4706,18 @@ select * from generate_series(100,200) g,
 explain (num_nodes off, nodes off, costs off)
   select count(*) from tenk1 a,
     tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Aggregate
    ->  Remote Subquery Scan on all
          ->  Aggregate
-               ->  Hash Join
-                     Hash Cond: (a.unique1 = b.unique2)
-                     ->  Seq Scan on tenk1 a
-                     ->  Hash
-                           ->  Remote Subquery Scan on all
-                                 Distribute results by H: unique2
-                                 ->  Seq Scan on tenk1 b
-(10 rows)
+               ->  Merge Join
+                     Merge Cond: (b.unique2 = a.unique1)
+                     ->  Remote Subquery Scan on all
+                           Distribute results by H: unique2
+                           ->  Index Only Scan using tenk1_unique2 on tenk1 b
+                     ->  Index Only Scan using tenk1_unique1 on tenk1 a
+(9 rows)
 
 select count(*) from tenk1 a,
   tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
@@ -4731,18 +4730,18 @@ select count(*) from tenk1 a,
 explain (num_nodes off, nodes off, costs off)
   select count(*) from tenk1 a,
     tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
-                     QUERY PLAN                      
------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Hash Join
          Hash Cond: ("*VALUES*".column1 = b.unique2)
          ->  Nested Loop
                ->  Remote Subquery Scan on all
-                     ->  Seq Scan on tenk1 a
+                     ->  Index Only Scan using tenk1_unique1 on tenk1 a
                ->  Values Scan on "*VALUES*"
          ->  Hash
                ->  Remote Subquery Scan on all
-                     ->  Seq Scan on tenk1 b
+                     ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (10 rows)
 
 select count(*) from tenk1 a,
index 7aff45e78e5e860a58b3e59d46566a99016ccd50..f3c4f9442759ec961ce8df13a4895347c41dac06 100644 (file)
@@ -240,16 +240,12 @@ explain (costs off)
 select thousand, tenthous from tenk1
 where (thousand, tenthous) >= (997, 5000)
 order by thousand, tenthous;
-                                 QUERY PLAN                                  
------------------------------------------------------------------------------
+                           QUERY PLAN                            
+-----------------------------------------------------------------
  Remote Subquery Scan on all (datanode_1,datanode_2)
-   ->  Sort
-         Sort Key: thousand, tenthous
-         ->  Bitmap Heap Scan on tenk1
-               Recheck Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
-               ->  Bitmap Index Scan on tenk1_thous_tenthous
-                     Index Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
-(7 rows)
+   ->  Index Only Scan using tenk1_thous_tenthous on tenk1
+         Index Cond: (ROW(thousand, tenthous) >= ROW(997, 5000))
+(3 rows)
 
 select thousand, tenthous from tenk1
 where (thousand, tenthous) >= (997, 5000)
index b767b48973a2a1133c5de01ea5d97a54e401e068..40b1577b87eae8523df43e7ad2eb071a1b8a832b 100644 (file)
@@ -798,7 +798,7 @@ select * from int4_tbl where
                      SubPlan 1
                        ->  Remote Subquery Scan on all (datanode_1,datanode_2)
                              Output: a.unique1
-                             ->  Seq Scan on public.tenk1 a
+                             ->  Index Only Scan using tenk1_unique1 on public.tenk1 a
                                    Output: a.unique1
          ->  Materialize
                Output: b.ten