make DISTINCT ON plans work again (WIP)
author: Tomas Vondra <[email protected]>
Tue, 10 Jan 2017 20:27:10 +0000 (21:27 +0100)
committer: Tomas Vondra <[email protected]>
Tue, 10 Jan 2017 20:27:10 +0000 (21:27 +0100)
Insert RemoteSubplan paths at appropriate places, to generate plans
like this:

                                          QUERY PLAN
    --------------------------------------------------------------------------------------
     HashAggregate  (cost=153.86..155.86 rows=200 width=4)
       Group Key: b
       ->  Remote Subquery Scan on all (dn1,dn2)  (cost=100.00..148.76 rows=2040 width=4)
             ->  Seq Scan on t1  (cost=0.00..30.40 rows=2040 width=4)

or

                                          QUERY PLAN
    --------------------------------------------------------------------------------------
     Unique  (cost=242.54..262.94 rows=200 width=4)
       ->  Remote Subquery Scan on all (dn1,dn2)  (cost=242.54..257.84 rows=2040 width=4)
             ->  Sort  (cost=142.54..147.64 rows=2040 width=4)
                   Sort Key: b
                   ->  Seq Scan on t1  (cost=0.00..30.40 rows=2040 width=4)

These changes may not be entirely correct yet — the "Unique" plan seems
to be missing a "Sort" node on top of the "Remote Subquery Scan". That
extra sort may be unnecessary for some distributions, though (those whose
distribution key matches the grouping column).

For the "HashAggregate" plans, it might be possible to use 2-phase
aggregation, pushing some of the computation down to the remote nodes
(reducing the amount of data transferred).

src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c

index a5864b9fede6c5345c8440c90d4f06f00fc26521..00bea1ac81cfd221a3ce3527b1762ab4a5b9fd1a 100644 (file)
@@ -161,6 +161,8 @@ static Plan *grouping_distribution(PlannerInfo *root, Plan *plan,
                                          List *current_pathkeys, Distribution **distribution);
 static bool equal_distributions(PlannerInfo *root, Distribution *dst1,
                                        Distribution *dst2);
+static bool grouping_distribution_match(PlannerInfo *root, Path *path,
+                                         Query *parse);
 #endif
 static PathTarget *make_sort_input_target(PlannerInfo *root,
                                           PathTarget *final_target,
@@ -4341,6 +4343,12 @@ create_distinct_paths(PlannerInfo *root,
                                                                                         needed_pathkeys,
                                                                                         -1.0);
 
+               /* In case of grouping / distribution mismatch, inject remote scan. */
+               if (! grouping_distribution_match(root, path, parse))
+                       path = create_remotesubplan_path(root, path, NULL);
+
+               /* XXX Maybe we need another sort here? */
+
                add_path(distinct_rel, (Path *)
                                 create_upper_unique_path(root, distinct_rel,
                                                                                  path,
@@ -4381,12 +4389,20 @@ create_distinct_paths(PlannerInfo *root,
 
        if (allow_hash && grouping_is_hashable(parse->distinctClause))
        {
+               Path *input_path = cheapest_input_path;
+
+               /* If needed, inject RemoteSubplan redistributing the data. */
+               if (!grouping_distribution_match(root, input_path, parse))
+                       input_path = create_remotesubplan_path(root, input_path, NULL);
+
+               /* XXX Maybe we can make this a 2-phase aggregate too? */
+
                /* Generate hashed aggregate path --- no sort needed */
                add_path(distinct_rel, (Path *)
                                 create_agg_path(root,
                                                                 distinct_rel,
-                                                                cheapest_input_path,
-                                                                cheapest_input_path->pathtarget,
+                                                                input_path,
+                                                                input_path->pathtarget,
                                                                 AGG_HASHED,
                                                                 AGGSPLIT_SIMPLE,
                                                                 parse->distinctClause,
@@ -5465,6 +5481,50 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 
 #ifdef XCP
+static bool
+grouping_distribution_match(PlannerInfo *root, Path *path, Query *parse)
+{
+       int             i;
+       bool    matches_key = false;
+       Distribution *distribution = path->distribution;
+
+       int numGroupCols = list_length(parse->distinctClause);
+       AttrNumber *groupColIdx = extract_grouping_cols(parse->distinctClause,
+                                                                                                       parse->targetList);
+
+       /*
+        * With no explicit data distribution or replicated tables, we can simply
+        * push down the whole aggregation to the remote node, without any sort
+        * of redistribution. So consider this to be a match.
+        */
+       if ((distribution == NULL) ||
+               IsLocatorReplicated(distribution->distributionType))
+               return true;
+
+       /* But no distribution expression means 'no match'. */
+       if (distribution->distributionExpr == NULL)
+               return false;
+
+       /*
+        * With distributed data and table distributed using an expression, we
+        * need to check if the distribution expression matches one of the
+        * grouping keys (arbitrary one).
+        */
+       for (i = 0; i < numGroupCols; i++)
+       {
+               TargetEntry *te = (TargetEntry *)list_nth(parse->targetList,
+                                                                                                 groupColIdx[i]-1);
+
+               if (equal(te->expr, distribution->distributionExpr))
+               {
+                       matches_key = true;
+                       break;
+               }
+       }
+
+       return matches_key;
+}
+
 /*
  * Grouping preserves distribution if distribution key is the
  * first grouping key or if distribution is replicated.
index c3b538b4e543f148a15b92aa42c20181322f2cf7..82069f0466d6c49b33cef4d79e8da66a71453577 100644 (file)
@@ -3664,6 +3664,8 @@ create_sort_path(PlannerInfo *root,
        pathnode->path.parallel_workers = subpath->parallel_workers;
        pathnode->path.pathkeys = pathkeys;
 
+       pathnode->path.distribution = copyObject(subpath->distribution);
+
        pathnode->subpath = subpath;
 
        cost_sort(&pathnode->path, root, pathkeys,