From: Tomas Vondra Date: Sun, 9 Jul 2017 20:02:21 +0000 (+0200) Subject: Properly redistribute results of Gather Merge nodes X-Git-Tag: XL_10_R1BETA1~233 X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=7ba7029dab2d1f347475cb0cc670442e413b0590;p=postgres-xl.git Properly redistribute results of Gather Merge nodes The optimizer was not generating correct distributed paths with Gather Merge nodes, because those nodes always looked as if the data was not distributed at all. There were two bugs causing this: 1) Gather Merge did not copy distribution from the subpath, leaving it NULL (as if running on the coordinator), so it looked as if no Remote Subquery was needed. 2) create_grouping_paths() did not check if a Remote Subquery is needed on top of Gather Merge anyway. After fixing these two issues, we're now generating correct plans (at least judging by the select_parallel regression suite). --- diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a95572b87b..0fe31f2952 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4312,6 +4312,14 @@ create_grouping_paths(PlannerInfo *root, NULL, &total_groups); + /* + * If the grouping can't be fully pushed down, we'll push down the + * first phase of the aggregate, and redistribute only the partial + * results. + */ + if (! 
can_push_down_grouping(root, parse, gmpath)) + gmpath = create_remotesubplan_path(root, gmpath, NULL); + if (parse->hasAggs) add_path(grouped_rel, (Path *) create_agg_path(root, diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8d99cf9b34..d5f964419b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3018,6 +3018,9 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, required_outer); pathnode->path.parallel_aware = false; + /* distribution is the same as in the subpath */ + pathnode->path.distribution = (Distribution *) copyObject(subpath->distribution); + pathnode->subpath = subpath; pathnode->num_workers = subpath->parallel_workers; pathnode->path.pathkeys = pathkeys; diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 079dca310a..4392a3ada3 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -278,18 +278,19 @@ reset enable_nestloop; set enable_hashagg to off; explain (costs off) select string4, count((unique2)) from tenk1 group by string4 order by string4; - QUERY PLAN ----------------------------------------------------- + QUERY PLAN +----------------------------------------------------------- Finalize GroupAggregate Group Key: string4 - -> Gather Merge - Workers Planned: 4 - -> Partial GroupAggregate - Group Key: string4 - -> Sort - Sort Key: string4 - -> Parallel Seq Scan on tenk1 -(9 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Gather Merge + Workers Planned: 4 + -> Partial GroupAggregate + Group Key: string4 + -> Sort + Sort Key: string4 + -> Parallel Seq Scan on tenk1 +(10 rows) select string4, count((unique2)) from tenk1 group by string4 order by string4; string4 | count