From 9d1e699ce710fa4928e5fdec8af225c5bdb4d17f Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 11 Jan 2017 04:56:01 +0100 Subject: [PATCH] generate correct plan for INSERT .. SELECT .. FROM generate_series() The planner now generates the correct plan QUERY PLAN --------------------------------------------------------------------------------------------- Remote Subquery Scan on all (dn1,dn2) (cost=100.00..119.00 rows=1000 width=12) -> Insert on t1 (cost=100.00..119.00 rows=1000 width=12) -> Remote Subquery Scan on all (dn2) (cost=100.00..119.00 rows=1000 width=12) Distribute results by H: i -> Function Scan on generate_series s (cost=0.00..10.00 rows=1000 width=12) (5 rows) for queries like INSERT INTO t1 SELECT i, i, i FROM generate_series(1,1000) s(i); Sadly no rows are inserted, for some reason. But the plan looks fine (at least it seems similar to 9.5). --- src/backend/optimizer/plan/planner.c | 105 ++++++++++++++++++++++++++ src/backend/optimizer/util/pathnode.c | 4 +- 2 files changed, 106 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ef1622cc69..cb150c01bd 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2219,6 +2219,111 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, else rowMarks = root->rowMarks; + /* + * Adjust query distribution if requested. + */ + if (root->distribution) + { + Distribution *distribution = path->distribution; + + if (equal_distributions(root, root->distribution, distribution)) + { + if (IsLocatorReplicated(distribution->distributionType) && + contain_volatile_functions((Node *) parse->targetList)) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + errmsg("can not update replicated table with result of volatile function"))); + + /* + * Source tuple will be consumed on the same node where it is + * produced, so if it is known that some node does not yield tuples + * we do not want to send subquery for execution on these nodes + * at all. + * So copy the restriction to the external distribution. + * XXX Is that ever possible if external restriction is already + * defined? If yes we probably should use intersection of the sets, + * and if resulting set is empty create dummy plan and set it as + * the result_plan. Need to think this over + */ + root->distribution->restrictNodes = + bms_copy(distribution->restrictNodes); + } + else + { + /* + * If the planned statement is either UPDATE or DELETE, different + * distributions here mean the ModifyTable node will be placed on + * top of RemoteSubquery. + * + * UPDATE and DELETE versions of ModifyTable use TID of incoming + * tuple to apply the changes, but the RemoteSubquery plan supplies + * RemoteTuples, without such field. Therefore we can't execute + * such plan and error-out. + * + * Most common example is when the UPDATE statement modifies the + * distribution column, or when a complex UPDATE or DELETE statement + * involves a join. It's difficult to determine the exact reason, + * but we assume the first one (correlated UPDATE) is more likely. + * + * There are two ways of fixing the UPDATE ambiguity: + * + * 1. Modify the planner to never consider redistribution of the + * target table. In this case the planner would find there's no way + * to plan the query, and it would throw error somewhere else, and + * we'd only be dealing with updates of distribution columns. + * + * 2. Modify executor to allow distribution column updates. However + * there are a lot of issues behind the scene when implementing that + * approach, and so it's unlikely to happen soon. + * + * DELETE statements may only fail because of complex joins. + */ + + if (parse->commandType == CMD_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + errmsg("could not plan this distributed update"), + errdetail("correlated UPDATE or updating distribution column currently not supported in Postgres-XL."))); + + if (parse->commandType == CMD_DELETE) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + errmsg("could not plan this distributed delete"), + errdetail("correlated or complex DELETE is currently not supported in Postgres-XL."))); + + /* + * Redistribute result according to requested distribution. + * + * XXX In XL 9.5 there's a block of code (lines 2670-2730) that + * finds pushed-down plan, and builds a distribution based on the + * targetlist, and does a few other things (e.g. inserts Result + * plan if needed, etc.). I have no idea how to rewrite that at + * this point :-( + */ + + /* + * We already know the distributions are not equal, but let's + * see if the redistribution is actually necessary. We can skip + * it if we already have Result path, and if the distribution is + * + * 1) 'hash' restricted to a single node + * + * or + * + * 2) 'replicate' without volatile functions in the target list + * + * In such cases we don't need the RemoteSubplan. + * + * XXX Not sure what the (result_plan->lefttree == NULL) does. + * See planner.c:2730 in 9.5. + */ + if (!(IsA(path, ResultPath) && + ((root->distribution->distributionType == 'H' && bms_num_members(root->distribution->restrictNodes) == 1) || + (root->distribution->distributionType == 'R' && !contain_mutable_functions((Node *)parse->targetList))))) + path = create_remotesubplan_path(root, path, root->distribution); + } + } + path = (Path *) create_modifytable_path(root, final_rel, parse->commandType, diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c03d981d64..1f026c5c5e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1229,8 +1229,6 @@ create_remotesubplan_path(PlannerInfo *root, Path *subpath, RemoteSubPath *pathnode; Distribution *subdistribution = subpath->distribution; - Assert(subdistribution != NULL); - pathnode = makeNode(RemoteSubPath); pathnode->path.pathtype = T_RemoteSubplan; pathnode->path.parent = rel; @@ -1243,7 +1241,7 @@ create_remotesubplan_path(PlannerInfo *root, Path *subpath, cost_remote_subplan((Path *) pathnode, subpath->startup_cost, subpath->total_cost, subpath->rows, rel->reltarget->width, - IsLocatorReplicated(subdistribution->distributionType) ? + (subdistribution && IsLocatorReplicated(subdistribution->distributionType)) ? bms_num_members(subdistribution->nodes) : 1); return (Path *) pathnode; -- 2.39.5