generate correct plan for INSERT .. SELECT .. FROM generate_series()
authorTomas Vondra <[email protected]>
Wed, 11 Jan 2017 03:56:01 +0000 (04:56 +0100)
committerTomas Vondra <[email protected]>
Wed, 11 Jan 2017 03:56:01 +0000 (04:56 +0100)
The planner now generates the correct plan

                                             QUERY PLAN
    ---------------------------------------------------------------------------------------------
     Remote Subquery Scan on all (dn1,dn2)  (cost=100.00..119.00 rows=1000 width=12)
       ->  Insert on t1  (cost=100.00..119.00 rows=1000 width=12)
             ->  Remote Subquery Scan on all (dn2)  (cost=100.00..119.00 rows=1000 width=12)
                   Distribute results by H: i
                   ->  Function Scan on generate_series s  (cost=0.00..10.00 rows=1000 width=12)
    (5 rows)

for queries like

    INSERT INTO t1 SELECT i, i, i FROM generate_series(1,1000) s(i);

Sadly no rows are inserted, for some reason. But the plan looks fine
(at least it seems similar to 9.5).

src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c

index ef1622cc697ba988bc96de011c7c3edddc8ac517..cb150c01bdd231cc30c85294a73ba214866a6689 100644 (file)
@@ -2219,6 +2219,111 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                        else
                                rowMarks = root->rowMarks;
 
+                       /*
+                        * Adjust query distribution if requested.
+                        */
+                       if (root->distribution)
+                       {
+                               Distribution *distribution = path->distribution;
+
+                               if (equal_distributions(root, root->distribution, distribution))
+                               {
+                                       if (IsLocatorReplicated(distribution->distributionType) &&
+                                               contain_volatile_functions((Node *) parse->targetList))
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+                                                               errmsg("can not update replicated table with result of volatile function")));
+
+                                       /*
+                                        * Source tuple will be consumed on the same node where it is
+                                        * produced, so if it is known that some node does not yield tuples
+                                        * we do not want to send subquery for execution on these nodes
+                                        * at all.
+                                        * So copy the restriction to the external distribution.
+                                        * XXX Is that ever possible if external restriction is already
+                                        * defined? If yes we probably should use intersection of the sets,
+                                        * and if resulting set is empty create dummy plan and set it as
+                                        * the result_plan. Need to think this over
+                                        */
+                                       root->distribution->restrictNodes =
+                                                       bms_copy(distribution->restrictNodes);
+                               }
+                               else
+                               {
+                                       /*
+                                        * If the planned statement is either UPDATE or DELETE, different
+                                        * distributions here mean the ModifyTable node will be placed on
+                                        * top of RemoteSubquery.
+                                        *
+                                        * UPDATE and DELETE versions of ModifyTable use TID of incoming
+                                        * tuple to apply the changes, but the RemoteSubquery plan supplies
+                                        * RemoteTuples, without such field. Therefore we can't execute
+                                        * such plan and error-out.
+                                        *
+                                        * Most common example is when the UPDATE statement modifies the
+                                        * distribution column, or when a complex UPDATE or DELETE statement
+                                        * involves a join. It's difficult to determine the exact reason,
+                                        * but we assume the first one (correlated UPDATE) is more likely.
+                                        *
+                                        * There are two ways of fixing the UPDATE ambiguity:
+                                        *
+                                        * 1. Modify the planner to never consider redistribution of the
+                                        * target table. In this case the planner would find there's no way
+                                        * to plan the query, and it would throw error somewhere else, and
+                                        * we'd only be dealing with updates of distribution columns.
+                                        *
+                                        * 2. Modify executor to allow distribution column updates. However
+                                        * there are a lot of issues behind the scene when implementing that
+                                        * approach, and so it's unlikely to happen soon.
+                                        *
+                                        * DELETE statements may only fail because of complex joins.
+                                        */
+
+                                       if (parse->commandType == CMD_UPDATE)
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+                                                                errmsg("could not plan this distributed update"),
+                                                                errdetail("correlated UPDATE or updating distribution column currently not supported in Postgres-XL.")));
+
+                                       if (parse->commandType == CMD_DELETE)
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+                                                                errmsg("could not plan this distributed delete"),
+                                                                errdetail("correlated or complex DELETE is currently not supported in Postgres-XL.")));
+
+                                       /*
+                                        * Redistribute result according to requested distribution.
+                                        *
+                                        * XXX In XL 9.5 there's a block of code (lines 2670-2730) that
+                                        * finds pushed-down plan, and builds a distribution based on the
+                                        * targetlist, and does a few other things (e.g. inserts Result
+                                        * plan if needed, etc.). I have no idea how to rewrite that at
+                                        * this point :-(
+                                        */
+
+                                       /*
+                                        * We already know the distributions are not equal, but let's
+                                        * see if the redistribution is actually necessary. We can skip
+                                        * it if we already have Result path, and if the distribution is
+                                        *
+                                        * 1) 'hash' restricted to a single node
+                                        *
+                                        * or
+                                        *
+                                        * 2) 'replicate' without volatile functions in the target list
+                                        *
+                                        * In such cases we don't need the RemoteSubplan.
+                                        *
+                                        * XXX Not sure what the (result_plan->lefttree == NULL) does.
+                                        * See planner.c:2730 in 9.5.
+                                        */
+                                       if (!(IsA(path, ResultPath) &&
+                                               ((root->distribution->distributionType == 'H' && bms_num_members(root->distribution->restrictNodes) == 1) ||
+                                                (root->distribution->distributionType == 'R' && !contain_mutable_functions((Node *)parse->targetList)))))
+                                               path = create_remotesubplan_path(root, path, root->distribution);
+                               }
+                       }
+
                        path = (Path *)
                                create_modifytable_path(root, final_rel,
                                                                                parse->commandType,
index c03d981d64ab29f42997aa81807256abccc0e768..1f026c5c5ec17ab14ecb1582cf67a2204242d4a2 100644 (file)
@@ -1229,8 +1229,6 @@ create_remotesubplan_path(PlannerInfo *root, Path *subpath,
        RemoteSubPath  *pathnode;
        Distribution   *subdistribution = subpath->distribution;
 
-       Assert(subdistribution != NULL);
-
        pathnode = makeNode(RemoteSubPath);
        pathnode->path.pathtype = T_RemoteSubplan;
        pathnode->path.parent = rel;
@@ -1243,7 +1241,7 @@ create_remotesubplan_path(PlannerInfo *root, Path *subpath,
 
        cost_remote_subplan((Path *) pathnode, subpath->startup_cost,
                                                subpath->total_cost, subpath->rows, rel->reltarget->width,
-                                               IsLocatorReplicated(subdistribution->distributionType) ?
+                                               (subdistribution && IsLocatorReplicated(subdistribution->distributionType)) ?
                                                bms_num_members(subdistribution->nodes) : 1);
 
        return (Path *) pathnode;