From: Robert Haas Date: Tue, 22 Apr 2025 20:08:55 +0000 (-0400) Subject: pgpa_join.h X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=dfb9a1519c9ff70867651515127b9952ce7464c7;p=users%2Frhaas%2Fpostgres.git pgpa_join.h This is wrong insofar as the thinking about the ElidedNode case is not in the comments, and maybe we should actually store the ElidedNode in the arrays. --- diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h index ef2f003a65..7af620f87e 100644 --- a/contrib/pg_plan_advice/pgpa_join.h +++ b/contrib/pg_plan_advice/pgpa_join.h @@ -1,12 +1,45 @@ +/*------------------------------------------------------------------------- + * + * pgpa_join.h + * analysis of joins in Plan trees + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_join.h + * + *------------------------------------------------------------------------- + */ #ifndef PGPA_JOIN_H #define PGPA_JOIN_H +/* + * Certain types of plan nodes can join any number of input relations in + * a single step; we call these "clumped joins". + * + * For our purposes, the important thing about a clumped join is that we + * can't meaningfully speak about the order in which tables are joined + * within a single clump. For example, if the optimizer chooses a + * partitionwise join on tables A and B, we can't say whether A was joined + * to B or whether B was joined to A; instead, each pair of child tables + * has its own join order. Likewise, if a foreign data wrapper pushes a + * join to the remote side, we don't know the join order. + * + * JSTRAT_CLUMP_DEGENERATE refers to the case where several relations are + * all proven empty and replaced with a single Result node. 
Here again, while + * the Result node may be joined to other things and we can speak about its + * place within the larger join order, we can't speak about a join ordering + * within the Result node itself. + */ typedef enum { JSTRAT_CLUMP_DEGENERATE, + JSTRAT_CLUMP_FOREIGN, JSTRAT_CLUMP_PARTITIONWISE } pgpa_join_clump_strategy; +/* + * All of the details we need regarding a clumped join. + */ typedef struct { unsigned nmembers; @@ -15,9 +48,14 @@ typedef struct pgpa_join_clump_strategy strategy; } pgpa_clumped_join; +/* + * Although there are three main join strategies, we try to classify things + * more precisely here: merge joins have the option of using materialization + * on the inner side, and nested loops can use either materialization or + * memoization. + */ typedef enum { - JSTRAT_FOREIGN, JSTRAT_MERGEJOIN_PLAIN, JSTRAT_MERGEJOIN_MATERIALIZE, JSTRAT_NESTLOOP_PLAIN, @@ -26,21 +64,79 @@ typedef enum JSTRAT_HASHJOIN } pgpa_join_strategy; +/* + * Non-clumped joins are unrolled; that is, we convert outer-deep join trees + * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to + * outer_subplan = A, inner_subplans = <B, C, D>, provided that none of the + * joins involved are clumped. When joins aren't outer-deep, substructure is + * required, e.g. (A JOIN B) JOIN (C JOIN D) is represented as + * outer_subplan = A, inner_subplans = <B, X>, where X is a clumped or + * unrolled join covering C-D. + */ typedef struct { - unsigned nallocated; - unsigned nused; + /* + * Typically, the outermost subplan will be a scan of some relation, in + * which case outer_rti will be the RTI extracted from outer_subplan, and + * outer_clump_join will be NULL; but it's possible that it could be a + * clumped join, in which case outer_subplan will be NULL and outer_rti + * will be zero. 
+ */ Plan *outer_subplan; Index outer_rti; + pgpa_clump_join **outer_clump_join; + + /* + * nallocated is the allocated length of the strategy, inner_subplans, + * inner_rti, inner_unrolled_join, and inner_clump_join arrays; and nused + * is the number of elements in use within each of those arrays. + */ + unsigned nallocated; + unsigned nused; + + /* + * For each 0 <= n < nused, strategy[n] is the join strategy used to join the + * next table or group of tables, as further detailed by the fields below. + * For instance, given (A MERGE_JOIN_PLAIN B) HASH_JOIN C, we would set + * strategy[0] = JSTRAT_MERGEJOIN_PLAIN and strategy[1] = + * JSTRAT_HASHJOIN. + */ pgpa_join_strategy **strategy; + + /* + * For each 0 <= n < nused, exactly one of inner_subplans[n], + * inner_unrolled_join[n], and inner_clump_join[n] should be non-NULL; + * inner_rti[n] should be non-zero if and only if inner_subplans[n] is + * non-NULL. + * + * Note that when inner_unrolled_join[n] is non-NULL, we've got what the + * optimizer calls a "bushy" plan, where several tables are joined to each + * other before being joined to the remainder of the join tree all at + * once. + */ Plan **inner_subplans; Index *inner_rti; pgpa_unrolled_join **inner_unrolled_join; pgpa_clump_join **inner_clump_join; } pgpa_unrolled_join; +/* Derive a clumped join from a Plan. */ +extern pgpa_clumped_join *pgpa_make_clumped_join(Plan *plan); + +/* + * pgpa_unrolled_join objects are constructed in steps. + * + * Call pgpa_make_unrolled_join() just once. Then, call one of the + * pgpa_set_outer_* functions just once. Then, call one of the + * pgpa_append_inner_* functions once for every join level represented + * by this object. 
+ */ extern pgpa_unrolled_join *pgpa_make_unrolled_join(void); -extern void pgpa_set_outer(pgpa_unrolled_join *join, Plan *outer_subplan); +extern void pgpa_set_outer_scan(pgpa_unrolled_join *join, Plan *outer_subplan); +extern void pgpa_set_outer_elided_scan(pgpa_unrolled_join *join, + ElidedNode *elided_node); +extern void pgpa_set_outer_clumped_join(pgpa_unrolled_join *join, + pgpa_clumped_join *subjoin); extern void pgpa_append_inner_scan(pgpa_unrolled_join *join, Plan *inner_subplan); extern void pgpa_append_inner_elided_scan(pgpa_unrolled_join *join,