add sj_unique_rels list to plan walker
authorRobert Haas <[email protected]>
Thu, 11 Dec 2025 19:40:52 +0000 (14:40 -0500)
committerRobert Haas <[email protected]>
Thu, 11 Dec 2025 19:41:10 +0000 (14:41 -0500)
contrib/pg_plan_advice/pgpa_planner.c
contrib/pg_plan_advice/pgpa_walker.c
contrib/pg_plan_advice/pgpa_walker.h

index c75bb779cbbb0d082049d9be5334636adcc71b8c..8e87c4cb75fee2addc72a266d1adddf514468484 100644 (file)
@@ -82,6 +82,7 @@ typedef struct pgpa_planner_state
        bool            generate_advice_string;
        pgpa_trove *trove;
        MemoryContext trove_cxt;
+       List       *sj_unique_rels;
 
 #ifdef USE_ASSERT_CHECKING
        pgpa_ri_check_hash *ri_check_hash;
@@ -425,6 +426,49 @@ pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
 
        Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
 
+       /*
+        * If we're considering implementing a semijoin by making one side unique,
+        * make a note of it in the pgpa_planner_state. See comments for
+        * pgpa_sj_unique_rel for why we do this.
+        */
+       if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+       {
+               pgpa_planner_state *pps;
+               RelOptInfo *uniquerel;
+
+               uniquerel = jointype == JOIN_UNIQUE_OUTER ? outerrel : innerrel;
+               pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+               if (pps->generate_advice_string)
+               {
+                       bool    found = false;
+
+                       /* Avoid adding duplicates. */
+                       foreach_ptr(pgpa_sj_unique_rel, ur, pps->sj_unique_rels)
+                       {
+                               /*
+                                * We should always use the same pointer for the same plan
+                                * name, so we need not use strcmp() here.
+                                */
+                               if (root->plan_name == ur->plan_name &&
+                                       bms_equal(uniquerel->relids, ur->relids))
+                               {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       /* If not a duplicate, append to the list. */
+                       if (!found)
+                       {
+                               pgpa_sj_unique_rel *ur = palloc_object(pgpa_sj_unique_rel);
+
+                               ur->plan_name = root->plan_name;
+                               ur->relids = uniquerel->relids;
+                               pps->sj_unique_rels = lappend(pps->sj_unique_rels, ur);
+                       }
+               }
+       }
+
        /* Get our private state information for this join. */
        pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
 
@@ -558,7 +602,7 @@ pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
        do_advice_feedback = (trove != NULL && es != NULL);
        if (generate_advice_string || do_advice_feedback)
        {
-               pgpa_plan_walker(&walker, pstmt);
+               pgpa_plan_walker(&walker, pstmt, pps->sj_unique_rels);
                rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
        }
 
index db8cc4352c33dd5d61e3d4fb1ccc9ee2a4f14ee6..d5493bc0b801ca4143a1f596e1b5e958d571cfca 100644 (file)
@@ -63,9 +63,13 @@ static Index pgpa_walker_get_rti(Index rtable_length,
  * Top-level entrypoint for the plan tree walk.
  *
  * Populates walker based on a traversal of the Plan trees in pstmt.
+ *
+ * sj_unique_rels is a list of pgpa_sj_unique_rel objects, one for each
+ * relation we considered making unique as part of semijoin planning.
  */
 void
-pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt)
+pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt,
+                                List *sj_unique_rels)
 {
        ListCell   *lc;
 
index f244f4428a53dbbe75fe6347cfbb34f26cd309a7..b91a36ca3dd5f986d91be918b50c48260efdc846 100644 (file)
 #include "pgpa_join.h"
 #include "pgpa_scan.h"
 
+/*
+ * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or
+ * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we chose
+ * to implement it as a semijoin or whether we instead chose to make the
+ * nullable side unique and then perform an inner join. When the make-unique
+ * strategy is not chosen, it's not easy to tell from the final plan tree
+ * whether it was considered. That's awkward, because we don't want to emit
+ * useless SEMIJOIN_NON_UNIQUE advice when there was no decision to be made.
+ *
+ * To avoid that, during planning, we create a pgpa_sj_unique_rel for each
+ * relation that we considered making unique for purposes of semijoin planning.
+ */
+typedef struct pgpa_sj_unique_rel
+{
+       char       *plan_name;         /* root->plan_name; compared by pointer */
+       Bitmapset  *relids;            /* relids of the rel to make unique; not copied */
+} pgpa_sj_unique_rel;
+
 /*
  * We use the term "query feature" to refer to plan nodes that are interesting
  * in the following way: to generate advice, we'll need to know the set of
@@ -103,7 +121,8 @@ typedef struct pgpa_plan_walker_context
 } pgpa_plan_walker_context;
 
 extern void pgpa_plan_walker(pgpa_plan_walker_context *walker,
-                                                        PlannedStmt *pstmt);
+                                                        PlannedStmt *pstmt,
+                                                        List *sj_unique_rels);
 
 extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
                                                                        pgpa_qf_type type,