First stab at SJ_UNIQUE.
authorRobert Haas <rhaas@postgresql.org>
Tue, 15 Jul 2025 16:10:31 +0000 (12:10 -0400)
committerRobert Haas <rhaas@postgresql.org>
Tue, 15 Jul 2025 16:10:31 +0000 (12:10 -0400)
contrib/pg_plan_advice/pgpa_join.c
contrib/pg_plan_advice/pgpa_join.h
contrib/pg_plan_advice/pgpa_output.c
contrib/pg_plan_advice/pgpa_walker.c
contrib/pg_plan_advice/pgpa_walker.h

index 3236384d37470ff9f3c70d940d7ccd358fc74c00..8ced1a0c332f072edcceb0e132df7e2ca55e7e49 100644 (file)
@@ -34,7 +34,7 @@ struct pgpa_join_unroller
        pgpa_join_unroller **inner_unrollers;
 };
 
-static pgpa_join_strategy pgpa_decompose_join(PlannedStmt *pstmt,
+static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *context,
                                                                                          Plan *plan,
                                                                                          Plan **realouter,
                                                                                          Plan **realinner,
@@ -44,7 +44,8 @@ static void pgpa_fix_scan_or_clump_member(PlannedStmt *pstmt,
                                                                                  pgpa_join_member *member);
 static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan);
 static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan);
-static ElidedNode *pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan);
+static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                                                       ElidedNode **elided_node);
 
 static Bitmapset *pgpa_add_member_relids(Bitmapset *relids,
                                                                                 pgpa_join_member *member);
@@ -184,7 +185,7 @@ pgpa_create_join_unroller(void)
  * XXX. More comments.
  */
 void
-pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan,
+pgpa_unroll_join(pgpa_plan_walker_context *context, Plan *plan,
                                 pgpa_join_unroller *join_unroller,
                                 pgpa_join_unroller **outer_join_unroller,
                                 pgpa_join_unroller **inner_join_unroller)
@@ -236,7 +237,8 @@ pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan,
         * this should be an unrollable join.
         */
        Assert(pgpa_get_join_class(plan) == PGPA_UNROLLED_JOIN);
-       strategy = pgpa_decompose_join(pstmt, plan, &realouter, &realinner,
+       strategy = pgpa_decompose_join(context, plan,
+                                                                  &realouter, &realinner,
                                                                   &elidedouter, &elidedinner);
 
        /* If our workspace is full, expand it. */
@@ -389,17 +391,24 @@ pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
  * If there are multiple elided nodes, we want that one that would have been
  * uppermost in the plan tree prior to setrefs processing; we expect to find
  * that one last in the list of elided nodes.
+ *
+ * On return *realouter and *realinner will have been set to the real inner
+ * and real outer plans that we identified, and *elidedrealouter and
+ * *elidedrealinner to the last of any correspoding elided nodes.
  */
 static pgpa_join_strategy
-pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan,
+pgpa_decompose_join(pgpa_plan_walker_context *context, Plan *plan,
                                        Plan **realouter, Plan **realinner,
                                        ElidedNode **elidedrealouter, ElidedNode **elidedrealinner)
 {
+       PlannedStmt *pstmt = context->pstmt;
        Plan       *outerplan = plan->lefttree;
        Plan       *innerplan = plan->righttree;
        ElidedNode *elidedouter;
        ElidedNode *elidedinner;
        pgpa_join_strategy strategy;
+       bool            uniqueouter;
+       bool            uniqueinner;
 
        elidedouter = pgpa_last_elided_node(pstmt, outerplan);
        elidedinner = pgpa_last_elided_node(pstmt, innerplan);
@@ -475,13 +484,9 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan,
         * the nullable side of the plan unique, and then performing a normal join
         * against the result. Therefore, we might need to descend through a
         * unique node on either side of the plan.
-        *
-        * XXX. We should really note the unique-ified rels here and emit advice.
         */
-       if (elidedouter == NULL)
-               elidedouter = pgpa_descend_any_unique(pstmt, &outerplan);
-       if (elidedinner == NULL)
-               elidedinner = pgpa_descend_any_unique(pstmt, &innerplan);
+       uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter);
+       uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner);
 
        /*
         * The planner may have decided to parallelize part of the join tree, so
@@ -490,8 +495,6 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan,
         * the join strategy and (2) nodes inserted for uniqueness, because we
         * currently only consider gathering a partial path on top of the final
         * path for some RelOptInfo.
-        *
-        * XXX. We should really note the gathered rels here and emit advice.
         */
        if (elidedouter == NULL)
                elidedouter = pgpa_descend_any_gather(pstmt, &outerplan);
@@ -513,6 +516,19 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan,
        if (elidedinner == NULL && is_result_node_with_child(innerplan))
                elidedinner = pgpa_descend_node(pstmt, &innerplan);
 
+       /*
+        * If we descended through a unique node, make a note that the identified
+        * inner or outer subplan should be treated as a query plan feature when
+        * the main tree traversal reaches it. (We could designate the Agg or
+        * Unique node itself as the feature, but there shouldn't be any
+        * RTI-bearing nodes between that node and this one.)
+        */
+       if (uniqueouter)
+               pgpa_add_future_feature(context, PGPAQF_SJ_UNIQUE, outerplan);
+       if (uniqueinner)
+               pgpa_add_future_feature(context, PGPAQF_SJ_UNIQUE, innerplan);
+
+       /* Set output parameters. */
        *realouter = outerplan;
        *realinner = innerplan;
        *elidedrealouter = elidedouter;
@@ -606,29 +622,40 @@ pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan)
 }
 
 /*
- * Descend through an Agg or Unique node, if present, and any Sort or
- * IncrementalSort node occurring beneath it.
+ * If *plan is an Agg or Unique node, we want to descend through it, unless
+ * it has a corresponding elided node. If its immediate child is a Sort or
+ * IncrementalSort, we also want to descend through that, unless it has a
+ * corresponding elided node.
  *
- * Caller should have verified that there is no ElidedNode pertaining to
- * the initial value of *plan.
+ * On entry, *elided_node must be the last of any elided nodes corresponding
+ * to *plan; on exit, this will still be true, but *plan may have been updated.
  *
- * Updates *plan, and returns the last of any elided nodes pertaining to the
- * new plan node.
+ * The reason we don't want to descend through elided nodes is that a single
+ * join tree can't cross through any sort of elided node: subqueries are
+ * planned separately, and planning inside an Append or MergeAppend is
+ * separate from planning outside of it.
+ *
+ * The return value is true if we descend through at least one node, and
+ * otherwise false.
  */
-static ElidedNode *
-pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan)
+static bool
+pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+                                               ElidedNode **elided_node)
 {
+       if (*elided_node != NULL)
+               return false;
+
        if (IsA(*plan, Agg) || IsA(*plan, Unique))
        {
-               ElidedNode *elided = pgpa_descend_node(pstmt, plan);
+               *elided_node = pgpa_descend_node(pstmt, plan);
 
-               if (elided == NULL && is_sorting_plan(*plan))
-                       elided = pgpa_descend_node(pstmt, plan);
+               if (*elided_node == NULL && is_sorting_plan(*plan))
+                       *elided_node = pgpa_descend_node(pstmt, plan);
 
-               return elided;
+               return true;
        }
 
-       return NULL;
+       return false;
 }
 
 /*
index 0c9f3e949b814f6dc1f044e93e9e92ae27b5454c..55e2cfbecfa9bc588e2d4c4a13bb1653049736a2 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "nodes/plannodes.h"
 
+struct pgpa_plan_walker_context;
 typedef struct pgpa_join_unroller pgpa_join_unroller;
 typedef struct pgpa_unrolled_join pgpa_unrolled_join;
 
@@ -60,8 +61,6 @@ typedef struct
  * more precisely here: merge joins have the option of using materialization
  * on the inner side, and nested loops can use either materialization or
  * memoization.
- *
- * XXX. I think we need to do something with Unique here also.
  */
 typedef enum
 {
@@ -151,7 +150,8 @@ extern pgpa_clumped_join *pgpa_build_clumped_join(PlannedStmt *pstmt,
                                                                                                  ElidedNode *elided_node);
 
 extern pgpa_join_unroller *pgpa_create_join_unroller(void);
-extern void pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan,
+extern void pgpa_unroll_join(struct pgpa_plan_walker_context *context,
+                                                        Plan *plan,
                                                         pgpa_join_unroller *join_unroller,
                                                         pgpa_join_unroller **outer_join_unroller,
                                                         pgpa_join_unroller **inner_join_unroller);
index 9626748864a5635fc0824ed2ae902f96a1c3af7c..632f3dff62172d686ebdcf7b9fb5b4f0eb1bdec8 100644 (file)
@@ -153,12 +153,18 @@ pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker,
 
                if (buf->len > 0)
                        appendStringInfoChar(buf, '\n');
-               if (IsA(qf->plan, Gather))
-                       appendStringInfo(buf, "GATHER(");
-               else if (IsA(qf->plan, GatherMerge))
-                       appendStringInfo(buf, "GATHER_MERGE(");
-               else
-                       elog(ERROR, "unrecognized node type: %d", (int) nodeTag(qf->plan));
+               switch (qf->type)
+               {
+                       case PGPAQF_GATHER:
+                               appendStringInfo(buf, "GATHER(");
+                               break;
+                       case PGPAQF_GATHER_MERGE:
+                               appendStringInfo(buf, "GATHER_MERGE(");
+                               break;
+                       case PGPAQF_SJ_UNIQUE:
+                               appendStringInfo(buf, "SJ_UNIQUE(");
+                               break;
+               }
                pgpa_output_relations(&context, buf, qf->relids);
                appendStringInfoChar(buf, ')');
        }
index 26967162b41ff1e01dd149bbfdbd17376a7649c8..e0cd9355a9cebf95bde6112aebfc9cd7a41bf3f2 100644 (file)
@@ -5,6 +5,10 @@
 
 #include "nodes/plannodes.h"
 
+static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *context,
+                                                                                       pgpa_qf_type type,
+                                                                                       Plan *plan);
+
 static void pgpa_qf_add_rti(List *active_query_features, Index rti);
 static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids);
 static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan);
@@ -179,7 +183,7 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan,
         * outer and inner sides of the plan.
         */
        if (join_unroller != NULL)
-               pgpa_unroll_join(context->pstmt, plan, join_unroller,
+               pgpa_unroll_join(context, plan, join_unroller,
                                                 &outer_join_unroller, &inner_join_unroller);
 
        /*
@@ -189,29 +193,46 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan,
        if (gathered_join != NULL)
                pgpa_add_to_gathered_join(gathered_join, plan);
 
-       /* XXX */
-       pgpa_qf_add_plan_rtis(active_query_features, plan);
-
-       /* XXX */
-       if (IsA(plan, Gather) || IsA(plan, GatherMerge))
-               is_query_feature = true;
-
        /*
-        * If we determined that this node is a query feature, add it to the list
-        * of currently-active query features and also to the context's list of
-        * all query features encountered during the plan tree traversal.
+        * If this is a Gather or Gather Merge node, directly add it to the list
+        * of currently-active query features.
+        *
+        * Otherwise, check the future_query_features list to see whether this
+        * was previously identified as a plan node that needs to be treated as
+        * a query feature.
         */
-       if (is_query_feature)
+       if (IsA(plan, Gather))
        {
-               pgpa_query_feature *qf = palloc_object(pgpa_query_feature);
-
-               qf->plan = plan;
-               qf->relids = NULL;
-
-               active_query_features = lappend(active_query_features, qf);
-               context->query_features = lappend(context->query_features, qf);
+               active_query_features =
+                       lappend(active_query_features,
+                                       pgpa_add_feature(context, PGPAQF_GATHER, plan));
+               is_query_feature = true;
+       }
+       else if (IsA(plan, GatherMerge))
+       {
+               active_query_features =
+                       lappend(active_query_features,
+                                       pgpa_add_feature(context, PGPAQF_GATHER_MERGE, plan));
+               is_query_feature = true;
+       }
+       else
+       {
+               foreach_ptr(pgpa_query_feature, qf, context->future_query_features)
+               {
+                       if (qf->plan == plan)
+                       {
+                               is_query_feature = true;
+                               active_query_features = lappend(active_query_features, qf);
+                               context->future_query_features =
+                                       list_delete_ptr(context->future_query_features, plan);
+                               break;
+                       }
+               }
        }
 
+       /* Add RTIs from the plan node to all active query features. */
+       pgpa_qf_add_plan_rtis(active_query_features, plan);
+
        /* Recurse into the outer and inner subtrees. */
        if (plan->lefttree != NULL)
                pgpa_plan_walker(context, plan->lefttree, join_unroll_level,
@@ -307,6 +328,23 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan,
        }
 }
 
+/*
+ * Arrange for the given plan node to be treated as a query feature when the
+ * tree walk reaches it.
+ *
+ * Make sure to only use this for nodes that the tree walk can't have reached
+ * yet!
+ */
+void
+pgpa_add_future_feature(pgpa_plan_walker_context *context,
+                                               pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *qf = pgpa_add_feature(context, type, plan);
+
+       context->future_query_features =
+               lappend(context->future_query_features, qf);
+}
+
 /*
  * Return the last of any elided nodes associated with this plan node ID.
  *
@@ -389,6 +427,24 @@ pgpa_scanrelid(Plan *plan)
        }
 }
 
+/*
+ * Create a pgpa_query_feature and add it to the list of all query features
+ * for this plan.
+ */
+static pgpa_query_feature *
+pgpa_add_feature(pgpa_plan_walker_context *context,
+                                pgpa_qf_type type, Plan *plan)
+{
+       pgpa_query_feature *qf = palloc0_object(pgpa_query_feature);
+
+       qf->type = type;
+       qf->plan = plan;
+
+       context->query_features = lappend(context->query_features, qf);
+
+       return qf;
+}
+
 /*
  * Add a single RTI to each active query feature.
  */
index 1a58df39c02401c038c46389b8d86c989f9e56aa..6533a877e0793c0900fc29a0b2ed98689c84e040 100644 (file)
 /*
  * We use the term "query feature" to refer to plan nodes that are interesting
  * in the following way: to generate advice, we'll need to know the set of
- * same-subquery, non-join RTIs occuring beneath that plan node.
+ * same-subquery, non-join RTIs occuring at or below that plan node, without
+ * admixture of parent and child RTIs.
  *
- * For example, a Gather node is a query feature, because we'll want to admit
- * some kind of advice that describes the portion of the plan tree that appears
- * beneath the Gather node.
+ * For example, Gather nodes, desiginated by PGPAQF_GATHER, and Gather Merge
+ * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll
+ * want to admit some kind of advice that describes the portion of the plan
+ * tree that appears beneath those nodes. Whenever we decide to implement a
+ * semijoin by making one side unique and then performing a normal join, we
+ * create a feature of PGPAQF_SG_UNIQUE, so that we can describe what RTIs need
+ * to be made unique to perform the join. XXX We should also probably emit
+ * advice for the decision NOT to unique-ify a semijoin.
+ *
+ * To elaborate on the "no admixture of parent and child RTIs" rule, in all of
+ * these cases, if the entirety of an inheritance hierarchy appears beneath
+ * the query feature, we only want to name the parent table. But it's also
+ * possible to have cases where we must name child tables. This is particularly
+ * likely to happen when partitionwise join is in use, but could happen for
+ * Gather or Gather Merge even without that, if one of those appears below
+ * an Append or MergeAppend node for a single table.
+ */
+typedef enum pgpa_qf_type
+{
+       PGPAQF_GATHER,
+       PGPAQF_GATHER_MERGE,
+       PGPAQF_SJ_UNIQUE
+} pgpa_qf_type;
+
+/*
+ * For each query feature, we keep track of the feature type and the set of
+ * relids that we found underneath the relevant plan node. See the comments
+ * on pgpa_qf_type, above, for additional details.
  */
 typedef struct pgpa_query_feature
 {
+       pgpa_qf_type    type;
        Plan       *plan;
        Bitmapset  *relids;
 } pgpa_query_feature;
@@ -36,6 +63,7 @@ typedef struct pgpa_plan_walker_context
        List       *clumped_joins;
        List       *gathered_joins;
        List       *query_features;
+       List       *future_query_features;
 } pgpa_plan_walker_context;
 
 extern void pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan,
@@ -44,6 +72,10 @@ extern void pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan,
                                                         pgpa_gathered_join *gathered_join,
                                                         List *active_query_features);
 
+extern void pgpa_add_future_feature(pgpa_plan_walker_context *context,
+                                                                       pgpa_qf_type type,
+                                                                       Plan *plan);
+
 extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
 extern Bitmapset *pgpa_relids(Plan *plan);
 extern Index pgpa_scanrelid(Plan *plan);