From d3f52c7a5c758f59a68f15601214313e6a3a079a Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 15 Jul 2025 12:10:31 -0400 Subject: [PATCH] First stab at SJ_UNIQUE. --- contrib/pg_plan_advice/pgpa_join.c | 79 +++++++++++++++-------- contrib/pg_plan_advice/pgpa_join.h | 6 +- contrib/pg_plan_advice/pgpa_output.c | 18 ++++-- contrib/pg_plan_advice/pgpa_walker.c | 94 ++++++++++++++++++++++------ contrib/pg_plan_advice/pgpa_walker.h | 40 ++++++++++-- 5 files changed, 179 insertions(+), 58 deletions(-) diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c index 3236384d37..8ced1a0c33 100644 --- a/contrib/pg_plan_advice/pgpa_join.c +++ b/contrib/pg_plan_advice/pgpa_join.c @@ -34,7 +34,7 @@ struct pgpa_join_unroller pgpa_join_unroller **inner_unrollers; }; -static pgpa_join_strategy pgpa_decompose_join(PlannedStmt *pstmt, +static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *context, Plan *plan, Plan **realouter, Plan **realinner, @@ -44,7 +44,8 @@ static void pgpa_fix_scan_or_clump_member(PlannedStmt *pstmt, pgpa_join_member *member); static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan); static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan); -static ElidedNode *pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan); +static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node); static Bitmapset *pgpa_add_member_relids(Bitmapset *relids, pgpa_join_member *member); @@ -184,7 +185,7 @@ pgpa_create_join_unroller(void) * XXX. More comments. */ void -pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan, +pgpa_unroll_join(pgpa_plan_walker_context *context, Plan *plan, pgpa_join_unroller *join_unroller, pgpa_join_unroller **outer_join_unroller, pgpa_join_unroller **inner_join_unroller) @@ -236,7 +237,8 @@ pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan, * this should be an unrollable join. */ Assert(pgpa_get_join_class(plan) == PGPA_UNROLLED_JOIN); - strategy = pgpa_decompose_join(pstmt, plan, &realouter, &realinner, + strategy = pgpa_decompose_join(context, plan, + &realouter, &realinner, &elidedouter, &elidedinner); /* If our workspace is full, expand it. */ @@ -389,17 +391,24 @@ pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller) * If there are multiple elided nodes, we want that one that would have been * uppermost in the plan tree prior to setrefs processing; we expect to find * that one last in the list of elided nodes. + * + * On return *realouter and *realinner will have been set to the real inner + * and real outer plans that we identified, and *elidedrealouter and + * *elidedrealinner to the last of any correspoding elided nodes. */ static pgpa_join_strategy -pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan, +pgpa_decompose_join(pgpa_plan_walker_context *context, Plan *plan, Plan **realouter, Plan **realinner, ElidedNode **elidedrealouter, ElidedNode **elidedrealinner) { + PlannedStmt *pstmt = context->pstmt; Plan *outerplan = plan->lefttree; Plan *innerplan = plan->righttree; ElidedNode *elidedouter; ElidedNode *elidedinner; pgpa_join_strategy strategy; + bool uniqueouter; + bool uniqueinner; elidedouter = pgpa_last_elided_node(pstmt, outerplan); elidedinner = pgpa_last_elided_node(pstmt, innerplan); @@ -475,13 +484,9 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan, * the nullable side of the plan unique, and then performing a normal join * against the result. Therefore, we might need to descend through a * unique node on either side of the plan. - * - * XXX. We should really note the unique-ified rels here and emit advice. */ - if (elidedouter == NULL) - elidedouter = pgpa_descend_any_unique(pstmt, &outerplan); - if (elidedinner == NULL) - elidedinner = pgpa_descend_any_unique(pstmt, &innerplan); + uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter); + uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner); /* * The planner may have decided to parallelize part of the join tree, so @@ -490,8 +495,6 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan, * the join strategy and (2) nodes inserted for uniqueness, because we * currently only consider gathering a partial path on top of the final * path for some RelOptInfo. - * - * XXX. We should really note the gathered rels here and emit advice. */ if (elidedouter == NULL) elidedouter = pgpa_descend_any_gather(pstmt, &outerplan); @@ -513,6 +516,19 @@ pgpa_decompose_join(PlannedStmt *pstmt, Plan *plan, if (elidedinner == NULL && is_result_node_with_child(innerplan)) elidedinner = pgpa_descend_node(pstmt, &innerplan); + /* + * If we descended through a unique node, make a note that the identified + * inner or outer subplan should be treated as a query plan feature when + * the main tree traversal reaches it. (We could designate the Agg or + * Unique node itself as the feature, but there shouldn't be any + * RTI-bearing nodes between that node and this one.) + */ + if (uniqueouter) + pgpa_add_future_feature(context, PGPAQF_SJ_UNIQUE, outerplan); + if (uniqueinner) + pgpa_add_future_feature(context, PGPAQF_SJ_UNIQUE, innerplan); + + /* Set output parameters. */ *realouter = outerplan; *realinner = innerplan; *elidedrealouter = elidedouter; @@ -606,29 +622,40 @@ pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan) } /* - * Descend through an Agg or Unique node, if present, and any Sort or - * IncrementalSort node occurring beneath it. + * If *plan is an Agg or Unique node, we want to descend through it, unless + * it has a corresponding elided node. If its immediate child is a Sort or + * IncrementalSort, we also want to descend through that, unless it has a + * corresponding elided node. * - * Caller should have verified that there is no ElidedNode pertaining to - * the initial value of *plan. + * On entry, *elided_node must be the last of any elided nodes corresponding + * to *plan; on exit, this will still be true, but *plan may have been updated. * - * Updates *plan, and returns the last of any elided nodes pertaining to the - * new plan node. + * The reason we don't want to descend through elided nodes is that a single + * join tree can't cross through any sort of elided node: subqueries are + * planned separately, and planning inside an Append or MergeAppend is + * separate from planning outside of it. + * + * The return value is true if we descend through at least one node, and + * otherwise false. */ -static ElidedNode * -pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan) +static bool +pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node) { + if (*elided_node != NULL) + return false; + if (IsA(*plan, Agg) || IsA(*plan, Unique)) { - ElidedNode *elided = pgpa_descend_node(pstmt, plan); + *elided_node = pgpa_descend_node(pstmt, plan); - if (elided == NULL && is_sorting_plan(*plan)) - elided = pgpa_descend_node(pstmt, plan); + if (*elided_node == NULL && is_sorting_plan(*plan)) + *elided_node = pgpa_descend_node(pstmt, plan); - return elided; + return true; } - return NULL; + return false; } /* diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h index 0c9f3e949b..55e2cfbecf 100644 --- a/contrib/pg_plan_advice/pgpa_join.h +++ b/contrib/pg_plan_advice/pgpa_join.h @@ -14,6 +14,7 @@ #include "nodes/plannodes.h" +struct pgpa_plan_walker_context; typedef struct pgpa_join_unroller pgpa_join_unroller; typedef struct pgpa_unrolled_join pgpa_unrolled_join; @@ -60,8 +61,6 @@ typedef struct * more precisely here: merge joins have the option of using materialization * on the inner side, and nested loops can use either materialization or * memoization. - * - * XXX. I think we need to do something with Unique here also. */ typedef enum { @@ -151,7 +150,8 @@ extern pgpa_clumped_join *pgpa_build_clumped_join(PlannedStmt *pstmt, ElidedNode *elided_node); extern pgpa_join_unroller *pgpa_create_join_unroller(void); -extern void pgpa_unroll_join(PlannedStmt *pstmt, Plan *plan, +extern void pgpa_unroll_join(struct pgpa_plan_walker_context *context, + Plan *plan, pgpa_join_unroller *join_unroller, pgpa_join_unroller **outer_join_unroller, pgpa_join_unroller **inner_join_unroller); diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c index 9626748864..632f3dff62 100644 --- a/contrib/pg_plan_advice/pgpa_output.c +++ b/contrib/pg_plan_advice/pgpa_output.c @@ -153,12 +153,18 @@ pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker, if (buf->len > 0) appendStringInfoChar(buf, '\n'); - if (IsA(qf->plan, Gather)) - appendStringInfo(buf, "GATHER("); - else if (IsA(qf->plan, GatherMerge)) - appendStringInfo(buf, "GATHER_MERGE("); - else - elog(ERROR, "unrecognized node type: %d", (int) nodeTag(qf->plan)); + switch (qf->type) + { + case PGPAQF_GATHER: + appendStringInfo(buf, "GATHER("); + break; + case PGPAQF_GATHER_MERGE: + appendStringInfo(buf, "GATHER_MERGE("); + break; + case PGPAQF_SJ_UNIQUE: + appendStringInfo(buf, "SJ_UNIQUE("); + break; + } pgpa_output_relations(&context, buf, qf->relids); appendStringInfoChar(buf, ')'); } diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c index 26967162b4..e0cd9355a9 100644 --- a/contrib/pg_plan_advice/pgpa_walker.c +++ b/contrib/pg_plan_advice/pgpa_walker.c @@ -5,6 +5,10 @@ #include "nodes/plannodes.h" +static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *context, + pgpa_qf_type type, + Plan *plan); + static void pgpa_qf_add_rti(List *active_query_features, Index rti); static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids); static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan); @@ -179,7 +183,7 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan, * outer and inner sides of the plan. */ if (join_unroller != NULL) - pgpa_unroll_join(context->pstmt, plan, join_unroller, + pgpa_unroll_join(context, plan, join_unroller, &outer_join_unroller, &inner_join_unroller); /* @@ -189,29 +193,46 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan, if (gathered_join != NULL) pgpa_add_to_gathered_join(gathered_join, plan); - /* XXX */ - pgpa_qf_add_plan_rtis(active_query_features, plan); - - /* XXX */ - if (IsA(plan, Gather) || IsA(plan, GatherMerge)) - is_query_feature = true; - /* - * If we determined that this node is a query feature, add it to the list - * of currently-active query features and also to the context's list of - * all query features encountered during the plan tree traversal. + * If this is a Gather or Gather Merge node, directly add it to the list + * of currently-active query features. + * + * Otherwise, check the future_query_features list to see whether this + * was previously identified as a plan node that needs to be treated as + * a query feature. */ - if (is_query_feature) + if (IsA(plan, Gather)) { - pgpa_query_feature *qf = palloc_object(pgpa_query_feature); - - qf->plan = plan; - qf->relids = NULL; - - active_query_features = lappend(active_query_features, qf); - context->query_features = lappend(context->query_features, qf); + active_query_features = + lappend(active_query_features, + pgpa_add_feature(context, PGPAQF_GATHER, plan)); + is_query_feature = true; + } + else if (IsA(plan, GatherMerge)) + { + active_query_features = + lappend(active_query_features, + pgpa_add_feature(context, PGPAQF_GATHER_MERGE, plan)); + is_query_feature = true; + } + else + { + foreach_ptr(pgpa_query_feature, qf, context->future_query_features) + { + if (qf->plan == plan) + { + is_query_feature = true; + active_query_features = lappend(active_query_features, qf); + context->future_query_features = + list_delete_ptr(context->future_query_features, plan); + break; + } + } } + /* Add RTIs from the plan node to all active query features. */ + pgpa_qf_add_plan_rtis(active_query_features, plan); + /* Recurse into the outer and inner subtrees. */ if (plan->lefttree != NULL) pgpa_plan_walker(context, plan->lefttree, join_unroll_level, @@ -307,6 +328,23 @@ pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan, } } +/* + * Arrange for the given plan node to be treated as a query feature when the + * tree walk reaches it. + * + * Make sure to only use this for nodes that the tree walk can't have reached + * yet! + */ +void +pgpa_add_future_feature(pgpa_plan_walker_context *context, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = pgpa_add_feature(context, type, plan); + + context->future_query_features = + lappend(context->future_query_features, qf); +} + /* * Return the last of any elided nodes associated with this plan node ID. * @@ -389,6 +427,24 @@ pgpa_scanrelid(Plan *plan) } } +/* + * Create a pgpa_query_feature and add it to the list of all query features + * for this plan. + */ +static pgpa_query_feature * +pgpa_add_feature(pgpa_plan_walker_context *context, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = palloc0_object(pgpa_query_feature); + + qf->type = type; + qf->plan = plan; + + context->query_features = lappend(context->query_features, qf); + + return qf; +} + /* * Add a single RTI to each active query feature. */ diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h index 1a58df39c0..6533a877e0 100644 --- a/contrib/pg_plan_advice/pgpa_walker.h +++ b/contrib/pg_plan_advice/pgpa_walker.h @@ -17,14 +17,41 @@ /* * We use the term "query feature" to refer to plan nodes that are interesting * in the following way: to generate advice, we'll need to know the set of - * same-subquery, non-join RTIs occuring beneath that plan node. + * same-subquery, non-join RTIs occuring at or below that plan node, without + * admixture of parent and child RTIs. * - * For example, a Gather node is a query feature, because we'll want to admit - * some kind of advice that describes the portion of the plan tree that appears - * beneath the Gather node. + * For example, Gather nodes, desiginated by PGPAQF_GATHER, and Gather Merge + * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll + * want to admit some kind of advice that describes the portion of the plan + * tree that appears beneath those nodes. Whenever we decide to implement a + * semijoin by making one side unique and then performing a normal join, we + * create a feature of PGPAQF_SG_UNIQUE, so that we can describe what RTIs need + * to be made unique to perform the join. XXX We should also probably emit + * advice for the decision NOT to unique-ify a semijoin. + * + * To elaborate on the "no admixture of parent and child RTIs" rule, in all of + * these cases, if the entirety of an inheritance hierarchy appears beneath + * the query feature, we only want to name the parent table. But it's also + * possible to have cases where we must name child tables. This is particularly + * likely to happen when partitionwise join is in use, but could happen for + * Gather or Gather Merge even without that, if one of those appears below + * an Append or MergeAppend node for a single table. + */ +typedef enum pgpa_qf_type +{ + PGPAQF_GATHER, + PGPAQF_GATHER_MERGE, + PGPAQF_SJ_UNIQUE +} pgpa_qf_type; + +/* + * For each query feature, we keep track of the feature type and the set of + * relids that we found underneath the relevant plan node. See the comments + * on pgpa_qf_type, above, for additional details. */ typedef struct pgpa_query_feature { + pgpa_qf_type type; Plan *plan; Bitmapset *relids; } pgpa_query_feature; @@ -36,6 +63,7 @@ typedef struct pgpa_plan_walker_context List *clumped_joins; List *gathered_joins; List *query_features; + List *future_query_features; } pgpa_plan_walker_context; extern void pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan, @@ -44,6 +72,10 @@ extern void pgpa_plan_walker(pgpa_plan_walker_context *context, Plan *plan, pgpa_gathered_join *gathered_join, List *active_query_features); +extern void pgpa_add_future_feature(pgpa_plan_walker_context *context, + pgpa_qf_type type, + Plan *plan); + extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan); extern Bitmapset *pgpa_relids(Plan *plan); extern Index pgpa_scanrelid(Plan *plan); -- 2.39.5