From 4e51a5f7b7d4908ce1d0eaa2a5c2f32451a28505 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 24 Apr 2025 14:37:56 -0400 Subject: [PATCH] pgpa_join.c --- contrib/pg_plan_advice/meson.build | 3 +- contrib/pg_plan_advice/pgpa_join.c | 222 +++++++++++++++++++++++++++++ contrib/pg_plan_advice/pgpa_join.h | 136 +++++++++--------- 3 files changed, 289 insertions(+), 72 deletions(-) create mode 100644 contrib/pg_plan_advice/pgpa_join.c diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build index 2f05b7ff54..8caca010ac 100644 --- a/contrib/pg_plan_advice/meson.build +++ b/contrib/pg_plan_advice/meson.build @@ -1,8 +1,7 @@ # Copyright (c) 2022-2024, PostgreSQL Global Development Group pg_plan_advice_sources = files( - 'create_advice.c', - 'pg_plan_advice.c', + 'pgpa_join.c', ) if host_system == 'windows' diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c new file mode 100644 index 0000000000..7ed543bbd3 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_join.c @@ -0,0 +1,222 @@ +#include "postgres.h" + +#include "pgpa_join.h" + +static pgpa_join_strategy pgpa_decompose_join(Plan *plan, + Plan **realinner, + Plan **realouter); +static void pgpa_fill_join_member(PlannedStmt *pstmt, pgpa_join_member * m); +static pgpa_clumped_join *pgpa_make_clumped_join_from_elided(ElidedNode *n); +static unsigned pgpa_get_single_rti(Plan *plan); + +/* + * Should a given plan node be represented by a pgpa_unrolled_join, a + * pgpa_clumped_join, or neither? + */ +pgpa_join_class +pgpa_get_join_class(Plan *plan) +{ + Bitmapset *relids = NULL; + + /* Standard join types can be unrolled. */ + if (IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin)) + return PGPA_UNROLLED_JOIN; + + /* + * If a Result node, ForeignScan node, Append node, or MergeAppend node + * has multiple relids, then it's a clumped join; otherwise, it's not a + * join at all. + */ + if (IsA(plan, Result)) + relids = ((Result *) plan)->relids; + else if (IsA(plan, ForeignScan)) + relids = ((ForeignScan *) plan)->fs_relids; + else if (IsA(plan, Append)) + relids = ((Append *) plan)->apprelids; + else if (IsA(plan, MergeAppend)) + relids = ((MergeAppend *) plan)->apprelids; + if (bms_membership(relids) == BMS_MULTIPLE) + return PGPA_CLUMPED_JOIN; + + /* Doesn't seem to be a join. */ + return PGPA_NON_JOIN; +} + +pgpa_unrolled_join * +pgpa_make_unrolled_join(PlannedStmt *pstmt, Plan *plan) +{ + unsigned nallocated = 4; + unsigned nused = 0; + unsigned i; + Plan **subplans; + pgpa_join_strategy *strategy; + pgpa_unrolled_join *ujoin; + + subplans = palloc_array(Plan *, nallocated); + strategy = palloc_array(pgpa_join_strategy, nallocated); + + /* + * Repeatedly descend to the outer child of the current join until we find + * something that is not an unrollable join. + */ + while (pgpa_get_join_class(plan) == PGPA_UNROLLED_JOIN) + { + if (nallocated == nused) + { + nallocated *= 2; + subplans = repalloc_array(subplans, Plan *, nallocated); + strategy = repalloc_array(strategy, pgpa_join_strategy, + nallocated); + } + + strategy[nused] = + pgpa_decompose_join(plan, &plan, &subplans[nused]); + ++nused; + } + + /* + * This function should only be called with an unrollable join, so the + * loop above should have iterated at least once. + */ + Assert(nused > 0); + + /* Allocate result structures. */ + ujoin = palloc0_object(pgpa_unrolled_join); + ujoin->outer.plan = plan; + ujoin->strategy = palloc0_array(pgpa_join_strategy, nused); + ujoin->inner = palloc0_array(pgpa_join_member, nused); + + /* + * We want the joins from the deepest part of the plan tree to appear + * first in the result object, and the loop above produces exactly the + * reverse, so we need to flip both arrays when constructing the final + * result. + */ + for (i = 0; i < nused; ++i) + { + ujoin->strategy[i] = strategy[nused - i - 1]; + ujoin->inner[i].plan = subplans[nused - i - 1]; + } + + /* + * Basic structure is now in place, but so far each pgpa_join_member only + * has a Plan pointer. Let's go back through them and complete the + * remaining details. + */ + pgpa_fill_join_member(pstmt, &ujoin->outer); + for (i = 0; i < nused; ++i) + pgpa_fill_join_member(pstmt, &ujoin->inner[i]); + + return ujoin; +} + +static pgpa_join_strategy +pgpa_decompose_join(Plan *plan, Plan **realinner, Plan **realouter) +{ + Plan *outerplan = plan->lefttree; + Plan *innerplan = plan->righttree; + pgpa_join_strategy strategy; + + switch (nodeTag(plan)) + { + case T_MergeJoin: + if (IsA(innerplan, Material)) + { + innerplan = innerplan->lefttree; + strategy = JSTRAT_MERGEJOIN_MATERIALIZE; + } + else + strategy = JSTRAT_MERGEJOIN_PLAIN; + break; + case T_NestLoop: + if (IsA(innerplan, Material)) + { + innerplan = innerplan->lefttree; + strategy = JSTRAT_NESTLOOP_MATERIALIZE; + } + else if (IsA(innerplan, Memoize)) + { + innerplan = innerplan->lefttree; + strategy = JSTRAT_NESTLOOP_MEMOIZE; + } + else + strategy = JSTRAT_NESTLOOP_PLAIN; + break; + case T_HashJoin: + strategy = JSTRAT_HASHJOIN; + break; + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan)); + } + + *realinner = innerplan; + *realouter = outerplan; + return strategy; +} + +static void +pgpa_fill_join_member(PlannedStmt *pstmt, pgpa_join_member * m) +{ + /* + * It's possible that more than one node was elided here; if so, we care + * about the uppermost one, which will be last in the list. + */ + foreach_node(ElidedNode, n, pstmt->elidedNodes) + { + if (n->plan_node_id == m->plan->plan_node_id) + m->elided_node = n; + } + + /* + * If we found an elided node, use that to complete the state for this + * join member. + */ + if (m->elided_node != NULL) + { + switch (bms_membership(m->elided_node->relids)) + { + case BMS_MULTIPLE: + m->clump_join = + pgpa_make_clumped_join_from_elided(m->elided_node); + break; + case BMS_SINGLETON: + m->rti = bms_singleton_member(m->elided_node->relids); + break; + case BMS_EMPTY_SET: + elog(ERROR, "no RTIs for elided node"); + break; + } + return; + } + + /* + * Otherwise, classify the Plan node and proceed accordingly. + */ + switch (pgpa_get_join_class(m->plan)) + { + case PGPA_NON_JOIN: + m->rti = pgpa_get_single_rti(m->plan); + break; + case PGPA_CLUMPED_JOIN: + m->clump_join = pgpa_make_clumped_join(m->plan); + break; + case PGPA_UNROLLED_JOIN: + m->unrolled_join = pgpa_make_unrolled_join(pstmt, m->plan); + break; + } +} + +static pgpa_clumped_join * +pgpa_make_clumped_join_from_elided(ElidedNode *n) +{ + /* XXX FIXME */ + return NULL; +} + +/* XXX might not belong in this file */ +static unsigned +pgpa_get_single_rti(Plan *plan) +{ + /* XXX */ + return 0; +} diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h index 7af620f87e..b8d7168402 100644 --- a/contrib/pg_plan_advice/pgpa_join.h +++ b/contrib/pg_plan_advice/pgpa_join.h @@ -12,6 +12,10 @@ #ifndef PGPA_JOIN_H #define PGPA_JOIN_H +#include "nodes/plannodes.h" + +typedef struct pgpa_unrolled_join pgpa_unrolled_join; + /* * Certain types of plan nodes can join any number of input relations in * a single step; we call these "clumped joins". @@ -53,6 +57,8 @@ typedef struct * more precisely here: merge joins have the option of using materialization * on the inner side, and nested loops can use either materialization or * memoization. + * + * XXX. I think we need to do something with Unique here also. */ typedef enum { @@ -65,85 +71,75 @@ typedef enum } pgpa_join_strategy; /* - * Non-clumped joins are unrolled; that is, we convert outer-deep join trees - * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to - * outer_subplan = A, inner_subplans = , provided that none of the - * joins involved are clumped. When joins aren't outer-deep, substructure is - * required, e.g. (A JOIN B) JOIN (C JOIN D) is represented as - * outer_subplan = A, inner_subplans = , where X is a clumped or - * unrolled join covering C-D. + * Within a pgpa_unrolled_join, we may need to refer to either to scans or to + * other joins, which may be either unrolled or clumped. + * + * The plan node we store here will be the inner or outer child of the join + * node, as appropriate, except that we look through subnodes that we regard as + * part of the join method itself. For instance, for a Nested Loop that + * materializes the inner input, we'll store the child of the Materialize node, + * not the Materialize node itself. + * + * XXX. What happens with Gather and Gather Merge here? + * + * If setrefs processing elided one or more nodes from the plan tree, then + * we'll store details about the topmost of those in elided_node. + * + * If this is a scan, rti will be the RTI taken from elided_node, or if that + * is NULL, from plan. Otherwise, this is a join, and either clump_join or + * unrolled_join will be set as appropriate; otherwise, they will be NULL. */ typedef struct { - /* - * Typically, the outermost subplan will be a scan of some relation, in - * which case outer_rti will be the RTI extracted from outer_subplan, and - * outer_clump_join will be NULL; but it's possible that it could be - * clumped join, in which case outer_subplan will be NULL and outer_rti - * will be zero. - */ - Plan *outer_subplan; - Index outer_rti; - pgpa_clump_join **outer_clump_join; + Plan *plan; + ElidedNode *elided_node; + Index rti; + pgpa_clumped_join *clump_join; + pgpa_unrolled_join *unrolled_join; +} pgpa_join_member; - /* - * nallocated is the allocated length of the strategy, inner_subplans, - * inner_rti, inner_unrolled_join, and inner_clump_join arrays; and nused - * is the number of elements in use within each of those arrays. - */ - unsigned nallocated; - unsigned nused; +/* + * Non-clumped joins are unrolled; that is, we convert outer-deep join trees + * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to + * outer = A, inner = , provided that none of the joins involved are + * clumped. When joins aren't outer-deep, substructure is required, e.g. + * (A JOIN B) JOIN (C JOIN D) is represented as outer = A, inner = , + * where X is a clumped or unrolled join covering C-D. + */ +struct pgpa_unrolled_join +{ + /* Outermost member; must not itself be an unrolled join. */ + pgpa_join_member outer; - /* - * Foreach 0 <= n < nused, strategy[n] is the join strategy used to the - * next table or group of tables, as further detailed by the fields below. - * For instance, given (A MERGE_JOIN_PLAIN B) HASH_JOIN C, we would set - * strategy[0] = JSTRAT_MERGEJOIN_PLAIN and strategy[1] = - * JSTRATE_HASHJOIN. - */ - pgpa_join_strategy **strategy; + /* Number of inner members. Length of the strategy and inner arrays. */ + unsigned ninner; - /* - * For each 0 <= n < nused, exactly one of inner_subplans[n], - * inner_unrolled_join[n], and inner_clump_join[n] should be non-NULL; - * inner_rti[n] should be non-zero if and only if inner_subplans[n] is - * non-NULL. - * - * Note that when inner_unrolled_joins[n] is non-NULL, we've got what the - * optimizer calls a "bushy" plan, where several tables are joined to each - * other before being joined to the remainder of the join tree all at - * once. - */ - Plan **inner_subplans; - Index *inner_rti; - pgpa_unrolled_join **inner_unrolled_join; - pgpa_clump_join **inner_clump_join; -} pgpa_unrolled_join; + /* Array of strategies, one per non-outermost member. */ + pgpa_join_strategy *strategy; -/* Derive a clumped join from a Plan. */ -extern pgpa_clumped_join *pgpa_make_clumped_join(Plan *plan); + /* Array of members, excluding the outermost. Deepest first. */ + pgpa_join_member *inner; +}; /* - * pgpa_make_unrolled_join objects are constructed in steps. - * - * Call pgpa_make_unrolled_join() just once. Then, call one of the - * pgpa_set_outer_* functions just once. Then, call one of the - * pgpa_append_inner_* functions once for every join level represented - * by this object. + * Classification of Plan nodes based on what structure we should construct. */ -extern pgpa_unrolled_join *pgpa_make_unrolled_join(void); -extern void pgpa_set_outer_scan(pgpa_unrolled_join *join, Plan *outer_subplan); -extern void pgpa_set_outer_elided_scan(pgpa_unrolled_join *join, - ElidedNode *elided_node); -extern void pgpa_set_outer_clumped_join(pgpa_unrolled_join *join, - pgpa_clumped_join *subjoin); -extern void pgpa_append_inner_scan(pgpa_unrolled_join *join, - Plan *inner_subplan); -extern void pgpa_append_inner_elided_scan(pgpa_unrolled_join *join, - ElidedNode *elided_node); -extern void pgpa_append_inner_unrolled_join(pgpa_unrolled_join *join, - pgpa_unrolled_join *subjoin); -extern void pgpa_append_inner_clumped_join(pgpa_unrolled_join *join, - pgpa_clumped_join *subjoin); +typedef enum +{ + PGPA_NON_JOIN, + PGPA_CLUMPED_JOIN, + PGPA_UNROLLED_JOIN +} pgpa_join_class; + +extern pgpa_join_class pgpa_get_join_class(Plan *plan); +extern pgpa_clumped_join *pgpa_make_clumped_join(Plan *plan); +extern pgpa_unrolled_join *pgpa_make_unrolled_join(PlannedStmt *pstmt, + Plan *plan); + +typedef bool (*pgpa_unrolled_join_walker_callback) (pgpa_join_member *member, + void *context); +extern bool pgpa_unrolled_join_walker(pgpa_unrolled_join *join, + pgpa_unrolled_join_walker_callback cb, + void *context); #endif -- 2.39.5