From 10082916d06f701b8e73c33cba2cdb5e98b72fed Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 22 Nov 2008 22:47:06 +0000 Subject: [PATCH] Switch the planner over to treating qualifications of a JOIN_SEMI join as though it is an inner rather than outer join type. This essentially means that we don't bother to separate "pushed down" qual conditions from actual join quals at a semijoin plan node; which is okay because the restrictions of SQL syntax make it impossible to have a pushed-down qual that references the inner side of a semijoin. This allows noticeably better optimization of IN/EXISTS cases than we had before, since the equivalence-class machinery can now use those quals. Also fix a couple of other mistakes that had essentially disabled the ability to unique-ify the inner relation and then join it to just a subset of the left-hand relations. An example case using the regression database is select * from tenk1 a, tenk1 b where (a.unique1,b.unique2) in (select unique1,unique2 from tenk1 c); which is planned reasonably well by 8.3 and earlier but had been forcing a cartesian join of a/b in CVS HEAD. --- src/backend/optimizer/path/costsize.c | 2 +- src/backend/optimizer/path/indxpath.c | 2 +- src/backend/optimizer/path/joinpath.c | 2 +- src/backend/optimizer/path/joinrels.c | 55 ++++++++++++++++++++++---- src/backend/optimizer/plan/initsplan.c | 26 +++++++----- src/include/nodes/nodes.h | 22 +++++++---- 6 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7f1379db3a..24efb580bc 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, break; case JOIN_SEMI: nrows = outer_rel->rows * jselec; - nrows *= pselec; + /* pselec not used */ break; case JOIN_ANTI: nrows = outer_rel->rows * (1.0 - jselec); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index e7237ce772..06514bfa7d 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 447534a5c5..f5ceb9bc01 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index bb8f3bead5..6fabf47ea9 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, match_sjinfo = sjinfo; reversed = true; } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel2->relids)) + { + /* + * For a semijoin, we can join the RHS to anything else by + * unique-ifying the RHS. + */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = false; + } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel1->relids)) + { + /* Reversed semijoin case */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = true; + } else { /*---------- @@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, * We assume that make_outerjoininfo() set things up correctly * so that we'll only match to some SJ if the join is valid. * Set flag here to check at bottom of loop. + * + * For a semijoin, assume it's okay if either side fully contains + * the RHS (per the unique-ification case above). *---------- */ - if (bms_overlap(rel1->relids, sjinfo->min_righthand) && + if (sjinfo->jointype != JOIN_SEMI && + bms_overlap(rel1->relids, sjinfo->min_righthand) && bms_overlap(rel2->relids, sjinfo->min_righthand)) { /* seems OK */ Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand)); } + else if (sjinfo->jointype == JOIN_SEMI && + (bms_is_subset(sjinfo->syn_righthand, rel1->relids) || + bms_is_subset(sjinfo->syn_righthand, rel2->relids))) + { + /* seems OK */ + } else is_valid_inner = false; } @@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) restrictlist); break; case JOIN_SEMI: - if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist)) + /* + * Do these steps only if we actually have a regular semijoin, + * as opposed to a case where we should unique-ify the RHS. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) { - mark_dummy_rel(joinrel); - break; + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_SEMI, sjinfo, + restrictlist); } - add_paths_to_joinrel(root, joinrel, rel1, rel2, - JOIN_SEMI, sjinfo, - restrictlist); /* * If we know how to unique-ify the RHS and one input rel is diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 40b96a0fe4..a3b4f22cfc 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, maybe_equivalence = false; maybe_outer_join = false; } - else if (bms_overlap(relids, outerjoin_nonnullable) && - (jointype != JOIN_SEMI || - bms_nonempty_difference(relids, outerjoin_nonnullable))) + else if (bms_overlap(relids, outerjoin_nonnullable)) { /* * The qual is attached to an outer join and mentions (some of the) - * rels on the nonnullable side, so it's not degenerate. (For a - * JOIN_SEMI qual, we consider it non-degenerate only if it mentions - * both sides of the join --- if it mentions only one side, it can - * be pushed down.) + * rels on the nonnullable side, so it's not degenerate. * * We can't use such a clause to deduce equivalence (the left and * right sides might be unequal above the join because one of them has @@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, SpecialJoinInfo *sjinfo; Relids qualscope; Relids ojscope; + Relids outerjoin_nonnullable; ListCell *l; /* @@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, fslink->jointype, quals); + /* Treat as inner join if SEMI, outer join if ANTI */ qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand); - ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + if (fslink->jointype == JOIN_SEMI) + { + ojscope = outerjoin_nonnullable = NULL; + } + else + { + Assert(fslink->jointype == JOIN_ANTI); + ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + outerjoin_nonnullable = fslink->lefthand; + } - /* Distribute the join quals much as for a regular LEFT JOIN */ + /* Distribute the join quals much as for a regular JOIN node */ foreach(l, quals) { Node *qual = (Node *) lfirst(l); distribute_qual_to_rels(root, qual, false, below_outer_join, fslink->jointype, - qualscope, ojscope, fslink->lefthand); + qualscope, ojscope, outerjoin_nonnullable); } /* Now we can add the SpecialJoinInfo to join_info_list */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index f5e17e8377..64ee62d288 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -542,15 +542,23 @@ typedef enum JoinType /* * OUTER joins are those for which pushed-down quals must behave differently - * from the join's own quals. This is in fact everything except INNER joins. - * However, this macro must also exclude the JOIN_UNIQUE symbols since those - * are temporary proxies for what will eventually be an INNER join. + * from the join's own quals. This is in fact everything except INNER and + * SEMI joins. However, this macro must also exclude the JOIN_UNIQUE symbols + * since those are temporary proxies for what will eventually be an INNER + * join. * - * Note: in some places it is preferable to treat JOIN_SEMI as not being - * an outer join, since it doesn't produce null-extended rows. Be aware - * of that distinction when deciding whether to use this macro. + * Note: semijoins are a hybrid case, but we choose to treat them as not + * being outer joins. This is okay principally because the SQL syntax makes + * it impossible to have a pushed-down qual that refers to the inner relation + * of a semijoin; so there is no strong need to distinguish join quals from + * pushed-down quals. This is convenient because for almost all purposes, + * quals attached to a semijoin can be treated the same as innerjoin quals. */ #define IS_OUTER_JOIN(jointype) \ - ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER) + (((1 << (jointype)) & \ + ((1 << JOIN_LEFT) | \ + (1 << JOIN_FULL) | \ + (1 << JOIN_RIGHT) | \ + (1 << JOIN_ANTI))) != 0) #endif /* NODES_H */ -- 2.39.5