From 8db6146a592ec2a1d7906f9d6ac205c0e9cdfdff Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 15 Aug 2008 19:20:42 +0000 Subject: [PATCH] Performance fix for new anti-join code in nodeMergejoin.c: after finding a match in antijoin mode, we should advance to next outer tuple not next inner. We know we don't want to return this outer tuple, and there is no point in advancing over matching inner tuples now, because we'd just have to do it again if the next outer tuple has the same merge key. This makes a noticeable difference if there are lots of duplicate keys in both inputs. Similarly, after finding a match in semijoin mode, arrange to advance to the next outer tuple after returning the current match; or immediately, if it fails the extra quals. The rationale is the same. (This is a performance bug in existing releases; perhaps worth back-patching? The planner tries to avoid using mergejoin with lots of duplicates, so it may not be a big issue in practice.) Nestloop and hash got this right to start with, but I made some cosmetic adjustments there to make the corresponding bits of logic look more similar. --- src/backend/executor/nodeHashjoin.c | 48 +++++++++++++--------------- src/backend/executor/nodeMergejoin.c | 22 +++++++------ src/backend/executor/nodeNestloop.c | 45 +++++++++++++------------- 3 files changed, 59 insertions(+), 56 deletions(-) diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index a59ab19db8..08cc09ff23 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -264,36 +264,34 @@ ExecHashJoin(HashJoinState *node) node->hj_NeedNewOuter = true; break; /* out of loop over hash bucket */ } - else + + /* + * In a semijoin, we'll consider returning the first match, + * but after that we're done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + node->hj_NeedNewOuter = true; + + if (otherqual == NIL || ExecQual(otherqual, econtext, false)) { - /* - * In a semijoin, we'll consider returning the first match, - * but after that we're done with this outer tuple. - */ - if (node->js.jointype == JOIN_SEMI) - node->hj_NeedNewOuter = true; - - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) - { - TupleTableSlot *result; + TupleTableSlot *result; - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); - if (isDone != ExprEndResult) - { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; - } + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; } - - /* - * If semijoin and we didn't return the tuple, we're still - * done with this outer tuple. - */ - if (node->js.jointype == JOIN_SEMI) - break; /* out of loop over hash bucket */ } + + /* + * If semijoin and we didn't return the tuple, we're still + * done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + break; /* out of loop over hash bucket */ } } diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 0546446090..35af68771c 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -757,15 +757,9 @@ ExecMergeJoin(MergeJoinState *node) innerTupleSlot = node->mj_InnerTupleSlot; econtext->ecxt_innertuple = innerTupleSlot; - if (node->js.jointype == JOIN_SEMI && - node->mj_MatchedOuter) - qualResult = false; - else - { - qualResult = (joinqual == NIL || - ExecQual(joinqual, econtext, false)); - MJ_DEBUG_QUAL(joinqual, qualResult); - } + qualResult = (joinqual == NIL || + ExecQual(joinqual, econtext, false)); + MJ_DEBUG_QUAL(joinqual, qualResult); if (qualResult) { @@ -774,7 +768,17 @@ ExecMergeJoin(MergeJoinState *node) /* In an antijoin, we never return a matched tuple */ if (node->js.jointype == JOIN_ANTI) + { + node->mj_JoinState = EXEC_MJ_NEXTOUTER; break; + } + + /* + * In a semijoin, we'll consider returning the first match, + * but after that we're done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || ExecQual(otherqual, econtext, false)); diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 266a338745..2fd71703d3 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -226,35 +226,36 @@ ExecNestLoop(NestLoopState *node) /* In an antijoin, we never return a matched tuple */ if (node->js.jointype == JOIN_ANTI) + { node->nl_NeedNewOuter = true; - else + continue; /* return to top of loop */ + } + + /* + * In a semijoin, we'll consider returning the first match, + * but after that we're done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + node->nl_NeedNewOuter = true; + + if (otherqual == NIL || ExecQual(otherqual, econtext, false)) { /* - * In a semijoin, we'll consider returning the first match, - * but after that we're done with this outer tuple. + * qualification was satisfied so we project and return the + * slot containing the result tuple using ExecProject(). */ - if (node->js.jointype == JOIN_SEMI) - node->nl_NeedNewOuter = true; - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) - { - /* - * qualification was satisfied so we project and return - * the slot containing the result tuple using - * ExecProject(). - */ - TupleTableSlot *result; - ExprDoneCond isDone; + TupleTableSlot *result; + ExprDoneCond isDone; - ENL1_printf("qualification succeeded, projecting tuple"); + ENL1_printf("qualification succeeded, projecting tuple"); - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); - if (isDone != ExprEndResult) - { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; - } + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; } } } -- 2.39.5