Switch the planner over to treating qualifications of a JOIN_SEMI join as
author: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 22 Nov 2008 22:47:06 +0000 (22:47 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 22 Nov 2008 22:47:06 +0000 (22:47 +0000)
though it is an inner rather than outer join type.  This essentially means
that we don't bother to separate "pushed down" qual conditions from actual
join quals at a semijoin plan node; which is okay because the restrictions of
SQL syntax make it impossible to have a pushed-down qual that references the
inner side of a semijoin.  This allows noticeably better optimization of
IN/EXISTS cases than we had before, since the equivalence-class machinery can
now use those quals.  Also fix a couple of other mistakes that had essentially
disabled the ability to unique-ify the inner relation and then join it to just
a subset of the left-hand relations.  An example case using the regression
database is

select * from tenk1 a, tenk1 b
where (a.unique1,b.unique2) in (select unique1,unique2 from tenk1 c);

which is planned reasonably well by 8.3 and earlier but had been forcing a
cartesian join of a/b in CVS HEAD.

src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/plan/initsplan.c
src/include/nodes/nodes.h

index 7f1379db3afc24a8d8364c2dec7fc53f4b0563b9..24efb580bc89f5d3e3f8e0c58add327fae929cf8 100644 (file)
@@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                        break;
                case JOIN_SEMI:
                        nrows = outer_rel->rows * jselec;
-                       nrows *= pselec;
+                       /* pselec not used */
                        break;
                case JOIN_ANTI:
                        nrows = outer_rel->rows * (1.0 - jselec);
index e7237ce7728b35d9400676cb0edc6518896a18a9..06514bfa7d38aec455ef7c7db1305f8305f6a21c 100644 (file)
@@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        switch (jointype)
        {
                case JOIN_INNER:
+               case JOIN_SEMI:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
-               case JOIN_SEMI:
                case JOIN_ANTI:
                        isouterjoin = true;
                        break;
index 447534a5c5afd182f32411b56c07391730303eb9..f5ceb9bc014b21e48037a90151f474936a553654 100644 (file)
@@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root,
        switch (jointype)
        {
                case JOIN_INNER:
+               case JOIN_SEMI:
                case JOIN_UNIQUE_OUTER:
                case JOIN_UNIQUE_INNER:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
-               case JOIN_SEMI:
                case JOIN_ANTI:
                        isouterjoin = true;
                        break;
index bb8f3bead5eb9acc76d6979c77813ea81a2ce9c1..6fabf47ea9b63112ed904c2726a9b21d058a6ce1 100644 (file)
@@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                        match_sjinfo = sjinfo;
                        reversed = true;
                }
+               else if (sjinfo->jointype == JOIN_SEMI &&
+                                bms_equal(sjinfo->syn_righthand, rel2->relids))
+               {
+                       /*
+                        * For a semijoin, we can join the RHS to anything else by
+                        * unique-ifying the RHS.
+                        */
+                       if (match_sjinfo)
+                               return false;   /* invalid join path */
+                       match_sjinfo = sjinfo;
+                       reversed = false;
+               }
+               else if (sjinfo->jointype == JOIN_SEMI &&
+                                bms_equal(sjinfo->syn_righthand, rel1->relids))
+               {
+                       /* Reversed semijoin case */
+                       if (match_sjinfo)
+                               return false;   /* invalid join path */
+                       match_sjinfo = sjinfo;
+                       reversed = true;
+               }
                else
                {
                        /*----------
@@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                         * We assume that make_outerjoininfo() set things up correctly
                         * so that we'll only match to some SJ if the join is valid.
                         * Set flag here to check at bottom of loop.
+                        *
+                        * For a semijoin, assume it's okay if either side fully contains
+                        * the RHS (per the unique-ification case above).
                         *----------
                         */
-                       if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+                       if (sjinfo->jointype != JOIN_SEMI &&
+                               bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                                bms_overlap(rel2->relids, sjinfo->min_righthand))
                        {
                                /* seems OK */
                                Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
                        }
+                       else if (sjinfo->jointype == JOIN_SEMI &&
+                                        (bms_is_subset(sjinfo->syn_righthand, rel1->relids) ||
+                                         bms_is_subset(sjinfo->syn_righthand, rel2->relids)))
+                       {
+                               /* seems OK */
+                       }
                        else
                                is_valid_inner = false;
                }
@@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                                                 restrictlist);
                        break;
                case JOIN_SEMI:
-                       if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-                               restriction_is_constant_false(restrictlist))
+                       /*
+                        * Do these steps only if we actually have a regular semijoin,
+                        * as opposed to a case where we should unique-ify the RHS.
+                        */
+                       if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+                               bms_is_subset(sjinfo->min_righthand, rel2->relids))
                        {
-                               mark_dummy_rel(joinrel);
-                               break;
+                               if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
+                                       restriction_is_constant_false(restrictlist))
+                               {
+                                       mark_dummy_rel(joinrel);
+                                       break;
+                               }
+                               add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                        JOIN_SEMI, sjinfo,
+                                                                        restrictlist);
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
-                                                                JOIN_SEMI, sjinfo,
-                                                                restrictlist);
 
                        /*
                         * If we know how to unique-ify the RHS and one input rel is
index 40b96a0fe463d8e9b9f807ca4337dec0681c9597..a3b4f22cfca842927193621bdb5dc966c80415b9 100644 (file)
@@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
                maybe_equivalence = false;
                maybe_outer_join = false;
        }
-       else if (bms_overlap(relids, outerjoin_nonnullable) &&
-                        (jointype != JOIN_SEMI ||
-                         bms_nonempty_difference(relids, outerjoin_nonnullable)))
+       else if (bms_overlap(relids, outerjoin_nonnullable))
        {
                /*
                 * The qual is attached to an outer join and mentions (some of the)
-                * rels on the nonnullable side, so it's not degenerate.  (For a
-                * JOIN_SEMI qual, we consider it non-degenerate only if it mentions
-                * both sides of the join --- if it mentions only one side, it can
-                * be pushed down.)
+                * rels on the nonnullable side, so it's not degenerate.
                 *
                 * We can't use such a clause to deduce equivalence (the left and
                 * right sides might be unequal above the join because one of them has
@@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
        SpecialJoinInfo *sjinfo;
        Relids          qualscope;
        Relids          ojscope;
+       Relids          outerjoin_nonnullable;
        ListCell   *l;
 
        /*
@@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
                                                                fslink->jointype,
                                                                quals);
 
+       /* Treat as inner join if SEMI, outer join if ANTI */
        qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand);
-       ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+       if (fslink->jointype == JOIN_SEMI)
+       {
+               ojscope = outerjoin_nonnullable = NULL;
+       }
+       else
+       {
+               Assert(fslink->jointype == JOIN_ANTI);
+               ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+               outerjoin_nonnullable = fslink->lefthand;
+       }
 
-       /* Distribute the join quals much as for a regular LEFT JOIN */
+       /* Distribute the join quals much as for a regular JOIN node */
        foreach(l, quals)
        {
                Node   *qual = (Node *) lfirst(l);
 
                distribute_qual_to_rels(root, qual,
                                                                false, below_outer_join, fslink->jointype,
-                                                               qualscope, ojscope, fslink->lefthand);
+                                                               qualscope, ojscope, outerjoin_nonnullable);
        }
 
        /* Now we can add the SpecialJoinInfo to join_info_list */
index f5e17e8377d76d37e029d912312b0bdf4447d7e4..64ee62d2882517c45441c281ec2cd91b1cc32b5d 100644 (file)
@@ -542,15 +542,23 @@ typedef enum JoinType
 
 /*
  * OUTER joins are those for which pushed-down quals must behave differently
- * from the join's own quals.  This is in fact everything except INNER joins.
- * However, this macro must also exclude the JOIN_UNIQUE symbols since those
- * are temporary proxies for what will eventually be an INNER join.
+ * from the join's own quals.  This is in fact everything except INNER and
+ * SEMI joins.  However, this macro must also exclude the JOIN_UNIQUE symbols
+ * since those are temporary proxies for what will eventually be an INNER
+ * join.
  *
- * Note: in some places it is preferable to treat JOIN_SEMI as not being
- * an outer join, since it doesn't produce null-extended rows.  Be aware
- * of that distinction when deciding whether to use this macro.
+ * Note: semijoins are a hybrid case, but we choose to treat them as not
+ * being outer joins.  This is okay principally because the SQL syntax makes
+ * it impossible to have a pushed-down qual that refers to the inner relation
+ * of a semijoin; so there is no strong need to distinguish join quals from
+ * pushed-down quals.  This is convenient because for almost all purposes,
+ * quals attached to a semijoin can be treated the same as innerjoin quals.
  */
 #define IS_OUTER_JOIN(jointype) \
-       ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER)
+       (((1 << (jointype)) & \
+         ((1 << JOIN_LEFT) | \
+          (1 << JOIN_FULL) | \
+          (1 << JOIN_RIGHT) | \
+          (1 << JOIN_ANTI))) != 0)
 
 #endif   /* NODES_H */