Fix a couple of oversights associated with the "physical tlist" optimization:
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 17 Apr 2008 21:22:23 +0000 (21:22 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 17 Apr 2008 21:22:23 +0000 (21:22 +0000)
we had several code paths where a physical tlist could be used for the input
to a Sort node, which is a dumb idea because any unneeded table columns will
increase the volume of data the sort has to push around.

(Unfortunately the easy-looking fix of calling disuse_physical_tlist during
make_sort_xxx doesn't work because in most cases we're already committed to
the current input tlist --- it's been marked with sort column numbers, or
we've built grouping column numbers using it, etc.  The tlist has to be
selected properly at the calling level before we start constructing sort-col
information.  This is easy enough to do; we were just failing to take the
point into consideration.)

Back-patch to 8.3.  I believe the problem probably exists clear back to 7.4
when the physical tlist optimization was added, but I'm afraid to back-patch
further than 8.3 without a great deal more study than I want to put into it.
The code in this area has drifted a lot over time.  The real-world importance
of these code paths is uncertain anyway --- I think in many cases we'd
probably prefer hash-based methods.

src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planner.c
src/include/optimizer/planmain.h

index 77ddbc801afce00877c28afaf6b46ddef3e8ce9a..965b504fc40d332f4440df3be6bf64e9f28c6158 100644 (file)
@@ -124,6 +124,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
                  AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
                  double limit_tuples);
+static Material *make_material(Plan *lefttree);
 
 
 /*
@@ -626,12 +627,14 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
         * add any such expressions to the subplan's tlist.
         *
         * The subplan may have a "physical" tlist if it is a simple scan plan.
-        * This should be left as-is if we don't need to add any expressions;
+        * If we're going to sort, this should be reduced to the regular tlist,
+        * so that we don't sort more data than we need to.  For hashing, the
+        * tlist should be left as-is if we don't need to add any expressions;
         * but if we do have to add expressions, then a projection step will be
-        * needed at runtime anyway, and so we may as well remove unneeded items.
+        * needed at runtime anyway, so we may as well remove unneeded items.
         * Therefore newtlist starts from build_relation_tlist() not just a
         * copy of the subplan's tlist; and we don't install it into the subplan
-        * unless stuff has to be added.
+        * unless we are sorting or stuff has to be added.
         *
         * To find the correct list of values to unique-ify, we look in the
         * information saved for IN expressions.  If this code is ever used in
@@ -679,7 +682,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
                }
        }
 
-       if (newitems)
+       if (newitems || best_path->umethod == UNIQUE_PATH_SORT)
        {
                /*
                 * If the top plan node can't do projections, we need to add a Result
@@ -2991,7 +2994,7 @@ make_sort_from_groupcols(PlannerInfo *root,
                                         sortColIdx, sortOperators, nullsFirst, -1.0);
 }
 
-Material *
+static Material *
 make_material(Plan *lefttree)
 {
        Material   *node = makeNode(Material);
index ef91030bde691ee3a2ef24e3184df0bed9cd5101..257d14652fc5b56048441b41552a8322cf1f6638 100644 (file)
@@ -950,9 +950,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                         * Normal case --- create a plan according to query_planner's
                         * results.
                         */
+                       bool    need_sort_for_grouping = false;
+
                        result_plan = create_plan(root, best_path);
                        current_pathkeys = best_path->pathkeys;
 
+                       /* Detect if we'll need an explicit sort for grouping */
+                       if (parse->groupClause && !use_hashed_grouping &&
+                               !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                       {
+                               need_sort_for_grouping = true;
+                               /*
+                                * Always override query_planner's tlist, so that we don't
+                                * sort useless data from a "physical" tlist.
+                                */
+                               need_tlist_eval = true;
+                       }
+
                        /*
                         * create_plan() returns a plan with just a "flat" tlist of
                         * required Vars.  Usually we need to insert the sub_tlist as the
@@ -1047,8 +1061,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
                                if (parse->groupClause)
                                {
-                                       if (!pathkeys_contained_in(group_pathkeys,
-                                                                                          current_pathkeys))
+                                       if (need_sort_for_grouping)
                                        {
                                                result_plan = (Plan *)
                                                        make_sort_from_groupcols(root,
@@ -1091,7 +1104,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                 * Add an explicit sort if we couldn't make the path come out
                                 * the way the GROUP node needs it.
                                 */
-                               if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                               if (need_sort_for_grouping)
                                {
                                        result_plan = (Plan *)
                                                make_sort_from_groupcols(root,
index 8bfd9c262b4abcb1aabedf145434cb4e7bbc5520..f7fded820f562bedbbac4e0ca991beb51ca49dd2 100644 (file)
@@ -53,7 +53,6 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual,
                   int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
                   double numGroups,
                   Plan *lefttree);
-extern Material *make_material(Plan *lefttree);
 extern Plan *materialize_finished_plan(Plan *subplan);
 extern Unique *make_unique(Plan *lefttree, List *distinctList);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,