Properly redistribute results of Gather Merge nodes
author	Tomas Vondra <tomas@2ndquadrant.com>
Sun, 9 Jul 2017 20:02:21 +0000 (22:02 +0200)
committer	Tomas Vondra <tomas@2ndquadrant.com>
Sun, 9 Jul 2017 20:02:21 +0000 (22:02 +0200)
The optimizer was not generating correct distributed paths with Gather
Merge nodes, because those nodes always looked as if the data was not
distributed at all. There were two bugs causing this:

1) Gather Merge did not copy the distribution from its subpath, leaving it
   NULL (as if running on the coordinator), so no Remote Subquery seemed
   to be needed.

2) create_grouping_paths() did not check whether a Remote Subquery needs to
   be added on top of the Gather Merge path anyway.

After fixing these two issues, we're now generating correct plans (at
least judging by the select_parallel regression suite).
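
For context, here is a minimal sketch in C of the decision the fix adds in
create_grouping_paths(). The wrapper redistribute_gather_merge() is
hypothetical and simplified; can_push_down_grouping() and
create_remotesubplan_path() are the helpers used in the diff below:

    /*
     * Hedged sketch, not the verbatim source: if the grouping cannot be
     * fully pushed down to the datanodes, keep the partial aggregation
     * below Gather Merge and add a Remote Subquery on top of it, so that
     * only the partial results are redistributed before the final
     * aggregation step.
     */
    static Path *
    redistribute_gather_merge(PlannerInfo *root, Query *parse, Path *gmpath)
    {
        if (can_push_down_grouping(root, parse, gmpath))
            return gmpath;      /* distribution already matches the grouping */

        return create_remotesubplan_path(root, gmpath, NULL);
    }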

src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c
src/test/regress/expected/select_parallel.out

index a95572b87b5b8ed1d030bb75f15090cb96a3002e..0fe31f29520cad9260905804cd5547e81794ad5d 100644
@@ -4312,6 +4312,14 @@ create_grouping_paths(PlannerInfo *root,
                                                                                                 NULL,
                                                                                                 &total_groups);
 
+                                       /*
+                                        * If the grouping can't be fully pushed down, we'll push down the
+                                        * first phase of the aggregate, and redistribute only the partial
+                                        * results.
+                                        */
+                                       if (! can_push_down_grouping(root, parse, gmpath))
+                                               gmpath = create_remotesubplan_path(root, gmpath, NULL);
+
                                        if (parse->hasAggs)
                                                add_path(grouped_rel, (Path *)
                                                                 create_agg_path(root,
index 8d99cf9b34bd26b9fb70c7a0d0a54e0318f7b3e2..d5f964419b2f037193d4cda20ef78334d6b2ca91 100644
@@ -3018,6 +3018,9 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
                                                                                                                  required_outer);
        pathnode->path.parallel_aware = false;
 
+       /* distribution is the same as in the subpath */
+       pathnode->path.distribution = (Distribution *) copyObject(subpath->distribution);
+
        pathnode->subpath = subpath;
        pathnode->num_workers = subpath->parallel_workers;
        pathnode->path.pathkeys = pathkeys;
index 079dca310ae188d492f1a4f3721217a7095d2216..4392a3ada3284ca9dc15bbd2d0646b57dbc42bf1 100644
@@ -278,18 +278,19 @@ reset enable_nestloop;
 set enable_hashagg to off;
 explain (costs off)
    select  string4, count((unique2)) from tenk1 group by string4 order by string4;
-                     QUERY PLAN                     
-----------------------------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  Finalize GroupAggregate
    Group Key: string4
-   ->  Gather Merge
-         Workers Planned: 4
-         ->  Partial GroupAggregate
-               Group Key: string4
-               ->  Sort
-                     Sort Key: string4
-                     ->  Parallel Seq Scan on tenk1
-(9 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Gather Merge
+               Workers Planned: 4
+               ->  Partial GroupAggregate
+                     Group Key: string4
+                     ->  Sort
+                           Sort Key: string4
+                           ->  Parallel Seq Scan on tenk1
+(10 rows)
 
 select  string4, count((unique2)) from tenk1 group by string4 order by string4;
  string4 | count