Build extended stats on coordinators during ANALYZE
author     Tomas Vondra <tomas@2ndquadrant.com>
           Thu, 13 Jul 2017 16:22:30 +0000 (18:22 +0200)
committer  Tomas Vondra <tomas@2ndquadrant.com>
           Thu, 13 Jul 2017 16:35:06 +0000 (18:35 +0200)
When running ANALYZE on a coordinator, we simply fetch the statistics
built on the datanodes and keep the stats from a random datanode,
assuming all datanodes are similar in terms of data volume and data
distribution.

This was only done for the regular per-attribute stats, though, not for
the extended statistics added in PostgreSQL 10, causing various failures
in the stats_ext tests due to missing statistics. This commit closes the
gap by using the same approach as for the simple statistics: we collect
the stats from the datanodes and keep the first result we receive for
each statistic.

While working on this, I realized this approach has some inherent
issues, particularly for columns that are distribution keys. As we keep
the stats from a single random node, we completely ignore the MCV lists
and histograms from the remaining nodes. That may cause planning issues,
but addressing it is out of scope for this commit.
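
For illustration, a minimal sketch of the user-visible effect (the table
and statistics names here are made up, loosely mirroring the stats_ext
tests):

    CREATE TABLE t (a int, b int) DISTRIBUTE BY HASH (a);
    CREATE STATISTICS s (ndistinct, dependencies) ON a, b FROM t;
    INSERT INTO t SELECT i/10, i/10 FROM generate_series(1, 1000) i;
    ANALYZE t;

    -- previously empty on the coordinator, now filled in from a datanode
    SELECT stxndistinct, stxdependencies
      FROM pg_statistic_ext WHERE stxrelid = 't'::regclass;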

src/backend/commands/analyze.c
src/test/regress/expected/stats_ext.out

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index fc9e017ab2952fdf2aae4625f37f77671d959c93..6ffbbc92025b3080c9ce0c8d32306be8fcbb0358 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -27,6 +27,7 @@
 #include "catalog/catalog.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_namespace.h"
@@ -1671,6 +1672,75 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
        heap_close(sd, RowExclusiveLock);
 }
 
+/*
+ *     update_ext_stats() -- update extended statistics
+ */
+static void
+update_ext_stats(Name nspname, Name name,
+                                bytea *ndistinct, bytea *dependencies)
+{
+       Oid                     nspoid;
+       Relation        sd;
+       HeapTuple       stup,
+                               oldtup;
+       int                     i;
+       Datum           values[Natts_pg_statistic_ext];
+       bool            nulls[Natts_pg_statistic_ext];
+       bool            replaces[Natts_pg_statistic_ext];
+
+       nspoid = get_namespace_oid(NameStr(*nspname), false);
+
+       sd = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+       /*
+        * Construct a new pg_statistic_ext tuple
+        */
+       for (i = 0; i < Natts_pg_statistic_ext; ++i)
+       {
+               nulls[i] = false;
+               replaces[i] = false;
+       }
+
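+       /*
+        * We overwrite only the serialized ndistinct and dependencies
+        * values; all other columns of the existing tuple are kept as-is.
+        */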
+       replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true;
+       replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true;
+
+       /* ndistinct */
+       if (ndistinct)
+               values[Anum_pg_statistic_ext_stxndistinct - 1] = PointerGetDatum(ndistinct);
+       else
+               nulls[Anum_pg_statistic_ext_stxndistinct - 1] = true;
+
+       /* dependencies */
+       if (dependencies)
+               values[Anum_pg_statistic_ext_stxdependencies - 1] = PointerGetDatum(dependencies);
+       else
+               nulls[Anum_pg_statistic_ext_stxdependencies - 1] = true;
+
+       /* Is there already a pg_statistic_ext tuple for this statistic? */
+       oldtup = SearchSysCache2(STATEXTNAMENSP,
+                                                        NameGetDatum(name),
+                                                        ObjectIdGetDatum(nspoid));
+
+       /*
+        * We only expect data for extended statistics already defined on
+        * the coordinator, so fail if we get something unexpected.
+        */
+       if (!HeapTupleIsValid(oldtup))
+               elog(ERROR, "unknown extended statistic \"%s\"", NameStr(*name));
+
+       /* Yes, replace it */
+       stup = heap_modify_tuple(oldtup,
+                                                        RelationGetDescr(sd),
+                                                        values,
+                                                        nulls,
+                                                        replaces);
+       ReleaseSysCache(oldtup);
+       CatalogTupleUpdate(sd, &stup->t_self, stup);
+
+       heap_freetuple(stup);
+       heap_close(sd, RowExclusiveLock);
+}
+
 /*
  * Standard fetch function for use by compute_stats subroutines.
  *
@@ -2890,6 +2960,7 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
        int                     i;
        /* Number of data nodes from which attribute statistics are received. */
        int                        *numnodes;
+       List               *stat_oids;
 
        /* Get the relation identifier */
        relname = RelationGetRelationName(onerel);
@@ -3296,5 +3367,158 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
                }
        }
        update_attstats(RelationGetRelid(onerel), inh, attr_cnt, vacattrstats);
+
+       /*
+        * Build extended statistics on the coordinator.
+        *
+        * We take an approach similar to the simple per-attribute stats:
+        * fetch the extended statistics already built on the datanodes,
+        * and pick the data from a random one, on the assumption that the
+        * datanodes are fairly similar in terms of data volume and
+        * distribution.
+        *
+        * That seems to work fairly well, although there are likely some
+        * weaknesses too, e.g. on distribution keys it may easily neglect
+        * large portions of the data.
+        */
+
+       /*
+        * Build the query string fetching data from pg_statistic_ext on the
+        * datanodes. The serialized statistics are cast to bytea, so that
+        * the values can be transferred and stored back verbatim.
+        */
+       initStringInfo(&query);
+
+       appendStringInfo(&query, "SELECT ns.nspname, "
+                                                                       "stxname, "
+                                                                       "stxndistinct::bytea AS stxndistinct, "
+                                                                       "stxdependencies::bytea AS stxdependencies "
+                                                               " FROM pg_statistic_ext s JOIN pg_class c "
+                                                               "    ON s.stxrelid = c.oid "
+                                                               "JOIN pg_namespace nc "
+                                                               "    ON c.relnamespace = nc.oid "
+                                                               "JOIN pg_namespace ns "
+                                                               "    ON s.stxnamespace = ns.oid "
+                                                               "WHERE nc.nspname = '%s' AND c.relname = '%s'",
+                                       nspname, relname);
+
+       /* Build up RemoteQuery */
+       step = makeNode(RemoteQuery);
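+       /* execute on all datanodes (exec_nodes = NULL), in auto-commit mode */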
+       step->combine_type = COMBINE_TYPE_NONE;
+       step->exec_nodes = NULL;
+       step->sql_statement = query.data;
+       step->force_autocommit = true;
+       step->exec_type = EXEC_ON_DATANODES;
+
+       /* Add targetlist entries */
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(NamespaceRelationId,
+                                                                                                                  "pg_namespace",
+                                                                                                                  "nspname"));
+
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(StatisticExtRelationId,
+                                                                                                                  "pg_statistic_ext",
+                                                                                                                  "stxname"));
+
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(StatisticExtRelationId,
+                                                                                                                  "pg_statistic_ext",
+                                                                                                                  "stxndistinct"));
+
+       step->scan.plan.targetlist = lappend(step->scan.plan.targetlist,
+                                                                                make_relation_tle(StatisticExtRelationId,
+                                                                                                                  "pg_statistic_ext",
+                                                                                                                  "stxdependencies"));
+
+       /* Execute query on the data nodes */
+       estate = CreateExecutorState();
+
+       oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+       /*
+        * Take a fresh snapshot so that we see the effects of the ANALYZE
+        * command on the datanodes. That command is run in auto-commit
+        * mode, hence just bumping up the command ID is not good enough.
+        */
+       PushActiveSnapshot(GetTransactionSnapshot());
+       estate->es_snapshot = GetActiveSnapshot();
+
+       node = ExecInitRemoteQuery(step, estate, 0);
+       MemoryContextSwitchTo(oldcontext);
+
+       result = ExecRemoteQuery(node);
+       PopActiveSnapshot();
+
+       /*
+        * We only want to update each statistic once, as we'd get "tuple
+        * already updated by self" errors otherwise. So keep a list of OIDs
+        * of the stats we already updated, and check it before each update.
+        */
+       stat_oids = NIL;
+       while (result != NULL && !TupIsNull(result))
+       {
+               Datum                   value;
+               bool                    isnull;
+               Name                    nspname;
+               Name                    stxname;
+               bytea              *stxndistinct = NULL;
+               bytea              *stxdependencies = NULL;
+
+               HeapTuple               htup;
+               Oid                             nspoid;
+               Oid                             stat_oid;
+               bool                    updated;
+               ListCell           *lc;
+
+               /* Process statistics from the data node */
+               value = slot_getattr(result, 1, &isnull); /* nspname */
+               nspname = DatumGetName(value);
+
+               value = slot_getattr(result, 2, &isnull); /* stxname */
+               stxname = DatumGetName(value);
+
+               value = slot_getattr(result, 3, &isnull); /* stxndistinct */
+               if (!isnull)
+                       stxndistinct = DatumGetByteaP(value);
+
+               value = slot_getattr(result, 4, &isnull); /* stxdependencies */
+               if (!isnull)
+                       stxdependencies = DatumGetByteaP(value);
+
+               nspoid = get_namespace_oid(NameStr(*nspname), false);
+
+               /* get OID of the statistics object */
+               htup = SearchSysCache2(STATEXTNAMENSP,
+                                                          NameGetDatum(stxname),
+                                                          ObjectIdGetDatum(nspoid));
+
+               if (!HeapTupleIsValid(htup))
+                       elog(ERROR, "unknown extended statistic \"%s\"",
+                                NameStr(*stxname));
+
+               stat_oid = HeapTupleGetOid(htup);
+               ReleaseSysCache(htup);
+
+               /* see if we already updated this pg_statistic_ext tuple */
+               updated = false;
+               foreach(lc, stat_oids)
+               {
+                       Oid oid = lfirst_oid(lc);
+
+                       if (stat_oid == oid)
+                       {
+                               updated = true;
+                               break;
+                       }
+               }
+
+               /* if not, update it (with all the available data) */
+               if (!updated)
+               {
+                       update_ext_stats(nspname, stxname, stxndistinct, stxdependencies);
+                       stat_oids = lappend_oid(stat_oids, stat_oid);
+               }
+
+               /* fetch stats from next node */
+               result = ExecRemoteQuery(node);
+       }
+       ExecEndRemoteQuery(node);
 }
 #endif
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 637e4961f03d1fbcf017c06a29ef0452b48ce747..45aa599cc2851337bf618f28658928e229a08220 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -211,36 +211,39 @@ SELECT stxkind, stxndistinct
   FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
  stxkind |                      stxndistinct                       
 ---------+---------------------------------------------------------
- {d,f}   | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
+ {d,f}   | {"3, 4": 161, "3, 6": 161, "4, 6": 161, "3, 4, 6": 161}
 (1 row)
 
 -- Hash Aggregate, thanks to estimates improved by the statistic
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-         QUERY PLAN          
------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  HashAggregate
    Group Key: a, b
-   ->  Seq Scan on ndistinct
-(3 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Seq Scan on ndistinct
+(4 rows)
 
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
-         QUERY PLAN          
------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  HashAggregate
    Group Key: b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Seq Scan on ndistinct
+(4 rows)
 
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-         QUERY PLAN          
------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  HashAggregate
    Group Key: a, b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Seq Scan on ndistinct
+(4 rows)
 
 -- last two plans keep using Group Aggregate, because 'd' is not covered
 -- by the statistic and while it's NULL-only we assume 200 values for it
@@ -277,44 +280,47 @@ INSERT INTO ndistinct (a, b, c, filler1)
 ANALYZE ndistinct;
 SELECT stxkind, stxndistinct
   FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
- stxkind |                        stxndistinct                         
----------+-------------------------------------------------------------
- {d,f}   | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
+ stxkind |                        stxndistinct                        
+---------+------------------------------------------------------------
+ {d,f}   | {"3, 4": 2378, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 6060}
 (1 row)
 
 -- plans using Group Aggregate, thanks to using correct estimates
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-            QUERY PLAN             
------------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  GroupAggregate
    Group Key: a, b
-   ->  Sort
-         Sort Key: a, b
-         ->  Seq Scan on ndistinct
-(5 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Sort
+               Sort Key: a, b
+               ->  Seq Scan on ndistinct
+(6 rows)
 
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-            QUERY PLAN             
------------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  GroupAggregate
    Group Key: a, b, c
-   ->  Sort
-         Sort Key: a, b, c
-         ->  Seq Scan on ndistinct
-(5 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Sort
+               Sort Key: a, b, c
+               ->  Seq Scan on ndistinct
+(6 rows)
 
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-            QUERY PLAN             
------------------------------------
+                        QUERY PLAN                         
+-----------------------------------------------------------
  GroupAggregate
    Group Key: a, b, c, d
-   ->  Sort
-         Sort Key: a, b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
+   ->  Remote Subquery Scan on all (datanode_1,datanode_2)
+         ->  Sort
+               Sort Key: a, b, c, d
+               ->  Seq Scan on ndistinct
+(6 rows)
 
 EXPLAIN (COSTS off)
  SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;