Build out the planner support function infrastructure.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c

index 994cec50ce88b3a08fa710757e936471ed6e36f7..6b96e7de0a41191712ad9c5953210a8cafcd1395 100644 (file)
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2779,6 +2779,7 @@ estimate_path_cost_size(PlannerInfo *root,
             startup_cost = ofpinfo->rel_startup_cost;
             startup_cost += aggcosts.transCost.startup;
             startup_cost += aggcosts.transCost.per_tuple * input_rows;
+           startup_cost += aggcosts.finalCost.startup;
             startup_cost += (cpu_operator_cost * numGroupCols) * input_rows;
  
             /*-----
@@ -2788,7 +2789,7 @@ estimate_path_cost_size(PlannerInfo *root,
              *-----
              */
             run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
-           run_cost += aggcosts.finalCost * numGroups;
+           run_cost += aggcosts.finalCost.per_tuple * numGroups;
             run_cost += cpu_tuple_cost * numGroups;
  
             /* Account for the eval cost of HAVING quals, if any */
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml

index d70aa6eed786e965a465adbb8c3e6ff3307c9576..b486ef391eeb2c6ea8872f5d063e2796898ac1f0 100644 (file)
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -3439,4 +3439,25 @@ supportfn(internal) returns internal
      simplify.  Ensure rigorous equivalence between the simplified
      expression and an actual execution of the target function.
     </para>
+
+   <para>
+    For target functions that return boolean, it is often useful to estimate
+    the fraction of rows that will be selected by a WHERE clause using that
+    function.  This can be done by a support function that implements
+    the <literal>SupportRequestSelectivity</literal> request type.
+   </para>
+
+   <para>
+    If the target function's runtime is highly dependent on its inputs,
+    it may be useful to provide a non-constant cost estimate for it.
+    This can be done by a support function that implements
+    the <literal>SupportRequestCost</literal> request type.
+   </para>
+
+   <para>
+    For target functions that return sets, it is often useful to provide
+    a non-constant estimate for the number of rows that will be returned.
+    This can be done by a support function that implements
+    the <literal>SupportRequestRows</literal> request type.
+   </para>
    </sect1>
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c

index abca03b805e0fe8cf408be54273f5d60c29a489c..e8142bddf02dc2696f9378bfcc8df23ec84b88f2 100644 (file)
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -762,6 +762,21 @@ clause_selectivity(PlannerInfo *root,
         if (IsA(clause, DistinctExpr))
             s1 = 1.0 - s1;
     }
+   else if (is_funcclause(clause))
+   {
+       FuncExpr   *funcclause = (FuncExpr *) clause;
+
+       /* Try to get an estimate from the support function, if any */
+       s1 = function_selectivity(root,
+                                 funcclause->funcid,
+                                 funcclause->args,
+                                 funcclause->inputcollid,
+                                 treat_as_join_clause(clause, rinfo,
+                                                      varRelid, sjinfo),
+                                 varRelid,
+                                 jointype,
+                                 sjinfo);
+   }
     else if (IsA(clause, ScalarArrayOpExpr))
     {
         /* Use node specific selectivity calculation function */
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 1057ddaa3e5822b93b8ed3ccbd54a3e65f15d8fd..beee50ec1350101f8a4005ed779a95775ec5824f 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2112,9 +2112,9 @@ cost_agg(Path *path, PlannerInfo *root,
     /*
      * The transCost.per_tuple component of aggcosts should be charged once
      * per input tuple, corresponding to the costs of evaluating the aggregate
-    * transfns and their input expressions (with any startup cost of course
-    * charged but once).  The finalCost component is charged once per output
-    * tuple, corresponding to the costs of evaluating the finalfns.
+    * transfns and their input expressions. The finalCost.per_tuple component
+    * is charged once per output tuple, corresponding to the costs of
+    * evaluating the finalfns.  Startup costs are of course charged but once.
      *
      * If we are grouping, we charge an additional cpu_operator_cost per
      * grouping column per input tuple for grouping comparisons.
@@ -2136,7 +2136,8 @@ cost_agg(Path *path, PlannerInfo *root,
         startup_cost = input_total_cost;
         startup_cost += aggcosts->transCost.startup;
         startup_cost += aggcosts->transCost.per_tuple * input_tuples;
-       startup_cost += aggcosts->finalCost;
+       startup_cost += aggcosts->finalCost.startup;
+       startup_cost += aggcosts->finalCost.per_tuple;
         /* we aren't grouping */
         total_cost = startup_cost + cpu_tuple_cost;
         output_tuples = 1;
@@ -2155,7 +2156,8 @@ cost_agg(Path *path, PlannerInfo *root,
         total_cost += aggcosts->transCost.startup;
         total_cost += aggcosts->transCost.per_tuple * input_tuples;
         total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
-       total_cost += aggcosts->finalCost * numGroups;
+       total_cost += aggcosts->finalCost.startup;
+       total_cost += aggcosts->finalCost.per_tuple * numGroups;
         total_cost += cpu_tuple_cost * numGroups;
         output_tuples = numGroups;
     }
@@ -2168,8 +2170,9 @@ cost_agg(Path *path, PlannerInfo *root,
         startup_cost += aggcosts->transCost.startup;
         startup_cost += aggcosts->transCost.per_tuple * input_tuples;
         startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+       startup_cost += aggcosts->finalCost.startup;
         total_cost = startup_cost;
-       total_cost += aggcosts->finalCost * numGroups;
+       total_cost += aggcosts->finalCost.per_tuple * numGroups;
         total_cost += cpu_tuple_cost * numGroups;
         output_tuples = numGroups;
     }
@@ -2234,7 +2237,11 @@ cost_windowagg(Path *path, PlannerInfo *root,
         Cost        wfunccost;
         QualCost    argcosts;
  
-       wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
+       argcosts.startup = argcosts.per_tuple = 0;
+       add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
+                         &argcosts);
+       startup_cost += argcosts.startup;
+       wfunccost = argcosts.per_tuple;
  
         /* also add the input expressions' cost to per-input-row costs */
         cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
@@ -3864,8 +3871,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
      */
     if (IsA(node, FuncExpr))
     {
-       context->total.per_tuple +=
-           get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
+       add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
+                         &context->total);
     }
     else if (IsA(node, OpExpr) ||
              IsA(node, DistinctExpr) ||
@@ -3873,8 +3880,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
     {
         /* rely on struct equivalence to treat these all alike */
         set_opfuncid((OpExpr *) node);
-       context->total.per_tuple +=
-           get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
+       add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
+                         &context->total);
     }
     else if (IsA(node, ScalarArrayOpExpr))
     {
@@ -3884,10 +3891,15 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
          */
         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
         Node       *arraynode = (Node *) lsecond(saop->args);
+       QualCost    sacosts;
  
         set_sa_opfuncid(saop);
-       context->total.per_tuple += get_func_cost(saop->opfuncid) *
-           cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
+       sacosts.startup = sacosts.per_tuple = 0;
+       add_function_cost(context->root, saop->opfuncid, NULL,
+                         &sacosts);
+       context->total.startup += sacosts.startup;
+       context->total.per_tuple += sacosts.per_tuple *
+           estimate_array_length(arraynode) * 0.5;
     }
     else if (IsA(node, Aggref) ||
              IsA(node, WindowFunc))
@@ -3913,11 +3925,13 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
         /* check the result type's input function */
         getTypeInputInfo(iocoerce->resulttype,
                          &iofunc, &typioparam);
-       context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
+       add_function_cost(context->root, iofunc, NULL,
+                         &context->total);
         /* check the input type's output function */
         getTypeOutputInfo(exprType((Node *) iocoerce->arg),
                           &iofunc, &typisvarlena);
-       context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
+       add_function_cost(context->root, iofunc, NULL,
+                         &context->total);
     }
     else if (IsA(node, ArrayCoerceExpr))
     {
@@ -3941,8 +3955,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
         {
             Oid         opid = lfirst_oid(lc);
  
-           context->total.per_tuple += get_func_cost(get_opcode(opid)) *
-               cpu_operator_cost;
+           add_function_cost(context->root, get_opcode(opid), NULL,
+                             &context->total);
         }
     }
     else if (IsA(node, MinMaxExpr) ||
@@ -4941,7 +4955,7 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
     foreach(lc, rte->functions)
     {
         RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
-       double      ntup = expression_returns_set_rows(rtfunc->funcexpr);
+       double      ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
  
         if (ntup > rel->tuples)
             rel->tuples = ntup;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index 002c29a5f5d5c43f7c721eb21f5022038256067d..86e4753a5b380b00cb9b2468db0ed1652f5ebe40 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -36,8 +36,8 @@
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
  #include "optimizer/planmain.h"
-#include "optimizer/prep.h"
  #include "parser/analyze.h"
  #include "parser/parse_agg.h"
  #include "parser/parse_coerce.h"
@@ -343,19 +343,24 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
         if (DO_AGGSPLIT_COMBINE(context->aggsplit))
         {
             /* charge for combining previously aggregated states */
-           costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
+           add_function_cost(context->root, aggcombinefn, NULL,
+                             &costs->transCost);
         }
         else
-           costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+           add_function_cost(context->root, aggtransfn, NULL,
+                             &costs->transCost);
         if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) &&
             OidIsValid(aggdeserialfn))
-           costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+           add_function_cost(context->root, aggdeserialfn, NULL,
+                             &costs->transCost);
         if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) &&
             OidIsValid(aggserialfn))
-           costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+           add_function_cost(context->root, aggserialfn, NULL,
+                             &costs->finalCost);
         if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) &&
             OidIsValid(aggfinalfn))
-           costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+           add_function_cost(context->root, aggfinalfn, NULL,
+                             &costs->finalCost);
  
         /*
          * These costs are incurred only by the initial aggregate node, so we
@@ -392,8 +397,8 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
         {
             cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
                                 context->root);
-           costs->transCost.startup += argcosts.startup;
-           costs->finalCost += argcosts.per_tuple;
+           costs->finalCost.startup += argcosts.startup;
+           costs->finalCost.per_tuple += argcosts.per_tuple;
         }
  
         /*
@@ -561,7 +566,7 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
   * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
   */
  double
-expression_returns_set_rows(Node *clause)
+expression_returns_set_rows(PlannerInfo *root, Node *clause)
  {
     if (clause == NULL)
         return 1.0;
@@ -570,7 +575,7 @@ expression_returns_set_rows(Node *clause)
         FuncExpr   *expr = (FuncExpr *) clause;
  
         if (expr->funcretset)
-           return clamp_row_est(get_func_rows(expr->funcid));
+           return clamp_row_est(get_function_rows(root, expr->funcid, clause));
     }
     if (IsA(clause, OpExpr))
     {
@@ -579,7 +584,7 @@ expression_returns_set_rows(Node *clause)
         if (expr->opretset)
         {
             set_opfuncid(expr);
-           return clamp_row_est(get_func_rows(expr->opfuncid));
+           return clamp_row_est(get_function_rows(root, expr->opfuncid, clause));
         }
     }
     return 1.0;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index a3e64110d36dda913ff9ca7ebe5c7937a544328b..169e51e7921f38e955c36f900e7517275fc9cabf 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2711,7 +2711,7 @@ create_set_projection_path(PlannerInfo *root,
         Node       *node = (Node *) lfirst(lc);
         double      itemrows;
  
-       itemrows = expression_returns_set_rows(node);
+       itemrows = expression_returns_set_rows(root, node);
         if (tlist_rows < itemrows)
             tlist_rows = itemrows;
     }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 3efa1bdc1a4dd783ec90f8f42e4fbdf62e11da76..d6dc83ca8090577cbe83c2f9ae671eb83c72c0b3 100644 (file)
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -29,10 +29,12 @@
  #include "catalog/heap.h"
  #include "catalog/partition.h"
  #include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
  #include "catalog/pg_statistic_ext.h"
  #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
+#include "nodes/supportnodes.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/optimizer.h"
@@ -1772,6 +1774,8 @@ restriction_selectivity(PlannerInfo *root,
   * Returns the selectivity of a specified join operator clause.
   * This code executes registered procedures stored in the
   * operator relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
   */
  Selectivity
  join_selectivity(PlannerInfo *root,
@@ -1805,6 +1809,184 @@ join_selectivity(PlannerInfo *root,
     return (Selectivity) result;
  }
  
+/*
+ * function_selectivity
+ *
+ * Returns the selectivity of a specified boolean function clause.
+ * This code executes registered procedures stored in the
+ * pg_proc relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ */
+Selectivity
+function_selectivity(PlannerInfo *root,
+                    Oid funcid,
+                    List *args,
+                    Oid inputcollid,
+                    bool is_join,
+                    int varRelid,
+                    JoinType jointype,
+                    SpecialJoinInfo *sjinfo)
+{
+   RegProcedure prosupport = get_func_support(funcid);
+   SupportRequestSelectivity req;
+   SupportRequestSelectivity *sresult;
+
+   /*
+    * If no support function is provided, use our historical default
+    * estimate, 0.3333333.  This seems a pretty unprincipled choice, but
+    * Postgres has been using that estimate for function calls since 1992.
+    * The hoariness of this behavior suggests that we should not be in too
+    * much hurry to use another value.
+    */
+   if (!prosupport)
+       return (Selectivity) 0.3333333;
+
+   req.type = T_SupportRequestSelectivity;
+   req.root = root;
+   req.funcid = funcid;
+   req.args = args;
+   req.inputcollid = inputcollid;
+   req.is_join = is_join;
+   req.varRelid = varRelid;
+   req.jointype = jointype;
+   req.sjinfo = sjinfo;
+   req.selectivity = -1;       /* to catch failure to set the value */
+
+   sresult = (SupportRequestSelectivity *)
+       DatumGetPointer(OidFunctionCall1(prosupport,
+                                        PointerGetDatum(&req)));
+
+   /* If support function fails, use default */
+   if (sresult != &req)
+       return (Selectivity) 0.3333333;
+
+   if (req.selectivity < 0.0 || req.selectivity > 1.0)
+       elog(ERROR, "invalid function selectivity: %f", req.selectivity);
+
+   return (Selectivity) req.selectivity;
+}
+
+/*
+ * add_function_cost
+ *
+ * Get an estimate of the execution cost of a function, and *add* it to
+ * the contents of *cost.  The estimate may include both one-time and
+ * per-tuple components, since QualCost does.
+ *
+ * The funcid must always be supplied.  If it is being called as the
+ * implementation of a specific parsetree node (FuncExpr, OpExpr,
+ * WindowFunc, etc), pass that as "node", else pass NULL.
+ *
+ * In some usages root might be NULL, too.
+ */
+void
+add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+                 QualCost *cost)
+{
+   HeapTuple   proctup;
+   Form_pg_proc procform;
+
+   proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+   if (!HeapTupleIsValid(proctup))
+       elog(ERROR, "cache lookup failed for function %u", funcid);
+   procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+   if (OidIsValid(procform->prosupport))
+   {
+       SupportRequestCost req;
+       SupportRequestCost *sresult;
+
+       req.type = T_SupportRequestCost;
+       req.root = root;
+       req.funcid = funcid;
+       req.node = node;
+
+       /* Initialize cost fields so that support function doesn't have to */
+       req.startup = 0;
+       req.per_tuple = 0;
+
+       sresult = (SupportRequestCost *)
+           DatumGetPointer(OidFunctionCall1(procform->prosupport,
+                                            PointerGetDatum(&req)));
+
+       if (sresult == &req)
+       {
+           /* Success, so accumulate support function's estimate into *cost */
+           cost->startup += req.startup;
+           cost->per_tuple += req.per_tuple;
+           ReleaseSysCache(proctup);
+           return;
+       }
+   }
+
+   /* No support function, or it failed, so rely on procost */
+   cost->per_tuple += procform->procost * cpu_operator_cost;
+
+   ReleaseSysCache(proctup);
+}
+
+/*
+ * get_function_rows
+ *
+ * Get an estimate of the number of rows returned by a set-returning function.
+ *
+ * The funcid must always be supplied.  In current usage, the calling node
+ * will always be supplied, and will be either a FuncExpr or OpExpr.
+ * But it's a good idea to not fail if it's NULL.
+ *
+ * In some usages root might be NULL, too.
+ *
+ * Note: this returns the unfiltered result of the support function, if any.
+ * It's usually a good idea to apply clamp_row_est() to the result, but we
+ * leave it to the caller to do so.
+ */
+double
+get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
+{
+   HeapTuple   proctup;
+   Form_pg_proc procform;
+   double      result;
+
+   proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+   if (!HeapTupleIsValid(proctup))
+       elog(ERROR, "cache lookup failed for function %u", funcid);
+   procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+   Assert(procform->proretset);    /* else caller error */
+
+   if (OidIsValid(procform->prosupport))
+   {
+       SupportRequestRows req;
+       SupportRequestRows *sresult;
+
+       req.type = T_SupportRequestRows;
+       req.root = root;
+       req.funcid = funcid;
+       req.node = node;
+
+       req.rows = 0;           /* just for sanity */
+
+       sresult = (SupportRequestRows *)
+           DatumGetPointer(OidFunctionCall1(procform->prosupport,
+                                            PointerGetDatum(&req)));
+
+       if (sresult == &req)
+       {
+           /* Success */
+           ReleaseSysCache(proctup);
+           return req.rows;
+       }
+   }
+
+   /* No support function, or it failed, so rely on prorows */
+   result = procform->prorows;
+
+   ReleaseSysCache(proctup);
+
+   return result;
+}
+
  /*
   * has_unique_index
   *
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c

index a785361fd07fa90f913b06c2f90531767d86c29e..5b2917d1594d330950524ce2d88eff27c938c433 100644 (file)
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -22,12 +22,16 @@
  #include "catalog/pg_type.h"
  #include "funcapi.h"
  #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
  #include "utils/array.h"
  #include "utils/arrayaccess.h"
  #include "utils/builtins.h"
  #include "utils/datum.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
+#include "utils/selfuncs.h"
  #include "utils/typcache.h"
  
  
@@ -6025,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS)
     }
  }
  
+/*
+ * Planner support function for array_unnest(anyarray)
+ */
+Datum
+array_unnest_support(PG_FUNCTION_ARGS)
+{
+   Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+   Node       *ret = NULL;
+
+   if (IsA(rawreq, SupportRequestRows))
+   {
+       /* Try to estimate the number of rows returned */
+       SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+       if (is_funcclause(req->node))   /* be paranoid */
+       {
+           List       *args = ((FuncExpr *) req->node)->args;
+           Node       *arg1;
+
+           /* We can use estimated argument values here */
+           arg1 = estimate_expression_value(req->root, linitial(args));
+
+           req->rows = estimate_array_length(arg1);
+           ret = (Node *) req;
+       }
+   }
+
+   PG_RETURN_POINTER(ret);
+}
+
  
  /*
   * array_replace/array_remove support
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c

index ad8e6d02ee497eb84cdf759c19961b180026ea63..04825fc77de1d03637ba705e004bef6e7fed8c2e 100644 (file)
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -30,11 +30,15 @@
  
  #include <ctype.h>
  #include <limits.h>
+#include <math.h>
  
  #include "catalog/pg_type.h"
  #include "common/int.h"
  #include "funcapi.h"
  #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  
@@ -1427,3 +1431,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS)
         /* do when there is no more left */
         SRF_RETURN_DONE(funcctx);
  }
+
+/*
+ * Planner support function for generate_series(int4, int4 [, int4])
+ */
+Datum
+generate_series_int4_support(PG_FUNCTION_ARGS)
+{
+   Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+   Node       *ret = NULL;
+
+   if (IsA(rawreq, SupportRequestRows))
+   {
+       /* Try to estimate the number of rows returned */
+       SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+       if (is_funcclause(req->node))   /* be paranoid */
+       {
+           List       *args = ((FuncExpr *) req->node)->args;
+           Node       *arg1,
+                      *arg2,
+                      *arg3;
+
+           /* We can use estimated argument values here */
+           arg1 = estimate_expression_value(req->root, linitial(args));
+           arg2 = estimate_expression_value(req->root, lsecond(args));
+           if (list_length(args) >= 3)
+               arg3 = estimate_expression_value(req->root, lthird(args));
+           else
+               arg3 = NULL;
+
+           /*
+            * If any argument is constant NULL, we can safely assume that
+            * zero rows are returned.  Otherwise, if they're all non-NULL
+            * constants, we can calculate the number of rows that will be
+            * returned.  Use double arithmetic to avoid overflow hazards.
+            */
+           if ((IsA(arg1, Const) &&
+                ((Const *) arg1)->constisnull) ||
+               (IsA(arg2, Const) &&
+                ((Const *) arg2)->constisnull) ||
+               (arg3 != NULL && IsA(arg3, Const) &&
+                ((Const *) arg3)->constisnull))
+           {
+               req->rows = 0;
+               ret = (Node *) req;
+           }
+           else if (IsA(arg1, Const) &&
+                    IsA(arg2, Const) &&
+                    (arg3 == NULL || IsA(arg3, Const)))
+           {
+               double      start,
+                           finish,
+                           step;
+
+               start = DatumGetInt32(((Const *) arg1)->constvalue);
+               finish = DatumGetInt32(((Const *) arg2)->constvalue);
+               step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1;
+
+               /* This equation works for either sign of step */
+               if (step != 0)
+               {
+                   req->rows = floor((finish - start + step) / step);
+                   ret = (Node *) req;
+               }
+           }
+       }
+   }
+
+   PG_RETURN_POINTER(ret);
+}
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c

index d16cc9e574ba1f139a7c4d09770ec0455639caf3..0ff9394a2fb037ff6ac532dbcd149896e04f414b 100644 (file)
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -20,6 +20,9 @@
  #include "common/int.h"
  #include "funcapi.h"
  #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
  #include "utils/int8.h"
  #include "utils/builtins.h"
  
@@ -1373,3 +1376,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS)
         /* do when there is no more left */
         SRF_RETURN_DONE(funcctx);
  }
+
+/*
+ * Planner support function for generate_series(int8, int8 [, int8])
+ */
+Datum
+generate_series_int8_support(PG_FUNCTION_ARGS)
+{
+   Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+   Node       *ret = NULL;
+
+   if (IsA(rawreq, SupportRequestRows))
+   {
+       /* Try to estimate the number of rows returned */
+       SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+       if (is_funcclause(req->node))   /* be paranoid */
+       {
+           List       *args = ((FuncExpr *) req->node)->args;
+           Node       *arg1,
+                      *arg2,
+                      *arg3;
+
+           /* We can use estimated argument values here */
+           arg1 = estimate_expression_value(req->root, linitial(args));
+           arg2 = estimate_expression_value(req->root, lsecond(args));
+           if (list_length(args) >= 3)
+               arg3 = estimate_expression_value(req->root, lthird(args));
+           else
+               arg3 = NULL;
+
+           /*
+            * If any argument is constant NULL, we can safely assume that
+            * zero rows are returned.  Otherwise, if they're all non-NULL
+            * constants, we can calculate the number of rows that will be
+            * returned.  Use double arithmetic to avoid overflow hazards.
+            */
+           if ((IsA(arg1, Const) &&
+                ((Const *) arg1)->constisnull) ||
+               (IsA(arg2, Const) &&
+                ((Const *) arg2)->constisnull) ||
+               (arg3 != NULL && IsA(arg3, Const) &&
+                ((Const *) arg3)->constisnull))
+           {
+               req->rows = 0;
+               ret = (Node *) req;
+           }
+           else if (IsA(arg1, Const) &&
+                    IsA(arg2, Const) &&
+                    (arg3 == NULL || IsA(arg3, Const)))
+           {
+               double      start,
+                           finish,
+                           step;
+
+               start = DatumGetInt64(((Const *) arg1)->constvalue);
+               finish = DatumGetInt64(((Const *) arg2)->constvalue);
+               step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1;
+
+               /* This equation works for either sign of step */
+               if (step != 0)
+               {
+                   req->rows = floor((finish - start + step) / step);
+                   ret = (Node *) req;
+               }
+           }
+       }
+   }
+
+   PG_RETURN_POINTER(ret);
+}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 74fafc64f3ec16d49c08aca3a3fed48d242fe0d8..1ef6faecd1eed7ea0c0f245dd9d7305f0790ea14 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1577,17 +1577,6 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
         selec = var_eq_const(&vardata, BooleanEqualOperator,
                              BoolGetDatum(true), false, true, false);
     }
-   else if (is_funcclause(arg))
-   {
-       /*
-        * If we have no stats and it's a function call, estimate 0.3333333.
-        * This seems a pretty unprincipled choice, but Postgres has been
-        * using that estimate for function calls since 1992.  The hoariness
-        * of this behavior suggests that we should not be in too much hurry
-        * to use another value.
-        */
-       selec = 0.3333333;
-   }
     else
     {
         /* Otherwise, the default estimate is 0.5 */
@@ -3502,7 +3491,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
          * pointless to worry too much about this without much better
          * estimates for SRF output rowcounts than we have today.)
          */
-       this_srf_multiplier = expression_returns_set_rows(groupexpr);
+       this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
         if (srf_multiplier < this_srf_multiplier)
             srf_multiplier = this_srf_multiplier;
  
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index fba0ee8b847b7ae1d7ec527f1dd6e46e004bbab6..e88c45d268a5cf933f0d59ccdc513c66f250f515 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1605,41 +1605,28 @@ get_func_leakproof(Oid funcid)
  }
  
  /*
- * get_func_cost
- *     Given procedure id, return the function's procost field.
- */
-float4
-get_func_cost(Oid funcid)
-{
-   HeapTuple   tp;
-   float4      result;
-
-   tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
-   if (!HeapTupleIsValid(tp))
-       elog(ERROR, "cache lookup failed for function %u", funcid);
-
-   result = ((Form_pg_proc) GETSTRUCT(tp))->procost;
-   ReleaseSysCache(tp);
-   return result;
-}
-
-/*
- * get_func_rows
- *     Given procedure id, return the function's prorows field.
+ * get_func_support
+ *
+ *     Returns the support function OID associated with a given function,
+ *     or InvalidOid if there is none.
   */
-float4
-get_func_rows(Oid funcid)
+RegProcedure
+get_func_support(Oid funcid)
  {
     HeapTuple   tp;
-   float4      result;
  
     tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
-   if (!HeapTupleIsValid(tp))
-       elog(ERROR, "cache lookup failed for function %u", funcid);
+   if (HeapTupleIsValid(tp))
+   {
+       Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp);
+       RegProcedure result;
  
-   result = ((Form_pg_proc) GETSTRUCT(tp))->prorows;
-   ReleaseSysCache(tp);
-   return result;
+       result = functup->prosupport;
+       ReleaseSysCache(tp);
+       return result;
+   }
+   else
+       return (RegProcedure) InvalidOid;
  }
  
  /*             ---------- RELATION CACHE ----------                     */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 9233fb934e8a0dc27c8d0599631173985e00cf76..968a6800ddc51e9084b8f5d8051ea64d5868af7b 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201902091
+#define CATALOG_VERSION_NO 201902092
  
  #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat

index 1f5352ce5439e904aad86849c5734a768843cd5c..50b742c06e60b213d0e1bb7f1cfa4295c1853d1d 100644 (file)
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1530,9 +1530,12 @@
    proargtypes => 'anyelement _int4 _int4',
    prosrc => 'array_fill_with_lower_bounds' },
  { oid => '2331', descr => 'expand array to set of rows',
-  proname => 'unnest', prorows => '100', proretset => 't',
-  prorettype => 'anyelement', proargtypes => 'anyarray',
+  proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support',
+  proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray',
    prosrc => 'array_unnest' },
+{ oid => '3996', descr => 'planner support for array_unnest',
+  proname => 'array_unnest_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'array_unnest_support' },
  { oid => '3167',
    descr => 'remove any occurrences of an element from an array',
    proname => 'array_remove', proisstrict => 'f', prorettype => 'anyarray',
@@ -7536,21 +7539,31 @@
  
  # non-persistent series generator
  { oid => '1066', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int4_support', proretset => 't',
    prorettype => 'int4', proargtypes => 'int4 int4 int4',
    prosrc => 'generate_series_step_int4' },
  { oid => '1067', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int4_support', proretset => 't',
    prorettype => 'int4', proargtypes => 'int4 int4',
    prosrc => 'generate_series_int4' },
+{ oid => '3994', descr => 'planner support for generate_series',
+  proname => 'generate_series_int4_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'generate_series_int4_support' },
  { oid => '1068', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int8_support', proretset => 't',
    prorettype => 'int8', proargtypes => 'int8 int8 int8',
    prosrc => 'generate_series_step_int8' },
  { oid => '1069', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int8_support', proretset => 't',
    prorettype => 'int8', proargtypes => 'int8 int8',
    prosrc => 'generate_series_int8' },
+{ oid => '3995', descr => 'planner support for generate_series',
+  proname => 'generate_series_int8_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
  { oid => '3259', descr => 'non-persistent series generator',
    proname => 'generate_series', prorows => '1000', proretset => 't',
    prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index 0d2d1889e9d5baa427ecd13b73ff7e242624e9f8..453079a9e268caf7043cf4ae4689a2bbd18a3418 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -507,7 +507,10 @@ typedef enum NodeTag
     T_TsmRoutine,               /* in access/tsmapi.h */
     T_ForeignKeyCacheInfo,      /* in utils/rel.h */
     T_CallContext,              /* in nodes/parsenodes.h */
-   T_SupportRequestSimplify    /* in nodes/supportnodes.h */
+   T_SupportRequestSimplify,   /* in nodes/supportnodes.h */
+   T_SupportRequestSelectivity,    /* in nodes/supportnodes.h */
+   T_SupportRequestCost,       /* in nodes/supportnodes.h */
+   T_SupportRequestRows        /* in nodes/supportnodes.h */
  } NodeTag;
  
  /*
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 0b780b07c1266a318dff8943079530d3ebabd41e..c23c4304f375660596e487668ff824c724f78b27 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -61,7 +61,7 @@ typedef struct AggClauseCosts
     bool        hasNonPartial;  /* does any agg not support partial mode? */
     bool        hasNonSerial;   /* is any partial agg non-serializable? */
     QualCost    transCost;      /* total per-input-row execution costs */
-   Cost        finalCost;      /* total per-aggregated-row costs */
+   QualCost    finalCost;      /* total per-aggregated-row costs */
     Size        transitionSpace;    /* space for pass-by-ref transition data */
  } AggClauseCosts;
  
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h

index 1f7d02b5ee2637419882580ae76d207477875761..1a3a36ba99ca0b50af474969e656c1ef972541a4 100644 (file)
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -36,6 +36,7 @@
  #include "nodes/primnodes.h"
  
  struct PlannerInfo;                /* avoid including relation.h here */
+struct SpecialJoinInfo;
  
  
  /*
@@ -67,4 +68,103 @@ typedef struct SupportRequestSimplify
     FuncExpr   *fcall;          /* Function call to be simplified */
  } SupportRequestSimplify;
  
+/*
+ * The Selectivity request allows the support function to provide a
+ * selectivity estimate for a function appearing at top level of a WHERE
+ * clause (so it applies only to functions returning boolean).
+ *
+ * The input arguments are the same as are supplied to operator restriction
+ * and join estimators, except that we unify those two APIs into just one
+ * request type.  See clause_selectivity() for the details.
+ *
+ * If an estimate can be made, store it into the "selectivity" field and
+ * return the address of the SupportRequestSelectivity node; the estimate
+ * must be between 0 and 1 inclusive.  Return NULL if no estimate can be
+ * made (in which case the planner will fall back to a default estimate,
+ * traditionally 1/3).
+ *
+ * If the target function is being used as the implementation of an operator,
+ * the support function will not be used for this purpose; the operator's
+ * restriction or join estimator is consulted instead.
+ */
+typedef struct SupportRequestSelectivity
+{
+   NodeTag     type;
+
+   /* Input fields: */
+   struct PlannerInfo *root;   /* Planner's infrastructure */
+   Oid         funcid;         /* function we are inquiring about */
+   List       *args;           /* pre-simplified arguments to function */
+   Oid         inputcollid;    /* function's input collation */
+   bool        is_join;        /* is this a join or restriction case? */
+   int         varRelid;       /* if restriction, RTI of target relation */
+   JoinType    jointype;       /* if join, outer join type */
+   struct SpecialJoinInfo *sjinfo; /* if outer join, info about join */
+
+   /* Output fields: */
+   Selectivity selectivity;    /* returned selectivity estimate */
+} SupportRequestSelectivity;
+
+/*
+ * The Cost request allows the support function to provide an execution
+ * cost estimate for its target function.  The cost estimate can include
+ * both a one-time (query startup) component and a per-execution component.
+ * The estimate should *not* include the costs of evaluating the target
+ * function's arguments, only the target function itself.
+ *
+ * The "node" argument is normally the parse node that is invoking the
+ * target function.  This is a FuncExpr in the simplest case, but it could
+ * also be an OpExpr, DistinctExpr, NullIfExpr, or WindowFunc, or possibly
+ * other cases in future.  NULL is passed if the function cannot presume
+ * its arguments to be equivalent to what the calling node presents as
+ * arguments; that happens for, e.g., aggregate support functions and
+ * per-column comparison operators used by RowExprs.
+ *
+ * If an estimate can be made, store it into the cost fields and return the
+ * address of the SupportRequestCost node.  Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's procost
+ * field.  (Note: while procost is automatically scaled by cpu_operator_cost,
+ * this is not the case for the outputs of the Cost request; the support
+ * function must scale its results appropriately on its own.)
+ */
+typedef struct SupportRequestCost
+{
+   NodeTag     type;
+
+   /* Input fields: */
+   struct PlannerInfo *root;   /* Planner's infrastructure (could be NULL) */
+   Oid         funcid;         /* function we are inquiring about */
+   Node       *node;           /* parse node invoking function, or NULL */
+
+   /* Output fields: */
+   Cost        startup;        /* one-time cost */
+   Cost        per_tuple;      /* per-evaluation cost */
+} SupportRequestCost;
+
+/*
+ * The Rows request allows the support function to provide an output rowcount
+ * estimate for its target function (so it applies only to set-returning
+ * functions).
+ *
+ * The "node" argument is the parse node that is invoking the target function;
+ * currently this will always be a FuncExpr or OpExpr.
+ *
+ * If an estimate can be made, store it into the rows field and return the
+ * address of the SupportRequestRows node.  Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's prorows
+ * field.
+ */
+typedef struct SupportRequestRows
+{
+   NodeTag     type;
+
+   /* Input fields: */
+   struct PlannerInfo *root;   /* Planner's infrastructure (could be NULL) */
+   Oid         funcid;         /* function we are inquiring about */
+   Node       *node;           /* parse node invoking function */
+
+   /* Output fields: */
+   double      rows;           /* number of rows expected to be returned */
+} SupportRequestRows;
+
  #endif                         /* SUPPORTNODES_H */
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h

index 95a78cfa393be43847dc160ba1cd9d88c6ede39c..5e10fb1d5072ab0406b749df8f7555f3a6fc0809 100644 (file)
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -31,7 +31,7 @@ extern void get_agg_clause_costs(PlannerInfo *root, Node *clause,
  extern bool contain_window_function(Node *clause);
  extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
  
-extern double expression_returns_set_rows(Node *clause);
+extern double expression_returns_set_rows(PlannerInfo *root, Node *clause);
  
  extern bool contain_subplans(Node *clause);
  
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h

index 40f70f9f2b1faafeb0a5ff34646750d550f358fe..c337f047cb753b2dfbc57378f46b7958aeaa3dc6 100644 (file)
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -55,6 +55,20 @@ extern Selectivity join_selectivity(PlannerInfo *root,
                  JoinType jointype,
                  SpecialJoinInfo *sjinfo);
  
+extern Selectivity function_selectivity(PlannerInfo *root,
+                    Oid funcid,
+                    List *args,
+                    Oid inputcollid,
+                    bool is_join,
+                    int varRelid,
+                    JoinType jointype,
+                    SpecialJoinInfo *sjinfo);
+
+extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+                 QualCost *cost);
+
+extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
+
  extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
  
  #endif                         /* PLANCAT_H */
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index ceec85db9254b1bf942b8a2d5362e150cf0d96e4..16b0b1d2dccaee89002974f42744514c9d3d5f31 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -120,8 +120,7 @@ extern char func_volatile(Oid funcid);
  extern char func_parallel(Oid funcid);
  extern char get_func_prokind(Oid funcid);
  extern bool get_func_leakproof(Oid funcid);
-extern float4 get_func_cost(Oid funcid);
-extern float4 get_func_rows(Oid funcid);
+extern RegProcedure get_func_support(Oid funcid);
  extern Oid get_relname_relid(const char *relname, Oid relnamespace);
  extern char *get_rel_name(Oid relid);
  extern Oid get_rel_namespace(Oid relid);
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out

index 130a0e4be3abf5d2ca3f7500fbb5b33305804f91..0879c885eb33e13f42612ac0fc0a22a675a5c429 100644 (file)
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -133,3 +133,63 @@ ERROR:  function num_nulls() does not exist
  LINE 1: SELECT num_nulls();
                 ^
  HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+--
+-- Test adding a support function to a subject function
+--
+CREATE FUNCTION my_int_eq(int, int) RETURNS bool
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$int4eq$$;
+-- By default, planner does not think that's selective
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Join
+   Hash Cond: (b.unique1 = a.unique1)
+   ->  Seq Scan on tenk1 b
+   ->  Hash
+         ->  Seq Scan on tenk1 a
+               Filter: my_int_eq(unique2, 42)
+(6 rows)
+
+-- With support function that knows it's int4eq, we get a different plan
+ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on tenk1 a
+         Filter: my_int_eq(unique2, 42)
+   ->  Index Scan using tenk1_unique1 on tenk1 b
+         Index Cond: (unique1 = a.unique1)
+(5 rows)
+
+-- Also test non-default rowcount estimate
+CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$generate_series_int4$$
+  SUPPORT test_support_func;
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (g.g = a.unique1)
+   ->  Function Scan on my_gen_series g
+   ->  Hash
+         ->  Seq Scan on tenk1 a
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop
+   ->  Function Scan on my_gen_series g
+   ->  Index Scan using tenk1_unique1 on tenk1 a
+         Index Cond: (unique1 = g.g)
+(4 rows)
+
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index a54b4a5a7c8154d49e09186f2ab5e7d1963fd4f0..6e238e88b37956e0688bae07c1c51f9fb3d6ade3 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -904,7 +904,7 @@ select * from int4_tbl where
  --
  explain (verbose, costs off)
  select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
                              QUERY PLAN                             
  -------------------------------------------------------------------
   Nested Loop Semi Join
@@ -918,9 +918,9 @@ select * from int4_tbl o where (f1, f1) in
                 Output: "ANY_subquery".f1, "ANY_subquery".g
                 Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
                 ->  Result
-                     Output: i.f1, ((generate_series(1, 2)) / 10)
+                     Output: i.f1, ((generate_series(1, 50)) / 10)
                       ->  ProjectSet
-                           Output: generate_series(1, 2), i.f1
+                           Output: generate_series(1, 50), i.f1
                             ->  HashAggregate
                                   Output: i.f1
                                   Group Key: i.f1
@@ -929,7 +929,7 @@ select * from int4_tbl o where (f1, f1) in
  (19 rows)
  
  select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
   f1 
  ----
    0
diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source

index 26e2227d3afed45cd28622b424a2b77643d90375..223454a5eabc6fcfcf960a1654e86906d5325fe5 100644 (file)
--- a/src/test/regress/input/create_function_1.source
+++ b/src/test/regress/input/create_function_1.source
@@ -68,6 +68,11 @@ CREATE FUNCTION test_fdw_handler()
      AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
      LANGUAGE C;
  
+CREATE FUNCTION test_support_func(internal)
+    RETURNS internal
+    AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
+    LANGUAGE C STRICT;
+
  -- Things that shouldn't work:
  
  CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source

index 8c50d9b30992b347f49165d41c9c5b5012fd1afc..5f43e8de81ffea50061742695eefa83ba1fcbb4e 100644 (file)
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -60,6 +60,10 @@ CREATE FUNCTION test_fdw_handler()
      RETURNS fdw_handler
      AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
      LANGUAGE C;
+CREATE FUNCTION test_support_func(internal)
+    RETURNS internal
+    AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
+    LANGUAGE C STRICT;
  -- Things that shouldn't work:
  CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
      AS 'SELECT ''not an integer'';';
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c

index 70727286ca5616f2efd214233ec951fc2515432b..ad3e8038993e3f8c6baa016520db412755ff7979 100644 (file)
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -23,12 +23,16 @@
  #include "access/transam.h"
  #include "access/tuptoaster.h"
  #include "access/xact.h"
+#include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "commands/sequence.h"
  #include "commands/trigger.h"
  #include "executor/executor.h"
  #include "executor/spi.h"
  #include "miscadmin.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
  #include "port/atomics.h"
  #include "utils/builtins.h"
  #include "utils/geo_decls.h"
@@ -863,3 +867,76 @@ test_fdw_handler(PG_FUNCTION_ARGS)
     elog(ERROR, "test_fdw_handler is not implemented");
     PG_RETURN_NULL();
  }
+
+PG_FUNCTION_INFO_V1(test_support_func);
+Datum
+test_support_func(PG_FUNCTION_ARGS)
+{
+   Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+   Node       *ret = NULL;
+
+   if (IsA(rawreq, SupportRequestSelectivity))
+   {
+       /*
+        * Assume that the target is int4eq; that's safe as long as we don't
+        * attach this to any other boolean-returning function.
+        */
+       SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+       Selectivity s1;
+
+       if (req->is_join)
+           s1 = join_selectivity(req->root, Int4EqualOperator,
+                                 req->args,
+                                 req->inputcollid,
+                                 req->jointype,
+                                 req->sjinfo);
+       else
+           s1 = restriction_selectivity(req->root, Int4EqualOperator,
+                                        req->args,
+                                        req->inputcollid,
+                                        req->varRelid);
+
+       req->selectivity = s1;
+       ret = (Node *) req;
+   }
+
+   if (IsA(rawreq, SupportRequestCost))
+   {
+       /* Provide some generic estimate */
+       SupportRequestCost *req = (SupportRequestCost *) rawreq;
+
+       req->startup = 0;
+       req->per_tuple = 2 * cpu_operator_cost;
+       ret = (Node *) req;
+   }
+
+   if (IsA(rawreq, SupportRequestRows))
+   {
+       /*
+        * Assume that the target is generate_series_int4; that's safe as long
+        * as we don't attach this to any other set-returning function.
+        */
+       SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+       if (req->node && IsA(req->node, FuncExpr))  /* be paranoid */
+       {
+           List       *args = ((FuncExpr *) req->node)->args;
+           Node       *arg1 = linitial(args);
+           Node       *arg2 = lsecond(args);
+
+           if (IsA(arg1, Const) &&
+               !((Const *) arg1)->constisnull &&
+               IsA(arg2, Const) &&
+               !((Const *) arg2)->constisnull)
+           {
+               int32       val1 = DatumGetInt32(((Const *) arg1)->constvalue);
+               int32       val2 = DatumGetInt32(((Const *) arg2)->constvalue);
+
+               req->rows = val2 - val1 + 1;
+               ret = (Node *) req;
+           }
+       }
+   }
+
+   PG_RETURN_POINTER(ret);
+}
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql

index 1a20c1f76527f9b06ea86f839bc04fa4382a9cec..7a71f7659ce85499c2c0914ca97e1ef1a83e6c64 100644 (file)
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -29,3 +29,35 @@ SELECT num_nulls(VARIADIC '{}'::int[]);
  -- should fail, one or more arguments is required
  SELECT num_nonnulls();
  SELECT num_nulls();
+
+--
+-- Test adding a support function to a subject function
+--
+
+CREATE FUNCTION my_int_eq(int, int) RETURNS bool
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$int4eq$$;
+
+-- By default, planner does not think that's selective
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+
+-- With support function that knows it's int4eq, we get a different plan
+ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+
+-- Also test non-default rowcount estimate
+CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$generate_series_int4$$
+  SUPPORT test_support_func;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql

index 843f511b3dc92201f26b2f4386a64601680df948..ccbe8a1df5ddf4fb1c262680b71d2969888f67e7 100644 (file)
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -498,9 +498,9 @@ select * from int4_tbl where
  --
  explain (verbose, costs off)
  select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
  select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
  
  --
  -- check for over-optimization of whole-row Var referencing an Append plan
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
contrib/postgres_fdw/postgres_fdw.c		patch \| blob \| blame \| history
doc/src/sgml/xfunc.sgml		patch \| blob \| blame \| history
src/backend/optimizer/path/clausesel.c		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/util/clauses.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/optimizer/util/plancat.c		patch \| blob \| blame \| history
src/backend/utils/adt/arrayfuncs.c		patch \| blob \| blame \| history
src/backend/utils/adt/int.c		patch \| blob \| blame \| history
src/backend/utils/adt/int8.c		patch \| blob \| blame \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.dat		patch \| blob \| blame \| history
src/include/nodes/nodes.h		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/nodes/supportnodes.h		patch \| blob \| blame \| history
src/include/optimizer/clauses.h		patch \| blob \| blame \| history
src/include/optimizer/plancat.h		patch \| blob \| blame \| history
src/include/utils/lsyscache.h		patch \| blob \| blame \| history
src/test/regress/expected/misc_functions.out		patch \| blob \| blame \| history
src/test/regress/expected/subselect.out		patch \| blob \| blame \| history
src/test/regress/input/create_function_1.source		patch \| blob \| blame \| history
src/test/regress/output/create_function_1.source		patch \| blob \| blame \| history
src/test/regress/regress.c		patch \| blob \| blame \| history
src/test/regress/sql/misc_functions.sql		patch \| blob \| blame \| history
src/test/regress/sql/subselect.sql		patch \| blob \| blame \| history