Be stricter about not creating tuplestores with randomAccess = true unless
author: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 29 Oct 2008 00:00:39 +0000 (00:00 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 29 Oct 2008 00:00:39 +0000 (00:00 +0000)
a backwards scan could actually happen.  In particular, pass a flag to
materialize-mode SRFs that tells them whether the tuplestore they build must
support random access.  In passing, also suppress unneeded backward-scan
overhead for a Portal's holdStore tuplestore.  Per my proposal about reducing
I/O costs for tuplestores.

contrib/tablefunc/tablefunc.c
contrib/xml2/xpath.c
src/backend/commands/prepare.c
src/backend/executor/execQual.c
src/backend/executor/nodeFunctionscan.c
src/backend/utils/fmgr/README
src/backend/utils/mmgr/portalmem.c
src/include/executor/executor.h
src/include/nodes/execnodes.h
src/pl/plperl/plperl.c
src/pl/plpgsql/src/pl_exec.c

index f0ac5e8e4fba676efdd851f174218f5de7f481ea..42e98f7b5245d21e57e7464f12b5e28dfd644494 100644 (file)
@@ -51,7 +51,8 @@ static HTAB *load_categories_hash(char *cats_sql, MemoryContext per_query_ctx);
 static Tuplestorestate *get_crosstab_tuplestore(char *sql,
                                                HTAB *crosstab_hash,
                                                TupleDesc tupdesc,
-                                               MemoryContext per_query_ctx);
+                                               MemoryContext per_query_ctx,
+                                               bool randomAccess);
 static void validateConnectbyTupleDesc(TupleDesc tupdesc, bool show_branch, bool show_serial);
 static bool compatCrosstabTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2);
 static bool compatConnectbyTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2);
@@ -66,6 +67,7 @@ static Tuplestorestate *connectby(char *relname,
                  bool show_branch,
                  bool show_serial,
                  MemoryContext per_query_ctx,
+                 bool randomAccess,
                  AttInMetadata *attinmeta);
 static Tuplestorestate *build_tuplestore_recursively(char *key_fld,
                                                         char *parent_key_fld,
@@ -745,7 +747,8 @@ crosstab_hash(PG_FUNCTION_ARGS)
        rsinfo->setResult = get_crosstab_tuplestore(sql,
                                                                                                crosstab_hash,
                                                                                                tupdesc,
-                                                                                               per_query_ctx);
+                                                                                               per_query_ctx,
+                                                                                               rsinfo->allowedModes & SFRM_Materialize_Random);
 
        /*
         * SFRM_Materialize mode expects us to return a NULL Datum. The actual
@@ -852,7 +855,8 @@ static Tuplestorestate *
 get_crosstab_tuplestore(char *sql,
                                                HTAB *crosstab_hash,
                                                TupleDesc tupdesc,
-                                               MemoryContext per_query_ctx)
+                                               MemoryContext per_query_ctx,
+                                               bool randomAccess)
 {
        Tuplestorestate *tupstore;
        int                     num_categories = hash_get_num_entries(crosstab_hash);
@@ -863,8 +867,8 @@ get_crosstab_tuplestore(char *sql,
        int                     proc;
        MemoryContext SPIcontext;
 
-       /* initialize our tuplestore */
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       /* initialize our tuplestore (while still in query context!) */
+       tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
 
        /* Connect to SPI manager */
        if ((ret = SPI_connect()) < 0)
@@ -1113,6 +1117,7 @@ connectby_text(PG_FUNCTION_ARGS)
                                                                  show_branch,
                                                                  show_serial,
                                                                  per_query_ctx,
+                                                                 rsinfo->allowedModes & SFRM_Materialize_Random,
                                                                  attinmeta);
        rsinfo->setDesc = tupdesc;
 
@@ -1192,6 +1197,7 @@ connectby_text_serial(PG_FUNCTION_ARGS)
                                                                  show_branch,
                                                                  show_serial,
                                                                  per_query_ctx,
+                                                                 rsinfo->allowedModes & SFRM_Materialize_Random,
                                                                  attinmeta);
        rsinfo->setDesc = tupdesc;
 
@@ -1222,6 +1228,7 @@ connectby(char *relname,
                  bool show_branch,
                  bool show_serial,
                  MemoryContext per_query_ctx,
+                 bool randomAccess,
                  AttInMetadata *attinmeta)
 {
        Tuplestorestate *tupstore = NULL;
@@ -1239,7 +1246,7 @@ connectby(char *relname,
        oldcontext = MemoryContextSwitchTo(per_query_ctx);
 
        /* initialize our tuplestore */
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
 
        MemoryContextSwitchTo(oldcontext);
 
index 2cd1d93fcd3cceebee66113ab2f13e20a2ab667a..6a7c64afbdd247aebd4559d560ace6631b0f1038 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * $PostgreSQL:
+ * $PostgreSQL$ 
  *
  * Parser interface for DOM-based parser (libxml) rather than
    stream-based SAX-type parser */
@@ -688,7 +688,9 @@ xpath_table(PG_FUNCTION_ARGS)
         * Create the tuplestore - work_mem is the max in-memory size before a
         * file is created on disk to hold it.
         */
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       tupstore =
+               tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
+                                                         false, work_mem);
 
        MemoryContextSwitchTo(oldcontext);
 
index 144b05787149e4452321a01fed158f1cf7063f09..df1c439de7f7d5fca4a892b10fc4bb135684b35d 100644 (file)
@@ -766,7 +766,9 @@ pg_prepared_statement(PG_FUNCTION_ARGS)
         * We put all the tuples into a tuplestore in one scan of the hashtable.
         * This avoids any issue of the hashtable possibly changing between calls.
         */
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       tupstore =
+               tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
+                                                         false, work_mem);
 
        /* hash table might be uninitialized */
        if (prepared_queries)
index 931efed4a3e407e0aee874ea675ec264a034236b..4c4742dfe5aacb67f0268c6d3cb0f8a41e58fad2 100644 (file)
@@ -1429,6 +1429,7 @@ restart:
                rsinfo.econtext = econtext;
                rsinfo.expectedDesc = fcache->funcResultDesc;
                rsinfo.allowedModes = (int) (SFRM_ValuePerCall | SFRM_Materialize);
+               /* note we do not set SFRM_Materialize_Random */
                rsinfo.returnMode = SFRM_ValuePerCall;
                /* isDone is filled below */
                rsinfo.setResult = NULL;
@@ -1702,7 +1703,8 @@ ExecMakeFunctionResultNoSets(FuncExprState *fcache,
 Tuplestorestate *
 ExecMakeTableFunctionResult(ExprState *funcexpr,
                                                        ExprContext *econtext,
-                                                       TupleDesc expectedDesc)
+                                                       TupleDesc expectedDesc,
+                                                       bool randomAccess)
 {
        Tuplestorestate *tupstore = NULL;
        TupleDesc       tupdesc = NULL;
@@ -1736,6 +1738,8 @@ ExecMakeTableFunctionResult(ExprState *funcexpr,
        rsinfo.econtext = econtext;
        rsinfo.expectedDesc = expectedDesc;
        rsinfo.allowedModes = (int) (SFRM_ValuePerCall | SFRM_Materialize);
+       if (randomAccess)
+               rsinfo.allowedModes |= (int) SFRM_Materialize_Random;
        rsinfo.returnMode = SFRM_ValuePerCall;
        /* isDone is filled below */
        rsinfo.setResult = NULL;
@@ -1909,7 +1913,7 @@ ExecMakeTableFunctionResult(ExprState *funcexpr,
                                                                           -1,
                                                                           0);
                                }
-                               tupstore = tuplestore_begin_heap(true, false, work_mem);
+                               tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
                                MemoryContextSwitchTo(oldcontext);
                                rsinfo.setResult = tupstore;
                                rsinfo.setDesc = tupdesc;
@@ -1976,7 +1980,7 @@ no_function_result:
        if (rsinfo.setResult == NULL)
        {
                MemoryContextSwitchTo(econtext->ecxt_per_query_memory);
-               tupstore = tuplestore_begin_heap(true, false, work_mem);
+               tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
                rsinfo.setResult = tupstore;
                if (!returnsSet)
                {
index c93865f0223325c615d61675a1c453f1b066d55a..1e5086f3793e992c1ac3e2f1bbec362c30bd4bab 100644 (file)
@@ -64,7 +64,8 @@ FunctionNext(FunctionScanState *node)
                node->tuplestorestate = tuplestorestate =
                        ExecMakeTableFunctionResult(node->funcexpr,
                                                                                node->ss.ps.ps_ExprContext,
-                                                                               node->tupdesc);
+                                                                               node->tupdesc,
+                                                                               node->eflags & EXEC_FLAG_BACKWARD);
        }
 
        /*
@@ -123,6 +124,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
        scanstate = makeNode(FunctionScanState);
        scanstate->ss.ps.plan = (Plan *) node;
        scanstate->ss.ps.state = estate;
+       scanstate->eflags = eflags;
 
        /*
         * Miscellaneous initialization
index 112c2dce9a8e4feb9f24cbac075b3ef64cee9e43..26593c93ba23d2b2c75b2ecacfb066172d991c36 100644 (file)
@@ -432,6 +432,10 @@ function is called in).  The function stores pointers to the Tuplestore and
 TupleDesc into ReturnSetInfo, sets returnMode to indicate materialize mode,
 and returns null.  isDone is not used and should be left at ExprSingleResult.
 
+The Tuplestore must be created with randomAccess = true if
+SFRM_Materialize_Random is set in allowedModes, but it can (and preferably
+should) be created with randomAccess = false if not.
+
 If available, the expected tuple descriptor is passed in ReturnSetInfo;
 in other contexts the expectedDesc field will be NULL.  The function need
 not pay attention to expectedDesc, but it may be useful in special cases.
index 5184bbc360045d05be002755f2259a363388565b..d9b4ef061c76b095d30f03096b8963b7de92861a 100644 (file)
@@ -354,11 +354,17 @@ PortalCreateHoldStore(Portal portal)
                                                          ALLOCSET_DEFAULT_INITSIZE,
                                                          ALLOCSET_DEFAULT_MAXSIZE);
 
-       /* Create the tuple store, selecting cross-transaction temp files. */
+       /*
+        * Create the tuple store, selecting cross-transaction temp files, and
+        * enabling random access only if cursor requires scrolling.
+        *
+        * XXX: Should maintenance_work_mem be used for the portal size?
+        */
        oldcxt = MemoryContextSwitchTo(portal->holdContext);
 
-       /* XXX: Should maintenance_work_mem be used for the portal size? */
-       portal->holdStore = tuplestore_begin_heap(true, true, work_mem);
+       portal->holdStore =
+               tuplestore_begin_heap(portal->cursorOptions & CURSOR_OPT_SCROLL,
+                                                         true, work_mem);
 
        MemoryContextSwitchTo(oldcxt);
 }
@@ -913,7 +919,9 @@ pg_cursor(PG_FUNCTION_ARGS)
         * We put all the tuples into a tuplestore in one scan of the hashtable.
         * This avoids any issue of the hashtable possibly changing between calls.
         */
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       tupstore =
+               tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
+                                                         false, work_mem);
 
        hash_seq_init(&hash_seq, PortalHashTable);
        while ((hentry = hash_seq_search(&hash_seq)) != NULL)
index d1e97daa8ef9213fbff9d43f8a9c53e95c924440..1078a780fc450d5d567e7ed1c3148963cf903a97 100644 (file)
@@ -178,7 +178,8 @@ extern Datum GetAttributeByName(HeapTupleHeader tuple, const char *attname,
                                   bool *isNull);
 extern Tuplestorestate *ExecMakeTableFunctionResult(ExprState *funcexpr,
                                                        ExprContext *econtext,
-                                                       TupleDesc expectedDesc);
+                                                       TupleDesc expectedDesc,
+                                                       bool randomAccess);
 extern Datum ExecEvalExprSwitchContext(ExprState *expression, ExprContext *econtext,
                                                  bool *isNull, ExprDoneCond *isDone);
 extern ExprState *ExecInitExpr(Expr *node, PlanState *parent);
index 04dee5dd479ffec627dc31e48ee6f54cbb51e941..a4065d7880ebf562e33a74c4d2ea52dcead59586 100644 (file)
@@ -156,7 +156,8 @@ typedef enum
 typedef enum
 {
        SFRM_ValuePerCall = 0x01,       /* one value returned per call */
-       SFRM_Materialize = 0x02         /* result set instantiated in Tuplestore */
+       SFRM_Materialize = 0x02,        /* result set instantiated in Tuplestore */
+       SFRM_Materialize_Random = 0x04          /* Tuplestore needs randomAccess */
 } SetFunctionReturnMode;
 
 /*
@@ -1180,6 +1181,7 @@ typedef struct SubqueryScanState
  *             Function nodes are used to scan the results of a
  *             function appearing in FROM (typically a function returning set).
  *
+ *             eflags                          node's capability flags
  *             tupdesc                         expected return tuple description
  *             tuplestorestate         private state of tuplestore.c
  *             funcexpr                        state for function expression being evaluated
@@ -1188,6 +1190,7 @@ typedef struct SubqueryScanState
 typedef struct FunctionScanState
 {
        ScanState       ss;                             /* its first field is NodeTag */
+       int                     eflags;
        TupleDesc       tupdesc;
        Tuplestorestate *tuplestorestate;
        ExprState  *funcexpr;
index 43044aa7c11d2e6cb034d7d830649598573691d3..9dc184e52ac4c0c931da7e05577c2a5c13a02608 100644 (file)
@@ -1922,7 +1922,8 @@ plperl_return_next(SV *sv)
 
                current_call_data->ret_tdesc = CreateTupleDescCopy(tupdesc);
                current_call_data->tuple_store =
-                       tuplestore_begin_heap(true, false, work_mem);
+                       tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+                                                                 false, work_mem);
                if (prodesc->fn_retistuple)
                {
                        current_call_data->attinmeta =
index 7a3a2334e142c92fa32e07ae4ec22421ae4d0ac9..ed2655ab23eac5b4dcb5ef8edfffda3f9a8f6822 100644 (file)
@@ -2357,7 +2357,9 @@ exec_init_tuple_store(PLpgSQL_execstate *estate)
        estate->tuple_store_cxt = rsi->econtext->ecxt_per_query_memory;
 
        oldcxt = MemoryContextSwitchTo(estate->tuple_store_cxt);
-       estate->tuple_store = tuplestore_begin_heap(true, false, work_mem);
+       estate->tuple_store =
+               tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+                                                         false, work_mem);
        MemoryContextSwitchTo(oldcxt);
 
        estate->rettupdesc = rsi->expectedDesc;