Fix internal transaction handling bug in snapshot isolation mode.
authorTatsuo Ishii <ishii@sraoss.co.jp>
Tue, 31 May 2022 10:46:16 +0000 (19:46 +0900)
committerTatsuo Ishii <ishii@sraoss.co.jp>
Tue, 31 May 2022 10:46:16 +0000 (19:46 +0900)
When SELECT is executed in snapshot isolation mode, it is surrounded
in an "internal transaction".  For example if there are 3 backends,
each backend starts an internal transaction with "BEGIN" command.
However, there was an oversight in end_internal_transaction(), which
is responsible for closing the transaction. It only closed the
transaction on "load balance node". This causes a problem with certain
query following SELECT, for example VACUUM. Suppose the SELECT is
executed on backend node 2, then transaction on node 0 and node 1 were
not closed. If VACUUM is executed on node 0, it fails because the
transaction is not closed on node 0 (remember, VACUUM cannot be
executed in an explicit transaction).

Also add a test case for this to the 030.snapshot_isolation test.

src/protocol/pool_process_query.c
src/test/regression/tests/030.snapshot_isolation/expected.txt [new file with mode: 0644]
src/test/regression/tests/030.snapshot_isolation/test.sh

index 4985554c005d57a1e7ae7702b0ac0e558ee3e2eb..3e9fd0d1854d9fca547488322de672be6ab4db46 100644 (file)
@@ -4108,7 +4108,14 @@ start_internal_transaction(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * ba
 }
 
 /*
- * End internal transaction.
+ * End internal transaction.  Called by ReadyForQuery(), assuming that the
+ * ReadyForQuery packet kind has been already eaten by the caller and the
+ * query in progress flag is set.  At returning, the ReadyForQuery packet
+ * length and the transaction state should be left in the backend buffers
+ * EXCEPT for backends that do not satisfy VALID_BACKEND macro. This is
+ * required because the caller later on calls pool_message_length() wich
+ * retrieves the packet length and the transaction state from the backends
+ * that satify VALID_BACKEND macro.
  */
 POOL_STATUS
 end_internal_transaction(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * backend)
@@ -4129,13 +4136,16 @@ end_internal_transaction(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * back
                /* We need to commit from secondary to primary. */
                for (i = 0; i < NUM_BACKENDS; i++)
                {
-                       if (VALID_BACKEND(i) && !IS_MAIN_NODE_ID(i) &&
+                       if (VALID_BACKEND_RAW(i) && !IS_MAIN_NODE_ID(i) &&
                                INTERNAL_TRANSACTION_STARTED(backend, i))
                        {
-                               if (MAJOR(backend) == PROTO_MAJOR_V3)
+                               if (MAJOR(backend) == PROTO_MAJOR_V3 && VALID_BACKEND(i))
                                {
                                        /*
-                                        * Skip rest of Ready for Query packet
+                                        * Skip rest of Ready for Query packet for backends
+                                        * satisfying VALID_BACKEND macro because they should have
+                                        * been already received the data, which is not good for
+                                        * do_command().
                                         */
                                        pool_read(CONNECTION(backend, i), &len, sizeof(len));
                                        pool_read(CONNECTION(backend, i), &tstate, sizeof(tstate));
@@ -4160,16 +4170,29 @@ end_internal_transaction(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * back
                                PG_END_TRY();
 
                                INTERNAL_TRANSACTION_STARTED(backend, i) = false;
+
+                               if (MAJOR(backend) == PROTO_MAJOR_V3 && !VALID_BACKEND(i))
+                               {
+                                       /*
+                                        * Skip rest of Ready for Query packet for the backend
+                                        * that does not satisfy VALID_BACKEND.
+                                        */
+                                       pool_read(CONNECTION(backend, i), &len, sizeof(len));
+                                       pool_read(CONNECTION(backend, i), &tstate, sizeof(tstate));
+                               }
                        }
                }
 
                /* Commit on main */
                if (INTERNAL_TRANSACTION_STARTED(backend, MAIN_NODE_ID))
                {
-                       if (MAJOR(backend) == PROTO_MAJOR_V3)
+                       if (MAJOR(backend) == PROTO_MAJOR_V3 && VALID_BACKEND(MAIN_NODE_ID))
                        {
                                /*
-                                * Skip rest of Ready for Query packet
+                                * Skip rest of Ready for Query packet for backends
+                                * satisfying VALID_BACKEND macro because they should have
+                                * been already received the data, which is not good for
+                                * do_command().
                                 */
                                pool_read(CONNECTION(backend, MAIN_NODE_ID), &len, sizeof(len));
                                pool_read(CONNECTION(backend, MAIN_NODE_ID), &tstate, sizeof(tstate));
@@ -4193,6 +4216,16 @@ end_internal_transaction(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * back
                        }
                        PG_END_TRY();
                        INTERNAL_TRANSACTION_STARTED(backend, MAIN_NODE_ID) = false;
+
+                       if (MAJOR(backend) == PROTO_MAJOR_V3 && !VALID_BACKEND(MAIN_NODE_ID))
+                       {
+                               /*
+                                * Skip rest of Ready for Query packet for the backend
+                                * that does not satisfy VALID_BACKEND.
+                                */
+                               pool_read(CONNECTION(backend, MAIN_NODE_ID), &len, sizeof(len));
+                               pool_read(CONNECTION(backend, MAIN_NODE_ID), &tstate, sizeof(tstate));
+                       }
                }
        }
        PG_CATCH();
diff --git a/src/test/regression/tests/030.snapshot_isolation/expected.txt b/src/test/regression/tests/030.snapshot_isolation/expected.txt
new file mode 100644 (file)
index 0000000..a2a0d7f
--- /dev/null
@@ -0,0 +1,6 @@
+ count 
+-------
+     1
+(1 row)
+
+VACUUM
index e3d1388bcdf4eccdbee70f2266e61beeb2a8dbb2..b2151bc29a3a1311147f67380a6dd738af877008 100755 (executable)
@@ -88,6 +88,20 @@ if [ $? != 0 ];then
 fi
 echo "Transaction results are consistent (extended query)."
 
+# Test #2. VACUUM after SELECT.
+
+psql test > results.txt 2>&1 <<EOF
+SELECT count(*) FROM t1;
+VACUUM t1;
+EOF
+
+cmp ../expected.txt results.txt >/dev/null
+if [ $? != 0 ];then
+    echo "test #2 failed."
+    ./shutdownall
+    exit 1
+fi
+
 ./shutdownall
 
 exit 0