Fix occasional hang in COPY FROM.
authorTatsuo Ishii <ishii@sraoss.co.jp>
Thu, 16 Sep 2021 06:44:51 +0000 (15:44 +0900)
committerTatsuo Ishii <ishii@sraoss.co.jp>
Thu, 16 Sep 2021 06:52:36 +0000 (15:52 +0900)
If an error occurs while doing COPY FROM, it was possible the
Pgpool-II waited forever for a response from backend after COPY end
marker was sent from frontend. Pgpool expected a new message arrives
to socket, but it is possible that the message (in this case an error
message) is already in the backend read buffer. The fix is, check the
buffer is empty or not before reading from the socket.
New test case (07.copy_hang) is also added.

The bug was found by Bo Peng.

src/protocol/pool_proto_modules.c
src/test/regression/tests/076.copy_hang/test.sh [new file with mode: 0755]

index f1111b0c80e3776fba86012f70eb5a15e861b510..f95a49d8053e3f3774f74927928c2b4aa2c1cdb6 100644 (file)
@@ -57,6 +57,7 @@
 #include "utils/pool_stream.h"
 #include "utils/ps_status.h"
 #include "utils/pool_signal.h"
+#include "utils/pool_ssl.h"
 #include "utils/palloc.h"
 #include "utils/memutils.h"
 #include "query_cache/pool_memqcache.h"
@@ -3306,6 +3307,9 @@ CopyDataRows(POOL_CONNECTION * frontend,
                }
        }
 
+       /*
+        * Wait till backend responds
+        */
        if (copyin)
        {
                for (i = 0; i < NUM_BACKENDS; i++)
@@ -3314,7 +3318,15 @@ CopyDataRows(POOL_CONNECTION * frontend,
                        {
                                pool_flush(CONNECTION(backend, i));
 
-                               if (synchronize(CONNECTION(backend, i)))
+                               /*
+                                * Check response from the backend.  First check SSL and read
+                                * buffer of the backend. It is possible that there's an error
+                                * message in the buffer if the COPY command went wrong.
+                                * Otherwise wait for data arrival to the backend socket.
+                                */
+                               if (!pool_ssl_pending(CONNECTION(backend, i)) &&
+                                       pool_read_buffer_is_empty(CONNECTION(backend, i)) &&
+                                       synchronize(CONNECTION(backend, i)))
                                        ereport(FATAL,
                                                        (return_code(2),
                                                         errmsg("unable to copy data rows"),
diff --git a/src/test/regression/tests/076.copy_hang/test.sh b/src/test/regression/tests/076.copy_hang/test.sh
new file mode 100755 (executable)
index 0000000..2abe488
--- /dev/null
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------
+# test script for copy plus error case.
+# It was reported that following sequece of copy command cause psql hang.
+#
+# CREATE TEMP TABLE vistest (a text);  
+# COPY vistest FROM stdin CSV FREEZE;
+# Enter data to be copied followed by a newline.
+# End with a backslash and a period on a line by itself, or an EOF signal.
+# >> p
+# >> g
+# >> \.
+#
+# In the normal case an error should be returned to psql.
+#ERROR:  cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction
+
+source $TESTLIBS
+TESTDIR=testdir
+PSQL=$PGBIN/psql
+
+rm -fr $TESTDIR
+mkdir $TESTDIR
+cd $TESTDIR
+
+# create test environment
+echo -n "creating test environment..."
+$PGPOOL_SETUP -n 3 || exit 1
+echo "done."
+
+source ./bashrc.ports
+
+export PGPORT=$PGPOOL_PORT
+
+./startall
+wait_for_pgpool_startup
+
+# execute COPY
+
+timeout 10 $PSQL test <<EOF
+CREATE TEMP TABLE vistest (a text);  
+COPY vistest FROM stdin CSV FREEZE;
+p
+g
+\\.
+\\q
+EOF
+
+if [ ! $? -eq 0 ];then
+    echo ...timed out.
+    ./shutdownall
+    exit 1
+fi
+echo ...ok.
+./shutdownall