Fix pcp_detach_node leaving a down node.
author: Tatsuo Ishii <ishii@sraoss.co.jp>
Tue, 8 Jun 2021 10:06:13 +0000 (19:06 +0900)
committer: Tatsuo Ishii <ishii@sraoss.co.jp>
Tue, 8 Jun 2021 10:21:36 +0000 (19:21 +0900)
Detaching the primary node using pcp_detach_node leaves a standby node
down after the follow primary command is executed.

This can be reproduced reliably by following steps:

$ pgpool_setup -n 4
$ ./startall
$ pcp_detach_node -p 11001 0

This happens because pcp_recovery_node is denied by the pcp child process:

2021-06-05 07:22:17: follow_child pid 6593: LOG:  execute command: /home/t-ishii/work/Pgpool-II/current/x/etc/follow_primary.sh 3 /tmp 11005 /home/t-ishii/work/Pgpool-II/current/x/data3 1 0 /tmp 0 11002 /home/t-ishii/work/Pgpool-II/current/x/data0
2021-06-05 07:22:17: pcp_main pid 6848: LOG:  forked new pcp worker, pid=7027 socket=6
2021-06-05 07:22:17: pcp_child pid 7027: ERROR:  failed to process PCP request at the moment
2021-06-05 07:22:17: pcp_child pid 7027: DETAIL:  failback is in progress

It complains that a failback request is still in progress. The failback
has not completed because find_primary_node_repeatedly() is trying to
acquire the follow primary lock. However, the follow primary command
has already acquired the lock and is waiting for the failback request
to complete. Thus this is a deadlock.

How to solve this?

The purpose of the follow primary lock is to prevent the follow primary
command and the detaching of a false primary by the streaming
replication check from running concurrently. We cannot throw it away.
However, find_primary_node_repeatedly() does not always need to acquire
the lock. If it does not try to acquire the lock, failover/failback will
not be blocked and will finish soon, thus the Req_info->switching flag
will be promptly set to false.

When a primary node is detached, the failover command is called and a new
primary is selected. At this point find_primary_node_repeatedly() must
run to find the new primary. However, once the follow primary command
starts, the primary will not change. So the idea is that
find_primary_node_repeatedly() checks whether the follow primary
command is running. If it is running, it just returns the current
primary. Otherwise it acquires the lock.

For this purpose, a new shared memory variable
Req_info->follow_primary_ongoing is introduced. The flag is set/unset
by the follow primary process.

New regression test 075.detach_primary_left_down_node is added.

Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2021-June/003916.html

src/include/pool.h
src/main/pgpool_main.c
src/test/regression/tests/075.detach_primary_left_down_node/test.sh [new file with mode: 0755]

index 13be313a55bd535393ced7dc3f4a84712c28178f..8fb7e7b0423497a4a7bbd2184f614f31f3640704 100644 (file)
@@ -452,6 +452,7 @@ typedef struct
        /* false if follow primary command or detach_false_primary in
         * execution */
        bool            follow_primary_count;
+       bool            follow_primary_ongoing; /* true if follow primary command is ongoing */
 }                      POOL_REQUEST_INFO;
 
 /* description of row. corresponding to RowDescription message */
index f221d1f14581efa4974c5acacf80f795eca62838..8a5c11c29e023cb965b117b399a0afb4932a429d 100644 (file)
@@ -2040,8 +2040,11 @@ failover(void)
                }
                need_to_restart_pcp = true;
        }
+
+       pool_semaphore_lock(REQUEST_INFO_SEM);
        switching = 0;
        Req_info->switching = false;
+       pool_semaphore_unlock(REQUEST_INFO_SEM);
 
        /*
         * kick wakeup_handler in pcp_child to notice that failover/failback done
@@ -2824,9 +2827,12 @@ trigger_failover_command(int node, const char *command_line,
 
        if (strlen(exec_cmd->data) != 0)
        {
+               pool_sigset_t oldmask;
                ereport(LOG,
                                (errmsg("execute command: %s", exec_cmd->data)));
+               POOL_SETMASK2(&UnBlockSig, &oldmask);
                r = system(exec_cmd->data);
+               POOL_SETMASK(&oldmask);
        }
 
        free_string(exec_cmd);
@@ -3246,6 +3252,20 @@ find_primary_node_repeatedly(void)
                return -1;
        }
 
+       /*
+        * If follow primary command is ongoing, skip primary node check.  Just
+        * return current primary node to avoid deadlock between pgpool main
+        * failover() and follow primary process.
+        */
+       if (Req_info->follow_primary_ongoing)
+       {
+               ereport(LOG,
+                               (errmsg("find_primary_node_repeatedly: follow primary is ongoing. return current primary: %d",
+                                               Req_info->primary_node_id)));
+
+               return Req_info->primary_node_id;
+       }
+
        /*
         * If all of the backends are down, there's no point to keep on searching
         * primary node.
@@ -3296,6 +3316,7 @@ fork_follow_child(int old_main_node, int new_primary, int old_primary)
                on_exit_reset();
                SetProcessGlobalVaraibles(PT_FOLLOWCHILD);
                pool_acquire_follow_primary_lock(true);
+               Req_info->follow_primary_ongoing = true;
                ereport(LOG,
                                (errmsg("start triggering follow command.")));
                for (i = 0; i < pool_config->backend_desc->num_backends; i++)
@@ -3307,6 +3328,7 @@ fork_follow_child(int old_main_node, int new_primary, int old_primary)
                                trigger_failover_command(i, pool_config->follow_primary_command,
                                                                                 old_main_node, new_primary, old_primary);
                }
+               Req_info->follow_primary_ongoing = false;
                pool_release_follow_primary_lock();
                exit(0);
        }
diff --git a/src/test/regression/tests/075.detach_primary_left_down_node/test.sh b/src/test/regression/tests/075.detach_primary_left_down_node/test.sh
new file mode 100755 (executable)
index 0000000..b6a51e6
--- /dev/null
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------
+# test script for the case where detaching the primary node leaves a down node.
+#
+# reported: https://www.pgpool.net/pipermail/pgpool-hackers/2021-June/003916.html
+
+source $TESTLIBS
+WHOAMI=`whoami`
+TESTDIR=testdir
+
+rm -fr $TESTDIR
+mkdir $TESTDIR
+cd $TESTDIR
+
+# create test environment
+echo -n "creating test environment..."
+$PGPOOL_SETUP -n 4 || exit 1
+echo "done."
+
+source ./bashrc.ports
+export PGPORT=$PGPOOL_PORT
+
+./startall
+wait_for_pgpool_startup
+
+# detach node 0
+$PGPOOL_INSTALL_DIR/bin/pcp_detach_node -w -p $PCP_PORT 0
+wait_for_pgpool_startup
+
+# check to see if all nodes are up
+echo -n "starting to check follow primary results: "
+date
+cnt=60
+while [ $cnt -gt 0 ]
+do
+    $PGBIN/psql -c "show pool_nodes" test|grep down
+    if [ $? != 0 ];then
+       echo "test succeeded"
+       ./shutdownall
+       exit 0
+    fi
+    cnt=`expr $cnt - 1`
+    echo "cnt: $cnt"
+    sleep 1
+done
+echo "test failed".
+./shutdownall
+
+exit 1