From 4462f9f1367ec3d22274af28777b084fb9f198ea Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Wed, 31 Mar 2021 16:09:13 +0900 Subject: [PATCH] Fix pgpool_setup so that it falls back to full restore if restarting fails. While taking care of "[pgpool-general: 7456] Expected behaviour after pcp_detach_node ?" https://www.pgpool.net/pipermail/pgpool-general/2021-March/007514.html I noticed that restarting the target server in the follow primary script could fail. This could happen when the former primary is put into down status using pcp_detach_node. The former primary will not start due to timeline and LSN divergence. To fix this, fall back to a full restore using pcp_recovery_node if restarting the server fails. --- src/test/pgpool_setup.in | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/test/pgpool_setup.in b/src/test/pgpool_setup.in index 67db63fa1..e929dc61a 100644 --- a/src/test/pgpool_setup.in +++ b/src/test/pgpool_setup.in @@ -248,31 +248,42 @@ PCP_PORT=__PCPPORT__ pgversion=__PGVERSION__ export PCPPASSFILE=__PCPPASSFILE__ +echo "follow primary script started" >> $log date >> $log echo "node_id $node_id host_name $host_name port $port db_cluster $db_cluster new_primary_id $new_primary_id old_main_id $old_main_id new_main_host_name $new_main_host_name old_primary_node_id $old_primary_node_id new_primary_port_number $new_primary_port_number new_primary_db_cluster $new_primary_db_cluster" >> $log -# change primary node connection info so that it points to the new primary -if [ $pgversion -ge 120 ];then - sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/myrecovery.conf - sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf -else - sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/recovery.conf - sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf -fi - -touch $db_cluster/standby.signal - # Skip the 
target standby node if it's not running $pg_ctl -D $db_cluster status >/dev/null 2>&1 if [ $? = 0 ] then + # change primary node connection info so that it points to the new primary + if [ $pgversion -ge 120 ];then + sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/myrecovery.conf + sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf + else + sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/recovery.conf + sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf + fi + + touch $db_cluster/standby.signal + echo "restart the target server" >> $log $pg_ctl -w -m f -D $db_cluster restart >> $log 2>&1 - # attach the node - pcp_attach_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1 + + $pg_ctl -D $db_cluster status >>$log 2>&1 + if [ $? != 0 ] + then + echo "restarting $db_cluster failed" >>$log + echo "fail back to pcp_recovery_node" >>$log + pcp_recovery_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1 + else + # attach the node + pcp_attach_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1 + fi else echo "$db_cluster is not running. skipping follow primary command." >> $log fi +echo "follow primary script ended" >> $log EOF #------------------------------------------- -- 2.39.5