Fix pgpool_setup so that it falls back to a full restore if restarting fails.
author	Tatsuo Ishii <ishii@sraoss.co.jp>
Wed, 31 Mar 2021 07:09:13 +0000 (16:09 +0900)
committer	Tatsuo Ishii <ishii@sraoss.co.jp>
Wed, 31 Mar 2021 07:09:13 +0000 (16:09 +0900)
While taking care of "[pgpool-general: 7456] Expected behaviour after pcp_detach_node ?"

https://www.pgpool.net/pipermail/pgpool-general/2021-March/007514.html

I noticed that restarting the target server in the follow primary script
could fail.  This can happen when the former primary has gone into down
status via pcp_detach_node: the former primary will not start because of
timeline and LSN divergence.  To fix this, fall back to a full restore
using pcp_recovery_node if restarting the server fails.
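
In short, the updated follow primary script tries a plain restart first and
only falls back to a full online recovery when the restart does not bring the
server back.  A minimal sketch of that flow (variable names as in the
generated script shown in the diff below; an illustration, not the committed
code):

    # Try to restart the target standby against the new primary.
    $pg_ctl -w -m f -D $db_cluster restart >> $log 2>&1

    # If the server did not come back (e.g. timeline/LSN divergence after
    # pcp_detach_node), fall back to a full online recovery of the node.
    if ! $pg_ctl -D $db_cluster status >> $log 2>&1
    then
        pcp_recovery_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1
    else
        # Restart succeeded: simply reattach the node to pgpool.
        pcp_attach_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1
    fi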

src/test/pgpool_setup.in

index 67db63fa184bbe5e9de5a67e186478ef10597ba5..e929dc61a757a7c95289a0ea95c9b3afdf2d02e8 100644 (file)
@@ -248,31 +248,42 @@ PCP_PORT=__PCPPORT__
 pgversion=__PGVERSION__
 export PCPPASSFILE=__PCPPASSFILE__
 
+echo "follow primary script started" >> $log
 date >> $log
 echo "node_id $node_id host_name $host_name port $port db_cluster $db_cluster new_primary_id $new_primary_id old_main_id $old_main_id new_main_host_name $new_main_host_name old_primary_node_id $old_primary_node_id new_primary_port_number $new_primary_port_number new_primary_db_cluster $new_primary_db_cluster" >> $log
 
-# change primary node connection info so that it points to the new primary
-if [ $pgversion -ge 120 ];then
-   sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/myrecovery.conf
-   sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf
-else
-   sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/recovery.conf
-   sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf
-fi
-
-touch $db_cluster/standby.signal
-
 # Skip the target standby node if it's not running
 $pg_ctl -D $db_cluster status >/dev/null 2>&1
 if [ $? = 0 ]
 then
+    # change primary node connection info so that it points to the new primary
+    if [ $pgversion -ge 120 ];then
+        sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/myrecovery.conf
+        sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf
+    else
+        sed -i "s/port=[0-9]*/port=$new_primary_port_number/" $db_cluster/recovery.conf
+        sed -i "/restore_command/s/data[0-9]/`basename $new_primary_db_cluster`/" $db_cluster/myrecovery.conf
+    fi
+
+    touch $db_cluster/standby.signal
+
     echo "restart the target server" >> $log
     $pg_ctl -w -m f -D $db_cluster restart >> $log 2>&1
-    # attach the node
-    pcp_attach_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1
+
+    $pg_ctl -D $db_cluster status >>$log 2>&1
+    if [ $? != 0 ]
+    then
+        echo "restarting $db_cluster failed" >>$log
+        echo "fail back to pcp_recovery_node" >>$log
+        pcp_recovery_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1
+    else
+        # attach the node
+        pcp_attach_node -w -h localhost -p $PCP_PORT -n $node_id >> $log 2>&1
+    fi
 else
     echo "$db_cluster is not running. skipping follow primary command." >> $log
 fi
+echo "follow primary script ended" >> $log
 EOF
 
 #-------------------------------------------