Fixed follow_primary.sh.sample script to check the status of PostgreSQL using pg_isready.
author Bo Peng <pengbo@sraoss.co.jp>
Fri, 25 Feb 2022 05:29:02 +0000 (14:29 +0900)
committer Bo Peng <pengbo@sraoss.co.jp>
Fri, 25 Feb 2022 05:32:19 +0000 (14:32 +0900)
src/sample/scripts/follow_primary.sh.sample

index c9a453b9bfaec6b21a6c408024ef8e14249f7881..79344a84b2fcbbcf06cc425d55cb43404bf41cfc 100755 (executable)
@@ -40,7 +40,15 @@ REPL_SLOT_NAME=${NODE_HOST//[-.]/_}
 
 echo follow_primary.sh: start: Standby node ${NODE_ID}
 
-## Test passwordless SSH
+# Check the connection status of Standby
+${PGHOME}/bin/pg_isready -h ${NODE_HOST} -p ${NODE_PORT} > /dev/null 2>&1
+
+if [ $? -ne 0 ]; then
+    echo follow_primary.sh: node_id=${NODE_ID} is not running. skipping follow primary command
+    exit 0
+fi
+
+# Test passwordless SSH
 ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_PRIMARY_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
 
 if [ $? -ne 0 ]; then
@@ -48,7 +56,7 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-## Get PostgreSQL major version
+# Get PostgreSQL major version
 PGVERSION=`${PGHOME}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/'`
 
 if [ $PGVERSION -ge 12 ]; then
@@ -57,33 +65,57 @@ else
     RECOVERYCONF=${NODE_PGDATA}/recovery.conf
 fi
 
-## Check the status of Standby
-ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-    postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool ${PGHOME}/bin/pg_ctl -w -D ${NODE_PGDATA} status
+# Synchronize Standby with the new Primary.
+echo follow_primary.sh: pg_rewind for node ${NODE_ID}
 
+# Create replication slot "${REPL_SLOT_NAME}"
+${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
+    -c "SELECT pg_create_physical_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
 
-## If Standby is running, synchronize it with the new Primary.
-if [ $? -eq 0 ]; then
+if [ $? -ne 0 ]; then
+    echo follow_primary.sh: create replication slot \"${REPL_SLOT_NAME}\" failed. You may need to create replication slot manually.
+fi
 
-    echo follow_primary.sh: pg_rewind for node ${NODE_ID}
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
 
-    # Create replication slot "${REPL_SLOT_NAME}"
-    ${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
-        -c "SELECT pg_create_physical_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
+    set -o errexit
 
-    if [ $? -ne 0 ]; then
-        echo follow_primary.sh: create replication slot \"${REPL_SLOT_NAME}\" failed. You may need to create replication slot manually.
+    ${PGHOME}/bin/pg_ctl -w -m f -D ${NODE_PGDATA} stop
+
+    ${PGHOME}/bin/pg_rewind -D ${NODE_PGDATA} --source-server=\"user=postgres host=${NEW_PRIMARY_NODE_HOST} port=${NEW_PRIMARY_NODE_PORT}\"
+
+    rm -rf ${NODE_PGDATA}/pg_replslot/*
+
+    cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${NEW_PRIMARY_NODE_HOST} port=${NEW_PRIMARY_NODE_PORT} user=${REPLUSER} application_name=${NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${NEW_PRIMARY_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${REPL_SLOT_NAME}'
+EOT
+
+    if [ ${PGVERSION} -ge 12 ]; then
+        sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
+               -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${NODE_PGDATA}/postgresql.conf
+        touch ${NODE_PGDATA}/standby.signal
+    else
+        echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
     fi
 
-    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+    ${PGHOME}/bin/pg_ctl -l /dev/null -w -D ${NODE_PGDATA} start
 
-        set -o errexit
+"
 
-        ${PGHOME}/bin/pg_ctl -w -m f -D ${NODE_PGDATA} stop
+# If pg_rewind failed, try pg_basebackup 
+if [ $? -ne 0 ]; then
+    echo follow_primary.sh: end: pg_rewind failed. Try pg_basebackup.
+
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
 
-        ${PGHOME}/bin/pg_rewind -D ${NODE_PGDATA} --source-server=\"user=postgres host=${NEW_PRIMARY_NODE_HOST} port=${NEW_PRIMARY_NODE_PORT}\"
+        set -o errexit
 
-        rm -rf ${NODE_PGDATA}/pg_replslot/*
+        rm -rf ${NODE_PGDATA}
+        rm -rf ${ARCHIVEDIR}/*
+        ${PGHOME}/bin/pg_basebackup -h ${NEW_PRIMARY_NODE_HOST} -U $REPLUSER -p ${NEW_PRIMARY_NODE_PORT} -D ${NODE_PGDATA} -X stream
 
         cat > ${RECOVERYCONF} << EOT
 primary_conninfo = 'host=${NEW_PRIMARY_NODE_HOST} port=${NEW_PRIMARY_NODE_PORT} user=${REPLUSER} application_name=${NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
@@ -99,87 +131,51 @@ EOT
         else
             echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
         fi
-
-        ${PGHOME}/bin/pg_ctl -l /dev/null -w -D ${NODE_PGDATA} start
-
     "
 
     if [ $? -ne 0 ]; then
-        echo follow_primary.sh: end: pg_rewind failed. Try pg_basebackup.
-
-        ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
-
-            set -o errexit
-
-            # Execute pg_basebackup
-            rm -rf ${NODE_PGDATA}
-            rm -rf ${ARCHIVEDIR}/*
-            ${PGHOME}/bin/pg_basebackup -h ${NEW_PRIMARY_NODE_HOST} -U $REPLUSER -p ${NEW_PRIMARY_NODE_PORT} -D ${NODE_PGDATA} -X stream
-
-            cat > ${RECOVERYCONF} << EOT
-primary_conninfo = 'host=${NEW_PRIMARY_NODE_HOST} port=${NEW_PRIMARY_NODE_PORT} user=${REPLUSER} application_name=${NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
-recovery_target_timeline = 'latest'
-restore_command = 'scp ${NEW_PRIMARY_NODE_HOST}:${ARCHIVEDIR}/%f %p'
-primary_slot_name = '${REPL_SLOT_NAME}'
-EOT
 
-            if [ ${PGVERSION} -ge 12 ]; then
-                sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
-                       -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${NODE_PGDATA}/postgresql.conf
-                touch ${NODE_PGDATA}/standby.signal
-            else
-                echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
-            fi
-        "
+        # drop replication slot
+        ${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
+            -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
 
         if [ $? -ne 0 ]; then
-
-            # drop replication slot
-            ${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
-                -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
-
-            if [ $? -ne 0 ]; then
-                echo ERROR: follow_primary.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually.
-            fi
-
-            echo follow_primary.sh: end: pg_basebackup failed
-            exit 1
+            echo ERROR: follow_primary.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually.
         fi
 
-        # start Standby node on ${NODE_HOST}
-        ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-            postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool $PGHOME/bin/pg_ctl -l /dev/null -w -D ${NODE_PGDATA} start
-
+        echo follow_primary.sh: end: pg_basebackup failed
+        exit 1
     fi
 
-    # If start Standby successfully, attach this node
-    if [ $? -eq 0 ]; then
-
-        # Run pcp_attact_node to attach Standby node to Pgpool-II.
-        ${PGPOOL_PATH}/pcp_attach_node -w -h localhost -U $PCP_USER -p ${PCP_PORT} -n ${NODE_ID}
-
-        if [ $? -ne 0 ]; then
-            echo ERROR: follow_primary.sh: end: pcp_attach_node failed
-            exit 1
-        fi
+    # start Standby node on ${NODE_HOST}
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+        postgres@${NODE_HOST} -i ~/.ssh/id_rsa_pgpool $PGHOME/bin/pg_ctl -l /dev/null -w -D ${NODE_PGDATA} start
 
-    # If start Standby failed, drop replication slot "${REPL_SLOT_NAME}"
-    else
+fi
 
-        ${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
-            -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
+# If start Standby successfully, attach this node
+if [ $? -eq 0 ]; then
 
-        if [ $? -ne 0 ]; then
-            echo ERROR: follow_primary.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually.
-        fi
+    # Run pcp_attact_node to attach Standby node to Pgpool-II.
+    ${PGPOOL_PATH}/pcp_attach_node -w -h localhost -U $PCP_USER -p ${PCP_PORT} -n ${NODE_ID}
 
-        echo ERROR: follow_primary.sh: end: follow primary command failed
+    if [ $? -ne 0 ]; then
+        echo ERROR: follow_primary.sh: end: pcp_attach_node failed
         exit 1
     fi
 
 else
-    echo follow_primary.sh: failed_nod_id=${NODE_ID} is not running. skipping follow primary command
-    exit 0
+
+    # If start Standby failed, drop replication slot "${REPL_SLOT_NAME}"
+    ${PGHOME}/bin/psql -h ${NEW_PRIMARY_NODE_HOST} -p ${NEW_PRIMARY_NODE_PORT} \
+        -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');"  >/dev/null 2>&1
+
+    if [ $? -ne 0 ]; then
+        echo ERROR: follow_primary.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually.
+    fi
+
+    echo ERROR: follow_primary.sh: end: follow primary command failed
+    exit 1
 fi
 
 echo follow_primary.sh: end: follow primary command is completed successfully