+++ /dev/null
-#! /bin/sh
-# Online recovery 2nd stage script
-#
-datadir=$1 # master dabatase cluster
-DEST=$2 # hostname of the DB node to be recovered
-DESTDIR=$3 # database cluster of the DB node to be recovered
-port=$4 # PostgreSQL port number
-archdir=/data/archive_log # archive log directory
-
-# Force to flush current value of sequences to xlog
-psql -p $port -t -c 'SELECT datname FROM pg_database WHERE NOT datistemplate AND datallowconn' template1|
-while read i
-do
- if [ "$i" != "" ];then
- psql -p $port -c "SELECT setval(oid, nextval(oid)) FROM pg_class WHERE relkind = 'S'" $i
- fi
-done
-
-psql -p $port -c "SELECT pgpool_switch_xlog('$archdir')" template1
+++ /dev/null
-#! /bin/sh
-
-if [ $# -ne 2 ]
-then
- echo "pgpool_remote_start remote_host remote_datadir"
- exit 1
-fi
-
-DEST=$1
-DESTDIR=$2
-PGCTL=/usr/local/pgsql/bin/pg_ctl
-
-ssh -T $DEST $PGCTL -w -D $DESTDIR start 2>/dev/null 1>/dev/null < /dev/null &
--- /dev/null
+#!/bin/bash
+# This script is run by failover_command.
+#
+# When the failed node is not the old primary (a standby went down), the
+# replication slot named after the failed host is dropped on the old primary
+# and no promotion takes place. When the failed node is the old primary, the
+# new master node is promoted with "pg_ctl promote".
+#
+# Exit status: 0 on success or when there is nothing to do, 1 on failure.
+
+set -o xtrace
+# Send everything this script prints (including the xtrace output) to syslog.
+exec > >(logger -i -p local1.info) 2>&1
+
+# Special values:
+# %d = failed node id
+# %h = failed node hostname
+# %p = failed node port number
+# %D = failed node database cluster path
+# %m = new master node id
+# %H = new master node hostname
+# %M = old master node id
+# %P = old primary node id
+# %r = new master port number
+# %R = new master database cluster path
+# %N = old primary node hostname
+# %S = old primary node port number
+# %% = '%' character
+
+# Positional arguments. Judging by the variable names the expected order is
+# %d %h %p %D %m %H %M %P %r %R %N %S - confirm against failover_command
+# in pgpool.conf.
+FAILED_NODE_ID="$1"
+FAILED_NODE_HOST="$2"
+FAILED_NODE_PORT="$3"
+FAILED_NODE_PGDATA="$4"
+NEW_MASTER_NODE_ID="$5"
+NEW_MASTER_NODE_HOST="$6"
+OLD_MASTER_NODE_ID="$7"
+OLD_PRIMARY_NODE_ID="$8"
+NEW_MASTER_NODE_PORT="$9"
+NEW_MASTER_NODE_PGDATA="${10}"
+OLD_PRIMARY_NODE_HOST="${11}"
+OLD_PRIMARY_NODE_PORT="${12}"
+
+PGHOME=/usr/pgsql-11
+
+# Options shared by every ssh call: no tty, no host key prompt, pgpool key.
+SSH_OPTS=(-T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa_pgpool)
+
+logger -i -p local1.info "failover.sh: start: failed_node_id=$FAILED_NODE_ID old_primary_node_id=$OLD_PRIMARY_NODE_ID failed_host=$FAILED_NODE_HOST new_master_host=$NEW_MASTER_NODE_HOST"
+
+## The two ids are compared numerically below. With an empty id the unquoted
+## comparison used to degrade into a one-argument test that is always true,
+## so refuse to continue instead of guessing which node failed.
+if [ -z "${FAILED_NODE_ID}" ] || [ -z "${OLD_PRIMARY_NODE_ID}" ]; then
+    logger -i -p local1.error "failover.sh: failed node id or old primary node id is missing"
+    exit 1
+fi
+
+## If there is no new master node, skip failover.
+# NOTE(review): relies on Pgpool-II passing a negative %m when no node is
+# left to take over - confirm against the failover_command documentation.
+if [ "${NEW_MASTER_NODE_ID:-0}" -lt 0 ]; then
+    logger -i -p local1.info "failover.sh: All nodes are down. Skipping failover."
+    exit 0
+fi
+
+## Test passwordless SSH
+if ! ssh "${SSH_OPTS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" ls /tmp > /dev/null; then
+    logger -i -p local1.error "failover.sh: passwordless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwordless SSH."
+    exit 1
+fi
+
+## If Standby node is down, skip failover.
+if [ "${FAILED_NODE_ID}" -ne "${OLD_PRIMARY_NODE_ID}" ]; then
+    logger -i -p local1.info "failover.sh: Standby node is down. Skipping failover."
+
+    # The slot of the failed standby is dropped on the old primary.
+    # The remote command is a single quoted string so that the SQL reaches
+    # psql intact after the remote shell has parsed it.
+    ssh "${SSH_OPTS[@]}" "postgres@${OLD_PRIMARY_NODE_HOST}" "
+        ${PGHOME}/bin/psql -p $OLD_PRIMARY_NODE_PORT -c \"SELECT pg_drop_replication_slot('${FAILED_NODE_HOST}')\"
+    "
+
+    if [ $? -ne 0 ]; then
+        logger -i -p local1.error "failover.sh: drop replication slot ${FAILED_NODE_HOST} failed"
+        exit 1
+    fi
+
+    exit 0
+fi
+
+## Promote Standby node.
+logger -i -p local1.info "failover.sh: Primary node is down, promote standby node ${NEW_MASTER_NODE_HOST}."
+
+# -w makes pg_ctl wait for the promotion, so its exit status is meaningful.
+if ! ssh "${SSH_OPTS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" \
+    "${PGHOME}/bin/pg_ctl" -D "${NEW_MASTER_NODE_PGDATA}" -w promote; then
+    logger -i -p local1.error "failover.sh: new_master_host=$NEW_MASTER_NODE_HOST promote failed"
+    exit 1
+fi
+
+logger -i -p local1.info "failover.sh: end: new_master_node_id=$NEW_MASTER_NODE_ID started as the primary node"
+exit 0
--- /dev/null
+#!/bin/bash
+# This script is run after failover_command to synchronize the Standby with the new Primary.
+# First try pg_rewind. If pg_rewind failed, use pg_basebackup.
+#
+# Outline:
+#   1. Exit 0 without doing anything when PostgreSQL on the node is not running.
+#   2. Create a replication slot named after the node's host on the new master.
+#   3. Stop the node, run pg_rewind and write its recovery settings; when that
+#      fails, rebuild the node with pg_basebackup instead.
+#   4. Start the node and attach it to Pgpool-II with pcp_attach_node.
+# The replication slot is dropped again when pg_basebackup or the start fails.
+#
+# Exit status: 0 on success or skip, 1 on failure.
+
+set -o xtrace
+# Send everything this script prints (including the xtrace output) to syslog.
+exec > >(logger -i -p local1.info) 2>&1
+
+# Special values:
+# %d = failed node id
+# %h = failed node hostname
+# %p = failed node port number
+# %D = failed node database cluster path
+# %m = new master node id
+# %H = new master node hostname
+# %M = old master node id
+# %P = old primary node id
+# %r = new master port number
+# %R = new master database cluster path
+# %N = old primary node hostname
+# %S = old primary node port number
+# %% = '%' character
+
+# Positional arguments. Judging by the variable names the expected order is
+# %d %h %p %D %m %M %H %P %r %R (note %M before %H) - confirm against
+# follow_master_command in pgpool.conf.
+FAILED_NODE_ID="$1"
+FAILED_NODE_HOST="$2"
+FAILED_NODE_PORT="$3"
+FAILED_NODE_PGDATA="$4"
+NEW_MASTER_NODE_ID="$5"
+OLD_MASTER_NODE_ID="$6"
+NEW_MASTER_NODE_HOST="$7"
+OLD_PRIMARY_NODE_ID="$8"
+NEW_MASTER_NODE_PORT="$9"
+NEW_MASTER_NODE_PGDATA="${10}"
+
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir
+REPLUSER=repl
+PCP_USER=pgpool
+PGPOOL_PATH=/usr/bin
+PCP_PORT=9898
+
+# Options shared by every ssh call: no tty, no host key prompt, pgpool key.
+SSH_OPTS=(-T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa_pgpool)
+
+# Drop the replication slot named after the failed host on the new master.
+# The remote command is passed as ONE quoted string: ssh joins its arguments
+# with spaces and hands them to the remote shell, which would otherwise choke
+# on the unquoted parentheses of the SQL statement.
+drop_replication_slot() {
+    ssh "${SSH_OPTS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" "
+        ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"SELECT pg_drop_replication_slot('${FAILED_NODE_HOST}')\"
+    "
+}
+
+logger -i -p local1.info "follow_master.sh: start: Standby node ${FAILED_NODE_ID}"
+
+## Test passwordless SSH
+if ! ssh "${SSH_OPTS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" ls /tmp > /dev/null; then
+    logger -i -p local1.error "follow_master.sh: passwordless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwordless SSH."
+    exit 1
+fi
+
+## Get PostgreSQL major version
+# Uses the local initdb; the result decides the recovery file layout below.
+PGVERSION=$(${PGHOME}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')
+
+if [ "${PGVERSION}" -ge 12 ]; then
+    RECOVERYCONF=${FAILED_NODE_PGDATA}/myrecovery.conf
+else
+    RECOVERYCONF=${FAILED_NODE_PGDATA}/recovery.conf
+fi
+
+## Check the status of Standby. If it is not running there is nothing to do.
+if ! ssh "${SSH_OPTS[@]}" "postgres@${FAILED_NODE_HOST}" \
+    "${PGHOME}/bin/pg_ctl" -w -D "${FAILED_NODE_PGDATA}" status; then
+    logger -i -p local1.info "follow_master.sh: failed_node_id=${FAILED_NODE_ID} is not running. skipping follow master command"
+    exit 0
+fi
+
+## Standby is running, synchronize it with the new Primary.
+logger -i -p local1.info "follow_master.sh: pg_rewind for $FAILED_NODE_ID"
+
+# Create replication slot "${FAILED_NODE_HOST}"
+ssh "${SSH_OPTS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" "
+    ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"SELECT pg_create_physical_replication_slot('${FAILED_NODE_HOST}');\"
+"
+
+# pg_rewind runs BEFORE the recovery settings are written: it makes the data
+# directory match the source cluster, so files written beforehand that do not
+# exist on the new master (recovery file, standby.signal) could be lost.
+# All variables in the remote script are expanded locally before it is sent.
+ssh "${SSH_OPTS[@]}" "postgres@${FAILED_NODE_HOST}" "
+
+    set -o errexit
+
+    ${PGHOME}/bin/pg_ctl -w -m f -D ${FAILED_NODE_PGDATA} stop
+
+    ${PGHOME}/bin/pg_rewind -D ${FAILED_NODE_PGDATA} --source-server=\"user=postgres host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT}\"
+
+    cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT} user=${REPLUSER} application_name=${FAILED_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${NEW_MASTER_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${FAILED_NODE_HOST}'
+EOT
+
+    if [ ${PGVERSION} -ge 12 ]; then
+        touch ${FAILED_NODE_PGDATA}/standby.signal
+    else
+        echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+    fi
+
+"
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.error "follow_master.sh: end: pg_rewind failed. Try pg_basebackup."
+
+    # Fallback: wipe the data directory and the archive directory on the
+    # node and take a fresh base backup from the new master.
+    ssh "${SSH_OPTS[@]}" "postgres@${FAILED_NODE_HOST}" "
+
+        set -o errexit
+
+        # Execute pg_basebackup
+        rm -rf ${FAILED_NODE_PGDATA}
+        rm -rf ${ARCHIVEDIR}/*
+        ${PGHOME}/bin/pg_basebackup -h ${NEW_MASTER_NODE_HOST} -U $REPLUSER -p ${NEW_MASTER_NODE_PORT} -D ${FAILED_NODE_PGDATA} -X stream
+
+        if [ ${PGVERSION} -ge 12 ]; then
+            sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
+                   -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${FAILED_NODE_PGDATA}/postgresql.conf
+        fi
+
+        cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT} user=${REPLUSER} application_name=${FAILED_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${NEW_MASTER_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${FAILED_NODE_HOST}'
+EOT
+
+        if [ ${PGVERSION} -ge 12 ]; then
+            touch ${FAILED_NODE_PGDATA}/standby.signal
+        else
+            echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+        fi
+    "
+
+    if [ $? -ne 0 ]; then
+        # The node could not be rebuilt, so its slot is removed again.
+        drop_replication_slot
+
+        logger -i -p local1.error "follow_master.sh: end: pg_basebackup failed"
+        exit 1
+    fi
+fi
+
+# start Standby node on ${FAILED_NODE_HOST}
+if ! ssh "${SSH_OPTS[@]}" "postgres@${FAILED_NODE_HOST}" \
+    "${PGHOME}/bin/pg_ctl" -l /dev/null -w -D "${FAILED_NODE_PGDATA}" start; then
+
+    # If start Standby failed, drop replication slot "${FAILED_NODE_HOST}"
+    drop_replication_slot
+
+    logger -i -p local1.error "follow_master.sh: end: follow master command failed"
+    exit 1
+fi
+
+# Standby started successfully: run pcp_attach_node to attach it to Pgpool-II.
+if ! "${PGPOOL_PATH}/pcp_attach_node" -w -h localhost -U "$PCP_USER" -p "${PCP_PORT}" -n "${FAILED_NODE_ID}"; then
+    logger -i -p local1.error "follow_master.sh: end: pcp_attach_node failed"
+    exit 1
+fi
+
+logger -i -p local1.info "follow_master.sh: end: follow master command complete"
+exit 0
--- /dev/null
+#!/bin/bash
+# This script is run after recovery_1st_stage to start Standby node.
+#
+# Arguments:
+#   $1 - hostname of the node on which PostgreSQL is started
+#   $2 - database cluster path on that node
+#
+# Exit status: 0 when "pg_ctl start" succeeded on the remote node, 1 otherwise.
+
+set -o xtrace
+# Send everything this script prints (including the xtrace output) to syslog.
+exec > >(logger -i -p local1.info) 2>&1
+
+DEST_NODE_HOST="$1"
+DEST_NODE_PGDATA="$2"
+
+PGHOME=/usr/pgsql-11
+
+# Options shared by every ssh call: no tty, no host key prompt, pgpool key.
+SSH_OPTS=(-T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa_pgpool)
+
+logger -i -p local1.info "pgpool_remote_start: start: remote start Standby node $DEST_NODE_HOST"
+
+## Test passwordless SSH
+if ! ssh "${SSH_OPTS[@]}" "postgres@${DEST_NODE_HOST}" ls /tmp > /dev/null; then
+    logger -i -p local1.error "pgpool_remote_start: passwordless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwordless SSH."
+    exit 1
+fi
+
+## Start Standby node
+# -w makes pg_ctl wait for the server to come up, so the exit status of ssh
+# tells whether the start worked; the server log is discarded (-l /dev/null).
+ssh "${SSH_OPTS[@]}" "postgres@${DEST_NODE_HOST}" "
+    $PGHOME/bin/pg_ctl -l /dev/null -w -D $DEST_NODE_PGDATA start
+"
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.error "pgpool_remote_start: $DEST_NODE_HOST PostgreSQL start failed."
+    exit 1
+fi
+
+logger -i -p local1.info "pgpool_remote_start: end: $DEST_NODE_HOST PostgreSQL started successfully."
+exit 0
--- /dev/null
+#!/bin/bash
+# This script is executed by "recovery_1st_stage" to recover a Standby node.
+#
+# It creates a replication slot named after the destination host on the local
+# (primary) server, then rebuilds the destination node over ssh with
+# pg_basebackup and writes its recovery settings and port number.
+#
+# Arguments:
+#   $1 - database cluster path of the primary node (not used below)
+#   $2 - hostname of the node to be recovered
+#   $3 - database cluster path of the node to be recovered
+#   $4 - port number of the primary node
+#   $5 - id of the node to be recovered (only used in log messages)
+#   $6 - port number of the node to be recovered
+#
+# Exit status: 0 on success, 1 on failure.
+
+set -o xtrace
+# Send everything this script prints (including the xtrace output) to syslog.
+exec > >(logger -i -p local1.info) 2>&1
+
+PRIMARY_NODE_PGDATA="$1"
+DEST_NODE_HOST="$2"
+DEST_NODE_PGDATA="$3"
+PRIMARY_NODE_PORT="$4"
+DEST_NODE_ID="$5"
+DEST_NODE_PORT="$6"
+
+# The script runs on the primary, so the local hostname is the primary's.
+PRIMARY_NODE_HOST=$(hostname)
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir
+REPLUSER=repl
+
+# Options shared by every ssh call: no tty, no host key prompt, pgpool key.
+SSH_OPTS=(-T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa_pgpool)
+
+logger -i -p local1.info "recovery_1st_stage: start: pg_basebackup for Standby node $DEST_NODE_ID"
+
+## Refuse to run with missing arguments: the values are pasted into the remote
+## script below, where an empty port would rewrite postgresql.conf with
+## "port = " and an empty data directory would put the recovery file under /.
+if [ -z "${DEST_NODE_HOST}" ] || [ -z "${DEST_NODE_PGDATA}" ] \
+    || [ -z "${PRIMARY_NODE_PORT}" ] || [ -z "${DEST_NODE_PORT}" ]; then
+    logger -i -p local1.error "recovery_1st_stage: destination host, destination data directory, primary port and destination port are required"
+    exit 1
+fi
+
+## Test passwordless SSH
+if ! ssh "${SSH_OPTS[@]}" "postgres@${DEST_NODE_HOST}" ls /tmp > /dev/null; then
+    logger -i -p local1.error "recovery_1st_stage: passwordless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwordless SSH."
+    exit 1
+fi
+
+## Get PostgreSQL major version
+# Uses the local initdb; the result decides the recovery file layout below.
+PGVERSION=$(${PGHOME}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')
+if [ "${PGVERSION}" -ge 12 ]; then
+    RECOVERYCONF=${DEST_NODE_PGDATA}/myrecovery.conf
+else
+    RECOVERYCONF=${DEST_NODE_PGDATA}/recovery.conf
+fi
+
+## Create replication slot "${DEST_NODE_HOST}"
+# The result is not checked here.
+${PGHOME}/bin/psql -p ${PRIMARY_NODE_PORT} << EOQ
+SELECT pg_create_physical_replication_slot('${DEST_NODE_HOST}');
+EOQ
+
+## Execute pg_basebackup to recover Standby node
+# All variables in the remote script are expanded locally before it is sent;
+# "set -o errexit" makes the remote shell stop at the first failing command.
+ssh "${SSH_OPTS[@]}" "postgres@${DEST_NODE_HOST}" "
+
+    set -o errexit
+
+    rm -rf $DEST_NODE_PGDATA
+    rm -rf $ARCHIVEDIR/*
+
+    ${PGHOME}/bin/pg_basebackup -h $PRIMARY_NODE_HOST -U $REPLUSER -p $PRIMARY_NODE_PORT -D $DEST_NODE_PGDATA -X stream
+
+    if [ ${PGVERSION} -ge 12 ]; then
+        sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
+               -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${DEST_NODE_PGDATA}/postgresql.conf
+    fi
+
+    cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${PRIMARY_NODE_HOST} port=${PRIMARY_NODE_PORT} user=${REPLUSER} application_name=${DEST_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${PRIMARY_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${DEST_NODE_HOST}'
+EOT
+
+    if [ ${PGVERSION} -ge 12 ]; then
+        touch ${DEST_NODE_PGDATA}/standby.signal
+    else
+        echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+    fi
+
+    sed -i \"s/#*port = .*/port = ${DEST_NODE_PORT}/\" ${DEST_NODE_PGDATA}/postgresql.conf
+"
+
+if [ $? -ne 0 ]; then
+
+    # The node could not be rebuilt, so its slot is removed again.
+    ${PGHOME}/bin/psql -p ${PRIMARY_NODE_PORT} << EOQ
+SELECT pg_drop_replication_slot('${DEST_NODE_HOST}');
+EOQ
+
+    logger -i -p local1.error "recovery_1st_stage: end: pg_basebackup failed. online recovery failed"
+    exit 1
+fi
+
+logger -i -p local1.info "recovery_1st_stage: end: recovery_1st_stage complete"
+exit 0
--- /dev/null
+#! /bin/sh
+# Online recovery 2nd stage script
+#
+# For every connectable non-template database, bumps each sequence with
+# setval(nextval()) so that its current value is written to the WAL, then
+# calls pgpool_switch_xlog() with the archive directory.
+#
+# Only $4 (the port) is used below; the other arguments are kept to document
+# the positions in which they are passed.
+DATADIR=$1 # master database cluster
+DEST=$2 # hostname of the DB node to be recovered
+DESTDIR=$3 # database cluster of the DB node to be recovered
+PORT=$4 # PostgreSQL port number
+
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir # archive log directory
+
+# Every call goes through ${PGHOME}/bin/psql so that the script does not
+# depend on psql being found in PATH.
+
+# Force to flush current value of sequences to xlog
+"${PGHOME}/bin/psql" -p "$PORT" -t -c 'SELECT datname FROM pg_database WHERE NOT datistemplate AND datallowconn' template1 |
+while read -r dbname
+do
+    # psql -t still prints blank lines; skip them.
+    if [ -n "$dbname" ]; then
+        "${PGHOME}/bin/psql" -p "$PORT" -c "SELECT setval(oid, nextval(oid)) FROM pg_class WHERE relkind = 'S'" "$dbname"
+    fi
+done
+
+"${PGHOME}/bin/psql" -p "$PORT" -c "SELECT pgpool_switch_xlog('$ARCHIVEDIR')" template1