Add various sample scripts:
author Bo Peng <pengbo@sraoss.co.jp>
Wed, 4 Sep 2019 08:37:06 +0000 (17:37 +0900)
committer Bo Peng <pengbo@sraoss.co.jp>
Wed, 4 Sep 2019 08:37:06 +0000 (17:37 +0900)
- failover.sh
- follow_master.sh
- recovery_1st_stage
- recovery_2nd_stage
- pgpool_remote_start

src/sample/pgpool_recovery_pitr [deleted file]
src/sample/pgpool_remote_start [deleted file]
src/sample/scripts/failover.sh [new file with mode: 0755]
src/sample/scripts/follow_master.sh [new file with mode: 0755]
src/sample/scripts/pgpool_remote_start [new file with mode: 0755]
src/sample/scripts/recovery_1st_stage [new file with mode: 0755]
src/sample/scripts/recovery_2nd_stage [new file with mode: 0755]

diff --git a/src/sample/pgpool_recovery_pitr b/src/sample/pgpool_recovery_pitr
deleted file mode 100644 (file)
index b1ceb28..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#! /bin/sh
-# Online recovery 2nd stage script
-#
-datadir=$1             # master dabatase cluster
-DEST=$2                        # hostname of the DB node to be recovered
-DESTDIR=$3             # database cluster of the DB node to be recovered
-port=$4                # PostgreSQL port number
-archdir=/data/archive_log      # archive log directory
-
-# Force to flush current value of sequences to xlog 
-psql -p $port -t -c 'SELECT datname FROM pg_database WHERE NOT datistemplate AND datallowconn' template1|
-while read i
-do
-  if [ "$i" != "" ];then
-    psql -p $port -c "SELECT setval(oid, nextval(oid)) FROM pg_class WHERE relkind = 'S'" $i
-  fi
-done
-
-psql -p $port -c "SELECT pgpool_switch_xlog('$archdir')" template1
diff --git a/src/sample/pgpool_remote_start b/src/sample/pgpool_remote_start
deleted file mode 100644 (file)
index afd0e61..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#! /bin/sh
-
-if [ $# -ne 2 ]
-then
-    echo "pgpool_remote_start remote_host remote_datadir"
-    exit 1
-fi
-
-DEST=$1
-DESTDIR=$2
-PGCTL=/usr/local/pgsql/bin/pg_ctl
-
-ssh -T $DEST $PGCTL -w -D $DESTDIR start 2>/dev/null 1>/dev/null < /dev/null &
diff --git a/src/sample/scripts/failover.sh b/src/sample/scripts/failover.sh
new file mode 100755 (executable)
index 0000000..6b1e798
--- /dev/null
@@ -0,0 +1,85 @@
+#!/bin/bash
+# This script is run by failover_command.
+#
+# If the failed node is a standby, the script only drops that node's
+# replication slot on the old primary. If the failed node is the primary,
+# it promotes the new master node with "pg_ctl promote" over SSH.
+#
+# Exit status: 0 on success; 1 if the SSH test, the replication slot drop
+# or the promotion fails.
+
+set -o xtrace
+# Send stdout and stderr (including the xtrace output) to syslog via logger.
+exec > >(logger -i -p local1.info) 2>&1
+
+# Special values:
+#   %d = failed node id
+#   %h = failed node hostname
+#   %p = failed node port number
+#   %D = failed node database cluster path
+#   %m = new master node id
+#   %H = new master node hostname
+#   %M = old master node id
+#   %P = old primary node id
+#   %r = new master port number
+#   %R = new master database cluster path
+#   %N = old primary node hostname
+#   %S = old primary node port number
+#   %% = '%' character
+
+FAILED_NODE_ID="$1"
+FAILED_NODE_HOST="$2"
+FAILED_NODE_PORT="$3"
+FAILED_NODE_PGDATA="$4"
+NEW_MASTER_NODE_ID="$5"
+NEW_MASTER_NODE_HOST="$6"
+OLD_MASTER_NODE_ID="$7"
+OLD_PRIMARY_NODE_ID="$8"
+NEW_MASTER_NODE_PORT="$9"
+NEW_MASTER_NODE_PGDATA="${10}"
+OLD_PRIMARY_NODE_HOST="${11}"
+OLD_PRIMARY_NODE_PORT="${12}"
+
+# PostgreSQL installation directory used for the remote psql/pg_ctl calls.
+PGHOME=/usr/pgsql-11
+
+logger -i -p local1.info failover.sh: start: failed_node_id=$FAILED_NODE_ID old_primary_node_id=$OLD_PRIMARY_NODE_ID failed_host=$FAILED_NODE_HOST new_master_host=$NEW_MASTER_NODE_HOST
+
+## Test passwordless SSH
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.info failover.sh: passwrodless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwrodless SSH.
+    exit 1
+fi
+
+## If Standby node is down, skip failover.
+if [ "$FAILED_NODE_ID" -ne "$OLD_PRIMARY_NODE_ID" ]; then
+    logger -i -p local1.info failover.sh: Standby node is down. Skipping failover.
+
+    # Drop the failed standby's replication slot (named after its hostname) on the old primary.
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$OLD_PRIMARY_NODE_HOST -i ~/.ssh/id_rsa_pgpool "
+        ${PGHOME}/bin/psql -p $OLD_PRIMARY_NODE_PORT -c \"SELECT pg_drop_replication_slot('${FAILED_NODE_HOST}')\"
+    "
+
+    if [ $? -ne 0 ]; then
+        logger -i -p local1.error failover.sh: drop replication slot "${FAILED_NODE_HOST}" failed
+        exit 1
+    fi
+
+    exit 0
+fi
+
+## Promote Standby node.
+logger -i -p local1.info failover.sh: Primary node is down, promote standby node ${NEW_MASTER_NODE_HOST}.
+
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ${PGHOME}/bin/pg_ctl -D ${NEW_MASTER_NODE_PGDATA} -w promote
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.error failover.sh: new_master_host=$NEW_MASTER_NODE_HOST promote failed
+    exit 1
+fi
+
+logger -i -p local1.info failover.sh: end: new_master_node_id=$NEW_MASTER_NODE_ID started as the primary node
+exit 0
diff --git a/src/sample/scripts/follow_master.sh b/src/sample/scripts/follow_master.sh
new file mode 100755 (executable)
index 0000000..2570b88
--- /dev/null
@@ -0,0 +1,182 @@
+#!/bin/bash
+# This script is run after failover_command to synchronize the Standby with the new Primary.
+# First try pg_rewind. If pg_rewind failed, use pg_basebackup.
+#
+# Exit status: 0 when the Standby was resynchronized and attached, or when it
+# was not running and was skipped; 1 on any failure.
+
+set -o xtrace
+# Send stdout and stderr (including the xtrace output) to syslog via logger.
+exec > >(logger -i -p local1.info) 2>&1
+
+# Special values:
+#   %d = failed node id
+#   %h = failed node hostname
+#   %p = failed node port number
+#   %D = failed node database cluster path
+#   %m = new master node id
+#   %H = new master node hostname
+#   %M = old master node id
+#   %P = old primary node id
+#   %r = new master port number
+#   %R = new master database cluster path
+#   %N = old primary node hostname
+#   %S = old primary node port number
+#   %% = '%' character
+
+# Arguments are read positionally; note that $6 is the old master node id
+# and $7 the new master node hostname, unlike the listing order above.
+FAILED_NODE_ID="$1"
+FAILED_NODE_HOST="$2"
+FAILED_NODE_PORT="$3"
+FAILED_NODE_PGDATA="$4"
+NEW_MASTER_NODE_ID="$5"
+OLD_MASTER_NODE_ID="$6"
+NEW_MASTER_NODE_HOST="$7"
+OLD_PRIMARY_NODE_ID="$8"
+NEW_MASTER_NODE_PORT="$9"
+NEW_MASTER_NODE_PGDATA="${10}"
+
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir
+REPLUSER=repl
+PCP_USER=pgpool
+PGPOOL_PATH=/usr/bin
+PCP_PORT=9898
+
+logger -i -p local1.info follow_master.sh: start: Standby node ${FAILED_NODE_ID}
+
+## Test passwordless SSH
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.info follow_master.sh: passwrodless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwrodless SSH.
+    exit 1
+fi
+
+## Get PostgreSQL major version
+PGVERSION=$(${PGHOME}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')
+
+# PostgreSQL 12 or later uses myrecovery.conf; older versions use recovery.conf.
+if [ $PGVERSION -ge 12 ]; then
+    RECOVERYCONF=${FAILED_NODE_PGDATA}/myrecovery.conf
+else
+    RECOVERYCONF=${FAILED_NODE_PGDATA}/recovery.conf
+fi
+
+## Check the status of Standby
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    postgres@${FAILED_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ${PGHOME}/bin/pg_ctl -w -D ${FAILED_NODE_PGDATA} status
+
+## If Standby is running, synchronize it with the new Primary.
+if [ $? -eq 0 ]; then
+
+    logger -i -p local1.info follow_master.sh: pg_rewind for $FAILED_NODE_ID
+
+    # Create replication slot "${FAILED_NODE_HOST}"
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+        ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"SELECT pg_create_physical_replication_slot('${FAILED_NODE_HOST}');\"
+    "
+
+    # Stop the Standby, write the recovery settings, then run pg_rewind.
+    # errexit makes the remote shell stop at the first failing command.
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${FAILED_NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+
+        set -o errexit
+
+        ${PGHOME}/bin/pg_ctl -w -m f -D ${FAILED_NODE_PGDATA} stop
+
+        cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT} user=${REPLUSER} application_name=${FAILED_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${NEW_MASTER_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${FAILED_NODE_HOST}'
+EOT
+
+        if [ ${PGVERSION} -ge 12 ]; then
+            touch ${FAILED_NODE_PGDATA}/standby.signal
+        else
+            echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+        fi
+
+        ${PGHOME}/bin/pg_rewind -D ${FAILED_NODE_PGDATA} --source-server=\"user=postgres host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT}\"
+
+    "
+
+    if [ $? -ne 0 ]; then
+        logger -i -p local1.error follow_master.sh: end: pg_rewind failed. Try pg_basebackup.
+
+        ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${FAILED_NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+
+            set -o errexit
+
+            # Execute pg_basebackup
+            rm -rf ${FAILED_NODE_PGDATA}
+            rm -rf ${ARCHIVEDIR}/*
+            ${PGHOME}/bin/pg_basebackup -h ${NEW_MASTER_NODE_HOST} -U $REPLUSER -p ${NEW_MASTER_NODE_PORT} -D ${FAILED_NODE_PGDATA} -X stream
+
+            if [ ${PGVERSION} -ge 12 ]; then
+                sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
+                       -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${FAILED_NODE_PGDATA}/postgresql.conf
+            fi
+
+            cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${NEW_MASTER_NODE_HOST} port=${NEW_MASTER_NODE_PORT} user=${REPLUSER} application_name=${FAILED_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${NEW_MASTER_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${FAILED_NODE_HOST}'
+EOT
+
+            if [ ${PGVERSION} -ge 12 ]; then
+                    touch ${FAILED_NODE_PGDATA}/standby.signal
+            else
+                    echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+            fi
+        "
+
+        if [ $? -ne 0 ]; then
+            # drop replication slot
+            ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+                ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"SELECT pg_drop_replication_slot('${FAILED_NODE_HOST}')\"
+            "
+
+            logger -i -p local1.error follow_master.sh: end: pg_basebackup failed
+            exit 1
+        fi
+    fi
+
+    # start Standby node on ${FAILED_NODE_HOST}
+    ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+            postgres@${FAILED_NODE_HOST} -i ~/.ssh/id_rsa_pgpool $PGHOME/bin/pg_ctl -l /dev/null -w -D ${FAILED_NODE_PGDATA} start
+
+    # If start Standby successfully, attach this node
+    if [ $? -eq 0 ]; then
+
+        # Run pcp_attach_node to attach Standby node to Pgpool-II.
+        ${PGPOOL_PATH}/pcp_attach_node -w -h localhost -U $PCP_USER -p ${PCP_PORT} -n ${FAILED_NODE_ID}
+
+        if [ $? -ne 0 ]; then
+                logger -i -p local1.error follow_master.sh: end: pcp_attach_node failed
+                exit 1
+        fi
+
+    # If start Standby failed, drop replication slot "${FAILED_NODE_HOST}"
+    else
+
+        # The whole remote command is passed as one quoted string so the
+        # parentheses and quotes of the SQL survive the remote shell.
+        ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${NEW_MASTER_NODE_HOST} -i ~/.ssh/id_rsa_pgpool "
+            ${PGHOME}/bin/psql -p ${NEW_MASTER_NODE_PORT} -c \"SELECT pg_drop_replication_slot('${FAILED_NODE_HOST}')\"
+        "
+
+        logger -i -p local1.error follow_master.sh: end: follow master command failed
+        exit 1
+    fi
+
+else
+    logger -i -p local1.info follow_master.sh: failed_nod_id=${FAILED_NODE_ID} is not running. skipping follow master command
+    exit 0
+fi
+
+logger -i -p local1.info follow_master.sh: end: follow master command complete
+exit 0
diff --git a/src/sample/scripts/pgpool_remote_start b/src/sample/scripts/pgpool_remote_start
new file mode 100755 (executable)
index 0000000..3558a48
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/bash
+# This script is run after recovery_1st_stage to start Standby node.
+#
+# Arguments: $1 = hostname of the node to start, $2 = its database cluster path.
+# Exit status: 0 if PostgreSQL was started, 1 if SSH or pg_ctl start failed.
+
+set -o xtrace
+# Send stdout and stderr (including the xtrace output) to syslog via logger.
+exec > >(logger -i -p local1.info) 2>&1
+
+DEST_NODE_HOST="$1"
+DEST_NODE_PGDATA="$2"
+
+PGHOME=/usr/pgsql-11
+
+logger -i -p local1.info pgpool_remote_start: start: remote start Standby node $DEST_NODE_HOST
+
+## Test passwordless SSH
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${DEST_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.info pgpool_remote_start: passwrodless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwrodless SSH.
+    exit 1
+fi
+
+## Start Standby node
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$DEST_NODE_HOST -i ~/.ssh/id_rsa_pgpool "
+    $PGHOME/bin/pg_ctl -l /dev/null -w -D $DEST_NODE_PGDATA start
+"
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.error pgpool_remote_start: $DEST_NODE_HOST PostgreSQL start failed.
+    exit 1
+fi
+
+logger -i -p local1.info pgpool_remote_start: end: $DEST_NODE_HOST PostgreSQL started successfully.
+exit 0
diff --git a/src/sample/scripts/recovery_1st_stage b/src/sample/scripts/recovery_1st_stage
new file mode 100755 (executable)
index 0000000..96cf992
--- /dev/null
@@ -0,0 +1,90 @@
+#!/bin/bash
+# This script is executed by "recovery_1st_stage" to recover a Standby node.
+#
+# It creates a replication slot named after the destination host on the local
+# primary, then over SSH rebuilds the destination data directory with
+# pg_basebackup and writes its recovery settings.
+# Exit status: 0 on success; 1 if the SSH test or the remote recovery fails.
+
+set -o xtrace
+# Send stdout and stderr (including the xtrace output) to syslog via logger.
+exec > >(logger -i -p local1.info) 2>&1
+
+PRIMARY_NODE_PGDATA="$1"
+DEST_NODE_HOST="$2"
+DEST_NODE_PGDATA="$3"
+PRIMARY_NODE_PORT="$4"
+DEST_NODE_ID="$5"
+DEST_NODE_PORT="$6"
+
+PRIMARY_NODE_HOST=$(hostname)
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir
+REPLUSER=repl
+
+logger -i -p local1.info recovery_1st_stage: start: pg_basebackup for Standby node $DEST_NODE_ID
+
+## Test passwordless SSH
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@${DEST_NODE_HOST} -i ~/.ssh/id_rsa_pgpool ls /tmp > /dev/null
+
+if [ $? -ne 0 ]; then
+    logger -i -p local1.info recovery_1st_stage: passwrodless SSH to postgres@${DEST_NODE_HOST} failed. Please setup passwrodless SSH.
+    exit 1
+fi
+
+## Get PostgreSQL major version
+PGVERSION=$(${PGHOME}/bin/initdb -V | awk '{print $3}' | sed 's/\..*//' | sed 's/\([0-9]*\)[a-zA-Z].*/\1/')
+if [ $PGVERSION -ge 12 ]; then
+    RECOVERYCONF=${DEST_NODE_PGDATA}/myrecovery.conf
+else
+    RECOVERYCONF=${DEST_NODE_PGDATA}/recovery.conf
+fi
+
+## Create replication slot "${DEST_NODE_HOST}"
+${PGHOME}/bin/psql -p ${PRIMARY_NODE_PORT} << EOQ
+SELECT pg_create_physical_replication_slot('${DEST_NODE_HOST}');
+EOQ
+
+## Execute pg_basebackup to recover Standby node
+ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null postgres@$DEST_NODE_HOST -i ~/.ssh/id_rsa_pgpool "
+
+    set -o errexit
+
+    rm -rf $DEST_NODE_PGDATA
+    rm -rf $ARCHIVEDIR/*
+
+    ${PGHOME}/bin/pg_basebackup -h $PRIMARY_NODE_HOST -U $REPLUSER -p $PRIMARY_NODE_PORT -D $DEST_NODE_PGDATA -X stream
+
+    if [ ${PGVERSION} -ge 12 ]; then
+        sed -i -e \"\\\$ainclude_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'\" \
+               -e \"/^include_if_exists = '$(echo ${RECOVERYCONF} | sed -e 's/\//\\\//g')'/d\" ${DEST_NODE_PGDATA}/postgresql.conf
+    fi
+
+    cat > ${RECOVERYCONF} << EOT
+primary_conninfo = 'host=${PRIMARY_NODE_HOST} port=${PRIMARY_NODE_PORT} user=${REPLUSER} application_name=${DEST_NODE_HOST} passfile=''/var/lib/pgsql/.pgpass'''
+recovery_target_timeline = 'latest'
+restore_command = 'scp ${PRIMARY_NODE_HOST}:${ARCHIVEDIR}/%f %p'
+primary_slot_name = '${DEST_NODE_HOST}'
+EOT
+
+    if [ ${PGVERSION} -ge 12 ]; then
+            touch ${DEST_NODE_PGDATA}/standby.signal
+    else
+            echo \"standby_mode = 'on'\" >> ${RECOVERYCONF}
+    fi
+
+    sed -i \"s/#*port = .*/port = ${DEST_NODE_PORT}/\" ${DEST_NODE_PGDATA}/postgresql.conf
+"
+
+if [ $? -ne 0 ]; then
+
+    ${PGHOME}/bin/psql -p ${PRIMARY_NODE_PORT} << EOQ
+SELECT pg_drop_replication_slot('${DEST_NODE_HOST}');
+EOQ
+
+    logger -i -p local1.error recovery_1st_stage: end: pg_basebackup failed. online recovery failed
+    exit 1
+fi
+
+logger -i -p local1.info recovery_1st_stage: end: recovery_1st_stage complete
+exit 0
diff --git a/src/sample/scripts/recovery_2nd_stage b/src/sample/scripts/recovery_2nd_stage
new file mode 100755 (executable)
index 0000000..64eae32
--- /dev/null
@@ -0,0 +1,28 @@
+#! /bin/sh
+# Online recovery 2nd stage script
+#
+# Flushes the current value of every sequence in each connectable,
+# non-template database to the xlog, then calls pgpool_switch_xlog() with the
+# archive directory.
+#
+DATADIR=$1             # master database cluster
+DEST=$2                # hostname of the DB node to be recovered
+DESTDIR=$3             # database cluster of the DB node to be recovered
+PORT=$4                # PostgreSQL port number
+
+PGHOME=/usr/pgsql-11
+ARCHIVEDIR=/var/lib/pgsql/archivedir  # archive log directory
+
+# Use the psql binary under PGHOME for every call instead of relying on PATH.
+PSQL=${PGHOME}/bin/psql
+
+# Force to flush current value of sequences to xlog
+"$PSQL" -p "$PORT" -t -c 'SELECT datname FROM pg_database WHERE NOT datistemplate AND datallowconn' template1 |
+while read -r dbname
+do
+  if [ "$dbname" != "" ]; then
+    "$PSQL" -p "$PORT" -c "SELECT setval(oid, nextval(oid)) FROM pg_class WHERE relkind = 'S'" "$dbname"
+  fi
+done
+
+"$PSQL" -p "$PORT" -c "SELECT pgpool_switch_xlog('$ARCHIVEDIR')" template1