From d2ba00520d6ef2e8a911ee6f3ce96d13df41319a Mon Sep 17 00:00:00 2001 From: Bo Peng Date: Mon, 13 Nov 2023 09:57:30 +0900 Subject: [PATCH] Doc: update English Configuration Example "8.2. Pgpool-II + Watchdog Setup Example" to Pgpool-II 4.5 and PostgreSQL 16. Several enhancements are also added. --- doc/src/sgml/example-cluster.sgml | 791 +++++++++++++----------- src/sample/scripts/escalation.sh.sample | 2 +- 2 files changed, 446 insertions(+), 347 deletions(-) diff --git a/doc/src/sgml/example-cluster.sgml b/doc/src/sgml/example-cluster.sgml index e16260708..d2089ed6c 100644 --- a/doc/src/sgml/example-cluster.sgml +++ b/doc/src/sgml/example-cluster.sgml @@ -7,7 +7,7 @@ servers to create a robust cluster system and avoid the single point of failure or split brain. - PostgreSQL 15 is used in this configuration example. + PostgreSQL 16 is used in this configuration example. All scripts have been tested with PostgreSQL 10 and later. @@ -20,8 +20,9 @@ Cluster System Configuration - We use 3 servers with CentOS 7.9 installed. Let these servers be server1 - server2, server3. + We use three servers with Rocky Linux 8 installed and + the hostnames of the three servers are server1 + server2 and server3 respectively. We install PostgreSQL and Pgpool-II on each server. @@ -36,7 +37,7 @@ - The roles of Active, Standby, Primary, + The roles of Leader, Standby, Primary, Standby are not fixed and may be changed by further operations. @@ -53,16 +54,16 @@ server1 - 192.168.137.101 - 192.168.137.150 + 192.168.100.51 + 192.168.100.50 server2 - 192.168.137.102 + 192.168.100.52 server3 - 192.168.137.103 + 192.168.100.53 @@ -81,7 +82,7 @@ PostgreSQL Version - 15.0 + 16.0 - @@ -91,7 +92,7 @@ $PGDATA - /var/lib/pgsql/15/data + /var/lib/pgsql/16/data - @@ -121,7 +122,7 @@ Pgpool-II Version - 4.3.0 + 4.5.0 - @@ -147,9 +148,9 @@ Pgpool-II config file - Pgpool-II start user + User running Pgpool-II postgres (Pgpool-II 4.1 or later) - Pgpool-II 4.0 or before, the default startup user is root + Pgpool-II 4.0 or before, the default user running Pgpool-II is root Running mode @@ -178,26 +179,26 @@ Failover - /etc/pgpool-II/sample_scripts/failover.sh.sample + /etc/pgpool-II/sample_scripts/failover.sh.sample Run by to perform failover - /etc/pgpool-II/sample_scripts/follow_primary.sh.sample + /etc/pgpool-II/sample_scripts/follow_primary.sh.sample Run by to synchronize the Standby with the new Primary after failover. Online recovery - /etc/pgpool-II/sample_scripts/recovery_1st_stage.sample + /etc/pgpool-II/sample_scripts/recovery_1st_stage.sample Run by to recovery a Standby node - /etc/pgpool-II/sample_scripts/pgpool_remote_start.sample + /etc/pgpool-II/sample_scripts/pgpool_remote_start.sample Run after to start the Standby node Watchdog - /etc/pgpool-II/sample_scripts/escalation.sh.sample - Run by to switch the Active/Standby Pgpool-II safely + /etc/pgpool-II/sample_scripts/escalation.sh.sample + Optional Configuration. Run by to switch the Leader/Standby Pgpool-II safely @@ -217,8 +218,9 @@ Install PostgreSQL from PostgreSQL YUM repository. 
-[all servers]# yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm -[all servers]# yum install -y postgresql15-server +[all servers]# dnf install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm +[all servers]# dnf -qy module disable postgresql +[all servers]# dnf install -y postgresql16-server Since Pgpool-II related packages are also included in PostgreSQL YUM repository, @@ -236,6 +238,10 @@ ... exclude=pgpool* +[pgdg16] +... +exclude=pgpool* + [pgdg15] ... exclude=pgpool* @@ -254,10 +260,6 @@ exclude=pgpool* [pgdg11] ... -exclude=pgpool* - -[pgdg10] -... exclude=pgpool* @@ -265,8 +267,8 @@ exclude=pgpool* Install Pgpool-II from Pgpool-II YUM repository. -[all servers]# yum install -y https://www.pgpool.net/yum/rpms/4.4/redhat/rhel-7-x86_64/pgpool-II-release-4.4-1.noarch.rpm -[all servers]# yum install -y pgpool-II-pg15-* +[all servers]# dnf install -y https://www.pgpool.net/yum/rpms/4.5/redhat/rhel-8-x86_64/pgpool-II-release-4.5-1.noarch.rpm +[all servers]# dnf install -y pgpool-II-pg16-* @@ -276,28 +278,28 @@ exclude=pgpool* Before you start the configuration process, please check the following prerequisites. - - - - Set up PostgreSQL streaming replication on the primary server. - In this example, we use WAL archiving. - - - First, we create the directory /var/lib/pgsql/archivedir to store - WAL segments on all servers. In this example, only Primary node archives - WAL locally. - - + + Setting up streaming replication on Primary + + Set up PostgreSQL streaming replication on the primary server. + In this example, we use WAL archiving. + + + First, we create the directory /var/lib/pgsql/archivedir to store + WAL segments on all servers. In this example, only Primary node archives + WAL locally. + + [all servers]# su - postgres [all servers]$ mkdir /var/lib/pgsql/archivedir - + - - Initialize PostgreSQL on the primary server. - - + + Initialize PostgreSQL on the primary server. + + [server1]# su - postgres -[server1]$ /usr/pgsql-15/bin/initdb -D $PGDATA +[server1]$ /usr/pgsql-16/bin/initdb -D $PGDATA @@ -317,15 +319,50 @@ hot_standby = on wal_log_hints = on - We use the online recovery functionality of Pgpool-II to setup standby server after the primary server is started. + Start PostgreSQL primary server on server1. - + +[server1]# su - postgres +[server1]$ /usr/pgsql-16/bin/pg_ctl start -D $PGDATA + + - - - Because of the security reasons, we create a user repl solely used - for replication purpose, and a user pgpool for streaming - replication delay check and health check of Pgpool-II. + + Setting up streaming replication on Standby + + There are multiple methods to setup a standby server, such as: + + + + use pg_basebackup to backup the data directory of the primary from the standby. + + + + + use Pgpool-II's online recovery feature + () to automatically + setup a standby server. + + + + + + In this example, we use Pgpool-II's + online recovery to setup the standby server + in section + after the configuration of Pgpool-II + is completed. + + + + + Setting up PostgreSQL users + + Because of the security reasons, we create a dedicated user named repl + for replication and a dedicated user named pgpool for streaming + replication delay check and health check of Pgpool-II. + Because online recovery feature requires superuser privilege, + we use postgres user here. 
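
     For reference, these users can be created on the primary server with
     psql. The following is only a minimal sketch (passwords
     are entered interactively; running SET password_encryption
     beforehand is an assumption made here so that the passwords are stored in
     scram-sha-256 form, matching the pg_hba.conf
     entries used later in this example):

[server1]# su - postgres
[server1]$ psql
postgres=# SET password_encryption = 'scram-sha-256';
postgres=# CREATE ROLE pgpool WITH LOGIN;
postgres=# CREATE ROLE repl WITH REPLICATION LOGIN;
postgres=# \password pgpool
postgres=# \password repl
postgres=# \password postgres
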
@@ -333,21 +370,21 @@ wal_log_hints = on - User Name - Password - Detail + User Name + Password + Detail - repl - repl - PostgreSQL replication user + repl + repl + PostgreSQL replication user - pgpool - pgpool - Pgpool-II health check () and replication delay check () user + pgpool + pgpool + Pgpool-II health check () and replication delay check () user postgres @@ -369,11 +406,13 @@ postgres=# \password postgres - If you want to show "replication_state" and "replication_sync_state" column in - command result, role pgpool - needs to be PostgreSQL super user or or in pg_monitor group - (Pgpool-II 4.1 or later). Grant pg_monitor - to pgpool: + To show replication_state and + replication_sync_state column in + command result, role + pgpool needs to be PostgreSQL superuser or in + pg_monitor group (Pgpool-II 4.1 or later). + Grant pg_monitor + to pgpool: GRANT pg_monitor TO pgpool; @@ -381,8 +420,8 @@ GRANT pg_monitor TO pgpool; If you plan to use (Pgpool-II 4.0 or later), - role "pgpool" needs to be PostgreSQL super user or - or in "pg_monitor" group to use this feature. + role "pgpool" needs to be PostgreSQL superuser + or in pg_monitor group to use this feature. @@ -391,108 +430,99 @@ GRANT pg_monitor TO pgpool; enable scram-sha-256 authentication method. -host all all samenet scram-sha-256 -host replication all samenet scram-sha-256 - - - - - - To use the automated failover and online recovery of Pgpool-II, - it is requried to allow SSH public key authentication - (passwordless SSH login) to all backend servers - as Pgpool-II startup user - (default is postgres. Pgpool-II 4.0 or before, default is root) - and PostgreSQL startup user (default is postgres). - - - First, set postgres user's passwrod. - - -[all servers]# passwd postgres +host all pgpool samenet scram-sha-256 +host all postgres samenet scram-sha-256 +host replication repl samenet scram-sha-256 - - Execute the following command on all servers - to set up passwordless SSH. - In this example, we assume that the generated key file name is - id_rsa_pgpool. - - -[all servers]# mkdir ~/.ssh -[all servers]# chmod 700 ~/.ssh -[all servers]# cd ~/.ssh -[all servers]# ssh-keygen -t rsa -f id_rsa_pgpool -[all servers]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server1 -[all servers]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server2 -[all servers]# ssh-copy-id -i id_rsa_pgpool.pub postgres@server3 + + + Setting up SSH public key authentication + + To use the automated failover and online recovery of Pgpool-II, + it is requried to configure SSH public key authentication + (passwordless SSH login) to all backend servers using + postgres user (the default user Pgpool-II is running as. + Pgpool-II 4.0 or before, the default user is root). + + + Execute the following command on all servers to generate a key pair using + the RSA algorithm. In this example, we assume that the generated key file + name is id_rsa_pgpool. + + [all servers]# su - postgres [all servers]$ mkdir ~/.ssh [all servers]$ chmod 700 ~/.ssh [all servers]$ cd ~/.ssh [all servers]$ ssh-keygen -t rsa -f id_rsa_pgpool -[all servers]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server1 -[all servers]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server2 -[all servers]$ ssh-copy-id -i id_rsa_pgpool.pub postgres@server3 - - - After setting SSH, make sure that you can run - ssh postgres@serverX -i ~/.ssh/id_rsa_pgpool command - as Pgpool-II startup user and - PostgreSQL startup user to - log in without entering a password. 
- + + + Then add the public key id_rsa_pgpool.pub to + /var/lib/pgsql/.ssh/authorized_keys file + on each server. + + + After setting SSH, make sure that you can run + ssh postgres@serverX -i ~/.ssh/id_rsa_pgpool command + as postgres user to login to each server + without entering a password. + - - - If you failed to login using SSH public key authentication, please check the following: - - - - Ensure the public key authentication option PubkeyAuthentication are allowed in /etc/ssh/sshd_config: - - - - + + + If you failed to login using SSH public key authentication, please check the following: + + + + Ensure the public key authentication option PubkeyAuthentication are allowed in /etc/ssh/sshd_config: + + + + PubkeyAuthentication yes - - - - + + + + if the password authentication is disabled, you can fail to run ssh-copy-id, temporarily add the following configuration in /etc/ssh/sshd_config. - - - - + + + + PasswordAuthentication yes - - - - - If SELinux is enabled, SSH public key authentication (passwordless SSH) may fail. - You need to run the following command on all servers. - - - - + + + + + If SELinux is enabled, SSH public key authentication (passwordless SSH) may fail. + You need to run the following command on all servers. + + + + [all servers]# su - postgres [all servers]$ restorecon -Rv ~/.ssh - - - - - - - - To allow repl user without specifying password for streaming - replication and online recovery, and execute pg_rewind - using postgres, we create the .pgpass file - in postgres user's home directory and change the permission to - 600 on each PostgreSQL server. + - + + + + + Creating .pgpass + + To allow repl user without specifying password for streaming + replication and online recovery, and execute pg_rewind + using postgres, we + Create the .pgpass file in postgres + user's home directory and change the permission to 600 + on each PostgreSQL server. + This file allows repl user and postgres + user without providing a password for streaming replication and failover. + + [all servers]# su - postgres [all servers]$ vi /var/lib/pgsql/.pgpass server1:5432:replication:repl:<repl user password> @@ -501,21 +531,21 @@ server3:5432:replication:repl:<repl user password> server1:5432:postgres:postgres:<postgres user password> server2:5432:postgres:postgres:<postgres user password> server3:5432:postgres:postgres:<postgres user password> -[all servers]$ chmod 600 /var/lib/pgsql/.pgpass - - +[all servers]$ chmod 600 /var/lib/pgsql/.pgpass + + - - - When connect to Pgpool-II and PostgreSQL servers, the target port must be accessible by enabling firewall management softwares. Following is an example for CentOS/RHEL7. - - + + Setting up firewall + + When connect to Pgpool-II and PostgreSQL servers, the target port must be accessible by enabling firewall management softwares. Following is an example for Rocky Linux 8/RHEL 8. + + [all servers]# firewall-cmd --permanent --zone=public --add-service=postgresql [all servers]# firewall-cmd --permanent --zone=public --add-port=9999/tcp --add-port=9898/tcp --add-port=9000/tcp --add-port=9694/udp [all servers]# firewall-cmd --reload - - - + + @@ -558,6 +588,28 @@ server3:5432:postgres:postgres:<postgres user password> + + PCP connection authentication + + To use PCP commands PCP user names and md5 encrypted passwords must be + declared in pcp.conf in format + "username:encrypted password". + + + In this example, we set PCP username to "pgpool" + and password to "pgpool_password". 
+ Use to create the encrypted password entry for + pgpool user as below: + + +[all servers]# echo 'pgpool:'`pg_md5 pgpool_password` >> /etc/pgpool-II/pcp.conf + +[all servers]# cat /etc/pgpool-II/pcp.conf +# USERID:MD5PASSWD +pgpool:4aa0cb9673e84b06d4c8a848c80eb5d0 + + + <productname>Pgpool-II</productname> Configuration @@ -586,10 +638,12 @@ backend_clustering_mode = 'streaming_replication' listen_addresses - To allow Pgpool-II to accept all incoming connections, we set listen_addresses = '*'. + To allow Pgpool-II and PCP to accept all incoming connections, set the following + parameters to '*'. listen_addresses = '*' +pcp_listen_addresses = '*' @@ -651,25 +705,31 @@ health_check_max_retries = 3 backend_hostname0 = 'server1' backend_port0 = 5432 backend_weight0 = 1 -backend_data_directory0 = '/var/lib/pgsql/15/data' +backend_data_directory0 = '/var/lib/pgsql/16/data' backend_flag0 = 'ALLOW_TO_FAILOVER' backend_hostname1 = 'server2' backend_port1 = 5432 backend_weight1 = 1 -backend_data_directory1 = '/var/lib/pgsql/15/data' +backend_data_directory1 = '/var/lib/pgsql/16/data' backend_flag1 = 'ALLOW_TO_FAILOVER' backend_hostname2 = 'server3' backend_port2 = 5432 backend_weight2 = 1 -backend_data_directory2 = '/var/lib/pgsql/15/data' +backend_data_directory2 = '/var/lib/pgsql/16/data' backend_flag2 = 'ALLOW_TO_FAILOVER' - To show "replication_state" and "replication_sync_state" column in - command result, parameter is required. - Here we specify each backend's hostname in these parameters. (Pgpool-II 4.1 or later) + To show replication_state and + replication_sync_state column in + command result, + parameter is required. + Here we specify each backend's hostname in these parameters + (Pgpool-II 4.1 or later). + Make sure that the value set in backend_application_nameX + matches the value set in application_name + of primary_conninfo. ... @@ -684,19 +744,21 @@ backend_application_name2 = 'server3' Failover configuration - Specify failover.sh script to be executed after failover in failover_command - parameter. - If we use 3 PostgreSQL servers, we need to specify follow_primary_command to run after failover on the primary node failover. - In case of two PostgreSQL servers, follow_primary_command setting is not necessary. + Specify the script that will be executed when failover occurs in + . When using three or more + PostgreSQL servers, it's required to specify + to synchronize the standby + with the new primary. In case of two PostgreSQL servers, the setting of + is not required. Pgpool-II replaces the following special characters with the backend specific information while executing the scripts. - See for more details about each character. + See and for more details about each character. failover_command = '/etc/pgpool-II/failover.sh %d %h %p %D %m %H %M %P %r %R %N %S' -follow_primary_command = '/etc/pgpool-II/follow_primary.sh %d %h %p %D %m %H %M %P %r %R +follow_primary_command = '/etc/pgpool-II/follow_primary.sh %d %h %p %D %m %H %M %P %r %R' @@ -705,8 +767,8 @@ follow_primary_command = '/etc/pgpool-II/follow_primary.sh %d %h %p %D %m %H %M - Sample scripts failover.sh - and follow_primary.sh + Sample scripts failover.sh + and follow_primary.sh are installed in /etc/pgpool-II/. Create failover scripts using these sample files. 
@@ -715,28 +777,25 @@ follow_primary_command = '/etc/pgpool-II/follow_primary.sh %d %h %p %D %m %H %M [all servers]# chown postgres:postgres /etc/pgpool-II/{failover.sh,follow_primary.sh} - Basically, it should work if you change PGHOME according to PostgreSQL installation directory. + Basically, it should work if you change PGHOME according to + PostgreSQL installation directory. [all servers]# vi /etc/pgpool-II/failover.sh ... -PGHOME=/usr/pgsql-15 +PGHOME=/usr/pgsql-16 ... [all servers]# vi /etc/pgpool-II/follow_primary.sh ... -PGHOME=/usr/pgsql-15 +PGHOME=/usr/pgsql-16 ... - Since user authentication is required to use the PCP command in - follow_primary_command script, - we need to specify user name and md5 encrypted password in pcp.conf - in format "username:encrypted password". - - - if pgpool user is specified in PCP_USER in follow_primary.sh, + Make sure the entry of the PCP user specified in PCP_USER in + follow_primary.sh is created in pcp.conf. + In this example, we have created in # cat /etc/pgpool-II/follow_primary.sh @@ -745,19 +804,20 @@ PCP_USER=pgpool ... - then we use to create the encrypted password entry for pgpool user as below: + Since follow_primary.sh script must execute PCP command without + entering a password, we need to create .pcppass in + postgres user's home directory on each server + (the home directory of the user Pgpool-II is running as). + The format of .pcppass is + "hostname:port:username:password". - -[all servers]# echo 'pgpool:'`pg_md5 PCP password` >> /etc/pgpool-II/pcp.conf - - Since follow_primary.sh script must execute PCP command without entering a - password, we need to create .pcppass in the home directory of - Pgpool-II startup user (postgres user) on each server. + In this example, we assume that the PCP user is pgpool + and the password is pgpool_password. [all servers]# su - postgres -[all servers]$ echo 'localhost:9898:pgpool:<pgpool user password>' > ~/.pcppass +[all servers]$ echo 'localhost:9898:pgpool:pgpool_password' > ~/.pcppass [all servers]$ chmod 600 ~/.pcppass @@ -771,14 +831,13 @@ PCP_USER=pgpool Pgpool-II Online Recovery Configurations - Next, in order to perform online recovery with Pgpool-II we specify - the PostgreSQL user name and online recovery command - recovery_1st_stage. + Next, configure the required parameters to perform online recovery. Because Superuser privilege in PostgreSQL - is required for performing online recovery, we specify postgres user in . - Then, we create recovery_1st_stage and pgpool_remote_start - in database cluster directory of PostgreSQL primary server (server1), and add execute permission. - + is required for performing online recovery, we specify postgres + user in . In this example, we leave + empty, and create the entry in + . See + for how to create the entry in . recovery_user = 'postgres' @@ -786,27 +845,32 @@ recovery_password = '' recovery_1st_stage_command = 'recovery_1st_stage' - Online recovery sample scriptsrecovery_1st_stage - and pgpool_remote_start + Then, we create recovery_1st_stage and pgpool_remote_start + in database cluster directory of PostgreSQL + primary server (server1). + + + The sample scripts of online recovery recovery_1st_stage + and pgpool_remote_start are installed in /etc/pgpool-II/. Copy these files to the data directory of the primary server (server1). 
-[server1]# cp -p /etc/pgpool-II/sample_scripts/recovery_1st_stage.sample /var/lib/pgsql/15/data/recovery_1st_stage -[server1]# cp -p /etc/pgpool-II/sample_scripts/pgpool_remote_start.sample /var/lib/pgsql/15/data/pgpool_remote_start -[server1]# chown postgres:postgres /var/lib/pgsql/15/data/{recovery_1st_stage,pgpool_remote_start} +[server1]# cp -p /etc/pgpool-II/sample_scripts/recovery_1st_stage.sample /var/lib/pgsql/16/data/recovery_1st_stage +[server1]# cp -p /etc/pgpool-II/sample_scripts/pgpool_remote_start.sample /var/lib/pgsql/16/data/pgpool_remote_start +[server1]# chown postgres:postgres /var/lib/pgsql/16/data/{recovery_1st_stage,pgpool_remote_start} Basically, it should work if you change PGHOME according to PostgreSQL installation directory. -[server1]# vi /var/lib/pgsql/15/data/recovery_1st_stage +[server1]# vi /var/lib/pgsql/16/data/recovery_1st_stage ... -PGHOME=/usr/pgsql-15 +PGHOME=/usr/pgsql-16 ... -[server1]# vi /var/lib/pgsql/15/data/pgpool_remote_start +[server1]# vi /var/lib/pgsql/16/data/pgpool_remote_start ... -PGHOME=/usr/pgsql-15 +PGHOME=/usr/pgsql-16 ... @@ -845,12 +909,16 @@ PGHOME=/usr/pgsql-15 enable_pool_hba = on - The format of pool_hba.conf file follows very closely PostgreSQL's - pg_hba.conf format. Set pgpool and postgres user's authentication method to scram-sha-256. + The format of pool_hba.conf file follows + PostgreSQL's pg_hba.conf format very closely. + Set pgpool and postgres user's + authentication method to scram-sha-256. In this example, + it is assumed that the application connecting to Pgpool-II + is in the same subnet. -host all pgpool 0.0.0.0/0 scram-sha-256 -host all postgres 0.0.0.0/0 scram-sha-256 +host all pgpool samenet scram-sha-256 +host all postgres samenet scram-sha-256 @@ -861,11 +929,11 @@ host all postgres 0.0.0.0/0 scram-sha-256 The default password file name for authentication is . - To use scram-sha-256 authentication, the decryption key to decrypt the passwords - is required. We create the .pgpoolkey file in Pgpool-II - start user postgres's (Pgpool-II 4.1 or later) home directory. - (Pgpool-II 4.0 or before, by default Pgpool-II - is started as root) + To use scram-sha-256 authentication, the decryption key to + decrypt the passwords is required. We create the .pgpoolkey + file in postgres user's home directory + (the user Pgpool-II is running as. Pgpool-II 4.0 or before, + Pgpool-II is running as root by default) [all servers]# su - postgres [all servers]$ echo 'some string' > ~/.pgpoolkey @@ -900,12 +968,11 @@ postgres:AESHs/pWL5rtXy2IwuzroHfqg== use_watchdog = on - Specify virtual IP address that accepts connections from clients on - server1, server2, server3. - Ensure that the IP address set to virtual IP isn't used yet. + Set virtual IP address to . + Ensure that the IP address isn't used yet. -delegate_ip = '192.168.137.150' +delegate_ip = '192.168.100.50' @@ -913,7 +980,8 @@ delegate_ip = '192.168.137.150' The network interface used in this example is "enp0s8". Since root privilege is required to execute if_up/down_cmd or arping_cmd command, use setuid on these command or allow - Pgpool-II startup user, postgres user (Pgpool-II 4.1 or later) to run sudo command without a password. + postgres user (the user Pgpool-II is running as) to run + sudo command without a password. 
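
     For reference, one way to allow this is a sudoers rule along the
     following lines. This is only a sketch: the ip and
     arping paths shown are assumptions and must match the
     commands actually configured in if_up_cmd,
     if_down_cmd and arping_cmd.

[all servers]# visudo
postgres ALL=(root) NOPASSWD: /sbin/ip, /usr/sbin/arping

     A drop-in file under /etc/sudoers.d achieves the same result.
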
@@ -937,7 +1005,7 @@ arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I enp0s8' If "Defaults requiretty" is set in the /etc/sudoers, - please ensure that the pgpool startup user can execute the if_up_cmd, if_down_cmd and arping_cmd command without a tty. + please ensure that the user that Pgpool-II is running as can execute the if_up_cmd, if_down_cmd and arping_cmd command without a tty. @@ -968,7 +1036,7 @@ wd_port2 = 9000 pgpool_port2 = 9999 - Specify the method of lifecheck + Configure the method of lifecheck and the lifecheck interval . Here, we use heartbeat method to perform watchdog lifecheck. @@ -991,7 +1059,7 @@ heartbeat_port2 = 9694 heartbeat_device2 = '' - If the is set to heartbeat, + If is set to heartbeat, specify the time to detect a fault and the interval to send heartbeat signals . @@ -1001,14 +1069,18 @@ wd_heartbeat_deadtime = 30 - When Watchdog process is abnormally terminated, the virtual IP may be "up" on both of the old and new active pgpool nodes. - To prevent this, configure to bring down the virtual IP on other pgpool nodes before bringing up the virtual IP on the new active pgpool node. + This setting is optional. + When Watchdog process is abnormally terminated, + the virtual IP may be "up" on both of the old and new active pgpool nodes. + To prevent this, configure + to bring down the virtual IP on other Pgpool-II nodes before + bringing up the virtual IP on the new leader Pgpool-II node. wd_escalation_command = '/etc/pgpool-II/escalation.sh' - The sample script escalation.sh is installed in /etc/pgpool-II/. + The sample script escalation.sh is installed in /etc/pgpool-II/. [all servers]# cp -p /etc/pgpool-II/sample_scripts/escalation.sh.sample /etc/pgpool-II/escalation.sh @@ -1017,15 +1089,15 @@ wd_escalation_command = '/etc/pgpool-II/escalation.sh' Basically, it should work if you change the following variables according to your environment. - PGPOOL is tha array of the hostname that running Pgpool-II. - VIP is the virtual IP address that you set as delegate_ip. + PGPOOLS is a list of hostnames where Pgpool-II is running. + VIP is the virtual IP address that is set to . DEVICE is the network interface for the virtual IP. [all servers]# vi /etc/pgpool-II/escalation.sh ... PGPOOLS=(server1 server2 server3) -VIP=192.168.137.150 +VIP=192.168.100.50 DEVICE=enp0s8 ... @@ -1037,8 +1109,8 @@ DEVICE=enp0s8 - If use_watchdog = on, please make sure the pgpool node number is specified - in pgpool_node_id file. + If use_watchdog = on, please make sure the pgpool + node number is specified in pgpool_node_id file. See for details. @@ -1068,8 +1140,9 @@ log_rotation_size = 10MB - The configuration of pgpool.conf on server1 is completed. Copy the pgpool.conf - to other Pgpool-II nodes (server2 and server3). + The configuration of pgpool.conf on server1 is completed. + Copy the pgpool.conf to other + Pgpool-II nodes (server2 and server3). [server1]# scp -p /etc/pgpool-II/pgpool.conf root@server2:/etc/pgpool-II/pgpool.conf @@ -1078,132 +1151,154 @@ log_rotation_size = 10MB - - Starting/Stopping Pgpool-II - - Next we start Pgpool-II. Before starting - Pgpool-II, please start - PostgreSQL servers first. - Also, when stopping PostgreSQL, it is necessary to - stop Pgpool-II first. - - - - - Starting Pgpool-II - - - In section Before Starting, - we already set the auto-start of Pgpool-II. To start - Pgpool-II, restart the whole system or execute the following command. 
- - -# systemctl start pgpool.service - - - - - Stopping Pgpool-II - - -# systemctl stop pgpool.service - - - - - How to use Let's start to use Pgpool-II. - First, we start the primary PostgreSQL. - + + Starting/Stopping Pgpool-II + + + + Starting Pgpool-II + + + First, let's start Pgpool-II. + + + Before starting Pgpool-II, + the PostgreSQL primary server must be already running. + If PostgreSQL primary server is not running, start it first + using the following command. + + [server1]# su - postgres -[server1]$ /usr/pgsql-15/bin/pg_ctl start -D $PGDATA - - - Then let's start Pgpool-II on server1, - server2, server3 by using the following command. - - -# systemctl start pgpool.service - +[server1]$ /usr/pgsql-16/bin/pg_ctl start -D $PGDATA + + + Start Pgpool-II on server1, + server2, server3 by using the following command. + + +[all servers]# systemctl start pgpool.service + + + + + Stopping Pgpool-II + + + When stopping PostgreSQL, + Pgpool-II must be stopped first. + + +[all servers]# systemctl stop pgpool.service + + + + - Set up PostgreSQL standby server + Setting up PostgreSQL standby server First, we should set up PostgreSQL standby server by - using Pgpool-II online recovery functionality. Ensure - that recovery_1st_stage and pgpool_remote_start - scripts used by pcp_recovery_node command are in database - cluster directory of PostgreSQL primary server (server1). + using Pgpool-II online recovery functionality. + + + Connect to Pgpool-II via virtual IP to check the status of backend nodes. + As shown in the result, primary server is running on server1, + standby servers on server2 and server3 + are in "down" status. -# pcp_recovery_node -h 192.168.137.150 -p 9898 -U pgpool -n 1 +[any server]# psql -h 192.168.100.50 -p 9999 -U pgpool postgres -c "show pool_nodes" +Password for user pgpool: + node_id | hostname | port | status | pg_status | lb_weight | role | pg_role | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change +---------+----------+------+--------+-----------+-----------+---------+---------+------------+-------------------+-------------------+-------------------+------------------------+--------------------- + 0 | server1 | 5432 | up | up | 0.333333 | primary | primary | 0 | true | 0 | | | 2023-11-10 15:30:14 + 1 | server2 | 5432 | down | down | 0.333333 | standby | unknown | 0 | false | 0 | | | 2023-11-10 15:30:14 + 2 | server3 | 5432 | down | down | 0.333333 | standby | unknown | 0 | false | 0 | | | 2023-11-10 15:30:14 +(3 rows) + + + Before running command, + ensure that recovery_1st_stage and + pgpool_remote_start scripts exist in the + data directory of PostgreSQL + primary server (server1). + + +[any server]# pcp_recovery_node -h 192.168.100.50 -p 9898 -U pgpool -n 1 -W Password: pcp_recovery_node -- Command Successful -# pcp_recovery_node -h 192.168.137.150 -p 9898 -U pgpool -n 2 +[any server]# pcp_recovery_node -h 192.168.100.50 -p 9898 -U pgpool -n 2 -W Password: pcp_recovery_node -- Command Successful After executing pcp_recovery_node command, - verify that server2 and server3 - are started as PostgreSQL standby server. + verify that PostgreSQL standby servers + are running on server2 and server3. 
-# psql -h 192.168.137.150 -p 9999 -U pgpool postgres -c "show pool_nodes" +[any server]# psql -h 192.168.100.50 -p 9999 -U pgpool postgres -c "show pool_nodes" Password for user pgpool node_id | hostname | port | status | pg_status | lb_weight | role | pg_role | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change ---------+----------+------+--------+-----------+-----------+---------+---------+------------+-------------------+-------------------+-------------------+------------------------+--------------------- - 0 | server1 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2021-10-19 07:00:57 - 1 | server2 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2021-10-19 07:00:57 - 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2021-10-19 07:00:57 + 0 | server1 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2023-11-10 15:30:14 + 1 | server2 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2023-11-10 16:32:33 + 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2023-11-10 16:33:08 (3 rows) - Switching active/standby watchdog + Switching leader/standby watchdog - Confirm the watchdog status by using pcp_watchdog_info. The Pgpool-II server which is started first run as LEADER. + Confirm the watchdog status by using pcp_watchdog_info. The Pgpool-II server which is started first runs as LEADER. -# pcp_watchdog_info -h 192.168.137.150 -p 9898 -U pgpool +[any server]# pcp_watchdog_info -h 192.168.100.50 -p 9898 -U pgpool -W Password: 3 3 YES server1:9999 Linux server1 server1 -server1:9999 Linux server1 server1 9999 9000 4 LEADER 0 MEMBER #The Pgpool-II server started first became "LEADER". -server2:9999 Linux server2 server2 9999 9000 7 STANDBY 0 MEMBER #run as standby -server3:9999 Linux server3 server3 9999 9000 7 STANDBY 0 MEMBER #run as standby +server1:9999 Linux server1 server1 9999 9000 4 LEADER 0 MEMBER # The Pgpool-II server started first becames "LEADER". +server2:9999 Linux server2 server2 9999 9000 7 STANDBY 0 MEMBER # running as STANDBY +server3:9999 Linux server3 server3 9999 9000 7 STANDBY 0 MEMBER # running as STANDBY - Stop active server server1, then server2 or - server3 will be promoted to active server. To stop - server1, we can stop Pgpool-II - service or shutdown the whole system. Here, we stop Pgpool-II service. + If the LEADER Pgpool-II + on server1 goes down, standby + Pgpool-II on server2 or + server3 will become the new LEADER. + + + To verify this behavior, you may stop Pgpool-II + service or shutdown the whole system. Here, we stop + Pgpool-II service. 
[server1]# systemctl stop pgpool.service -# pcp_watchdog_info -p 9898 -h 192.168.137.150 -U pgpool +[server1]# pcp_watchdog_info -p 9898 -h 192.168.100.50 -U pgpool -W Password: 3 3 YES server2:9999 Linux server2 server2 -server2:9999 Linux server2 server2 9999 9000 4 LEADER 0 MEMBER #server2 is promoted to LEADER -server1:9999 Linux server1 server1 9999 9000 10 SHUTDOWN 0 MEMBER #server1 is stopped -server3:9999 Linux server3 server3 9999 9000 7 STANDBY 0 MEMBER #server3 runs as STANDBY +server2:9999 Linux server2 server2 9999 9000 4 LEADER 0 MEMBER # server2 becomes LEADER +server1:9999 Linux server1 server1 9999 9000 10 SHUTDOWN 0 MEMBER # server1 is stopped +server3:9999 Linux server3 server3 9999 9000 7 STANDBY 0 MEMBER # server3 is running as a STANDBY - Start Pgpool-II (server1) which we have stopped again, - and verify that server1 runs as a standby. + Restart the stopped Pgpool-II + on server1 and verify that it is running + as a STANDBY. [server1]# systemctl start pgpool.service -[server1]# pcp_watchdog_info -p 9898 -h 192.168.137.150 -U pgpool +[server1]# pcp_watchdog_info -p 9898 -h 192.168.100.50 -U pgpool -W Password: 3 3 YES server2:9999 Linux server2 server2 @@ -1216,43 +1311,48 @@ server3:9999 Linux server3 server3 9999 9000 7 STANDBY 0 MEMBER Failover - First, use psql to connect to PostgreSQL via virtual IP, + First, use psql to connect to + PostgreSQL via virtual IP, and verify the backend information. -# psql -h 192.168.137.150 -p 9999 -U pgpool postgres -c "show pool_nodes" +# psql -h 192.168.100.50 -p 9999 -U pgpool postgres -c "show pool_nodes" Password for user pgpool: node_id | hostname | port | status | pg_status | lb_weight | role | pg_role | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change ---------+----------+------+--------+-----------+-----------+---------+---------+------------+-------------------+-------------------+-------------------+------------------------+--------------------- - 0 | server1 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2021-10-19 07:08:14 - 1 | server2 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2021-10-19 07:08:14 - 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2021-10-19 07:08:14 + 0 | server1 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2023-11-10 15:30:14 + 1 | server2 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2023-11-10 16:32:33 + 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2023-11-10 16:33:08 (3 rows) - Next, stop primary PostgreSQL server - server1, and verify automatic failover. + Next, stop the primary PostgreSQL server + on server1 and verify that failover is performed + automatically. -[server1]$ pg_ctl -D /var/lib/pgsql/15/data -m immediate stop +[server1]$ pg_ctl -D /var/lib/pgsql/16/data -m immediate stop - After stopping PostgreSQL on server1, - failover occurs and PostgreSQL on - server2 becomes new primary DB. + After stopping PostgreSQL on + server1, failover occurs. + PostgreSQL on + server2 becomes the new primary and + the standby server on server3 + is configured as a standby of the new primary. 
-# psql -h 192.168.137.150 -p 9999 -U pgpool postgres -c "show pool_nodes" +# psql -h 192.168.100.50 -p 9999 -U pgpool postgres -c "show pool_nodes" Password for user pgpool: node_id | hostname | port | status | pg_status | lb_weight | role | pg_role | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change ---------+----------+------+--------+-----------+-----------+---------+---------+------------+-------------------+-------------------+-------------------+------------------------+--------------------- - 0 | server1 | 5432 | down | down | 0.333333 | standby | unknown | 0 | false | 0 | | | 2021-10-19 07:10:01 - 1 | server2 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2021-10-19 07:10:01 - 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2021-10-19 07:10:03 + 0 | server1 | 5432 | down | down | 0.333333 | standby | unknown | 0 | false | 0 | | | 2023-11-10 17:05:40 + 1 | server2 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2023-11-10 17:05:40 + 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2023-11-10 17:05:51 (3 rows) - server3 is running as standby of new primary server2. + server3 is running as a standby of new primary server2. @@ -1272,10 +1372,10 @@ pid | 7198 usesysid | 16385 usename | repl application_name | server3 -client_addr | 192.168.137.103 +client_addr | 192.168.100.53 client_hostname | client_port | 40916 -backend_start | 2021-10-19 07:10:03.067241+00 +backend_start | 2023-11-10 17:10:03.067241+00 backend_xmin | state | streaming sent_lsn | 0/12000260 @@ -1287,35 +1387,34 @@ flush_lag | replay_lag | sync_priority | 0 sync_state | async -reply_time | 2021-10-19 07:11:53.886477+00 +reply_time | 2023-11-10 17:17:23.886477+00 Online Recovery - Here, we use Pgpool-II online recovery functionality to - restore server1 (old primary server) as a standby. Before - restoring the old primary server, please ensure that - recovery_1st_stage and pgpool_remote_start scripts - exist in database cluster directory of current primary server server2. + Here, we use Pgpool-II online recovery + feature to restore the former primary on server1 + as a standby. -# pcp_recovery_node -h 192.168.137.150 -p 9898 -U pgpool -n 0 +[any server]# pcp_recovery_node -h 192.168.100.50 -p 9898 -U pgpool -n 0 -W Password: pcp_recovery_node -- Command Successful - Then verify that server1 is started as a standby. + Then verify that PostgreSQL on server1 is + running as a standby. 
-# psql -h 192.168.137.150 -p 9999 -U pgpool postgres -c "show pool_nodes" +[any server]# psql -h 192.168.100.50 -p 9999 -U pgpool postgres -c "show pool_nodes" Password for user pgpool: node_id | hostname | port | status | lb_weight | role | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change ---------+----------+------+--------+-----------+-----------+---------+---------+------------+-------------------+-------------------+-------------------+------------------------+--------------------- - 0 | server1 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2021-10-19 07:14:06 - 1 | server2 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2021-10-19 07:10:01 - 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2021-10-19 07:10:03 + 0 | server1 | 5432 | up | up | 0.333333 | standby | standby | 0 | true | 0 | streaming | async | 2023-11-10 17:22:03 + 1 | server2 | 5432 | up | up | 0.333333 | primary | primary | 0 | false | 0 | | | 2023-11-10 17:05:40 + 2 | server3 | 5432 | up | up | 0.333333 | standby | standby | 0 | false | 0 | streaming | async | 2023-11-10 17:05:51 (3 rows) diff --git a/src/sample/scripts/escalation.sh.sample b/src/sample/scripts/escalation.sh.sample index ffffa3302..9ca5db8f9 100755 --- a/src/sample/scripts/escalation.sh.sample +++ b/src/sample/scripts/escalation.sh.sample @@ -10,7 +10,7 @@ SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.s SSH_TIMEOUT=5 PGPOOLS=(server1 server2 server3) -VIP=192.168.137.150 +VIP=192.168.100.50 DEVICE=enp0s8 for pgpool in "${PGPOOLS[@]}"; do -- 2.39.5