From bbc270fbe1e3571e8255484b2a0fbfee3b98e4ec Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 17 Apr 2018 15:10:32 +0900 Subject: [PATCH] Complete detach_false_primary feature. In addition to the previous commit: - Add new config variable detach_false_primary - Allow to run test along with streaming replication delay checking - English docs added (Japanese docs needed to be added later) - Regression test (018.detach_primary) is added - Sample configuration files are added - Process reporting is added --- doc/src/sgml/failover.sgml | 57 ++++ src/config/pool_config_variables.c | 10 + src/include/pool.h | 1 + src/include/pool_config.h | 4 +- src/main/pgpool_main.c | 315 ++++++++++++++++-- src/sample/pgpool.conf.sample | 6 + src/sample/pgpool.conf.sample-logical | 6 + src/sample/pgpool.conf.sample-master-slave | 6 + src/sample/pgpool.conf.sample-replication | 6 + src/sample/pgpool.conf.sample-stream | 6 + src/streaming_replication/pool_worker_child.c | 28 +- .../tests/018.detach_primary/test.sh | 57 ++++ src/utils/pool_process_reporting.c | 7 +- 13 files changed, 483 insertions(+), 26 deletions(-) create mode 100755 src/test/regression/tests/018.detach_primary/test.sh diff --git a/doc/src/sgml/failover.sgml b/doc/src/sgml/failover.sgml index b34684eaa..09b5ad5b9 100644 --- a/doc/src/sgml/failover.sgml +++ b/doc/src/sgml/failover.sgml @@ -358,6 +358,63 @@ + + detach_false_primary (boolean) + + detach_false_primary configuration parameter + + + + + If set to on, false primary nodes are detached. The default is + off. This parameter is only valid in streaming replication + mode and for PostgreSQL 9.6 or + later, since this feature + uses pg_stat_wal_receiver. + If PostgreSQL 9.5.x or an older + version is used, no error is raised; the feature is simply + ignored. + + + If there's no primary node, no checking will be performed. + + + If there's no standby node, and there's only one primary + node, no checking will be performed. 
+ + + If there's no standby node, and there are multiple primary + nodes, leave the primary node which has the youngest node + id and detach the rest of the primary nodes. + + + If there are one or more primaries and one or more standbys, + check the connectivity between primary and standby nodes by + using pg_stat_wal_receiver + if the PostgreSQL version is 9.6 or later. In + this case if all standby nodes connect to a primary node, + that primary is regarded as the "true" primary. Other primaries + are regarded as "false" primaries and the false primaries will + be detached if detach_false_primary is + true. If no "true" primary is found, nothing will happen. + + + When Pgpool-II starts, the + checking of false primaries is performed only once in + the Pgpool-II main + process. If sr_check_period is greater + than 0, the false primary checking will be performed at + the same time as the streaming replication delay checking. + + + This parameter is only applicable in the streaming replication mode. + + + This parameter can be changed by reloading the Pgpool-II configurations. 
+ + + + diff --git a/src/config/pool_config_variables.c b/src/config/pool_config_variables.c index 3ab803f15..e5c5b8a0c 100644 --- a/src/config/pool_config_variables.c +++ b/src/config/pool_config_variables.c @@ -368,6 +368,16 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL,NULL }, + { + {"detach_false_primary", CFGCXT_RELOAD, FAILOVER_CONFIG, + "Automatically detaches false primary node.", + CONFIG_VAR_TYPE_BOOL,false, 0 + }, + &g_pool_config.detach_false_primary, + false, + NULL, NULL,NULL + }, + { {"insert_lock", CFGCXT_RELOAD, REPLICATION_CONFIG, "Automatically locks table with INSERT to keep SERIAL data consistency", diff --git a/src/include/pool.h b/src/include/pool.h index 97f91de37..6276b8e61 100644 --- a/src/include/pool.h +++ b/src/include/pool.h @@ -751,6 +751,7 @@ extern int pool_frontend_exists(void); extern pid_t pool_waitpid(int *status); extern int write_status_file(void); extern void do_health_check_child(int *node_id); +extern POOL_NODE_STATUS *verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots); extern POOL_NODE_STATUS *pool_get_node_status(void); #endif /* POOL_H */ diff --git a/src/include/pool_config.h b/src/include/pool_config.h index 3366e6f70..8ff89d434 100644 --- a/src/include/pool_config.h +++ b/src/include/pool_config.h @@ -6,7 +6,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2017 PgPool Global Development Group + * Copyright (c) 2003-2018 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -209,7 +209,7 @@ typedef struct { * pgpool-II 2.2.x or earlier. If set to false, pgpool will report * an error and disconnect the session. 
*/ - + bool detach_false_primary; /* If true, detach false primary */ char *recovery_user; /* PostgreSQL user name for online recovery */ char *recovery_password; /* PostgreSQL user password for online recovery */ char *recovery_1st_stage_command; /* Online recovery command in 1st stage */ diff --git a/src/main/pgpool_main.c b/src/main/pgpool_main.c index c6e5c2e0e..8a97064bd 100644 --- a/src/main/pgpool_main.c +++ b/src/main/pgpool_main.c @@ -139,7 +139,6 @@ static RETSIGTYPE wakeup_handler(int sig); static void initialize_shared_mem_objects(bool clear_memcache_oidmaps); static int trigger_failover_command(int node, const char *command_line, int old_master, int new_master, int old_primary); -static POOL_NODE_STATUS *verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots); static int find_primary_node(void); static int find_primary_node_repeatedly(void); static void terminate_all_childrens(); @@ -148,6 +147,8 @@ static char* process_name_from_pid(pid_t pid); static void sync_backend_from_watchdog(void); static void update_backend_quarantine_status(void); static void degenerate_all_quarantine_nodes(void); +static int get_server_version(POOL_CONNECTION_POOL_SLOT **slots, int node_id); +static void get_info_from_conninfo(char *conninfo, char *host, char *port); static struct sockaddr_un un_addr; /* unix domain socket path */ static struct sockaddr_un pcp_un_addr; /* unix domain socket path for PCP */ @@ -453,17 +454,23 @@ int PgpoolMain(bool discard_status, bool clear_memcache_oidmaps) (errmsg("%s successfully started. version %s (%s)", PACKAGE, VERSION, PGPOOLVERSION))); /* Very early stage node checking. It is assumed that find_primary_node got called. */ - for (i=0;idetach_false_primary) + { + n = i; + degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER|REQ_DETAIL_CONFIRMED); + } + } } } } @@ -2827,16 +2834,18 @@ static int trigger_failover_command(int node, const char *command_line, /* * This function is used by find_primary_node(). 
Find primary node/standby * node and returns static array of status for each backend node. This - * function must not throw ereport. + * function must not throw ERROR or FATAL. */ static POOL_NODE_STATUS pool_node_status[MAX_NUM_BACKENDS]; -static POOL_NODE_STATUS * +POOL_NODE_STATUS * verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots) { POOL_SELECT_RESULT *res; - bool found_primary = false; - int i; + int num_primaries = 0; + int num_standbys = 0; + int i, j; + BackendInfo *backend_info; for (i=0;idata[0] && !strcmp(res->data[0], "t")) { /* Possibly standby */ pool_node_status[i] = POOL_NODE_STATUS_STANDBY; + num_standbys++; } else if (res->data[0] && !strcmp(res->data[0], "f")) { - /* Possibly primary. Let's see if we already found a primary - * (checking split brain) + /* Possibly primary */ + pool_node_status[i] = POOL_NODE_STATUS_PRIMARY; + num_primaries++; + } + free_select_result(res); + } + + /* + * If there's no primary node, there's no point to run additional + * testings. + */ + if (num_primaries == 0) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: there's no primary node"))); + return pool_node_status; + } + + /* + * There's no standby node. + */ + if (num_standbys == 0) + { + if (num_primaries == 1) + { + /* + * If there's only one primary node and there's no standby, there's no + * point to run additional testings. */ - if (found_primary) - pool_node_status[i] = POOL_NODE_STATUS_INVALID; - else + ereport(DEBUG1, + (errmsg("verify_backend_node_status: there's no standby node"))); + return pool_node_status; + } + else + { + /* + * There are multiple primaries and there's no standby + * node. There's no way to decide which one is correct. We just + * leave the youngest primary node and detach rests if allowed. 
+ */ + for (i=0;idetach_false_primary) + pool_node_status[i] = POOL_NODE_STATUS_INVALID; + else + pool_node_status[i] = POOL_NODE_STATUS_UNUSED; + } + } + } + } + } + return pool_node_status; + } + + /* + * There are multiple standbys + */ + else + { + bool check_connectivity = false; + int wal_receiver_status = 0; + int wal_receiver_conninfo = 1; + char host[1024]; + char port[1024]; + int primary[MAX_NUM_BACKENDS]; + int true_primary = -1; + + ereport(DEBUG1, + (errmsg("verify_backend_node_status: multiple standbys: %d", num_standbys))); + + /* + * Check connectivity between primary and standby by using + * pg_stat_wal_receiver (only >= 9.6.0) if there's more than or equal + * to 1 primary. + */ + for (i=0;i= 90600) + { + check_connectivity = true; + break; + } + } + if (!check_connectivity) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: server verion is lower than 9.6.0. Skipping connectivity checks"))); + return pool_node_status; + } + + ereport(DEBUG1, + (errmsg("verify_backend_node_status: checking connectivity"))); + + for (i=0;inumrows <= 0) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: pg_stat_wal_receiver returned no row. 
standby %d", j))); + free_select_result(res); + continue; + } + if (res->nullflags[wal_receiver_status] == -1) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: pg_stat_wal_receiver status for standby %d is NULL", j))); + free_select_result(res); + continue; + } + if (strcmp(res->data[wal_receiver_status], "streaming")) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: pg_stat_wal_receiver status is not \"streaming\" for standby %d (%s)", j, res->data[wal_receiver_status]))); + free_select_result(res); + continue; + + } + if (res->nullflags[wal_receiver_conninfo] == -1) + { + ereport(DEBUG1, + (errmsg("verify_backend_node_status: pg_stat_wal_receiver conninfo for standby %d is NULL", j))); + continue; + } + get_info_from_conninfo(res->data[wal_receiver_conninfo], host, port); + ereport(DEBUG1, + (errmsg("verify_backend_node_status: conninfo for standby %d is === %s ===. host:%s port:%s", j, res->data[wal_receiver_conninfo], host, port))); + free_select_result(res); + + /* get primary backend info */ + backend_info = pool_get_node_info(i); + + /* verify host and port */ + if (((!strcmp(backend_info->backend_hostname, "/tmp") && *host == '\0') || + !strcmp(backend_info->backend_hostname, host)) && + (backend_info->backend_port == atoi(port))) + { + /* the standby connects to the primary */ + primary[i]++; + if (primary[i] == num_standbys) + true_primary = i; + } + else + { + /* the standby does not connect to the primary */ + ereport(LOG, + (errmsg("verify_backend_node_status: primary %d does not connect to standby %d", i, j))); + } + } + } + } + } + + /* + * Check if each primary connected standbys. If all standbys connect + * to one of primaries, then the primary is good. Other primaries are + * false. If none of primaries does not own all connected standbys, + * we cannot judge which primary is good. 
+ */ + for (i=0;i= 0) + { + if (primary[i] < num_standbys) + { + ereport(LOG, + (errmsg("verify_backend_node_status: primary %d owns only %d standbys out of %d", i, primary[i], num_standbys))); + + /* + * If the good primary exists and detach_false_primary is + * true, then ask to detach the false primary + */ + if (true_primary >= 0 && pool_config->detach_false_primary) + pool_node_status[i] = POOL_NODE_STATUS_INVALID; + } } } - free_select_result(res); } return pool_node_status; @@ -3776,3 +3990,62 @@ static void sync_backend_from_watchdog(void) } } } + +/* + * Obtain backend server version number and cache it. Note that returned + * version number is in the static memory area. + */ +static int +get_server_version(POOL_CONNECTION_POOL_SLOT **slots, int node_id) +{ + static int server_versions[MAX_NUM_BACKENDS]; + + char *query; + POOL_SELECT_RESULT *res; + + if (server_versions[node_id] == 0) + { + query = "SELECT current_setting('server_version_num')"; + + /* Get backend server version. If the query fails, keep previous info. */ + if (get_query_result(slots, node_id, query, &res) == 0) + { + server_versions[node_id] = atoi(res->data[0]); + ereport(DEBUG1, + (errmsg("get_server_version: backend %d server version: %d", node_id, server_versions[node_id]))); + free_select_result(res); + } + } + return server_versions[node_id]; +} + +/* + * Get info from conninfo string. 
+ */ +static void get_info_from_conninfo(char *conninfo, char *host, char *port) +{ + char *p; + + *host = '\0'; + *port = '\0'; + + p = strstr(conninfo, "host"); + if (p) + { + while (*p && *p != ' ') + *host++ = *p++; + *host = '\0'; + } + + p = strstr(conninfo, "port"); + if (p) + { + /* skip "port=" */ + while (*p && *p++ != '=') + ; + + while (*p && *p != ' ') + *port++ = *p++; + *port = '\0'; + } +} diff --git a/src/sample/pgpool.conf.sample b/src/sample/pgpool.conf.sample index ee91e4c44..032edae12 100644 --- a/src/sample/pgpool.conf.sample +++ b/src/sample/pgpool.conf.sample @@ -439,6 +439,12 @@ fail_over_on_backend_error = on # If set to off, pgpool will report an # error and disconnect the session. +detach_false_primary = off + # Detach false primary if on. Only + # valid in streaming replicaton + # mode and with PostgreSQL 9.6 or + # after. + search_primary_node_timeout = 300 # Timeout in seconds to search for the # primary node when a failover occurs. diff --git a/src/sample/pgpool.conf.sample-logical b/src/sample/pgpool.conf.sample-logical index d10f7bb2d..062fe5d2e 100644 --- a/src/sample/pgpool.conf.sample-logical +++ b/src/sample/pgpool.conf.sample-logical @@ -438,6 +438,12 @@ fail_over_on_backend_error = on # If set to off, pgpool will report an # error and disconnect the session. +detach_false_primary = off + # Detach false primary if on. Only + # valid in streaming replicaton + # mode and with PostgreSQL 9.6 or + # after. + search_primary_node_timeout = 300 # Timeout in seconds to search for the # primary node when a failover occurs. diff --git a/src/sample/pgpool.conf.sample-master-slave b/src/sample/pgpool.conf.sample-master-slave index 98a5447c8..f1757dae1 100644 --- a/src/sample/pgpool.conf.sample-master-slave +++ b/src/sample/pgpool.conf.sample-master-slave @@ -438,6 +438,12 @@ fail_over_on_backend_error = on # If set to off, pgpool will report an # error and disconnect the session. +detach_false_primary = off + # Detach false primary if on. 
Only + # valid in streaming replication + # mode and with PostgreSQL 9.6 or + # after. + search_primary_node_timeout = 300 # Timeout in seconds to search for the # primary node when a failover occurs. diff --git a/src/sample/pgpool.conf.sample-replication b/src/sample/pgpool.conf.sample-replication index 694598eb1..b4c526b07 100644 --- a/src/sample/pgpool.conf.sample-replication +++ b/src/sample/pgpool.conf.sample-replication @@ -436,6 +436,12 @@ fail_over_on_backend_error = on # If set to off, pgpool will report an # error and disconnect the session. +detach_false_primary = off + # Detach false primary if on. Only + # valid in streaming replication + # mode and with PostgreSQL 9.6 or + # after. + search_primary_node_timeout = 300 # Timeout in seconds to search for the # primary node when a failover occurs. diff --git a/src/sample/pgpool.conf.sample-stream b/src/sample/pgpool.conf.sample-stream index 01e1755fb..ed23d560c 100644 --- a/src/sample/pgpool.conf.sample-stream +++ b/src/sample/pgpool.conf.sample-stream @@ -438,6 +438,12 @@ fail_over_on_backend_error = on # If set to off, pgpool will report an # error and disconnect the session. +detach_false_primary = off + # Detach false primary if on. Only + # valid in streaming replication + # mode and with PostgreSQL 9.6 or + # after. + search_primary_node_timeout = 300 # Timeout in seconds to search for the # primary node when a failover occurs. 
diff --git a/src/streaming_replication/pool_worker_child.c b/src/streaming_replication/pool_worker_child.c index 212bb00c1..4189fee00 100644 --- a/src/streaming_replication/pool_worker_child.c +++ b/src/streaming_replication/pool_worker_child.c @@ -165,9 +165,31 @@ void do_worker_child(void) establish_persistent_connection(); PG_TRY(); { + POOL_NODE_STATUS *node_status; + int i; /* Do replication time lag checking */ check_replication_time_lag(); + + /* Check node status */ + node_status = verify_backend_node_status(slots); + for (i=0;idetach_false_primary) + { + n = i; + degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER|REQ_DETAIL_CONFIRMED); + } + } + } } PG_CATCH(); { @@ -321,7 +343,7 @@ static void check_replication_time_lag(void) query = "SELECT pg_last_xlog_replay_location()"; } - if (get_query_result(slots, i, query, &res) == 0) + if (get_query_result(slots, i, query, &res) == 0 && res->nullflags[0] != -1) { lsn[i] = text_to_lsn(res->data[0]); free_select_result(res); @@ -492,6 +514,7 @@ int get_query_result(POOL_CONNECTION_POOL_SLOT **slots, int backend_id, char *qu return sts; } +/* if ((*res)->data[0] == NULL) { free_select_result(*res); @@ -501,6 +524,7 @@ int get_query_result(POOL_CONNECTION_POOL_SLOT **slots, int backend_id, char *qu return sts; } + if ((*res)->nullflags[0] == -1) { free_select_result(*res); @@ -509,7 +533,7 @@ int get_query_result(POOL_CONNECTION_POOL_SLOT **slots, int backend_id, char *qu errdetail("node id (%d)", backend_id))); return sts; } - +*/ sts = 0; return sts; } diff --git a/src/test/regression/tests/018.detach_primary/test.sh b/src/test/regression/tests/018.detach_primary/test.sh new file mode 100755 index 000000000..8d54d09e8 --- /dev/null +++ b/src/test/regression/tests/018.detach_primary/test.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +#------------------------------------------------------------------- +# test script for testing the feature of detach_false_primary. 
+# +source $TESTLIBS +TESTDIR=testdir +PSQL=$PGBIN/psql +PG_CTL=$PGBIN/pg_ctl +export PGDATABASE=test + +rm -fr $TESTDIR +mkdir $TESTDIR +cd $TESTDIR + +version=`$PSQL --version|awk '{print $3}'` +result=`echo "$version >= 9.6"|bc` +if [ $result = 0 ];then + echo "PostgreSQL version $version is 9.5 or before. Skipping test." + exit 0 +fi + +# create test environment +echo -n "creating test environment..." +$PGPOOL_SETUP -m s -n 3 -s || exit 1 +echo "done." + +source ./bashrc.ports + +echo "detach_false_primary=on" >> etc/pgpool.conf +echo "sr_check_period = 1" >> etc/pgpool.conf +./startall +export PGPORT=$PGPOOL_PORT +wait_for_pgpool_startup + +# promote #3 node to create false primary +$PG_CTL -D data2 promote + +sleep 10 +wait_for_pgpool_startup +$PSQL -c "show pool_nodes" postgres > show_pool_nodes +primary_node=`grep primary show_pool_nodes|awk '{print $1}'` +if [ $primary_node != 0 ];then + echo "primary node is not 0" + ./shutdownall + exit 1 +fi + +false_primary_node=`grep down show_pool_nodes|awk '{print $1}'` +if [ $false_primary_node != 2 ];then + echo "false primary node is not 2" + ./shutdownall + exit 1 +fi + +./shutdownall + +exit 0 diff --git a/src/utils/pool_process_reporting.c b/src/utils/pool_process_reporting.c index cb3d18f36..8f0694761 100644 --- a/src/utils/pool_process_reporting.c +++ b/src/utils/pool_process_reporting.c @@ -5,7 +5,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2016 PgPool Global Development Group + * Copyright (c) 2003-2018 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -569,6 +569,11 @@ POOL_REPORT_CONFIG* get_config(int *nrows) StrNCpy(status[i].desc, "fail over on backend error", POOLCONFIG_MAXDESCLEN); i++; + StrNCpy(status[i].name, "detach_false_primary", POOLCONFIG_MAXNAMELEN); + snprintf(status[i].value, 
POOLCONFIG_MAXVALLEN, "%d", pool_config->detach_false_primary); + StrNCpy(status[i].desc, "detach false primary", POOLCONFIG_MAXDESCLEN); + i++; + /* ONLINE RECOVERY */ StrNCpy(status[i].name, "recovery_user", POOLCONFIG_MAXNAMELEN); -- 2.39.5