From 4aa657e055250da9db9a4c5cde7260e8f24707cb Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 18 Jul 2023 10:10:27 +0900 Subject: [PATCH] Mitigate session disconnection issue in failover/failback/backend error. Previously Pgpool-II disconnected client sessions in various cases. This commit tries to avoid some of cases, especially when a backend goes down and the backend is not either primary (or main node) nor load balance node. Suppose we have 3 streaming replication PostgreSQL cluster and the client uses primary (node 0) and standby 1 (node 1), but does not use standby 2 (node 2) because the node 2 is not load balance node. In this case ideally shutting down node 2 should not disconnect the session. However the session is disconnected if the session processing a query while failover. The reason why session disconnection in failover is necessary is, there are bunch of places in the source code something like this: for (i = 0; i < NUM_BACKENDS; i++) { if (!VALID_BACKEND(i)) continue; : : VALID_BACKEND returns true if the backend is not in down status. If this code is executed while failover, the code may access the backend socket which is not available any more and will cause troubles including segfault. So inside VALID_BACKEND, we check whether failover is performed, and if so, the pgpool child process exits and the session disconnects. To aovid it, change VALID_BACKEND so that it waits for completion of failover. For this purpose new function wait_for_failover_to_finish() is added. It waits for the completion of failover up to MAX_FAILOVER_WAIT seconds (for it's fixed to 30). The change above will prevent unnecessary session disconnection for existing sessions. This commit also tries to prevent unnecessary session disconnection while accepting new sessions. There are multiple places where it could happen and this commit fixes them: - accepting new connection from client. In wait_for_new_connections, call wait_for_failover_to_finish to wait for completion of failover. - creating new connection to backend. After accepting connection request from client and before creating connection to backend, call wait_for_failover_to_finish to wait for completion of failover. - fixing broken socket. pool_get_cp checks whether exiting backend connection is broken. If it's broken, sleep 1 second to expect failover happens then calls wait_for_failover_to_finish(). - processing an application name. If an application name is included in a startup message, pgpool sends query like "SET application_name TO foo" to all backend nodes including node 2, which could cause a write error. To prevent the error, I modified connect_using_existing_connection, which is sending the SET command using do_command, so that do_command does not raise an ERROR by wrapping it in TRY/CATCH block. Note that even with all fixes above, I was not able to fix some cases where pool_write raises error. pool_write is used to write to backend socket and there are too many places to fix all of them. For now we need to run "pcp_detach_node 2" before shutdown it. pcp_detach_node will tell all pgpool child process that node 2 is going down. Even if a child process does not notice it and writes to backend 2 socket, there will be no error because node 2 is still alive. Finally this commit adds new regression test case 037.failover_session. For the test pgbench is used. There are 2 cases for continuous session (without -C option) and repeating connection/disconnection (with -C option) each. So there are 4 causes in the test: "=== test1: backend_weight2 = 0 and pgbench without -C option" "=== test2: backend_weight2 = 0 and pgbench with -C option" "=== test3: load_balance_mode = off and pgbench without -C option" "=== test4: load_balance_mode = off and pgbench with -C option" test2 and test4 requires pcp_detach_node before shutting down node 2. Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2023-July/004352.html --- src/context/pool_query_context.c | 48 ++++++- src/include/context/pool_query_context.h | 4 +- src/protocol/child.c | 39 +++++- src/protocol/pool_connection_pool.c | 10 +- src/protocol/pool_process_query.c | 2 +- .../tests/037.failover_session/test.sh | 122 ++++++++++++++++++ 6 files changed, 213 insertions(+), 12 deletions(-) create mode 100755 src/test/regression/tests/037.failover_session/test.sh diff --git a/src/context/pool_query_context.c b/src/context/pool_query_context.c index 8afe9b39e..8e89a53be 100644 --- a/src/context/pool_query_context.c +++ b/src/context/pool_query_context.c @@ -37,6 +37,7 @@ #include #include #include +#include /* * Where to send query @@ -326,12 +327,15 @@ pool_is_node_to_be_sent_in_current_query(int node_id) int pool_virtual_main_db_node_id(void) { + volatile POOL_REQUEST_INFO *my_req; POOL_SESSION_CONTEXT *sc; /* - * Check whether failover is in progress. If so, just abort this session. + * Check whether failover is in progress and we are child process. + * If so, we will wait for failover to finish. */ - if (Req_info->switching) + my_req = Req_info; + if (processType == PT_CHILD && my_req->switching) { #ifdef NOT_USED POOL_SETMASK(&BlockSig); @@ -341,7 +345,15 @@ pool_virtual_main_db_node_id(void) errhint("In a moment you should be able to reconnect to the database"))); POOL_SETMASK(&UnBlockSig); #endif - child_exit(POOL_EXIT_AND_RESTART); + /* + * Wait for failover to finish + */ + if (wait_for_failover_to_finish() == -2) + /* + * Waiting for failover/failback to finish was timed out. + * Time to exit this process (and session disconnection). + */ + child_exit(POOL_EXIT_AND_RESTART); } sc = pool_get_session_context(true); @@ -2269,3 +2281,33 @@ where_to_send_native_replication(POOL_QUERY_CONTEXT * query_context, char *query } } } + +/* + * Wait for failover/failback to finish. + * Return values: + * 0: no failover/failback occurred. + * -1: failover/failback occurred and finished within certain period. + * -2: failover/failback occurred and timed out. + */ +int +wait_for_failover_to_finish(void) +{ +#define MAX_FAILOVER_WAIT 30 /* waiting for failover finish timeout in seconds */ + + volatile POOL_REQUEST_INFO *my_req; + int ret = 0; + int i; + + /* + * Wait for failover to finish + */ + for (i = 0;i < MAX_FAILOVER_WAIT; i++) + { + my_req = Req_info; + if (my_req->switching == 0) + return ret; + ret = -1; /* failover/failback finished */ + sleep(1); + } + return -2; /* timed out */ +} diff --git a/src/include/context/pool_query_context.h b/src/include/context/pool_query_context.h index b732c90fd..d35c04986 100644 --- a/src/include/context/pool_query_context.h +++ b/src/include/context/pool_query_context.h @@ -6,7 +6,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2017 PgPool Global Development Group + * Copyright (c) 2003-2023 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -127,4 +127,6 @@ extern void pool_unset_cache_exceeded(void); extern bool pool_is_transaction_read_only(Node *node); extern void pool_force_query_node_to_backend(POOL_QUERY_CONTEXT * query_context, int backend_id); extern void check_object_relationship_list(char *name, bool is_func_name); +extern int wait_for_failover_to_finish(void); + #endif /* POOL_QUERY_CONTEXT_H */ diff --git a/src/protocol/child.c b/src/protocol/child.c index d64ff2998..5b4895be2 100644 --- a/src/protocol/child.c +++ b/src/protocol/child.c @@ -401,6 +401,14 @@ do_child(int *fds) backend_timer_expired = 0; } + /* + * Check whether failover/failback is ongoing and wait for it to + * finish. If it actually happened, update the private backend status + * because it is possible that a backend maybe went down. + */ + if (wait_for_failover_to_finish() < 0) + pool_initialize_private_backend_status(); + backend = get_backend_connection(child_frontend); if (!backend) { @@ -835,17 +843,31 @@ connect_using_existing_connection(POOL_CONNECTION * frontend, for (i = 0; i < NUM_BACKENDS; i++) { if (VALID_BACKEND(i)) - if (do_command(frontend, CONNECTION(backend, i), + { + /* + * We want to catch and ignore errors in do_command if a + * backend is just going down right now. Otherwise + * do_command raises an error and disconnects the + * connection to frontend. We can safely ignore error from + * "SET application_name" command if the backend goes + * down. + */ + PG_TRY(); + { + do_command(frontend, CONNECTION(backend, i), command_buf, MAJOR(backend), MAIN_CONNECTION(backend)->pid, - MAIN_CONNECTION(backend)->key, 0) != POOL_CONTINUE) + MAIN_CONNECTION(backend)->key, 0); + } + PG_CATCH(); { - ereport(ERROR, - (errmsg("unable to process command for backend connection"), - errdetail("do_command returned DEADLOCK status"))); + /* ignore the error message */ + MemoryContextSwitchTo(oldContext); + FlushErrorState(); } + PG_END_TRY(); + } } - pool_add_param(&MAIN(backend)->params, "application_name", sp->application_name); set_application_name_with_string(sp->application_name); } @@ -1663,6 +1685,11 @@ retry_accept: pause(); } + /* wait for failover/failback to finish */ + if (wait_for_failover_to_finish() < 0) + /* failover/failback occurred. Update private backend status */ + pool_initialize_private_backend_status(); + afd = accept(fd, (struct sockaddr *) &saddr->addr, &saddr->salen); save_errno = errno; diff --git a/src/protocol/pool_connection_pool.c b/src/protocol/pool_connection_pool.c index 7f3c44ce5..1fd683721 100644 --- a/src/protocol/pool_connection_pool.c +++ b/src/protocol/pool_connection_pool.c @@ -5,7 +5,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2020 PgPool Global Development Group + * Copyright (c) 2003-2023 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -43,6 +43,7 @@ #include #include "pool.h" +#include "context/pool_query_context.h" #include "utils/pool_stream.h" #include "utils/palloc.h" #include "pool_config.h" @@ -158,6 +159,13 @@ pool_get_cp(char *user, char *database, int protoMajor, int check_socket) ereport(LOG, (errmsg("connection closed."), errdetail("retry to create new connection pool"))); + /* + * It is possible that one of backend just broke. sleep 1 + * second to wait for failover occurres, then wait for the + * failover finishes. + */ + sleep(1); + wait_for_failover_to_finish(); for (j = 0; j < NUM_BACKENDS; j++) { diff --git a/src/protocol/pool_process_query.c b/src/protocol/pool_process_query.c index ee2fda8f7..872a5d855 100644 --- a/src/protocol/pool_process_query.c +++ b/src/protocol/pool_process_query.c @@ -1590,7 +1590,7 @@ do_command(POOL_CONNECTION * frontend, POOL_CONNECTION * backend, { ereport(ERROR, (errmsg("do command failed"), - errdetail("backend error: \"%s\"", string))); + errdetail("backend error: \"%s\" \"%s\"", string, query))); } } } diff --git a/src/test/regression/tests/037.failover_session/test.sh b/src/test/regression/tests/037.failover_session/test.sh new file mode 100755 index 000000000..196d6b323 --- /dev/null +++ b/src/test/regression/tests/037.failover_session/test.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +#------------------------------------------------------------------- +# Test script for session disconnection with failover. +# This test is for streaming replication mode only. +# +source $TESTLIBS +TESTDIR=testdir +PSQL=$PGBIN/psql +PG_CTL=$PGBIN/pg_ctl +PGBENCH=$PGBENCH_PATH +export PGDATABASE=test + +rm -fr $TESTDIR +mkdir $TESTDIR +cd $TESTDIR + +# create streaming replication, 3-node test environment. +echo -n "creating test environment..." +$PGPOOL_SETUP -m s -n 3 -s || exit 1 +echo "done." + +source ./bashrc.ports + +# PCP_PORT is defined in bashrc.ports +PCP_DETACH_NODE="$PGPOOL_INSTALL_DIR/bin/pcp_detach_node -w -h localhost -p $PCP_PORT 2" + +# customize pgpool.conf. disable load balance to node 2. +cat >> etc/pgpool.conf <> etc/pgpool.conf +echo "load_balance_mode = off" >> etc/pgpool.conf + +./startall +wait_for_pgpool_startup + +($PGBENCH -n -S -c 10 -T 5)& +sleep 1 +$PG_CTL -D data2 stop +wait $! +if [ $? != 0 ];then + echo "pgbench exited with error. test3 failed." + ./shutdownall + exit 1 +fi +./shutdownall + +echo "=== test4: load_balance_mode = off and pgbench with -C option" +# Same test as test3. The only the difference is load_balance_mode is +# off instead of backend_weitht2 = 0. To make sure that both have same +# effect against failover. + +./startall +wait_for_pgpool_startup + +($PGBENCH -n -S -C -c 10 -T 5)& +sleep 1 +echo $PCP_DETACH_NODE +$PCP_DETACH_NODE +sleep 3 +$PG_CTL -D data2 stop +wait $! +if [ $? != 0 ];then + echo "pgbench exited with error. test4 failed." + ./shutdownall + exit 1 +fi + +./shutdownall + +exit 0 -- 2.39.5