From e8643465be0d53356fee7b729b91ee93d56562f7 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sun, 5 Aug 2018 10:54:18 +0900 Subject: [PATCH] Fix segfault when node 0 is in down status. MASTER_CONNECTION refers to the connection to the "master" node. "Master" means the first live backend appearing in pgpool.conf. The master node is determined at the time of failover. Unfortunately, when both health check and fail_over_on_backend_error are disabled, there is no chance of failover, which means the master node id remains at the default value 0. So MASTER_CONNECTION refers to node 0, and the connection is NULL. The fix is as follows. If an attempt to connect to a backend fails, check the master node id in shared memory. If the master node id is the failed node, then look for a new master node using get_next_master_node (this was a static function, but it is now public) and store the new node id as the master node id in the shared memory area. Problem reported by Muhammad Usama in [pgpool-hackers: 2905].
--- src/include/pool.h | 1 + src/main/pgpool_main.c | 3 +-- src/protocol/pool_connection_pool.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/include/pool.h b/src/include/pool.h index 0e1505818..a8922f34b 100644 --- a/src/include/pool.h +++ b/src/include/pool.h @@ -754,5 +754,6 @@ extern void do_health_check_child(int *node_id); extern POOL_NODE_STATUS *verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots); extern POOL_NODE_STATUS *pool_get_node_status(void); extern void pool_set_backend_status_changed_time(int backend_id); +extern int get_next_master_node(void); #endif /* POOL_H */ diff --git a/src/main/pgpool_main.c b/src/main/pgpool_main.c index e659aaa09..5b73dbf8d 100644 --- a/src/main/pgpool_main.c +++ b/src/main/pgpool_main.c @@ -126,7 +126,6 @@ static void wakeup_children(void); static void reload_config(void); static int pool_pause(struct timeval *timeout); static void kill_all_children(int sig); -static int get_next_master_node(void); static pid_t fork_follow_child(int old_master, int new_primary, int old_primary); static int read_status_file(bool discard_status); static RETSIGTYPE exit_handler(int sig); @@ -1415,7 +1414,7 @@ static RETSIGTYPE exit_handler(int sig) * Calculate next valid master node id. * If no valid node found, returns -1. 
*/ -static int get_next_master_node(void) +int get_next_master_node(void) { int i; diff --git a/src/protocol/pool_connection_pool.c b/src/protocol/pool_connection_pool.c index c02bec244..bb92b8346 100644 --- a/src/protocol/pool_connection_pool.c +++ b/src/protocol/pool_connection_pool.c @@ -890,6 +890,17 @@ static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p) /* set down status to local status area */ *(my_backend_status[i]) = CON_DOWN; + /* if master_node_id is not updated, the update it */ + if (Req_info->master_node_id == i) + { + int old_master = Req_info->master_node_id; + Req_info->master_node_id = get_next_master_node(); + + ereport(LOG, + (errmsg("master node %d is down. Update master node to %d", + old_master, Req_info->master_node_id))); + } + /* make sure that we need to restart the process after * finishing this session */ -- 2.39.5