Fix segfault when node 0 is in down status.
author: Tatsuo Ishii <ishii@postgresql.org>
Sun, 5 Aug 2018 01:54:18 +0000 (10:54 +0900)
committer: Tatsuo Ishii <ishii@postgresql.org>
Sun, 5 Aug 2018 02:00:00 +0000 (11:00 +0900)
MASTER_CONNECTION refers to the connection to the "master"
node. "Master" means the first live backend appearing in
pgpool.conf. The master node is determined at the time of failover.
Unfortunately, when both health check and fail_over_on_backend_error
are disabled, there's no chance of failover, which means the master
node id remains at the default value 0. So MASTER_CONNECTION
refers to node 0, and the connection is NULL.

Fix is as follows.

If an attempt to connect to a backend fails, check the master node id in
the shared memory.  If the master node id is the failed node, then
look for a new master node using get_next_master_node (this was a static
function, but it is now made public) and set the returned node id as the
master node id in the shared memory area.

Problem reported by Muhammad Usama in [pgpool-hackers: 2905].

src/include/pool.h
src/main/pgpool_main.c
src/protocol/pool_connection_pool.c

index 0e15058183a4e46c55f9465f32e7f52a691da2c5..a8922f34b43a1c147f1927fde50353c26e0e1d19 100644 (file)
@@ -754,5 +754,6 @@ extern void do_health_check_child(int *node_id);
 extern POOL_NODE_STATUS *verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots);
 extern POOL_NODE_STATUS *pool_get_node_status(void);
 extern void pool_set_backend_status_changed_time(int backend_id);
+extern int get_next_master_node(void);
 
 #endif /* POOL_H */
index e659aaa098b2794f82ccb64f5c51e047734fcf52..5b73dbf8d393d413f4f900e332254d764ccabdda 100644 (file)
@@ -126,7 +126,6 @@ static void wakeup_children(void);
 static void reload_config(void);
 static int pool_pause(struct timeval *timeout);
 static void kill_all_children(int sig);
-static int get_next_master_node(void);
 static pid_t fork_follow_child(int old_master, int new_primary, int old_primary);
 static int read_status_file(bool discard_status);
 static RETSIGTYPE exit_handler(int sig);
@@ -1415,7 +1414,7 @@ static RETSIGTYPE exit_handler(int sig)
  * Calculate next valid master node id.
  * If no valid node found, returns -1.
  */
-static int get_next_master_node(void)
+int get_next_master_node(void)
 {
        int i;
 
index c02bec244794ccfcfe15ff16ae0a65b5cbc41328..bb92b83461feeb29948f55d9e869e9e7524334ca 100644 (file)
@@ -890,6 +890,17 @@ static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
                                        /* set down status to local status area */
                                        *(my_backend_status[i]) = CON_DOWN;
 
+                                       /* if master_node_id is not updated, then update it */
+                                       if (Req_info->master_node_id == i)
+                                       {
+                                               int old_master = Req_info->master_node_id;
+                                               Req_info->master_node_id = get_next_master_node();
+
+                                               ereport(LOG,
+                                                               (errmsg("master node %d is down. Update master node to %d",
+                                                                               old_master, Req_info->master_node_id)));
+                                       }
+
                                        /* make sure that we need to restart the process after
                                         * finishing this session
                                         */