Fix for a possible issue in watchdog failover handling.
author Muhammad Usama <m.usama@gmail.com>
Thu, 2 Nov 2017 13:48:43 +0000 (18:48 +0500)
committer Muhammad Usama <m.usama@gmail.com>
Thu, 2 Nov 2017 13:48:43 +0000 (18:48 +0500)
The watchdog process should make sure that its master/coordinator status is
synchronised across the watchdog cluster before accepting failover requests.
This ensures that no other node is processing the same failover request.

src/watchdog/watchdog.c

index 8abb9e362b438ec1705245e455604c543e6bb334..e3a3cfc8d5e3cacb589161fa14ae8c5b50b180f5 100644 (file)
@@ -447,6 +447,7 @@ static void cluster_service_message_processor(WatchdogNode* wdNode, WDPacketData
 static int get_cluster_node_count(void);
 static void clear_command_node_result(WDCommandNodeResult* nodeResult);
 
+static inline bool is_local_node_true_master(void);
 static inline WD_STATES get_local_node_state(void);
 static int set_state(WD_STATES newState);
 
@@ -2467,7 +2468,7 @@ static IPC_CMD_PREOCESS_RES process_failover_command_on_coordinator(WDCommandDat
 
 static IPC_CMD_PREOCESS_RES process_IPC_failover_command(WDCommandData* ipcCommand)
 {
-       if (get_local_node_state() == WD_COORDINATOR)
+       if (is_local_node_true_master())
        {
                ereport(LOG,
                                (errmsg("watchdog received the failover command from local pgpool-II on IPC interface")));
@@ -2587,7 +2588,7 @@ static IPC_CMD_PREOCESS_RES process_IPC_data_request_from_master(WDCommandData *
                        return IPC_CMD_PROCESSING;
                }
        }
-       else if (get_local_node_state() == WD_COORDINATOR)
+       else if (is_local_node_true_master())
        {
                /* This node is itself a master node, So send the empty result with OK tag */
                return IPC_CMD_OK;
@@ -4477,6 +4478,11 @@ static inline WD_STATES get_local_node_state(void)
        return g_cluster.localNode->state;
 }
 
+static inline bool is_local_node_true_master(void)
+{
+       return (get_local_node_state() == WD_COORDINATOR && WD_MASTER_NODE == g_cluster.localNode);
+}
+
 /*
  * returns true if no message is swollowed by the
  * processor and no further action is required