kill(pcp_pid, SIGUSR2);
return;
}
+
Req_info->switching = true;
switching = 1;
for(;;)
int node_id_set[MAX_NUM_BACKENDS];
int node_count;
unsigned char request_details;
- WDFailoverCMDResults failoverLockRes;
+ WDFailoverCMDResults wdInterlockingRes;
pool_semaphore_lock(REQUEST_INFO_SEM);
continue;
}
+ /* start watchdog interlocking */
+ wdInterlockingRes = wd_start_failover_interlocking();
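+ /* with the watchdog disabled, wd_start_failover_interlocking() returns
+ * FAILOVER_RES_I_AM_LOCK_HOLDER, so a standalone pgpool-II always
+ * executes the failover commands itself */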
+
/*
* if not in replication mode/master slave mode, we treat this as a restart request.
* otherwise we need to check whether we have already failed over.
BACKEND_INFO(node_id).backend_status == CON_DOWN) && VALID_BACKEND(node_id)) ||
(reqkind == NODE_DOWN_REQUEST && !VALID_BACKEND(node_id)))
{
-
if (node_id < 0 || node_id >= MAX_NUM_BACKENDS)
ereport(LOG,
(errmsg("invalid failback request, node id: %d is invalid. node id must be between [0 and %d]",node_id,MAX_NUM_BACKENDS)));
ereport(LOG,
(errmsg("invalid failback request, status: [%d] of node id : %d is invalid for failback",BACKEND_INFO(node_id).backend_status,node_id)));
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ wd_end_failover_interlocking();
+
continue;
}
(void)write_status_file();
- /* Aquire failback start command lock */
+ /* Execute the failback command only if we hold the failover interlocking locks */
- failoverLockRes = wd_failover_command_start(NODE_FAILBACK_CMD);
-
- if (failoverLockRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
{
trigger_failover_command(node_id, pool_config->failback_command,
- MASTER_NODE_ID, get_next_master_node(), PRIMARY_NODE_ID);
-
- wd_release_failover_command_lock(NODE_FAILBACK_CMD);
-
+ MASTER_NODE_ID, get_next_master_node(), PRIMARY_NODE_ID);
+ wd_failover_lock_release(FAILBACK_LOCK);
}
else
{
/*
- * Okay we are not allowed to execute the failover command
- * so we need to wait untle the one who is executing the command
- * finish with it.
+ * Okay, we are not allowed to execute the failover command,
+ * so we need to wait until the node that is executing the command
+ * finishes with it.
*/
- wd_wati_until_lock_or_timeout(NODE_FAILBACK_CMD);
+ wd_wait_until_command_complete_or_timeout(FAILBACK_LOCK);
}
}
else if (reqkind == PROMOTE_NODE_REQUEST)
{
ereport(LOG,
(errmsg("failover: no backends are promoted")));
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ wd_end_failover_interlocking();
continue;
}
-
- failoverLockRes = wd_failover_command_start(NODE_PROMOTE_CMD);
}
else /* NODE_DOWN_REQUEST */
{
ereport(LOG,
(errmsg("failover: no backends are degenerated")));
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ wd_end_failover_interlocking();
+
continue;
}
}
need_to_restart_children = true;
partial_restart = false;
}
-
- failoverLockRes = wd_failover_command_start(NODE_FAILED_CMD);
-
- if (failoverLockRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
{
/* Exec failover_command if needed */
for (i = 0; i < pool_config->backend_desc->num_backends; i++)
trigger_failover_command(i, pool_config->failover_command,
MASTER_NODE_ID, new_master, PRIMARY_NODE_ID);
}
+ wd_failover_lock_release(FAILOVER_LOCK);
}
else
- wd_wati_until_lock_or_timeout(NODE_FAILED_CMD);
-
+ {
+ wd_wait_until_command_complete_or_timeout(FAILOVER_LOCK);
+ }
/* no need to wait since it will be done in reap_handler */
#ifdef NOT_USED
/*
* follow master command also uses the same locks used by the triggering command
*/
- if (failoverLockRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
{
if ((follow_cnt > 0) && (*pool_config->follow_master_command != '\0'))
{
follow_pid = fork_follow_child(Req_info->master_node_id, new_primary,
Req_info->primary_node_id);
}
- if (reqkind == PROMOTE_NODE_REQUEST)
- wd_release_failover_command_lock(NODE_PROMOTE_CMD);
- else if (reqkind == NODE_DOWN_REQUEST)
- wd_release_failover_command_lock(NODE_FAILED_CMD);
+ wd_failover_lock_release(FOLLOW_MASTER_LOCK);
}
else
{
- if (reqkind == PROMOTE_NODE_REQUEST)
- wd_wati_until_lock_or_timeout(NODE_PROMOTE_CMD);
- else if (reqkind == NODE_DOWN_REQUEST)
- wd_wati_until_lock_or_timeout(NODE_FAILED_CMD);
- }
+ wd_wait_until_command_complete_or_timeout(FOLLOW_MASTER_LOCK);
+ }
/* Save primary node id */
Req_info->primary_node_id = new_primary;
*/
kill(worker_pid, SIGUSR1);
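+ /* resign from the lock holder; this releases any failover locks still
+ * held on the master watchdog node */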
+ if (wdInterlockingRes == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ wd_end_failover_interlocking();
+
if (reqkind == NODE_UP_REQUEST)
{
- wd_failover_command_end(NODE_FAILBACK_CMD);
ereport(LOG,
(errmsg("failback done. reconnect host %s(%d)",
BACKEND_INFO(node_id).backend_hostname,
}
else if (reqkind == PROMOTE_NODE_REQUEST)
{
- wd_failover_command_end(NODE_PROMOTE_CMD);
ereport(LOG,
(errmsg("promotion done. promoted host %s(%d)",
BACKEND_INFO(node_id).backend_hostname,
}
else
{
- /* Release the locks and interlocking */
- wd_failover_command_end(NODE_FAILED_CMD);
-
/* Temporary black magic. Without this regression 055 does not finish */
fprintf(stderr, "failover done. shutdown host %s(%d)",
BACKEND_INFO(node_id).backend_hostname,
{WD_NO_MESSAGE,""}
};
-char* wd_failover_cmd_type_name[] =
+char* wd_failover_lock_name[] =
{
"FAILOVER",
"FAILBACK",
- "PROMOTE"
+ "FOLLOW MASTER"
};
char *wd_event_name[] =
WDFunctionCommandData* wd_func_command;
}WDCommandTimerData;
+
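+/*
+ * InterlockingNode keeps the failover interlocking state on the
+ * master/coordinator watchdog node: the node currently holding the
+ * locks, the state of each individual failover command lock, and
+ * the time at which the locks were acquired.
+ */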
typedef struct InterlockingNode
{
- WatchdogNode* lockHolderNode;
- bool locked;
+ WatchdogNode* lockHolderNode;
+ bool locks[MAX_FAILOVER_LOCKS];
+ struct timeval lock_time;
}InterlockingNode;
WatchdogNode* remoteNodes;
WatchdogNode* masterNode;
WatchdogNode* lockHolderNode;
- InterlockingNode interlockingNodes[MAX_FAILOVER_CMDS];
+ InterlockingNode interlockingNode;
int remoteNodeCount;
int aliveNodeCount;
int quorum_status;
bool escalated;
bool clusterInitialized;
bool ipc_auth_needed;
-
List *unidentified_socks;
List *notify_clients;
List *ipc_command_socks;
static bool process_pgpool_replicate_command(WatchdogNode* wdNode, WDPacketData* pkt);
static void process_failover_command_sync_requests(WatchdogNode* wdNode, WDPacketData* pkt, WDIPCCommandData* ipcCommand);
-static WDFailoverCMDResults node_is_asking_for_failover_cmd_end(WatchdogNode* wdNode, WDPacketData* pkt, int failoverCmdType, bool resign);
-static WDFailoverCMDResults node_is_asking_for_failover_cmd_start(WatchdogNode* wdNode, WDPacketData* pkt, int failoverCmdType, bool check);
+static WDFailoverCMDResults node_is_asking_for_failover_end(WatchdogNode* wdNode, WDPacketData* pkt);
+static WDFailoverCMDResults node_is_asking_for_failover_start(WatchdogNode* wdNode, WDPacketData* pkt);
+static WDFailoverCMDResults node_is_asking_for_failover_lock_status(WatchdogNode* wdNode, WDPacketData* pkt, WDFailoverLock failoverLock);
+static WDFailoverCMDResults node_is_asking_for_failover_lock_release(WatchdogNode* wdNode, WDPacketData* pkt, WDFailoverLock failoverLock);
static void wd_system_will_go_down(int code, Datum arg);
static bool verify_pool_configurations(POOL_CONFIG* config);
return false;
}
+/*
+ * process_failover_command_sync_requests()
+ * This function is the main processor of all interlocking related requests.
+ * It parses the request json and executes the requested interlocking command.
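+ *
+ * The request json carries two keys (built by get_wd_failover_cmd_type_json()):
+ * "SyncRequestType" names the interlocking operation and "FailoverLockID"
+ * identifies the failover lock the operation applies to.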
+ */
static void process_failover_command_sync_requests(WatchdogNode* wdNode, WDPacketData* pkt, WDIPCCommandData* ipcCommand)
{
WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
JsonNode* jNode = NULL;
- int failoverCmdType = -1;
-
+ int failoverLockID = -1;
+
/* only coordinator(master) node can process this request */
if (get_local_node_state() == WD_COORDINATOR)
{
char* json_data = NULL;
int data_len = 0;
- json_value *root;
+ json_value *root = NULL;
char* syncRequestType = NULL;
/* We need to identify failover command type and sync function */
}
if (data_len > 0 && json_data)
- root = json_parse(json_data,data_len);
- else
- root = NULL;
-
- /* The root node must be object */
- if (root == NULL || root->type != json_object)
{
- ereport(LOG,
- (errmsg("unable to parse json data from replicate command")));
- res = FAILOVER_RES_ERROR;
+ root = json_parse(json_data,data_len);
+ if (root && root->type == json_object)
+ {
+ syncRequestType = json_get_string_value_for_key(root, "SyncRequestType");
+ json_get_int_value_for_key(root, "FailoverLockID", &failoverLockID);
+ }
+ else
+ {
+ ereport(LOG,
+ (errmsg("unable to parse json data of interlocking command")));
+ }
}
- if (root)
- syncRequestType = json_get_string_value_for_key(root, "SyncRequestType");
-
- if (syncRequestType == NULL)
+ if (syncRequestType)
{
- ereport(LOG,
- (errmsg("invalid json data"),
- errdetail("unable to find Watchdog Function Name")));
- res = FAILOVER_RES_ERROR;
+
+ if (strcasecmp(WD_REQ_FAILOVER_START, syncRequestType) == 0)
+ res = node_is_asking_for_failover_start(wdNode, pkt);
+
+ else if (strcasecmp(WD_REQ_FAILOVER_END, syncRequestType) == 0)
+ res = node_is_asking_for_failover_end(wdNode, pkt);
+
+ else if (strcasecmp(WD_REQ_FAILOVER_RELEASE_LOCK, syncRequestType) == 0)
+ res = node_is_asking_for_failover_lock_release(wdNode, pkt, failoverLockID);
+
+ else if (strcasecmp(WD_REQ_FAILOVER_LOCK_STATUS, syncRequestType) == 0)
+ res = node_is_asking_for_failover_lock_status(wdNode, pkt, failoverLockID);
+
+ else
+ res = FAILOVER_RES_ERROR;
}
else
- syncRequestType = pstrdup(syncRequestType);
-
- if (root && json_get_int_value_for_key(root, "FailoverCMDType", &failoverCmdType))
{
+ ereport(LOG,
+ (errmsg("invalid json data"),
+ errdetail("unable to find interlocking command type")));
res = FAILOVER_RES_ERROR;
}
-
+
if (root)
+ {
json_value_free(root);
-
- /* verify the failoverCmdType */
- if (failoverCmdType < 0 || failoverCmdType >= MAX_FAILOVER_CMDS)
- res = FAILOVER_RES_ERROR;
-
- if (syncRequestType == NULL)
- res = FAILOVER_RES_ERROR;
-
- if (res != FAILOVER_RES_ERROR)
- {
- if (strcasecmp("START_COMMAND", syncRequestType) == 0)
- res = node_is_asking_for_failover_cmd_start(wdNode, pkt, failoverCmdType, false);
- else if (strcasecmp("END_COMMAND", syncRequestType) == 0)
- res = node_is_asking_for_failover_cmd_end(wdNode, pkt, failoverCmdType, true);
- else if (strcasecmp("UNLOCK_COMMAND", syncRequestType) == 0)
- res = node_is_asking_for_failover_cmd_end(wdNode, pkt, failoverCmdType, false);
- else if (strcasecmp("CHECK_LOCKED", syncRequestType) == 0)
- res = node_is_asking_for_failover_cmd_start(wdNode, pkt, failoverCmdType, true);
- else
- res = FAILOVER_RES_ERROR;
}
}
else
{
+ /* I am not the coordinator node. So just return an error */
res = FAILOVER_RES_ERROR;
}
-
+
if (res != FAILOVER_RES_ERROR)
{
/* create the json result */
jNode = jw_create_with_object(true);
/* add the node count */
- jw_put_int(jNode, "FailoverCMDType", failoverCmdType);
+ jw_put_int(jNode, "FailoverLockID", failoverLockID);
jw_put_int(jNode, "InterlockingResult", res);
/* create the packet */
jw_end_element(jNode);
jw_finish_document(jNode);
}
-
+
if (wdNode != g_cluster.localNode)
{
if (jNode == NULL)
}
else
{
- /* Reply to IPC Socket */
+ /* reply on the IPC socket */
bool ret;
if (jNode != NULL)
{
}
if (ret == false)
+ {
ereport(LOG,
(errmsg("failed to write results for failover sync request to IPC socket")));
+ }
}
}
+/*
+ * node_is_asking_for_failover_start()
+ * This function processes lock holding requests. If the requesting node is
+ * already the lock holder, or no lock holder exists when the request arrives,
+ * the node is registered as the lock holder. When the lock holding request
+ * succeeds, all failover command locks are set to the locked state.
+ * Only the coordinator/master node can execute interlocking requests.
+ */
static WDFailoverCMDResults
-node_is_asking_for_failover_cmd_start(WatchdogNode* wdNode, WDPacketData* pkt, int failoverCmdType, bool check)
+node_is_asking_for_failover_start(WatchdogNode* wdNode, WDPacketData* pkt)
{
WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
- /* only coordinator(master) node can process this request */
ereport(LOG,
- (errmsg("%s pgpool-II node \"%s\" is %s [%s] lock to start the failover command",
- (g_cluster.localNode == wdNode)? "local":"remote",
- wdNode->nodeName,
- check?"checking the availability of":"requesting to acquire",
- wd_failover_cmd_type_name[failoverCmdType])));
+ (errmsg("%s pgpool-II node \"%s\" is requesting to become a lock holder",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName)));
+ /* only coordinator(master) node can process this request */
if (get_local_node_state() == WD_COORDINATOR)
{
- InterlockingNode* lockingNode = NULL;
- if (failoverCmdType < 0 || failoverCmdType >= MAX_FAILOVER_CMDS)
- res = FAILOVER_RES_ERROR;
- else
- lockingNode = &g_cluster.interlockingNodes[failoverCmdType];
-
- if (res != FAILOVER_RES_ERROR)
+ /* check if no node currently holds the lock, or if the requesting node
+ * is itself the lock holder
+ */
+ if (g_cluster.interlockingNode.lockHolderNode == NULL ||
+ g_cluster.interlockingNode.lockHolderNode == wdNode)
{
- /* check if we already have no lockholder node */
- if (lockingNode->lockHolderNode == NULL || lockingNode->lockHolderNode == wdNode)
+ int i = 0;
+ /* lock all command locks */
+ for (i = 0; i < MAX_FAILOVER_LOCKS; i++)
{
- if (check == false)
- {
- lockingNode->lockHolderNode = wdNode;
- lockingNode->locked = true;
- }
- res = FAILOVER_RES_I_AM_LOCK_HOLDER;
- ereport(LOG,
- (errmsg("%s pgpool-II node \"%s\" %s [%s] lock to start the failover command",
+ g_cluster.interlockingNode.locks[i] = true;
+ }
+ g_cluster.interlockingNode.lockHolderNode = wdNode;
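+ /* record the time at which the locks were acquired */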
+ gettimeofday(&g_cluster.interlockingNode.lock_time, NULL);
+ res = FAILOVER_RES_I_AM_LOCK_HOLDER;
+ ereport(LOG,
+ (errmsg("%s pgpool-II node \"%s\" is the lock holder",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName)));
+ }
+ else
+ {
+ /* some other node is holding the lock */
+ res = FAILOVER_RES_I_AM_NOT_LOCK_HOLDER;
+ ereport(LOG,
+ (errmsg("lock holder request denied to %s pgpool-II node \"%s\"",
(g_cluster.localNode == wdNode)? "local":"remote",
- wdNode->nodeName,
- check?"can acquire":"has",
- wd_failover_cmd_type_name[failoverCmdType])));
+ wdNode->nodeName),
+ errdetail("%s pgpool-II node \"%s\" is already holding the locks",
+ (g_cluster.localNode == g_cluster.interlockingNode.lockHolderNode)? "local":"remote",
+ g_cluster.interlockingNode.lockHolderNode->nodeName)));
+ }
+ }
+ else
+ {
+ ereport(LOG,
+ (errmsg("failed to process interlocking request from %s pgpool-II node \"%s\"",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName),
+ errdetail("I am standby node and request can only be processed by master watchdog node")));
+ res = FAILOVER_RES_ERROR;
+ }
+ return res;
+}
- }
- else /* some other node is holding the lock */
- {
- ereport(LOG,
- (errmsg("[%s] lock %s %s pgpool-II node \"%s\" to start the failover command",
- wd_failover_cmd_type_name[failoverCmdType],
- check?"is not available for":"request denied to",
- (g_cluster.localNode == wdNode)? "local":"remote",
- wdNode->nodeName),
- errdetail("%s pgpool-II node \"%s\" is holding the lock",
- (g_cluster.localNode == lockingNode->lockHolderNode)? "local":"remote",
- lockingNode->lockHolderNode->nodeName)));
+/*
+ * node_is_asking_for_failover_end()
+ * This function processes a node's request to resign from being the lock
+ * holder. A node can resign only if it is the current lock holder. When the
+ * resignation succeeds, all failover command locks become unlocked.
+ * Only the coordinator/master node can execute interlocking requests.
+ */
+static WDFailoverCMDResults
+node_is_asking_for_failover_end(WatchdogNode* wdNode, WDPacketData* pkt)
+{
+ WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
- if (lockingNode->locked)
- res = FAILOVER_RES_BLOCKED;
- else
- res = FAILOVER_RES_LOCK_UNLOCKED;
+ ereport(LOG,
+ (errmsg("%s pgpool-II node \"%s\" is requesting to resign from a lock holder",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName)));
+
+ if (get_local_node_state() == WD_COORDINATOR)
+ {
+ /* check if the resigning node is the one currently holding the lock;
+ * a resign request when no lock holder exists succeeds as a no-op
+ */
+ if (g_cluster.interlockingNode.lockHolderNode == NULL ||
+ g_cluster.interlockingNode.lockHolderNode == wdNode)
+ {
+ int i;
+ /* unlock all the locks */
+ for (i = 0; i < MAX_FAILOVER_LOCKS; i++)
+ {
+ g_cluster.interlockingNode.locks[i] = false;
}
+ g_cluster.interlockingNode.lockHolderNode = NULL;
+ res = FAILOVER_RES_SUCCESS;
+ ereport(LOG,
+ (errmsg("%s pgpool-II node \"%s\" has resigned from the lock holder",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName)));
+ }
+ else /* some other node is holding the lock */
+ {
+ res = FAILOVER_RES_I_AM_NOT_LOCK_HOLDER;
+ ereport(LOG,
+ (errmsg("request of resigning from lock holder is denied to %s pgpool-II node \"%s\"",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName),
+ errdetail("%s pgpool-II node \"%s\" is the lock holder node",
+ (g_cluster.localNode == g_cluster.interlockingNode.lockHolderNode)? "local":"remote",
+ g_cluster.interlockingNode.lockHolderNode->nodeName)));
}
}
else
{
ereport(LOG,
- (errmsg("failed to process [%s] lock request from %s pgpool-II node \"%s\" to start the failover command",
- wd_failover_cmd_type_name[failoverCmdType],
+ (errmsg("failed to process release interlocking request from %s pgpool-II node \"%s\"",
(g_cluster.localNode == wdNode)? "local":"remote",
wdNode->nodeName),
errdetail("I am standby node and request can only be processed by master watchdog node")));
return res;
}
+/*
+ * node_is_asking_for_failover_lock_release()
+ * This function processes the request from the lock holder node to
+ * release a specific failover command lock.
+ * Only the coordinator/master node can execute interlocking requests.
+ */
static WDFailoverCMDResults
-node_is_asking_for_failover_cmd_end(WatchdogNode* wdNode, WDPacketData* pkt, int failoverCmdType, bool resign)
+node_is_asking_for_failover_lock_release(WatchdogNode* wdNode, WDPacketData* pkt, WDFailoverLock failoverLock)
{
WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
- /* only coordinator(master) node can process this request */
ereport(LOG,
- (errmsg("%s pgpool-II node \"%s\" is %s to release [%s] lock to end the failover command",
+ (errmsg("%s pgpool-II node \"%s\" is requesting to release [%s] lock",
(g_cluster.localNode == wdNode)? "local":"remote",
wdNode->nodeName,
- resign?"requesting":"testing",
- wd_failover_cmd_type_name[failoverCmdType])));
+ wd_failover_lock_name[failoverLock])));
if (get_local_node_state() == WD_COORDINATOR)
{
- InterlockingNode* lockingNode = NULL;
-
- if (failoverCmdType < 0 || failoverCmdType >= MAX_FAILOVER_CMDS)
- res = FAILOVER_RES_ERROR;
- else
- lockingNode = &g_cluster.interlockingNodes[failoverCmdType];
-
- if (res != FAILOVER_RES_ERROR)
+ /* check if the node requesting to release a lock is the lock holder */
+ if (g_cluster.interlockingNode.lockHolderNode == wdNode)
{
- /* check if we already have no lockholder node */
- if (lockingNode->lockHolderNode == NULL || lockingNode->lockHolderNode == wdNode)
+ /* make sure the request is for a valid lock */
+ if (failoverLock < MAX_FAILOVER_LOCKS)
{
- if (resign)
- {
- lockingNode->lockHolderNode = NULL;
- lockingNode->locked = false;
- }
- res = FAILOVER_RES_LOCK_UNLOCKED;
+ g_cluster.interlockingNode.locks[failoverLock] = false;
+ res = FAILOVER_RES_SUCCESS;
ereport(LOG,
- (errmsg("%s pgpool-II node \"%s\" %s the [%s] lock to end the failover command",
+ (errmsg("%s pgpool-II node \"%s\" has released the [%s] lock",
(g_cluster.localNode == wdNode)? "local":"remote",
wdNode->nodeName,
- resign?"has released":"can release",
- wd_failover_cmd_type_name[failoverCmdType])));
-
+ wd_failover_lock_name[failoverLock])));
+ }
+ else
+ {
+ res = FAILOVER_RES_ERROR;
}
- else /* some other node is holding the lock */
+ }
+ else
+ {
+ /* the requesting node is not the lock holder, so it is not allowed to release the lock */
+ ereport(LOG,
+ (errmsg("[%s] lock release request denied to %s pgpool-II node \"%s\"",
+ wd_failover_lock_name[failoverLock],
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName),
+ errdetail("requesting node is not the lock holder")));
+ res = FAILOVER_RES_I_AM_NOT_LOCK_HOLDER;
+ }
+ }
+ else
+ {
+ ereport(LOG,
+ (errmsg("failed to process release lock request from %s pgpool-II node \"%s\"",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName),
+ errdetail("I am standby node and request can only be processed by master watchdog node")));
+ res = FAILOVER_RES_ERROR;
+ }
+ return res;
+}
+
+/*
+ * node_is_asking_for_failover_lock_status()
+ * This function returns the status of a specific failover lock.
+ * Only the coordinator/master node can execute interlocking requests.
+ */
+static WDFailoverCMDResults
+node_is_asking_for_failover_lock_status(WatchdogNode* wdNode, WDPacketData* pkt, WDFailoverLock failoverLock)
+{
+ WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
+
+ ereport(LOG,
+ (errmsg("%s pgpool-II node \"%s\" is checking the status of [%s] lock",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName,
+ wd_failover_lock_name[failoverLock])));
+
+ if (get_local_node_state() == WD_COORDINATOR)
+ {
+ /* a lock status request is only meaningful when a lock holder exists */
+ if (g_cluster.interlockingNode.lockHolderNode)
+ {
+ /* make sure the request is for a valid lock */
+ if (failoverLock < MAX_FAILOVER_LOCKS)
{
+ if (g_cluster.interlockingNode.locks[failoverLock])
+ res = FAILOVER_RES_LOCKED;
+ else
+ res = FAILOVER_RES_UNLOCKED;
+
ereport(LOG,
- (errmsg("[%s] lock %s %s pgpool-II node \"%s\" to end the failover command",
- wd_failover_cmd_type_name[failoverCmdType],
- resign?"release request denied to":"cannot be released by",
- (g_cluster.localNode == wdNode)? "local":"remote",
- wdNode->nodeName),
- errdetail("%s pgpool-II node \"%s\" is holding the lock",
- (g_cluster.localNode == lockingNode->lockHolderNode)? "local":"remote",
- lockingNode->lockHolderNode->nodeName)));
- res = FAILOVER_RES_BLOCKED;
+ (errmsg("%s lock is currently %s",
+ wd_failover_lock_name[failoverLock],
+ (res == FAILOVER_RES_LOCKED)?"LOCKED":"FREE"),
+ errdetail("request was from %s pgpool-II node \"%s\" and lock holder is %s pgpool-II node \"%s\"",
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName,
+ (g_cluster.localNode == g_cluster.interlockingNode.lockHolderNode)? "local":"remote",
+ g_cluster.interlockingNode.lockHolderNode->nodeName)));
+ }
+ else
+ {
+ res = FAILOVER_RES_ERROR;
}
}
+ else
+ {
+ /* no lock holder exists */
+ ereport(LOG,
+ (errmsg("[%s] lock status check request denied to %s pgpool-II node \"%s\"",
+ wd_failover_lock_name[failoverLock],
+ (g_cluster.localNode == wdNode)? "local":"remote",
+ wdNode->nodeName),
+ errdetail("no lock holder exists")));
+ res = FAILOVER_RES_NO_LOCKHOLDER;
+ }
}
else
{
ereport(LOG,
- (errmsg("failed to process [%s] lock request from %s pgpool-II node \"%s\" to end the failover command",
- wd_failover_cmd_type_name[failoverCmdType],
+ (errmsg("failed to process lock status check request from %s pgpool-II node \"%s\"",
(g_cluster.localNode == wdNode)? "local":"remote",
wdNode->nodeName),
errdetail("I am standby node and request can only be processed by master watchdog node")));
static void sleep_in_waiting(void);
static void FreeCmdResult(WDIPCCmdResult* res);
-static WDFailoverCMDResults wd_issue_failover_lock_command(WDFailoverCMDTypes cmdType, char* syncReqType);
-
-static char* get_wd_failover_cmd_type_json(WDFailoverCMDTypes cmdType, char* reqType);
-WDFailoverCMDResults wd_send_failover_sync_command(WDFailoverCMDTypes cmdType, char* syncReqType);
+static WDFailoverCMDResults wd_issue_failover_lock_command(char* syncReqType, enum WDFailoverLocks lockID);
+static char* get_wd_failover_cmd_type_json(char* reqType, enum WDFailoverLocks lockID);
+static WDFailoverCMDResults wd_send_failover_sync_command(char* syncReqType, enum WDFailoverLocks lockID);
static int wd_set_node_mask (unsigned char req_mask, int *node_id_set, int count);
static int wd_chk_node_mask (unsigned char req_mask, int *node_id_set, int count);
if (timeout_sec > 0)
{
tv.tv_sec = timeout_sec;
+ tv.tv_usec = 0;
timeout_st = &tv;
}
FD_ZERO(&fds);
return COMMAND_FAILED;
}
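+/*
+ * Build the json document for an interlocking request. It contains the
+ * watchdog auth key (when configured), the "SyncRequestType" string and
+ * the "FailoverLockID" integer. Roughly (auth key omitted, request string
+ * and lock id are illustrative):
+ * {"SyncRequestType":"<WD_REQ_FAILOVER_* string>","FailoverLockID":0}
+ */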
-static char* get_wd_failover_cmd_type_json(WDFailoverCMDTypes cmdType, char* reqType)
+static char* get_wd_failover_cmd_type_json(char* reqType, enum WDFailoverLocks lockID)
{
char* json_str;
JsonNode* jNode = jw_create_with_object(true);
if (pool_config->wd_authkey != NULL && strlen(pool_config->wd_authkey) > 0)
jw_put_string(jNode, WD_IPC_AUTH_KEY, pool_config->wd_authkey); /* put the auth key*/
- jw_put_int(jNode, "FailoverCMDType", cmdType);
jw_put_string(jNode, "SyncRequestType", reqType);
+ jw_put_int(jNode, "FailoverLockID", lockID);
jw_finish_document(jNode);
json_str = pstrdup(jw_get_json_string(jNode));
jw_destroy(jNode);
return json_str;
}
-
-WDFailoverCMDResults
-wd_send_failover_sync_command(WDFailoverCMDTypes cmdType, char* syncReqType)
+static WDFailoverCMDResults
+wd_send_failover_sync_command(char* syncReqType, enum WDFailoverLocks lockID)
{
- int failoverResCmdType;
- int interlockingResult;
+ int interlockingResult = FAILOVER_RES_ERROR;
json_value *root;
- char* json_data = get_wd_failover_cmd_type_json(cmdType, syncReqType);
-
+ char* json_data = get_wd_failover_cmd_type_json(syncReqType, lockID);
+
WDIPCCmdResult *result = issue_command_to_watchdog(WD_FAILOVER_CMD_SYNC_REQUEST
,pool_config->recovery_timeout,
json_data, strlen(json_data), true);
FreeCmdResult(result);
return FAILOVER_RES_ERROR;
}
-
- if (json_get_int_value_for_key(root, "FailoverCMDType", &failoverResCmdType))
- {
- json_value_free(root);
- FreeCmdResult(result);
- return FAILOVER_RES_ERROR;
- }
+
if (root && json_get_int_value_for_key(root, "InterlockingResult", &interlockingResult))
{
json_value_free(root);
FreeCmdResult(result);
return FAILOVER_RES_ERROR;
}
+
json_value_free(root);
FreeCmdResult(result);
- if (failoverResCmdType != cmdType)
+ if (interlockingResult < 0 || interlockingResult > FAILOVER_RES_NO_LOCKHOLDER)
return FAILOVER_RES_ERROR;
-
- if (interlockingResult < 0 || interlockingResult > FAILOVER_RES_BLOCKED)
- return FAILOVER_RES_ERROR;
-
+
return interlockingResult;
}
return sock;
}
-WDFailoverCMDResults wd_failover_command_start(WDFailoverCMDTypes cmdType)
+WDFailoverCMDResults wd_start_failover_interlocking(void)
{
if (pool_config->use_watchdog)
- return wd_issue_failover_lock_command(cmdType,"START_COMMAND");
+ return wd_issue_failover_lock_command(WD_REQ_FAILOVER_START, 0);
return FAILOVER_RES_I_AM_LOCK_HOLDER;
}
-WDFailoverCMDResults wd_failover_command_end(WDFailoverCMDTypes cmdType)
+WDFailoverCMDResults wd_end_failover_interlocking(void)
{
if (pool_config->use_watchdog)
- return wd_issue_failover_lock_command(cmdType,"END_COMMAND");
- return FAILOVER_RES_I_AM_LOCK_HOLDER;
+ return wd_issue_failover_lock_command(WD_REQ_FAILOVER_END, 0);
+ return FAILOVER_RES_SUCCESS;
}
-WDFailoverCMDResults wd_failover_command_check_lock(WDFailoverCMDTypes cmdType)
+WDFailoverCMDResults wd_failover_lock_release(enum WDFailoverLocks lock)
{
if (pool_config->use_watchdog)
- return wd_issue_failover_lock_command(cmdType,"CHECK_LOCKED");
- return FAILOVER_RES_I_AM_LOCK_HOLDER;
+ return wd_issue_failover_lock_command(WD_REQ_FAILOVER_RELEASE_LOCK, lock);
+ return FAILOVER_RES_SUCCESS;
}
-WDFailoverCMDResults wd_release_failover_command_lock(WDFailoverCMDTypes cmdType)
+WDFailoverCMDResults wd_failover_lock_status(enum WDFailoverLocks lock)
{
if (pool_config->use_watchdog)
- return wd_issue_failover_lock_command(cmdType,"UNLOCK_COMMAND");
- return FAILOVER_RES_I_AM_LOCK_HOLDER;
+ return wd_issue_failover_lock_command(WD_REQ_FAILOVER_LOCK_STATUS, lock);
+ return FAILOVER_RES_UNLOCKED;
}
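+/*
+ * Typical interlocking sequence in the failover code path:
+ *
+ *   res = wd_start_failover_interlocking();
+ *   if (res == FAILOVER_RES_I_AM_LOCK_HOLDER)
+ *   {
+ *       trigger_failover_command(...);
+ *       wd_failover_lock_release(FAILOVER_LOCK);
+ *   }
+ *   else
+ *       wd_wait_until_command_complete_or_timeout(FAILOVER_LOCK);
+ *   ...
+ *   wd_end_failover_interlocking();
+ */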
-void wd_wati_until_lock_or_timeout(WDFailoverCMDTypes cmdType)
+void wd_wait_until_command_complete_or_timeout(enum WDFailoverLocks lock)
{
WDFailoverCMDResults res = FAILOVER_RES_TRANSITION;
int count = WD_INTERLOCK_WAIT_COUNT;
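+ /* poll the failover lock status on the master watchdog node until the
+ * lock is released, no lock holder remains, or the wait times out */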
- while (1)
+
+ while (pool_config->use_watchdog)
{
- res = wd_failover_command_check_lock(cmdType);
- if (res == FAILOVER_RES_I_AM_LOCK_HOLDER ||
- res == FAILOVER_RES_LOCK_UNLOCKED)
+ res = wd_failover_lock_status(lock);
+ if (res == FAILOVER_RES_UNLOCKED ||
+ res == FAILOVER_RES_NO_LOCKHOLDER)
{
/* we have the permission */
return;
/*
* This is just a wrapper over wd_send_failover_sync_command()
- * but tries to wait for WD_INTERLOCK_TIMEOUT_SEC amount of time
+ * but retries for up to WD_INTERLOCK_TIMEOUT_SEC seconds
* if watchdog is in transition state
*/
-static WDFailoverCMDResults wd_issue_failover_lock_command(WDFailoverCMDTypes cmdType, char* syncReqType)
+static WDFailoverCMDResults wd_issue_failover_lock_command(char* syncReqType, enum WDFailoverLocks lockID)
{
WDFailoverCMDResults res;
int x;
for (x=0; x < MAX_SEC_WAIT_FOR_CLUSTER_TRANSATION; x++)
{
- res = wd_send_failover_sync_command(cmdType, syncReqType);
+ res = wd_send_failover_sync_command(syncReqType, lockID);
if (res != FAILOVER_RES_TRANSITION)
break;
sleep(1);