Fix long standing bug with pcp_node_info.
author Tatsuo Ishii <ishii@sraoss.co.jp>
Mon, 31 Jan 2022 02:45:33 +0000 (11:45 +0900)
committer Tatsuo Ishii <ishii@sraoss.co.jp>
Mon, 31 Jan 2022 02:45:33 +0000 (11:45 +0900)
It appears that pcp_node_info occasionally shows the backend_status field
as "quarantine" when it should be "down". When pcp_node_info displays
the status, it first checks the backend_status member of the BackendInfo
struct. If it is 3, it then checks the quarantine member. If that is
non-zero, the backend_status field is shown as "quarantine". So if
garbage remains in the quarantine member, the status is shown as
"quarantine". The BackendInfo struct is transferred from the pcp_worker
process to the pcp frontend client. Unfortunately, when the quarantine
member was added by commit 54af632c, pcp_worker.c and pcp_frontend.c
were not modified to transfer the "quarantine" member.

This fix needs to be back-patched to 3.7, where the "quarantine" member
was first added.

Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2022-January/004110.html

src/libs/pcp/pcp.c
src/pcp_con/pcp_worker.c

index e76efaefc7374534798c724e5a99eec9dd98a5ca..6a8430a70c0875d0ffe35c6ce8a5ebf1f8e07263 100644 (file)
@@ -742,6 +742,12 @@ process_node_info_response(PCPConnInfo * pcpConn, char *buf, int len)
                index += 1;
                backend_info->backend_status = atoi(index);
 
+               index = (char *) memchr(index, '\0', len);
+               if (index == NULL)
+                       goto INVALID_RESPONSE;
+               index += 1;
+               backend_info->quarantine = atoi(index);
+
                index = (char *) memchr(index, '\0', len);
                if (index == NULL)
                        goto INVALID_RESPONSE;
index 05094e30117cbedb6683f379c2eb082864328775..6ff7d204829583f79f2dd2109ce557cf219ebbed 100644 (file)
@@ -879,6 +879,7 @@ inform_node_info(PCP_CONNECTION * frontend, char *buf)
                {
                        char            port_str[6];
                        char            status[2];
+                       char            quarantine[2];
                        char            weight_str[20];
                        char            role_str[10];
                        char            standby_delay_str[20];
@@ -900,6 +901,7 @@ inform_node_info(PCP_CONNECTION * frontend, char *buf)
 
                        snprintf(port_str, sizeof(port_str), "%d", bi->backend_port);
                        snprintf(status, sizeof(status), "%d", bi->backend_status);
+                       snprintf(quarantine, sizeof(quarantine), "%d", bi->quarantine);
                        snprintf(weight_str, sizeof(weight_str), "%f", bi->backend_weight);
 
                        if (STREAM)
@@ -929,6 +931,7 @@ inform_node_info(PCP_CONNECTION * frontend, char *buf)
                                                  strlen(bi->backend_hostname) + 1 +
                                                  strlen(port_str) + 1 +
                                                  strlen(status) + 1 +
+                                                 strlen(quarantine) + 1 +
                                                  strlen(nodes[i].pg_status) + 1 +
                                                  strlen(weight_str) + 1 +
                                                  strlen(role_str) + 1 +
@@ -944,6 +947,7 @@ inform_node_info(PCP_CONNECTION * frontend, char *buf)
                        pcp_write(frontend, bi->backend_hostname, strlen(bi->backend_hostname) + 1);
                        pcp_write(frontend, port_str, strlen(port_str) + 1);
                        pcp_write(frontend, status, strlen(status) + 1);
+                       pcp_write(frontend, quarantine, strlen(quarantine) + 1);
                        pcp_write(frontend, nodes[i].pg_status, strlen(nodes[i].pg_status) + 1);
                        pcp_write(frontend, weight_str, strlen(weight_str) + 1);
                        pcp_write(frontend, role_str, strlen(role_str) + 1);