First cut of primary server checking.
authorTatsuo Ishii <ishii@postgresql.org>
Mon, 9 Apr 2018 08:44:21 +0000 (17:44 +0900)
committerTatsuo Ishii <ishii@postgresql.org>
Mon, 9 Apr 2018 08:44:21 +0000 (17:44 +0900)
For now followings are implemented:

- Check all backend nodes starting node 0.

- If primary nodes appear twice or more, the second one or after are
  assumed invalid.

- Such invalid node will be degenerated at the next convenient
  time. Currently such timing is at the start up of Pgpool-II. This is
  apparently insufficient and should be improved later.

TODO:
- Verify primary nodes using pg_stat_wal_receiver.

- More chances to verify node status. Maybe in the same timing as
  streaming replication delay checking?

- Add new GUCs to control of this feature.

src/include/pool.h
src/main/pgpool_main.c
src/streaming_replication/pool_worker_child.c

index f87995cd0632b4c77b784ad11a822430beccdec7..97f91de37e875e4f7195ad65c7a7a76a2f77ce7f 100644 (file)
@@ -1,12 +1,10 @@
 /* -*-pgsql-c-*- */
 /*
- *
- * $Header$
  *
  * pgpool: a language independent connection pool server for PostgreSQL 
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2015     PgPool Global Development Group
+ * Copyright (c) 2003-2018     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -353,6 +351,16 @@ extern int my_master_node_id;
 #define MASTER_CONNECTION(p) ((p)->slots[MASTER_NODE_ID])
 #define MASTER(p) MASTER_CONNECTION(p)->con
 
+/*
+ * Backend node status in streaming replication mode.
+ */
+typedef enum {
+       POOL_NODE_STATUS_UNUSED,        /* unused */
+       POOL_NODE_STATUS_PRIMARY,       /* primary ndoe */
+       POOL_NODE_STATUS_STANDBY,       /* standby node */
+       POOL_NODE_STATUS_INVALID        /* invalid node (split branin, stand alone) */
+} POOL_NODE_STATUS;
+
 #define REPLICATION (pool_config->replication_mode)
 #define MASTER_SLAVE (pool_config->master_slave_mode)
 #define STREAM (MASTER_SLAVE && pool_config->master_slave_sub_mode == STREAM_MODE)
@@ -701,6 +709,7 @@ extern void pool_sleep(unsigned int second);
 
 /* pool_worker_child.c */
 extern void do_worker_child(void);
+extern int get_query_result(POOL_CONNECTION_POOL_SLOT **slots, int backend_id, char *query, POOL_SELECT_RESULT **res);
 
 /* md5.c */
 extern bool pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len, char *buf);
@@ -742,5 +751,6 @@ extern int pool_frontend_exists(void);
 extern pid_t pool_waitpid(int *status);
 extern int write_status_file(void);
 extern void do_health_check_child(int *node_id);
+extern POOL_NODE_STATUS *pool_get_node_status(void);
 
 #endif /* POOL_H */
index 1b4da1e50ccb14d115f397f8129c0ee0354d52f2..c6e5c2e0e99183bdd7e1c6b67dc16e7400e50cc2 100644 (file)
@@ -5,7 +5,7 @@
  * pgpool: a language independent connection pool server for PostgreSQL
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2017     PgPool Global Development Group
+ * Copyright (c) 2003-2018     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -139,7 +139,7 @@ static RETSIGTYPE wakeup_handler(int sig);
 static void initialize_shared_mem_objects(bool clear_memcache_oidmaps);
 static int trigger_failover_command(int node, const char *command_line,
                                                                        int old_master, int new_master, int old_primary);
-static bool verify_backend_node_status(int backend_no, bool* is_standby);
+static POOL_NODE_STATUS *verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots);
 static int find_primary_node(void);
 static int find_primary_node_repeatedly(void);
 static void terminate_all_childrens();
@@ -444,8 +444,29 @@ int PgpoolMain(bool discard_status, bool clear_memcache_oidmaps)
                 * successfull start.
                 */
                if (first)
+               {
+                       int i;
+                       int n;
+                       POOL_NODE_STATUS *node_status = pool_get_node_status();
+
                        ereport(LOG,
                                        (errmsg("%s successfully started. version %s (%s)", PACKAGE, VERSION, PGPOOLVERSION)));
+
+                       /* Very early stage node checking. It is assumed that find_primary_node got called. */
+                       for (i=0;i<NUM_BACKENDS;i++)
+                       {
+                               ereport(LOG,
+                                               (errmsg("node status[%d]: %d", i,  node_status[i])));
+
+                               if (node_status[i] == POOL_NODE_STATUS_INVALID)
+                               {
+                                       ereport(LOG,
+                                                       (errmsg("pgpool_main: invalid node found %d", i)));
+                                       n = i;
+                                       degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER|REQ_DETAIL_CONFIRMED);
+                               }
+                       }
+               }
                first = false;
 
                processState = SLEEPING;
@@ -2802,74 +2823,64 @@ static int trigger_failover_command(int node, const char *command_line,
 
        return r;
 }
+
 /*
- * This function is used by find_primary_node() function and is just a wrapper
- * over make_persistent_db_connection() function and returns boolean value to
- * inform connection status.
- * This function must not throw ereport.
+ * This function is used by find_primary_node().  Find primary node/standby
+ * node and returns static array of status for each backend node. This
+ * function must not throw ereport.
  */
-static bool
-    verify_backend_node_status(int backend_no, bool* is_standby)
+static POOL_NODE_STATUS pool_node_status[MAX_NUM_BACKENDS];
+
+static POOL_NODE_STATUS *
+verify_backend_node_status(POOL_CONNECTION_POOL_SLOT **slots)
 {
-       POOL_CONNECTION_POOL_SLOT   *s = NULL;
-       POOL_CONNECTION *con;
        POOL_SELECT_RESULT *res;
-       BackendInfo *bkinfo = pool_get_node_info(backend_no);
-
-       *is_standby = false;
+       bool found_primary = false;
+       int i;
 
-       s = make_persistent_db_connection_noerror(backend_no, bkinfo->backend_hostname,
-                                                                                 bkinfo->backend_port,
-                                                                                 pool_config->sr_check_database,
-                                                                                 pool_config->sr_check_user,
-                                                                                 pool_config->sr_check_password, true);
-       if (s)
+       for (i=0;i<NUM_BACKENDS;i++)
        {
-               MemoryContext oldContext = CurrentMemoryContext;
-               con = s->con;
+               pool_node_status[i] = POOL_NODE_STATUS_UNUSED;
 
-               PG_TRY();
+               if (!VALID_BACKEND(i))
+                       continue;
+
+               if (!slots[i])
+                       continue;
+
+               if (get_query_result(slots, i, "SELECT pg_is_in_recovery()", &res))
                {
-                       do_query(con, "SELECT pg_is_in_recovery()",
-                                        &res, PROTO_MAJOR_V3);
+                       continue;
                }
-               PG_CATCH();
+
+               if (res->data[0] && !strcmp(res->data[0], "t"))
                {
-                       /* ignore the error message */
-                       res = NULL;
-                       MemoryContextSwitchTo(oldContext);
-                       FlushErrorState();
-                       ereport(LOG,
-                                       (errmsg("verify_backend_node_status: do_query failed")));
+                       /* Possibly standby */
+                       pool_node_status[i] = POOL_NODE_STATUS_STANDBY;
                }
-               PG_END_TRY();
-               if(res)
+               else if (res->data[0] && !strcmp(res->data[0], "f"))
                {
-                       if (res->numrows <= 0)
-                       {
-                               ereport(LOG,
-                                               (errmsg("verify_backend_node_status: do_query returns no rows")));
-                       }
-                       if (res->data[0] == NULL)
-                       {
-                               ereport(LOG,
-                                               (errmsg("verify_backend_node_status: do_query returns no data")));
-                       }
-                       if (res->nullflags[0] == -1)
-                       {
-                               ereport(LOG,
-                                               (errmsg("verify_backend_node_status: do_query returns NULL")));
-                       }
-                       if (res->data[0] && !strcmp(res->data[0], "t"))
+                       /* Possibly primary. Let's see if we already found a primary
+                        * (checking split brain)
+                        */
+                       if (found_primary)
+                               pool_node_status[i] = POOL_NODE_STATUS_INVALID;
+                       else
                        {
-                               *is_standby = true;
+                               pool_node_status[i] = POOL_NODE_STATUS_PRIMARY;
+                               found_primary = true;
                        }
-                       free_select_result(res);
                }
-               discard_persistent_db_connection(s);
-               return true;
+               free_select_result(res);
        }
-       return false;
+
+       return pool_node_status;
+}
+
+POOL_NODE_STATUS *
+pool_get_node_status(void)
+{
+       return pool_node_status;
 }
 
 /*
@@ -2878,7 +2889,11 @@ static bool
  */
 static int find_primary_node(void)
 {
+       BackendInfo *bkinfo;
+       POOL_CONNECTION_POOL_SLOT *slots[MAX_NUM_BACKENDS];
        int i;
+       POOL_NODE_STATUS *status;
+       int primary = 0;
 
        /* Streaming replication mode? */
        if (!SL_MODE)
@@ -2892,7 +2907,7 @@ static int find_primary_node(void)
        }
 
        /* 
-        *First check for "ALWAYS_MASTER" flags exists. If so, do not perform
+        * First check for "ALWAYS_MASTER" flags exists. If so, do not perform
         * actual primary node check and just returns the node id.
         */
        for(i=0;i<NUM_BACKENDS;i++)
@@ -2905,44 +2920,62 @@ static int find_primary_node(void)
                }
        }
 
-       for(i=0;i<NUM_BACKENDS;i++)
+       /*
+        * Establish connections to backend
+        */
+       for (i=0;i<NUM_BACKENDS;i++)
        {
-               bool node_status;
-               bool is_standby;
-
-               ereport(LOG,
-                               (errmsg("find_primary_node: checking backend no %d",i)));
+               slots[i] = NULL;
 
                if (!VALID_BACKEND(i))
                        continue;
-               node_status = verify_backend_node_status(i,&is_standby);
-        if (!node_status)
-        {
-            /*
-             * It is possible that a node is down even if
-             * VALID_BACKEND tells it's valid.  This could happen
-             * before health checking detects the failure.
-             * Thus we should continue to look for primary node.
-             */
-            continue;
-        }
-               if (is_standby)
-                       ereport(DEBUG1,
-                                       (errmsg("find_primary_node: %d node is standby", i)));
-               else
-                       break;
+
+               bkinfo = pool_get_node_info(i);
+
+               slots[i] = make_persistent_db_connection_noerror(i, bkinfo->backend_hostname,
+                                                                                                                bkinfo->backend_port,
+                                                                                                                pool_config->sr_check_database,
+                                                                                                                pool_config->sr_check_user,
+                                                                                                                pool_config->sr_check_password, true);
+               if (!slots[i])
+               {
+                       ereport(LOG,
+                                       (errmsg("find_primary_node: make_persistent_db_connection_noerror failed on node %d", i)));
+               }
        }
 
-       if (i == NUM_BACKENDS)
+       /* Verify backend status */
+       status = verify_backend_node_status(slots);
+
+       for(i=0;i<NUM_BACKENDS;i++)
        {
-               ereport(DEBUG1,
-                               (errmsg("find_primary_node: no primary node found")));
-               return -1;
+               if (status[i] == POOL_NODE_STATUS_PRIMARY)
+               {
+                       /* This is the primary */
+                       ereport(LOG,
+                                       (errmsg("find_primary_node: primary node is %d",i)));
+                       primary = i;
+               }
+               else if (status[i] == POOL_NODE_STATUS_STANDBY)
+               {
+                       ereport(LOG,
+                                       (errmsg("find_primary_node: standby node is %d", i)));
+               }
+               else if (status[i] == POOL_NODE_STATUS_INVALID)
+               {
+                       /* Split brain or invalid node */
+                       ereport(LOG,
+                                       (errmsg("find_primary_node: invalid node %d", i)));
+               }
        }
 
-       ereport(LOG,
-            (errmsg("find_primary_node: primary node id is %d", i)));
-       return i;
+       for (i=0;i<NUM_BACKENDS;i++)
+       {
+               if (slots[i])
+                       discard_persistent_db_connection(slots[i]);
+       }
+
+       return primary;
 }
 
 static int find_primary_node_repeatedly(void)
index 7715876072823a15fdc99604dadd83419f5daf92..212bb00c1839ca5bd7ddc7878fc15ef459f74b9c 100644 (file)
@@ -1,11 +1,9 @@
 /* -*-pgsql-c-*- */
 /*
- * $Header$
- *
  * pgpool: a language independent connection pool server for PostgreSQL
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2017     PgPool Global Development Group
+ * Copyright (c) 2003-2018     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -76,7 +74,6 @@ static unsigned long long int text_to_lsn(char *text);
 static RETSIGTYPE my_signal_handler(int sig);
 static RETSIGTYPE reload_config_handler(int sig);
 static void reload_config(void);
-static int get_query_result(int backend_id, char *query, POOL_SELECT_RESULT **res);
 
 #define CHECK_REQUEST \
        do { \
@@ -299,8 +296,8 @@ static void check_replication_time_lag(void)
                {
                        query = "SELECT current_setting('server_version_num')";
 
-                       /* Get backend serversion. If the query fails, keep previous info. */
-                       if (get_query_result(i, query, &res) == 0)
+                       /* Get backend server version. If the query fails, keep previous info. */
+                       if (get_query_result(slots, i, query, &res) == 0)
                        {
                                server_version[i] = atoi(res->data[0]);
                                ereport(DEBUG1,
@@ -324,7 +321,7 @@ static void check_replication_time_lag(void)
                                query = "SELECT pg_last_xlog_replay_location()";
                }
 
-               if (get_query_result(i, query, &res) == 0)
+               if (get_query_result(slots, i, query, &res) == 0)
                {
                        lsn[i] = text_to_lsn(res->data[0]);
                        free_select_result(res);
@@ -453,49 +450,63 @@ static void reload_config(void)
 }
 
 /*
- * Execute query against specified backend.
- * Return -1 on failure or 0 otherwise.
- * Caller must prepare memory for POOL_SELECT_RESULT and pass it as "res".
+ * Execute query against specified backend using an established connection to
+ * backend.  Return -1 on failure or 0 otherwise.  Caller must prepare memory
+ * for POOL_SELECT_RESULT and pass it as "res". It is guaranteed that no
+ * exception occurs within this function.
  */
-
-static         int get_query_result(int backend_id, char *query, POOL_SELECT_RESULT **res)
+int get_query_result(POOL_CONNECTION_POOL_SLOT **slots, int backend_id, char *query, POOL_SELECT_RESULT **res)
 {
        int sts = -1;
+       MemoryContext oldContext = CurrentMemoryContext;
 
-       do_query(slots[backend_id]->con, query, res, PROTO_MAJOR_V3);
+       PG_TRY();
+       {
+               do_query(slots[backend_id]->con, query, res, PROTO_MAJOR_V3);
+       }
+       PG_CATCH();
+       {
+               /* ignore the error message */
+               res = NULL;
+               MemoryContextSwitchTo(oldContext);
+               FlushErrorState();
+               ereport(LOG,
+                               (errmsg("get_query_result: do_query failed")));
+       }
+       PG_END_TRY();
 
        if (!res)
        {
-               ereport(ERROR,
-                               (errmsg("Failed to check replication time lag"),
-                                errdetail("Query to node (%d) returned no result for node", backend_id)));
+               ereport(LOG,
+                               (errmsg("get_query_result: no result returned"),
+                                errdetail("node id (%d)", backend_id)));
                return sts;
        }
 
        if ((*res)->numrows <= 0)
        {
                free_select_result(*res);
-               ereport(ERROR,
-                               (errmsg("Failed to check replication time lag"),
-                                errdetail("Query to node (%d) returned result with no rows", backend_id)));
+               ereport(LOG,
+                               (errmsg("get_query_result: no rows returned"),
+                                errdetail("node id (%d)", backend_id)));
                return sts;
        }
 
        if ((*res)->data[0] == NULL)
        {
                free_select_result(*res);
-               ereport(ERROR,
-                               (errmsg("Failed to check replication time lag"),
-                                errdetail("Query to node (%d) returned no data", backend_id)));
+               ereport(LOG,
+                               (errmsg("get_query_result: no rows returned"),
+                                errdetail("node id (%d)", backend_id)));
                return sts;
        }
 
        if ((*res)->nullflags[0] == -1)
        {
                free_select_result(*res);
-               ereport(ERROR,
-                               (errmsg("Failed to check replication time lag"),
-                                errdetail("Query to node (%d) returned NULL data", backend_id)));
+               ereport(LOG,
+                               (errmsg("get_query_result: NULL data returned"),
+                                errdetail("node id (%d)", backend_id)));
                return sts;
        }