Retry bind on watchdog receive socket.
author Tatsuo Ishii <ishii@postgresql.org>
Tue, 3 Jun 2025 03:40:42 +0000 (12:40 +0900)
committer Tatsuo Ishii <ishii@postgresql.org>
Tue, 3 Jun 2025 03:40:42 +0000 (12:40 +0900)
Occasionally 028.watchdog_enable_consensus_with_half_votes times out
because binding the watchdog receive socket fails.  This commit tries
to mitigate the issue by retrying the bind.  Currently bind is attempted
up to 5 times, with a 1-second sleep after each failed attempt.

src/watchdog/watchdog.c

index 6937934445663f859fe46be8b1582e8784e45e44..065a69c333a683fab5e4a6b157b805c9a568d473 100644 (file)
@@ -905,6 +905,9 @@ wd_create_recv_socket(int port)
 
        for (walk = res; walk != NULL; walk = walk->ai_next)
        {
+               bool    bind_is_done;
+               int             bind_tries;
+
                if ((sock = socket(walk->ai_family, walk->ai_socktype, walk->ai_protocol)) < 0)
                {
                        /* socket create failed */
@@ -954,9 +957,24 @@ wd_create_recv_socket(int port)
                                continue;
                        }
                }
-               if (bind(sock, walk->ai_addr, walk->ai_addrlen) < 0)
+
+               bind_is_done = false;
+               for (bind_tries = 0; !bind_is_done && bind_tries < 5; bind_tries++)
+               {
+                       if (bind(sock, walk->ai_addr, walk->ai_addrlen) < 0)
+                       {
+                               /* bind failed */
+                               ereport(LOG,
+                                               (errmsg("failed to create watchdog receive socket. retrying..."),
+                                                errdetail("bind on \"TCP:%d\" failed with reason: \"%m\"", port)));
+                               sleep(1);
+                       }
+                       else
+                               bind_is_done = true;
+               }
+               /* bind failed finally */
+               if (!bind_is_done)
                {
-                       /* bind failed */
                        ereport(LOG,
                                        (errmsg("failed to create watchdog receive socket"),
                                         errdetail("bind on \"TCP:%d\" failed with reason: \"%m\"", port)));