Adding watchdog example to SGML doc
authorMuhammad Usama <m.usama@gmail.com>
Fri, 14 Oct 2016 13:14:20 +0000 (18:14 +0500)
committerMuhammad Usama <m.usama@gmail.com>
Fri, 14 Oct 2016 13:14:20 +0000 (18:14 +0500)
doc/src/sgml/examples.sgml
doc/src/sgml/watchdog.sgml

index 7585b78362969c709f8b621b67859684ffb507dc..bc2d84290bb9fc9d1dfd71404a9419bcca55639f 100644 (file)
@@ -363,13 +363,342 @@ $ for port in 5432 5433 5434; do
 
   </sect1>
 
-    <sect1 id="example-watchdog">
-      <title>Watchdog Configuration Example</title>
+  <sect1 id="example-watchdog">
+    <title>Watchdog Configuration Example</title>
 
+    <para>
+      This tutorial explains a simple way to try "Watchdog".
+      What you need is 2 Linux boxes on which <productname>
+      Pgpool-II</productname> is installed and a PostgreSQL
+      on the same machine or on another one. It is enough
+      to have a single backend node.
+      You can use watchdog with <productname>
+      Pgpool-II</productname> in any mode: replication mode,
+      master/slave mode and raw mode.
+    </para>
+    <para>
+      This example uses "osspc16" as the Active node and
+      "osspc20" as the Standby node. "someserver" means either of them.
+    </para>
+
+         <sect2 id="example-watchdog-configuration">
+                 <title>Common configurations</title>
       <para>
-       Watchdog configuration.
+        Set the following parameters in both of active and standby nodes.
       </para>
-    </sect1>
+
+      <sect3 id="example-watchdog-config-enable">
+        <title>Enabling watchdog</title>
+        <para>
+          First of all, set <xref linkend="guc-use-watchdog"> to on.
+          <programlisting>
+use_watchdog = on
+                                    # Activates watchdog
+          </programlisting>
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-config-upstream">
+        <title>Configure upstream servers</title>
+        <para>
+          Specify the upstream servers (e.g. application servers).
+          Leaving it blank is also fine.
+          <programlisting>
+trusted_servers = ''
+                                    # trusted server list which are used
+                                    # to confirm network connection
+                                    # (hostA,hostB,hostC,...)
+          </programlisting>
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-config-wd-comm">
+        <title>Watchdog Communication</title>
+        <para>
+          Specify the TCP port number for watchdog communication.
+          <programlisting>
+wd_port = 9000
+                                    # port number for watchdog service
+          </programlisting>
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-config-wd-vip">
+        <title>Virtual IP</title>
+        <para>
+          Specify the IP address to be used as a virtual IP address
+          in the <xref linkend="guc-delegate-IP">.
+          <programlisting>
+delegate_IP = '133.137.177.143'
+                                    # delegate IP address
+          </programlisting>
+        </para>
+        <note>
+          <para>
+            Make sure the IP address configured as the virtual IP is
+            free and is not used by any other machine.
+          </para>
+        </note>
+      </sect3>
+    </sect2>
+
+         <sect2 id="example-watchdog-configuration-each-server">
+                 <title>Individual Server Configurations</title>
+      <para>
+        Next, set the following parameters for each <productname>
+        Pgpool-II</productname>.
+        Specify <xref linkend="guc-other-pgpool-hostname">,
+        <xref linkend="guc-other-pgpool-port"> and
+        <xref linkend="guc-other-wd-port"> with the values of
+        the other <productname>Pgpool-II</productname> server.
+      </para>
+
+      <sect3 id="example-watchdog-configuration-active-server">
+        <title>Active (osspc16) Server configurations</title>
+        <para>
+          <programlisting>
+other_pgpool_hostname0 = 'osspc20'
+                                    # Host name or IP address to connect to for other pgpool 0
+other_pgpool_port0 = 9999
+                                    # Port number for other pgpool 0
+other_wd_port0 = 9000
+                                    # Port number for other watchdog 0
+          </programlisting>
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-configuration-standby-server">
+        <title>Standby (osspc20) Server configurations</title>
+        <para>
+          <programlisting>
+other_pgpool_hostname0 = 'osspc16'
+                                    # Host name or IP address to connect to for other pgpool 0
+other_pgpool_port0 = 9999
+                                    # Port number for other pgpool 0
+other_wd_port0 = 9000
+                                    # Port number for other watchdog 0
+          </programlisting>
+        </para>
+      </sect3>
+    </sect2>
+
+         <sect2 id="example-watchdog-start-server">
+                 <title>Starting <productname>Pgpool-II</productname></title>
+      <para>
+        Start <productname>Pgpool-II</productname> on each server as the
+        <literal>root</literal> user with the <literal>"-n"</literal> switch
+        and redirect log messages into the pgpool.log file.
+      </para>
+
+      <sect3 id="example-watchdog-start-active-server">
+        <title>Starting pgpool in Active server (osspc16)</title>
+        <para>
+          First start the <productname>Pgpool-II</productname> on Active server.
+          <programlisting>
+[user@osspc16]$ su -
+[root@osspc16]# {installed_dir}/bin/pgpool -n -f {installed_dir}/etc/pgpool.conf &gt; pgpool.log 2&gt;&amp;1
+          </programlisting>
+          Log messages will show that <productname>Pgpool-II</productname>
+          has the virtual IP address and starts watchdog process.
+          <programlisting>
+LOG:  I am announcing my self as master/coordinator watchdog node
+LOG:  I am the cluster leader node
+DETAIL:  our declare coordinator message is accepted by all nodes
+LOG:  I am the cluster leader node. Starting escalation process
+LOG:  escalation process started with PID:59449
+<emphasis>LOG:  watchdog process is initialized
+LOG:  watchdog: escalation started
+LOG:  I am the master watchdog node</emphasis>
+DETAIL:  using the local backend node status
+          </programlisting>
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-start-standby-server">
+        <title>Starting pgpool in Standby server (osspc20)</title>
+        <para>
+          Now start the <productname>Pgpool-II</productname> on Standby server.
+          <programlisting>
+[user@osspc20]$ su -
+[root@osspc20]# {installed_dir}/bin/pgpool -n -f {installed_dir}/etc/pgpool.conf &gt; pgpool.log 2&gt;&amp;1
+          </programlisting>
+          Log messages will show that <productname>Pgpool-II</productname>
+          has joined the watchdog cluster as a standby watchdog.
+          <programlisting>
+LOG:  watchdog cluster configured with 1 remote nodes
+LOG:  watchdog remote node:0 on Linux_osspc16_9000:9000
+LOG:  interface monitoring is disabled in watchdog
+LOG:  IPC socket path: "/tmp/.s.PGPOOLWD_CMD.9000"
+LOG:  watchdog node state changed from [DEAD] to [LOADING]
+LOG:  new outbond connection to Linux_osspc16_9000:9000
+LOG:  watchdog node state changed from [LOADING] to [INITIALIZING]
+LOG:  watchdog node state changed from [INITIALIZING] to [STANDBY]
+<emphasis>
+LOG:  successfully joined the watchdog cluster as standby node
+DETAIL:  our join coordinator request is accepted by cluster leader node "Linux_osspc16_9000"
+LOG:  watchdog process is initialized
+</emphasis>
+          </programlisting>
+        </para>
+      </sect3>
+    </sect2>
+
+    <sect2 id="example-watchdog-try">
+      <title>Try it out</title>
+      <para>
+      Confirm that the virtual IP address responds to ping.
+      <programlisting>
+[user@someserver]$ ping 133.137.177.143
+PING 133.137.177.143 (133.137.177.143) 56(84) bytes of data.
+64 bytes from 133.137.177.143: icmp_seq=1 ttl=64 time=0.328 ms
+64 bytes from 133.137.177.143: icmp_seq=2 ttl=64 time=0.264 ms
+64 bytes from 133.137.177.143: icmp_seq=3 ttl=64 time=0.412 ms
+      </programlisting>
+      Confirm that the Active server, which was started first, has the virtual IP address.
+      <programlisting>
+[root@osspc16]# ifconfig
+eth0      ...
+
+eth0:0    inet addr:133.137.177.143 ...
+
+lo        ...
+      </programlisting>
+      Confirm that the Standby server, which was started second, does not have the virtual IP address.
+      <programlisting>
+[root@osspc20]# ifconfig
+eth0      ...
+
+lo        ...
+      </programlisting>
+
+      Try to connect to PostgreSQL with "psql -h delegate_IP -p port".
+      <programlisting>
+[user@someserver]$ psql -h 133.137.177.143 -p 9999 -l
+      </programlisting>
+      </para>
+    </sect2>
+
+    <sect2 id="example-watchdog-vip-switch">
+      <title>Switching virtual IP</title>
+      <para>
+        Confirm how the Standby server works when the Active server can't provide its service.
+        Stop <productname>Pgpool-II</productname> on the Active server.
+        <programlisting>
+[root@osspc16]# {installed_dir}/bin/pgpool stop
+      </programlisting>
+
+      Then, the Standby server starts to use the virtual IP address. Log shows:
+
+        <programlisting>
+<emphasis>
+LOG:  remote node "Linux_osspc16_9000" is shutting down
+LOG:  watchdog cluster has lost the coordinator node
+</emphasis>
+LOG:  watchdog node state changed from [STANDBY] to [JOINING]
+LOG:  watchdog node state changed from [JOINING] to [INITIALIZING]
+LOG:  I am the only alive node in the watchdog cluster
+HINT:  skiping stand for coordinator state
+LOG:  watchdog node state changed from [INITIALIZING] to [MASTER]
+LOG:  I am announcing my self as master/coordinator watchdog node
+LOG:  I am the cluster leader node
+DETAIL:  our declare coordinator message is accepted by all nodes
+<emphasis>
+LOG:  I am the cluster leader node. Starting escalation process
+LOG:  watchdog: escalation started
+</emphasis>
+LOG:  watchdog escalation process with pid: 59551 exit with SUCCESS.
+         </programlisting>
+
+         Confirm that the virtual IP address still responds to ping.
+         <programlisting>
+[user@someserver]$ ping 133.137.177.143
+PING 133.137.177.143 (133.137.177.143) 56(84) bytes of data.
+64 bytes from 133.137.177.143: icmp_seq=1 ttl=64 time=0.328 ms
+64 bytes from 133.137.177.143: icmp_seq=2 ttl=64 time=0.264 ms
+64 bytes from 133.137.177.143: icmp_seq=3 ttl=64 time=0.412 ms
+      </programlisting>
+
+         Confirm that the Active server doesn't use the virtual IP address any more.
+         <programlisting>
+[root@osspc16]# ifconfig
+eth0      ...
+
+lo        ...
+        </programlisting>
+
+         Confirm that the Standby server uses the virtual IP address.
+         <programlisting>
+[root@osspc20]# ifconfig
+eth0      ...
+
+eth0:0    inet addr:133.137.177.143 ...
+
+lo        ...
+        </programlisting>
+
+        Try to connect to PostgreSQL with "psql -h delegate_IP -p port".
+        <programlisting>
+[user@someserver]$ psql -h 133.137.177.143 -p 9999 -l
+        </programlisting>
+
+      </para>
+    </sect2>
+
+    <sect2 id="example-watchdog-more">
+      <title>More</title>
+
+      <sect3 id="example-watchdog-more-lifecheck">
+        <title>Lifecheck</title>
+        <para>
+          There are parameters that control watchdog's monitoring.
+          Specify the check interval with <xref linkend="guc-wd-interval">,
+          the retry count with <xref linkend="guc-wd-life-point">,
+          the query used for the check with <xref linkend="guc-wd-lifecheck-query"> and
+          finally the type of lifecheck with <xref linkend="guc-wd-lifecheck-method">.
+          <programlisting>
+wd_lifecheck_method = 'query'
+                                    # Method of watchdog lifecheck ('heartbeat' or 'query' or 'external')
+                                    # (change requires restart)
+wd_interval = 10
+                                    # lifecheck interval (sec) > 0
+wd_life_point = 3
+                                    # lifecheck retry times
+wd_lifecheck_query = 'SELECT 1'
+                                    # lifecheck query to pgpool from watchdog
+        </programlisting>
+
+        </para>
+      </sect3>
+
+      <sect3 id="example-watchdog-more-vip-switching">
+        <title>Switching virtual IP address</title>
+        <para>
+          There are parameters for switching the virtual IP address.
+          Specify switching commands <xref linkend="guc-if-up-cmd">,
+          <xref linkend="guc-if-down-cmd">, the path to them
+          <xref linkend="guc-if-cmd-path">, the command executed after
+          switching to send ARP request <xref linkend="guc-arping-cmd">
+          and the path to it <xref linkend="guc-arping-path">.
+          <programlisting>
+if_cmd_path = '/sbin'
+                                    # ifconfig command path
+if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
+                                    # startup delegate IP command
+if_down_cmd = 'ifconfig eth0:0 down'
+                                    # shutdown delegate IP command
+
+arping_path = '/usr/sbin'           # arping command path
+
+arping_cmd = 'arping -U $_IP_$ -w 1'
+        </programlisting>
+        You can also use the custom scripts to bring up and bring down the
+        virtual IP using <xref linkend="guc-wd-escalation-command"> and
+        <xref linkend="guc-wd-de-escalation-command"> configurations.
+        </para>
+      </sect3>
+
+    </sect2>
+  </sect1>
 
     <sect1 id="example-AWS">
       <title>AWS Configuration Example</title>
index 2695a521e509ef0e95071587b041189021e1700c..44fdf7e3a27b404481c45b05f70d1041ad8fb74e 100644 (file)
     </listitem>
     </varlistentry>
 
-  <varlistentry id="guc-other-wd-port0" xreflabel="other_wd_port0">
+  <varlistentry id="guc-other-wd-port" xreflabel="other_wd_port">
    <term><varname>other_wd_port0</varname> (<type>integer</type>)
     <indexterm>
       <primary><varname>other_wd_port0</varname> configuration parameter</primary>