Minor fixes or better crashed-db recovery.

author Greg Sabino Mullane <greg@endpoint.com>

Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)

committer Greg Sabino Mullane <greg@endpoint.com>

Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)
author Greg Sabino Mullane <greg@endpoint.com>
Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)
committer Greg Sabino Mullane <greg@endpoint.com>
Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)
diff --git a/Bucardo.pm b/Bucardo.pm

index 626ecf30b00e5c83efdfa62f6db3c12b537a2832..d7f2080c678653da7c7efc9ad3dcda4d431023c8 100644 (file)
--- a/Bucardo.pm
+++ b/Bucardo.pm
@@ -658,6 +658,7 @@ sub start_mcp {
                  }
                  elsif (! $self->{sdb}{$db}{dbh}->ping()) {
                      $self->glog("Database $db failed ping check", LOG_NORMAL);
+                    $msg = 'Ping failed';
                      $bad++;
                  }
              }
@@ -668,7 +669,8 @@ sub start_mcp {
                      ## If we already made a MCP label, go there
                      ## Else fallthrough and assume our bucardo.sync changes stick!
                      if ($self->{mcp_loop_started}) {
-                        die 'We are going to redo the MCP loop';
+                        $self->glog('Going to restart the MCP loop, as syncs have changed', LOG_VERBOSE);
+                        die 'We are going to redo the MCP loop'; ## goes to end of mcp main eval
                      }
                  }
              }
@@ -924,18 +926,29 @@ sub mcp_main {
  
                  next if $d->{dbtype} =~ /flat|mongo|redis/o;
  
-                next if $d->{status} eq 'stalled';
-
-                if (! $d->{dbh}->ping) {
-                    ## Database is not reachable, so we'll try and reconnect
+                my $try_reconnect = 0;
+                if ($d->{status} eq 'stalled') {
+                    $self->glog("Trying to connect to stalled database $dbname", LOG_VERBOSE);
+                    $try_reconnect = 1;
+                }
+                elsif (! $d->{dbh}->ping) {
                      $self->glog("Ping failed for database $dbname, trying to reconnect", LOG_NORMAL);
+                }
+
+                if ($try_reconnect) {
  
                      ## Sleep a hair so we don't reloop constantly
                      sleep 0.5;
-
-                    ($d->{backend}, $d->{dbh}) = $self->connect_database($dbname);
+                    undef $d->{backend};
+                    {
+                        local $SIG{__DIE__} = 'IGNORE';
+                        eval {
+                            ($d->{backend}, $d->{dbh}) = $self->connect_database($dbname);
+                        };
+                    }
                      if (defined $d->{backend}) {
                          $self->show_db_version_and_time($d->{dbh}, $d->{backend}, qq{Database "$dbname" });
+                        $d->{status} = 'active'; ## In case it was stalled
                      }
                      else {
                          $self->glog("Unable to reconnect to database $dbname!", LOG_WARN);
@@ -1104,6 +1117,7 @@ sub mcp_main {
                  $self->log_config();
  
                  ## We need to reload ourself as well
+                ## XXX Not needed for some items! e.g. mcp_pingtime
                  $self->reload_mcp();
  
                  ## Let anyone listening know we are done
@@ -1446,6 +1460,7 @@ sub mcp_main {
  
          ## We may want to redo if the error was not *that* fatal
          if ($@ =~ /redo/) {
+            $self->glog('Going to restart the main MCP loop', LOG_VERBOSE);
              redo MCP;
          }
author	Greg Sabino Mullane <greg@endpoint.com>
	Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)
committer	Greg Sabino Mullane <greg@endpoint.com>
	Sat, 22 Nov 2014 19:13:36 +0000 (14:13 -0500)