From: Greg Sabino Mullane Date: Fri, 10 Jul 2009 15:58:58 +0000 (-0400) Subject: Allow the check_backends check to detect 'too many clients' message and return a... X-Git-Tag: 2.9.2~23 X-Git-Url: http://waps.l3s.uni-hannover.de/gitweb/?a=commitdiff_plain;h=85e4d6872f9f0d43b6a512737cfc424705b2fe2f;p=check_postgres.git Allow the check_backends check to detect 'too many clients' message and return a critical. Thanks to Jürgen Schulz-Brüssel for the idea. Add 'fatalregex' to run_command to allow known error messages through. Add 'quickreturn' for piling up database handles. --- diff --git a/check_postgres.pl b/check_postgres.pl index d96f88558..00a69ca77 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -84,6 +84,7 @@ our @get_methods = ( ## no critic (RequireInterpolationOfMetachars) our %msg = ( 'en' => { + 'backends-fatal' => q{Could not connect: too many connections}, 'backends-mrtg' => q{DB=$1 Max connections=$2}, 'backends-msg' => q{$1 of $2 connections ($3%)}, 'backends-nomax' => q{Could not determine max_connections}, @@ -277,14 +278,15 @@ our %msg = ( 'version-ok' => q{version $1}, }, 'fr' => { +'backends-fatal' => q{Could not connect: too many connections}, 'backends-mrtg' => q{DB=$1 Connexions maximum=$2}, 'backends-msg' => q{$1 connexions sur $2 ($3%)}, 'backends-nomax' => q{N'a pas pu déterminer max_connections}, 'backends-oknone' => q{Aucune connexion}, 'backends-users' => q{$1 pour le nombre d'utilisateurs doit être un nombre ou un pourcentage}, -'bloat-index' => q{(db $1) index $2 lignes:$3 pages:$4 devrait être:$5 ($6X) octets perdus:$7 ($8)}, + 'bloat-index' => q{(db $1) index $2 lignes:$3 pages:$4 devrait être:$5 ($6X) octets perdus:$7 ($8)}, 'bloat-nomin' => q{aucune relation n'atteint le critère minimum de fragmentation}, -'bloat-table' => q{(db $1) table $2.$3 lignes:$4 pages:$5 devrait être:$6 ($7X) place perdue:$8 ($9)}, + 'bloat-table' => q{(db $1) table $2.$3 lignes:$4 pages:$5 devrait être:$6 ($7X) place perdue:$8 ($9)}, 'checkpoint-baddir' => q{data_directory invalide : "$1"}, 'checkpoint-baddir2' => q{pg_controldata could not read the given data directory: "$1"}, 'checkpoint-badver' => q{Failed to run pg_controldata - probably the wrong version}, @@ -1396,6 +1398,7 @@ sub run_command { ## Run a command string against each of our databases using psql ## Optional args in a hashref: ## "failok" - don't report if we failed + ## "fatalregex" - allow this FATAL regex through ## "target" - use this targetlist instead of generating one ## "timeout" - change the timeout from the default of $opt{timeout} ## "regex" - the query must match this or we throw an error @@ -1627,7 +1630,7 @@ sub run_command { if ($err =~ /Timed out/) { ndie msg('runcommand-timeout', $timeout); } - else {ndie $res; + else { ndie msg('runcommand-err'); } } @@ -1647,7 +1650,13 @@ sub run_command { } if ($db->{error} =~ /FATAL/) { - ndie "$db->{error}"; + if ($db->{error} =~ /$arg->{fatalregex}/) { + $info->{fatalregex} = $db->{error}; + next; + } + else { + ndie "$db->{error}"; + } } elsif ($db->{error} =~ /statement timeout/) { @@ -2237,7 +2246,13 @@ sub check_backends { my $GROUPBY = q{GROUP BY 2,3}; $SQL = "SELECT COUNT(datid), ($MAXSQL), d.datname FROM pg_database d ". "LEFT JOIN pg_stat_activity s ON (s.datid = d.oid) $NOIDLE $GROUPBY ORDER BY datname"; - my $info = run_command($SQL, {regex => qr[\s*\d+ \| \d+\s+\|] } ); + my $info = run_command($SQL, {regex => qr[\s*\d+ \| \d+\s+\|], fatalregex => 'too many clients' } ); + + ## If we cannot connect because of too many clients, we treat as a critical error + if (exists $info->{fatalregex} and $info->{fatalregex} =~ /too many clients/) { + add_critical msg('backends-fatal'); + return; + } ## There may be no entries returned if we catch pg_stat_activity at the right ## moment in older versions of Postgres diff --git a/t/02_backends.t b/t/02_backends.t index adec2993e..2f1f46dbe 100644 --- a/t/02_backends.t +++ b/t/02_backends.t @@ -6,7 +6,7 @@ use 5.006; use strict; use warnings; use Data::Dumper; -use Test::More tests => 52; +use Test::More tests => 53; use lib 't','.'; use CP_Testing; @@ -34,7 +34,6 @@ $host = $cp->get_host(); $result = $cp->run(); - $t=qq{$S returned expected text and OK value}; like ($result, qr{^$label OK:}, $t); @@ -178,6 +177,14 @@ SKIP: { like ($cp->run('--include=postgres'), qr{ \| time=(\d\.\d\d) ardala=0 beedeebeedee=0 postgres=3}, $t); } +my %dbh; +for my $num (1..8) { + $dbh{$num} = $cp->test_database_handle({quickreturn=>1}); +} + +$t=qq{$S returns critical when too many clients to even connect}; +like ($cp->run('-w -10'), qr{^$label CRITICAL: .+too many connections}, $t); + $cp->drop_schema_if_exists(); exit; diff --git a/t/CP_Testing.pm b/t/CP_Testing.pm index 3f3faa2f9..e8a266bc4 100644 --- a/t/CP_Testing.pm +++ b/t/CP_Testing.pm @@ -72,7 +72,7 @@ sub test_database_handle { ref $arg eq 'HASH' or die qq{Must pass a hashref (or nothing) to test_database_handle\n}; ## Create the test database directory if it does not exist - my $dbdir = $self->{dbdir}; + my $dbdir = $arg->{dbdir} || $self->{dbdir}; if (! -d $dbdir) { -e $dbdir and die qq{Oops: I cannot create "$dbdir", there is already a file there!\n}; @@ -287,6 +287,8 @@ sub test_database_handle { } $dbh->ping() or die qq{Failed to ping!\n}; + return $dbh if $arg->{quickreturn}; + $dbh->{AutoCommit} = 1; $dbh->{RaiseError} = 0; if ($maj > 8 or ($maj==8 and $min >= 1)) {