From f2f1744ff5dbe65f1f72f14319a44e454bcf02b7 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Wed, 16 Mar 2005 14:49:53 +0000 Subject: [PATCH] Add automatic mirror manager that builds a DNS zone with active web mirrors. git-svn-id: https://pgweb.postgresql.org/svn/trunk@810 8f5c7a92-453e-0410-a47f-ad33c8a6b003 --- README | 71 +++++++++ autodnscheck.pl | 88 +++++++++++ automirror.php | 384 ++++++++++++++++++++++++++++++++++++++++++++++++ automirror.sh | 21 +++ automirror.sql | 47 ++++++ 5 files changed, 611 insertions(+) create mode 100644 README create mode 100644 autodnscheck.pl create mode 100644 automirror.php create mode 100644 automirror.sh create mode 100644 automirror.sql diff --git a/README b/README new file mode 100644 index 0000000..dab8427 --- /dev/null +++ b/README @@ -0,0 +1,71 @@ +PostgreSQL Automatic Mirror Management +-------------------------------------- + + +Concepts +-------- +The automatic mirror system works like this, in a few short points: + +* All mirrors are managed in the zone "mirrors.postgresql.org". + The actual records (e.g. www.postgresql.org) are just CNAMEs pointing + to this zone. + +* This zone has a TTL of 10-15 minutes max, to make sure we get a + fairly fast failover while still maintaining cache effects + +* This zone is served by several nameservers located on different providers + (just as the main zone) + +* On a machine that *must not* be one of the web- or DNS servers, a script + runs every 5 minutes (at least) to verify the currentness of the different + mirrors. + +* This script connects to the master server and fetches /sync_timestamp. If + it cannot do this (master is down) it will send an error message and skip + any further processing. This means that if the master goes down, we no longer + update the other servers. In the future, we might want to keep scanning + for servers that are down, but skip checking the actual timestamp. 
+ +* After this, it connects in sequence to each listed web mirror, and fetches + /sync_timestamp. If this fails, or if sync_timestamp is too far off from + the master server's, the server will be disabled (in the database). Likewise, + if the server was down and is now current, it will be re-enabled. + +* A very simple flap detection algorithm is run after all servers have been + scanned. If a server has changed state too many times in a specified interval, + the server is listed as flapping. In this case, it will not be included in the + zone and it will *NOT* be automatically re-enabled. Flapping servers have to + be manually enabled. + +* If any data has changed due to this, the zone is reloaded in the nameserver, + and propagates to all the clients based on DNS NOTIFY messages. + +* If the zone has not been updated in 24 hours, the zone will be reloaded anyway. + This is done to make it possible to monitor the DNS servers themselves for + update issues. + +* The zone serial number is the unix timestamp (seconds since epoch) at the time + the zone is built. + +* There is a Perl script to check that the DNS zones are updated. This should not + run on the same machine as the checker script. This script is not currently + made for running automated, but that should be easy to fix. + + +Requirements +------------ +* A postgresql database. All tables are stored in the schema "automirror". The + tables are very small and simple. + +* PHP is used to run the queries. Naturally, it needs to be compiled with + postgresql support. + +* The zones are generated in BIND format. It's been tested with BIND 9.3. + +* Perl is used by the DNS checking script. It uses the Net::DNS CPAN module. + +* Sendmail (or a replacement) must be working on the box for the script to + send its reports out. + +* Scripts are configured by variables at the top. No big surprise there. 
+ diff --git a/autodnscheck.pl b/autodnscheck.pl new file mode 100644 index 0000000..bd95473 --- /dev/null +++ b/autodnscheck.pl @@ -0,0 +1,88 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Net::DNS; + +my $DEBUG=0; +my $DOMAIN="mirrors.postgresql.org"; +my $MASTER="62.65.68.81"; + +my $errors = ''; + + +# Create two resolvers. One to resolve general names (using the machine's +# default resolver) and one that queries the master. +my $res_generic = Net::DNS::Resolver->new; +my $res_master = Net::DNS::Resolver->new(nameservers => [$MASTER], recurse => 0); + + +# Load the SOA record with the serial number from the master server +my $qq = $res_master->query($DOMAIN,"SOA"); +die "Could not get SOA record from primary!\n" unless ($qq); +my $masterserial = ($qq->answer)[0]->serial; +die "Could not get serial number from primary!\n" unless ($masterserial); + +$DEBUG && print "Master serial is: $masterserial\n"; + + +# Load the list of available nameservers from the master server +my $q = $res_master->query($DOMAIN,"NS"); +die "No nameservers found!" if (!$q); + +my $servercount = $q->answer; # scalar context: presumably the number of records in the answer section; verify against Net::DNS::Packet::answer +if ($servercount < 4) { + $errors .= "There are only $servercount DNS servers listed!\n"; +} + + +# Check the serial on each server against the one on the master +foreach my $rr ($q->answer) { + my $nsip=''; + $DEBUG && print "Scanning " . $rr->nsdname . "\n"; + + my $ns = $res_generic->query($rr->nsdname,'A'); + if (!$ns) { + $errors .= "Could not find nameserver " . $rr->nsdname . "\n"; + next; + } + + foreach my $rrr ($ns->answer) { + $nsip = $rrr->address if ($rrr->type eq "A"); + } + if ($nsip eq "") { + $errors .= "Nameserver " . $rr->nsdname . " has no A record!\n"; + next; + } + + my $res2 = Net::DNS::Resolver->new(nameservers => [$nsip], recurse => 0); + + $qq = $res2->query($DOMAIN,"SOA"); + if (!$qq) { + $errors .= "Failed to query nameserver " . $rr->nsdname . 
" for SOA record!\n"; + next; + } + + my $serial = ($qq->answer)[0]->serial; + + if (!$serial) { + $errors .= "Failed to get serial from nameserver " . $rr->nsdname . "\n"; + next; + } + $DEBUG && print "Serial for " . $rr->nsdname . " is $serial\n"; + + if ($serial != $masterserial) { + $errors .= "Serial for " . $rr->nsdname . " ($serial) differs from master ($masterserial)\n"; + next; + } +} + +if ($errors ne "") { + print "** Errors occured **\n"; + print $errors . "\n"; + print "********************\n"; + exit(1); +} +else { + print "DNS check completed, all $servercount servers in sync.\n"; +} diff --git a/automirror.php b/automirror.php new file mode 100644 index 0000000..f28bcf8 --- /dev/null +++ b/automirror.php @@ -0,0 +1,384 @@ +Status('Connecting to database...'); + $mirrors = $db->Query("SELECT id,ip,insync,description FROM mirrors WHERE enabled=1 AND flapping=0", TRUE); + + $log->Status('Loading from wwwmaster...'); + $wwwmaster =& new MirrorLoader($log,$MASTERIP,'wwwmaster.postgresql.org'); + if (!$wwwmaster->FetchLastUpdate()) { + $log->Log('Failed to load sync date from wwwmaster!'); + $log->Flush(); + exit(0); // Exitcode 0 will cause double error msgs + } + $log->Status('wwwmaster has sync date: ' . $wwwmaster->LastUpdatedStr()); + + while ($row = pg_fetch_row($mirrors)) { + $log->Status('Scanning mirror ' . $row[1]); + + $current =& new MirrorLoader($log,$row[1],'www.postgresql.org'); + if (!$current->FetchLastUpdate()) { + $log->Log('Mirror ' . $row[1] . ' (' . $row[3] . ') returns no timestamp!'); + if ($row[2] == 1) { + $db->DisableMirror($row[0],'No timestamp'); + $log->Log('Mirror ' . $row[1] . ' now disabled'); + } + continue; + } + + $diff = $wwwmaster->_lastupdate - $current->_lastupdate; + if ($diff < 0) { + $log->Log('Mirror ' . $row[1] . ' (' . $row[3] . ') claims to be newer than wwwmaster!'); + $log->Log('Mirror has ' . $current->LastUpdatedStr() . ', wwwmaster has ' . 
$wwwmaster->LastUpdatedStr()); + if ($row[2] == 1) { + $db->DisableMirror($row[0],'Newer than master'); + $log->Log('Mirror ' . $row[1] . ' now disabled'); + } + continue; + } + if ($diff > $MAX_TIME_DIFF) { + $log->Log('Mirror ' . $row[1] . ' (' . $row[3] . ') has not been updated.'); + $log->Log('Mirror has ' . $current->LastUpdatedStr() . ', wwwmaster has ' . $wwwmaster->LastUpdatedStr()); + if ($row[2] == 1) { + $db->DisableMirror($row[0],'Not updated'); + $log->Log('Mirror ' . $row[1] . ' now disabled'); + } + continue; + } + if ($row[2] == 0) { + $db->EnableMirror($row[0],'Recovered'); + $log->Log('Mirror ' . $row[1] . ' (' . $row[3] . ') recovered, now enabled.'); + } + } + + pg_free_result($mirrors); + + + // Look for flapping servers. + // We define flapping as having more than three state-changes in the past five hours + // Note! We *never* reset the flapping flag, that has to be done manually. + $log->Status('Looking for flapping servers'); + $flappers = $db->Query("SELECT id,ip,description FROM mirrors INNER JOIN mirror_state_change ON mirrors.id=mirror_state_change.mirror WHERE current_timestamp-dat<'5 hours' AND mirrors.enabled=1 AND mirrors.flapping=0 GROUP BY id,ip,description HAVING count(*) > 3",TRUE); + while ($row = pg_fetch_row($flappers)) { + $log->Log('Mirror ' . $row[1] . ' (' . $row[2] . ') is flapping, disabling.'); + $db->FlappingMirror($row[0]); + } + pg_free_result($flappers); + + // Make sure we don't spit out a completely empty zone file + $log->Status('Looking for empty mirror types'); + $emptytypes = $db->Query('SELECT type FROM mirrortypes WHERE NOT EXISTS (SELECT * FROM mirrors WHERE mirrors.type=mirrortypes.type AND mirrors.enabled=1 AND mirrors.insync=1 AND mirrors.flapping=0)', TRUE); + if (pg_num_rows($emptytypes) > 0) { + // YIKES! + $log->Log('WARNING! One or more mirror types would end up empty:'); + while ($row = pg_fetch_row($emptytypes)) { + $log->Log('Type: ' . 
$row[0]); + } + $log->Log('ROLLING BACK ALL CHANGES AND REVERTING TO PREVIOUS VERSION OF ZONE!'); + $db->Rollback(); + $log->Flush(); + exit(0); + } + + $db->Commit(); + $db->Begin(); + + if (!$db->_changed) { + // No changes made. But we still spit out one zone / day, so scripts + // monitoring this script will know we are alive + $lastdump = $db->Query("SELECT CASE WHEN current_timestamp-lastdump>'24 hours'::interval THEN 1 ELSE 0 END FROM zone_last_dump", TRUE); + if (!($row = pg_fetch_row($lastdump))) { + $log->Log('Could not determine last dump date - zero rows!'); + $log->Flush(); + exit(1); + } + if ($row[0] == 0) { + $log->Status('Not dumping zone - no changes'); + $log->Flush(FALSE); + exit(0); + } + $log->Log('Rebuilding zone because last update was more than 24 hours ago.'); + } + + + $zg =& new ZoneGenerator($log,$db,$ZONE_PATH); + $entries = $db->Query('SELECT type,ip FROM mirrors WHERE enabled=1 AND insync=1 AND flapping=0 ORDER BY type',TRUE); + while ($row = pg_fetch_row($entries)) { + $zg->AddServer($row[0],$row[1]); + } + pg_free_result($entries); + + $log->Log('Dumping new zonefile'); + $db->Query('UPDATE zone_last_dump SET lastdump=CURRENT_TIMESTAMP',TRUE); + if ($zg->DumpFile()) { + $db->Commit(); + } + + $log->Log('Completed.'); + $log->Flush(); + exit(0); + + + // + // Mirror loader + // + class MirrorLoader { + var $_log; + var $_ip=''; + var $_host; + var $_lastupdate = -1; + var $_port = 80; + + function MirrorLoader(&$log,$ip,$host) { + $this->_log =& $log; + $this->_host = $host; + $this->_ip = $ip; + } + + function FetchLastUpdate() { + $fp = @fsockopen($this->_ip, $this->_port); + if (!$fp) { + $this->_log->Log('Failed to connect to port ' . $this->_port . ' on ip ' . $this->_ip); + return FALSE; + } + + + $q = "GET /sync_timestamp HTTP/1.0\r\nHost: " . $this->_host . "\r\nUser-Agent: pgautomirror/0\r\n\r\n"; /* HTTP request and header lines must be terminated by CRLF, not a bare LF */ + if (!fwrite($fp, $q)) { + $this->_log->Log('Failed to write network data to ' . 
$this->_ip); + fclose($fp); + return FALSE; + } + + $buf = ''; + while ($tmp = fread($fp, 8192)) { + $buf .= $tmp; + } + fclose($fp); + + if ($buf == '') { + $this->_log->Log('No data returned from ' . $this->_ip); + return FALSE; + } + + if (!preg_match('@^HTTP/1.[0-9] 200@', $buf)) { + $r = strpos($buf,"\n"); + if (!$r) $r = strlen($buf); + $this->_log->Log($this->_ip . ' returned "' . substr($buf, 0,$r-1) . '" instead of 200'); + return FALSE; + } + + // Find content length + if (!preg_match('@Content-Length: ([0-9]+)@', $buf, $parts)) { + $this->_log->Log($this->_ip . ' did not return a valid Content-Length'); + return FALSE; + } + + $this->_lastupdate = strtotime(substr($buf, -$parts[1], 23)); + if ($this->_lastupdate === FALSE || $this->_lastupdate == -1) { /* strtotime() signals failure with FALSE on PHP >= 5.1 and with -1 on older versions */ + $this->_log->Log($this->_ip . ' did not return a valid timestamp'); + return FALSE; + } + + return TRUE; + } + + function LastUpdatedStr() { + return date("Y-m-d H:i:s O",$this->_lastupdate); + } + } + + + // + // A very simple database wrapper + // + class Database { + var $_db = null; + var $_log = null; + var $_changed = FALSE; + + function Database(&$log,$connstr) { + $this->_log =& $log; + + $this->_db = @pg_connect($connstr); + if (!$this->_db) { + $this->_log->Log('Failed to connect to database: ' . $php_errormsg . '!'); + $this->_log->Flush(); + exit(1); + } + + if (!pg_query($this->_db, "SET search_path='automirror'")) { + $this->_log->Log('Failed to set search_path: ' . pg_last_error($this->_db)); + $this->_log->Flush(); + exit(1); + } + $this->Begin(); + } + + function Begin() { + if (!pg_query($this->_db, "BEGIN TRANSACTION")) { + $this->_log->Log('Failed to start transaction: ' . pg_last_error($this->_db)); + $this->_log->Flush(); + exit(1); + } + } + + function Commit() { + if (!pg_query($this->_db, "COMMIT TRANSACTION")) { + $this->_log->Log('Failed to commit transaction: ' . 
pg_last_error($this->_db)); + return false; + } + return true; + } + + function Rollback() { + if (!pg_query($this->_db, "ROLLBACK TRANSACTION")) { + $this->_log->Log('Failed to rollback transaction: ' . pg_last_error($this->_db)); + return false; + } + return true; + } + + function Query($query, $exitonfail) { /* Runs $query; on failure logs the error and the query text, then exits with status 1 if $exitonfail is set, otherwise returns FALSE */ + $r = pg_query($this->_db, $query); + if (!$r) { + $this->_log->Log('Query to database backend failed: ' . pg_last_error($this->_db)); + $this->_log->Log('Query was: "' . $query . '"'); + if ($exitonfail) { + $this->_log->Flush(); + exit(1); + } + return FALSE; + } + return $r; + } + + function DisableMirror($mirrid,$reason) { + $this->SetMirrorState($mirrid,0,$reason); + } + function EnableMirror($mirrid,$reason) { + $this->SetMirrorState($mirrid,1,$reason); + } + function SetMirrorState($mirrid,$state,$reason) { /* Records the change in mirror_state_change, updates mirrors.insync and flags this run as changed; $mirrid and $state are concatenated into the SQL unescaped */ + $this->Query("INSERT INTO mirror_state_change(mirror,dat,newstate,comment) VALUES (" . $mirrid . ",CURRENT_TIMESTAMP," . $state . ",'" . pg_escape_string($reason) . "')",TRUE); + $this->Query("UPDATE mirrors SET insync=" . $state . " WHERE id=" . $mirrid,TRUE); + $this->_changed = TRUE; + } + function FlappingMirror($mirrid) { + $this->Query("UPDATE mirrors SET flapping=1 WHERE id=" . $mirrid,TRUE); + $this->_changed = TRUE; + } + } + + + // + // Handles generation of the actual zones + // + class ZoneGenerator { + var $_log; + var $_entries; + var $_db; + var $_path; + + function ZoneGenerator(&$log, &$db, $path) { + $this->_log =& $log; + $this->_db =& $db; + $this->_entries = Array(); + $this->_path = $path; + } + + function AddServer($type, $ip) { /* Appends $ip to the list of addresses kept for $type */ + $a = $this->_entries[$type]; + if (empty($a)) { + $a = Array(); + $this->_entries[$type] = $a; + } + $this->_entries[$type][] = $ip; + } + + function DumpFile() { + $serial = time(); + $nameservers = $this->_db->Query("SELECT host FROM nameservers", TRUE); + $contents = ' +$TTL 15M +@ IN SOA ns.hub.org. root.hub.org. ( + ' . $serial . 
' ; serial + 15M ; refresh + 5M ; retry + 1W ; expire + 15M ; Minimum TTL +) +'; + while ($row = pg_fetch_row($nameservers)) { + $contents .= '@ IN NS ' . $row[0] . ".\n"; + } + $contents .= "\n\n"; + + foreach ($this->_entries as $type=>$entries) { + foreach ($entries as $entry) { + $contents .= $type . ' IN A ' . $entry . "\n"; + } + } + + $f = fopen($this->_path . '/db.mirrors.postgresql.org','w+'); + if (!$f) { + $this->_log->Log('Failed to write to ' . $this->_path . '/db.mirrors.postgresql.org'); /* report the file that was actually opened */ + $this->_log->Log('Could not dump zone file'); + return false; + } + fwrite($f,$contents); + fclose($f); + return true; + } + } + + // + // Handles logging, including sending it out as mail + // + class Logger { + var $_l = ''; + var $_debug = 0; + var $_mail; + + function Logger($debug,$mail) { + $this->_debug = $debug; + $this->_mail = $mail; + } + + function Log($str) { + $this->_l .= $str . "\n"; + } + + function Flush($domail=TRUE) { + if ($this->_l != '') { + echo " *** LOG START ***\n"; + echo $this->_l; + echo " **** LOG END ****\n"; + if ($domail) { + mail($this->_mail, 'PostgreSQL AutoMirror Report', $this->_l, '', $this->_mail); + } + } + } + + function Status($str) { + if ($this->_debug) { + echo $str . "\n"; + } + } + } +?> diff --git a/automirror.sh b/automirror.sh new file mode 100644 index 0000000..7789366 --- /dev/null +++ b/automirror.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# Run the automirror script with a timeout (in seconds) +TIMEOUT=120 +MAILREPORT="pgsql-slavestothewww@postgresql.org mha@sollentuna.net dpage@vale-housing.co.uk" + +cd /root/pgmirror + +/usr/local/bin/php automirror.php "$MAILREPORT" > mirrors.log 2>&1 & +export BG=$! +(sleep $TIMEOUT >/dev/null 2>&1 ; kill ${BG} > /dev/null 2>&1) & +export BG2=$! +wait ${BG} >/dev/null 2>&1 +if [ ! $? = 0 ]; then + echo "An error occured when running the automirror script! Something is wrong!!!" 
| /usr/sbin/sendmail $MAILREPORT + exit +fi + +kill ${BG2} >/dev/null 2>&1 +/usr/local/bind/sbin/rndc reload mirrors.postgresql.org + diff --git a/automirror.sql b/automirror.sql new file mode 100644 index 0000000..db3a846 --- /dev/null +++ b/automirror.sql @@ -0,0 +1,47 @@ +CREATE SCHEMA automirror; +SET search_path='automirror'; +CREATE TABLE zone_last_dump( + lastdump timestamp without time zone +); +INSERT INTO zone_last_dump values ('2000-01-01'); + +CREATE TABLE mirrortypes ( + type varchar(8) NOT NULL PRIMARY KEY +); + +CREATE TABLE mirrors ( + id SERIAL NOT NULL PRIMARY KEY, + type varchar(8) NOT NULL REFERENCES mirrortypes(type), + ip varchar(16) NOT NULL UNIQUE, + enabled int NOT NULL, + insync int NOT NULL, + flapping int NOT NULL, + description varchar(128) NOT NULL +); + +CREATE TABLE mirror_state_change ( + mirror int NOT NULL REFERENCES mirrors(id), + dat timestamp without time zone NOT NULL, + newstate int NOT NULL, + comment varchar(256) NOT NULL, + CONSTRAINT mirror_state_change_pk PRIMARY KEY (mirror, dat) +); + +CREATE TABLE nameservers ( + host varchar(64) NOT NULL PRIMARY KEY, + ip varchar(16) NOT NULL UNIQUE +); + +INSERT INTO mirrortypes (type) VALUES ('static'); +INSERT INTO mirrors (type,ip,enabled,insync,flapping,description) VALUES ('static','212.247.200.180',1,1,0,'Eastside'); +INSERT INTO mirrors (type,ip,enabled,insync,flapping,description) VALUES ('static','65.19.161.2',1,1,0,'Borg'); +INSERT INTO mirrors (type,ip,enabled,insync,flapping,description) VALUES ('static','66.98.251.159',1,1,0,'svr4'); +INSERT INTO mirrors (type,ip,enabled,insync,flapping,description) VALUES ('static','217.20.119.91',1,1,0,'Pervasive'); + +INSERT INTO nameservers (host,ip) VALUES ('ns.hub.org','200.46.204.2'); +INSERT INTO nameservers (host,ip) VALUES ('ns2.hub.org','66.98.250.36'); +INSERT INTO nameservers (host,ip) VALUES ('ns3.hub.org','200.46.204.4'); +INSERT INTO nameservers (host,ip) VALUES ('ns-a.lerctr.org','192.147.25.11'); +INSERT INTO nameservers (host,ip) VALUES 
('ns-b.lerctr.org','192.147.25.45'); +INSERT INTO nameservers (host,ip) VALUES ('ns-1.sollentuna.net','62.65.68.8'); + -- 2.39.5