Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 29 additions & 18 deletions bin/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,24 +381,6 @@ def health():
if node.is_primary() or node.is_standby():
update_session_ttl()

# Create a snapshot and send it to the object store if this is the
# node and time to do so.
# TODO: move this section to a periodic task handler when that lands
# (ref https://github.com/joyent/containerpilot/pull/134) in
# ContainerPilot so we don't block the health check.
if all((node.is_snapshot_node(),
(not is_backup_running()),
(is_binlog_stale(node.conn) or is_time_for_snapshot()))):
try:
write_snapshot(node.conn)
except Exception as ex:
# we're going to log but not sys.exit(1) here so that
# we don't mark the primary as unhealthy when a backup
# fails. The BACKUP_TTL_KEY will expire so we can alert
# on that externally.
log.exception(ex)
# / TODO: end of section to move to periodic task

mysql_query(node.conn, 'SELECT 1', ())
except Exception as ex:
log.exception(ex)
Expand Down Expand Up @@ -471,6 +453,35 @@ def on_change():
time.sleep(1) # avoid hammering Consul
continue

@debug
def snapshot_task():
    """
    Periodic snapshot check.

    Calls `write_snapshot` when this node is the snapshot node, no
    backup is already running, and either the binlog is stale or it is
    time for a new snapshot. Returns without doing anything otherwise,
    and exits the process with status 1 if writing the snapshot raises.
    """
    node = MySQLNode()
    pilot = ContainerPilot(node)
    # updating ContainerPilot is what populates the MySQLNode state
    pilot.update()

    # both checks run before any DB connection is made, so we can
    # return early when there is nothing for this node to do
    if not node.is_snapshot_node():
        return
    if is_backup_running():
        return

    node.conn = wait_for_connection(
        user=config.repl_user,
        password=config.repl_password,
        timeout=pilot.config['services'][0]['ttl'])

    snapshot_due = is_binlog_stale(node.conn) or is_time_for_snapshot()
    if not snapshot_due:
        return

    try:
        write_snapshot(node.conn)
    except Exception as ex:
        # log the failure and exit non-zero so the task itself fails;
        # once the BACKUP_TTL_KEY expires that can be alerted on
        # externally.
        log.exception(ex)
        sys.exit(1)

@debug
def create_snapshot():
try:
Expand Down
8 changes: 8 additions & 0 deletions etc/containerpilot.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
"onChange": "python /usr/local/bin/manage.py on_change"
}
],
"tasks": [
{
"name": "snapshot_check",
"command": "python /usr/local/bin/manage.py snapshot_task",
"frequency": "10s",
"timeout": "10m"
}
],
"coprocesses": [{{ if .CONSUL_AGENT }}
{
"command": ["/usr/local/bin/consul", "agent",
Expand Down