diff --git a/aztk/client.py b/aztk/client.py index 0276affc..584a66a5 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -229,43 +229,48 @@ def __delete_user_on_pool(self, username, pool_id, nodes): concurrent.futures.wait(futures) - def __cluster_run(self, cluster_id, container_name, command, internal): + def __cluster_run(self, cluster_id, command, internal, container_name=None): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] if internal: - cluster_nodes = [models.RemoteLogin(ip_address=node.ip_address, port="22") for node in nodes] + cluster_nodes = [(node, models.RemoteLogin(ip_address=node.ip_address, port="22")) for node in nodes] else: - cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] + cluster_nodes = [(node, self.__get_remote_login_settings(pool.id, node.id)) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) - asyncio.get_event_loop().run_until_complete(ssh_lib.clus_exec_command(command, - container_name, - 'aztk', - cluster_nodes, - ssh_key=ssh_key.exportKey().decode('utf-8'))) + output = asyncio.get_event_loop().run_until_complete(ssh_lib.clus_exec_command(command, + 'aztk', + cluster_nodes, + ssh_key=ssh_key.exportKey().decode('utf-8'), + container_name=container_name)) + return output except OSError as exc: raise exc finally: self.__delete_user_on_pool('aztk', pool.id, nodes) - def __cluster_copy(self, cluster_id, container_name, source_path, destination_path, internal): + def __cluster_copy(self, cluster_id, source_path, destination_path, container_name=None, internal=False, get=False): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] if internal: - cluster_nodes = [models.RemoteLogin(ip_address=node.ip_address, port="22") for node in nodes] + cluster_nodes = [(node, models.RemoteLogin(ip_address=node.ip_address, port="22")) for node in nodes] else: - cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] + cluster_nodes = [(node, self.__get_remote_login_settings(pool.id, node.id)) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) - asyncio.get_event_loop().run_until_complete(ssh_lib.clus_copy(container_name=container_name, - username='aztk', - nodes=cluster_nodes, - source_path=source_path, - destination_path=destination_path, - ssh_key=ssh_key.exportKey().decode('utf-8'))) - self.__delete_user_on_pool('aztk', pool.id, nodes) + output = asyncio.get_event_loop().run_until_complete( + ssh_lib.clus_copy(container_name=container_name, + username='aztk', + nodes=cluster_nodes, + source_path=source_path, + destination_path=destination_path, + ssh_key=ssh_key.exportKey().decode('utf-8'), + get=get)) + return output except (OSError, batch_error.BatchErrorException) as exc: raise exc + finally: + self.__delete_user_on_pool('aztk', pool.id, nodes) def __submit_job(self, job_configuration, @@ -388,5 +393,8 @@ def cluster_run(self, cluster_id, command): def cluster_copy(self, cluster_id, source_path, destination_path): raise NotImplementedError() + def cluster_download(self, cluster_id, source_path, destination_path): + raise NotImplementedError() + def submit_job(self, job): raise NotImplementedError() diff --git a/aztk/spark/client.py b/aztk/spark/client.py index 9f5c1a88..55c22cd6 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -9,6 +9,7 @@ from aztk.spark.helpers import submit as cluster_submit_helper from aztk.spark.helpers import job_submission as 
job_submit_helper from aztk.spark.helpers import get_log as get_log_helper +from aztk.spark.helpers import cluster_diagnostic_helper from aztk.spark.utils import util from aztk.internal.cluster_data import NodeData import yaml @@ -146,15 +147,23 @@ def get_application_status(self, cluster_id: str, app_name: str): except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) - def cluster_run(self, cluster_id: str, command: str, internal: bool = False): + def cluster_run(self, cluster_id: str, command: str, host=False, internal: bool = False): try: - return self.__cluster_run(cluster_id, 'spark', command, internal) + return self.__cluster_run(cluster_id, command, internal, container_name='spark' if not host else None) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) - def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, internal: bool = False): + def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False): try: - return self.__cluster_copy(cluster_id, 'spark', source_path, destination_path, internal) + container_name = None if host else 'spark' + return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=False, internal=internal) + except batch_error.BatchErrorException as e: + raise error.AztkError(helpers.format_batch_exception(e)) + + def cluster_download(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False): + try: + container_name = None if host else 'spark' + return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=True, internal=internal) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) @@ -272,3 +281,10 @@ def wait_until_job_finished(self, job_id): def wait_until_all_jobs_finished(self, jobs): for job in jobs: self.wait_until_job_finished(job) + + def run_cluster_diagnostics(self, cluster_id, output_directory): + try: + output = cluster_diagnostic_helper.run(self, cluster_id, output_directory) + return output + except batch_error.BatchErrorException as e: + raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py new file mode 100644 index 00000000..d6e4a525 --- /dev/null +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -0,0 +1,26 @@ +import os +from aztk.utils import ssh +from aztk.utils.command_builder import CommandBuilder +from aztk import models as aztk_models +import azure.batch.models as batch_models + +def run(spark_client, cluster_id, output_directory): + # copy debug program to each node + spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) + ssh_cmd = _build_diagnostic_ssh_command() + run_output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) + local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip") + remote_path = "/tmp/debug.zip" + output = spark_client.cluster_download(cluster_id, remote_path, local_path, host=True) + # write run output to debug/ directory + with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f: + [f.write(line + '\n') for node_id, result in run_output for line in result] + return output + + +def 
_build_diagnostic_ssh_command(): + return "sudo rm -rf /tmp/debug.zip; "\ + "sudo apt-get install -y python3-pip; "\ + "sudo -H pip3 install --upgrade pip; "\ + "sudo -H pip3 install docker; "\ + "sudo python3 /tmp/debug.py" diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py new file mode 100644 index 00000000..21a5e9eb --- /dev/null +++ b/aztk/spark/utils/debug.py @@ -0,0 +1,160 @@ +""" + Diagnostic program that runs on each node in the cluster + This program must be run with sudo +""" +import io +import json +import os +import socket +import tarfile +from subprocess import STDOUT, CalledProcessError, check_output +from zipfile import ZIP_DEFLATED, ZipFile + +import docker # pylint: disable=import-error + + +def main(): + zipf = create_zip_archive() + + # general node diagnostics + zipf.writestr("hostname.txt", data=get_hostname()) + zipf.writestr("df.txt", data=get_disk_free()) + + # docker container diagnostics + docker_client = docker.from_env() + for filename, data in get_docker_diagnostics(docker_client): + zipf.writestr(filename, data=data) + + zipf.close() + + +def create_zip_archive(): + zip_file_path = "/tmp/debug.zip" + return ZipFile(zip_file_path, "w", ZIP_DEFLATED) + + +def get_hostname(): + return socket.gethostname() + + +def cmd_check_output(cmd): + try: + output = check_output(cmd, shell=True, stderr=STDOUT) + except CalledProcessError as e: + return "CMD: {0}\n"\ + "returncode: {1}"\ + "output: {2}".format(e.cmd, e.returncode, e.output) + else: + return output + + +def get_disk_free(): + return cmd_check_output("df -h") + + +def get_docker_diagnostics(docker_client): + ''' + returns list of tuples (filename, data) to be written in the zip + ''' + output = [] + output.append(get_docker_images(docker_client)) + logs = get_docker_containers(docker_client) + for item in logs: + output.append(item) + + return output + + +def get_docker_images(docker_client): + output = "" + try: + images = docker_client.images.list() + for image in images: + output += json.dumps(image.attrs, sort_keys=True, indent=4) + return ("docker-images.txt", output) + except docker.errors.APIError as e: + return ("docker-images.err", e.__str__()) + + +def get_docker_containers(docker_client): + container_attrs = "" + logs = [] + try: + containers = docker_client.containers.list() + for container in containers: + container_attrs += json.dumps(container.attrs, sort_keys=True, indent=4) + # get docker container logs + logs.append((container.name + "/docker.log", container.logs())) + logs.append(get_docker_process_status(container)) + if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers + logs.extend(get_container_aztk_script(container)) + logs.extend(get_spark_logs(container)) + logs.extend(get_spark_app_logs(container)) + + logs.append(("docker-containers.txt", container_attrs)) + return logs + except docker.errors.APIError as e: + return [("docker-containers.err", e.__str__())] + + +def get_docker_process_status(container): + try: + exit_code, output = container.exec_run("ps -auxw", tty=True, privileged=True) + out_file_name = container.name + "/ps_aux.txt" + if exit_code == 0: + return (out_file_name, output) + else: + return (out_file_name, "exit_code: {0}\n{1}".format(exit_code, output)) + except docker.errors.APIError as e: + return (container.name + "ps_aux.err", e.__str__()) + + +def get_container_aztk_script(container): + aztk_path = "/mnt/batch/tasks/startup/wd" + try: + stream, _ = container.get_archive(aztk_path) # second item 
is stat info + return extract_tar_in_memory(container, stream) + except docker.errors.APIError as e: + return (container.name + "/" + "aztk-scripts.err", e.__str__()) + + +def get_spark_logs(container): + spark_logs_path = "/home/spark-current/logs" + try: + stream, _ = container.get_archive(spark_logs_path) # second item is stat info + return extract_tar_in_memory(container, stream) + except docker.errors.APIError as e: + return [(container.name + "/" + "spark-logs.err", e.__str__())] + + +def get_spark_app_logs(container): + spark_app_logs_path = "/home/spark-current/work" + try: + stream, _ = container.get_archive(spark_app_logs_path) + return extract_tar_in_memory(container, stream) + except docker.errors.APIError as e: + return [(container.name + "/" + "spark-work-logs.err", e.__str__())] + + +def filter_members(members): + skip_files = ["id_rsa", "id_rsa.pub", "docker.log"] + skip_extensions = [".pyc", ".zip"] + for tarinfo in members: + if (os.path.splitext(tarinfo.name)[1] not in skip_extensions and + os.path.basename(tarinfo.name) not in skip_files): + yield tarinfo + + +def extract_tar_in_memory(container, data): + data = io.BytesIO(b''.join([item for item in data])) + tarf = tarfile.open(fileobj=data) + logs = [] + for member in filter_members(tarf): + file_bytes = tarf.extractfile(member) + if file_bytes is not None: + logs.append((container.name + "/" + member.name, b''.join(file_bytes.readlines()))) + return logs + + +if __name__ == "__main__": + main() diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 9cde8381..bd139683 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -40,59 +40,86 @@ def connect(hostname, return client -def node_exec_command(command, container_name, username, hostname, port, ssh_key=None, password=None): +def node_exec_command(node_id, command, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) - docker_exec = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) - stdin, stdout, stderr = client.exec_command(docker_exec, get_pty=True) - [print(line.decode('utf-8')) for line in stdout.read().splitlines()] + if container_name: + cmd = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) + else: + cmd = '/bin/bash 2>&1 -c \'set -e; set -o pipefail; {0}; wait\''.format(command) + stdin, stdout, stderr = client.exec_command(cmd, get_pty=True) + output = [line.decode('utf-8') for line in stdout.read().splitlines()] client.close() + return (node_id, output) -async def clus_exec_command(command, container_name, username, nodes, ports=None, ssh_key=None, password=None): - await asyncio.wait( - [asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - node_exec_command, - command, - container_name, - username, - node.ip_address, - node.port, - ssh_key, - password) for node in nodes] +async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, password=None, container_name=None): + return await asyncio.gather( + *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), + node_exec_command, + node.id, + command, + username, + node_rls.ip_address, + node_rls.port, + ssh_key, + password, + container_name) for node, node_rls in nodes] ) -def node_copy(container_name, source_path, destination_path, username, hostname, port, ssh_key=None, password=None): +def 
copy_from_node(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = client.open_sftp() - try: - # put the file in /tmp on the host - tmp_file = '/tmp/' + os.path.basename(source_path) - sftp_client.put(source_path, tmp_file) - # move to correct destination on container - docker_command = 'sudo docker cp {0} {1}:{2}'.format(tmp_file, container_name, destination_path) - _, stdout, _ = client.exec_command(docker_command, get_pty=True) - [print(line.decode('utf-8')) for line in stdout.read().splitlines()] - # clean up - sftp_client.remove(tmp_file) + destination_path = os.path.join(os.path.dirname(destination_path), node_id, os.path.basename(destination_path)) + os.makedirs(os.path.dirname(destination_path), exist_ok=True) + with open(destination_path, 'wb') as f: #SpooledTemporaryFile instead?? + sftp_client.getfo(source_path, f) + return (node_id, True, None) + except OSError as e: + return (node_id, False, e) + finally: + sftp_client.close() + client.close() + + +def node_copy(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): + client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) + sftp_client = client.open_sftp() + try: + if container_name: + # put the file in /tmp on the host + tmp_file = '/tmp/' + os.path.basename(source_path) + sftp_client.put(source_path, tmp_file) + # move to correct destination on container + docker_command = 'sudo docker cp {0} {1}:{2}'.format(tmp_file, container_name, destination_path) + _, stdout, _ = client.exec_command(docker_command, get_pty=True) + output = [line.decode('utf-8') for line in stdout.read().splitlines()] + # clean up + sftp_client.remove(tmp_file) + return (node_id, True, None) + else: + output = sftp_client.put(source_path, destination_path).__str__() + return (node_id, True, None) except (IOError, PermissionError) as e: - print(e) - - client.close() - + return (node_id, False, e) + finally: + sftp_client.close() + client.close() #TODO: progress bar -async def clus_copy(container_name, username, nodes, source_path, destination_path, ssh_key=None, password=None): - await asyncio.gather( + +async def clus_copy(username, nodes, source_path, destination_path, ssh_key=None, password=None, container_name=None, get=False): + return await asyncio.gather( *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - node_copy, - container_name, + copy_from_node if get else node_copy, + node.id, source_path, destination_path, username, - node.ip_address, - node.port, + node_rls.ip_address, + node_rls.port, ssh_key, - password) for node in nodes - ]) + password, + container_name) for node, node_rls in nodes] + ) diff --git a/aztk_cli/spark/endpoints/cluster/cluster.py b/aztk_cli/spark/endpoints/cluster/cluster.py index 4b42930d..b90b094b 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster.py +++ b/aztk_cli/spark/endpoints/cluster/cluster.py @@ -10,6 +10,7 @@ from . import cluster_submit from . import cluster_run from . import cluster_copy +from . 
import cluster_debug class ClusterAction: @@ -23,6 +24,7 @@ class ClusterAction: submit = "submit" run = "run" copy = "copy" + debug = "debug" def setup_parser(parser: argparse.ArgumentParser): @@ -50,6 +52,8 @@ def setup_parser(parser: argparse.ArgumentParser): ClusterAction.run, help="Run a command on all nodes in your spark cluster") copy_parser = subparsers.add_parser( ClusterAction.copy, help="Copy files to all nodes in your spark cluster") + debug_parser = subparsers.add_parser( + ClusterAction.debug, help="Debugging tool that aggregates logs and output from the cluster.") cluster_create.setup_parser(create_parser) cluster_add_user.setup_parser(add_user_parser) @@ -61,6 +65,7 @@ def setup_parser(parser: argparse.ArgumentParser): cluster_app_logs.setup_parser(app_logs_parser) cluster_run.setup_parser(run_parser) cluster_copy.setup_parser(copy_parser) + cluster_debug.setup_parser(debug_parser) def execute(args: typing.NamedTuple): @@ -76,6 +81,7 @@ def execute(args: typing.NamedTuple): actions[ClusterAction.app_logs] = cluster_app_logs.execute actions[ClusterAction.run] = cluster_run.execute actions[ClusterAction.copy] = cluster_copy.execute + actions[ClusterAction.debug] = cluster_debug.execute func = actions[args.cluster_action] func(args) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_copy.py b/aztk_cli/spark/endpoints/cluster/cluster_copy.py index 3210cbdf..8a861d82 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_copy.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_copy.py @@ -1,8 +1,9 @@ import argparse +import sys import typing import aztk.spark -from aztk_cli import config +from aztk_cli import config, utils def setup_parser(parser: argparse.ArgumentParser): @@ -22,10 +23,22 @@ def setup_parser(parser: argparse.ArgumentParser): def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) - - spark_client.cluster_copy( - cluster_id=args.cluster_id, - source_path=args.source_path, - destination_path=args.dest_path, - internal=args.internal - ) + with utils.Spinner(): + copy_output = spark_client.cluster_copy( + cluster_id=args.cluster_id, + source_path=args.source_path, + destination_path=args.dest_path, + internal=args.internal + ) + [print_copy_result(node_id, result, err) for node_id, result, err in copy_output] + sys.exit(0 if all([result for _, result, _ in copy_output]) else 1) + + +def print_copy_result(node_id, success, err): + print("-" * (len(node_id) + 6)) + print("| ", node_id, " |") + print("-" * (len(node_id) + 6)) + if success: + print("Copy successful") + else: + print(err) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py new file mode 100644 index 00000000..7fe3d5d2 --- /dev/null +++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py @@ -0,0 +1,26 @@ +import argparse +import os +import typing +import time + +import aztk.spark +from aztk_cli import config, utils + + +def setup_parser(parser: argparse.ArgumentParser): + parser.add_argument('--id', dest='cluster_id', required=True, + help='The unique id of your spark cluster') + + parser.add_argument('--output', '-o', required=False, + help='the directory for the output folder') + + +def execute(args: typing.NamedTuple): + spark_client = aztk.spark.Client(config.load_aztk_secrets()) + timestr = time.strftime("%Y%m%d-%H%M%S") + + if not args.output: + args.output = os.path.join(os.getcwd(), "debug-{0}-{1}".format(args.cluster_id, timestr)) + with utils.Spinner(): + 
spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, output_directory=args.output) + # TODO: analyze results, display some info about status diff --git a/aztk_cli/spark/endpoints/cluster/cluster_run.py b/aztk_cli/spark/endpoints/cluster/cluster_run.py index 877eddbd..5567b0fc 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_run.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_run.py @@ -19,5 +19,14 @@ def setup_parser(parser: argparse.ArgumentParser): def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) - result = spark_client.cluster_run(args.cluster_id, args.command, args.internal) - #TODO: pretty print result + with utils.Spinner(): + results = spark_client.cluster_run(args.cluster_id, args.command, internal=args.internal) + [print_execute_result(node_id, result) for node_id, result in results] + + +def print_execute_result(node_id, result): + print("-" * (len(node_id) + 6)) + print("| ", node_id, " |") + print("-" * (len(node_id) + 6)) + for line in result: + print(line) diff --git a/docs/10-clusters.md b/docs/10-clusters.md index 3d653b4c..df0a0e98 100644 --- a/docs/10-clusters.md +++ b/docs/10-clusters.md @@ -18,7 +18,7 @@ For example, to create a cluster of 4 *Standard_A2* nodes called 'spark' you can aztk spark cluster create --id spark --vm-size standard_a2 --size 4 ``` -You can find more information on VM sizes [here.](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes) Please note that you must use the official SKU name when setting your VM size - they usually come in the form: "standard_d2_v2". +You can find more information on VM sizes [here.](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes) Please note that you must use the official SKU name when setting your VM size - they usually come in the form: "standard_d2_v2". _Note: The cluster id (`--id`) can only contain alphanumeric characters including hyphens and underscores, and cannot contain more than 64 characters. Each cluster **must** have a unique cluster id._ @@ -139,6 +139,27 @@ Now that you're in, you can change directory to your familiar `$SPARK_HOME` cd $SPARK_HOME ``` +### Debugging your Spark Cluster + +If your cluster is in an unknown or unusable state, you can debug it by running: + +```sh +aztk spark cluster debug --id <cluster_id> --output <output_directory> +``` + +The debug utility will pull logs from all nodes in the cluster. The utility will check for: +- free disk space +- docker image status +- docker container status +- docker container logs +- docker container process status +- aztk code & version +- spark component logs (master, worker, shuffle service, history server, etc.) from $SPARK_HOME/logs +- spark application logs from $SPARK_HOME/work + +__Please be careful sharing the output of the `debug` command as secrets and application code are present in the output.__ + + ### Interact with your Spark cluster By default, the `aztk spark cluster ssh` command port forwards the Spark Web UI to *localhost:8080*, Spark Jobs UI to *localhost:4040*, and Spark History Server to your *locahost:18080*. This can be [configured in *.aztk/ssh.yaml*](../docs/13-configuration.md##sshyaml).
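
For reference, below is a minimal sketch of how the SDK surface introduced in this diff (`cluster_run` with `host=True` and `run_cluster_diagnostics`) could be driven from Python outside the CLI. The cluster id `"spark"`, the use of `config.load_aztk_secrets()` for credentials, and the local output directory layout are assumptions for illustration only, not part of this change:

```python
import os
import time

import aztk.spark
from aztk_cli import config

# Assumed: .aztk/secrets.yaml is configured and a cluster named "spark" already exists.
spark_client = aztk.spark.Client(config.load_aztk_secrets())
cluster_id = "spark"

# Run a command on the host OS of every node (host=True skips the 'spark' container).
run_output = spark_client.cluster_run(cluster_id, "df -h", host=True)
for node_id, lines in run_output:
    print("----- {0} -----".format(node_id))
    for line in lines:
        print(line)

# Pull a diagnostics bundle (debug.zip plus the run output) from every node
# into a timestamped local folder.
output_directory = os.path.join(
    os.getcwd(), "debug-{0}-{1}".format(cluster_id, time.strftime("%Y%m%d-%H%M%S")))
spark_client.run_cluster_diagnostics(cluster_id=cluster_id, output_directory=output_directory)
```

Each entry returned by `cluster_run` is a `(node_id, output_lines)` tuple produced by `ssh.node_exec_command` and gathered by `clus_exec_command`, which is why the loop above unpacks pairs per node.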