From 48bfc9c3daccc38bd7667a70227ec5f0b6e4883c Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Tue, 20 Mar 2018 17:34:59 -0700 Subject: [PATCH 01/21] start implementation of cluster debug utility --- aztk/client.py | 18 ++-- aztk/spark/client.py | 16 +++- .../helpers/cluster_diagnostic_helper.py | 34 ++++++++ aztk/utils/ssh.py | 83 ++++++++++++------- aztk_cli/spark/endpoints/cluster/cluster.py | 6 ++ .../spark/endpoints/cluster/cluster_debug.py | 22 +++++ .../spark/endpoints/cluster/cluster_run.py | 6 +- 7 files changed, 141 insertions(+), 44 deletions(-) create mode 100644 aztk/spark/helpers/cluster_diagnostic_helper.py create mode 100644 aztk_cli/spark/endpoints/cluster/cluster_debug.py diff --git a/aztk/client.py b/aztk/client.py index aabf4cae..c19b3a3a 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -225,23 +225,24 @@ def __delete_user_on_pool(self, username, pool_id, nodes): concurrent.futures.wait(futures) - def __cluster_run(self, cluster_id, container_name, command): + def __cluster_run(self, cluster_id, command, container_name=None): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) - asyncio.get_event_loop().run_until_complete(ssh_lib.clus_exec_command(command, - container_name, - 'aztk', - cluster_nodes, - ssh_key=ssh_key.exportKey().decode('utf-8'))) + output = asyncio.get_event_loop().run_until_complete(ssh_lib.clus_exec_command(command, + 'aztk', + cluster_nodes, + ssh_key=ssh_key.exportKey().decode('utf-8'), + container_name=container_name)) + return output except OSError as exc: raise exc finally: self.__delete_user_on_pool('aztk', pool.id, nodes) - def __cluster_copy(self, cluster_id, container_name, source_path, destination_path): + def __cluster_copy(self, cluster_id, source_path, destination_path, container_name=None, get=False): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] @@ -252,7 +253,8 @@ def __cluster_copy(self, cluster_id, container_name, source_path, destination_pa nodes=cluster_nodes, source_path=source_path, destination_path=destination_path, - ssh_key=ssh_key.exportKey().decode('utf-8'))) + ssh_key=ssh_key.exportKey().decode('utf-8'), + get=get)) self.__delete_user_on_pool('aztk', pool.id, nodes) except (OSError, batch_error.BatchErrorException) as exc: raise exc diff --git a/aztk/spark/client.py b/aztk/spark/client.py index ecab07e6..af6e0873 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -9,6 +9,7 @@ from aztk.spark.helpers import submit as cluster_submit_helper from aztk.spark.helpers import job_submission as job_submit_helper from aztk.spark.helpers import get_log as get_log_helper +from aztk.spark.helpers import cluster_diagnostic_helper from aztk.spark.utils import util from aztk.internal.cluster_data import NodeData import yaml @@ -146,15 +147,15 @@ def get_application_status(self, cluster_id: str, app_name: str): except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) - def cluster_run(self, cluster_id: str, command: str): + def cluster_run(self, cluster_id: str, command: str, host=False): try: - return self.__cluster_run(cluster_id, 'spark', command) + return self.__cluster_run(cluster_id, command, container_name='spark' if not host else None) except 
batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) - def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str): + def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host=False, get=False): try: - return self.__cluster_copy(cluster_id, 'spark', source_path, destination_path) + return self.__cluster_copy(cluster_id, source_path, destination_path, container_name='spark' if not host else None, get=get) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) @@ -272,3 +273,10 @@ def wait_until_job_finished(self, job_id): def wait_until_all_jobs_finished(self, jobs): for job in jobs: self.wait_until_job_finished(job) + + def run_cluster_diagnostics(self, cluster_id): + try: + output = cluster_diagnostic_helper.run(self, cluster_id) + return output + except batch_error.BatchErrorException as e: + raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py new file mode 100644 index 00000000..61bb4dd8 --- /dev/null +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -0,0 +1,34 @@ +import os +from aztk.utils import ssh +from aztk.utils.command_builder import CommandBuilder +from aztk import models as aztk_models +import azure.batch.models as batch_models + +def run(spark_client, cluster_id): + # build ssh command to run on each node + ssh_cmd = _build_diagnostic_ssh_command() + output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) + print(output) + # copy the output on each node back to the local machine + local_path = os.path.abspath("./tmp/debug.zip") + remote_path = "/tmp/debug.zip" + output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) + + # zip it all up into one folder + + return output + + +def _build_diagnostic_ssh_command(): + ssh_cmd = "echo $HOSTNAME; "\ + "sudo rm -rf /tmp/debug; "\ + "sudo rm -rf /tmp/debug.zip; "\ + "mkdir /tmp/debug; "\ + "echo $HOSTNAME > /tmp/debug/hostname.txt; "\ + "sudo apt-get -y install zip; "\ + "df -h > /tmp/debug/df.txt; "\ + "sudo zip -r /tmp/debug.zip /tmp/debug/; "\ + "sudo chmod 777 /tmp/debug.zip" + print(ssh_cmd) + + return ssh_cmd \ No newline at end of file diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 9cde8381..58e13db5 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -40,59 +40,80 @@ def connect(hostname, return client -def node_exec_command(command, container_name, username, hostname, port, ssh_key=None, password=None): +def node_exec_command(command, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) - docker_exec = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) - stdin, stdout, stderr = client.exec_command(docker_exec, get_pty=True) - [print(line.decode('utf-8')) for line in stdout.read().splitlines()] + if container_name: + cmd = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) + else: + cmd = '/bin/bash 2>&1 -c \'set -e; set -o pipefail; {0}; wait\''.format(command) + stdin, stdout, stderr = client.exec_command(cmd, get_pty=True) + # [print(line.decode('utf-8')) for line in stdout.read().splitlines()] + output = [line.decode('utf-8') for line in 
stdout.read().splitlines()] client.close() + return output -async def clus_exec_command(command, container_name, username, nodes, ports=None, ssh_key=None, password=None): - await asyncio.wait( - [asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - node_exec_command, - command, - container_name, - username, - node.ip_address, - node.port, - ssh_key, - password) for node in nodes] +async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, password=None, container_name=None): + return await asyncio.gather( + *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), + node_exec_command, + command, + username, + node.ip_address, + node.port, + ssh_key, + password, + container_name) for node in nodes] ) -def node_copy(container_name, source_path, destination_path, username, hostname, port, ssh_key=None, password=None): +def copy_from_node(source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): + print("running copy from node!") client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = client.open_sftp() - + output = None try: - # put the file in /tmp on the host - tmp_file = '/tmp/' + os.path.basename(source_path) - sftp_client.put(source_path, tmp_file) - # move to correct destination on container - docker_command = 'sudo docker cp {0} {1}:{2}'.format(tmp_file, container_name, destination_path) - _, stdout, _ = client.exec_command(docker_command, get_pty=True) - [print(line.decode('utf-8')) for line in stdout.read().splitlines()] - # clean up - sftp_client.remove(tmp_file) + output = sftp_client.get(source_path, destination_path) except (IOError, PermissionError) as e: print(e) client.close() + return output +def node_copy(source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): + client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) + sftp_client = client.open_sftp() + + try: + if container_name: + # put the file in /tmp on the host + tmp_file = '/tmp/' + os.path.basename(source_path) + sftp_client.put(source_path, tmp_file) + # move to correct destination on container + docker_command = 'sudo docker cp {0} {1}:{2}'.format(tmp_file, container_name, destination_path) + _, stdout, _ = client.exec_command(docker_command, get_pty=True) + [print(line.decode('utf-8')) for line in stdout.read().splitlines()] + # clean up + sftp_client.remove(tmp_file) + else: + sftp_client.put(source_path, destination_path) + except (IOError, PermissionError) as e: + print(e) + + client.close() #TODO: progress bar -async def clus_copy(container_name, username, nodes, source_path, destination_path, ssh_key=None, password=None): + +async def clus_copy(username, nodes, source_path, destination_path, ssh_key=None, password=None, container_name=None, get=False): await asyncio.gather( *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - node_copy, - container_name, + copy_from_node if get else node_copy, source_path, destination_path, username, node.ip_address, node.port, ssh_key, - password) for node in nodes - ]) + password, + container_name) for node in nodes] + ) diff --git a/aztk_cli/spark/endpoints/cluster/cluster.py b/aztk_cli/spark/endpoints/cluster/cluster.py index 4b42930d..b90b094b 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster.py +++ b/aztk_cli/spark/endpoints/cluster/cluster.py @@ -10,6 +10,7 @@ from . import cluster_submit from . 
import cluster_run from . import cluster_copy +from . import cluster_debug class ClusterAction: @@ -23,6 +24,7 @@ class ClusterAction: submit = "submit" run = "run" copy = "copy" + debug = "debug" def setup_parser(parser: argparse.ArgumentParser): @@ -50,6 +52,8 @@ def setup_parser(parser: argparse.ArgumentParser): ClusterAction.run, help="Run a command on all nodes in your spark cluster") copy_parser = subparsers.add_parser( ClusterAction.copy, help="Copy files to all nodes in your spark cluster") + debug_parser = subparsers.add_parser( + ClusterAction.debug, help="Debugging tool that aggregates logs and output from the cluster.") cluster_create.setup_parser(create_parser) cluster_add_user.setup_parser(add_user_parser) @@ -61,6 +65,7 @@ def setup_parser(parser: argparse.ArgumentParser): cluster_app_logs.setup_parser(app_logs_parser) cluster_run.setup_parser(run_parser) cluster_copy.setup_parser(copy_parser) + cluster_debug.setup_parser(debug_parser) def execute(args: typing.NamedTuple): @@ -76,6 +81,7 @@ def execute(args: typing.NamedTuple): actions[ClusterAction.app_logs] = cluster_app_logs.execute actions[ClusterAction.run] = cluster_run.execute actions[ClusterAction.copy] = cluster_copy.execute + actions[ClusterAction.debug] = cluster_debug.execute func = actions[args.cluster_action] func(args) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py new file mode 100644 index 00000000..8d76ce56 --- /dev/null +++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py @@ -0,0 +1,22 @@ +import argparse +import typing +import aztk.spark +from aztk_cli import config + + +def setup_parser(parser: argparse.ArgumentParser): + parser.add_argument('--id', dest='cluster_id', required=True, + help='The unique id of your spark cluster') + + parser.add_argument('--output', required=False, + help='the path for the output folder') + + +def execute(args: typing.NamedTuple): + spark_client = aztk.spark.Client(config.load_aztk_secrets()) + + output = spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id) + print("cluster_debug_output") + print(type(output)) + print(output) + print("cluster_debug_output end") diff --git a/aztk_cli/spark/endpoints/cluster/cluster_run.py b/aztk_cli/spark/endpoints/cluster/cluster_run.py index 6c214b23..9a84d46d 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_run.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_run.py @@ -13,5 +13,9 @@ def setup_parser(parser: argparse.ArgumentParser): def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) - result = spark_client.cluster_run(args.cluster_id, args.command) + results = spark_client.cluster_run(args.cluster_id, args.command) + for result in results: + print("---------------------------") #TODO: replace with nodename + for line in result: + print(line) #TODO: pretty print result From dc559728f77f2a776f687071c5b08f8c8d2a391d Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Wed, 21 Mar 2018 16:48:33 -0700 Subject: [PATCH 02/21] update debug program --- .../helpers/cluster_diagnostic_helper.py | 26 ++-- aztk/spark/utils/debug.py | 115 ++++++++++++++++++ 2 files changed, 123 insertions(+), 18 deletions(-) create mode 100644 aztk/spark/utils/debug.py diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index 61bb4dd8..6fd12559 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -5,30 +5,20 @@ 
import azure.batch.models as batch_models def run(spark_client, cluster_id): - # build ssh command to run on each node + # copy debug program to each node + spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) - print(output) - # copy the output on each node back to the local machine + print("run output:", output) local_path = os.path.abspath("./tmp/debug.zip") remote_path = "/tmp/debug.zip" output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) - - # zip it all up into one folder - return output def _build_diagnostic_ssh_command(): - ssh_cmd = "echo $HOSTNAME; "\ - "sudo rm -rf /tmp/debug; "\ - "sudo rm -rf /tmp/debug.zip; "\ - "mkdir /tmp/debug; "\ - "echo $HOSTNAME > /tmp/debug/hostname.txt; "\ - "sudo apt-get -y install zip; "\ - "df -h > /tmp/debug/df.txt; "\ - "sudo zip -r /tmp/debug.zip /tmp/debug/; "\ - "sudo chmod 777 /tmp/debug.zip" - print(ssh_cmd) - - return ssh_cmd \ No newline at end of file + return "sudo rm -rf /tmp/debug.zip; "\ + "sudo apt-get install -y python3-pip; "\ + "pip3 install --upgrade pip; "\ + "pip3 install docker; "\ + "sudo python3 /tmp/debug.py 2>&1 > /tmp/debug-output.txt" diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py new file mode 100644 index 00000000..f7669863 --- /dev/null +++ b/aztk/spark/utils/debug.py @@ -0,0 +1,115 @@ +""" + Diagnostic program that runs on each node in the cluster + This program must be run with sudo +""" +import os +import io +import json +import socket +from subprocess import check_output, STDOUT, CalledProcessError +from zipfile import ZipFile, ZIP_DEFLATED +import tarfile +import docker + + +def main(): + zipf = create_zip_archive() + + # general node diagnostics + zipf.writestr("hostname.txt", data=get_hostname()) + zipf.writestr("df.txt", data=get_disk_free()) + + # docker container diagnostics + docker_client = docker.from_env() + for filename, data in get_docker_diagnostics(docker_client): + zipf.writestr(filename, data=data) + + zipf.close() + + +def create_zip_archive(): + zip_file_path = "/tmp/debug.zip" + return ZipFile(zip_file_path, "w", ZIP_DEFLATED) + + +def cmd_check_output(cmd): + try: + output = check_output(cmd, shell=True, stderr=STDOUT) + except CalledProcessError as e: + return "CMD: {0}\n"\ + "returncode: {1}"\ + "output: {2}".format(e.cmd, e.returncode, e.output) + else: + return output + + +def get_hostname(): + return socket.gethostname() + + +def get_disk_free(): + return cmd_check_output("df -h") + + +def get_docker_diagnostics(docker_client): + ''' + returns list of tuples (filename, data) to be written in the zip + ''' + output = [] + output.append(("docker-images.txt", get_docker_images(docker_client))) + logs = get_docker_containers(docker_client) + for item in logs: + output.append(item) + + return output + + +def get_docker_images(docker_client): + output = "" + images = docker_client.images.list() + for image in images: + output += json.dumps(image.attrs, sort_keys=True, indent=4) + return output + + +def get_docker_containers(docker_client): + container_attrs = "" + logs = [] + containers = docker_client.containers.list() + for container in containers: + container_attrs += json.dumps(container.attrs, sort_keys=True, indent=4) + # get docker container logs + logs.append((container.name + "/docker.log", container.logs())) + logs.append(get_docker_process_status(container)) 
+ if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers + logs.append(get_container_aztk_script(container)) + + logs.append(("docker-containers.txt", container_attrs)) + return logs + + +def get_docker_process_status(container): + exit_code, output = container.exec_run("ps -aux", tty=True, privileged=True) + out_file_name = container.name + "/ps_aux.txt" + if exit_code == 0: + return (out_file_name, output) + else: + return (out_file_name, "exit_code: {0}\n{1}".format(exit_code, output)) + + +def get_container_aztk_script(container): + aztk_path = "/mnt/batch/tasks/startup/wd" + stream, _ = container.get_archive(aztk_path) # second item is stat info + data = b'' + for item in stream: + data += item + with open("/tmp/aztk-scripts.tar", 'wb') as f: + f.write(data) + tf = tarfile.open("/tmp/aztk-scripts.tar", 'r') + tf.extractall("/tmp/") + + return (container.name + "/" + "aztk-scripts.tar", data) + + +if __name__ == "__main__": + main() From 0cb1c2c407ddadee45decc8af5b92d62dd2bd5f2 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Wed, 21 Mar 2018 17:32:28 -0700 Subject: [PATCH 03/21] update debug --- aztk/spark/client.py | 4 ++-- aztk/spark/helpers/cluster_diagnostic_helper.py | 7 +++---- aztk/utils/ssh.py | 8 ++++++-- aztk_cli/spark/endpoints/cluster/cluster_debug.py | 9 +++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aztk/spark/client.py b/aztk/spark/client.py index af6e0873..6af91b8d 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -274,9 +274,9 @@ def wait_until_all_jobs_finished(self, jobs): for job in jobs: self.wait_until_job_finished(job) - def run_cluster_diagnostics(self, cluster_id): + def run_cluster_diagnostics(self, cluster_id, path): try: - output = cluster_diagnostic_helper.run(self, cluster_id) + output = cluster_diagnostic_helper.run(self, cluster_id, path) return output except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index 6fd12559..35fb771f 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -4,15 +4,14 @@ from aztk import models as aztk_models import azure.batch.models as batch_models -def run(spark_client, cluster_id): +def run(spark_client, cluster_id, path): # copy debug program to each node spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) - print("run output:", output) - local_path = os.path.abspath("./tmp/debug.zip") + # local_path = os.path.abspath(path) remote_path = "/tmp/debug.zip" - output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) + output = spark_client.cluster_copy(cluster_id, remote_path, path, host=True, get=True) return output diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 58e13db5..5bfe1765 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -68,15 +68,18 @@ async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, def copy_from_node(source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): - print("running copy from node!") client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = 
client.open_sftp() output = None try: - output = sftp_client.get(source_path, destination_path) + with open(destination_path + str(port), 'wb') as f: + sftp_client.getfo(source_path, f) + # output = sftp_client.getfo(open(source_path, 'wb'), destination_path) except (IOError, PermissionError) as e: print(e) + sftp_client.close() + client.close() return output @@ -100,6 +103,7 @@ def node_copy(source_path, destination_path, username, hostname, port, ssh_key=N except (IOError, PermissionError) as e: print(e) + sftp_client.close() client.close() #TODO: progress bar diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py index 8d76ce56..2ab4bc8f 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_debug.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py @@ -8,15 +8,12 @@ def setup_parser(parser: argparse.ArgumentParser): parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') - parser.add_argument('--output', required=False, + parser.add_argument('--output', '-o', required=True, help='the path for the output folder') def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) - output = spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id) - print("cluster_debug_output") - print(type(output)) - print(output) - print("cluster_debug_output end") + spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, path=args.output) + # TODO: analyze results, display some info about status From d67e474d60c1da9e49927cf1a5c5f4e5f386124c Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Wed, 21 Mar 2018 18:00:28 -0700 Subject: [PATCH 04/21] fix output directory structure --- aztk/spark/client.py | 4 ++-- aztk/spark/helpers/cluster_diagnostic_helper.py | 7 ++++--- aztk/utils/ssh.py | 5 +++-- aztk_cli/spark/endpoints/cluster/cluster_debug.py | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/aztk/spark/client.py b/aztk/spark/client.py index 6af91b8d..51431de1 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -274,9 +274,9 @@ def wait_until_all_jobs_finished(self, jobs): for job in jobs: self.wait_until_job_finished(job) - def run_cluster_diagnostics(self, cluster_id, path): + def run_cluster_diagnostics(self, cluster_id, output_directory): try: - output = cluster_diagnostic_helper.run(self, cluster_id, path) + output = cluster_diagnostic_helper.run(self, cluster_id, output_directory) return output except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index 35fb771f..1b7e0f67 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -4,14 +4,15 @@ from aztk import models as aztk_models import azure.batch.models as batch_models -def run(spark_client, cluster_id, path): +def run(spark_client, cluster_id, output_directory): # copy debug program to each node spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) - # local_path = os.path.abspath(path) + local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip") #TODO: add timestamp + print("run localpath", local_path) remote_path = "/tmp/debug.zip" - output = 
spark_client.cluster_copy(cluster_id, remote_path, path, host=True, get=True) + output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) return output diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 5bfe1765..8cae12bd 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -72,9 +72,10 @@ def copy_from_node(source_path, destination_path, username, hostname, port, ssh_ sftp_client = client.open_sftp() output = None try: - with open(destination_path + str(port), 'wb') as f: + destination_path = os.path.join(os.path.dirname(destination_path), str(port), os.path.basename(destination_path)) + os.makedirs(os.path.dirname(destination_path), exist_ok=True) + with open(destination_path, 'wb') as f: #SpooledTemporaryFile instead?? sftp_client.getfo(source_path, f) - # output = sftp_client.getfo(open(source_path, 'wb'), destination_path) except (IOError, PermissionError) as e: print(e) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py index 2ab4bc8f..6b6e5f8c 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_debug.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py @@ -9,11 +9,11 @@ def setup_parser(parser: argparse.ArgumentParser): help='The unique id of your spark cluster') parser.add_argument('--output', '-o', required=True, - help='the path for the output folder') + help='the directory for the output folder') def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) - spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, path=args.output) + spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, output_directory=args.output) # TODO: analyze results, display some info about status From 878c56dfa6dfbaf887a9aad2a070eb06f52b51ad Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 12:43:48 -0700 Subject: [PATCH 05/21] cleanup output, add error checking --- aztk/client.py | 4 +- .../helpers/cluster_diagnostic_helper.py | 12 +-- aztk/spark/utils/debug.py | 87 +++++++++++-------- aztk/utils/ssh.py | 23 +++-- 4 files changed, 72 insertions(+), 54 deletions(-) diff --git a/aztk/client.py b/aztk/client.py index c19b3a3a..841f2b91 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -224,7 +224,6 @@ def __delete_user_on_pool(self, username, pool_id, nodes): futures = [exector.submit(self.__delete_user, pool_id, node.id, username) for node in nodes] concurrent.futures.wait(futures) - def __cluster_run(self, cluster_id, command, container_name=None): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] @@ -245,7 +244,7 @@ def __cluster_run(self, cluster_id, command, container_name=None): def __cluster_copy(self, cluster_id, source_path, destination_path, container_name=None, get=False): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] - cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] + cluster_nodes = [(node, self.__get_remote_login_settings(pool.id, node.id)) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) asyncio.get_event_loop().run_until_complete(ssh_lib.clus_copy(container_name=container_name, @@ -258,6 +257,7 @@ def __cluster_copy(self, cluster_id, source_path, destination_path, container_na self.__delete_user_on_pool('aztk', pool.id, nodes) except (OSError, batch_error.BatchErrorException) as exc: raise exc + def __submit_job(self, job_configuration, start_task, diff --git 
a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index 1b7e0f67..a9725354 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -8,17 +8,19 @@ def run(spark_client, cluster_id, output_directory): # copy debug program to each node spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() - output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) + run_output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip") #TODO: add timestamp - print("run localpath", local_path) remote_path = "/tmp/debug.zip" output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) + # write run output to debug/ directory + with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w') as f: + [f.write(line + '\n') for node_output in run_output for line in node_output] return output def _build_diagnostic_ssh_command(): return "sudo rm -rf /tmp/debug.zip; "\ "sudo apt-get install -y python3-pip; "\ - "pip3 install --upgrade pip; "\ - "pip3 install docker; "\ - "sudo python3 /tmp/debug.py 2>&1 > /tmp/debug-output.txt" + "sudo -H pip3 install --upgrade pip; "\ + "sudo -H pip3 install docker; "\ + "sudo python3 /tmp/debug.py" diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index f7669863..92ebb08a 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -32,6 +32,10 @@ def create_zip_archive(): return ZipFile(zip_file_path, "w", ZIP_DEFLATED) +def get_hostname(): + return socket.gethostname() + + def cmd_check_output(cmd): try: output = check_output(cmd, shell=True, stderr=STDOUT) @@ -43,10 +47,6 @@ def cmd_check_output(cmd): return output -def get_hostname(): - return socket.gethostname() - - def get_disk_free(): return cmd_check_output("df -h") @@ -56,7 +56,7 @@ def get_docker_diagnostics(docker_client): returns list of tuples (filename, data) to be written in the zip ''' output = [] - output.append(("docker-images.txt", get_docker_images(docker_client))) + output.append(get_docker_images(docker_client)) logs = get_docker_containers(docker_client) for item in logs: output.append(item) @@ -66,49 +66,66 @@ def get_docker_diagnostics(docker_client): def get_docker_images(docker_client): output = "" - images = docker_client.images.list() - for image in images: - output += json.dumps(image.attrs, sort_keys=True, indent=4) - return output + try: + images = docker_client.images.list() + for image in images: + output += json.dumps(image.attrs, sort_keys=True, indent=4) + return ("docker-images.txt", output) + except docker.errors.APIerror as e: + return ("docker-images.err", e.__str__()) def get_docker_containers(docker_client): container_attrs = "" logs = [] - containers = docker_client.containers.list() - for container in containers: - container_attrs += json.dumps(container.attrs, sort_keys=True, indent=4) - # get docker container logs - logs.append((container.name + "/docker.log", container.logs())) - logs.append(get_docker_process_status(container)) - if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers - logs.append(get_container_aztk_script(container)) - - logs.append(("docker-containers.txt", container_attrs)) - return logs + try: + containers = docker_client.containers.list() + 
for container in containers: + container_attrs += json.dumps(container.attrs, sort_keys=True, indent=4) + # get docker container logs + logs.append((container.name + "/docker.log", container.logs())) + logs.append(get_docker_process_status(container)) + if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers + logs.append(get_container_aztk_script(container)) + logs.append(get_spark_logs(container)) + + logs.append(("docker-containers.txt", container_attrs)) + return logs + except docker.errors.APIerror as e: + return [("docker-containers.err", e.__str__())] def get_docker_process_status(container): - exit_code, output = container.exec_run("ps -aux", tty=True, privileged=True) - out_file_name = container.name + "/ps_aux.txt" - if exit_code == 0: - return (out_file_name, output) - else: - return (out_file_name, "exit_code: {0}\n{1}".format(exit_code, output)) + try: + exit_code, output = container.exec_run("ps -auxw", tty=True, privileged=True) + out_file_name = container.name + "/ps_aux.txt" + if exit_code == 0: + return (out_file_name, output) + else: + return (out_file_name, "exit_code: {0}\n{1}".format(exit_code, output)) + except docker.errors.APIerror as e: + return (container.name + "ps_aux.err", e.__str__()) def get_container_aztk_script(container): aztk_path = "/mnt/batch/tasks/startup/wd" - stream, _ = container.get_archive(aztk_path) # second item is stat info + try: + stream, _ = container.get_archive(aztk_path) # second item is stat info + data = b''.join([item for item in stream]) + return (container.name + "/" + "aztk-scripts.tar", data) + except docker.errors.APIError as e: + return (container.name + "/" + "aztk-scripts.err", e.__str__()) + + +def get_spark_logs(container): + spark_logs_path = "/home/spark-current/logs" data = b'' - for item in stream: - data += item - with open("/tmp/aztk-scripts.tar", 'wb') as f: - f.write(data) - tf = tarfile.open("/tmp/aztk-scripts.tar", 'r') - tf.extractall("/tmp/") - - return (container.name + "/" + "aztk-scripts.tar", data) + try: + stream, _ = container.get_archive(spark_logs_path) # second item is stat info + data = b''.join([item for item in stream]) + return (container.name + "/" + "spark-logs.tar", data) + except docker.errors.APIError as e: + return (container.name + "/" + "spark-logs.err", e.__str__()) if __name__ == "__main__": diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 8cae12bd..3a73407d 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -67,24 +67,22 @@ async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, ) -def copy_from_node(source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): +def copy_from_node(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = client.open_sftp() - output = None try: - destination_path = os.path.join(os.path.dirname(destination_path), str(port), os.path.basename(destination_path)) + destination_path = os.path.join(os.path.dirname(destination_path), node_id, os.path.basename(destination_path)) os.makedirs(os.path.dirname(destination_path), exist_ok=True) with open(destination_path, 'wb') as f: #SpooledTemporaryFile instead?? 
sftp_client.getfo(source_path, f) + return f except (IOError, PermissionError) as e: print(e) + finally: + sftp_client.close() + client.close() - sftp_client.close() - - client.close() - return output - -def node_copy(source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): +def node_copy(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = client.open_sftp() @@ -113,12 +111,13 @@ async def clus_copy(username, nodes, source_path, destination_path, ssh_key=None await asyncio.gather( *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), copy_from_node if get else node_copy, + node.id, source_path, destination_path, username, - node.ip_address, - node.port, + node_rls.ip_address, + node_rls.port, ssh_key, password, - container_name) for node in nodes] + container_name) for node, node_rls in nodes] ) From 9ae6f3190e75bcd980ee9d169e0ea8eb653217cb Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 12:45:45 -0700 Subject: [PATCH 06/21] sort imports --- aztk/spark/utils/debug.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index 92ebb08a..647ea3d0 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -2,13 +2,14 @@ Diagnostic program that runs on each node in the cluster This program must be run with sudo """ -import os import io import json +import os import socket -from subprocess import check_output, STDOUT, CalledProcessError -from zipfile import ZipFile, ZIP_DEFLATED import tarfile +from subprocess import STDOUT, CalledProcessError, check_output +from zipfile import ZIP_DEFLATED, ZipFile + import docker From ac4b7065ff024800afb3b94fff71f50b122d1352 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 13:08:21 -0700 Subject: [PATCH 07/21] start untar --- aztk/spark/utils/debug.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index 647ea3d0..d6979b2c 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -88,7 +88,8 @@ def get_docker_containers(docker_client): logs.append(get_docker_process_status(container)) if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers logs.append(get_container_aztk_script(container)) - logs.append(get_spark_logs(container)) + # logs.append(get_spark_logs(container)) + [logs.append(tup) for tup in get_spark_logs(container)] logs.append(("docker-containers.txt", container_attrs)) return logs @@ -123,7 +124,15 @@ def get_spark_logs(container): data = b'' try: stream, _ = container.get_archive(spark_logs_path) # second item is stat info - data = b''.join([item for item in stream]) + data = io.BytesIO(b''.join([item for item in stream])) + tarf = tarfile.open(fileobj=data) + logs = [] + for member in tarf.getnames(): + print("MEMBER:", member) + file_bytes = tarf.extractfile(member).read() + logs.append(member, file_bytes) + + return (container.name + "/" + "spark-logs.tar", data) except docker.errors.APIError as e: return (container.name + "/" + "spark-logs.err", e.__str__()) From 33dd49f50e4585243579f7e907a26f0e5b8c7c9a Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 14:23:01 -0700 Subject: [PATCH 08/21] extract tar --- 
aztk/spark/utils/debug.py | 44 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index d6979b2c..4a074511 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -72,7 +72,7 @@ def get_docker_images(docker_client): for image in images: output += json.dumps(image.attrs, sort_keys=True, indent=4) return ("docker-images.txt", output) - except docker.errors.APIerror as e: + except docker.errors.APIError as e: return ("docker-images.err", e.__str__()) @@ -87,13 +87,12 @@ def get_docker_containers(docker_client): logs.append((container.name + "/docker.log", container.logs())) logs.append(get_docker_process_status(container)) if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers - logs.append(get_container_aztk_script(container)) - # logs.append(get_spark_logs(container)) - [logs.append(tup) for tup in get_spark_logs(container)] + logs.extend(get_container_aztk_script(container)) + logs.extend(get_spark_logs(container)) logs.append(("docker-containers.txt", container_attrs)) return logs - except docker.errors.APIerror as e: + except docker.errors.APIError as e: return [("docker-containers.err", e.__str__())] @@ -105,7 +104,7 @@ def get_docker_process_status(container): return (out_file_name, output) else: return (out_file_name, "exit_code: {0}\n{1}".format(exit_code, output)) - except docker.errors.APIerror as e: + except docker.errors.APIError as e: return (container.name + "ps_aux.err", e.__str__()) @@ -113,8 +112,8 @@ def get_container_aztk_script(container): aztk_path = "/mnt/batch/tasks/startup/wd" try: stream, _ = container.get_archive(aztk_path) # second item is stat info - data = b''.join([item for item in stream]) - return (container.name + "/" + "aztk-scripts.tar", data) + data = io.BytesIO(b''.join([item for item in stream])) + return extract_tar_in_memory(container, data) except docker.errors.APIError as e: return (container.name + "/" + "aztk-scripts.err", e.__str__()) @@ -125,17 +124,28 @@ def get_spark_logs(container): try: stream, _ = container.get_archive(spark_logs_path) # second item is stat info data = io.BytesIO(b''.join([item for item in stream])) - tarf = tarfile.open(fileobj=data) - logs = [] - for member in tarf.getnames(): - print("MEMBER:", member) - file_bytes = tarf.extractfile(member).read() - logs.append(member, file_bytes) + return extract_tar_in_memory(container, data) + except docker.errors.APIError as e: + return [(container.name + "/" + "spark-logs.err", e.__str__())] - return (container.name + "/" + "spark-logs.tar", data) - except docker.errors.APIError as e: - return (container.name + "/" + "spark-logs.err", e.__str__()) +def filter_members(members): + skip_files = ["id_rsa", "id_rsa.pub", "docker.log"] + skip_extensions = [".pyc", ".zip"] + for tarinfo in members: + if (os.path.splitext(tarinfo.name)[1] not in skip_extensions and + os.path.basename(tarinfo.name) not in skip_files): + yield tarinfo + + +def extract_tar_in_memory(container, data): + tarf = tarfile.open(fileobj=data) + logs = [] + for member in filter_members(tarf): + file_bytes = tarf.extractfile(member) + if file_bytes is not None: + logs.append((container.name + "/" + member.name, b''.join(file_bytes.readlines()))) + return logs if __name__ == "__main__": From c789cc18e6b8515a557a63f12dad68d1aa9d586d Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 14:44:45 -0700 Subject: [PATCH 09/21] add debug.py to 
pylintc ignore, line too long --- aztk/spark/client.py | 3 +- pylintrc | 812 +++++++++++++++++++++---------------------- 2 files changed, 408 insertions(+), 407 deletions(-) diff --git a/aztk/spark/client.py b/aztk/spark/client.py index 51431de1..3eb185b5 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -155,7 +155,8 @@ def cluster_run(self, cluster_id: str, command: str, host=False): def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host=False, get=False): try: - return self.__cluster_copy(cluster_id, source_path, destination_path, container_name='spark' if not host else None, get=get) + container_name = None if host else 'spark' + return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=get) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/pylintrc b/pylintrc index 71710d66..caa41ba2 100644 --- a/pylintrc +++ b/pylintrc @@ -1,406 +1,406 @@ -[MASTER] - -# Specify a configuration file. -#rcfile= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Pickle collected data for later comparisons. -persistent=yes - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Use multiple processes to speed up Pylint. -jobs=1 - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - -# Allow optimization of some AST trees. This will activate a peephole AST -# optimizer, which will apply various small optimizations. For instance, it can -# be used to obtain the result of joining multiple strings with the addition -# operator. Joining a lot of strings can lead to a maximum recursion error in -# Pylint and this flag can prevent that. It has one side effect, the resulting -# AST will be different than the one from reality. This option is deprecated -# and it will be removed in Pylint 2.0. -optimize-ast=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -#enable= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. 
For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -# disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression -disable = C0111,W0401,I0011,C0103,E1101,too-few-public-methods - -[REPORTS] - -# Set the output format. Available formats are text, parseable, colorized, msvs -# (visual studio) and html. You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Put messages in a separate file for each module / package specified on the -# command line instead of printing them on stdout. Reports (if any) will be -# written in a file name "pylint_global.[txt|html]". This option is deprecated -# and it will be removed in Pylint 2.0. -files-output=no - -# Tells whether to display a full report or only the messages -reports=yes - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - - -[BASIC] - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. 
-property-classes=abc.abstractproperty - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct function names -function-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for function names -function-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct method names -method-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for method names -method-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct attribute names -attr-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for attribute names -attr-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct argument names -argument-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for argument names -argument-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct variable names -variable-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for variable names -variable-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -[ELIF] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[FORMAT] - -# Maximum number of characters on a single line. -max-line-length=120 - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Maximum number of lines in a module -max-module-lines=1000 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[SIMILARITIES] - -# Minimum lines number of a similarity. 
-min-similarity-lines=4 - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[TYPECHECK] - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules=distutils,distutils.dir_util - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - - -[VARIABLES] - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=5 - -# Argument names that match this expression will be ignored. 
Default to name -# with leading underscore -ignored-argument-names=_.* - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of statements in function / method body -max-statements=50 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - - -[IMPORTS] - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS,debug.py + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. This option is deprecated +# and it will be removed in Pylint 2.0. 
+optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +# disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression +disable = C0111,W0401,I0011,C0103,E1101,too-few-public-methods + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". This option is deprecated +# and it will be removed in Pylint 2.0. +files-output=no + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. 
See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. 
+no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=distutils,distutils.dir_util + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. 
+defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception From 0d35286d6e65e99934b25cd1d8d67225d04042d4 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 22 Mar 2018 14:50:41 -0700 Subject: [PATCH 10/21] crlf->lf --- pylintrc | 812 +++++++++++++++++++++++++++---------------------------- 1 file changed, 406 insertions(+), 406 deletions(-) diff --git a/pylintrc b/pylintrc index caa41ba2..75f78ddb 100644 --- a/pylintrc +++ b/pylintrc @@ -1,406 +1,406 @@ -[MASTER] - -# Specify a configuration file. -#rcfile= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS,debug.py - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Pickle collected data for later comparisons. -persistent=yes - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Use multiple processes to speed up Pylint. 
-jobs=1 - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - -# Allow optimization of some AST trees. This will activate a peephole AST -# optimizer, which will apply various small optimizations. For instance, it can -# be used to obtain the result of joining multiple strings with the addition -# operator. Joining a lot of strings can lead to a maximum recursion error in -# Pylint and this flag can prevent that. It has one side effect, the resulting -# AST will be different than the one from reality. This option is deprecated -# and it will be removed in Pylint 2.0. -optimize-ast=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -#enable= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -# disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression -disable = C0111,W0401,I0011,C0103,E1101,too-few-public-methods - -[REPORTS] - -# Set the output format. Available formats are text, parseable, colorized, msvs -# (visual studio) and html. You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Put messages in a separate file for each module / package specified on the -# command line instead of printing them on stdout. Reports (if any) will be -# written in a file name "pylint_global.[txt|html]". This option is deprecated -# and it will be removed in Pylint 2.0. 
-files-output=no - -# Tells whether to display a full report or only the messages -reports=yes - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - - -[BASIC] - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct function names -function-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for function names -function-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct method names -method-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for method names -method-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct attribute names -attr-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for attribute names -attr-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct argument names -argument-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for argument names -argument-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct variable names -variable-rgx=[a-z_][a-z0-9_]{2,30}$ - -# Naming hint for variable names -variable-name-hint=[a-z_][a-z0-9_]{2,30}$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. 
-docstring-min-length=-1 - -[ELIF] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[FORMAT] - -# Maximum number of characters on a single line. -max-line-length=120 - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Maximum number of lines in a module -max-module-lines=1000 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[SIMILARITIES] - -# Minimum lines number of a similarity. -min-similarity-lines=4 - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[TYPECHECK] - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules=distutils,distutils.dir_util - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - - -[VARIABLES] - -# Tells whether we should check for unused import in __init__ files. 
-init-import=no - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=5 - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore -ignored-argument-names=_.* - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of statements in function / method body -max-statements=50 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - - -[IMPORTS] - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS,debug.py + +# Add files or directories matching the regex patterns to the blacklist. 
The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. This option is deprecated +# and it will be removed in Pylint 2.0. +optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +# disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression +disable = C0111,W0401,I0011,C0103,E1101,too-few-public-methods + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. 
+output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". This option is deprecated +# and it will be removed in Pylint 2.0. +files-output=no + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression which should only match function or class names that do +# not require a docstring. 
+no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=distutils,distutils.dir_util + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. 
+contextmanager-decorators=contextlib.contextmanager + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. 
Defaults to +# "Exception" +overgeneral-exceptions=Exception From 27fc88305ace160e9f0ff067667231fa05fdb741 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 26 Mar 2018 11:59:32 -0700 Subject: [PATCH 11/21] add app logs --- aztk/spark/helpers/cluster_diagnostic_helper.py | 2 +- aztk/spark/utils/debug.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index a9725354..75776c8a 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -13,7 +13,7 @@ def run(spark_client, cluster_id, output_directory): remote_path = "/tmp/debug.zip" output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True) # write run output to debug/ directory - with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w') as f: + with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f: [f.write(line + '\n') for node_output in run_output for line in node_output] return output diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index 4a074511..9c2a0c1b 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -112,23 +112,29 @@ def get_container_aztk_script(container): aztk_path = "/mnt/batch/tasks/startup/wd" try: stream, _ = container.get_archive(aztk_path) # second item is stat info - data = io.BytesIO(b''.join([item for item in stream])) - return extract_tar_in_memory(container, data) + return extract_tar_in_memory(container, stream) except docker.errors.APIError as e: return (container.name + "/" + "aztk-scripts.err", e.__str__()) def get_spark_logs(container): spark_logs_path = "/home/spark-current/logs" - data = b'' try: stream, _ = container.get_archive(spark_logs_path) # second item is stat info - data = io.BytesIO(b''.join([item for item in stream])) - return extract_tar_in_memory(container, data) + return extract_tar_in_memory(container, stream) except docker.errors.APIError as e: return [(container.name + "/" + "spark-logs.err", e.__str__())] +def get_spark_app_logs(contianer): + spark_app_logs_path = "/home/spark-current/work" + try: + stream, _ = container.get_archive(spark_logs_path) + return extract_tar_in_memory(container, stream) + except docker.errors.APIError as e: + return [(container.name + "/" + "spark-work-logs.err", e.__str__())] + + def filter_members(members): skip_files = ["id_rsa", "id_rsa.pub", "docker.log"] skip_extensions = [".pyc", ".zip"] @@ -139,6 +145,7 @@ def filter_members(members): def extract_tar_in_memory(container, data): + data = io.BytesIO(b''.join([item for item in data])) tarf = tarfile.open(fileobj=data) logs = [] for member in filter_members(tarf): From c2cbc4c6cbb13bb459679ce935818f2d4f50fecd Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 26 Mar 2018 12:05:39 -0700 Subject: [PATCH 12/21] call get_spark_app_logs, typos --- aztk/spark/utils/debug.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index 9c2a0c1b..654a8c15 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -89,6 +89,7 @@ def get_docker_containers(docker_client): if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers logs.extend(get_container_aztk_script(container)) logs.extend(get_spark_logs(container)) + 
logs.extend(get_spark_app_logs(container))
         logs.append(("docker-containers.txt", container_attrs))
     return logs
@@ -126,10 +127,10 @@ def get_spark_logs(container):
         return [(container.name + "/" + "spark-logs.err", e.__str__())]
-def get_spark_app_logs(contianer):
+def get_spark_app_logs(container):
     spark_app_logs_path = "/home/spark-current/work"
     try:
-        stream, _ = container.get_archive(spark_logs_path)
+        stream, _ = container.get_archive(spark_app_logs_path)
         return extract_tar_in_memory(container, stream)
     except docker.errors.APIError as e:
         return [(container.name + "/" + "spark-work-logs.err", e.__str__())]

From bbffe88e54a5981a5212bff8e927271406b3db36 Mon Sep 17 00:00:00 2001
From: Jake Freck 
Date: Mon, 26 Mar 2018 12:15:44 -0700
Subject: [PATCH 13/21] add docs

---
 docs/10-clusters.md | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/docs/10-clusters.md b/docs/10-clusters.md
index 3d653b4c..df0a0e98 100644
--- a/docs/10-clusters.md
+++ b/docs/10-clusters.md
@@ -18,7 +18,7 @@ For example, to create a cluster of 4 *Standard_A2* nodes called 'spark' you can
 aztk spark cluster create --id spark --vm-size standard_a2 --size 4
 ```
-You can find more information on VM sizes [here.](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes) Please note that you must use the official SKU name when setting your VM size - they usually come in the form: "standard_d2_v2".
+You can find more information on VM sizes [here.](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes) Please note that you must use the official SKU name when setting your VM size - they usually come in the form: "standard_d2_v2".
 _Note: The cluster id (`--id`) can only contain alphanumeric characters including hyphens and underscores, and cannot contain more than 64 characters. Each cluster **must** have a unique cluster id._
@@ -139,6 +139,27 @@ Now that you're in, you can change directory to your familiar `$SPARK_HOME`
 cd $SPARK_HOME
 ```
+### Debugging your Spark Cluster
+
+If your cluster is in an unknown or unusable state, you can debug by running:
+
+```sh
+aztk spark cluster debug --id <cluster_id> --output <output_directory>
+```
+
+The debug utility will pull logs from all nodes in the cluster. The utility will check for:
+- free disk space
+- docker image status
+- docker container status
+- docker container logs
+- docker container process status
+- aztk code & version
+- spark component logs (master, worker, shuffle service, history server, etc) from $SPARK_HOME/logs
+- spark application logs from $SPARK_HOME/work
+
+__Please be careful sharing the output of the `debug` command as secrets and application code are present in the output.__
+
+
 ### Interact with your Spark cluster
 By default, the `aztk spark cluster ssh` command port forwards the Spark Web UI to *localhost:8080*, Spark Jobs UI to *localhost:4040*, and Spark History Server to your *locahost:18080*. This can be [configured in *.aztk/ssh.yaml*](../docs/13-configuration.md##sshyaml).

From e0f37c6c07c23bd619bd88143634756426afbcc5 Mon Sep 17 00:00:00 2001
From: Jake Freck 
Date: Thu, 5 Apr 2018 11:07:37 -0700
Subject: [PATCH 14/21] remove debug.py from pylintrc ignore

---
 pylintrc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pylintrc b/pylintrc
index 75f78ddb..71710d66 100644
--- a/pylintrc
+++ b/pylintrc
@@ -9,7 +9,7 @@
 # Add files or directories to the blacklist. They should be base names, not
 # paths.
-ignore=CVS,debug.py
+ignore=CVS
 # Add files or directories matching the regex patterns to the blacklist. 
The # regex matches against base names, not paths. From 39b5b3f4a09f407a5c3c18a41b068be2cb087529 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 5 Apr 2018 11:47:14 -0700 Subject: [PATCH 15/21] added debug.py back to pylint ignore --- pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylintrc b/pylintrc index 71710d66..75f78ddb 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # Add files or directories to the blacklist. They should be base names, not # paths. -ignore=CVS +ignore=CVS,debug.py # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. From f6377e141432ae60c89f93474ed5e681461b1a97 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 5 Apr 2018 12:36:34 -0700 Subject: [PATCH 16/21] change pylint ignore --- aztk/spark/utils/debug.py | 2 +- pylintrc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aztk/spark/utils/debug.py b/aztk/spark/utils/debug.py index 654a8c15..21a5e9eb 100644 --- a/aztk/spark/utils/debug.py +++ b/aztk/spark/utils/debug.py @@ -10,7 +10,7 @@ from subprocess import STDOUT, CalledProcessError, check_output from zipfile import ZIP_DEFLATED, ZipFile -import docker +import docker # pylint: disable=import-error def main(): diff --git a/pylintrc b/pylintrc index 75f78ddb..71710d66 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # Add files or directories to the blacklist. They should be base names, not # paths. -ignore=CVS,debug.py +ignore=CVS # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. From 9c722d100ee713cfae1d5f44edee41da82532613 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Thu, 5 Apr 2018 12:42:08 -0700 Subject: [PATCH 17/21] remove commented log --- aztk/utils/ssh.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 3a73407d..d1e96c54 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -47,7 +47,6 @@ def node_exec_command(command, username, hostname, port, ssh_key=None, password= else: cmd = '/bin/bash 2>&1 -c \'set -e; set -o pipefail; {0}; wait\''.format(command) stdin, stdout, stderr = client.exec_command(cmd, get_pty=True) - # [print(line.decode('utf-8')) for line in stdout.read().splitlines()] output = [line.decode('utf-8') for line in stdout.read().splitlines()] client.close() return output From dbee66946f69b9c72334c62dd5b79f8092bd37ae Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 9 Apr 2018 12:43:47 -0700 Subject: [PATCH 18/21] update cluster_run --- aztk/client.py | 4 +-- aztk/utils/ssh.py | 30 ++++++++++--------- .../spark/endpoints/cluster/cluster_run.py | 14 +++++---- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/aztk/client.py b/aztk/client.py index 9c0e56f4..e2e06d0a 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -233,9 +233,9 @@ def __cluster_run(self, cluster_id, command, internal, container_name=None): pool, nodes = self.__get_pool_details(cluster_id) nodes = [node for node in nodes] if internal: - cluster_nodes = [models.RemoteLogin(ip_address=node.ip_address, port="22") for node in nodes] + cluster_nodes = [(node, models.RemoteLogin(ip_address=node.ip_address, port="22")) for node in nodes] else: - cluster_nodes = [self.__get_remote_login_settings(pool.id, node.id) for node in nodes] + cluster_nodes = [(node, self.__get_remote_login_settings(pool.id, node.id)) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) output = 
asyncio.get_event_loop().run_until_complete(ssh_lib.clus_exec_command(command, diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index d1e96c54..f699d0a8 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -40,7 +40,7 @@ def connect(hostname, return client -def node_exec_command(command, username, hostname, port, ssh_key=None, password=None, container_name=None): +def node_exec_command(node_id, command, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) if container_name: cmd = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) @@ -49,20 +49,21 @@ def node_exec_command(command, username, hostname, port, ssh_key=None, password= stdin, stdout, stderr = client.exec_command(cmd, get_pty=True) output = [line.decode('utf-8') for line in stdout.read().splitlines()] client.close() - return output + return (node_id, output) async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, password=None, container_name=None): return await asyncio.gather( *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), node_exec_command, + node.id, command, username, - node.ip_address, - node.port, + node_rls.ip_address, + node_rls.port, ssh_key, password, - container_name) for node in nodes] + container_name) for node, node_rls in nodes] ) @@ -76,15 +77,15 @@ def copy_from_node(node_id, source_path, destination_path, username, hostname, p sftp_client.getfo(source_path, f) return f except (IOError, PermissionError) as e: - print(e) + raise e finally: sftp_client.close() client.close() + def node_copy(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None): client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key) sftp_client = client.open_sftp() - try: if container_name: # put the file in /tmp on the host @@ -93,21 +94,22 @@ def node_copy(node_id, source_path, destination_path, username, hostname, port, # move to correct destination on container docker_command = 'sudo docker cp {0} {1}:{2}'.format(tmp_file, container_name, destination_path) _, stdout, _ = client.exec_command(docker_command, get_pty=True) - [print(line.decode('utf-8')) for line in stdout.read().splitlines()] + output = [line.decode('utf-8') for line in stdout.read().splitlines()] # clean up sftp_client.remove(tmp_file) else: - sftp_client.put(source_path, destination_path) + output = sftp_client.put(source_path, destination_path).__str__() except (IOError, PermissionError) as e: - print(e) - - sftp_client.close() - client.close() + output = e.message + finally: + sftp_client.close() + client.close() #TODO: progress bar + return output async def clus_copy(username, nodes, source_path, destination_path, ssh_key=None, password=None, container_name=None, get=False): - await asyncio.gather( + return await asyncio.gather( *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), copy_from_node if get else node_copy, node.id, diff --git a/aztk_cli/spark/endpoints/cluster/cluster_run.py b/aztk_cli/spark/endpoints/cluster/cluster_run.py index e6cf43b4..34d96438 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_run.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_run.py @@ -20,8 +20,12 @@ def setup_parser(parser: argparse.ArgumentParser): def execute(args: typing.NamedTuple): spark_client = 
aztk.spark.Client(config.load_aztk_secrets()) results = spark_client.cluster_run(args.cluster_id, args.command, args.internal) - for result in results: - print("---------------------------") #TODO: replace with nodename - for line in result: - print(line) - #TODO: pretty print result + [print_execute_result(node_id, result) for node_id, result in results] + + +def print_execute_result(node_id, result): + print("-" * (len(node_id) + 6)) + print("| ", node_id, " |") + print("-" * (len(node_id) + 6)) + for line in result: + print(line) From 964b075f8ed874724a65ecb21fb8356576afe60c Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 9 Apr 2018 13:32:22 -0700 Subject: [PATCH 19/21] refactor cluster_copy --- aztk/client.py | 19 +++++++++++-------- aztk/utils/ssh.py | 11 ++++++----- .../spark/endpoints/cluster/cluster_copy.py | 15 ++++++++++++++- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/aztk/client.py b/aztk/client.py index e2e06d0a..d1dee82c 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -258,16 +258,19 @@ def __cluster_copy(self, cluster_id, source_path, destination_path, container_na cluster_nodes = [(node, self.__get_remote_login_settings(pool.id, node.id)) for node in nodes] try: ssh_key = self.__create_user_on_pool('aztk', pool.id, nodes) - asyncio.get_event_loop().run_until_complete(ssh_lib.clus_copy(container_name=container_name, - username='aztk', - nodes=cluster_nodes, - source_path=source_path, - destination_path=destination_path, - ssh_key=ssh_key.exportKey().decode('utf-8'), - get=get)) - self.__delete_user_on_pool('aztk', pool.id, nodes) + output = asyncio.get_event_loop().run_until_complete( + ssh_lib.clus_copy(container_name=container_name, + username='aztk', + nodes=cluster_nodes, + source_path=source_path, + destination_path=destination_path, + ssh_key=ssh_key.exportKey().decode('utf-8'), + get=get)) + return output except (OSError, batch_error.BatchErrorException) as exc: raise exc + finally: + self.__delete_user_on_pool('aztk', pool.id, nodes) def __submit_job(self, job_configuration, diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index f699d0a8..bd139683 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -75,9 +75,9 @@ def copy_from_node(node_id, source_path, destination_path, username, hostname, p os.makedirs(os.path.dirname(destination_path), exist_ok=True) with open(destination_path, 'wb') as f: #SpooledTemporaryFile instead?? 
From c15c411ba5b868080ccb18ed3ba4332af833df32 Mon Sep 17 00:00:00 2001
From: Jake Freck
Date: Mon, 9 Apr 2018 13:47:44 -0700
Subject: [PATCH 20/21] update debug, add spinner for run and copy

---
 aztk/spark/helpers/cluster_diagnostic_helper.py  |  4 ++--
 aztk_cli/spark/endpoints/cluster/cluster_copy.py | 16 ++++++++--------
 .../spark/endpoints/cluster/cluster_debug.py     | 13 ++++++++++---
 aztk_cli/spark/endpoints/cluster/cluster_run.py  |  3 ++-
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py
index 75776c8a..a9cdedb6 100644
--- a/aztk/spark/helpers/cluster_diagnostic_helper.py
+++ b/aztk/spark/helpers/cluster_diagnostic_helper.py
@@ -9,12 +9,12 @@ def run(spark_client, cluster_id, output_directory):
     spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True)
     ssh_cmd = _build_diagnostic_ssh_command()
     run_output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True)
-    local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip") #TODO: add timestamp
+    local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip")
     remote_path = "/tmp/debug.zip"
     output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True)
     # write run output to debug/ directory
     with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f:
-        [f.write(line + '\n') for node_output in run_output for line in node_output]
+        [f.write(line + '\n') for node_id, result in run_output for line in result]

     return output
diff --git a/aztk_cli/spark/endpoints/cluster/cluster_copy.py b/aztk_cli/spark/endpoints/cluster/cluster_copy.py
index 2ceae7af..8a861d82 100644
--- a/aztk_cli/spark/endpoints/cluster/cluster_copy.py
+++ b/aztk_cli/spark/endpoints/cluster/cluster_copy.py
@@ -3,7 +3,7 @@
 import typing

 import aztk.spark
-from aztk_cli import config
+from aztk_cli import config, utils


 def setup_parser(parser: argparse.ArgumentParser):
@@ -23,13 +23,13 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-
-    copy_output = spark_client.cluster_copy(
-        cluster_id=args.cluster_id,
-        source_path=args.source_path,
-        destination_path=args.dest_path,
-        internal=args.internal
-    )
+    with utils.Spinner():
+        copy_output = spark_client.cluster_copy(
+            cluster_id=args.cluster_id,
+            source_path=args.source_path,
+            destination_path=args.dest_path,
+            internal=args.internal
+        )
     [print_copy_result(node_id, result, err) for node_id, result, err in copy_output]
     sys.exit(0 if all([result for _, result, _ in copy_output]) else 1)
diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py
index 6b6e5f8c..7fe3d5d2 100644
--- a/aztk_cli/spark/endpoints/cluster/cluster_debug.py
+++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py
@@ -1,19 +1,26 @@
 import argparse
+import os
 import typing
+import time
+
 import aztk.spark
-from aztk_cli import config
+from aztk_cli import config, utils


 def setup_parser(parser: argparse.ArgumentParser):
     parser.add_argument('--id', dest='cluster_id', required=True,
                         help='The unique id of your spark cluster')
-    parser.add_argument('--output', '-o', required=True,
+    parser.add_argument('--output', '-o', required=False,
                         help='the directory for the output folder')


 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
+    timestr = time.strftime("%Y%m%d-%H%M%S")

-    spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, output_directory=args.output)
+    if not args.output:
+        args.output = os.path.join(os.getcwd(), "debug-{0}-{1}".format(args.cluster_id, timestr))
+    with utils.Spinner():
+        spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, output_directory=args.output)

     # TODO: analyze results, display some info about status
diff --git a/aztk_cli/spark/endpoints/cluster/cluster_run.py b/aztk_cli/spark/endpoints/cluster/cluster_run.py
index 34d96438..5567b0fc 100644
--- a/aztk_cli/spark/endpoints/cluster/cluster_run.py
+++ b/aztk_cli/spark/endpoints/cluster/cluster_run.py
@@ -19,7 +19,8 @@ def setup_parser(parser: argparse.ArgumentParser):

 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    results = spark_client.cluster_run(args.cluster_id, args.command, args.internal)
+    with utils.Spinner():
+        results = spark_client.cluster_run(args.cluster_id, args.command, args.internal)
     [print_execute_result(node_id, result) for node_id, result in results]
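
With this change the debug command no longer requires --output; it falls back to a timestamped folder in the working directory and wraps the long-running calls in a spinner. The SDK entry point the CLI wraps can be called the same way; the cluster id and directory below are placeholders:

    # Illustrative only: mirrors the CLI's default output directory naming.
    import os
    import time

    output_dir = os.path.join(os.getcwd(), "debug-{0}-{1}".format("my-cluster", time.strftime("%Y%m%d-%H%M%S")))
    spark_client.run_cluster_diagnostics(cluster_id="my-cluster", output_directory=output_dir)
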
From e43aa9286a9f30cb24324833df2d35ff6b2e666a Mon Sep 17 00:00:00 2001
From: Jake Freck
Date: Mon, 9 Apr 2018 14:02:08 -0700
Subject: [PATCH 21/21] make new sdk cluster_download endpoint

---
 aztk/client.py                                  |  3 +++
 aztk/spark/client.py                            | 11 +++++++++--
 aztk/spark/helpers/cluster_diagnostic_helper.py |  2 +-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/aztk/client.py b/aztk/client.py
index d1dee82c..584a66a5 100644
--- a/aztk/client.py
+++ b/aztk/client.py
@@ -393,5 +393,8 @@ def cluster_run(self, cluster_id, command):
     def cluster_copy(self, cluster_id, source_path, destination_path):
         raise NotImplementedError()

+    def cluster_download(self, cluster_id, source_path, destination_path):
+        raise NotImplementedError()
+
     def submit_job(self, job):
         raise NotImplementedError()
diff --git a/aztk/spark/client.py b/aztk/spark/client.py
index 746ef9cc..55c22cd6 100644
--- a/aztk/spark/client.py
+++ b/aztk/spark/client.py
@@ -153,10 +153,17 @@ def cluster_run(self, cluster_id: str, command: str, host=False, internal: bool
         except batch_error.BatchErrorException as e:
             raise error.AztkError(helpers.format_batch_exception(e))

-    def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, get: bool = False, internal: bool = False):
+    def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False):
         try:
             container_name = None if host else 'spark'
-            return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=get, internal=internal)
+            return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=False, internal=internal)
         except batch_error.BatchErrorException as e:
             raise error.AztkError(helpers.format_batch_exception(e))
+
+    def cluster_download(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False):
+        try:
+            container_name = None if host else 'spark'
+            return self.__cluster_copy(cluster_id, source_path, destination_path, container_name=container_name, get=True, internal=internal)
+        except batch_error.BatchErrorException as e:
+            raise error.AztkError(helpers.format_batch_exception(e))
diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py
index a9cdedb6..d6e4a525 100644
--- a/aztk/spark/helpers/cluster_diagnostic_helper.py
+++ b/aztk/spark/helpers/cluster_diagnostic_helper.py
@@ -11,7 +11,7 @@ def run(spark_client, cluster_id, output_directory):
     run_output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True)
     local_path = os.path.join(os.path.abspath(output_directory), "debug", "debug.zip")
     remote_path = "/tmp/debug.zip"
-    output = spark_client.cluster_copy(cluster_id, remote_path, local_path, host=True, get=True)
+    output = spark_client.cluster_download(cluster_id, remote_path, local_path, host=True)
     # write run output to debug/ directory
     with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f:
         [f.write(line + '\n') for node_id, result in run_output for line in result]
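
With cluster_download in place, cluster_copy always pushes files out to the nodes while cluster_download pulls them back; internally both route through __cluster_copy with the get flag toggled. A sketch of the new pair, mirroring how the diagnostic helper uses them; the cluster id and local path are placeholders:

    # Illustrative only: push a file to every node's host filesystem...
    spark_client.cluster_copy("my-cluster", "./debug.py", "/tmp/debug.py", host=True)
    # ...then pull a file back; each node yields a (node_id, success, error) tuple.
    results = spark_client.cluster_download("my-cluster", "/tmp/debug.zip", "./debug/debug.zip", host=True)
    for node_id, success, error in results:
        print(node_id, "ok" if success else error)
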