1 change: 0 additions & 1 deletion .gitignore
@@ -1,5 +1,4 @@
# custom
my-custom-scripts/
.aztk

# Environments
4 changes: 1 addition & 3 deletions README.md
@@ -115,9 +115,7 @@ By default, we port forward the Spark Web UI to *localhost:8080*, Spark Jobs UI

You can configure these settings in the *.aztk/ssh.yaml* file.

NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server depending on whether or not you are a python or R user. To do so, you need to setup your cluster with the appropriate docker image and custom scripts:
- [how to setup Jupyter with Pyspark](https://github.com/Azure/aztk/wiki/PySpark-on-Azure-with-AZTK)
- [how to setup RStudio-Server with Sparklyr](https://github.com/Azure/aztk/wiki/SparklyR-on-Azure-with-AZTK)
NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server. To do so, you need to set up your cluster with the appropriate Docker image and plugin. See [Plugins](./docs/15-plugins.md) for more information.
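For reference, the plugin route is configured in *.aztk/cluster.yaml*. A minimal sketch, assuming the `jupyter` and `rstudio_server` plugin names (verify the exact names and options against [Plugins](./docs/15-plugins.md)):

```yaml
# .aztk/cluster.yaml -- sketch only; plugin names are assumed, check the plugins docs
plugins:
  - name: jupyter          # Jupyter notebook server for Python/PySpark users
  - name: rstudio_server   # RStudio Server for R/sparklyr users
```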

### 5. Manage and Monitor your Spark Cluster

31 changes: 3 additions & 28 deletions aztk/internal/cluster_data/node_data.py
@@ -4,17 +4,17 @@
import zipfile
from pathlib import Path
from typing import List

import yaml

from aztk import models
from aztk.utils import constants, file_utils, secure_utils
from aztk.error import InvalidCustomScriptError
from aztk.utils import constants, file_utils, secure_utils

ROOT_PATH = constants.ROOT_PATH

# Constants for node data
NODE_SCRIPT_FOLDER = "aztk"
CUSTOM_SCRIPT_FOLDER = "custom-scripts"
CUSTOM_SCRIPT_METADATA_FILE = "custom-scripts.yaml"
PLUGIN_FOLDER = "plugins"


@@ -30,7 +30,6 @@ def __init__(self, cluster_config: models.ClusterConfiguration):

    def add_core(self):
        self._add_node_scripts()
        self._add_custom_scripts()
        self._add_plugins()
        self._add_spark_configuration()
        self._add_user_conf()
@@ -72,30 +71,6 @@ def add_dir(self, path: str, dest: str = None, exclude: List[str] = None):
                if self._includeFile(file, exclude):
                    self.add_file(os.path.join(base, file), os.path.join(dest, relative_folder), binary=False)

    def _add_custom_scripts(self):
        data = []
        if not self.cluster_config.custom_scripts:
            return

        for index, custom_script in enumerate(self.cluster_config.custom_scripts):
            if isinstance(custom_script.script, (str, bytes)):
                new_file_name = str(index) + '_' + os.path.basename(custom_script.script)
                data.append(dict(script=new_file_name, runOn=str(custom_script.run_on)))
                try:
                    with io.open(custom_script.script, 'r', encoding='UTF-8') as f:
                        self.zipf.writestr(
                            os.path.join(CUSTOM_SCRIPT_FOLDER, new_file_name),
                            f.read().replace('\r\n', '\n'))
                except FileNotFoundError:
                    raise InvalidCustomScriptError("Custom script '{0}' doesn't exists.".format(custom_script.script))
            elif isinstance(custom_script.script, models.File):
                new_file_name = str(index) + '_' + custom_script.script.name
                self.zipf.writestr(
                    os.path.join('custom-scripts', new_file_name), custom_script.script.payload.getvalue())

        self.zipf.writestr(
            os.path.join(CUSTOM_SCRIPT_FOLDER, CUSTOM_SCRIPT_METADATA_FILE), yaml.dump(data, default_flow_style=False))

    def _add_spark_configuration(self):
        spark_configuration = self.cluster_config.spark_configuration
        if not spark_configuration:
1 change: 0 additions & 1 deletion aztk/models/cluster_configuration.py
@@ -35,7 +35,6 @@ class ClusterConfiguration(Model):

    subnet_id = fields.String(default=None)
    plugins = fields.List(PluginConfiguration)
    custom_scripts = fields.List(CustomScript)
    file_shares = fields.List(FileShare)
    user_configuration = fields.Model(UserConfiguration, default=None)
    scheduling_target = fields.Enum(SchedulingTarget, default=None)
5 changes: 0 additions & 5 deletions aztk/node_scripts/docker_main.sh
@@ -6,15 +6,10 @@ set -e
source ~/.bashrc
echo "Initializing spark container"

# --------------------
# Setup custom scripts
# --------------------
custom_script_dir=$AZTK_WORKING_DIR/custom-scripts
aztk_dir=$AZTK_WORKING_DIR/aztk

# -----------------------
# Preload jupyter samples
# TODO: remove when we support uploading random (non-executable) files as part custom-scripts
# -----------------------
mkdir -p /mnt/samples

1 change: 0 additions & 1 deletion aztk/node_scripts/install/install.py
@@ -81,6 +81,5 @@ def setup_spark_container():
        spark.start_spark_worker()

    plugins.setup_plugins(target=PluginTarget.SparkContainer, is_master=is_master, is_worker=is_worker)
    scripts.run_custom_scripts(is_master=is_master, is_worker=is_worker)

    open("/tmp/setup_complete", 'a').close()
73 changes: 0 additions & 73 deletions aztk/node_scripts/install/scripts.py

This file was deleted.

9 changes: 4 additions & 5 deletions aztk/spark/models/models.py
@@ -1,10 +1,12 @@
from typing import List
from Cryptodome.PublicKey import RSA

import azure.batch.models as batch_models
from Cryptodome.PublicKey import RSA

import aztk.models
from aztk import error
from aztk.utils import constants, helpers
from aztk.core.models import Model, fields
from aztk.utils import constants, helpers


class SparkToolkit(aztk.models.Toolkit):
@@ -192,7 +194,6 @@ def __init__(self,
                 id=None,
                 applications=None,
                 vm_size=None,
                 custom_scripts=None,
                 spark_configuration=None,
                 toolkit=None,
                 max_dedicated_nodes=0,
@@ -203,7 +204,6 @@

        self.id = id
        self.applications = applications
        self.custom_scripts = custom_scripts
        self.spark_configuration = spark_configuration
        self.vm_size = vm_size
        self.gpu_enabled = None
@@ -219,7 +219,6 @@ def __init__(self,
    def to_cluster_config(self):
        return ClusterConfiguration(
            cluster_id=self.id,
            custom_scripts=self.custom_scripts,
            toolkit=self.toolkit,
            vm_size=self.vm_size,
            size=self.max_dedicated_nodes,
2 changes: 0 additions & 2 deletions aztk/utils/constants.py
@@ -39,8 +39,6 @@
DEFAULT_SPARK_JARS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'jars')
DEFAULT_SPARK_JOB_CONFIG = os.path.join(os.getcwd(), '.aztk', 'job.yaml')
GLOBAL_SPARK_JOB_CONFIG = os.path.join(HOME_DIRECTORY_PATH, '.aztk', 'job.yaml')

CUSTOM_SCRIPTS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'custom-scripts')
"""
Source and destination paths for spark init
"""
2 changes: 0 additions & 2 deletions aztk_cli/config.py
@@ -160,7 +160,6 @@ class JobConfig():
    def __init__(self):
        self.id = None
        self.applications = []
        self.custom_scripts = None
        self.spark_configuration = None
        self.vm_size = None
        self.toolkit = None
@@ -187,7 +186,6 @@ def _merge_dict(self, config):
            self.max_dedicated_nodes = cluster_configuration.get('size')
        if cluster_configuration.get('size_low_priority') is not None:
            self.max_low_pri_nodes = cluster_configuration.get('size_low_priority')
        self.custom_scripts = cluster_configuration.get('custom_scripts')
        self.subnet_id = cluster_configuration.get('subnet_id')
        self.worker_on_master = cluster_configuration.get("worker_on_master")
        scheduling_target = cluster_configuration.get("scheduling_target")
7 changes: 0 additions & 7 deletions aztk_cli/config/cluster.yaml
@@ -29,13 +29,6 @@ size: 2
# username: <username for the linux user to be created> (optional)
username: spark

# **DEPRECATED** Use plugins instead
# custom_scripts:
#   - script: </path/to/script.sh or /path/to/script/directory/>
#     runOn: <master/worker/all-nodes>
#   - script: <./relative/path/to/other/script.sh or ./relative/path/to/other/script/directory/>
#     runOn: <master/worker/all-nodes>

# To add your cluster to a virtual network provide the full arm resource id below
# subnet_id: /subscriptions/********-****-****-****-************/resourceGroups/********/providers/Microsoft.Network/virtualNetworks/*******/subnets/******

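For anyone migrating from the deprecated block removed above, a rough sketch of the equivalent plugin entry, kept in the same commented-out style as the rest of this template; the `script` and `target_role` keys are assumptions to verify against docs/15-plugins.md, not taken from this diff:

```yaml
# plugins:
#   - script: </path/to/script.sh>    # local file to run on the nodes
#     target_role: all-nodes          # master/worker/all-nodes, replacing the old runOn
```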
1 change: 0 additions & 1 deletion aztk_cli/spark/endpoints/job/submit.py
@@ -39,7 +39,6 @@ def execute(args: typing.NamedTuple):
    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=job_conf.applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        toolkit=job_conf.toolkit,
1 change: 0 additions & 1 deletion aztk_cli/utils.py
@@ -399,7 +399,6 @@ def print_cluster_conf(cluster_conf: ClusterConfiguration, wait: bool):
    log.info("> dedicated: %s", cluster_conf.size)
    log.info("> low priority: %s", cluster_conf.size_low_priority)
    log.info("cluster vm size: %s", cluster_conf.vm_size)
    log.info("custom scripts: %s", len(cluster_conf.custom_scripts) if cluster_conf.custom_scripts else 0)
    log.info("subnet ID: %s", cluster_conf.subnet_id)
    log.info("file shares: %s",
             len(cluster_conf.file_shares) if cluster_conf.file_shares is not None else 0)
2 changes: 1 addition & 1 deletion docs/10-clusters.md
@@ -197,5 +197,5 @@ By default, the `aztk spark cluster ssh` command port forwards the Spark Web UI

## Next Steps
- [Run a Spark job](20-spark-submit.html)
- [Configure the Spark cluster using custom commands](11-custom-scripts.html)
- [Configure the Spark cluster using custom plugins](51-define-plugin.html)
- [Bring your own Docker image or choose between a variety of our supported base images to manage your Spark and Python versions](12-docker-image.html)
4 changes: 1 addition & 3 deletions docs/15-plugins.md
@@ -1,8 +1,6 @@
# Plugins

Plugins are a successor to [custom scripts](11-custom-scripts.html) and are the recommended way of running custom code on the cluster.

Plugins can either be one of the Aztk [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).
Plugins can either be one of the `aztk` [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).

## Supported Plugins
AZTK ships with a library of default plugins that enable auxiliary services to use with your Spark cluster.
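To illustrate the two forms named above, a hedged sketch of a `plugins` section; the `hdfs` plugin name and the `script` key are assumptions based on the surrounding docs, not part of this diff:

```yaml
# cluster.yaml plugins section -- sketch only, verify keys against docs/15-plugins.md
plugins:
  - name: hdfs                    # a supported plugin, enabled by name
  - script: ./my-init-script.sh   # a custom-script plugin pointing at a local file
```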
7 changes: 0 additions & 7 deletions docs/70-jobs.md
@@ -46,13 +46,6 @@ Jobs also require a definition of the cluster on which the Applications will run
    software: spark
    version: 2.2
  subnet_id: <resource ID of a subnet to use (optional)>
  custom_scripts:
    - List
    - of
    - paths
    - to
    - custom
    - scripts
```
_Please Note: For more information about Azure VM sizes, see [Azure Batch Pricing](https://azure.microsoft.com/en-us/pricing/details/batch/). And for more information about Docker repositories see [Docker](./12-docker-image.html)._

9 changes: 0 additions & 9 deletions tests/integration_tests/spark/sdk/cluster/test_cluster.py
@@ -74,7 +74,6 @@ def test_create_cluster():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -105,7 +104,6 @@ def test_list_clusters():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -130,7 +128,6 @@ def test_get_remote_login_settings():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -158,7 +155,6 @@ def test_submit():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -199,7 +195,6 @@ def test_get_application_log():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -260,7 +255,6 @@ def test_get_application_status_complete():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -304,7 +298,6 @@ def test_delete_cluster():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -330,7 +323,6 @@ def test_spark_processes_up():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)
@@ -357,7 +349,6 @@ def test_debug_tool():
        size_low_priority=0,
        vm_size="standard_f2",
        subnet_id=None,
        custom_scripts=None,
        file_shares=None,
        toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
        spark_configuration=None)