Skip to content
This repository was archived by the owner on Feb 3, 2021. It is now read-only.

Commit b18eb69

Browse files
authored
Feature: SDK refactor (#622)
* start refactor * continue refactor for cluster and job functions * fix imports * fixes * fixes * refactor integration test secrets management * fix cluster create, add new test * add tests for new sdk api and fix bugs * fix naming and bugs * update job operations naming, bug fixes * fix cluster tests * fix joboperations and tests * update cli and fix some bugs * start fixes * fix pylint errors, bugs * add deprecated warning checks, rename tests * add docstrings for baseoperations * add docstrings * docstrings, add back compat for coreclient, fix init for spark client * whitespace * docstrings, whitespace * docstrings, fixes * docstrings, fixes * fix the sdk documentation, bugs * fix method call * pool_id->id * rename ids * cluster_id->id * cluster_id->id * add todo * fixes * add some todos * rename pool to cluster, add todo for nodes params * add todos for nodes param removal * update functions names * remove deprecated fucntion calls * update docs and docstrings * update docstrings * get rid of TODOs, fix docstrings * remove unused setting * inheritance -> composition * fix models bugs * fix create_user bug * update sdk_example.py * fix create user argument issue * update sdk_example.py * update doc * use Software model instead of string * add job wait flag, add cluster application wait functions * add docs for wait, update tests * fix bug * add clientrequesterror catch to fix tests
1 parent c9fd8bb commit b18eb69

File tree

111 files changed

+3711
-842
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+3711
-842
lines changed

.style.yapf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ based_on_style=pep8
33
spaces_before_comment=4
44
split_before_logical_operator=True
55
indent_width=4
6-
column_limit=140
6+
column_limit=120
77
split_arguments_when_comma_terminated=True

.vscode/settings.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@
1414
"python.formatting.provider": "yapf",
1515
"python.venvPath": "${workspaceFolder}/.venv/",
1616
"python.pythonPath": "${workspaceFolder}/.venv/Scripts/python.exe",
17-
"python.unitTest.pyTestEnabled": true
17+
"python.unitTest.pyTestEnabled": true,
1818
}

aztk/client/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .client import CoreClient

aztk/client/base/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .base_operations import BaseOperations
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
from aztk import models
2+
from aztk.internal import cluster_data
3+
from aztk.utils import ssh as ssh_lib
4+
5+
from .helpers import (create_user_on_cluster, create_user_on_node, delete_user_on_cluster, delete_user_on_node,
6+
generate_user_on_cluster, generate_user_on_node, get_application_log, get_remote_login_settings,
7+
node_run, run, ssh_into_node)
8+
9+
10+
class BaseOperations:
11+
"""Base operations that all other operations have as an attribute
12+
13+
Attributes:
14+
batch_client (:obj:`azure.batch.batch_service_client.BatchServiceClient`): Client used to interact with the
15+
Azure Batch service.
16+
blob_client (:obj:`azure.storage.blob.BlockBlobService`): Client used to interact with the Azure Storage
17+
Blob service.
18+
secrets_configuration (:obj:`aztk.models.SecretsConfiguration`): Model that holds AZTK secrets used to authenticate
19+
with Azure and the clusters.
20+
"""
21+
22+
def __init__(self, context):
23+
self.batch_client = context['batch_client']
24+
self.blob_client = context['blob_client']
25+
self.secrets_configuration = context['secrets_configuration']
26+
27+
def get_cluster_config(self, id: str) -> models.ClusterConfiguration:
28+
"""Open an ssh tunnel to a node
29+
30+
Args:
31+
id (:obj:`str`): the id of the cluster the node is in
32+
node_id (:obj:`str`): the id of the node to open the ssh tunnel to
33+
username (:obj:`str`): the username to authenticate the ssh session
34+
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key
35+
or password. Defaults to None.
36+
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
37+
port_forward_list (:obj:`List[PortForwardingSpecification`, optional): list of PortForwardingSpecifications.
38+
The defined ports will be forwarded to the client.
39+
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
40+
Only use this if running within the same VNET as the cluster. Defaults to False.
41+
42+
Returns:
43+
:obj:`aztk.models.ClusterConfiguration`: Object representing the cluster's configuration
44+
"""
45+
return self.get_cluster_data(id).read_cluster_config()
46+
47+
def get_cluster_data(self, id: str) -> cluster_data.ClusterData:
48+
"""Gets the ClusterData object to manage data related to the given cluster
49+
50+
Args:
51+
id (:obj:`str`): the id of the cluster to get
52+
53+
Returns:
54+
:obj:`aztk.models.ClusterData`: Object used to manage the data and storage functions for a cluster
55+
"""
56+
return cluster_data.ClusterData(self.blob_client, id)
57+
58+
def ssh_into_node(self, id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
59+
"""Open an ssh tunnel to a node
60+
61+
Args:
62+
id (:obj:`str`): the id of the cluster the node is in
63+
node_id (:obj:`str`): the id of the node to open the ssh tunnel to
64+
username (:obj:`str`): the username to authenticate the ssh session
65+
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
66+
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
67+
port_forward_list (:obj:`List[PortForwardingSpecification`, optional): list of PortForwardingSpecifications.
68+
The defined ports will be forwarded to the client.
69+
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
70+
Only use this if running within the same VNET as the cluster. Defaults to False.
71+
72+
Returns:
73+
:obj:`None`
74+
"""
75+
ssh_into_node.ssh_into_node(self, id, node_id, username, ssh_key, password, port_forward_list, internal)
76+
77+
def create_user_on_node(self, id, node_id, username, ssh_key=None, password=None):
78+
"""Create a user on a node
79+
80+
Args:
81+
id (:obj:`str`): id of the cluster to create the user on.
82+
node_id (:obj:`str`): id of the node in the cluster to create the user on.
83+
username (:obj:`str`): name of the user to create.
84+
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password.
85+
password (:obj:`str`, optional): password for the user, must use ssh_key or password.
86+
87+
Returns:
88+
:obj:`None`
89+
"""
90+
return create_user_on_node.create_user_on_node(self, id, node_id, username, ssh_key, password)
91+
92+
#TODO: remove nodes as param
93+
def create_user_on_cluster(self, id, nodes, username, ssh_pub_key=None, password=None):
94+
"""Create a user on every node in the cluster
95+
96+
Args:
97+
username (:obj:`str`): name of the user to create.
98+
id (:obj:`str`): id of the cluster to create the user on.
99+
nodes (:obj:`List[ComputeNode]`): list of nodes to create the user on
100+
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
101+
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
102+
103+
Returns:
104+
:obj:`None`
105+
"""
106+
return create_user_on_cluster.create_user_on_cluster(self, id, nodes, username, ssh_pub_key, password)
107+
108+
def generate_user_on_node(self, id, node_id):
109+
"""Create a user with an autogenerated username and ssh_key on the given node.
110+
111+
Args:
112+
id (:obj:`str`): the id of the cluster to generate the user on.
113+
node_id (:obj:`str`): the id of the node in the cluster to generate the user on.
114+
115+
Returns:
116+
:obj:`tuple`: A tuple of the form (username: :obj:`str`, ssh_key: :obj:`Cryptodome.PublicKey.RSA`)
117+
"""
118+
return generate_user_on_node.generate_user_on_node(self, id, node_id)
119+
120+
#TODO: remove nodes as param
121+
def generate_user_on_cluster(self, id, nodes):
122+
"""Create a user with an autogenerated username and ssh_key on the cluster
123+
124+
Args:
125+
id (:obj:`str`): the id of the cluster to generate the user on.
126+
node_id (:obj:`str`): the id of the node in the cluster to generate the user on.
127+
128+
Returns:
129+
:obj:`tuple`: A tuple of the form (username: :obj:`str`, ssh_key: :obj:`Cryptodome.PublicKey.RSA`)
130+
"""
131+
return generate_user_on_cluster.generate_user_on_cluster(self, id, nodes)
132+
133+
def delete_user_on_node(self, id: str, node_id: str, username: str) -> str:
134+
"""Delete a user on a node
135+
136+
Args:
137+
id (:obj:`str`): the id of the cluster to delete the user on.
138+
node_id (:obj:`str`): the id of the node in the cluster to delete the user on.
139+
username (:obj:`str`): the name of the user to delete.
140+
141+
Returns:
142+
:obj:`None`
143+
"""
144+
return delete_user_on_node.delete_user(self, id, node_id, username)
145+
146+
#TODO: remove nodes as param
147+
def delete_user_on_cluster(self, username, id, nodes):
148+
"""Delete a user on every node in the cluster
149+
150+
Args:
151+
id (:obj:`str`): the id of the cluster to delete the user on.
152+
node_id (:obj:`str`): the id of the node in the cluster to delete the user on.
153+
username (:obj:`str`): the name of the user to delete.
154+
155+
Returns:
156+
:obj:`None`
157+
"""
158+
return delete_user_on_cluster.delete_user_on_cluster(self, username, id, nodes)
159+
160+
def node_run(self, id, node_id, command, internal, container_name=None, timeout=None):
161+
"""Run a bash command on the given node
162+
163+
Args:
164+
id (:obj:`str`): the id of the cluster to run the command on.
165+
node_id (:obj:`str`): the id of the node in the cluster to run the command on.
166+
command (:obj:`str`): the bash command to execute on the node.
167+
internal (:obj:`bool`): if True, this will connect to the node using its internal IP.
168+
Only use this if running within the same VNET as the cluster. Defaults to False.
169+
container_name=None (:obj:`str`, optional): the name of the container to run the command in.
170+
If None, the command will run on the host VM. Defaults to None.
171+
timeout=None (:obj:`str`, optional): The timeout in seconds for establishing a connection to the node.
172+
Defaults to None.
173+
174+
Returns:
175+
:obj:`aztk.models.NodeOutput`: object containing the output of the run command
176+
"""
177+
return node_run.node_run(self, id, node_id, command, internal, container_name, timeout)
178+
179+
def get_remote_login_settings(self, id: str, node_id: str):
180+
"""Get the remote login information for a node in a cluster
181+
182+
Args:
183+
id (:obj:`str`): the id of the cluster the node is in
184+
node_id (:obj:`str`): the id of the node in the cluster
185+
186+
Returns:
187+
:obj:`aztk.models.RemoteLogin`: Object that contains the ip address and port combination to login to a node
188+
"""
189+
return get_remote_login_settings.get_remote_login_settings(self, id, node_id)
190+
191+
def run(self, id, command, internal, container_name=None, timeout=None):
192+
"""Run a bash command on every node in the cluster
193+
194+
Args:
195+
id (:obj:`str`): the id of the cluster to run the command on.
196+
command (:obj:`str`): the bash command to execute on the node.
197+
internal (:obj:`bool`): if true, this will connect to the node using its internal IP.
198+
Only use this if running within the same VNET as the cluster. Defaults to False.
199+
container_name=None (:obj:`str`, optional): the name of the container to run the command in.
200+
If None, the command will run on the host VM. Defaults to None.
201+
timeout=None (:obj:`str`, optional): The timeout in seconds for establishing a connection to the node.
202+
Defaults to None.
203+
204+
Returns:
205+
:obj:`List[azkt.models.NodeOutput]`: list of NodeOutput objects containing the output of the run command
206+
"""
207+
return run.cluster_run(self, id, command, internal, container_name, timeout)
208+
209+
def get_application_log(self, id: str, application_name: str, tail=False, current_bytes: int = 0):
210+
"""Get the log for a running or completed application
211+
212+
Args:
213+
id (:obj:`str`): the id of the cluster to run the command on.
214+
application_name (:obj:`str`): str
215+
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes. Otherwise, the whole log will be retrieved.
216+
Only use this if streaming the log as it is being written. Defaults to False.
217+
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are retrieved.
218+
Only useful is streaming the log as it is being written. Only used if tail is True.
219+
220+
Returns:
221+
:obj:`aztk.models.ApplicationLog`: a model representing the output of the application.
222+
"""
223+
return get_application_log.get_application_log(self, id, application_name, tail, current_bytes)

aztk/client/base/helpers/__init__.py

Whitespace-only changes.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import concurrent.futures
2+
3+
4+
#TODO: remove nodes param
5+
def create_user_on_cluster(base_operations, id, nodes, username, ssh_pub_key=None, password=None):
6+
with concurrent.futures.ThreadPoolExecutor() as executor:
7+
futures = {
8+
executor.submit(base_operations.create_user_on_node, id, node.id, username, ssh_pub_key, password): node
9+
for node in nodes
10+
}
11+
concurrent.futures.wait(futures)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from datetime import datetime, timedelta, timezone
2+
3+
import azure.batch.models as batch_models
4+
import azure.batch.models.batch_error as batch_error
5+
6+
from aztk import models
7+
from aztk.utils import get_ssh_key
8+
9+
10+
def __create_user(self, id: str, node_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
11+
"""
12+
Create a pool user
13+
:param pool: the pool to add the user to
14+
:param node: the node to add the user to
15+
:param username: username of the user to add
16+
:param password: password of the user to add
17+
:param ssh_key: ssh_key of the user to add
18+
"""
19+
# Create new ssh user for the given node
20+
self.batch_client.compute_node.add_user(
21+
id,
22+
node_id,
23+
batch_models.ComputeNodeUser(
24+
name=username,
25+
is_admin=True,
26+
password=password,
27+
ssh_public_key=get_ssh_key.get_user_public_key(ssh_key, self.secrets_configuration),
28+
expiry_time=datetime.now(timezone.utc) + timedelta(days=365),
29+
),
30+
)
31+
32+
33+
def create_user_on_node(base_client, id, node_id, username, ssh_key=None, password=None):
34+
try:
35+
__create_user(
36+
base_client, id=id, node_id=node_id, username=username, ssh_key=ssh_key, password=password)
37+
except batch_error.BatchErrorException as error:
38+
try:
39+
base_client.delete_user_on_node(id, node_id, username)
40+
base_client.create_user_on_node(id=id, node_id=node_id, username=username, ssh_key=ssh_key)
41+
except batch_error.BatchErrorException as error:
42+
raise error
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import concurrent.futures
2+
3+
#TODO: remove nodes param
4+
def delete_user_on_cluster(base_client, id, nodes, username):
5+
with concurrent.futures.ThreadPoolExecutor() as executor:
6+
futures = [executor.submit(base_client.delete_user_on_node, id, node.id, username) for node in nodes]
7+
concurrent.futures.wait(futures)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
def delete_user(self, pool_id: str, node_id: str, username: str) -> str:
2+
"""
3+
Create a pool user
4+
:param pool: the pool to add the user to
5+
:param node: the node to add the user to
6+
:param username: username of the user to add
7+
"""
8+
# Delete a user on the given node
9+
self.batch_client.compute_node.delete_user(pool_id, node_id, username)

0 commit comments

Comments
 (0)