From d1d3e91a8c5174d2548a75213a5aaad7aaa85b8b Mon Sep 17 00:00:00 2001 From: Asher Date: Mon, 1 May 2023 15:45:23 -0800 Subject: [PATCH] Refactor retry --- .../gateway/CoderGatewayConnectionProvider.kt | 66 +++++++++-------- .../kotlin/com/coder/gateway/sdk/Retry.kt | 63 ++++++++-------- .../steps/CoderLocateRemoteProjectStepView.kt | 71 ++++++++++--------- .../messages/CoderGatewayBundle.properties | 16 ++--- 4 files changed, 119 insertions(+), 97 deletions(-) diff --git a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt index 9d254ed6..cd625208 100644 --- a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt +++ b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt @@ -3,6 +3,7 @@ package com.coder.gateway import com.coder.gateway.sdk.humanizeDuration +import com.coder.gateway.sdk.isCancellation import com.coder.gateway.sdk.isWorkerTimeout import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService @@ -33,37 +34,32 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider { // TODO: If this fails determine if it is an auth error and if so prompt // for a new token, configure the CLI, then try again. 
clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) { - val context = suspendingRetryWithExponentialBackOff( - label = "connect", - logger = logger, - action = { attempt -> - logger.info("Deploying (attempt $attempt)...") - indicator.text = - if (attempt > 1) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt) - else CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text") - SshMultistagePanelContext(parameters.toHostDeployInputs()) - }, - predicate = { e -> - e is ConnectionException || e is TimeoutException - || e is SSHException || e is DeployException - }, - update = { _, e, remainingMs -> - if (remainingMs != null) { + try { + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting") + val context = suspendingRetryWithExponentialBackOff( + action = { attempt -> + logger.info("Connecting... (attempt $attempt)") + if (attempt > 1) { + // indicator.text is the text above the progress bar. + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting.retry", attempt) + } + SshMultistagePanelContext(parameters.toHostDeployInputs()) + }, + retryIf = { + it is ConnectionException || it is TimeoutException + || it is SSHException || it is DeployException + }, + onException = { attempt, nextMs, e -> + logger.error("Failed to connect (attempt $attempt; will retry in $nextMs ms)", e) + // indicator.text2 is the text below the progress bar. 
indicator.text2 = if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out" else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") - indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", humanizeDuration(remainingMs)) - } else { - ApplicationManager.getApplication().invokeAndWait { - Messages.showMessageDialog( - e.message ?: CoderGatewayBundle.message("gateway.connector.no-details"), - CoderGatewayBundle.message("gateway.connector.coder.connection.error.text"), - Messages.getErrorIcon()) - } - } - }, - ) - if (context != null) { + }, + onCountdown = { remainingMs -> + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting.failed.retry", humanizeDuration(remainingMs)) + }, + ) launch { logger.info("Deploying and starting IDE with $context") // At this point JetBrains takes over with their own UI. @@ -71,6 +67,20 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider { clientLifetime, context, Duration.ofMinutes(10) ) } + } catch (e: Exception) { + if (isCancellation(e)) { + logger.info("Connection canceled due to ${e.javaClass}") + } else { + logger.error("Failed to connect (will not retry)", e) + // The dialog will close once we return so write the error + // out into a new dialog. 
+ ApplicationManager.getApplication().invokeAndWait { + Messages.showMessageDialog( + e.message ?: CoderGatewayBundle.message("gateway.connector.no-details"), + CoderGatewayBundle.message("gateway.connector.coder.connection.failed"), + Messages.getErrorIcon()) + } + } } } diff --git a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt index 213f23c8..51d4c04c 100644 --- a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt +++ b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt @@ -1,6 +1,5 @@ package com.coder.gateway.sdk -import com.intellij.openapi.diagnostic.Logger import com.intellij.openapi.progress.ProcessCanceledException import com.intellij.ssh.SshException import com.jetbrains.gateway.ssh.deploy.DeployException @@ -19,28 +18,35 @@ fun unwrap(ex: Exception): Throwable { } /** - * Similar to Intellij's except it gives you the next delay, logs differently, - * updates periodically (for counting down), runs forever, takes a predicate for - * determining whether we should retry, and has some special handling for - * exceptions to provide the true cause or better messages. + * Similar to Intellij's except it adds two new arguments: onCountdown (for + * displaying the time until the next try) and retryIf (to limit which + * exceptions can be retried). * - * The update will have a boolean to indicate whether it is the first update (so - * things like duplicate logs can be avoided). If remaining is null then no - * more retries will be attempted. + * Exceptions that cannot be retried will be thrown. * - * If an exception related to canceling is received then return null. + * onException and onCountdown will be called immediately on retryable failures. + * onCountdown will also be called every second until the next try with the time + * left until that next try (the last interval might be less than one second if + * the total delay is not divisible by one second). 
+ * + * Some other differences: + * - onException gives you the time until the next try (intended to be logged + * with the error). + * - Infinite tries. + * - SshException is unwrapped. + * + * It is otherwise identical. */ suspend fun <T> suspendingRetryWithExponentialBackOff( initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5), backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3), backOffFactor: Int = 2, backOffJitter: Double = 0.1, - label: String, - logger: Logger, - predicate: (e: Throwable) -> Boolean, - update: (attempt: Int, e: Throwable, remaining: Long?) -> Unit, - action: suspend (attempt: Int) -> T? -): T? { + retryIf: (e: Throwable) -> Boolean, + onException: (attempt: Int, nextMs: Long, e: Throwable) -> Unit, + onCountdown: (remaining: Long) -> Unit, + action: suspend (attempt: Int) -> T +): T { val random = Random() var delayMs = initialDelayMs for (attempt in 1..Int.MAX_VALUE) { @@ -51,23 +57,13 @@ suspend fun <T> suspendingRetryWithExponentialBackOff( // SshException can happen due to anything from a timeout to being // canceled so unwrap to find out. 
val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx - when (unwrappedEx) { - is InterruptedException, - is CancellationException, - is ProcessCanceledException -> { - logger.info("Retrying $label canceled due to ${unwrappedEx.javaClass}") - return null - } + if (!retryIf(unwrappedEx)) { + throw unwrappedEx } - if (!predicate(unwrappedEx)) { - logger.error("Failed to $label (attempt $attempt; will not retry)", originalEx) - update(attempt, unwrappedEx, null) - return null - } - logger.error("Failed to $label (attempt $attempt; will retry in $delayMs ms)", originalEx) + onException(attempt, delayMs, unwrappedEx) var remainingMs = delayMs while (remainingMs > 0) { - update(attempt, unwrappedEx, remainingMs) + onCountdown(remainingMs) val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1)) remainingMs -= next delay(next) @@ -98,3 +94,12 @@ fun humanizeDuration(durationMs: Long): String { fun isWorkerTimeout(e: Throwable): Boolean { return e is DeployException && e.message.contains("Worker binary deploy failed") } + +/** + * Return true if the exception is some kind of cancellation. 
+ */ +fun isCancellation(e: Throwable): Boolean { + return e is InterruptedException + || e is CancellationException + || e is ProcessCanceledException +} diff --git a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt index cb65dc69..3b209edd 100644 --- a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt +++ b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt @@ -9,6 +9,7 @@ import com.coder.gateway.sdk.CoderCLIManager import com.coder.gateway.sdk.CoderRestClientService import com.coder.gateway.sdk.OS import com.coder.gateway.sdk.humanizeDuration +import com.coder.gateway.sdk.isCancellation import com.coder.gateway.sdk.isWorkerTimeout import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.sdk.toURL @@ -162,6 +163,7 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea // Clear contents from the last attempt if any. cbIDEComment.foreground = UIUtil.getContextHelpForeground() cbIDEComment.text = CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.ide.none.comment") + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides")) ideComboBoxModel.removeAllElements() setNextButtonEnabled(false) @@ -178,42 +180,47 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea terminalLink.url = coderClient.coderURL.withPath("/@${coderClient.me.username}/${selectedWorkspace.name}/terminal").toString() ideResolvingJob = cs.launch { - val ides = suspendingRetryWithExponentialBackOff( - label = "retrieve IDEs", - logger = logger, - action={ attempt -> - logger.info("Deploying to ${selectedWorkspace.name} on $deploymentURL (attempt $attempt)") - // Reset text in the select dropdown. 
- withContext(Dispatchers.Main) { - cbIDE.renderer = IDECellRenderer( - if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt) - else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text")) - } - val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) - if (ComponentValidator.getInstance(tfProject).isEmpty) { - installRemotePathValidator(executor) - } - retrieveIDEs(executor, selectedWorkspace) - }, - predicate = { e -> - e is ConnectionException || e is TimeoutException - || e is SSHException || e is DeployException - }, - update = { _, e, remainingMs -> - cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = - if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out. Check the command log for more details." - else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") - cbIDE.renderer = - if (remainingMs != null) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", humanizeDuration(remainingMs))) - else IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) - }, - ) - if (ides != null) { + try { + val ides = suspendingRetryWithExponentialBackOff( + action = { attempt -> + logger.info("Retrieving IDEs...(attempt $attempt)") + if (attempt > 1) { + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides.retry", attempt)) + } + val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) + if (ComponentValidator.getInstance(tfProject).isEmpty) { + installRemotePathValidator(executor) + } + retrieveIDEs(executor, selectedWorkspace) + }, + retryIf = { + it is ConnectionException || it is TimeoutException + || it is SSHException || it is DeployException + }, + onException 
= { attempt, nextMs, e -> + logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $nextMs ms)", e) + cbIDEComment.foreground = UIUtil.getErrorForeground() + cbIDEComment.text = + if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out. Check the command log for more details." + else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + }, + onCountdown = { remainingMs -> + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides.failed.retry", humanizeDuration(remainingMs))) + }, + ) withContext(Dispatchers.Main) { ideComboBoxModel.addAll(ides) cbIDE.selectedIndex = 0 } + } catch (e: Exception) { + if (isCancellation(e)) { + logger.info("Retrieving IDEs canceled due to ${e.javaClass}") + } else { + logger.error("Failed to retrieve IDEs (will not retry)", e) + cbIDEComment.foreground = UIUtil.getErrorForeground() + cbIDEComment.text = e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides.failed"), UIUtil.getBalloonErrorIcon()) + } } } } diff --git a/src/main/resources/messages/CoderGatewayBundle.properties b/src/main/resources/messages/CoderGatewayBundle.properties index 32c2090b..c5e7e8b0 100644 --- a/src/main/resources/messages/CoderGatewayBundle.properties +++ b/src/main/resources/messages/CoderGatewayBundle.properties @@ -28,10 +28,10 @@ gateway.connector.view.workspaces.token.comment=The last used token is shown abo gateway.connector.view.workspaces.token.rejected=This token was rejected. gateway.connector.view.workspaces.token.injected=This token was pulled from your CLI config. gateway.connector.view.workspaces.token.none=No existing token found. -gateway.connector.view.coder.remoteproject.loading.text=Retrieving products... -gateway.connector.view.coder.remoteproject.retry.text=Retrieving products (attempt {0})... 
-gateway.connector.view.coder.remoteproject.error.text=Failed to retrieve IDEs -gateway.connector.view.coder.remoteproject.retry-error.text=Failed to retrieve IDEs...retrying {0} +gateway.connector.view.coder.retrieve-ides=Retrieving IDEs... +gateway.connector.view.coder.retrieve-ides.retry=Retrieving IDEs (attempt {0})... +gateway.connector.view.coder.retrieve-ides.failed=Failed to retrieve IDEs +gateway.connector.view.coder.retrieve-ides.failed.retry=Failed to retrieve IDEs...retrying {0} gateway.connector.view.coder.remoteproject.next.text=Start IDE and connect gateway.connector.view.coder.remoteproject.choose.text=Choose IDE and project for workspace {0} gateway.connector.view.coder.remoteproject.ide.download.comment=This IDE will be downloaded from jetbrains.com and installed to the default path on the remote host. @@ -42,10 +42,10 @@ gateway.connector.recentconnections.new.wizard.button.tooltip=Open a new Coder W gateway.connector.recentconnections.remove.button.tooltip=Remove from Recent Connections gateway.connector.recentconnections.terminal.button.tooltip=Open SSH Web Terminal gateway.connector.coder.connection.provider.title=Connecting to Coder workspace... -gateway.connector.coder.connection.loading.text=Connecting... -gateway.connector.coder.connection.retry.text=Connecting (attempt {0})... -gateway.connector.coder.connection.retry-error.text=Failed to connect...retrying {0} -gateway.connector.coder.connection.error.text=Failed to connect +gateway.connector.coder.connecting=Connecting... +gateway.connector.coder.connecting.retry=Connecting (attempt {0})... +gateway.connector.coder.connection.failed=Failed to connect +gateway.connector.coder.connecting.failed.retry=Failed to connect...retrying {0} gateway.connector.settings.binary-source.title=CLI source: gateway.connector.settings.binary-source.comment=Used to download the Coder \ CLI which is necessary to make SSH connections. The If-None-Matched header \