From 405fb438936dde83cc55016af4c413741e8ff775 Mon Sep 17 00:00:00 2001 From: Asher Date: Thu, 27 Apr 2023 17:33:34 -0800 Subject: [PATCH 1/2] Retry direct connection This will cover recent connections which connect directly without going through the whole setup flow. Pretty much the same logic as for listing editors but we display the errors in different ways since this all happens in a progress dialog. I tried to combine what I could in the retry. Also the SshException is misleading; it seems to wrap the real error so unwrap it otherwise it is impossible to tell what is really wrong. In particular this is causing us to retry on cancelations. --- .../gateway/CoderGatewayConnectionProvider.kt | 49 +++++++++++++++-- .../kotlin/com/coder/gateway/sdk/Retry.kt | 55 ++++++++++++++++--- .../steps/CoderLocateRemoteProjectStepView.kt | 47 +++++----------- .../messages/CoderGatewayBundle.properties | 5 ++ 4 files changed, 111 insertions(+), 45 deletions(-) diff --git a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt index 07b7b961..cce4c24f 100644 --- a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt +++ b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt @@ -2,19 +2,26 @@ package com.coder.gateway +import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService +import com.intellij.openapi.application.ApplicationManager import com.intellij.openapi.components.service import com.intellij.openapi.diagnostic.Logger import com.intellij.openapi.rd.util.launchUnderBackgroundProgress +import com.intellij.openapi.ui.Messages import com.jetbrains.gateway.api.ConnectionRequestor import com.jetbrains.gateway.api.GatewayConnectionHandle import com.jetbrains.gateway.api.GatewayConnectionProvider import com.jetbrains.gateway.api.GatewayUI import com.jetbrains.gateway.ssh.SshDeployFlowUtil 
import com.jetbrains.gateway.ssh.SshMultistagePanelContext +import com.jetbrains.gateway.ssh.deploy.DeployException import com.jetbrains.rd.util.lifetime.LifetimeDefinition import kotlinx.coroutines.launch +import net.schmizz.sshj.common.SSHException +import net.schmizz.sshj.connection.ConnectionException import java.time.Duration +import java.util.concurrent.TimeoutException class CoderGatewayConnectionProvider : GatewayConnectionProvider { private val recentConnectionsService = service() @@ -24,12 +31,42 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider { // TODO: If this fails determine if it is an auth error and if so prompt // for a new token, configure the CLI, then try again. clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) { - val context = SshMultistagePanelContext(parameters.toHostDeployInputs()) - logger.info("Deploying and starting IDE with $context") - launch { - @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle( - clientLifetime, context, Duration.ofMinutes(10) - ) + val context = suspendingRetryWithExponentialBackOff( + label = "connect", + logger = logger, + action = { attempt -> + logger.info("Deploying (attempt $attempt)...") + indicator.text = + if (attempt > 1) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt) + else CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text") + SshMultistagePanelContext(parameters.toHostDeployInputs()) + }, + predicate = { e -> + e is ConnectionException || e is TimeoutException + || e is SSHException || e is DeployException + }, + update = { _, e, remaining, -> + if (remaining != null) { + indicator.text2 = e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", remaining) + 
} else { + ApplicationManager.getApplication().invokeAndWait { + Messages.showMessageDialog( + e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details"), + CoderGatewayBundle.message("gateway.connector.coder.connection.error.text"), + Messages.getErrorIcon()) + } + } + }, + ) + if (context != null) { + launch { + logger.info("Deploying and starting IDE with $context") + // At this point JetBrains takes over with their own UI. + @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle( + clientLifetime, context, Duration.ofMinutes(10) + ) + } } } diff --git a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt index 0ae21339..c0f53b14 100644 --- a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt +++ b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt @@ -1,32 +1,73 @@ package com.coder.gateway.sdk +import com.intellij.openapi.diagnostic.Logger +import com.intellij.openapi.progress.ProcessCanceledException +import com.intellij.ssh.SshException import kotlinx.coroutines.delay import java.util.Random import java.util.concurrent.TimeUnit +import kotlin.coroutines.cancellation.CancellationException import kotlin.math.min +fun unwrap(ex: Exception): Throwable? { + var cause = ex.cause + while(cause?.cause != null) { + cause = cause.cause + } + return cause ?: ex +} + /** - * Similar to Intellij's except it gives you the next delay, does not do its own - * logging, updates periodically (for counting down), and runs forever. + * Similar to Intellij's except it gives you the next delay, logs differently, + * updates periodically (for counting down), runs forever, and takes a + * predicate for determining whether we should retry. + * + * The update will have a boolean to indicate whether it is the first update (so + * things like duplicate logs can be avoided). If remaining is null then no + * more retries will be attempted. + * + * If an exception related to canceling is received then return null. 
*/ suspend fun suspendingRetryWithExponentialBackOff( initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5), backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3), backOffFactor: Int = 2, backOffJitter: Double = 0.1, - update: (attempt: Int, remainingMs: Long, e: Exception) -> Unit, - action: suspend (attempt: Int) -> T -): T { + label: String, + logger: Logger, + predicate: (e: Throwable?) -> Boolean, + update: (attempt: Int, e: Throwable?, remaining: String?) -> Unit, + action: suspend (attempt: Int) -> T? +): T? { val random = Random() var delayMs = initialDelayMs for (attempt in 1..Int.MAX_VALUE) { try { return action(attempt) } - catch (e: Exception) { + catch (originalEx: Exception) { + // SshException can happen due to anything from a timeout to being + // canceled so unwrap to find out. + val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx + when (unwrappedEx) { + is InterruptedException, + is CancellationException, + is ProcessCanceledException -> { + logger.info("Retrying $label canceled due to ${unwrappedEx.javaClass}") + return null + } + } + if (!predicate(unwrappedEx)) { + logger.error("Failed to $label (attempt $attempt; will not retry)", originalEx) + update(attempt, unwrappedEx, null) + return null + } + logger.error("Failed to $label (attempt $attempt; will retry in $delayMs ms)", originalEx) var remainingMs = delayMs while (remainingMs > 0) { - update(attempt, remainingMs, e) + val remainingS = TimeUnit.MILLISECONDS.toSeconds(remainingMs) + val remaining = if (remainingS < 1) "now" else "in $remainingS second${if (remainingS > 1) "s" else ""}" + update(attempt, unwrappedEx, remaining) val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1)) remainingMs -= next delay(next) diff --git a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt index 8290a87e..394c722d 100644 --- 
a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt +++ b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt @@ -68,7 +68,6 @@ import net.schmizz.sshj.connection.ConnectionException import java.awt.Component import java.awt.FlowLayout import java.util.Locale -import java.util.concurrent.TimeUnit import java.util.concurrent.TimeoutException import javax.swing.ComboBoxModel import javax.swing.DefaultComboBoxModel @@ -79,7 +78,6 @@ import javax.swing.JPanel import javax.swing.ListCellRenderer import javax.swing.SwingConstants import javax.swing.event.DocumentEvent -import kotlin.coroutines.cancellation.CancellationException class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolean) -> Unit) : CoderWorkspacesWizardStep, Disposable { private val cs = CoroutineScope(Dispatchers.Main) @@ -179,6 +177,8 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea ideResolvingJob = cs.launch { val ides = suspendingRetryWithExponentialBackOff( + label = "retrieve IDEs", + logger = logger, action={ attempt -> logger.info("Deploying to ${selectedWorkspace.name} on $deploymentURL (attempt $attempt)") // Reset text in the select dropdown. @@ -187,39 +187,22 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt) else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text")) } - try { - val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) - if (ComponentValidator.getInstance(tfProject).isEmpty) { - installRemotePathValidator(executor) - } - retrieveIDEs(executor, selectedWorkspace) - } catch (e: Exception) { - when(e) { - is InterruptedException -> Unit - is CancellationException -> Unit - // Throw to retry these. 
The main one is - // DeployException which fires when dd times out. - is ConnectionException, is TimeoutException, - is SSHException, is DeployException -> throw e - else -> { - withContext(Dispatchers.Main) { - logger.error("Failed to retrieve IDEs (attempt $attempt)", e) - cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) - } - } - } - null + val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) + if (ComponentValidator.getInstance(tfProject).isEmpty) { + installRemotePathValidator(executor) } + retrieveIDEs(executor, selectedWorkspace) + }, + predicate = { e -> + e is ConnectionException || e is TimeoutException + || e is SSHException || e is DeployException }, - update = { attempt, retryMs, e -> - logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $retryMs ms)", e) + update = { _, e, remaining -> cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - val delayS = TimeUnit.MILLISECONDS.toSeconds(retryMs) - val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", delay)) + cbIDEComment.text = e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + cbIDE.renderer = + if (remaining != null) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", remaining)) + else IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) }, ) if (ides != null) { diff --git 
a/src/main/resources/messages/CoderGatewayBundle.properties b/src/main/resources/messages/CoderGatewayBundle.properties index d8295e5c..32c2090b 100644 --- a/src/main/resources/messages/CoderGatewayBundle.properties +++ b/src/main/resources/messages/CoderGatewayBundle.properties @@ -42,6 +42,10 @@ gateway.connector.recentconnections.new.wizard.button.tooltip=Open a new Coder W gateway.connector.recentconnections.remove.button.tooltip=Remove from Recent Connections gateway.connector.recentconnections.terminal.button.tooltip=Open SSH Web Terminal gateway.connector.coder.connection.provider.title=Connecting to Coder workspace... +gateway.connector.coder.connection.loading.text=Connecting... +gateway.connector.coder.connection.retry.text=Connecting (attempt {0})... +gateway.connector.coder.connection.retry-error.text=Failed to connect...retrying {0} +gateway.connector.coder.connection.error.text=Failed to connect gateway.connector.settings.binary-source.title=CLI source: gateway.connector.settings.binary-source.comment=Used to download the Coder \ CLI which is necessary to make SSH connections. The If-None-Matched header \ @@ -54,3 +58,4 @@ gateway.connector.settings.binary-destination.comment=Directories are created \ here that store the CLI and credentials for each domain to which the plugin \ connects. \ Defaults to {0}. 
+gateway.connector.no-details=The error did not provide any further details From 1e7855caa508a5acf8bb860d5eb8bca7c604e5aa Mon Sep 17 00:00:00 2001 From: Asher Date: Thu, 27 Apr 2023 17:40:38 -0800 Subject: [PATCH 2/2] Provide better error when dd times out --- src/main/kotlin/com/coder/gateway/sdk/Retry.kt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt index c0f53b14..4f2ef02a 100644 --- a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt +++ b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt @@ -3,6 +3,7 @@ package com.coder.gateway.sdk import com.intellij.openapi.diagnostic.Logger import com.intellij.openapi.progress.ProcessCanceledException import com.intellij.ssh.SshException +import com.jetbrains.gateway.ssh.deploy.DeployException import kotlinx.coroutines.delay import java.util.Random import java.util.concurrent.TimeUnit @@ -67,7 +68,15 @@ suspend fun suspendingRetryWithExponentialBackOff( while (remainingMs > 0) { val remainingS = TimeUnit.MILLISECONDS.toSeconds(remainingMs) val remaining = if (remainingS < 1) "now" else "in $remainingS second${if (remainingS > 1) "s" else ""}" - update(attempt, unwrappedEx, remaining) + // When the worker upload times out Gateway just says it failed. + // Even the root cause (IllegalStateException) is useless. The + // error also includes a very long useless tmp path. With all + // that in mind, provide a better error. + val mungedEx = + if (unwrappedEx is DeployException && unwrappedEx.message.contains("Worker binary deploy failed")) + DeployException("Failed to upload worker binary...it may have timed out", unwrappedEx) + else unwrappedEx + update(attempt, mungedEx, remaining) val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1)) remainingMs -= next delay(next)