diff --git a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt index 07b7b961..cce4c24f 100644 --- a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt +++ b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt @@ -2,19 +2,26 @@ package com.coder.gateway +import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService +import com.intellij.openapi.application.ApplicationManager import com.intellij.openapi.components.service import com.intellij.openapi.diagnostic.Logger import com.intellij.openapi.rd.util.launchUnderBackgroundProgress +import com.intellij.openapi.ui.Messages import com.jetbrains.gateway.api.ConnectionRequestor import com.jetbrains.gateway.api.GatewayConnectionHandle import com.jetbrains.gateway.api.GatewayConnectionProvider import com.jetbrains.gateway.api.GatewayUI import com.jetbrains.gateway.ssh.SshDeployFlowUtil import com.jetbrains.gateway.ssh.SshMultistagePanelContext +import com.jetbrains.gateway.ssh.deploy.DeployException import com.jetbrains.rd.util.lifetime.LifetimeDefinition import kotlinx.coroutines.launch +import net.schmizz.sshj.common.SSHException +import net.schmizz.sshj.connection.ConnectionException import java.time.Duration +import java.util.concurrent.TimeoutException class CoderGatewayConnectionProvider : GatewayConnectionProvider { private val recentConnectionsService = service() @@ -24,12 +31,42 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider { // TODO: If this fails determine if it is an auth error and if so prompt // for a new token, configure the CLI, then try again. 
clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) { - val context = SshMultistagePanelContext(parameters.toHostDeployInputs()) - logger.info("Deploying and starting IDE with $context") - launch { - @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle( - clientLifetime, context, Duration.ofMinutes(10) - ) + val context = suspendingRetryWithExponentialBackOff( + label = "connect", + logger = logger, + action = { attempt -> + logger.info("Deploying (attempt $attempt)...") + indicator.text = + if (attempt > 1) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt) + else CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text") + SshMultistagePanelContext(parameters.toHostDeployInputs()) + }, + predicate = { e -> + e is ConnectionException || e is TimeoutException + || e is SSHException || e is DeployException + }, + update = { _, e, remaining, -> + if (remaining != null) { + indicator.text2 = e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", remaining) + } else { + ApplicationManager.getApplication().invokeAndWait { + Messages.showMessageDialog( + e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details"), + CoderGatewayBundle.message("gateway.connector.coder.connection.error.text"), + Messages.getErrorIcon()) + } + } + }, + ) + if (context != null) { + launch { + logger.info("Deploying and starting IDE with $context") + // At this point JetBrains takes over with their own UI. 
+                    @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle(
+                        clientLifetime, context, Duration.ofMinutes(10)
+                    )
+                }
             }
         }
 
diff --git a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt
index 0ae21339..4f2ef02a 100644
--- a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt
+++ b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt
@@ -1,32 +1,85 @@
 package com.coder.gateway.sdk
 
+import com.intellij.openapi.diagnostic.Logger
+import com.intellij.openapi.progress.ProcessCanceledException
+import com.intellij.ssh.SshException
+import com.jetbrains.gateway.ssh.deploy.DeployException
 import kotlinx.coroutines.delay
 import java.util.Random
 import java.util.concurrent.TimeUnit
+import kotlin.coroutines.cancellation.CancellationException
 import kotlin.math.min
 
+/**
+ * Returns the root cause of [ex] (the last exception in its cause chain), or
+ * [ex] itself when it has no cause.
+ */
+fun unwrap(ex: Exception): Throwable =
+    generateSequence<Throwable>(ex) { it.cause }.last()
+
 /**
- * Similar to Intellij's except it gives you the next delay, does not do its own
- * logging, updates periodically (for counting down), and runs forever.
+ * Similar to Intellij's except it gives you the next delay, logs differently,
+ * updates periodically (for counting down), keeps retrying for as long as the
+ * predicate allows, and takes a predicate for determining whether we should
+ * retry.
+ *
+ * The update receives the attempt number, the error, and a description of the
+ * time remaining before the next attempt ("now" or "in N second(s)"). If
+ * remaining is null then no more retries will be attempted.
+ *
+ * If an exception related to canceling is thrown by the action then return
+ * null. Null is also returned, after a final update, when the predicate
+ * rejects the error.
  */
 suspend fun <T> suspendingRetryWithExponentialBackOff(
     initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5),
     backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3),
     backOffFactor: Int = 2,
     backOffJitter: Double = 0.1,
-    update: (attempt: Int, remainingMs: Long, e: Exception) -> Unit,
-    action: suspend (attempt: Int) -> T
-): T {
+    label: String,
+    logger: Logger,
+    predicate: (e: Throwable?) -> Boolean,
+    update: (attempt: Int, e: Throwable?, remaining: String?) -> Unit,
+    action: suspend (attempt: Int) -> T?
+): T? {
     val random = Random()
     var delayMs = initialDelayMs
     for (attempt in 1..Int.MAX_VALUE) {
         try {
             return action(attempt)
         }
-        catch (e: Exception) {
+        catch (originalEx: Exception) {
+            // SshException can happen due to anything from a timeout to being
+            // canceled so unwrap to find out.
+            val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx
+            when (unwrappedEx) {
+                is InterruptedException,
+                is CancellationException,
+                is ProcessCanceledException -> {
+                    logger.info("Retrying $label canceled due to ${unwrappedEx.javaClass}")
+                    return null
+                }
+            }
+            if (!predicate(unwrappedEx)) {
+                logger.error("Failed to $label (attempt $attempt; will not retry)", originalEx)
+                update(attempt, unwrappedEx, null)
+                return null
+            }
+            logger.error("Failed to $label (attempt $attempt; will retry in $delayMs ms)", originalEx)
+            // When the worker upload times out Gateway just says it failed.
+            // Even the root cause (IllegalStateException) is useless. The
+            // error also includes a very long useless tmp path. With all
+            // that in mind, provide a better error. Built once, outside the
+            // countdown loop, because it does not change between updates.
+            val mungedEx =
+                if (unwrappedEx is DeployException && unwrappedEx.message.contains("Worker binary deploy failed"))
+                    DeployException("Failed to upload worker binary...it may have timed out", unwrappedEx)
+                else unwrappedEx
             var remainingMs = delayMs
             while (remainingMs > 0) {
-                update(attempt, remainingMs, e)
+                val remainingS = TimeUnit.MILLISECONDS.toSeconds(remainingMs)
+                val remaining = if (remainingS < 1) "now" else "in $remainingS second${if (remainingS > 1) "s" else ""}"
+                update(attempt, mungedEx, remaining)
                 val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1))
                 remainingMs -= next
                 delay(next)
diff --git a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt
index 8290a87e..394c722d 100644
--- a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt
+++ b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt
@@ -68,7 +68,6 @@ import net.schmizz.sshj.connection.ConnectionException
 import java.awt.Component
 import java.awt.FlowLayout
 import java.util.Locale
-import java.util.concurrent.TimeUnit
 import java.util.concurrent.TimeoutException
 import javax.swing.ComboBoxModel
 import javax.swing.DefaultComboBoxModel
@@ -79,7 +78,6 @@ import javax.swing.JPanel
 import javax.swing.ListCellRenderer
 import javax.swing.SwingConstants
 import javax.swing.event.DocumentEvent
-import kotlin.coroutines.cancellation.CancellationException
 
 class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolean) -> Unit) : CoderWorkspacesWizardStep, Disposable {
     private val cs = CoroutineScope(Dispatchers.Main)
@@ -179,6 +177,8 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea
 
         ideResolvingJob = cs.launch {
             val ides = suspendingRetryWithExponentialBackOff(
+                label = "retrieve IDEs",
+                logger = logger,
                 action={ attempt ->
                     logger.info("Deploying to ${selectedWorkspace.name} on $deploymentURL (attempt $attempt)")
                     // Reset text in the select dropdown.
@@ -187,39 +187,22 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt) else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text")) } - try { - val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) - if (ComponentValidator.getInstance(tfProject).isEmpty) { - installRemotePathValidator(executor) - } - retrieveIDEs(executor, selectedWorkspace) - } catch (e: Exception) { - when(e) { - is InterruptedException -> Unit - is CancellationException -> Unit - // Throw to retry these. The main one is - // DeployException which fires when dd times out. - is ConnectionException, is TimeoutException, - is SSHException, is DeployException -> throw e - else -> { - withContext(Dispatchers.Main) { - logger.error("Failed to retrieve IDEs (attempt $attempt)", e) - cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) - } - } - } - null + val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) + if (ComponentValidator.getInstance(tfProject).isEmpty) { + installRemotePathValidator(executor) } + retrieveIDEs(executor, selectedWorkspace) + }, + predicate = { e -> + e is ConnectionException || e is TimeoutException + || e is SSHException || e is DeployException }, - update = { attempt, retryMs, e -> - logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $retryMs ms)", e) + update = { _, e, remaining -> cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - val delayS = 
TimeUnit.MILLISECONDS.toSeconds(retryMs) - val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", delay)) + cbIDEComment.text = e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + cbIDE.renderer = + if (remaining != null) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", remaining)) + else IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) }, ) if (ides != null) { diff --git a/src/main/resources/messages/CoderGatewayBundle.properties b/src/main/resources/messages/CoderGatewayBundle.properties index d8295e5c..32c2090b 100644 --- a/src/main/resources/messages/CoderGatewayBundle.properties +++ b/src/main/resources/messages/CoderGatewayBundle.properties @@ -42,6 +42,10 @@ gateway.connector.recentconnections.new.wizard.button.tooltip=Open a new Coder W gateway.connector.recentconnections.remove.button.tooltip=Remove from Recent Connections gateway.connector.recentconnections.terminal.button.tooltip=Open SSH Web Terminal gateway.connector.coder.connection.provider.title=Connecting to Coder workspace... +gateway.connector.coder.connection.loading.text=Connecting... +gateway.connector.coder.connection.retry.text=Connecting (attempt {0})... +gateway.connector.coder.connection.retry-error.text=Failed to connect...retrying {0} +gateway.connector.coder.connection.error.text=Failed to connect gateway.connector.settings.binary-source.title=CLI source: gateway.connector.settings.binary-source.comment=Used to download the Coder \ CLI which is necessary to make SSH connections. 
The If-None-Matched header \ @@ -54,3 +58,4 @@ gateway.connector.settings.binary-destination.comment=Directories are created \ here that store the CLI and credentials for each domain to which the plugin \ connects. \ Defaults to {0}. +gateway.connector.no-details=The error did not provide any further details