Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 169 additions & 36 deletions app/src/remote_server/ssh_transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,17 +212,59 @@ impl RemoteTransport for SshTransport {
if output.status.code()
== Some(remote_server::setup::NO_HTTP_CLIENT_EXIT_CODE) =>
{
log::info!("Remote server has no curl/wget, falling back to SCP upload");
log::info!("Remote has no curl/wget, falling back to SCP upload");
scp_install_fallback(&socket_path)
.await
.map_err(Error::Other)
}
Ok(output)
if output.status.code()
== Some(remote_server::setup::DOWNLOAD_FAILED_EXIT_CODE) =>
{
log::info!(
"Remote download failed (both HTTP clients tried), \
falling back to SCP upload"
);
scp_install_fallback(&socket_path)
.await
.map_err(Error::Other)
}
Ok(output)
if output.status.code() == Some(remote_server::setup::NO_TAR_EXIT_CODE) =>
{
log::info!("Remote has no tar, falling back to direct binary upload");
scp_install_binary_direct(&socket_path)
.await
.map_err(Error::Other)
}
Ok(output) => {
let exit_code = output.status.code().unwrap_or(-1);
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
// Do not attempt fallback for host-level failures that no
// alternate download strategy can fix.
if remote_server::setup::is_non_retryable_host_error(&stderr) {
return Err(Error::ScriptFailed { exit_code, stderr });
}
// SSH exit 255 means the connection itself is dead —
// no point attempting an SCP fallback.
if exit_code == 255 {
return Err(Error::ScriptFailed { exit_code, stderr });
}
Err(Error::ScriptFailed { exit_code, stderr })
}
Err(SshCommandError::TimedOut { .. }) => Err(Error::TimedOut),
// Timeout: the install script did not complete in time.
// If the timeout is likely from the download phase, the SCP
// fallback may succeed. Host-filesystem timeouts are rare;
// if the SCP fallback also times out, the error propagates.
Err(SshCommandError::TimedOut { .. }) => {
log::info!("Install script timed out, attempting SCP fallback");
scp_install_fallback(&socket_path)
.await
.map_err(|fallback_err| {
log::warn!("SCP fallback also failed after timeout: {fallback_err:#}");
Error::TimedOut
})
}
Err(e) => Err(Error::Other(e.into())),
}
})
Expand Down Expand Up @@ -315,6 +357,114 @@ impl RemoteTransport for SshTransport {
/// the remote via SCP, then re-invokes the install script with the
/// staging path baked in so the shared extraction tail runs.
async fn scp_install_fallback(socket_path: &Path) -> anyhow::Result<()> {
    // `local_dir` is the `tempfile::TempDir` handle returned alongside the
    // platform; it stays bound until this function returns so the downloaded
    // tarball is still on disk while it is being uploaded.
    let (_platform, local_dir) = download_tarball_locally(socket_path).await?;
    let local_tarball = local_dir.path().join("oz.tar.gz");

    // One timeout budget is applied to the upload and, separately, to the
    // extraction script run.
    let timeout = remote_server::setup::SCP_INSTALL_TIMEOUT;
    let remote_dir = remote_server::setup::remote_server_dir();
    let remote_tarball_path = format!("{}/oz-upload.tar.gz", remote_dir);

    // Step 1: push the locally downloaded tarball to the remote host.
    log::info!("Uploading tarball to remote at {remote_tarball_path}");
    remote_server::ssh::scp_upload(socket_path, &local_tarball, &remote_tarball_path, timeout)
        .await?;

    // Step 2: re-run the install script with the uploaded tarball's path
    // baked in. A non-empty `staging_tarball_path` makes the script skip its
    // own download and extract the uploaded file instead.
    log::info!("Running extraction via install script with tarball at {remote_tarball_path}");
    let script = remote_server::setup::install_script(Some(&remote_tarball_path));
    let output = remote_server::ssh::run_ssh_script(socket_path, &script, timeout).await?;

    // Guard clause: surface the script's exit code and stderr on failure.
    // A missing exit code is reported as -1.
    if !output.status.success() {
        let code = output.status.code().unwrap_or(-1);
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Extraction script failed (exit {code}): {stderr}"
        ));
    }

    Ok(())
}

/// Direct binary upload fallback: downloads the tarball locally, extracts
/// it locally, and uploads only the resolved binary via SCP. This avoids
/// requiring `tar` on the remote host.
///
/// The remote-side steps are:
/// 1. `mkdir -p <install_dir>` (ensures the directory exists)
/// 2. SCP the binary to a staging path (`<binary>.staging`)
/// 3. `chmod +x && mv` to the final install path
///
/// Remote paths are rendered into shell commands with
/// [`quote_remote_shell_path`], so a `~/`-prefixed path still resolves
/// against the remote home directory and an embedded `'` cannot break out
/// of the quoting.
///
/// # Errors
///
/// Returns an error if the local download, local extraction, binary
/// lookup, SCP upload, or any remote command fails. Remote command
/// failures include the exit code (`-1` when none is available) and the
/// command's stderr.
async fn scp_install_binary_direct(socket_path: &Path) -> anyhow::Result<()> {
    // `tmp_dir` is the `tempfile::TempDir` handle; it stays bound for the
    // whole function so the tarball and extracted files remain on disk
    // until the upload has finished.
    let (_platform, tmp_dir) = download_tarball_locally(socket_path).await?;
    let temp_client_tarball_path = tmp_dir.path().join("oz.tar.gz");
    let timeout = remote_server::setup::SCP_INSTALL_TIMEOUT;

    // Extract locally using the client machine's tar.
    log::info!("Extracting tarball locally for direct binary upload");
    let extract_dir = tmp_dir.path().join("extracted");
    std::fs::create_dir_all(&extract_dir)
        .map_err(|e| anyhow::anyhow!("Failed to create local extraction dir: {e}"))?;

    let tar_output = command::r#async::Command::new("tar")
        .arg("-xzf")
        .arg(&temp_client_tarball_path)
        .arg("-C")
        .arg(&extract_dir)
        .kill_on_drop(true)
        .output()
        .await
        .map_err(|e| anyhow::anyhow!("Failed to spawn local tar: {e}"))?;
    if !tar_output.status.success() {
        let stderr = String::from_utf8_lossy(&tar_output.stderr);
        return Err(anyhow::anyhow!(
            "Local tar extraction failed (exit {:?}): {stderr}",
            tar_output.status.code()
        ));
    }

    // Find the binary in the extraction directory.
    let binary_path = find_oz_binary_in_dir(&extract_dir)?;

    let remote_binary = remote_server::setup::remote_server_binary().to_string();
    let remote_staging = format!("{remote_binary}.staging");

    // Step 1: make sure the install directory exists before uploading into
    // it. `mkdir -p` is a no-op when the directory is already there.
    // NOTE(review): the install script may already create this directory
    // before it reaches its tar check; that part of the script is not
    // visible here, so the step is performed unconditionally.
    if let Some((remote_dir, _file_name)) = remote_binary.rsplit_once('/') {
        if !remote_dir.is_empty() {
            let mkdir_cmd = format!("mkdir -p {}", quote_remote_shell_path(remote_dir));
            log::info!("Ensuring remote install dir exists: {mkdir_cmd}");
            let output =
                remote_server::ssh::run_ssh_command(socket_path, &mkdir_cmd, timeout).await?;
            if !output.status.success() {
                let code = output.status.code().unwrap_or(-1);
                let stderr = String::from_utf8_lossy(&output.stderr);
                return Err(anyhow::anyhow!(
                    "Remote mkdir failed (exit {code}): {stderr}"
                ));
            }
        }
    }

    // Step 2: upload the binary directly to a staging path on the remote.
    log::info!("Uploading binary directly to remote at {remote_staging}");
    remote_server::ssh::scp_upload(socket_path, &binary_path, &remote_staging, timeout).await?;

    // Step 3: chmod +x and move to the final location on the remote. The
    // binary only appears at its final path once it is executable.
    let quoted_staging = quote_remote_shell_path(&remote_staging);
    let quoted_binary = quote_remote_shell_path(&remote_binary);
    let finalize_cmd =
        format!("chmod +x {quoted_staging} && mv {quoted_staging} {quoted_binary}");
    log::info!("Finalizing remote binary: {finalize_cmd}");
    let output = remote_server::ssh::run_ssh_command(socket_path, &finalize_cmd, timeout).await?;
    if output.status.success() {
        Ok(())
    } else {
        let code = output.status.code().unwrap_or(-1);
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(anyhow::anyhow!(
            "Remote finalize failed (exit {code}): {stderr}"
        ))
    }
}

/// Renders `path` as a single word for a remote shell command line.
///
/// The path is wrapped in single quotes, with each embedded `'` written as
/// `'\''`. Because the shell does not perform tilde expansion inside
/// quotes, a bare `~` or a leading `~/` is emitted as `"$HOME"` so the path
/// still resolves against the remote user's home directory.
///
/// For a path with no leading tilde and no single quote the result is
/// simply `'<path>'`.
fn quote_remote_shell_path(path: &str) -> String {
    if path == "~" {
        return "\"$HOME\"".to_string();
    }
    let (prefix, rest) = match path.strip_prefix("~/") {
        Some(rest) => ("\"$HOME\"/", rest),
        None => ("", path),
    };
    format!("{prefix}'{}'", rest.replace('\'', r"'\''"))
}

/// Downloads the remote-server tarball to a local temp directory.
/// Returns the detected remote platform and the temp directory handle.
///
/// Shared by [`scp_install_fallback`] and [`scp_install_binary_direct`]
/// to avoid duplicating the platform-detection + local-download logic.
async fn download_tarball_locally(
socket_path: &Path,
) -> anyhow::Result<(remote_server::setup::RemotePlatform, tempfile::TempDir)> {
use std::process::Stdio;

// Detect the remote platform so we can construct the correct download URL.
Expand All @@ -326,22 +476,13 @@ async fn scp_install_fallback(socket_path: &Path) -> anyhow::Result<()> {
.map_err(|e| anyhow::anyhow!("SCP fallback: {e:#}"))?;

let url = remote_server::setup::download_tarball_url(&platform);
let remote_tarball_path = format!(
"{}/oz-upload.tar.gz",
remote_server::setup::remote_server_dir()
);
let timeout = remote_server::setup::SCP_INSTALL_TIMEOUT;

// 1. Download the tarball locally into a temp directory.
let tmp_dir =
tempfile::tempdir().map_err(|e| anyhow::anyhow!("Failed to create local temp dir: {e}"))?;
let temp_client_tarball_path = tmp_dir.path().join("oz.tar.gz");

log::info!("Downloading tarball locally from {url}");
let output = command::r#async::Command::new("curl")
// -f: fail silently on HTTP errors (non-zero exit instead of HTML error page)
// -S: show errors even when -f is used
// -L: follow redirects (the CDN may 302 to a regional edge)
.arg("-fSL")
.arg("--connect-timeout")
.arg("15")
Expand All @@ -362,33 +503,25 @@ async fn scp_install_fallback(socket_path: &Path) -> anyhow::Result<()> {
));
}

// 2. Upload to the remote via SCP.
log::info!("Uploading tarball to remote at {remote_tarball_path}");
remote_server::ssh::scp_upload(
socket_path,
&temp_client_tarball_path,
&remote_tarball_path,
timeout,
)
.await?;

// 3. Run the install script with the staging path baked in.
// The script's `staging_tarball_path` variable is non-empty, so it
// skips the download and extracts from the uploaded tarball.
log::info!("Running extraction via install script with tarball at {remote_tarball_path}");

let script = remote_server::setup::install_script(Some(&remote_tarball_path));
Ok((platform, tmp_dir))
}

let output = remote_server::ssh::run_ssh_script(socket_path, &script, timeout).await?;
if output.status.success() {
Ok(())
} else {
let code = output.status.code().unwrap_or(-1);
let stderr = String::from_utf8_lossy(&output.stderr);
Err(anyhow::anyhow!(
"Extraction script failed (exit {code}): {stderr}"
))
/// Locates the extracted binary beneath `dir`.
///
/// Recursively walks `dir` and returns the path of the first regular file
/// whose file name begins with `oz` and does not end in `.tar.gz`. Entries
/// that cannot be read during the walk are skipped rather than reported.
///
/// # Errors
///
/// Returns an error when the walk finishes without a matching file.
fn find_oz_binary_in_dir(dir: &Path) -> anyhow::Result<PathBuf> {
    walkdir::WalkDir::new(dir)
        .into_iter()
        // Unreadable entries are ignored; the search is best-effort.
        .filter_map(Result::ok)
        .filter(|candidate| candidate.file_type().is_file())
        .find(|candidate| {
            let file_name = candidate.file_name().to_string_lossy();
            file_name.starts_with("oz") && !file_name.ends_with(".tar.gz")
        })
        .map(|found| found.into_path())
        .ok_or_else(|| anyhow::anyhow!("no binary found in extracted tarball"))
}

#[cfg(test)]
Expand Down
47 changes: 41 additions & 6 deletions crates/remote_server/src/install_remote_server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# {version_query} — e.g. &version=v0.2026... (empty when no release tag)
# {version_suffix} — e.g. -v0.2026... (empty when no release tag)
# {no_http_client_exit_code} — exit code when neither curl nor wget is available
# {download_failed_exit_code} — exit code when every available client (curl and/or wget) fails to download
# {no_tar_exit_code} — exit code when tar is not available
# {staging_tarball_path} — path to a pre-uploaded tarball (SCP fallback; empty normally)
set -e

Expand Down Expand Up @@ -64,17 +66,50 @@ if [ -n "$staging_tarball_path" ]; then
esac
mv "$staging_tarball_path" "$tmpdir/oz.tar.gz"
else
# Normal path: download via curl or wget.
# Normal path: download via curl or wget, with retry using the
# alternate client if the primary fails.
url="{download_base_url}?package=tar&os=$os_name&arch=$arch_name&channel={channel}{version_query}"

if command -v curl >/dev/null 2>&1; then
curl -fSL "$url" -o "$tmpdir/oz.tar.gz"
elif command -v wget >/dev/null 2>&1; then
wget -q -O "$tmpdir/oz.tar.gz" "$url"
else
has_curl=false
has_wget=false
command -v curl >/dev/null 2>&1 && has_curl=true
command -v wget >/dev/null 2>&1 && has_wget=true

if [ "$has_curl" = false ] && [ "$has_wget" = false ]; then
echo "error: neither curl nor wget is available" >&2
exit {no_http_client_exit_code}
fi

download_ok=false
download_err=""

# Try primary client, then retry with alternate on failure.
if [ "$has_curl" = true ]; then
if curl -fSL --connect-timeout 15 --retry 1 "$url" -o "$tmpdir/oz.tar.gz" 2>/dev/null; then
download_ok=true
else
download_err="curl failed (exit $?)"
fi
fi

if [ "$download_ok" = false ] && [ "$has_wget" = true ]; then
if wget -q --timeout=15 -O "$tmpdir/oz.tar.gz" "$url" 2>/dev/null; then
download_ok=true
else
download_err="${download_err:+$download_err; }wget failed (exit $?)"
fi
fi

if [ "$download_ok" = false ]; then
echo "error: remote download failed: $download_err" >&2
exit {download_failed_exit_code}
fi
fi

# Verify tar is available before attempting extraction.
if ! command -v tar >/dev/null 2>&1; then
echo "error: tar is not available" >&2
exit {no_tar_exit_code}
fi

tar -xzf "$tmpdir/oz.tar.gz" -C "$tmpdir"
Expand Down
33 changes: 33 additions & 0 deletions crates/remote_server/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,11 @@ pub fn install_script(staging_tarball_path: Option<&str>) -> String {
"{no_http_client_exit_code}",
&NO_HTTP_CLIENT_EXIT_CODE.to_string(),
)
.replace(
"{download_failed_exit_code}",
&DOWNLOAD_FAILED_EXIT_CODE.to_string(),
)
.replace("{no_tar_exit_code}", &NO_TAR_EXIT_CODE.to_string())
.replace("{staging_tarball_path}", staging_tarball_path.unwrap_or(""))
}

Expand Down Expand Up @@ -487,6 +492,17 @@ pub fn download_tarball_url(platform: &RemotePlatform) -> String {
/// trigger the SCP upload fallback.
pub const NO_HTTP_CLIENT_EXIT_CODE: i32 = 3;

/// Exit code the install script uses when at least one of curl/wget is
/// present but every available client failed to download the tarball
/// (DNS failure, TLS error, HTTP 403/502, timeout, partial download,
/// etc.). This covers hosts with only one client installed as well as
/// hosts where curl failed and the wget retry failed too. The Rust side
/// matches on this to trigger the SCP upload fallback.
pub const DOWNLOAD_FAILED_EXIT_CODE: i32 = 4;

/// Exit code the install script uses when `tar` is not available on the
/// remote host. The Rust side matches on this to trigger the direct
/// binary upload fallback (extract locally, upload only the binary).
pub const NO_TAR_EXIT_CODE: i32 = 5;

/// Timeout for the binary existence check.
pub const CHECK_TIMEOUT: Duration = Duration::from_secs(10);

Expand All @@ -499,6 +515,23 @@ pub const INSTALL_TIMEOUT: Duration = Duration::from_secs(60);
/// the remote host's direct internet connection.
pub const SCP_INSTALL_TIMEOUT: Duration = Duration::from_secs(120);

/// Reports whether install-script `stderr` describes a host-level failure
/// that should **not** be retried via the SCP fallback.
///
/// These are conditions of the remote host itself (permission denied, no
/// disk space, read-only filesystem, quota exceeded) that no alternate
/// download strategy can fix.
///
/// Matching is a plain substring search after ASCII-lowercasing `stderr`,
/// so it is case-insensitive for ASCII text only.
pub fn is_non_retryable_host_error(stderr: &str) -> bool {
    // Lower-case fragments of the error text that tools such as mkdir,
    // chmod, mv, tar, or the shell commonly print for these conditions.
    const HOST_FAILURE_MARKERS: [&str; 6] = [
        "permission denied",
        "read-only file system",
        "no space left on device",
        "disk quota exceeded",
        "cannot create directory",
        "operation not permitted",
    ];

    let normalized = stderr.to_ascii_lowercase();
    HOST_FAILURE_MARKERS
        .iter()
        .any(|&marker| normalized.contains(marker))
}

#[cfg(test)]
#[path = "setup_tests.rs"]
mod tests;
Loading