From 796d31e4e9c3f2cd2b688e59302a05c123766231 Mon Sep 17 00:00:00 2001 From: Charles Pierce Date: Mon, 29 Jul 2024 21:37:59 -0700 Subject: [PATCH] Remove extra GET for tarball uncompressed size Currently, we make two overlapping GET requests when fetching a tarball: First, the main request to get the headers and the data stream, and then a second request to attempt to fetch the ISIZE field which holds the uncompressed size. Getting the uncompressed size lets us use the stream data after uncompressing to populate our progress bar for tarballs. However, the extra request causes issues for some users, where the second request for the ISIZE field hangs during the TLS handshake. By removing the uncompressed size entirely and connecting the progress bar to the compressed read data, we can avoid the extra request entirely. --- crates/archive/src/lib.rs | 1 - crates/archive/src/tarball.rs | 99 ++---------------------- crates/archive/src/zip.rs | 3 - crates/volta-core/src/tool/node/fetch.rs | 4 +- crates/volta-core/src/tool/npm/fetch.rs | 4 +- crates/volta-core/src/tool/pnpm/fetch.rs | 4 +- crates/volta-core/src/tool/yarn/fetch.rs | 4 +- 7 files changed, 9 insertions(+), 110 deletions(-) diff --git a/crates/archive/src/lib.rs b/crates/archive/src/lib.rs index 9fd2ccd71..5a9818898 100644 --- a/crates/archive/src/lib.rs +++ b/crates/archive/src/lib.rs @@ -42,7 +42,6 @@ pub enum Origin { pub trait Archive { fn compressed_size(&self) -> u64; - fn uncompressed_size(&self) -> Option<u64>; /// Unpacks the zip archive to the specified destination folder. fn unpack( diff --git a/crates/archive/src/tarball.rs b/crates/archive/src/tarball.rs index 9d6bd7882..c573db247 100644 --- a/crates/archive/src/tarball.rs +++ b/crates/archive/src/tarball.rs @@ -2,25 +2,20 @@ //! tarball in Unix operating systems. 
use std::fs::File; -use std::io::{Read, Seek, SeekFrom}; +use std::io::Read; use std::path::Path; use super::{Archive, ArchiveError, Origin}; use attohttpc::header::HeaderMap; use flate2::read::GzDecoder; use fs_utils::ensure_containing_dir_exists; -use headers::{AcceptRanges, ContentLength, Header, HeaderMapExt, Range}; +use headers::{ContentLength, Header, HeaderMapExt}; use progress_read::ProgressRead; use tee::TeeReader; /// A Node installation tarball. pub struct Tarball { compressed_size: u64, - // Some servers don't return the right data for byte range queries, so - // getting the uncompressed archive size for tarballs is an Option. - // If the uncompressed size is not available, the compressed size will be - // used for the download/unpack progress indicator, so that will be slightly off. - uncompressed_size: Option<u64>, data: Box<dyn Read>, origin: Origin, } @@ -36,11 +31,9 @@ fn content_length(headers: &HeaderMap) -> Result<u64, ArchiveError> { impl Tarball { /// Loads a tarball from the specified file. - pub fn load(mut source: File) -> Result<Box<dyn Archive>, ArchiveError> { - let uncompressed_size = load_uncompressed_size(&mut source); + pub fn load(source: File) -> Result<Box<dyn Archive>, ArchiveError> { let compressed_size = source.metadata()?.len(); Ok(Box::new(Tarball { - uncompressed_size, compressed_size, data: Box::new(source), origin: Origin::Local, @@ -58,18 +51,12 @@ impl Tarball { } let compressed_size = content_length(&headers)?; - let uncompressed_size = if accepts_byte_ranges(&headers) { - fetch_uncompressed_size(url, compressed_size) - } else { - None - }; ensure_containing_dir_exists(&cache_file)?; let file = File::create(cache_file)?; let data = Box::new(TeeReader::new(response, file)); Ok(Box::new(Tarball { - uncompressed_size, compressed_size, data, origin: Origin::Remote, @@ -81,16 +68,13 @@ impl Archive for Tarball { fn compressed_size(&self) -> u64 { self.compressed_size } - fn uncompressed_size(&self) -> Option<u64> { - self.uncompressed_size - } fn unpack( self: Box<Self>, dest: &Path, progress: &mut 
dyn FnMut(&(), usize), ) -> Result<(), ArchiveError> { - let decoded = GzDecoder::new(self.data); - let mut tarball = tar::Archive::new(ProgressRead::new(decoded, (), progress)); + let decoded = GzDecoder::new(ProgressRead::new(self.data, (), progress)); + let mut tarball = tar::Archive::new(decoded); tarball.unpack(dest)?; Ok(()) } @@ -99,78 +83,6 @@ impl Archive for Tarball { } } -// From http://www.gzip.org/zlib/rfc-gzip.html#member-format -// -// 0 1 2 3 4 5 6 7 -// +---+---+---+---+---+---+---+---+ -// | CRC32 | ISIZE | -// +---+---+---+---+---+---+---+---+ -// -// ISIZE (Input SIZE) -// This contains the size of the original (uncompressed) input data modulo 2^32. - -/// Fetches just the `isize` field (the field that indicates the uncompressed size) -/// of a gzip file from a URL. This makes two round-trips to the server but avoids -/// downloading the entire gzip file. For very small files it's unlikely to be -/// more efficient than simply downloading the entire file up front. -fn fetch_isize(url: &str, len: u64) -> Result<[u8; 4], ArchiveError> { - let (status, headers, mut response) = { - let mut request = attohttpc::get(url); - request - .headers_mut() - .typed_insert(Range::bytes(len - 4..len).unwrap()); - request.send()?.split() - }; - - if !status.is_success() { - return Err(ArchiveError::HttpError(status)); - } - - let actual_length = content_length(&headers)?; - - if actual_length != 4 { - return Err(ArchiveError::UnexpectedContentLengthError(actual_length)); - } - - let mut buf = [0; 4]; - response.read_exact(&mut buf)?; - Ok(buf) -} - -/// Loads the `isize` field (the field that indicates the uncompressed size) -/// of a gzip file from disk. 
-fn load_isize(file: &mut File) -> Result<[u8; 4], ArchiveError> { - file.seek(SeekFrom::End(-4))?; - let mut buf = [0; 4]; - file.read_exact(&mut buf)?; - file.seek(SeekFrom::Start(0))?; - Ok(buf) -} - -fn accepts_byte_ranges(headers: &HeaderMap) -> bool { - headers - .typed_get::<AcceptRanges>() - .is_some_and(|v| v == AcceptRanges::bytes()) -} - -/// Determines the uncompressed size of a gzip file hosted at the specified -/// URL by fetching just the metadata associated with the file. This makes -/// an extra round-trip to the server, so it's only more efficient than just -/// downloading the file if the file is large enough that downloading it is -/// slower than the extra round trips. -fn fetch_uncompressed_size(url: &str, len: u64) -> Option<u64> { - // if there is an error, we ignore it and return None, instead of failing - fetch_isize(url, len) - .ok() - .map(|s| u32::from_le_bytes(s) as u64) -} - -/// Determines the uncompressed size of the specified gzip file on disk. -fn load_uncompressed_size(file: &mut File) -> Option<u64> { - // if there is an error, we ignore it and return None, instead of failing - load_isize(file).ok().map(|s| u32::from_le_bytes(s) as u64) -} - #[cfg(test)] pub mod tests { @@ -192,7 +104,6 @@ pub mod tests { let test_file = File::open(test_file_path).expect("Couldn't open test file"); let tarball = Tarball::load(test_file).expect("Failed to load tarball"); - assert_eq!(tarball.uncompressed_size(), Some(10240)); assert_eq!(tarball.compressed_size(), 402); } } diff --git a/crates/archive/src/zip.rs b/crates/archive/src/zip.rs index 4a078fd8a..59e21149e 100644 --- a/crates/archive/src/zip.rs +++ b/crates/archive/src/zip.rs @@ -60,9 +60,6 @@ impl Archive for Zip { fn compressed_size(&self) -> u64 { self.compressed_size } - fn uncompressed_size(&self) -> Option<u64> { - None - } fn unpack( self: Box<Self>, dest: &Path, diff --git a/crates/volta-core/src/tool/node/fetch.rs b/crates/volta-core/src/tool/node/fetch.rs index c61d7f494..c969eb4db 100--- 
a/crates/volta-core/src/tool/node/fetch.rs +++ b/crates/volta-core/src/tool/node/fetch.rs @@ -95,9 +95,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible, version: &Version) -> Fallible<()> let progress = progress_bar( archive.origin(), &tool_version("npm", version), - archive - .uncompressed_size() - .unwrap_or_else(|| archive.compressed_size()), + archive.compressed_size(), ); let version_string = version.to_string(); diff --git a/crates/volta-core/src/tool/pnpm/fetch.rs b/crates/volta-core/src/tool/pnpm/fetch.rs index cb37179b7..43493280b 100644 --- a/crates/volta-core/src/tool/pnpm/fetch.rs +++ b/crates/volta-core/src/tool/pnpm/fetch.rs @@ -64,9 +64,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<()> let progress = progress_bar( archive.origin(), &tool_version("pnpm", version), - archive - .uncompressed_size() - .unwrap_or_else(|| archive.compressed_size()), + archive.compressed_size(), ); let version_string = version.to_string(); diff --git a/crates/volta-core/src/tool/yarn/fetch.rs b/crates/volta-core/src/tool/yarn/fetch.rs index ff5ead4c1..ee381d542 100644 --- a/crates/volta-core/src/tool/yarn/fetch.rs +++ b/crates/volta-core/src/tool/yarn/fetch.rs @@ -66,9 +66,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<()> let progress = progress_bar( archive.origin(), &tool_version("yarn", version), - archive - .uncompressed_size() - .unwrap_or_else(|| archive.compressed_size()), + archive.compressed_size(), ); let version_string = version.to_string();