Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion crates/archive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ pub enum Origin {

pub trait Archive {
fn compressed_size(&self) -> u64;
fn uncompressed_size(&self) -> Option<u64>;

/// Unpacks the zip archive to the specified destination folder.
fn unpack(
Expand Down
99 changes: 5 additions & 94 deletions crates/archive/src/tarball.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,20 @@
//! tarball in Unix operating systems.

use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::io::Read;
use std::path::Path;

use super::{Archive, ArchiveError, Origin};
use attohttpc::header::HeaderMap;
use flate2::read::GzDecoder;
use fs_utils::ensure_containing_dir_exists;
use headers::{AcceptRanges, ContentLength, Header, HeaderMapExt, Range};
use headers::{ContentLength, Header, HeaderMapExt};
use progress_read::ProgressRead;
use tee::TeeReader;

/// A Node installation tarball.
pub struct Tarball {
compressed_size: u64,
// Some servers don't return the right data for byte range queries, so
// getting the uncompressed archive size for tarballs is an Option.
// If the uncompressed size is not available, the compressed size will be
// used for the download/unpack progress indicator, so that will be slightly off.
uncompressed_size: Option<u64>,
data: Box<dyn Read>,
origin: Origin,
}
Expand All @@ -36,11 +31,9 @@ fn content_length(headers: &HeaderMap) -> Result<u64, ArchiveError> {

impl Tarball {
/// Loads a tarball from the specified file.
pub fn load(mut source: File) -> Result<Box<dyn Archive>, ArchiveError> {
let uncompressed_size = load_uncompressed_size(&mut source);
pub fn load(source: File) -> Result<Box<dyn Archive>, ArchiveError> {
let compressed_size = source.metadata()?.len();
Ok(Box::new(Tarball {
uncompressed_size,
compressed_size,
data: Box::new(source),
origin: Origin::Local,
Expand All @@ -58,18 +51,12 @@ impl Tarball {
}

let compressed_size = content_length(&headers)?;
let uncompressed_size = if accepts_byte_ranges(&headers) {
fetch_uncompressed_size(url, compressed_size)
} else {
None
};

ensure_containing_dir_exists(&cache_file)?;
let file = File::create(cache_file)?;
let data = Box::new(TeeReader::new(response, file));

Ok(Box::new(Tarball {
uncompressed_size,
compressed_size,
data,
origin: Origin::Remote,
Expand All @@ -81,16 +68,13 @@ impl Archive for Tarball {
fn compressed_size(&self) -> u64 {
self.compressed_size
}
fn uncompressed_size(&self) -> Option<u64> {
self.uncompressed_size
}
fn unpack(
self: Box<Self>,
dest: &Path,
progress: &mut dyn FnMut(&(), usize),
) -> Result<(), ArchiveError> {
let decoded = GzDecoder::new(self.data);
let mut tarball = tar::Archive::new(ProgressRead::new(decoded, (), progress));
let decoded = GzDecoder::new(ProgressRead::new(self.data, (), progress));
let mut tarball = tar::Archive::new(decoded);
Comment on lines +76 to +77
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for my own clarity: what motivated changing up where we put the ProgressRead? It seems like it should be the same behavior either way, since ultimately in both cases it’s just wrapping the Read stream?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I can definitely explain! For some background:

  1. ProgressRead is a thin wrapper around a reader that calls the provided FnMut every time data is read, passing in the amount of data that was read from the wrapped reader.
  2. GzDecoder is a Read wrapper around a stream of compressed bytes, which produces a stream of uncompressed bytes.

Given that, the motivation behind the change is that previously we were wrapping the decoded reader with ProgressRead, meaning the number of bytes it would report would be from the stream of uncompressed bytes—they would add up to the total uncompressed size. Since we're no longer fetching / relying on the uncompressed size, we instead move the ProgressRead to wrap the stream of compressed bytes, which allows the total amount of data recorded to match the compressed size.

tarball.unpack(dest)?;
Ok(())
}
Expand All @@ -99,78 +83,6 @@ impl Archive for Tarball {
}
}

// From http://www.gzip.org/zlib/rfc-gzip.html#member-format
//
// 0 1 2 3 4 5 6 7
// +---+---+---+---+---+---+---+---+
// | CRC32 | ISIZE |
// +---+---+---+---+---+---+---+---+
//
// ISIZE (Input SIZE)
// This contains the size of the original (uncompressed) input data modulo 2^32.

/// Fetches just the `isize` field (the field that indicates the uncompressed size)
/// of a gzip file from a URL. This makes two round-trips to the server but avoids
/// downloading the entire gzip file. For very small files it's unlikely to be
/// more efficient than simply downloading the entire file up front.
/// Fetches just the `isize` field (the field that indicates the uncompressed size)
/// of a gzip file from a URL, by requesting only the final 4 bytes of the file
/// with an HTTP byte-range request. This makes two round-trips to the server but
/// avoids downloading the entire gzip file. For very small files it's unlikely to
/// be more efficient than simply downloading the entire file up front.
///
/// `len` is the total compressed length of the remote file, as previously
/// reported by the server's `Content-Length` header.
///
/// # Errors
///
/// Returns an error if `len` is too short to contain the 4-byte ISIZE trailer,
/// if the HTTP request fails or returns a non-success status, or if the server
/// responds with a body that is not exactly 4 bytes long.
fn fetch_isize(url: &str, len: u64) -> Result<[u8; 4], ArchiveError> {
    // Guard against underflow: `len - 4` below would wrap around in release
    // builds (and panic in debug builds) for a file shorter than the 4-byte
    // ISIZE trailer, producing a nonsensical byte range.
    if len < 4 {
        return Err(ArchiveError::UnexpectedContentLengthError(len));
    }

    let (status, headers, mut response) = {
        let mut request = attohttpc::get(url);
        request
            .headers_mut()
            // The range is non-empty and well-formed (len >= 4 was checked
            // above), so constructing the Range header cannot fail here.
            .typed_insert(Range::bytes(len - 4..len).unwrap());
        request.send()?.split()
    };

    if !status.is_success() {
        return Err(ArchiveError::HttpError(status));
    }

    let actual_length = content_length(&headers)?;

    // Some servers ignore Range headers and return the whole file; reject
    // anything other than the exact 4 bytes we asked for.
    if actual_length != 4 {
        return Err(ArchiveError::UnexpectedContentLengthError(actual_length));
    }

    let mut buf = [0; 4];
    response.read_exact(&mut buf)?;
    Ok(buf)
}

/// Loads the `isize` field (the field that indicates the uncompressed size)
/// of a gzip file from disk.
/// Loads the `isize` field (the field that indicates the uncompressed size)
/// of a gzip file from disk by reading the final 4 bytes of the file.
///
/// The file cursor is returned to the start of the file afterward, so the
/// caller can still read the archive from the beginning.
fn load_isize(file: &mut File) -> Result<[u8; 4], ArchiveError> {
    let mut trailer = [0u8; 4];
    // The ISIZE field occupies the last 4 bytes of a gzip member.
    file.seek(SeekFrom::End(-4))?;
    file.read_exact(&mut trailer)?;
    // Leave the cursor where subsequent readers expect it: at offset 0.
    file.rewind()?;
    Ok(trailer)
}

/// Reports whether the server advertised support for byte-range requests,
/// i.e. whether the response carried an `Accept-Ranges: bytes` header.
fn accepts_byte_ranges(headers: &HeaderMap) -> bool {
    match headers.typed_get::<AcceptRanges>() {
        Some(ranges) => ranges == AcceptRanges::bytes(),
        None => false,
    }
}

/// Determines the uncompressed size of a gzip file hosted at the specified
/// URL by fetching just the metadata associated with the file. This makes
/// an extra round-trip to the server, so it's only more efficient than just
/// downloading the file if the file is large enough that downloading it is
/// slower than the extra round trips.
/// Determines the uncompressed size of a gzip file hosted at the specified
/// URL by fetching just the metadata associated with the file. This makes
/// an extra round-trip to the server, so it's only more efficient than just
/// downloading the file if the file is large enough that downloading it is
/// slower than the extra round trips.
fn fetch_uncompressed_size(url: &str, len: u64) -> Option<u64> {
    // Best-effort: any fetch or parse error is swallowed and reported as
    // "unknown size" (None) rather than failing the overall operation.
    match fetch_isize(url, len) {
        Ok(bytes) => Some(u64::from(u32::from_le_bytes(bytes))),
        Err(_) => None,
    }
}

/// Determines the uncompressed size of the specified gzip file on disk.
/// Determines the uncompressed size of the specified gzip file on disk by
/// reading its trailing ISIZE field.
fn load_uncompressed_size(file: &mut File) -> Option<u64> {
    // Best-effort: an unreadable or malformed trailer yields None instead of
    // propagating the error to the caller.
    match load_isize(file) {
        Ok(bytes) => Some(u64::from(u32::from_le_bytes(bytes))),
        Err(_) => None,
    }
}

#[cfg(test)]
pub mod tests {

Expand All @@ -192,7 +104,6 @@ pub mod tests {
let test_file = File::open(test_file_path).expect("Couldn't open test file");
let tarball = Tarball::load(test_file).expect("Failed to load tarball");

assert_eq!(tarball.uncompressed_size(), Some(10240));
assert_eq!(tarball.compressed_size(), 402);
}
}
3 changes: 0 additions & 3 deletions crates/archive/src/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ impl Archive for Zip {
fn compressed_size(&self) -> u64 {
self.compressed_size
}
fn uncompressed_size(&self) -> Option<u64> {
None
}
fn unpack(
self: Box<Self>,
dest: &Path,
Expand Down
4 changes: 1 addition & 3 deletions crates/volta-core/src/tool/node/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<Node
let progress = progress_bar(
archive.origin(),
&tool_version("node", version),
archive
.uncompressed_size()
.unwrap_or_else(|| archive.compressed_size()),
archive.compressed_size(),
);
let version_string = version.to_string();

Expand Down
4 changes: 1 addition & 3 deletions crates/volta-core/src/tool/npm/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<()>
let progress = progress_bar(
archive.origin(),
&tool_version("npm", version),
archive
.uncompressed_size()
.unwrap_or_else(|| archive.compressed_size()),
archive.compressed_size(),
);
let version_string = version.to_string();

Expand Down
4 changes: 1 addition & 3 deletions crates/volta-core/src/tool/pnpm/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<()>
let progress = progress_bar(
archive.origin(),
&tool_version("pnpm", version),
archive
.uncompressed_size()
.unwrap_or_else(|| archive.compressed_size()),
archive.compressed_size(),
);
let version_string = version.to_string();

Expand Down
4 changes: 1 addition & 3 deletions crates/volta-core/src/tool/yarn/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,7 @@ fn unpack_archive(archive: Box<dyn Archive>, version: &Version) -> Fallible<()>
let progress = progress_bar(
archive.origin(),
&tool_version("yarn", version),
archive
.uncompressed_size()
.unwrap_or_else(|| archive.compressed_size()),
archive.compressed_size(),
);
let version_string = version.to_string();

Expand Down