Skip to content

Commit

Permalink
Avoid re-creating directories in async unzip (#1155)
Browse files: browse the repository at this point in the history
This PR extends the optimizations from #1154 to the other unzip paths, so that directories which have already been created are not re-created.
  • Loading branch information
charliermarsh authored Jan 28, 2024
1 parent 3d10f34 commit a25a1f2
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
7 changes: 3 additions & 4 deletions crates/install-wheel-rs/src/wheel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ fn unpack_wheel_files<R: Read + Seek>(
continue;
}

if let Some(p) = out_path.parent() {
if !created_dirs.contains(p) {
fs::create_dir_all(p)?;
created_dirs.insert(p.to_path_buf());
if let Some(parent) = out_path.parent() {
if created_dirs.insert(parent.to_path_buf()) {
fs::create_dir_all(parent)?;
}
}
let mut outfile = BufWriter::new(File::create(&out_path)?);
Expand Down
1 change: 1 addition & 0 deletions crates/puffin-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
}

// Download the source distribution to a temporary file.
// TODO(charlie): Unzip as we download, as with wheels.
let span =
info_span!("download_source_dist", filename = filename, source_dist = %source_dist);
let download_dir = self.download_source_dist_url(response, filename).await?;
Expand Down
20 changes: 15 additions & 5 deletions crates/puffin-extract/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ pub async fn unzip_no_seek<R: tokio::io::AsyncRead + Unpin>(
let mut reader = reader.compat();
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);

let mut directories = FxHashSet::default();

while let Some(mut entry) = zip.next_with_entry().await? {
// Construct the (expected) path to the file on-disk.
let path = entry.reader().entry().filename().as_str()?;
Expand All @@ -48,11 +50,16 @@ pub async fn unzip_no_seek<R: tokio::io::AsyncRead + Unpin>(

// Either create the directory or write the file to disk.
if is_dir {
fs_err::tokio::create_dir_all(path).await?;
if directories.insert(path.clone()) {
fs_err::tokio::create_dir_all(path).await?;
}
} else {
if let Some(parent) = path.parent() {
fs_err::tokio::create_dir_all(parent).await?;
if directories.insert(parent.to_path_buf()) {
fs_err::tokio::create_dir_all(parent).await?;
}
}

let file = fs_err::tokio::File::create(path).await?;
let mut writer =
if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) {
Expand All @@ -78,8 +85,8 @@ pub async fn unzip_no_seek<R: tokio::io::AsyncRead + Unpin>(
use std::os::unix::fs::PermissionsExt;

// To avoid lots of small reads to `reader` when parsing the central directory, wrap it in
// a buffer. The buffer size is semi-arbitrary, but the central directory is usually small.
let mut buf = futures::io::BufReader::with_capacity(1024 * 1024, reader);
// a buffer.
let mut buf = futures::io::BufReader::new(reader);
let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut buf);
while let Some(entry) = directory.next().await? {
if entry.dir()? {
Expand Down Expand Up @@ -121,7 +128,10 @@ pub fn unzip_archive<R: Send + std::io::Read + std::io::Seek + HasLength>(
// Create necessary parent directories.
let path = target.join(enclosed_name);
if file.is_dir() {
fs_err::create_dir_all(&path)?;
let mut directories = directories.lock().unwrap();
if directories.insert(path.clone()) {
fs_err::create_dir_all(path)?;
}
return Ok(());
}

Expand Down

0 comments on commit a25a1f2

Please sign in to comment.