From f732623d3de992b60c016d43608e277d0b9d2da2 Mon Sep 17 00:00:00 2001 From: Mateusz Kwapich Date: Fri, 20 Dec 2024 11:16:50 +0000 Subject: [PATCH] fix for archiving long paths that have path components starting with ".." crossing the 100-character mark GNU tar supports arbitrary path lengths by putting the path, truncated to the standard 100 chars, into the header; the rest is appended to the contents. tar-rs validates that no path component is exactly "..", but when a component starting with ".." (for example a file named "..some_file") gets truncated after 2 characters, we hit this validation and can't tar such a file. --- src/builder.rs | 2 +- src/header.rs | 73 ++++++++++++++++++++++++++++++++++++++++---------- tests/all.rs | 33 +++++++++++++++++++++++ 3 files changed, 93 insertions(+), 15 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index fab8725..7de2147 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -538,7 +538,7 @@ async fn prepare_header_path( // Truncate the path to store in the header we're about to emit to // ensure we've got something at least mentioned. let path = bytes2path(Cow::Borrowed(&data[..max]))?; - header.set_path(&path)?; + header.set_truncated_path_for_gnu_header(&path)?; } Ok(()) } diff --git a/src/header.rs b/src/header.rs index bc8976f..dfc87b3 100644 --- a/src/header.rs +++ b/src/header.rs @@ -359,14 +359,29 @@ impl Header { /// in the appropriate format. May fail if the path is too long or if the /// path specified is not Unicode and this is a Windows platform. pub fn set_path>(&mut self, p: P) -> io::Result<()> { - self._set_path(p.as_ref()) + self.set_path_inner(p.as_ref(), false) } - fn _set_path(&mut self, path: &Path) -> io::Result<()> { + // Sets the truncated path for GNU header + // + // Same as `set_path` but skips some validations. 
+ pub(crate) fn set_truncated_path_for_gnu_header>( + &mut self, + p: P, + ) -> io::Result<()> { + self.set_path_inner(p.as_ref(), true) + } + + fn set_path_inner(&mut self, path: &Path, is_truncated_gnu_long_path: bool) -> io::Result<()> { if let Some(ustar) = self.as_ustar_mut() { return ustar.set_path(path); } - copy_path_into(&mut self.as_old_mut().name, path, false).map_err(|err| { + if is_truncated_gnu_long_path { + copy_path_into_gnu_long(&mut self.as_old_mut().name, path, false) + } else { + copy_path_into(&mut self.as_old_mut().name, path, false) + } + .map_err(|err| { io::Error::new( err.kind(), format!("{} when setting path for {}", err, self.path_lossy()), @@ -1465,25 +1480,29 @@ fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> { } } -/// Copies `path` into the `slot` provided -/// -/// Returns an error if: -/// -/// * the path is too long to fit -/// * a nul byte was found -/// * an invalid path component is encountered (e.g. a root path or parent dir) -/// * the path itself is empty -fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { +fn copy_path_into_inner( + mut slot: &mut [u8], + path: &Path, + is_link_name: bool, + is_truncated_gnu_long_path: bool, +) -> io::Result<()> { let mut emitted = false; let mut needs_slash = false; - for component in path.components() { + let mut iter = path.components().peekable(); + while let Some(component) = iter.next() { let bytes = path2bytes(Path::new(component.as_os_str()))?; match (component, is_link_name) { (Component::Prefix(..), false) | (Component::RootDir, false) => { return Err(other("paths in archives must be relative")); } (Component::ParentDir, false) => { - return Err(other("paths in archives must not have `..`")); + if is_truncated_gnu_long_path && iter.peek().is_none() { + // If it's last component of a gnu long path we know that there might be more + // to the component than .. 
(the rest is stored elsewhere) + {} + } else { + return Err(other("paths in archives must not have `..`")); + } } // Allow "./" as the path (Component::CurDir, false) if path.components().count() == 1 => {} @@ -1520,6 +1539,32 @@ fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::R } } +/// Copies `path` into the `slot` provided +/// +/// Returns an error if: +/// +/// * the path is too long to fit +/// * a nul byte was found +/// * an invalid path component is encountered (e.g. a root path or parent dir) +/// * the path itself is empty +fn copy_path_into(slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { + copy_path_into_inner(slot, path, is_link_name, false) +} + +/// Copies `path` into the `slot` provided +/// +/// Returns an error if: +/// +/// * the path is too long to fit +/// * a nul byte was found +/// * an invalid path component is encountered (e.g. a root path or parent dir) +/// * the path itself is empty +/// +/// This is a less restrictive version meant to be used for truncated GNU paths. +fn copy_path_into_gnu_long(slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { + copy_path_into_inner(slot, path, is_link_name, true) +} + #[cfg(target_arch = "wasm32")] fn ends_with_slash(p: &Path) -> bool { p.to_string_lossy().ends_with('/') diff --git a/tests/all.rs b/tests/all.rs index d738345..b4e62e2 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -213,6 +213,39 @@ async fn large_filename() { assert!(entries.next().await.is_none()); } +// This test checks a very particular scenario where a path component +// starting with ".." of a long path gets split at 100-byte mark +// so that ".." goes into header and gets interpreted as parent dir +// (and rejected). 
+#[async_std::test] +async fn large_filename_with_dot_dot_at_100_byte_mark() { + let mut ar = Builder::new(Vec::new()); + + let mut header = Header::new_gnu(); + header.set_cksum(); + header.set_mode(0o644); + header.set_size(4); + + let mut long_name_with_dot_dot = "tdir/".repeat(19); + long_name_with_dot_dot.push_str("tt/..file"); + + t!(ar + .append_data(&mut header, &long_name_with_dot_dot, &b"test"[..]) + .await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + let mut f = entries.next().await.unwrap().unwrap(); + assert_eq!(&*f.path_bytes(), long_name_with_dot_dot.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + assert!(entries.next().await.is_none()); +} + #[async_std::test] async fn reading_entries() { let rdr = Cursor::new(tar!("reading_files.tar"));