Skip to content

Commit

Permalink
tar: Output composefs dump format from cfsctl ls
Browse files Browse the repository at this point in the history
This more or less works for piping into mkcomposefs now.  There are a
few TODOs around pax extensions and such...
  • Loading branch information
allisonkarlitskaya committed Oct 9, 2024
1 parent ea5e35d commit d59bd55
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
anyhow = { version = "1.0.89", features = ["backtrace"] }
clap = { version = "4.5.19", features = ["derive"] }
composefs = "0.1.2"
hex = "0.4.3"
rand = "0.8.5"
rustix = { version = "0.38.37", features = ["fs", "mount", "process"] }
Expand Down
179 changes: 170 additions & 9 deletions src/tar.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
use std::io::{Read, Write};
use std::{
borrow::Cow,
collections::HashMap,
ffi::{
OsStr,
OsString,
},
os::unix::prelude::OsStrExt,
path::{
Path,
PathBuf,
},
io::{
Read,
Write,
},
};

use anyhow::Result;
use anyhow::{
bail,
Result
};
use composefs::dumpfile::{
Entry,
Item,
Mtime,
Xattr,
};
use rustix::fs::{
FileType,
makedev
};
use tar::{
EntryType,
Header,
PaxExtensions,
};

use crate::{
Expand Down Expand Up @@ -64,8 +94,44 @@ pub fn split<R: Read, W: Write, F: FnMut(&[u8]) -> Result<Sha256HashValue>>(
writer.done()
}

/// Builds the absolute path of a tar entry from its raw name bytes.
///
/// `long` is the payload collected from a preceding GNU long-name record (empty when
/// there was none); `short` is the name taken from the entry's own header. A non-empty
/// `long` takes precedence over `short`.
///
/// The returned path always starts with `/`. A single NUL terminator at the end of
/// `long` is ignored, because GNU tar stores long names NUL-terminated and the payload
/// is passed in unmodified. Trailing `/` characters (as found on directory entries) are
/// dropped, but the leading `/` is never removed, so an empty name maps to `/`.
fn path_from_tar(long: &[u8], short: &[u8]) -> PathBuf {
    // Strip the NUL terminator GNU tar appends to long names; a NUL can never be
    // part of a real path component.
    let long = match long.split_last() {
        Some((&0, rest)) => rest,
        _ => long,
    };
    let name = if long.is_empty() { short } else { long };

    // Prepend the leading '/'.
    let mut path = Vec::with_capacity(name.len() + 1);
    path.push(b'/');
    path.extend_from_slice(name);

    // Drop trailing '/' characters in case of directories, but keep the root itself.
    while path.len() > 1 && path.last() == Some(&b'/') {
        path.pop();
    }

    PathBuf::from(OsStr::from_bytes(&path))
}

/// Builds a symlink target from its raw tar bytes.
///
/// `long` is the payload collected from a preceding GNU long-link record (empty when
/// there was none); `short` is the link name taken from the entry's own header. A
/// non-empty `long` takes precedence over `short`.
///
/// Unlike entry paths, the target is returned verbatim: no leading `/` is added and
/// trailing slashes are kept. Only a single NUL terminator at the end of `long` is
/// ignored, because GNU tar stores long link names NUL-terminated.
fn symlink_target_from_tar(long: &[u8], short: &[u8]) -> PathBuf {
    // Strip the NUL terminator GNU tar appends to long link names.
    let long = match long.split_last() {
        Some((&0, rest)) => rest,
        _ => long,
    };
    let target = if long.is_empty() { short } else { long };

    // Converting straight from the borrowed bytes avoids an intermediate buffer.
    PathBuf::from(OsStr::from_bytes(target))
}

pub fn ls<R: Read>(split_stream: &mut R) -> Result<()> {
let mut reader = SplitStreamReader::new(split_stream);
let mut gnu_longname: Vec<u8> = vec![];
let mut gnu_longlink: Vec<u8> = vec![];
let mut pax_headers = HashMap::new();

// no root entry in the tar
println!("{}", Entry {
path: Cow::Borrowed(Path::new("/")),
uid: 0,
gid: 0,
mode: FileType::Directory.as_raw_mode() | 0o755,
mtime: Mtime { sec: 0, nsec: 0 },
item: Item::Directory { size: 0, nlink: 1 },
xattrs: vec![]
});

loop {
let mut buf = [0u8; 512];
Expand All @@ -78,13 +144,108 @@ pub fn ls<R: Read>(split_stream: &mut R) -> Result<()> {
}

let header = tar::Header::from_byte_slice(&buf);
let actual_size = header.size()? as usize;
let stored_size = (actual_size + 511) & !511;
println!("{:?}", header.path()?);
match reader.read_exact(actual_size, stored_size)? {
SplitStreamData::Inline(data) => println!("{} data bytes inline", data.len()),
SplitStreamData::External(id) => println!("ext {}", hex::encode(id))
assert!(header.as_ustar().is_some());

let nlink = 1;
let size = header.entry_size()?;

let item = match reader.read_exact(size as usize, (size + 511 & !511) as usize)? {
SplitStreamData::External(id) => match header.entry_type() {
EntryType::Regular | EntryType::Continuous => Item::Regular {
fsverity_digest: Some(hex::encode(id)),
inline_content: None,
nlink, size
},
_ => bail!("Unsupported external-chunked entry {:?} {}", header, hex::encode(id)),
},
SplitStreamData::Inline(content) => match header.entry_type() {
EntryType::GNULongLink => {
gnu_longlink.extend(content);
continue;
},
EntryType::GNULongName => {
gnu_longname.extend(content);
continue;
},
EntryType::XGlobalHeader => {
todo!();
},
EntryType::XHeader => {
for item in PaxExtensions::new(&content) {
if let Ok(extension) = item {
pax_headers.insert(String::from(extension.key()?), Vec::from(extension.value_bytes()));
}
}
continue;
},
EntryType::Directory => Item::Directory { size, nlink },
EntryType::Regular | EntryType::Continuous => Item::Regular {
fsverity_digest: None,
inline_content: Some(content.into()),
nlink, size
},
EntryType::Link => Item::Hardlink {
target: {
let Some(link_name) = header.link_name_bytes() else { bail!("link without a name?") };
Cow::Owned(path_from_tar(&gnu_longlink, &link_name))
}
},
EntryType::Symlink => Item::Symlink {
target: {
let Some(link_name) = header.link_name_bytes() else { bail!("symlink without a name?") };
Cow::Owned(symlink_target_from_tar(&gnu_longlink, &link_name))
},
nlink
},
EntryType::Block | EntryType::Char => Item::Device {
rdev: match (header.device_major()?, header.device_minor()?) {
(Some(major), Some(minor)) => makedev(major, minor),
_ => bail!("Device entry without device numbers?"),
},
nlink
},
EntryType::Fifo => Item::Fifo { nlink },
_ => {
todo!("Unsupported entry {:?} {:?}", header, content);
}
}
};

let mut xattrs = vec![];
for (key, value) in &pax_headers {
if key == "path" {
// TODO: why?!
} else if key == "linkpath" {
// TODO: why?!
} else if let Some(xattr) = key.strip_prefix("SCHILY.xattr.") {
xattrs.push(Xattr { key: Cow::Owned(OsString::from(xattr)), value: Cow::Borrowed(&value) });
} else {
todo!("pax header {key:?}");
}
}
println!();

let entry = Entry {
path: Cow::Owned(path_from_tar(&gnu_longname, &header.path_bytes())),
uid: header.uid()? as u32,
gid: header.gid()? as u32,
mode: header.mode()? | match header.entry_type() {
EntryType::Directory => FileType::Directory,
EntryType::Regular => FileType::RegularFile,
EntryType::Symlink => FileType::Symlink,
EntryType::Char => FileType::CharacterDevice,
EntryType::Block => FileType::BlockDevice,
EntryType::Fifo => FileType::Fifo,
_ => { continue; }
}.as_raw_mode(),
mtime: Mtime { sec: header.mtime()?, nsec: 0 },
item,
xattrs
};

println!("{}", entry);

gnu_longlink.clear();
gnu_longname.clear();
pax_headers.clear();
}
}

0 comments on commit d59bd55

Please sign in to comment.