From 9729b45a0e36798a3203f9514f381917f3c87c43 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 23 May 2024 21:05:46 -0400 Subject: [PATCH] Add a composefs Rust crate Right now this includes just a parser/generator for dump files, which is helpful for e.g. a flow like taking a tarball, parsing it in Rust, and then serializing the entry to a composefs dumpfile line. I'd like to add more opinionated and advanced functionality related to composefs implemented in this crate - *especially* integration with containers/OCI. My thought is that in the short term we can publish this crate without having the core C code depend on it. I plan to use this crate in bootc. However, more medium term, it may make sense to offer a CLI tool from this crate, in which case there will be some tension with the CLI tools that happen to be implemented in C, as commonly people would Signed-off-by: Colin Walters --- .github/workflows/rust.yml | 43 ++++ .gitignore | 2 + Cargo.toml | 27 +++ deny.toml | 10 + rust/src/dumpfile.rs | 446 +++++++++++++++++++++++++++++++++++++ rust/src/lib.rs | 62 ++++++ rust/src/mkcomposefs.rs | 113 ++++++++++ 7 files changed, 703 insertions(+) create mode 100644 .github/workflows/rust.yml create mode 100644 Cargo.toml create mode 100644 deny.toml create mode 100644 rust/src/dumpfile.rs create mode 100644 rust/src/lib.rs create mode 100644 rust/src/mkcomposefs.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 00000000..e14beeea --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,43 @@ +# Inspired by https://github.com/rust-analyzer/rust-analyzer/blob/master/.github/workflows/ci.yaml +# but tweaked in several ways. If you make changes here, consider doing so across other +# repositories in e.g. ostreedev etc. 
+name: Rust + +permissions: + actions: read + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: {} + +env: + CARGO_TERM_COLOR: always + +jobs: + tests: + runs-on: ubuntu-latest + container: quay.io/coreos-assembler/fcos-buildroot:testing-devel + steps: + - uses: actions/checkout@v3 + # xref containers/containers-image-proxy-rs + - name: Cache Dependencies + uses: Swatinem/rust-cache@v2 + with: + key: "tests" + - name: cargo fmt (check) + run: cargo fmt -- --check -l + - name: Build + run: cargo test --no-run + - name: Run tests + run: cargo test -- --nocapture --quiet + cargo-deny: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: EmbarkStudios/cargo-deny-action@v1 + with: + log-level: warn + command: check bans sources licenses diff --git a/.gitignore b/.gitignore index 9252feda..b7baf47d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ composefs.spec libtool ltmain.sh missing +target/ +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..e454f610 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "composefs" +version = "0.1.0" +edition = "2021" +description = "Rust composefs" +keywords = ["composefs", "oci", "opencontainers", "docker", "podman"] +license = "MIT" +repository = "https://github.com/containers/composefs" +rust-version = "1.70.0" + +include = [ + "/COPYING", + "/README.md", + "/rust/**", +] + +[lib] +name = "composefs" +path = "rust/src/lib.rs" + +[dependencies] +anyhow = "1.0" +libc = "0.2" + +[dev-dependencies] +tar = "0.4.38" +tempfile = "3.2.0" \ No newline at end of file diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000..24802969 --- /dev/null +++ b/deny.toml @@ -0,0 +1,10 @@ +[licenses] +unlicensed = "deny" +allow = ["Apache-2.0", "Apache-2.0 WITH LLVM-exception", "MIT", "BSD-3-Clause", "BSD-2-Clause", "Unicode-DFS-2016"] + +[bans] + +[sources] +unknown-registry = "deny" +unknown-git = "deny" +allow-git = 
/// An extended attribute entry
#[derive(Debug, PartialEq, Eq)]
pub struct Xattr<'k> {
    /// The attribute name; stored as an `OsStr` because it need not be UTF-8.
    pub key: Cow<'k, OsStr>,
    /// The raw attribute value.
    pub value: Cow<'k, [u8]>,
}

/// A full set of extended attributes
pub type Xattrs<'k> = Vec<Xattr<'k>>;

/// Modification time
///
/// Serialized in dump files as `<sec>.<nsec>`, i.e. two integers separated
/// by a dot (not a decimal fraction).
#[derive(Debug, PartialEq, Eq)]
pub struct Mtime {
    /// Seconds
    pub sec: u64,
    /// Nanoseconds
    pub nsec: u64,
}

/// A composefs dumpfile entry
///
/// The `Display` implementation for this type is defined to serialize
/// into a format consumable by `mkcomposefs --from-file`.
#[derive(Debug, PartialEq, Eq)]
pub struct Entry<'p> {
    /// The filename
    pub path: Cow<'p, Path>,
    /// uid
    pub uid: u32,
    /// gid
    pub gid: u32,
    /// mode (includes file type)
    pub mode: u32,
    /// Modification time
    pub mtime: Mtime,
    /// The specific file/directory data
    pub item: Item<'p>,
    /// Extended attributes
    pub xattrs: Xattrs<'p>,
}

/// The file-type specific part of a composefs [`Entry`].
#[derive(Debug, PartialEq, Eq)]
pub enum Item<'p> {
    /// A regular file
    Regular {
        /// Size of the file
        size: u64,
        /// Number of links
        nlink: u32,
        /// Inline content
        inline_content: Option<Cow<'p, [u8]>>,
        /// The fsverity digest
        fsverity_digest: Option<String>,
    },
    /// A character or block device node
    Device {
        /// Number of links
        nlink: u32,
        /// The device number
        rdev: u32,
    },
    /// A symbolic link
    Symlink {
        /// Number of links
        nlink: u32,
        /// Symlink target
        target: Cow<'p, Path>,
    },
    /// A hardlink entry
    Hardlink {
        /// The hardlink target
        target: Cow<'p, Path>,
    },
    /// FIFO
    Fifo {
        /// Number of links
        nlink: u32,
    },
    /// A directory
    Directory {
        /// Size of a directory is not necessarily meaningful
        size: u64,
        /// Number of links
        nlink: u32,
    },
}
+pub enum Item<'p> { + /// A regular file + Regular { + /// Size of the file + size: u64, + /// Number of links + nlink: u32, + /// Inline content + inline_content: Option>, + /// The fsverity digest + fsverity_digest: Option, + }, + /// A character or block device node + Device { + /// Number of links + nlink: u32, + /// The device number + rdev: u32, + }, + /// A symbolic link + Symlink { + /// Number of links + nlink: u32, + /// Symlink target + target: Cow<'p, Path>, + }, + /// A hardlink entry + Hardlink { + /// The hardlink target + target: Cow<'p, Path>, + }, + /// FIFO + Fifo { + /// Number of links + nlink: u32, + }, + /// A directory + Directory { + /// Size of a directory is not necessarily meaningful + size: u64, + /// Number of links + nlink: u32, + }, +} + +/// Unescape a byte array according to the composefs dump file escaping format. +fn unescape(s: &str) -> Result> { + // If there are no escapes, just return the input unchanged + if !s.contains('\\') { + return Ok(Cow::Borrowed(s.as_bytes())); + } + let mut it = s.chars(); + let mut r = Vec::new(); + while let Some(c) = it.next() { + if c != '\\' { + write!(r, "{c}").unwrap(); + continue; + } + let c = it.next().ok_or_else(|| anyhow!("Unterminated escape"))?; + let c = match c { + '\\' => b'\\', + 'n' => b'\n', + 'r' => b'\r', + 't' => b'\t', + 'x' => { + let mut s = String::new(); + s.push( + it.next() + .ok_or_else(|| anyhow!("Unterminated hex escape"))?, + ); + s.push( + it.next() + .ok_or_else(|| anyhow!("Unterminated hex escape"))?, + ); + + u8::from_str_radix(&s, 16).with_context(|| anyhow!("Invalid hex escape {s}"))? + } + o => anyhow::bail!("Invalid escape {o}"), + }; + r.push(c); + } + Ok(r.into()) +} + +/// Unescape a string into a Rust `OsStr` which is really just an alias for a byte array. +fn unescape_to_osstr(s: &str) -> Result> { + let r = match unescape(s)? 
/// Selects which bytes [`escape`] treats as special.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EscapeMode {
    /// Escape backslashes and anything that is not ASCII alphanumeric/punctuation.
    Standard,
    /// Like `Standard`, but additionally hex-escape `=`, which separates an
    /// xattr key from its value.
    XattrKey,
}

/// Escape a byte array according to the composefs dump file text format.
///
/// ASCII alphanumeric and punctuation bytes are written verbatim (except for
/// the specials selected by `mode`); `\`, newline, tab and carriage return get
/// their short escapes and every other byte is written as `\xHH`.
///
/// Returns any error reported by the underlying writer.
fn escape<W: std::fmt::Write>(out: &mut W, s: &[u8], mode: EscapeMode) -> std::fmt::Result {
    // Special case a single `-` as that means "no value".
    if s == b"-" {
        return out.write_str(r"\x2d");
    }
    for c in s.iter().copied() {
        // Escape `=` as hex in xattr keys.
        let is_special = c == b'\\' || (mode == EscapeMode::XattrKey && c == b'=');
        let is_printable = c.is_ascii_alphanumeric() || c.is_ascii_punctuation();
        if is_printable && !is_special {
            out.write_char(c as char)?;
        } else {
            match c {
                b'\\' => out.write_str(r"\\")?,
                b'\n' => out.write_str(r"\n")?,
                b'\t' => out.write_str(r"\t")?,
                b'\r' => out.write_str(r"\r")?,
                o => write!(out, "\\x{:02x}", o)?,
            }
        }
    }
    Ok(())
}

/// Map the dump file placeholder `-` (meaning "no value") to `None`;
/// any other string, including the empty string, is returned as `Some`.
fn optional_str(s: &str) -> Option<&str> {
    match s {
        "-" => None,
        o => Some(o),
    }
}
in mtime"))?; + Ok(Self { + sec: u64::from_str(sec)?, + nsec: u64::from_str(nsec)?, + }) + } +} + +impl<'k> Xattr<'k> { + fn parse(s: &'k str) -> Result { + let (key, value) = s + .split_once('=') + .ok_or_else(|| anyhow!("Missing = in xattrs"))?; + let key = unescape_to_osstr(key)?; + let value = unescape(value)?; + Ok(Self { key, value }) + } +} + +impl<'p> Entry<'p> { + /// Parse an entry from a composefs dump file line. + pub fn parse(s: &'p str) -> Result> { + let mut components = s.split(' '); + let mut next = |name: &str| components.next().ok_or_else(|| anyhow!("Missing {name}")); + let path = unescape_to_path(next("path")?)?; + let size = u64::from_str(next("size")?)?; + let modeval = next("mode")?; + let (is_hardlink, mode) = if let Some((_, rest)) = modeval.split_once('@') { + (true, u32::from_str_radix(rest, 8)?) + } else { + (false, u32::from_str_radix(modeval, 8)?) + }; + let nlink = u32::from_str(next("nlink")?)?; + let uid = u32::from_str(next("uid")?)?; + let gid = u32::from_str(next("gid")?)?; + let rdev = u32::from_str(next("rdev")?)?; + let mtime = Mtime::from_str(next("mtime")?)?; + let payload = optional_str(next("payload")?); + let content = optional_str(next("content")?); + let fsverity_digest = optional_str(next("digest")?); + let xattrs = components.map(Xattr::parse).collect::>>()?; + + let item = if is_hardlink { + let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; + Item::Hardlink { target } + } else { + match libc::S_IFMT & mode { + libc::S_IFREG => Item::Regular { + size, + nlink, + inline_content: content.map(unescape).transpose()?, + fsverity_digest: fsverity_digest.map(ToOwned::to_owned), + }, + libc::S_IFLNK => { + let target = + unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; + Item::Symlink { nlink, target } + } + libc::S_IFIFO => Item::Fifo { nlink }, + libc::S_IFCHR | libc::S_IFBLK => Item::Device { nlink, rdev }, + libc::S_IFDIR => Item::Directory { size, nlink }, + o => { 
+ anyhow::bail!("Unhandled mode {o:o}") + } + } + }; + Ok(Entry { + path, + uid, + gid, + mode, + mtime, + item, + xattrs, + }) + } + + /// Remove internal entries + /// FIXME: This is arguably a composefs-info dump bug? + pub fn filter_special(mut self) -> Self { + self.xattrs.retain(|v| match (v.key.as_bytes(), &*v.value) { + (b"trusted.overlay.opaque" | b"user.overlay.opaque", b"x") => false, + _ => true, + }); + self + } +} + +impl<'p> Item<'p> { + pub(crate) fn size(&self) -> u64 { + match self { + Item::Regular { size, .. } | Item::Directory { size, .. } => *size, + _ => 0, + } + } + + pub(crate) fn nlink(&self) -> u32 { + match self { + Item::Regular { nlink, .. } => *nlink, + Item::Device { nlink, .. } => *nlink, + Item::Symlink { nlink, .. } => *nlink, + Item::Directory { nlink, .. } => *nlink, + Item::Fifo { nlink, .. } => *nlink, + _ => 0, + } + } + + pub(crate) fn rdev(&self) -> u32 { + match self { + Item::Device { rdev, .. } => *rdev, + _ => 0, + } + } + + pub(crate) fn payload(&self) -> Option<&Path> { + match self { + Item::Symlink { target, .. } => Some(target), + Item::Hardlink { target } => Some(target), + _ => None, + } + } +} + +impl Display for Mtime { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}.{}", self.sec, self.nsec) + } +} + +impl<'p> Display for Entry<'p> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + escape(f, self.path.as_os_str().as_bytes(), EscapeMode::Standard)?; + write!( + f, + " {} {:o} {} {} {} {} {} ", + self.item.size(), + self.mode, + self.item.nlink(), + self.uid, + self.gid, + self.item.rdev(), + self.mtime, + )?; + if let Some(payload) = self.item.payload() { + escape(f, payload.as_os_str().as_bytes(), EscapeMode::Standard)?; + f.write_char(' ')?; + } else { + write!(f, "- ")?; + } + match &self.item { + Item::Regular { + fsverity_digest, + inline_content, + .. 
+ } => { + if let Some(content) = inline_content { + escape(f, content, EscapeMode::Standard)?; + f.write_char(' ')?; + } else { + write!(f, "- ")?; + } + let fsverity_digest = fsverity_digest.as_deref().unwrap_or("-"); + write!(f, "{fsverity_digest}")?; + } + _ => { + write!(f, "- -")?; + } + } + for xattr in self.xattrs.iter() { + f.write_char(' ')?; + escape(f, xattr.key.as_bytes(), EscapeMode::XattrKey)?; + f.write_char('=')?; + escape(f, &xattr.value, EscapeMode::Standard)?; + } + std::fmt::Result::Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const SPECIALS: &[&str] = &["", "foo=bar=baz", r"\x01\x02", "-"]; + const UNQUOTED: &[&str] = &["foo!bar", "hello-world", "--"]; + + #[test] + fn test_escape_roundtrip() { + let cases = SPECIALS.iter().chain(UNQUOTED); + for case in cases { + let mut buf = String::new(); + escape(&mut buf, case.as_bytes(), EscapeMode::Standard).unwrap(); + let case2 = unescape(&buf).unwrap(); + assert_eq!(case, &String::from_utf8(case2.into()).unwrap()); + } + } + + #[test] + fn test_escape_unquoted() { + let cases = UNQUOTED; + for case in cases { + let mut buf = String::new(); + escape(&mut buf, case.as_bytes(), EscapeMode::Standard).unwrap(); + assert_eq!(case, &buf); + } + } + + #[test] + fn test_escape_quoted() { + // We don't escape `=` in standard mode + { + let mut buf = String::new(); + escape(&mut buf, b"=", EscapeMode::Standard).unwrap(); + assert_eq!(buf, "="); + } + // Verify other special cases + let cases = &[("=", r"\x3d"), ("-", r"\x2d")]; + for (src, expected) in cases { + let mut buf = String::new(); + escape(&mut buf, src.as_bytes(), EscapeMode::XattrKey).unwrap(); + assert_eq!(expected, &buf); + } + } + + #[test] + fn test_parse() { + const CONTENT: &str = include_str!("../../tests/assets/special.dump"); + for line in CONTENT.lines() { + // Test a full round trip by parsing, serialize, parsing again + let e = Entry::parse(line).unwrap(); + let serialized = e.to_string(); + assert_eq!(line, 
serialized); + let e2 = Entry::parse(&serialized).unwrap(); + assert_eq!(e, e2); + } + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 00000000..2fdf1efb --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,62 @@ +//! # Rust composefs library +//! +//! This crate builds on top of the core composefs tooling, adding a Rust +//! API especially oriented around OCI container images. + +// See https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![forbid(unused_must_use)] +#![deny(unsafe_code)] +#![cfg_attr(feature = "dox", feature(doc_cfg))] +#![deny(clippy::dbg_macro)] +#![deny(clippy::todo)] + +use std::{ + fs::File, + io::{BufRead, BufReader}, +}; + +use anyhow::{Context, Result}; +use dumpfile::Entry; + +pub mod dumpfile; +pub mod mkcomposefs; + +/// Parse a composefs superblock. +pub fn dump(f: File, mut callback: F) -> Result<()> +where + F: FnMut(&'_ Entry) -> Result<()>, +{ + let mut cmd = std::process::Command::new("composefs-info"); + cmd.args(["dump", "/proc/self/fd/0"]) + .stdin(std::process::Stdio::from(f)) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()); + let mut proc = cmd.spawn().context("spawning composefs-info dump")?; + // SAFETY: We provided a pipe + let child_stdout = BufReader::new(proc.stdout.take().unwrap()); + std::thread::scope(|s| { + let reader = s.spawn(move || -> anyhow::Result<()> { + let r = proc.wait_with_output()?; + if !r.status.success() { + let stderr = String::from_utf8_lossy(&r.stderr); + let stderr = stderr.trim(); + anyhow::bail!("composefs-info dump failed: {}: {stderr}", r.status) + } + Ok(()) + }); + for line in child_stdout.lines() { + let line = line?; + // FIXME: try removing filter_special + let entry = Entry::parse(&line)?.filter_special(); + callback(&entry)?; + } + // SAFETY: We shouldn't fail to join the thread + reader + .join() + .unwrap() + .context("Processing composefs-info 
/// Configuration for `mkcomposefs`
///
/// Every field is optional; unset fields add no command line argument.
#[derive(Debug, Default)]
pub struct Config {
    // Passed through as `--digest-store=<value>`.
    digest_store: Option<String>,
    // Passed through as `--min-version=<value>`.
    min_version: Option<u32>,
    // Passed through as `--max-version=<value>`.
    max_version: Option<u32>,
}

impl Config {
    /// Render the configured options as `mkcomposefs` command line arguments,
    /// in the order digest store, minimum version, maximum version.
    fn to_args(&self) -> impl Iterator<Item = String> {
        self.digest_store
            .as_deref()
            .map(|v| format!("--digest-store={v}"))
            .into_iter()
            .chain(self.min_version.map(|v| format!("--min-version={v}")))
            .chain(self.max_version.map(|v| format!("--max-version={v}")))
    }
}
+pub fn mkcomposefs( + config: Config, + entries: mpsc::Receiver>, + output: File, +) -> Result<()> { + let mut cmd = new_mkcomposefs_command(config, output)?; + let mut proc = cmd.spawn().context("Spawning mkcomposefs")?; + // SAFETY: we set up stdin + let mut child_stdin = std::io::BufWriter::new(proc.stdin.take().unwrap()); + std::thread::scope(|s| { + // Spawn a helper thread which handles writing to the child stdin, while the main + // thread handles reading from stderr (if any) and otherwise just being blocked in wait(). + // The composefs subprocess itself writes to the output file. + let writer = s.spawn(move || -> anyhow::Result<()> { + for entry in entries { + writeln!(child_stdin, "{entry}")?; + } + // Flush and close child's stdin + drop(child_stdin.into_inner()?); + Ok(()) + }); + let r = proc.wait_with_output()?; + if !r.status.success() { + let stderr = String::from_utf8_lossy(&r.stderr); + let stderr = stderr.trim(); + anyhow::bail!("mkcomposefs failed: {}: {stderr}", r.status) + } + // SAFETY: We shouldn't fail to join the thread + writer.join().unwrap()?; + anyhow::Ok(()) + }) +} + +#[test] +fn test_mkcomposefs() -> Result<()> { + use super::dumpfile::Entry; + use std::fmt::Write as _; + let td = tempfile::tempdir()?; + let td = td.path(); + let outpath = &td.join("out"); + let o = File::create(outpath)?; + let (send, recv) = mpsc::sync_channel(5); + const CONTENT: &str = include_str!("../../tests/assets/special.dump"); + std::thread::scope(|s| { + let producer = s.spawn(move || { + for line in CONTENT.lines() { + if send.send(Entry::parse(line)?).is_err() { + break; + } + } + anyhow::Ok(()) + }); + mkcomposefs(Config::default(), recv, o)?; + producer.join().unwrap()?; + anyhow::Ok(()) + })?; + let mut reparsed_content = String::new(); + let o = File::open(outpath)?; + super::dump(o, |entry| { + writeln!(reparsed_content, "{entry}").map_err(anyhow::Error::from) + }) + .unwrap(); + let mut reparsed_content = reparsed_content.lines().fuse(); + for 
line in CONTENT.lines() { + assert_eq!(line, reparsed_content.next().unwrap()); + } + assert!(reparsed_content.next().is_none()); + Ok(()) +}