diff --git a/src/bin/cfsctl.rs b/src/bin/cfsctl.rs index 78f92f8..ddab151 100644 --- a/src/bin/cfsctl.rs +++ b/src/bin/cfsctl.rs @@ -3,12 +3,9 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use composefs_experiments::{ - fsverity::{ - FsVerityHashValue, - Sha256HashValue, - }, oci, repository::Repository, + util::parse_sha256, }; @@ -31,9 +28,8 @@ pub struct App { enum OciCommand { /// Stores a tar file as a splitstream in the repository. ImportLayer { - name: String, - #[clap(long)] - sha256: Option, + sha256: String, + name: Option, }, /// Lists the contents of a tar stream LsLayer { @@ -104,7 +100,7 @@ fn main() -> Result<()> { } }, Command::Cat { name } => { - repo.merge_splitstream(&name, &mut std::io::stdout())?; + repo.merge_splitstream(&name, None, &mut std::io::stdout())?; }, Command::ImportImage { reference, } => { let image_id = repo.import_image(&reference, &mut std::io::stdin())?; @@ -112,14 +108,8 @@ fn main() -> Result<()> { }, Command::Oci{ cmd: oci_cmd } => match oci_cmd { OciCommand::ImportLayer { name, sha256 } => { - if let Some(digest) = sha256 { - let mut value = Sha256HashValue::EMPTY; - hex::decode_to_slice(digest, &mut value)?; - oci::import_layer_by_sha256(&repo, &name, &mut std::io::stdin(), value)?; - } else { - let stream_id = oci::import_layer(&repo, &name, &mut std::io::stdin())?; - println!("{}", hex::encode(stream_id)); - } + let object_id = oci::import_layer(&repo, &parse_sha256(sha256)?, name.as_deref(), &mut std::io::stdin())?; + println!("{}", hex::encode(object_id)); }, OciCommand::LsLayer { name } => { oci::ls_layer(&repo, &name)?; diff --git a/src/lib.rs b/src/lib.rs index cdfe8cd..dbbb6a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -mod util; +pub mod util; pub mod dumpfile; pub mod fsverity; pub mod image; diff --git a/src/oci/image.rs b/src/oci/image.rs index 81b1c68..bd321a3 100644 --- a/src/oci/image.rs +++ b/src/oci/image.rs @@ -66,7 +66,7 @@ pub fn compose_filesystem(repo: &Repository, layers: &[String]) -> Result) -> Resu let mut filesystem = FileSystem::new(); for layer in layers { - let mut split_stream = repo.open_stream(layer)?; + let mut split_stream = repo.open_stream(layer, None)?; while let Some(entry) = oci::tar::get_entry(&mut split_stream)? { process_entry(&mut filesystem, entry)?; } diff --git a/src/oci/mod.rs b/src/oci/mod.rs index 55fc27b..eaf296a 100644 --- a/src/oci/mod.rs +++ b/src/oci/mod.rs @@ -11,25 +11,14 @@ use crate::{ oci::tar::get_entry, }; -pub fn import_layer(repo: &Repository, name: &str, tar_stream: &mut R) -> Result { - let mut writer = repo.create_stream(None); - tar::split(tar_stream, &mut writer)?; - repo.store_stream(writer, name) -} - -pub fn import_layer_by_sha256( - repo: &Repository, - name: &str, - tar_stream: &mut R, - sha256: Sha256HashValue -) -> Result<()> { - repo.store_stream_by_sha256(name, sha256, |writer| { - tar::split(tar_stream, writer) - }) +pub fn import_layer( + repo: &Repository, sha256: &Sha256HashValue, name: Option<&str>, tar_stream: &mut impl Read +) -> Result { + Ok(repo.ensure_stream(sha256, |writer| tar::split(tar_stream, writer), name)?) } pub fn ls_layer(repo: &Repository, name: &str) -> Result<()> { - let mut split_stream = repo.open_stream(name)?; + let mut split_stream = repo.open_stream(name, None)?; while let Some(entry) = get_entry(&mut split_stream)? { println!("{}", entry); diff --git a/src/repository.rs b/src/repository.rs index 21b501b..eb50fb8 100644 --- a/src/repository.rs +++ b/src/repository.rs @@ -19,6 +19,7 @@ use anyhow::{ Context, Result, bail, + ensure, }; use rustix::{ fs::{ @@ -61,7 +62,10 @@ use crate::{ SplitStreamWriter, SplitStreamReader, }, - util::proc_self_fd, + util::{ + parse_sha256, + proc_self_fd, + }, }; pub struct Repository { @@ -104,14 +108,6 @@ impl Repository { .or_else(|e| match e { Errno::EXIST => Ok(()), _ => Err(e) }) } - pub fn exists(&self, name: &str) -> Result { - match accessat(&self.repository, name, Access::READ_OK, AtFlags::empty()) { - Ok(()) => Ok(true), - Err(ref e) if e.kind() == ErrorKind::NotFound => Ok(false), - Err(e) => Err(e)? - } - } - pub fn ensure_object(&self, data: &[u8]) -> Result { let digest = FsVerityHasher::hash(data); let dir = PathBuf::from(format!("objects/{:02x}", digest[0])); @@ -149,36 +145,16 @@ impl Repository { Ok(digest) } - pub fn open_with_verity(&self, filename: &str, expected_verity: Sha256HashValue) -> Result { + fn open_with_verity(&self, filename: &str, expected_verity: &Sha256HashValue) -> Result { let fd = self.openat(filename, OFlags::RDONLY)?; let measured_verity: Sha256HashValue = fs_ioc_measure_verity(&fd)?; - if measured_verity != expected_verity { + if measured_verity != *expected_verity { bail!("bad verity!") } else { Ok(fd) } } - /// Performs a lookup of a by-sha256 reference in the given category - /// If such a reference exists, this returns the underlying object ID. - pub fn find_by_sha256(&self, category: &str, sha256: Sha256HashValue) -> Result> { - let filename = format!("{}/by-sha256/{}", category, hex::encode(sha256)); - match readlinkat(&self.repository, &filename, []) { - Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(e) => Err(e)?, - Ok(linkpath) => { - let mut hash = Sha256HashValue::EMPTY; - let linkbytes = linkpath.to_bytes(); - if linkbytes.len() != 67 || &linkbytes[0..3] != b"../" { - bail!("Incorrectly formatted symlink {:?}/{:?}", self.path, filename); - } - hex::decode_to_slice(&linkpath.to_bytes()[3..], &mut hash). - with_context(|| format!("Incorrectly formatted symlink {:?}/{:?}", self.path, filename))?; - Ok(Some(hash)) - } - } - } - /// Creates a SplitStreamWriter for writing a split stream. /// You should write the data to the returned object and then pass it to .store_stream() to /// store the result. @@ -186,89 +162,102 @@ impl Repository { SplitStreamWriter::new(self, None, sha256) } - /// Consumes the SplitStreamWriter, stores the splitstream in the object store (if it's not - /// already present), and links the named reference to it. - /// - /// - /// - /// This is an error if the reference already exists. - /// - /// In any case, the object ID (by fs-verity digest) is returned. - pub fn store_stream(&self, writer: SplitStreamWriter, name: &str) -> Result { - let object_id = writer.done()?; - - let object_path = format!("objects/{:02x}/{}", object_id[0], hex::encode(&object_id[1..])); - let stream_path = format!("streams/{}", hex::encode(object_id)); - let reference_path = format!("streams/refs/{name}"); + fn parse_object_path(path: impl AsRef<[u8]>) -> Result { + // "objects/0c/9513d99b120ee9a709c4d6554d938f6b2b7e213cf5b26f2e255c0b77e40379" + let bytes = path.as_ref(); + ensure!(bytes.len() == 73, "stream symlink has incorrect length"); + ensure!(bytes.starts_with(b"objects/"), "stream symlink has incorrect prefix"); + ensure!(bytes[10] == b'/', "stream symlink has incorrect path separator"); + let mut result = Sha256HashValue::EMPTY; + hex::decode_to_slice(&bytes[8..10], &mut result[..1]) + .context("stream symlink has incorrect format")?; + hex::decode_to_slice(&bytes[11..], &mut result[1..]) + .context("stream symlink has incorrect format")?; + Ok(result) + } - self.ensure_symlink(&stream_path, &object_path)?; - self.symlink(&reference_path, &stream_path)?; - Ok(object_id) + fn format_object_path(id: &Sha256HashValue) -> String { + format!("objects/{:02x}/{}", id[0], hex::encode(&id[1..])) } - /// A convenience function to check if a stream with the given SHA256 digest already exists. + /// Ensures that the stream with a given SHA256 digest exists in the repository. /// - /// If such a stream exists, then this function simply creates a new named reference to the - /// stream and returns the underlying object ID. + /// This tries to find the stream by the `sha256` digest of its contents. If the stream is + /// already in the repository, the object ID (fs-verity digest) is read from the symlink. If + /// the stream is not already in the repository, a `SplitStreamWriter` is created and passed to + /// `callback`. On return, the object ID of the stream will be calculated and it will be + /// written to disk (if it wasn't already created by someone else in the meantime). /// - /// If not, the user's callback is called with a SplitStreamWriter which should be populated - /// with the data for the stream. After the callback returns, we write the stream to disk and - /// link the named reference to it, returning the underlying object ID. + /// In both cases, if `reference` is provided, it is used to provide a fixed name for the + /// object. Any object that doesn't have a fixed reference to it is subject to garbage + /// collection. It is an error if this reference already exists. /// - /// It is an error if the named reference already exists. - pub fn store_stream_by_sha256 Result<()>>( - &self, name: &str, sha256: Sha256HashValue, callback: F, - ) -> Result<()> { - let by_sha256_path = format!("streams/by-sha256/{}", hex::encode(sha256)); - - if !self.exists(&by_sha256_path)? { - let mut writer = self.create_stream(Some(sha256)); - callback(&mut writer)?; - let object_id = writer.done()?; - - let object_path = format!("objects/{:02x}/{}", object_id[0], hex::encode(&object_id[1..])); - let stream_path = format!("streams/{}", hex::encode(object_id)); - self.ensure_symlink(&stream_path, &object_path)?; - self.ensure_symlink(&by_sha256_path, &stream_path)?; + /// On success, the object ID of the new object is returned. It is expected that this object + /// ID will be used when referring to the stream from other linked streams. + pub fn ensure_stream( + &self, + sha256: &Sha256HashValue, + callback: impl FnOnce(&mut SplitStreamWriter) -> Result<()>, + reference: Option<&str>, + ) -> Result { + let stream_path = format!("streams/{}", hex::encode(sha256)); + + let object_id = match readlinkat(&self.repository, &stream_path, []) { + Ok(target) => { + // NB: This is kinda unsafe: we depend that the symlink didn't get corrupted + // we could also measure the verity of the destination object, but it doesn't + // improve anything, since we don't know if it was the original one. + let bytes = target.as_bytes(); + ensure!(bytes.starts_with(b"../"), "stream symlink has incorrect prefix"); + Repository::parse_object_path(&bytes[3..])? + }, + Err(Errno::NOENT) => { + let mut writer = self.create_stream(Some(*sha256)); + callback(&mut writer)?; + let object_id = writer.done()?; + + let object_path = Repository::format_object_path(&object_id); + self.ensure_symlink(&stream_path, &object_path)?; + object_id + }, + Err(err) => Err(err)?, + }; + + if let Some(name) = reference { + let reference_path = format!("streams/refs/{name}"); + self.symlink(&reference_path, &stream_path)?; } - let reference_path = format!("streams/refs/{name}"); - self.symlink(&reference_path, &by_sha256_path)?; - Ok(()) + Ok(object_id) } - /// category is like "streams" or "images" - /// name is like "refs/1000/user/xyz" (with '/') or a sha256 hex hash value (without '/') - fn open_in_category(&self, category: &str, name: &str) -> Result { - let filename = format!("{}/{}", category, name); + pub fn open_stream(&self, name: &str, verity: Option<&Sha256HashValue>) -> Result> { + let filename = format!("streams/{}", name); - if name.contains("/") { - // no fsverity checking on this path - self.openat(&filename, OFlags::RDONLY) - } else { - // this must surely be a hash value, and we want to verify it - let mut hash = Sha256HashValue::EMPTY; - hex::decode_to_slice(name, &mut hash)?; - self.open_with_verity(&filename, hash) - } - } + let file = File::from( + if let Some(verity_hash) = verity { + self.open_with_verity(&filename, &verity_hash)? + } else { + self.openat(&filename, OFlags::RDONLY)? + } + ); - pub fn open_stream(&self, name: &str) -> Result> { - let file = File::from(self.open_in_category("streams", name)?); SplitStreamReader::new(file) } - fn open_object(&self, id: Sha256HashValue) -> Result { + fn open_object(&self, id: &Sha256HashValue) -> Result { self.open_with_verity(&format!("objects/{:02x}/{}", id[0], hex::encode(&id[1..])), id) } - pub fn merge_splitstream(&self, name: &str, stream: &mut W) -> Result<()> { - let mut split_stream = self.open_stream(name)?; + pub fn merge_splitstream( + &self, name: &str, verity: Option<&Sha256HashValue>, stream: &mut impl Write + ) -> Result<()> { + let mut split_stream = self.open_stream(name, verity)?; split_stream.cat( stream, |id| -> Result> { let mut data = vec![]; - File::from(self.open_object(*id)?).read_to_end(&mut data)?; + File::from(self.open_object(id)?).read_to_end(&mut data)?; Ok(data) } )?; @@ -290,7 +279,15 @@ impl Repository { } pub fn mount(self, name: &str, mountpoint: &str) -> Result<()> { - let image = self.open_in_category("images", name)?; + let filename = format!("images/{}", name); + + let image = if name.contains("/") { + // no fsverity checking on this path + self.openat(&filename, OFlags::RDONLY) + } else { + self.open_with_verity(&filename, &parse_sha256(name)?) + }?; + let object_path = self.path.join("objects"); mount_fd(image, &object_path, mountpoint) } @@ -426,9 +423,9 @@ impl Repository { let mut objects = HashSet::new(); - for object in self.gc_category("images")? { + for ref object in self.gc_category("images")? { println!("{} lives as an image", hex::encode(object)); - objects.insert(object); + objects.insert(*object); // composefs-info mmaps the file, so pipes aren't normally OK but we pass the // underlying file directly, which works. @@ -458,7 +455,7 @@ impl Repository { println!("{} lives as a stream", hex::encode(object)); objects.insert(object); - let mut split_stream = self.open_stream(&hex::encode(object))?; + let mut split_stream = self.open_stream(&hex::encode(object), None)?; split_stream.get_object_refs( |id| { println!(" with {}", hex::encode(*id)); diff --git a/src/splitstream.rs b/src/splitstream.rs index 37905cf..3d33909 100644 --- a/src/splitstream.rs +++ b/src/splitstream.rs @@ -76,7 +76,7 @@ pub struct SplitStreamWriter<'a> { impl<'a> SplitStreamWriter<'a> { pub fn new(repo: &Repository, refs: Option, sha256: Option) -> SplitStreamWriter { // SAFETY: we surely can't get an error writing the header to a Vec - let mut writer = zstd::stream::write::Encoder::new(vec![], 0).unwrap(); + let mut writer = Encoder::new(vec![], 0).unwrap(); match refs { Some(DigestMap { map }) => { @@ -320,4 +320,10 @@ impl SplitStreamReader { } } } + + pub fn get_stream_refs(&mut self, mut callback: impl FnMut(&Sha256HashValue)) { + for entry in &self.refs.map { + callback(&entry.body); + } + } } diff --git a/src/util.rs b/src/util.rs index 573bdf3..3b8750f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,7 +6,15 @@ use std::{ }, }; -use anyhow::Result; +use anyhow::{ + Context, + Result, +}; + +use crate::fsverity::{ + FsVerityHashValue, + Sha256HashValue, +}; pub fn proc_self_fd(fd: &A) -> String { format!("/proc/self/fd/{}", fd.as_fd().as_raw_fd()) @@ -37,3 +45,10 @@ pub fn read_exactish(reader: &mut R, buf: &mut [u8]) -> Result { Ok(true) } + +pub fn parse_sha256(string: impl AsRef) -> Result { + let mut value = Sha256HashValue::EMPTY; + hex::decode_to_slice(string.as_ref(), &mut value) + .context("Invalid SHA256 hash value")?; + Ok(value) +} diff --git a/tests/repo.rs b/tests/repo.rs index 3fda010..6cde766 100644 --- a/tests/repo.rs +++ b/tests/repo.rs @@ -8,6 +8,10 @@ use anyhow::{ Context, Result, }; +use sha2::{ + Digest, + Sha256, +}; use composefs_experiments::{ oci, @@ -47,13 +51,17 @@ fn home_var_tmp() -> Result { #[test] fn test_layer() -> Result<()> { let layer = example_layer()?; + let mut context = Sha256::new(); + context.update(&layer); + let layer_id: [u8; 32] = context.finalize().into(); + let tmpfile = tempfile::TempDir::with_prefix_in("composefs-test-", home_var_tmp()?)?; let repo = Repository::open_path(tmpfile.path().to_path_buf())?; - oci::import_layer(&repo, "name", &mut layer.as_slice())?; + let id = oci::import_layer(&repo, &layer_id, Some("name"), &mut layer.as_slice())?; let mut dump = String::new(); - let mut split_stream = repo.open_stream("refs/name")?; + let mut split_stream = repo.open_stream("refs/name", Some(&id))?; while let Some(entry) = oci::tar::get_entry(&mut split_stream)? { writeln!(dump, "{}", entry)?; }