-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Start implementing something like a repo format
Also: some kind of support for reading tar files into the repo in a way that allows byte-for-byte reproducing them later.
- Loading branch information
1 parent
c03acf7
commit 58b32d6
Showing
8 changed files
with
329 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
use composefs_experiments::{ | ||
fsverity::Sha256HashValue, | ||
tar::split, | ||
repository::Repository, | ||
}; | ||
|
||
// produce a splitstream from a tar | ||
fn main() { | ||
let repo = Repository::open("/home/lis/.var/lib/composefs").expect("open cfs-repo"); | ||
|
||
split( | ||
&mut std::io::stdin(), | ||
&mut std::io::stdout(), | ||
|data: Vec<u8>| -> std::io::Result<Sha256HashValue> { | ||
repo.ensure_data(&data) | ||
} | ||
).expect("split"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,6 @@ | ||
mod util; | ||
pub mod repository; | ||
pub mod fsverity; | ||
pub mod splitstream; | ||
pub mod tar; | ||
pub mod tmpdir; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
use std::path::PathBuf; | ||
use std::io::ErrorKind; | ||
use std::os::fd::OwnedFd; | ||
|
||
use rustix::fs::{ | ||
CWD, | ||
linkat, | ||
Access, | ||
AtFlags, | ||
Mode, | ||
OFlags, | ||
accessat, | ||
mkdirat, | ||
open, | ||
openat, | ||
fdatasync, | ||
}; | ||
|
||
use crate::{ | ||
fsverity::{ | ||
Sha256HashValue, | ||
digest::FsVerityHasher, | ||
ioctl::{ | ||
fs_ioc_enable_verity, | ||
fs_ioc_measure_verity, | ||
}, | ||
}, | ||
util::proc_self_fd, | ||
}; | ||
|
||
/// Handle to an on-disk repository, held as a pair of open directory file descriptors.
#[derive(Debug)]
pub struct Repository {
    /// Descriptor for the repository directory itself.
    repository: OwnedFd,
    /// Descriptor for the `objects` directory inside the repository.
    objects: OwnedFd,
}
|
||
impl Repository { | ||
pub fn open(path: &str) -> std::io::Result<Repository> { | ||
let repository = open(path, OFlags::PATH, Mode::empty())?; | ||
let objects = openat(&repository, "objects", OFlags::PATH, Mode::empty())?; | ||
|
||
Ok(Repository { repository, objects }) | ||
} | ||
|
||
pub fn ensure_data(&self, data: &[u8]) -> std::io::Result<Sha256HashValue> { | ||
let digest = FsVerityHasher::hash(data); | ||
let dir = PathBuf::from(format!("{:02x}", digest[0])); | ||
let file = dir.join(hex::encode(&digest[1..])); | ||
|
||
if accessat(&self.objects, &file, Access::READ_OK, AtFlags::empty()) == Ok(()) { | ||
return Ok(digest); | ||
} | ||
|
||
if let Err(err) = mkdirat(&self.objects, &dir, 0o777.into()) { | ||
if err.kind() != ErrorKind::AlreadyExists { | ||
return Err(err.into()); | ||
} | ||
} | ||
|
||
let fd = openat(&self.objects, &dir, | ||
OFlags::RDWR | OFlags::CLOEXEC | OFlags::TMPFILE, 0o666.into() | ||
)?; | ||
|
||
rustix::io::write(&fd, data)?; // TODO: no write_all() here... | ||
|
||
fdatasync(&fd)?; | ||
|
||
// We can't enable verity with an open writable fd, so re-open and close the old one. | ||
let ro_fd = open(proc_self_fd(&fd), OFlags::RDONLY, Mode::empty())?; | ||
drop(fd); | ||
|
||
fs_ioc_enable_verity::<&OwnedFd, Sha256HashValue>(&ro_fd)?; | ||
|
||
// double-check | ||
let measured_digest: Sha256HashValue = fs_ioc_measure_verity(&ro_fd)?; | ||
assert!(measured_digest == digest); | ||
|
||
if let Err(err) = linkat(CWD, proc_self_fd(&ro_fd), &self.objects, file, AtFlags::SYMLINK_FOLLOW) { | ||
if err.kind() != ErrorKind::AlreadyExists { | ||
return Err(err.into()); | ||
} | ||
} | ||
|
||
drop(ro_fd); | ||
Ok(digest) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* Implementation of the Split Stream file format | ||
* | ||
* Split Stream is a trivial way of storing file formats (like tar) with the "data blocks" stored | ||
* in the composefs object tree. It's something like tar-split, but is based on content-addressed | ||
* storage of the file data and is implemented using a trivial binary-ish format. | ||
* | ||
* It is expected that the splitstream will be compressed before being stored on disk. | ||
* | ||
* The file format consists of a number of data blocks. | ||
* | ||
* | ||
* Each block starts with a u64 le "size" field followed by some amount of data. | ||
* | ||
* 64bit variable | ||
* +--------+---------------.... | ||
* | size | data... | ||
* +--------+---------------.... | ||
* | ||
* There are two kinds of blocks. | ||
* | ||
* - size != 0: in this case the length of the data is equal to the size. This is "inline data". | ||
* There is no padding, which implies that the size fields after the first may be unaligned. | ||
* | ||
* - size == 0: in this case the length of the data is 32 bytes. This is the binary form of a | ||
* sha256 hash value and is a reference to an object in the composefs repository. | ||
* | ||
* That's it, really. There's no header. The file ends when there are no more blocks. | ||
*/ | ||
|
||
use std::io::Write; | ||
use crate::fsverity::Sha256HashValue; | ||
|
||
// utility class to help write splitstreams | ||
pub struct SplitStreamWriter<'w, W: Write> { | ||
inline_content: Vec<u8>, | ||
writer: &'w mut W | ||
|
||
} | ||
|
||
impl<'w, W: Write> SplitStreamWriter<'w, W> { | ||
pub fn new(writer: &'w mut W) -> SplitStreamWriter<'w, W> { | ||
SplitStreamWriter { inline_content: vec![], writer } | ||
} | ||
|
||
fn write_fragment(writer: &mut W, size: usize, data: &[u8]) -> std::io::Result<()> { | ||
writer.write_all(&(size as u64).to_le_bytes())?; | ||
writer.write_all(data) | ||
} | ||
|
||
/// flush any buffered inline data, taking new_value as the new value of the buffer | ||
fn flush_inline(&mut self, new_value: Vec<u8>) -> std::io::Result<()> { | ||
if !self.inline_content.is_empty() { | ||
SplitStreamWriter::write_fragment(self.writer, self.inline_content.len(), &self.inline_content)?; | ||
self.inline_content = new_value; | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// really, "add inline content to the buffer" | ||
/// you need to call .flush_inline() later | ||
pub fn write_inline(&mut self, data: &[u8]) { | ||
self.inline_content.extend(data); | ||
} | ||
|
||
/// write a reference to external data to the stream. If the external data had padding in the | ||
/// stream which is not stored in the object then pass it here as well and it will be stored | ||
/// inline after the reference. | ||
pub fn write_reference(&mut self, reference: Sha256HashValue, padding: Vec<u8>) -> std::io::Result<()> { | ||
// Flush the inline data before we store the external reference. Any padding from the | ||
// external data becomes the start of a new inline block. | ||
self.flush_inline(padding)?; | ||
|
||
SplitStreamWriter::write_fragment(self.writer, 0, &reference) | ||
} | ||
|
||
pub fn done(&mut self) -> std::io::Result<()> { | ||
self.flush_inline(vec![]) | ||
} | ||
} | ||
|
||
// TODO: reader side... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
use std::io::{Read, Write}; | ||
use crate::fsverity::Sha256HashValue; | ||
use crate::splitstream::SplitStreamWriter; | ||
|
||
/// One raw 512-byte tar header block.
struct TarHeader {
    data: [u8; 512],
}

impl TarHeader {
    /// Reads exactly one 512-byte header from `reader`.
    ///
    /// Returns `Ok(None)` if the reader is at end-of-file before any byte of the header has
    /// been read, and an `UnexpectedEof` error if it ends part-way through the block.
    /// Interrupted reads are retried.
    // we can't use Read::read_exact() because we need to be able to detect EOF
    fn read<R: Read>(reader: &mut R) -> std::io::Result<Option<TarHeader>> {
        let mut header = TarHeader { data: [0u8; 512] };
        let mut filled = 0;

        while filled < header.data.len() {
            match reader.read(&mut header.data[filled..]) {
                // clean EOF on a block boundary: no more headers
                Ok(0) if filled == 0 => return Ok(None),
                Ok(0) => return Err(std::io::ErrorKind::UnexpectedEof.into()),
                Ok(n) => filled += n,
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }

        Ok(Some(header))
    }

    /// Decodes the 12-byte size field found at offset 124 of the header.
    ///
    /// Two encodings are understood:
    /// - if the high bit of the first byte is set, the last 8 bytes are a big-endian binary
    ///   number;
    /// - otherwise the field is an octal number, optionally preceded by spaces and terminated
    ///   by the first byte that is not an octal digit (typically NUL or space).
    fn get_size(&self) -> usize {
        let size_field = &self.data[124..124 + 12];

        if size_field[0] & 0x80 != 0 {
            // binary representation
            size_field[4..12]
                .iter()
                .fold(0usize, |value, &byte| (value << 8) | byte as usize)
        } else {
            // Octal representation. Stop at anything that isn't an octal digit rather than
            // only at NUL, so that space-terminated fields don't underflow `byte - b'0'`.
            size_field
                .iter()
                .skip_while(|&&byte| byte == b' ')
                .take_while(|&&byte| matches!(byte, b'0'..=b'7'))
                .fold(0usize, |value, &byte| (value << 3) | (byte - b'0') as usize)
        }

        // TODO: not too big, I hope...
    }

    /// Size of the entry's data as stored in the archive: the size rounded up to a multiple
    /// of 512.
    fn get_storage_size(&self) -> usize {
        // round up to nearest multiple of 512
        (self.get_size() + 511) & !511
    }

    /// Returns true if the type flag (byte 156) is `'0'`.
    fn is_reg(&self) -> bool {
        self.data[156] == b'0'
    }
}
|
||
/// Splits the tar file from tar_stream into a Split Stream. The store_data function is | ||
/// responsible for ensuring that "external data" is in the composefs repository and returns the | ||
/// fsverity hash value of that data. | ||
pub fn split<R: Read, W: Write, F: FnMut(Vec<u8>) -> std::io::Result<Sha256HashValue>>( | ||
tar_stream: &mut R, | ||
split_stream: &mut W, | ||
mut store_data: F, | ||
) -> std::io::Result<()> { | ||
let mut writer = SplitStreamWriter::new(split_stream); | ||
|
||
while let Some(header) = TarHeader::read(tar_stream)? { | ||
// the header always gets stored as inline data | ||
writer.write_inline(&header.data); | ||
|
||
// read the corresponding data, if there is any | ||
let storage_size = header.get_storage_size(); | ||
let mut buffer = vec![0u8; storage_size]; | ||
tar_stream.read_exact(&mut buffer)?; | ||
|
||
if header.is_reg() && storage_size > 0 { | ||
// non-empty regular file: store the data in the object store | ||
let actual_size = header.get_size(); | ||
let padding = buffer.split_off(actual_size); | ||
let reference = store_data(buffer)?; | ||
writer.write_reference(reference, padding)?; | ||
} else { | ||
// else: store the data inline in the split stream | ||
writer.write_inline(&buffer); | ||
} | ||
} | ||
|
||
// flush out any remaining inline data | ||
writer.done() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
use std::os::fd::{ | ||
AsFd, | ||
AsRawFd, | ||
}; | ||
|
||
/// Builds the `/proc/self/fd/<n>` path for the raw descriptor number behind `fd`.
pub fn proc_self_fd<A: AsFd>(fd: &A) -> String {
    let raw = fd.as_fd().as_raw_fd();

    let mut path = String::from("/proc/self/fd/");
    path.push_str(&raw.to_string());
    path
}