feat: make chunk size user defined
BREAKING CHANGE: all APIs updated to have min/max encryptable bytes
passed in by the user.

This allows library users to vary the chunk size to suit their use case.
joshuef committed Oct 15, 2024
1 parent c857ae2 commit f0ad8fc
Showing 8 changed files with 211 additions and 118 deletions.
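
For callers, the practical break is that `encrypt` now takes the chunk-size bounds as extra arguments. A minimal migration sketch, with the call shape and constants taken from the diffs below (the `unwrap` handling is illustrative):

// Before this commit: chunk size was fixed inside the library.
let (data_map, encrypted_chunks) = encrypt(bytes).unwrap();

// After this commit: callers choose and pass the bounds explicitly.
const MAX_CHUNK_SIZE: usize = 1024 * 1024; // 1 MiB, the value used below
const MIN_CHUNK_SIZE: usize = 1;
let (data_map, encrypted_chunks) =
    encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();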
10 changes: 8 additions & 2 deletions benches/lib.rs
@@ -51,6 +51,11 @@ use std::time::Duration;
 // https://bheisler.github.io/criterion.rs/book/analysis.html#measurement
 const SAMPLE_SIZE: usize = 20;
 
+/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB.
+const MAX_CHUNK_SIZE: usize = 1024 * 1024;
+/// The minimum size (before compression) of an individual chunk of a file, defined as 1B.
+const MIN_CHUNK_SIZE: usize = 1;
+
 fn custom_criterion() -> Criterion {
     Criterion::default()
         .measurement_time(Duration::from_secs(40))
@@ -63,7 +68,8 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) {
         || random_bytes(bytes_len),
         // actual benchmark
         |bytes| {
-            let (_data_map, _encrypted_chunks) = encrypt(bytes).unwrap();
+            let (_data_map, _encrypted_chunks) =
+                encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();
         },
         BatchSize::SmallInput,
     );
@@ -72,7 +78,7 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) {
 fn read(b: &mut Bencher, bytes_len: usize) {
     b.iter_batched(
         // the setup
-        || encrypt(random_bytes(bytes_len)).unwrap(),
+        || encrypt(random_bytes(bytes_len), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(),
         // actual benchmark
         |(data_map, encrypted_chunks)| {
             let _raw_data = decrypt_full_set(&data_map, &encrypted_chunks).unwrap();
8 changes: 7 additions & 1 deletion examples/basic_encryptor.rs
@@ -89,6 +89,11 @@ fn file_name(name: XorName) -> String {
     string
 }
 
+/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB.
+const MAX_CHUNK_SIZE: usize = 1024 * 1024;
+/// The minimum size (before compression) of an individual chunk of a file, defined as 1B.
+const MIN_CHUNK_SIZE: usize = 1;
+
 #[derive(Clone)]
 struct DiskBasedStorage {
     pub(crate) storage_path: String,
@@ -147,7 +152,8 @@ async fn main() {
         Err(error) => return println!("{}", error),
     }
 
-    let (data_map, encrypted_chunks) = encrypt(Bytes::from(data)).unwrap();
+    let (data_map, encrypted_chunks) =
+        encrypt(Bytes::from(data), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();
 
     let result = encrypted_chunks
         .par_iter()
12 changes: 6 additions & 6 deletions src/chunk.rs
@@ -32,15 +32,15 @@ pub struct RawChunk {
 
 /// Hash all the chunks.
 /// Creates [num cores] batches.
-pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec<EncryptionBatch>) {
+pub(crate) fn batch_chunks(bytes: Bytes, max_chunk_size: usize) -> (usize, Vec<EncryptionBatch>) {
     let data_size = bytes.len();
-    let num_chunks = get_num_chunks(data_size);
+    let num_chunks = get_num_chunks(data_size, max_chunk_size);
 
     let raw_chunks: Vec<_> = (0..num_chunks)
         .map(|index| (index, bytes.clone()))
         .par_bridge()
         .map(|(index, bytes)| {
-            let (start, end) = get_start_end_positions(data_size, index);
+            let (start, end) = get_start_end_positions(data_size, index, max_chunk_size);
             let data = bytes.slice(start..end);
             let hash = XorName::from_content(data.as_ref());
             RawChunk { index, data, hash }
@@ -63,10 +63,10 @@ pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec<EncryptionBatch>) {
 }
 
 /// Calculate (start_position, end_position) for each chunk for the input file size
-pub(crate) fn batch_positions(data_size: usize) -> Vec<(usize, usize)> {
-    let num_chunks = get_num_chunks(data_size);
+pub(crate) fn batch_positions(data_size: usize, max_chunk_size: usize) -> Vec<(usize, usize)> {
+    let num_chunks = get_num_chunks(data_size, max_chunk_size);
 
     (0..num_chunks)
-        .map(|index| get_start_end_positions(data_size, index))
+        .map(|index| get_start_end_positions(data_size, index, max_chunk_size))
         .collect()
 }
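
Both `get_num_chunks` and `get_start_end_positions` now take `max_chunk_size` as well, but their bodies fall outside this diff. A plausible sketch of the chunk-count helper, assuming the library keeps its three-chunk minimum and the `MIN_CHUNK_SIZE` constant from the benches (an illustration, not the commit's actual code):

fn get_num_chunks(data_size: usize, max_chunk_size: usize) -> usize {
    if data_size < 3 * MIN_CHUNK_SIZE {
        return 0; // too small to self-encrypt
    }
    if data_size < 3 * max_chunk_size {
        return 3; // small files still split into exactly three chunks
    }
    // larger files: ceiling-divide the data size by the max chunk size
    data_size.div_ceil(max_chunk_size)
}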
29 changes: 20 additions & 9 deletions src/data_map.rs
@@ -13,10 +13,14 @@ use xor_name::XorName;
 
 /// Holds the information that is required to recover the content of the encrypted file.
 /// This is held as a vector of `ChunkInfo`, i.e. a list of the file's chunk hashes.
-/// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted.
+/// Only files larger than 3072 bytes (3 * chunk size) can be self-encrypted.
 /// Smaller files will have to be batched together.
 #[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone)]
-pub struct DataMap(Vec<ChunkInfo>);
+pub struct DataMap {
+    /// max chunk size used during encryption
+    max_chunk_size: usize,
+    chunks: Vec<ChunkInfo>,
+}
 
 #[allow(clippy::len_without_is_empty)]
 impl DataMap {
@@ -25,19 +29,26 @@ impl DataMap {
     /// Sorts on instantiation.
     /// The algorithm requires this to be a sorted list to allow get_pad_iv_key to obtain the
     /// correct pre-encryption hashes for decryption/encryption.
-    pub fn new(mut keys: Vec<ChunkInfo>) -> Self {
+    pub fn new(max_chunk_size: usize, mut keys: Vec<ChunkInfo>) -> Self {
         keys.sort_by(|a, b| a.index.cmp(&b.index));
-        Self(keys)
+        Self {
+            max_chunk_size,
+            chunks: keys,
+        }
     }
 
     /// Original (pre-encryption) size of the file.
     pub fn file_size(&self) -> usize {
-        DataMap::total_size(&self.0)
+        DataMap::total_size(&self.chunks)
     }
 
+    /// Returns the maximum chunk size used during encryption.
+    pub fn max_chunk_size(&self) -> usize {
+        self.max_chunk_size
+    }
     /// Returns the list of chunks pre and post encryption hashes if present.
     pub fn infos(&self) -> Vec<ChunkInfo> {
-        self.0.to_vec()
+        self.chunks.to_vec()
     }
 
     /// Iterates through the keys to figure out the total size of the data, i.e. the file size.
@@ -48,9 +59,9 @@
 
 impl Debug for DataMap {
     fn fmt(&self, formatter: &mut Formatter) -> Result<(), std::fmt::Error> {
-        writeln!(formatter, "DataMap:")?;
-        let len = self.0.len();
-        for (index, chunk) in self.0.iter().enumerate() {
+        writeln!(formatter, "DataMap max_chunk {:?}:", self.max_chunk_size)?;
+        let len = self.chunks.len();
+        for (index, chunk) in self.chunks.iter().enumerate() {
             if index + 1 == len {
                 write!(formatter, " {:?}", chunk)?
             } else {
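
Since `DataMap` derives `Serialize`/`Deserialize`, replacing the tuple struct with a named struct carrying an extra field also changes the serialized layout, so data maps written by earlier releases will not deserialize against this version; that follows from the derive rather than from anything the commit states. Using the new accessor (constants as above):

let data_map = DataMap::new(MAX_CHUNK_SIZE, chunk_infos);
assert_eq!(data_map.max_chunk_size(), MAX_CHUNK_SIZE);
println!("{:?}", data_map); // Debug output now includes the max chunk size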
7 changes: 5 additions & 2 deletions src/encrypt.rs
@@ -22,7 +22,10 @@ use std::sync::Arc;
 use xor_name::XorName;
 
 /// Encrypt the chunks
-pub(crate) fn encrypt(batches: Vec<EncryptionBatch>) -> (DataMap, Vec<EncryptedChunk>) {
+pub(crate) fn encrypt(
+    max_chunk_size: usize,
+    batches: Vec<EncryptionBatch>,
+) -> (DataMap, Vec<EncryptedChunk>) {
     let src_hashes = Arc::new(
         batches
             .iter()
@@ -84,7 +87,7 @@ pub(crate) fn encrypt(
         },
     );
 
-    (DataMap::new(keys), chunks)
+    (DataMap::new(max_chunk_size, keys), chunks)
 }
 
 /// Encrypt the chunk
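
Putting the changed pieces together, a round trip through the new public API looks roughly like this (call shapes taken from the benches above; the input size is arbitrary):

let bytes = random_bytes(10 * 1024 * 1024);
let (data_map, encrypted_chunks) =
    encrypt(bytes.clone(), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();
let decrypted = decrypt_full_set(&data_map, &encrypted_chunks).unwrap();
assert_eq!(bytes, decrypted);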
(Diffs for the remaining 3 changed files were not loaded.)
