diff --git a/benches/lib.rs b/benches/lib.rs index e3f67a5af..275e8471e 100644 --- a/benches/lib.rs +++ b/benches/lib.rs @@ -51,6 +51,11 @@ use std::time::Duration; // https://bheisler.github.io/criterion.rs/book/analysis.html#measurement const SAMPLE_SIZE: usize = 20; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + fn custom_criterion() -> Criterion { Criterion::default() .measurement_time(Duration::from_secs(40)) @@ -63,7 +68,8 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) { || random_bytes(bytes_len), // actual benchmark |bytes| { - let (_data_map, _encrypted_chunks) = encrypt(bytes).unwrap(); + let (_data_map, _encrypted_chunks) = + encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(); }, BatchSize::SmallInput, ); @@ -72,7 +78,7 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) { fn read(b: &mut Bencher, bytes_len: usize) { b.iter_batched( // the setup - || encrypt(random_bytes(bytes_len)).unwrap(), + || encrypt(random_bytes(bytes_len), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(), // actual benchmark |(data_map, encrypted_chunks)| { let _raw_data = decrypt_full_set(&data_map, &encrypted_chunks).unwrap(); diff --git a/examples/basic_encryptor.rs b/examples/basic_encryptor.rs index ee1fb2b49..f26d4a85b 100644 --- a/examples/basic_encryptor.rs +++ b/examples/basic_encryptor.rs @@ -89,6 +89,11 @@ fn file_name(name: XorName) -> String { string } +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + #[derive(Clone)] struct DiskBasedStorage { pub(crate) storage_path: String, @@ -147,7 +152,8 @@ async fn main() { Err(error) => return println!("{}", error), } - let (data_map, encrypted_chunks) = encrypt(Bytes::from(data)).unwrap(); + let (data_map, encrypted_chunks) = + encrypt(Bytes::from(data), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(); let result = encrypted_chunks .par_iter() diff --git a/src/chunk.rs b/src/chunk.rs index ab65225c0..a5ed1ffe9 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -32,15 +32,15 @@ pub struct RawChunk { /// Hash all the chunks. /// Creates [num cores] batches. -pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec) { +pub(crate) fn batch_chunks(bytes: Bytes, max_chunk_size: usize) -> (usize, Vec) { let data_size = bytes.len(); - let num_chunks = get_num_chunks(data_size); + let num_chunks = get_num_chunks(data_size, max_chunk_size); let raw_chunks: Vec<_> = (0..num_chunks) .map(|index| (index, bytes.clone())) .par_bridge() .map(|(index, bytes)| { - let (start, end) = get_start_end_positions(data_size, index); + let (start, end) = get_start_end_positions(data_size, index, max_chunk_size); let data = bytes.slice(start..end); let hash = XorName::from_content(data.as_ref()); RawChunk { index, data, hash } @@ -63,10 +63,10 @@ pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec) { } /// Calculate (start_position, end_position) for each chunk for the input file size -pub(crate) fn batch_positions(data_size: usize) -> Vec<(usize, usize)> { - let num_chunks = get_num_chunks(data_size); +pub(crate) fn batch_positions(data_size: usize, max_chunk_size: usize) -> Vec<(usize, usize)> { + let num_chunks = get_num_chunks(data_size, max_chunk_size); (0..num_chunks) - .map(|index| get_start_end_positions(data_size, index)) + .map(|index| get_start_end_positions(data_size, index, max_chunk_size)) .collect() } diff --git a/src/data_map.rs b/src/data_map.rs index c96019d0a..428573f5f 100644 --- a/src/data_map.rs +++ b/src/data_map.rs @@ -13,10 +13,14 @@ use xor_name::XorName; /// Holds the information that is required to recover the content of the encrypted file. /// This is held as a vector of `ChunkInfo`, i.e. a list of the file's chunk hashes. -/// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted. +/// Only files larger than 3072 bytes (3 * chunk size) can be self-encrypted. /// Smaller files will have to be batched together. #[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone)] -pub struct DataMap(Vec); +pub struct DataMap { + /// max chunk size used during encryption + max_chunk_size: usize, + chunks: Vec, +} #[allow(clippy::len_without_is_empty)] impl DataMap { @@ -25,19 +29,26 @@ impl DataMap { /// Sorts on instantiation. /// The algorithm requires this to be a sorted list to allow get_pad_iv_key to obtain the /// correct pre-encryption hashes for decryption/encryption. - pub fn new(mut keys: Vec) -> Self { + pub fn new(max_chunk_size: usize, mut keys: Vec) -> Self { keys.sort_by(|a, b| a.index.cmp(&b.index)); - Self(keys) + Self { + max_chunk_size, + chunks: keys, + } } /// Original (pre-encryption) size of the file. pub fn file_size(&self) -> usize { - DataMap::total_size(&self.0) + DataMap::total_size(&self.chunks) } + /// Returns the maximum chunk size used during encryption. + pub fn max_chunk_size(&self) -> usize { + self.max_chunk_size + } /// Returns the list of chunks pre and post encryption hashes if present. pub fn infos(&self) -> Vec { - self.0.to_vec() + self.chunks.to_vec() } /// Iterates through the keys to figure out the total size of the data, i.e. the file size. @@ -48,9 +59,9 @@ impl DataMap { impl Debug for DataMap { fn fmt(&self, formatter: &mut Formatter) -> Result<(), std::fmt::Error> { - writeln!(formatter, "DataMap:")?; - let len = self.0.len(); - for (index, chunk) in self.0.iter().enumerate() { + writeln!(formatter, "DataMap max_chunk {:?}:", self.max_chunk_size)?; + let len = self.chunks.len(); + for (index, chunk) in self.chunks.iter().enumerate() { if index + 1 == len { write!(formatter, " {:?}", chunk)? } else { diff --git a/src/encrypt.rs b/src/encrypt.rs index 9f152c488..2c54e0b55 100644 --- a/src/encrypt.rs +++ b/src/encrypt.rs @@ -22,7 +22,10 @@ use std::sync::Arc; use xor_name::XorName; /// Encrypt the chunks -pub(crate) fn encrypt(batches: Vec) -> (DataMap, Vec) { +pub(crate) fn encrypt( + max_chunk_size: usize, + batches: Vec, +) -> (DataMap, Vec) { let src_hashes = Arc::new( batches .iter() @@ -84,7 +87,7 @@ pub(crate) fn encrypt(batches: Vec) -> (DataMap, Vec() - .unwrap_or(DEFAULT_MAX_CHUNK_SIZE); -} - -/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +/// Minimum possible size in bytes of a chunk pub const MIN_CHUNK_SIZE: usize = 1; /// Controls the compression-speed vs compression-density tradeoffs. The higher the quality, the /// slower the compression. Range is 0 to 11. @@ -168,17 +155,23 @@ pub struct StreamSelfEncryptor { src_hashes: BTreeMap, // File path to flush encrypted_chunks into. chunk_dir: Option, + // Max chunks size used during encryption + max_chunk_size: usize, } impl StreamSelfEncryptor { /// For encryption, return with an intialized streaming encryptor. /// If a `chunk_dir` is provided, the encrypted_chunks will be written into the specified dir as well. - pub fn encrypt_from_file(file_path: PathBuf, chunk_dir: Option) -> Result { + pub fn encrypt_from_file( + file_path: PathBuf, + chunk_dir: Option, + max_chunk_size: usize, + ) -> Result { let file = File::open(&*file_path)?; let metadata = file.metadata()?; let file_size = metadata.len(); - let batch_positions = batch_positions(file_size as usize); + let batch_positions = batch_positions(file_size as usize, max_chunk_size); Ok(StreamSelfEncryptor { file_path, @@ -187,6 +180,7 @@ impl StreamSelfEncryptor { data_map: Vec::new(), src_hashes: BTreeMap::new(), chunk_dir, + max_chunk_size, }) } @@ -194,7 +188,10 @@ impl StreamSelfEncryptor { /// Note: only of the two returned options will be `Some`. pub fn next_encryption(&mut self) -> Result<(Option, Option)> { if self.chunk_index >= self.batch_positions.len() { - return Ok((None, Some(DataMap::new(self.data_map.clone())))); + return Ok(( + None, + Some(DataMap::new(self.max_chunk_size, self.data_map.clone())), + )); } let (src_hash, content) = self.read_chunk(self.chunk_index)?; @@ -360,13 +357,17 @@ impl StreamSelfDecryptor { } /// Read a file from the disk to encrypt, and output the chunks to a given output directory if presents. -pub fn encrypt_from_file(file_path: &Path, output_dir: &Path) -> Result<(DataMap, Vec)> { +pub fn encrypt_from_file( + file_path: &Path, + output_dir: &Path, + max_chunk_size: usize, +) -> Result<(DataMap, Vec)> { let mut file = File::open(file_path)?; let mut bytes = Vec::new(); let _ = file.read_to_end(&mut bytes)?; let bytes = Bytes::from(bytes); - let (data_map, encrypted_chunks) = encrypt(bytes)?; + let (data_map, encrypted_chunks) = encrypt(bytes, max_chunk_size)?; let mut chunk_names = Vec::new(); for chunk in encrypted_chunks { @@ -413,15 +414,16 @@ pub fn decrypt_from_chunk_files( /// Returns an error if the size is too small for self-encryption. /// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted. /// Smaller files will have to be batched together for self-encryption to work. -pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec)> { - if (MIN_ENCRYPTABLE_BYTES) > bytes.len() { +pub fn encrypt(bytes: Bytes, max_chunk_size: usize) -> Result<(DataMap, Vec)> { + let min_encryptable_bytes = 3 * MIN_CHUNK_SIZE; + if (min_encryptable_bytes) > bytes.len() { return Err(Error::Generic(format!( "Too small for self-encryption! Required size at least {}", - MIN_ENCRYPTABLE_BYTES + min_encryptable_bytes ))); } - let (num_chunks, batches) = chunk::batch_chunks(bytes); - let (data_map, encrypted_chunks) = encrypt::encrypt(batches); + let (num_chunks, batches) = chunk::batch_chunks(bytes, max_chunk_size); + let (data_map, encrypted_chunks) = encrypt::encrypt(max_chunk_size, batches); if num_chunks > encrypted_chunks.len() { return Err(Error::Encryption); } @@ -490,13 +492,13 @@ pub struct SeekInfo { /// It is used to first fetch chunks using the `index_range`. /// Then the chunks are passed into `self_encryption::decrypt_range` together /// with `relative_pos` from the `SeekInfo` instance, and the `len` to be read. -pub fn seek_info(file_size: usize, pos: usize, len: usize) -> SeekInfo { - let (start_index, end_index) = overlapped_chunks(file_size, pos, len); +pub fn seek_info(file_size: usize, pos: usize, len: usize, max_chunk_size: usize) -> SeekInfo { + let (start_index, end_index) = overlapped_chunks(file_size, pos, len, max_chunk_size); - let relative_pos = if start_index == 2 && file_size < 3 * *MAX_CHUNK_SIZE { - pos - (2 * get_chunk_size(file_size, 0)) + let relative_pos = if start_index == 2 && file_size < 3 * max_chunk_size { + pos - (2 * get_chunk_size(file_size, 0, max_chunk_size)) } else { - pos % get_chunk_size(file_size, start_index) + pos % get_chunk_size(file_size, start_index, max_chunk_size) }; SeekInfo { @@ -511,7 +513,12 @@ pub fn seek_info(file_size: usize, pos: usize, len: usize) -> SeekInfo { /// Returns the chunk index range [start, end) that is overlapped by the byte range defined by `pos` /// and `len`. Returns empty range if `file_size` is so small that there are no chunks. -fn overlapped_chunks(file_size: usize, pos: usize, len: usize) -> (usize, usize) { +fn overlapped_chunks( + file_size: usize, + pos: usize, + len: usize, + max_chunk_size: usize, +) -> (usize, usize) { // FIX THIS SHOULD NOT BE ALLOWED if file_size < (3 * MIN_CHUNK_SIZE) || pos >= file_size || len == 0 { return (0, 0); @@ -523,8 +530,8 @@ fn overlapped_chunks(file_size: usize, pos: usize, len: usize) -> (usize, usize) None => file_size, }; - let start_index = get_chunk_index(file_size, pos); - let end_index = get_chunk_index(file_size, end); + let start_index = get_chunk_index(file_size, pos, max_chunk_size); + let end_index = get_chunk_index(file_size, end, max_chunk_size); (start_index, end_index) } @@ -571,26 +578,26 @@ fn get_pki(src_hash: &XorName, n_1_src_hash: &XorName, n_2_src_hash: &XorName) - } // Returns the number of chunks according to file size. -fn get_num_chunks(file_size: usize) -> usize { +fn get_num_chunks(file_size: usize, max_chunk_size: usize) -> usize { if file_size < (3 * MIN_CHUNK_SIZE) { return 0; } - if file_size < (3 * *MAX_CHUNK_SIZE) { + if file_size < (3 * max_chunk_size) { return 3; } - if file_size % *MAX_CHUNK_SIZE == 0 { - file_size / *MAX_CHUNK_SIZE + if file_size % max_chunk_size == 0 { + file_size / max_chunk_size } else { - (file_size / *MAX_CHUNK_SIZE) + 1 + (file_size / max_chunk_size) + 1 } } -// Returns the size of a chunk according to file size. -fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize { +// Returns the size of a chunk according to file size and defined chunk sizes. +fn get_chunk_size(file_size: usize, chunk_index: usize, max_chunk_size: usize) -> usize { if file_size < 3 * MIN_CHUNK_SIZE { return 0; } - if file_size < 3 * *MAX_CHUNK_SIZE { + if file_size < 3 * max_chunk_size { if chunk_index < 2 { return file_size / 3; } else { @@ -598,58 +605,66 @@ fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize { return file_size - (2 * (file_size / 3)); } } - let total_chunks = get_num_chunks(file_size); + let total_chunks = get_num_chunks(file_size, max_chunk_size); if chunk_index < total_chunks - 2 { - return *MAX_CHUNK_SIZE; + return max_chunk_size; } - let remainder = file_size % *MAX_CHUNK_SIZE; + let remainder = file_size % max_chunk_size; let penultimate = (total_chunks - 2) == chunk_index; if remainder == 0 { - return *MAX_CHUNK_SIZE; + return max_chunk_size; } if remainder < MIN_CHUNK_SIZE { if penultimate { - *MAX_CHUNK_SIZE - MIN_CHUNK_SIZE + max_chunk_size - MIN_CHUNK_SIZE } else { MIN_CHUNK_SIZE + remainder } } else if penultimate { - *MAX_CHUNK_SIZE + max_chunk_size } else { remainder } } // Returns the [start, end) half-open byte range of a chunk. -fn get_start_end_positions(file_size: usize, chunk_index: usize) -> (usize, usize) { - if get_num_chunks(file_size) == 0 { +fn get_start_end_positions( + file_size: usize, + chunk_index: usize, + max_chunk_size: usize, +) -> (usize, usize) { + if get_num_chunks(file_size, max_chunk_size) == 0 { return (0, 0); } - let start = get_start_position(file_size, chunk_index); - (start, start + get_chunk_size(file_size, chunk_index)) + let start = get_start_position(file_size, chunk_index, max_chunk_size); + ( + start, + start + get_chunk_size(file_size, chunk_index, max_chunk_size), + ) } -fn get_start_position(file_size: usize, chunk_index: usize) -> usize { - let total_chunks = get_num_chunks(file_size); +fn get_start_position(file_size: usize, chunk_index: usize, max_chunk_size: usize) -> usize { + let total_chunks = get_num_chunks(file_size, max_chunk_size); if total_chunks == 0 { return 0; } let last = (total_chunks - 1) == chunk_index; - let first_chunk_size = get_chunk_size(file_size, 0); + let first_chunk_size = get_chunk_size(file_size, 0, max_chunk_size); if last { - first_chunk_size * (chunk_index - 1) + get_chunk_size(file_size, chunk_index - 1) + first_chunk_size * (chunk_index - 1) + + get_chunk_size(file_size, chunk_index - 1, max_chunk_size) } else { first_chunk_size * chunk_index } } -fn get_chunk_index(file_size: usize, position: usize) -> usize { - let num_chunks = get_num_chunks(file_size); +fn get_chunk_index(file_size: usize, position: usize, max_chunk_size: usize) -> usize { + let num_chunks = get_num_chunks(file_size, max_chunk_size); if num_chunks == 0 { return 0; // FIX THIS SHOULD NOT BE ALLOWED } - let chunk_size = get_chunk_size(file_size, 0); + let chunk_size = get_chunk_size(file_size, 0, max_chunk_size); let remainder = file_size % chunk_size; if remainder == 0 diff --git a/src/tests.rs b/src/tests.rs index deaa519e0..b3ae5ae11 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -9,7 +9,7 @@ use crate::{ decrypt_full_set, decrypt_range, encrypt, get_chunk_size, get_num_chunks, overlapped_chunks, seek_info, test_helpers::random_bytes, DataMap, EncryptedChunk, Error, StreamSelfDecryptor, - StreamSelfEncryptor, MIN_ENCRYPTABLE_BYTES, + StreamSelfEncryptor, }; use bytes::Bytes; use itertools::Itertools; @@ -20,6 +20,13 @@ use std::{ }; use tempfile::tempdir; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + +const MIN_ENCRYPTABLE_BYTES: usize = 3 * MIN_CHUNK_SIZE; + #[test] fn test_stream_self_encryptor() -> Result<(), Error> { // Create a 10MB temporary file @@ -34,8 +41,12 @@ fn test_stream_self_encryptor() -> Result<(), Error> { create_dir_all(chunk_path.clone())?; // Encrypt the file using StreamSelfEncryptor - let mut encryptor = - StreamSelfEncryptor::encrypt_from_file(file_path, Some(chunk_path.clone()))?; + let mut encryptor = StreamSelfEncryptor::encrypt_from_file( + file_path, + Some(chunk_path.clone()), + MIN_CHUNK_SIZE, + MAX_CHUNK_SIZE, + )?; let mut encrypted_chunks = Vec::new(); let mut data_map = None; while let Ok((chunk, map)) = encryptor.next_encryption() { @@ -100,7 +111,7 @@ fn write_and_read() -> Result<(), Error> { let file_size = 10_000_000; let bytes = random_bytes(file_size); - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; let raw_data = decrypt_full_set(&data_map, &encrypted_chunks)?; compare(bytes, raw_data) @@ -112,20 +123,20 @@ fn seek_indices() -> Result<(), Error> { let pos = 0; let len = file_size / 2; - let info = seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(1, info.index_range.end); let pos = len; - let info = seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(512, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(2, info.index_range.end); - let info = seek_info(file_size, pos, len + 1); + let info = seek_info(file_size, pos, len + 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(512, info.relative_pos); assert_eq!(1, info.index_range.start); @@ -140,25 +151,25 @@ fn seek_indices_on_medium_size_file() -> Result<(), Error> { let pos = 0; let len = 131072; - let info = seek_info(file_size, pos, len); + let info = seek_info(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); - let info = seek_info(file_size, 131072, len); + let info = seek_info(file_size, 131072, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(131072, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); - let info = seek_info(file_size, 393216, len); + let info = seek_info(file_size, 393216, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(70128, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(1, info.index_range.end); - let info = seek_info(file_size, 655360, len); + let info = seek_info(file_size, 655360, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(9184, info.relative_pos); assert_eq!(2, info.index_range.start); @@ -172,42 +183,42 @@ fn seek_indices_on_small_size_file() -> Result<(), Error> { let file_size = 1024; // first byte of index 0 - let info = seek_info(file_size, 0, 340); + let info = seek_info(file_size, 0, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); // first byte of index 1 - let info = seek_info(file_size, 341, 340); + let info = seek_info(file_size, 341, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(1, info.index_range.start); assert_eq!(1, info.index_range.end); // first byte of index 2 - let info = seek_info(file_size, 682, 340); + let info = seek_info(file_size, 682, 340, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(0, info.relative_pos); assert_eq!(2, info.index_range.start); assert_eq!(2, info.index_range.end); // last byte of index 2 - let info = seek_info(file_size, file_size - 1, 1); + let info = seek_info(file_size, file_size - 1, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(341, info.relative_pos); assert_eq!(2, info.index_range.start); assert_eq!(2, info.index_range.end); // overflow - should this error? - let info = seek_info(file_size, file_size, 1); + let info = seek_info(file_size, file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(1, info.relative_pos); assert_eq!(0, info.index_range.start); assert_eq!(0, info.index_range.end); // last byte of index 2 (as 2 remainders in last chunk) - let info = seek_info(file_size + 1, file_size, 1); + let info = seek_info(file_size + 1, file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); assert_eq!(342, info.relative_pos); assert_eq!(2, info.index_range.start); @@ -220,21 +231,48 @@ fn seek_indices_on_small_size_file() -> Result<(), Error> { fn get_chunk_sizes() -> Result<(), Error> { let file_size = 969_265; - assert_eq!(323088, get_chunk_size(file_size, 0)); - assert_eq!(323088, get_chunk_size(file_size, 1)); - assert_eq!(323089, get_chunk_size(file_size, 2)); + assert_eq!( + 323088, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 323088, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 323089, + get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); let file_size = 1024; - assert_eq!(341, get_chunk_size(file_size, 0)); - assert_eq!(341, get_chunk_size(file_size, 1)); - assert_eq!(342, get_chunk_size(file_size, 2)); + assert_eq!( + 341, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 341, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 342, + get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); let file_size = 1025; - assert_eq!(341, get_chunk_size(file_size, 0)); - assert_eq!(341, get_chunk_size(file_size, 1)); - assert_eq!(343, get_chunk_size(file_size, 2)); + assert_eq!( + 341, + get_chunk_size(file_size, 0, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 341, + get_chunk_size(file_size, 1, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); + assert_eq!( + 343, + get_chunk_size(file_size, 2, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE) + ); Ok(()) } @@ -247,7 +285,7 @@ fn seek_and_join() -> Result<(), Error> { for divisor in 2..15 { let len = file_size / divisor; let data = random_bytes(file_size); - let (data_map, encrypted_chunks) = encrypt_chunks(data.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(data.clone())?; // Read first part let read_data_1 = { @@ -282,7 +320,13 @@ fn seek( len: usize, ) -> Result { let expected_data = bytes.slice(pos..(pos + len)); - let info = seek_info(data_map.file_size(), pos, len); + let info = seek_info( + data_map.file_size(), + pos, + len, + MIN_CHUNK_SIZE, + MAX_CHUNK_SIZE, + ); // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -314,10 +358,11 @@ fn seek_over_chunk_limit() -> Result<(), Error> { let expected_data = bytes.slice(pos..(pos + len)); // the chunks covering the bytes we want to read - let (start_index, end_index) = overlapped_chunks(file_size, pos, len); + let (start_index, end_index) = + overlapped_chunks(file_size, pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); // first encrypt the whole file - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -327,7 +372,8 @@ fn seek_over_chunk_limit() -> Result<(), Error> { .collect(); // the start position within the first chunk (thus `relative`..) - let relative_pos = pos % get_chunk_size(file_size, start_index); + let relative_pos = + pos % get_chunk_size(file_size, start_index, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); let read_data = decrypt_range(&data_map, &subset, relative_pos, len)?; compare(expected_data, read_data)?; @@ -345,10 +391,11 @@ fn seek_with_length_over_data_size() -> Result<(), Error> { let len = bytes.len() - start_pos + 1; // the chunks covering the bytes we want to read - let (start_index, end_index) = overlapped_chunks(file_size, start_pos, len); + let (start_index, end_index) = + overlapped_chunks(file_size, start_pos, len, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); // first encrypt the whole file - let (data_map, encrypted_chunks) = encrypt_chunks(bytes.clone())?; + let (data_map, encrypted_chunks) = test_encrypt_chunks(bytes.clone())?; // select a subset of chunks; the ones covering the bytes we want to read let subset: Vec<_> = encrypted_chunks @@ -380,9 +427,9 @@ fn compare(original: Bytes, result: Bytes) -> Result<(), Error> { Ok(()) } -fn encrypt_chunks(bytes: Bytes) -> Result<(DataMap, Vec), Error> { - let num_chunks = get_num_chunks(bytes.len()); - let (data_map, encrypted_chunks) = encrypt(bytes)?; +fn test_encrypt_chunks(bytes: Bytes) -> Result<(DataMap, Vec), Error> { + let num_chunks = get_num_chunks(bytes.len(), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); + let (data_map, encrypted_chunks) = encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE)?; assert_eq!(num_chunks, encrypted_chunks.len()); diff --git a/tests/lib.rs b/tests/lib.rs index 2834cbaeb..2f33a8b7a 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -51,9 +51,14 @@ )] use bytes::Bytes; -use self_encryption::{encrypt, ChunkInfo, Result, MAX_CHUNK_SIZE}; +use self_encryption::{encrypt, ChunkInfo, Result}; use xor_name::XorName; +/// The maximum size (before compression) of an individual chunk of a file, defined as 1024kiB. +const MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// The minimum size (before compression) of an individual chunk of a file, defined as 1B. +const MIN_CHUNK_SIZE: usize = 1; + #[tokio::test] async fn cross_platform_check() -> Result<()> { let content_size: usize = 20 * *MAX_CHUNK_SIZE + 100; @@ -62,7 +67,7 @@ async fn cross_platform_check() -> Result<()> { *c = (i % 17) as u8; } - let (data_map, _) = encrypt(Bytes::from(content))?; + let (data_map, _) = encrypt(Bytes::from(content), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE)?; // (NB: this hard-coded ref needs update if algorithm changes) let ref_data_map = vec![