From 140a0b0cb48f967a75aa786c9271ff8d4a04e59e Mon Sep 17 00:00:00 2001
From: TrAyZeN <1810leo@gmail.com>
Date: Thu, 2 May 2024 12:02:42 +0200
Subject: [PATCH] Use faster fold + reduce_with in helpers

From benchmarks it increases performance by around 10% for cpa and by
30-60% for snr.
---
 src/cpa.rs        | 23 +++++++++++------------
 src/cpa_normal.rs | 15 ++++++++-------
 src/dpa.rs        | 25 ++++++++++++++-----------
 src/processors.rs | 26 ++++++++++++++------------
 4 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/src/cpa.rs b/src/cpa.rs
index e3ccd50..1e67d78 100644
--- a/src/cpa.rs
+++ b/src/cpa.rs
@@ -25,25 +25,24 @@ where
     assert_eq!(leakages.shape()[0], plaintexts.shape()[0]);
     assert!(chunk_size > 0);
 
+    // From benchmarks fold + reduce_with is faster than map + reduce/reduce_with and fold + reduce
     zip(
         leakages.axis_chunks_iter(Axis(0), chunk_size),
         plaintexts.axis_chunks_iter(Axis(0), chunk_size),
     )
     .par_bridge()
-    .map(|(leakages_chunk, plaintexts_chunk)| {
-        let mut cpa =
-            CpaProcessor::new(leakages.shape()[1], guess_range, target_byte, leakage_func);
-
-        for i in 0..leakages_chunk.shape()[0] {
-            cpa.update(leakages_chunk.row(i), plaintexts_chunk.row(i));
-        }
-
-        cpa
-    })
-    .reduce(
+    .fold(
         || CpaProcessor::new(leakages.shape()[1], guess_range, target_byte, leakage_func),
-        |a, b| a + b,
+        |mut cpa, (leakages_chunk, plaintexts_chunk)| {
+            for i in 0..leakages_chunk.shape()[0] {
+                cpa.update(leakages_chunk.row(i), plaintexts_chunk.row(i));
+            }
+
+            cpa
+        },
     )
+    .reduce_with(|a, b| a + b)
+    .unwrap()
     .finalize()
 }
 
diff --git a/src/cpa_normal.rs b/src/cpa_normal.rs
index f08ba5f..ee5aea2 100644
--- a/src/cpa_normal.rs
+++ b/src/cpa_normal.rs
@@ -28,15 +28,16 @@ where
         plaintexts.axis_chunks_iter(Axis(0), chunk_size),
     )
     .par_bridge()
-    .map(|(leakages_chunk, plaintexts_chunk)| {
-        let mut cpa = CpaProcessor::new(leakages.shape()[1], chunk_size, guess_range, leakage_func);
-        cpa.update(leakages_chunk, plaintexts_chunk);
-        cpa
-    })
-    .reduce(
+    .fold(
         || CpaProcessor::new(leakages.shape()[1], chunk_size, guess_range, leakage_func),
-        |x, y| x + y,
+        |mut cpa, (leakages_chunk, plaintexts_chunk)| {
+            cpa.update(leakages_chunk, plaintexts_chunk);
+
+            cpa
+        },
     )
+    .reduce_with(|x, y| x + y)
+    .unwrap()
     .finalize()
 }
 
diff --git a/src/dpa.rs b/src/dpa.rs
index c2832f2..41083ab 100644
--- a/src/dpa.rs
+++ b/src/dpa.rs
@@ -4,6 +4,8 @@ use std::{iter::zip, ops::Add};
 
 use crate::util::max_per_row;
 
+/// # Panics
+/// Panics if `chunk_size` is not strictly positive.
 pub fn dpa(
     leakages: ArrayView2,
     metadata: ArrayView1,
@@ -15,24 +17,25 @@ where
     T: Into + Copy + Sync,
     M: Clone + Sync,
 {
+    assert!(chunk_size > 0);
+
     zip(
         leakages.axis_chunks_iter(Axis(0), chunk_size),
         metadata.axis_chunks_iter(Axis(0), chunk_size),
     )
     .par_bridge()
-    .map(|(leakages_chunk, metadata_chunk)| {
-        let mut dpa = DpaProcessor::new(leakages.shape()[1], guess_range, leakage_func);
-
-        for i in 0..leakages_chunk.shape()[0] {
-            dpa.update(leakages_chunk.row(i), metadata_chunk[i].clone());
-        }
-
-        dpa
-    })
-    .reduce(
+    .fold(
         || DpaProcessor::new(leakages.shape()[1], guess_range, leakage_func),
-        |a, b| a + b,
+        |mut dpa, (leakages_chunk, metadata_chunk)| {
+            for i in 0..leakages_chunk.shape()[0] {
+                dpa.update(leakages_chunk.row(i), metadata_chunk[i].clone());
+            }
+
+            dpa
+        },
     )
+    .reduce_with(|a, b| a + b)
+    .unwrap()
     .finalize()
 }
 
diff --git a/src/processors.rs b/src/processors.rs
index 98846ed..6b75829 100644
--- a/src/processors.rs
+++ b/src/processors.rs
@@ -97,20 +97,22 @@ where
 {
     assert!(chunk_size > 0);
 
+    // From benchmarks fold + reduce_with is faster than map + reduce/reduce_with and fold + reduce
     leakages
         .axis_chunks_iter(Axis(0), chunk_size)
         .enumerate()
         .par_bridge()
-        .map(|(chunk_idx, leakages_chunk)| {
-            let mut snr = Snr::new(leakages.shape()[1], classes);
-
-            for i in 0..leakages_chunk.shape()[0] {
-                snr.process(leakages_chunk.row(i), get_class(chunk_idx + i));
-            }
-
-            snr
-        })
-        .reduce(|| Snr::new(leakages.shape()[1], classes), |a, b| a + b)
+        .fold(
+            || Snr::new(leakages.shape()[1], classes),
+            |mut snr, (chunk_idx, leakages_chunk)| {
+                for i in 0..leakages_chunk.shape()[0] {
+                    snr.process(leakages_chunk.row(i), get_class(chunk_idx + i));
+                }
+                snr
+            },
+        )
+        .reduce_with(|a, b| a + b)
+        .unwrap()
         .snr()
 }
 
@@ -169,14 +171,14 @@ impl Snr {
             let class_sum = self.classes_sum.slice(s![class, ..]);
 
             for i in 0..size {
-                acc[i] += (class_sum[i] as f64).powf(2.0) / (self.classes_count[class] as f64);
+                acc[i] += (class_sum[i] as f64).powi(2) / (self.classes_count[class] as f64);
             }
         }
 
         let var = self.mean_var.var();
         let mean = self.mean_var.mean();
         // V[E[L|X]]
-        let velx = (acc / self.mean_var.count as f64) - mean.mapv(|x| x.powf(2.0));
+        let velx = (acc / self.mean_var.count as f64) - mean.mapv(|x| x.powi(2));
         1f64 / (var / velx - 1f64)
     }
 }
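Note (not part of the patch): a minimal self-contained sketch of the Rayon fold + reduce_with pattern the helpers switch to, using a plain integer sum in place of the crate's CpaProcessor/DpaProcessor/Snr accumulators. With map + reduce, one intermediate value is built and merged per item (here, per chunk); with fold + reduce_with, each Rayon job folds a batch of items into a single locally created accumulator and only the per-job results are merged, which is the likely source of the measured speedup. reduce_with returns an Option because the iterator may be empty, hence the unwrap in the helpers above.

// Sketch only; assumes the rayon crate is available.
use rayon::prelude::*;

fn main() {
    let data: Vec<u64> = (1..=1_000_000).collect();

    // map + reduce (old style): one intermediate value per item, all merged.
    let slow: u64 = data.par_iter().map(|&x| x).reduce(|| 0, |a, b| a + b);

    // fold + reduce_with (new style): each Rayon job folds many items into one
    // local accumulator; only the per-job accumulators are merged at the end.
    // reduce_with returns None for an empty iterator, hence unwrap_or here.
    let fast: u64 = data
        .par_iter()
        .fold(|| 0u64, |acc, &x| acc + x)
        .reduce_with(|a, b| a + b)
        .unwrap_or(0);

    assert_eq!(slow, fast);
    println!("{fast}");
}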