From a5363423069df12d3ce325361bbed6c208cc2703 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 18 Sep 2024 19:52:08 -0700 Subject: [PATCH] refactor: Changed display and refactored bench --- benches/benchmark_indices.rs | 176 ++++++++++---------- src/main.rs | 12 ++ src/models/indices/transposed_quad_index.rs | 87 ++++++---- src/utils/display.rs | 52 ++++-- 4 files changed, 196 insertions(+), 131 deletions(-) diff --git a/benches/benchmark_indices.rs b/benches/benchmark_indices.rs index 6d79e1f..66465aa 100644 --- a/benches/benchmark_indices.rs +++ b/benches/benchmark_indices.rs @@ -1,4 +1,7 @@ -use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; +use criterion::{ + criterion_group, criterion_main, measurement::Measurement, BatchSize, BenchmarkFilter, + BenchmarkGroup, BenchmarkId, Criterion, +}; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -82,8 +85,6 @@ fn build_elution_groups(raw_file_path: String) -> Vec { } fn criterion_benchmark(c: &mut Criterion) { - env_logger::init(); - let (raw_file_path, basename) = get_file_from_env(); let mut group = c.benchmark_group("Encoding Time"); @@ -112,128 +113,119 @@ fn criterion_benchmark(c: &mut Criterion) { group.finish(); } -fn thoughput_benchmark_random(c: &mut Criterion) { - let (raw_file_path, basename) = get_file_from_env(); - let mut group = c.benchmark_group("RandomAccessThroughput"); - group.significance_level(0.05).sample_size(10); - group.throughput(criterion::Throughput::Elements(NUM_ELUTION_GROUPS as u64)); - group.bench_function( - BenchmarkId::new("TransposedQuadIndex", basename.clone()), - |b| { +macro_rules! add_bench_random { + ($group:expr, $raw_file_path:expr, $basename:expr, $name:literal, $index_type:ty, $tolerance_type:ty, ) => { + $group.bench_function(BenchmarkId::new($name, $basename.clone()), |b| { b.iter_batched( || { ( - QuadSplittedTransposedIndex::from_path(&(raw_file_path.clone())).unwrap(), - build_elution_groups(raw_file_path.clone()), - DefaultTolerance::default(), + <$index_type>::from_path(&($raw_file_path.clone())).unwrap(), + build_elution_groups($raw_file_path.clone()), + <$tolerance_type>::default(), ) }, - |(qst_file_index, query_groups, tolerance)| { - let aggregator_factory = |id| RawPeakIntensityAggregator { intensity: 0 }; + |(index, query_groups, tolerance)| { + let aggregator_factory = |_id| RawPeakIntensityAggregator { intensity: 0 }; let local_lambda = |elution_group: &ElutionGroup| { query_indexed( - &qst_file_index, + &index, &aggregator_factory, - &qst_file_index, + &index, &tolerance, &elution_group, ) }; for elution_group in query_groups { let foo = local_lambda(&elution_group); - black_box((|foo| false)(foo)); + black_box((|_foo| false)(foo)); } }, BatchSize::PerIteration, ) - }, - ); - group.bench_function(BenchmarkId::new("RawFileIndex", basename.clone()), |b| { - b.iter_batched( - || { - ( - RawFileIndex::from_path(&(raw_file_path.clone())).unwrap(), - build_elution_groups(raw_file_path.clone()), - DefaultTolerance::default(), - ) - }, - |(raw_file_index, query_groups, tolerance)| { - let aggregator_factory = |id| RawPeakIntensityAggregator { intensity: 0 }; - let local_lambda = |elution_group: &ElutionGroup| { - query_indexed( - &raw_file_index, - &aggregator_factory, - &raw_file_index, - &tolerance, - &elution_group, - ) - }; - for elution_group in query_groups { - let foo = local_lambda(&elution_group); - black_box((|foo| false)(foo)); - } - }, - BatchSize::PerIteration, - ) - }); - - group.finish(); + }); + }; } - -fn thoughput_benchmark_optim(c: &mut Criterion) { - let (raw_file_path, basename) = get_file_from_env(); - let mut group = c.benchmark_group("BatchAccessThroughput"); - group.significance_level(0.05).sample_size(10); - group.throughput(criterion::Throughput::Elements(NUM_ELUTION_GROUPS as u64)); - group.bench_function( - BenchmarkId::new("TransposedQuadIndex", basename.clone()), - |b| { +macro_rules! add_bench_optim { + ($group:expr, $raw_file_path:expr, $basename:expr, $name:literal, $index_type:ty, $tolerance_type:ty, $query_func:expr,) => { + $group.bench_function(BenchmarkId::new($name, $basename.clone()), |b| { b.iter_batched( || { ( - QuadSplittedTransposedIndex::from_path(&(raw_file_path.clone())).unwrap(), - build_elution_groups(raw_file_path.clone()), - DefaultTolerance::default(), + <$index_type>::from_path(&($raw_file_path.clone())).unwrap(), + build_elution_groups($raw_file_path.clone()), + <$tolerance_type>::default(), ) }, - |(qst_file_index, query_groups, tolerance)| { - let aggregator_factory = |id| RawPeakIntensityAggregator { intensity: 0 }; + |(index, query_groups, tolerance)| { + let aggregator_factory = |_id| RawPeakIntensityAggregator { intensity: 0 }; let foo = query_multi_group( - &qst_file_index, - &qst_file_index, + &index, + &index, &tolerance, &query_groups, &aggregator_factory, ); - black_box((|foo| false)(foo)); + black_box((|_foo| false)(foo)); }, BatchSize::PerIteration, ) - }, + }); + }; +} + +fn thoughput_benchmark_random(c: &mut Criterion) { + let (raw_file_path, basename) = get_file_from_env(); + let mut group = c.benchmark_group("RandomAccessThroughput"); + group.significance_level(0.05).sample_size(10); + group.throughput(criterion::Throughput::Elements(NUM_ELUTION_GROUPS as u64)); + + add_bench_random!( + group, + raw_file_path, + basename, + "TransposedQuadIndex", + QuadSplittedTransposedIndex, + DefaultTolerance, + ); + + add_bench_random!( + group, + raw_file_path, + basename, + "RawFileIndex", + RawFileIndex, + DefaultTolerance, + ); + + group.finish(); +} + +fn thoughput_benchmark_optim(c: &mut Criterion) { + env_logger::init(); + let (raw_file_path, basename) = get_file_from_env(); + let mut group = c.benchmark_group("BatchAccessThroughput"); + group.significance_level(0.05).sample_size(10); + group.throughput(criterion::Throughput::Elements(NUM_ELUTION_GROUPS as u64)); + + add_bench_optim!( + group, + raw_file_path, + basename, + "TransposedQuadIndex", + QuadSplittedTransposedIndex, + DefaultTolerance, + query_multi_group, + ); + + add_bench_optim!( + group, + raw_file_path, + basename, + "RawFileIndex", + RawFileIndex, + DefaultTolerance, + query_multi_group, ); - group.bench_function(BenchmarkId::new("RawFileIndex", basename.clone()), |b| { - b.iter_batched( - || { - ( - RawFileIndex::from_path(&(raw_file_path.clone())).unwrap(), - build_elution_groups(raw_file_path.clone()), - DefaultTolerance::default(), - ) - }, - |(raw_file_index, query_groups, tolerance)| { - let aggregator_factory = |id| RawPeakIntensityAggregator { intensity: 0 }; - let foo = query_multi_group( - &raw_file_index, - &raw_file_index, - &tolerance, - &query_groups, - &aggregator_factory, - ); - black_box((|foo| false)(foo)); - }, - BatchSize::PerIteration, - ) - }); group.finish(); } diff --git a/src/main.rs b/src/main.rs index 69e2d0e..a16a394 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +use std::default; + use timsquery::models::elution_group::ElutionGroup; use timsquery::queriable_tims_data::queriable_tims_data::query_multi_group; use timsquery::traits::tolerance::DefaultTolerance; @@ -58,6 +60,12 @@ struct Args { command: Option, } +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)] +enum PossibleAggregator { + #[default] + RawPeakIntensityAggregator, +} + #[derive(Parser, Debug)] struct QueryIndexArgs { /// The path to the raw file to query. @@ -79,6 +87,10 @@ struct QueryIndexArgs { // Whether the output json should be pretty printed. #[arg(short, long)] pretty: bool, + + // The aggregator to use. + #[arg(short, long, default_value_t, value_enum)] + aggregator: PossibleAggregator, } #[derive(Parser, Debug)] diff --git a/src/models/indices/transposed_quad_index.rs b/src/models/indices/transposed_quad_index.rs index 555a7df..6f51f2f 100644 --- a/src/models/indices/transposed_quad_index.rs +++ b/src/models/indices/transposed_quad_index.rs @@ -6,7 +6,7 @@ use crate::models::queries::PrecursorIndexQuery; use crate::sort_by_indices_multi; use crate::traits::indexed_data::IndexedData; use crate::utils::compress_explode::{compress_vec, explode_vec}; -use crate::utils::display::glimpse_vec; +use crate::utils::display::{glimpse_vec, GlimpseConfig}; use crate::utils::sorting::argsort_by; use crate::ToleranceAdapter; use core::num; @@ -31,6 +31,7 @@ use timsrust::{Frame, QuadrupoleSettings}; pub struct QuadSplittedTransposedIndex { indices: HashMap, TransposedQuadIndex>, + flat_quad_settings: Vec, rt_converter: Frame2RtConverter, mz_converter: Tof2MzConverter, im_converter: Scan2ImConverter, @@ -81,9 +82,9 @@ impl QuadSplittedTransposedIndex { &self, precursor_mz_range: (f64, f64), scan_range: Option<(usize, usize)>, - ) -> impl Iterator> + '_ { - self.indices - .keys() + ) -> impl Iterator + '_ { + self.flat_quad_settings + .iter() .filter(move |qs| { (qs.ranges.isolation_low <= precursor_mz_range.1) && (precursor_mz_range.0 <= qs.ranges.isolation_high) @@ -97,6 +98,7 @@ impl QuadSplittedTransposedIndex { }) .map(|x| x.clone()) } + fn queries_from_elution_elements_impl( &self, tol: &dyn crate::traits::tolerance::Tolerance, @@ -126,22 +128,6 @@ impl QuadSplittedTransposedIndex { self.rt_converter.invert(rt_range.1) as usize, ); - // let frame_index_range = ( - // self.rt_converter.invert(elution_group.rt_seconds) as usize, - // self.rt_converter.invert(elution_group.rt_seconds) as usize, - // ); - // let mz_index_range = ( - // self.mz_converter.invert(elution_group.precursor_mz) as u32, - // self.mz_converter.invert(elution_group.precursor_mz) as u32, - // ) - // let mobility_index_range = ( - // self.im_converter.invert(elution_group.mobility) as usize, - // self.im_converter.invert(elution_group.mobility) as usize, - // ); - // let isolation_mz_range = ( - // self.mz_converter.invert(elution_group.precursor_mz - elution_group.precursor_charge as f64) as f64, - // self.mz_converter.invert(elution_group.precursor_mz + elution_group.precursor_charge as f64) as f64, - // ); let precursor_query = PrecursorIndexQuery { frame_index_range, mz_index_range, @@ -177,6 +163,15 @@ impl Display for QuadSplittedTransposedIndex { disp_str.push_str(&format!("rt_converter: ... not showing ...\n",)); disp_str.push_str(&format!("mz_converter: {:?}\n", self.mz_converter)); disp_str.push_str(&format!("im_converter: {:?}\n", self.im_converter)); + disp_str.push_str(&"flat_quad_settings: \n"); + disp_str.push_str(&glimpse_vec( + &self.flat_quad_settings, + Some(GlimpseConfig { + max_items: 10, + padding: 4, + new_line: true, + }), + )); let mut num_shown = 0; for (qs, tqi) in self.indices.iter() { disp_str.push_str(&format!(" - {}: \n", qs)); @@ -255,15 +250,28 @@ impl QuadSplittedTransposedIndexBuilder { } fn build(self) -> QuadSplittedTransposedIndex { - let mut out = QuadSplittedTransposedIndex { - indices: HashMap::new(), + let mut indices = HashMap::new(); + let mut flat_quad_settings = Vec::new(); + for (qs, builder) in self.indices { + let tmp = Arc::new(qs); + indices.insert(tmp.clone(), builder.build()); + flat_quad_settings.push(qs.clone()); + } + flat_quad_settings.sort_by(|a, b| { + a.ranges + .isolation_mz + .partial_cmp(&b.ranges.isolation_mz) + .unwrap() + }); + + let out = QuadSplittedTransposedIndex { + indices: indices, + flat_quad_settings, rt_converter: self.rt_converter, mz_converter: self.mz_converter, im_converter: self.im_converter, }; - for (qs, builder) in self.indices { - out.indices.insert(Arc::new(qs), builder.build()); - } + out } } @@ -331,7 +339,7 @@ impl Display for TransposedQuadIndex { f, "TransposedQuadIndex\n quad_settings: {}\n frame_index_rt_pairs: {}\n peak_buckets: {}\n", self.quad_settings, - glimpse_vec(&self.frame_index_rt_pairs), + glimpse_vec(&self.frame_index_rt_pairs, Some(GlimpseConfig { max_items: 10, padding: 2, new_line: true })), display_opt_peak_bucket_vec(&self.peak_buckets), ) } @@ -541,9 +549,30 @@ impl Display for PeakBucket { f, "PeakBucket: len={}, local_frame_indices={}, scan_offsets={}, intensities={}", self.len(), - glimpse_vec(&self.local_frame_indices), - glimpse_vec(&self.scan_offsets), - glimpse_vec(&self.intensities) + glimpse_vec( + &self.local_frame_indices, + Some(GlimpseConfig { + max_items: 10, + padding: 2, + new_line: false + }) + ), + glimpse_vec( + &self.scan_offsets, + Some(GlimpseConfig { + max_items: 10, + padding: 2, + new_line: false + }) + ), + glimpse_vec( + &self.intensities, + Some(GlimpseConfig { + max_items: 10, + padding: 2, + new_line: false + }) + ) ) } } diff --git a/src/utils/display.rs b/src/utils/display.rs index ff583df..132312c 100644 --- a/src/utils/display.rs +++ b/src/utils/display.rs @@ -1,28 +1,60 @@ -pub fn glimpse_vec(v: &[T]) -> String { - // Display short slices as [1,2,3], long slices as - // [1,2,3 ... 123,124,125] (len = 125) +#[derive(Debug, Clone, Copy)] +pub struct GlimpseConfig { + pub max_items: usize, + pub padding: usize, + pub new_line: bool, +} + +impl Default for GlimpseConfig { + fn default() -> Self { + GlimpseConfig { + max_items: 10, + padding: 0, + new_line: false, + } + } +} + +pub fn glimpse_vec(v: &[T], config: Option) -> String { + let config = config.unwrap_or_default(); let len = v.len(); let mut out = String::new(); - if len > 10 { - out.push_str("["); + + let separator = if config.new_line { ",\n" } else { ", " }; + let padding = " ".repeat(config.padding); + + if len > config.max_items { + out.push_str("[\n"); out.push_str( &v[..3] .iter() - .map(|x| format!("{:?}, ", x)) + .map(|x| format!("{}{:?}{}", padding, x, separator)) .collect::(), ); - out.push_str("... "); + out.push_str(&format!("{}...\n", padding)); out.push_str( &v[len - 3..] .iter() - .map(|x| format!("{:?}, ", x)) + .map(|x| format!("{}{:?}{}", padding, x, separator)) .collect::(), ); out.push_str(&format!("] len = {}", len)); } else { - out.push_str("["); - out.push_str(&v.iter().map(|x| format!("{:?}, ", x)).collect::()); + out.push_str("[\n"); + out.push_str( + &v.iter() + .map(|x| format!("{}{:?}{}", padding, x, separator)) + .collect::(), + ); out.push_str("]"); } + + // Remove trailing separator + if config.new_line { + out = out.trim_end_matches(",\n").to_string(); + } else { + out = out.trim_end_matches(", ").to_string(); + } + out }