Skip to content

Commit

Permalink
feat: add flexible transition lab n sort of outs
Browse files Browse the repository at this point in the history
  • Loading branch information
jspaezp committed Sep 26, 2024
1 parent bf453f3 commit d4ba398
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 119 deletions.
19 changes: 8 additions & 11 deletions benches/benchmark_indices.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use std::collections::HashMap;

use std::hint::black_box;
use timsquery::{
Expand Down Expand Up @@ -35,15 +36,15 @@ fn get_file_from_env() -> (String, String) {
}

const NUM_ELUTION_GROUPS: usize = 500;
fn build_elution_groups(raw_file_path: String) -> Vec<ElutionGroup> {
fn build_elution_groups(raw_file_path: String) -> Vec<ElutionGroup<u64>> {
const NUM_FRAGMENTS: usize = 10;
const MAX_RT: f32 = 22.0 * 60.0;
const MAX_MOBILITY: f32 = 1.5;
const MIN_MOBILITY: f32 = 0.5;
const MAX_MZ: f64 = 1000.0;
const MIN_MZ: f64 = 300.0;

let mut out_egs: Vec<ElutionGroup> = Vec::with_capacity(NUM_ELUTION_GROUPS);
let mut out_egs: Vec<ElutionGroup<u64>> = Vec::with_capacity(NUM_ELUTION_GROUPS);
let mut rng = ChaCha8Rng::seed_from_u64(43u64);

for i in 1..NUM_ELUTION_GROUPS {
Expand All @@ -52,15 +53,12 @@ fn build_elution_groups(raw_file_path: String) -> Vec<ElutionGroup> {
let mobility = rng.gen::<f32>() * (MAX_MOBILITY - MIN_MOBILITY) + MIN_MOBILITY;
let mz = rng.gen::<f64>() * (MAX_MZ - MIN_MZ) + MIN_MZ;

let mut fragment_mzs = Vec::with_capacity(NUM_FRAGMENTS);
let mut fragment_charges = Vec::with_capacity(NUM_FRAGMENTS);
let mut fragment_mzs = HashMap::with_capacity(NUM_FRAGMENTS);

for _ in 0..NUM_FRAGMENTS {
for ii in 0..NUM_FRAGMENTS {
let fragment_mz = rng.gen::<f64>() * (MAX_MZ - MIN_MZ) + MIN_MZ;
// let fragment_charge = rng.gen::<u8>() * 3 + 1;
let fragment_charge = 1;
fragment_mzs.push(fragment_mz);
fragment_charges.push(fragment_charge);
fragment_mzs.insert(ii as u64, fragment_mz);
}

// rand u8 is number from 0-255 ... which is not amazing for us ...
Expand All @@ -73,8 +71,7 @@ fn build_elution_groups(raw_file_path: String) -> Vec<ElutionGroup> {
mobility,
precursor_mz: mz,
precursor_charge,
fragment_mzs: Some(fragment_mzs),
fragment_charges: Some(fragment_charges),
fragment_mzs,
});
}
out_egs
Expand Down Expand Up @@ -121,7 +118,7 @@ macro_rules! add_bench_random {
)
},
|(index, query_groups, tolerance)| {
let local_lambda = |elution_group: &ElutionGroup| {
let local_lambda = |elution_group: &ElutionGroup<u64>| {
query_indexed(
&index,
&RawPeakIntensityAggregator::new,
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub use crate::queriable_tims_data::queriable_tims_data::QueriableTimsData;
// Re-export traits
pub use crate::traits::aggregator::Aggregator;
pub use crate::traits::indexed_data::IndexedData;
pub use crate::traits::tolerance::{Tolerance, ToleranceAdapter};
pub use crate::traits::tolerance::{HasIntegerID, Tolerance, ToleranceAdapter};

// Re-export utility functions
pub use crate::utils::sorting::sort_multiple_by;
Expand Down
14 changes: 7 additions & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::time::Instant;
use timsquery::models::elution_group::ElutionGroup;
use timsquery::queriable_tims_data::queriable_tims_data::query_multi_group;
Expand Down Expand Up @@ -54,7 +55,7 @@ fn main_write_template(args: WriteTemplateArgs) {
);
}

fn template_elution_groups(num: usize) -> Vec<ElutionGroup> {
fn template_elution_groups(num: usize) -> Vec<ElutionGroup<u64>> {
let mut egs = Vec::with_capacity(num);

let min_mz = 400.0;
Expand All @@ -73,16 +74,15 @@ fn template_elution_groups(num: usize) -> Vec<ElutionGroup> {
let mobility = min_mobility + (i as f32 * mobility_step);
let mz = min_mz + (i as f64 * mz_step);
let precursor_charge = 2;
let fragment_mzs = Some((0..10).map(|x| mz + x as f64).collect::<Vec<f64>>());
let fragment_charges = Some(vec![precursor_charge]);
let fragment_mzs = (0..10).map(|x| (x as u64, mz + x as f64));
let fragment_mzs = HashMap::from_iter(fragment_mzs);
egs.push(ElutionGroup {
id: i as u64,
rt_seconds: rt,
mobility,
precursor_mz: mz,
precursor_charge,
fragment_mzs,
fragment_charges,
});
}
egs
Expand All @@ -99,7 +99,7 @@ fn main_query_index(args: QueryIndexArgs) {

let tolerance_settings: DefaultTolerance =
serde_json::from_str(&std::fs::read_to_string(&tolerance_settings_path).unwrap()).unwrap();
let elution_groups: Vec<ElutionGroup> =
let elution_groups: Vec<ElutionGroup<u64>> =
serde_json::from_str(&std::fs::read_to_string(&elution_groups_path).unwrap()).unwrap();

let index_use = match (index_use, elution_groups.len() > 10) {
Expand Down Expand Up @@ -207,7 +207,7 @@ enum Commands {

#[derive(Debug, Serialize, Deserialize)]
struct ElutionGroupResults<T: Serialize> {
elution_group: ElutionGroup,
elution_group: ElutionGroup<u64>,
result: T,
}

Expand All @@ -216,7 +216,7 @@ pub fn execute_query(
aggregator: PossibleAggregator,
raw_file_path: String,
tolerance: DefaultTolerance,
elution_groups: Vec<ElutionGroup>,
elution_groups: Vec<ElutionGroup<u64>>,
args: QueryIndexArgs,
) {
let output_path = args.output_path;
Expand Down
23 changes: 19 additions & 4 deletions src/models/aggregators/raw_peak_agg/chromatogram_agg.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use serde::Serialize;
use std::collections::BTreeMap;
use std::collections::HashMap;

use super::super::streaming_aggregator::RunningStatsCalculator;
use crate::models::frames::raw_peak::RawPeak;
use crate::sort_by_indices_multi;
use crate::traits::aggregator::Aggregator;
use crate::utils::sorting::argsort_by;

use serde::Serialize;
use std::collections::BTreeMap;
use std::collections::HashMap;

#[derive(Debug, Clone, Serialize)]
pub struct ExtractedIonChromatomobilogram {
Expand Down Expand Up @@ -145,6 +147,19 @@ impl ChromatomobilogramStatsArrays {
self.scan_index_sds.extend(other.scan_index_sds);
self.intensities.extend(other.intensities);
}

pub fn sort_by_rt(&mut self) {
let mut indices = argsort_by(&self.retention_time_miliseconds, |x| *x);
sort_by_indices_multi!(
&mut indices,
&mut self.retention_time_miliseconds,
&mut self.tof_index_means,
&mut self.tof_index_sds,
&mut self.scan_index_means,
&mut self.scan_index_sds,
&mut self.intensities
);
}
}

impl Aggregator<RawPeak> for ChromatomobilogramStats {
Expand Down
4 changes: 4 additions & 0 deletions src/models/aggregators/raw_peak_agg/multi_chromatogram_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ impl Aggregator<(RawPeak, usize)> for MultiCMGStats {
});
}

for (_k, v) in transition_stats.iter_mut() {
v.sort_by_rt();
}

MultiCMGStatsArrays {
transition_stats,
id: self.id,
Expand Down
14 changes: 11 additions & 3 deletions src/models/elution_group.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
use crate::HasIntegerID;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::hash::Hash;

#[derive(Debug, Serialize, Deserialize)]
pub struct ElutionGroup {
pub struct ElutionGroup<T: Clone + Eq + Serialize + Hash + Send + Sync> {
pub id: u64,
pub mobility: f32,
pub rt_seconds: f32,
pub precursor_mz: f64,
pub precursor_charge: u8,
pub fragment_mzs: Option<Vec<f64>>,
pub fragment_charges: Option<Vec<u8>>,
pub fragment_mzs: HashMap<T, f64>,
}

impl<T: Clone + Eq + Serialize + Hash + Send + Sync> HasIntegerID for ElutionGroup<T> {
fn get_id(&self) -> u64 {
self.id
}
}
84 changes: 52 additions & 32 deletions src/models/indices/raw_file_index.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
use std::sync::Arc;

use rayon::iter::ParallelIterator;
use timsrust::converters::ConvertableDomain;
use timsrust::readers::{FrameReader, FrameReaderError, MetadataReader};
use timsrust::TimsRustError;
use timsrust::{Frame, Metadata, QuadrupoleSettings};
// use timsrust::io::;

use crate::models::frames::raw_frames::frame_elems_matching;
use crate::models::frames::raw_peak::RawPeak;
use crate::models::queries::{FragmentGroupIndexQuery, NaturalPrecursorQuery, PrecursorIndexQuery};
use crate::traits::aggregator::Aggregator;
use crate::traits::indexed_data::IndexedData;
use crate::ElutionGroup;
use crate::ToleranceAdapter;
use log::trace;
use rayon::iter::ParallelIterator;
use serde::Serialize;
use std::fmt::Debug;
use std::hash::Hash;
use timsrust::converters::ConvertableDomain;
use timsrust::readers::{FrameReader, FrameReaderError, MetadataReader};
use timsrust::TimsRustError;
use timsrust::{Frame, Metadata, QuadrupoleSettings};

pub struct RawFileIndex {
file_reader: FrameReader,
Expand Down Expand Up @@ -53,9 +56,14 @@ impl RawFileIndex {
}
}

fn apply_on_query<'c, 'b: 'c, 'a: 'b>(
fn apply_on_query<
'c,
'b: 'c,
'a: 'b,
FH: Clone + Eq + Serialize + Hash + Debug + Send + Sync,
>(
&'a self,
fqs: &'b FragmentGroupIndexQuery,
fqs: &'b FragmentGroupIndexQuery<FH>,
fun: &'c mut dyn for<'r> FnMut(RawPeak, Arc<QuadrupoleSettings>),
) {
trace!("RawFileIndex::apply_on_query");
Expand All @@ -69,7 +77,7 @@ impl RawFileIndex {
let pq = &fqs.precursor_query;
let iso_mz_range = pq.isolation_mz_range;
for frame in frames {
for tof_range in fqs.mz_index_ranges.iter() {
for (_, tof_range) in fqs.mz_index_ranges.iter() {
let scan_range = pq.mobility_index_range;
let peaks = frame_elems_matching(
&frame,
Expand All @@ -92,10 +100,10 @@ impl RawFileIndex {
self.file_reader.parallel_filter(lambda_use)
}

fn query_from_elution_group_impl(
fn query_from_elution_group_impl<FH: Clone + Eq + Serialize + Hash + Send + Sync>(
&self,
tol: &dyn crate::traits::tolerance::Tolerance,
elution_group: &crate::models::elution_group::ElutionGroup,
elution_group: &crate::models::elution_group::ElutionGroup<FH>,
) -> PrecursorIndexQuery {
let rt_range = tol.rt_range(elution_group.rt_seconds);
let mz_range = tol.mz_range(elution_group.precursor_mz);
Expand Down Expand Up @@ -140,24 +148,32 @@ impl RawFileIndex {
}
}

fn queries_from_elution_elements_impl(
fn queries_from_elution_elements_impl<FH: Clone + Eq + Serialize + Hash + Send + Sync>(
&self,
tol: &dyn crate::traits::tolerance::Tolerance,
elution_elements: &crate::models::elution_group::ElutionGroup,
) -> FragmentGroupIndexQuery {
elution_elements: &crate::models::elution_group::ElutionGroup<FH>,
) -> FragmentGroupIndexQuery<FH> {
let precursor_query = self.query_from_elution_group_impl(tol, elution_elements);
// TODO: change this unwrap and use explicitly the lack of fragment mzs.
// Does that mean its onlyt a precursor query?
// Why is it an option?
let fragment_mzs = elution_elements.fragment_mzs.as_ref().unwrap();
let mut fqs = Vec::with_capacity(fragment_mzs.len());
for fragment in fragment_mzs {
let mz_range = tol.mz_range(*fragment);
fqs.push((
self.meta_converters.mz_converter.invert(mz_range.0) as u32,
self.meta_converters.mz_converter.invert(mz_range.1) as u32,
));
}
//
// let fragment_mzs = elution_elements.fragment_mzs.as_ref().unwrap();

let fqs = elution_elements
.fragment_mzs
.iter()
.map(|(k, v)| {
let mz_range = tol.mz_range(*v);
(
k.clone(),
(
self.meta_converters.mz_converter.invert(mz_range.0) as u32,
self.meta_converters.mz_converter.invert(mz_range.1) as u32,
),
)
})
.collect();

FragmentGroupIndexQuery {
mz_index_ranges: fqs,
Expand All @@ -166,24 +182,26 @@ impl RawFileIndex {
}
}

impl IndexedData<FragmentGroupIndexQuery, RawPeak> for RawFileIndex {
fn query(&self, fragment_query: &FragmentGroupIndexQuery) -> Vec<RawPeak> {
impl<FH: Hash + Copy + Clone + Serialize + Eq + Debug + Send + Sync>
IndexedData<FragmentGroupIndexQuery<FH>, RawPeak> for RawFileIndex
{
fn query(&self, fragment_query: &FragmentGroupIndexQuery<FH>) -> Vec<RawPeak> {
let mut out = Vec::new();
self.apply_on_query(fragment_query, &mut |peak, _| out.push(peak));
out
}

fn add_query<O, AG: crate::Aggregator<RawPeak, Output = O>>(
fn add_query<O, AG: Aggregator<RawPeak, Output = O>>(
&self,
fragment_query: &FragmentGroupIndexQuery,
fragment_query: &FragmentGroupIndexQuery<FH>,
aggregator: &mut AG,
) {
self.apply_on_query(fragment_query, &mut |peak, _| aggregator.add(&peak));
}

fn add_query_multi_group<O, AG: crate::Aggregator<RawPeak, Output = O>>(
&self,
fragment_queries: &[FragmentGroupIndexQuery],
fragment_queries: &[FragmentGroupIndexQuery<FH>],
aggregator: &mut [AG],
) {
fragment_queries
Expand All @@ -193,12 +211,14 @@ impl IndexedData<FragmentGroupIndexQuery, RawPeak> for RawFileIndex {
}
}

impl ToleranceAdapter<FragmentGroupIndexQuery> for RawFileIndex {
impl<FH: Hash + Serialize + Eq + Clone + Send + Sync>
ToleranceAdapter<FragmentGroupIndexQuery<FH>, ElutionGroup<FH>> for RawFileIndex
{
fn query_from_elution_group(
&self,
tol: &dyn crate::traits::tolerance::Tolerance,
elution_group: &crate::models::elution_group::ElutionGroup,
) -> FragmentGroupIndexQuery {
elution_group: &crate::models::elution_group::ElutionGroup<FH>,
) -> FragmentGroupIndexQuery<FH> {
self.queries_from_elution_elements_impl(tol, elution_group)
}
}
Loading

0 comments on commit d4ba398

Please sign in to comment.