diff --git a/Cargo.toml b/Cargo.toml index 38305e8..5546743 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,12 +20,12 @@ tracing-subscriber = { version = "0.3.18", features = [ "env-filter", ] } tracing-bunyan-formatter = "0.3.9" +tracing-chrome = "0.7.2" # These are only used for benchmarks rand = { version = "0.8.5", optional = true } rand_chacha = { version = "0.3.1", optional = true } -tracing-chrome = "0.7.2" [features] diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..4b66808 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,58 @@ +use crate::models::frames::expanded_frame::FrameProcessingConfig; +use std::fmt::Display; +use timsrust::TimsRustError; + +#[derive(Debug)] +pub enum TimsqueryError { + DataReadingError(DataReadingError), + DataProcessingError(DataProcessingError), + Other(String), +} + +pub type Result = std::result::Result; + +impl Display for TimsqueryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl TimsqueryError { + pub fn custom(msg: impl Display) -> Self { + Self::Other(msg.to_string()) + } +} + +#[derive(Debug)] +pub enum DataReadingError { + UnsupportedDataError(UnsupportedDataError), + TimsRustError(TimsRustError), // Why doesnt timsrust error derive clone? +} + +impl From for DataReadingError { + fn from(e: UnsupportedDataError) -> Self { + DataReadingError::UnsupportedDataError(e) + } +} + +#[derive(Debug)] +pub enum UnsupportedDataError { + NoMS2DataError, +} + +#[derive(Debug)] +pub enum DataProcessingError { + CentroidingError(FrameProcessingConfig), +} + +impl> From for TimsqueryError { + fn from(e: T) -> Self { + TimsqueryError::DataReadingError(e.into()) + } +} + +impl> From for DataReadingError { + fn from(e: T) -> Self { + DataReadingError::TimsRustError(e.into()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 2ef3d2b..500b75e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,11 @@ pub use crate::traits::queriable_data::QueriableData; pub use crate::traits::tolerance::{Tolerance, ToleranceAdapter}; // Declare modules +pub mod errors; pub mod models; pub mod queriable_tims_data; pub mod traits; pub mod utils; + +// Re-export errors +pub use crate::errors::{DataProcessingError, DataReadingError, TimsqueryError}; diff --git a/src/models/frames/expanded_frame.rs b/src/models/frames/expanded_frame.rs index ab4bfae..c0da9f3 100644 --- a/src/models/frames/expanded_frame.rs +++ b/src/models/frames/expanded_frame.rs @@ -1,23 +1,20 @@ +use super::peak_in_quad::PeakInQuad; use super::single_quad_settings::{ expand_quad_settings, ExpandedFrameQuadSettings, SingleQuadrupoleSetting, }; +use crate::errors::{Result, UnsupportedDataError}; +use crate::sort_vecs_by_first; +use crate::utils::compress_explode::explode_vec; +use crate::utils::frame_processing::{lazy_centroid_weighted_frame, PeakArrayRefs}; +use crate::utils::sorting::top_n; +use crate::utils::tolerance_ranges::{scan_tol_range, tof_tol_range}; use rayon::prelude::*; use std::collections::HashMap; use std::marker::PhantomData; use std::sync::Arc; use timsrust::converters::{Scan2ImConverter, Tof2MzConverter}; +use timsrust::readers::{FrameReader, FrameReaderError}; use timsrust::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}; - -use super::peak_in_quad::PeakInQuad; -use crate::sort_vecs_by_first; -use crate::utils::compress_explode::explode_vec; -use crate::utils::frame_processing::{lazy_centroid_weighted_frame, PeakArrayRefs}; -use crate::utils::sorting::top_n; -use crate::utils::tolerance_ranges::{scan_tol_range, tof_tol_range}; -use timsrust::{ - readers::{FrameReader, FrameReaderError, MetadataReaderError}, - TimsRustError, -}; use tracing::instrument; use tracing::{info, trace, warn}; @@ -301,14 +298,31 @@ pub fn par_expand_and_arrange_frames( out } +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct CentroidingSettings { + ims_tol_pct: f64, + mz_tol_ppm: f64, + window_width: usize, + max_ms1_peaks: usize, + max_ms2_peaks: usize, +} + +impl Default for CentroidingSettings { + fn default() -> Self { + CentroidingSettings { + ims_tol_pct: 1.5, + mz_tol_ppm: 15.0, + window_width: 3, + max_ms1_peaks: 100_000, + max_ms2_peaks: 20_000, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq)] pub enum FrameProcessingConfig { Centroided { - ims_tol_pct: f64, - mz_tol_ppm: f64, - window_width: usize, - max_ms1_peaks: usize, - max_ms2_peaks: usize, + settings: CentroidingSettings, ims_converter: Option, mz_converter: Option, }, @@ -322,33 +336,20 @@ impl FrameProcessingConfig { mz_converter: Tof2MzConverter, ) -> Self { match self { - FrameProcessingConfig::Centroided { - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms1_peaks, - max_ms2_peaks, - .. - } => FrameProcessingConfig::Centroided { - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms1_peaks, - max_ms2_peaks, - ims_converter: Some(ims_converter), - mz_converter: Some(mz_converter), - }, + FrameProcessingConfig::Centroided { settings, .. } => { + FrameProcessingConfig::Centroided { + settings, + ims_converter: Some(ims_converter), + mz_converter: Some(mz_converter), + } + } FrameProcessingConfig::NotCentroided => FrameProcessingConfig::NotCentroided, } } pub fn default_centroided() -> Self { FrameProcessingConfig::Centroided { - ims_tol_pct: 1.5, - mz_tol_ppm: 15.0, - window_width: 3, - max_ms1_peaks: 100_000, - max_ms2_peaks: 20_000, + settings: Default::default(), ims_converter: Default::default(), mz_converter: Default::default(), } @@ -359,33 +360,8 @@ impl FrameProcessingConfig { } } -#[derive(Debug)] -pub enum DataReadingError { - CentroidingError(FrameProcessingConfig), - UnsupportedDataError(String), - TimsRustError(TimsRustError), // Why doesnt timsrust error derive clone? -} - -impl From for DataReadingError { - fn from(e: TimsRustError) -> Self { - DataReadingError::TimsRustError(e) - } -} - -impl From for DataReadingError { - fn from(e: MetadataReaderError) -> Self { - DataReadingError::TimsRustError(TimsRustError::MetadataReaderError(e)) - } -} - -impl From for DataReadingError { - fn from(e: FrameReaderError) -> Self { - DataReadingError::TimsRustError(TimsRustError::FrameReaderError(e)) - } -} - fn warn_and_skip_badframes( - frame_iter: impl ParallelIterator>, + frame_iter: impl ParallelIterator>, ) -> impl ParallelIterator { frame_iter.filter_map(|x| { // Log the info of the frame that broke ... @@ -409,16 +385,11 @@ fn warn_and_skip_badframes( pub fn par_read_and_expand_frames( frame_reader: &FrameReader, centroiding_config: FrameProcessingConfig, -) -> Result< - HashMap, Vec>>, - DataReadingError, -> { +) -> Result, Vec>>> { let dia_windows = match frame_reader.get_dia_windows() { Some(dia_windows) => dia_windows, None => { - return Err(DataReadingError::UnsupportedDataError( - "No dia windows found".to_string(), - )) + return Err(UnsupportedDataError::NoMS2DataError.into()); } }; @@ -430,19 +401,15 @@ pub fn par_read_and_expand_frames( let expanded_frames = match centroiding_config { FrameProcessingConfig::Centroided { - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms1_peaks: _max_ms1_peaks, - max_ms2_peaks, + settings, ims_converter, mz_converter, } => par_expand_and_centroid_frames( curr_iter, - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms2_peaks, + settings.ims_tol_pct, + settings.mz_tol_ppm, + settings.window_width, + settings.max_ms2_peaks, &ims_converter.unwrap(), &mz_converter.unwrap(), ), @@ -464,19 +431,15 @@ pub fn par_read_and_expand_frames( let ms1_iter = warn_and_skip_badframes(ms1_iter); let expanded_ms1_frames = match centroiding_config { FrameProcessingConfig::Centroided { - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms1_peaks, - max_ms2_peaks: _max_ms2_peaks, + settings, ims_converter, mz_converter, } => par_expand_and_centroid_frames( ms1_iter, - ims_tol_pct, - mz_tol_ppm, - window_width, - max_ms1_peaks, + settings.ims_tol_pct, + settings.mz_tol_ppm, + settings.window_width, + settings.max_ms1_peaks, &ims_converter.unwrap(), &mz_converter.unwrap(), ), diff --git a/src/models/indices/expanded_raw_index/model.rs b/src/models/indices/expanded_raw_index/model.rs index bda8b6d..f2c5533 100644 --- a/src/models/indices/expanded_raw_index/model.rs +++ b/src/models/indices/expanded_raw_index/model.rs @@ -1,8 +1,8 @@ +use crate::errors::Result; use crate::models::adapters::FragmentIndexAdapter; use crate::models::elution_group::ElutionGroup; use crate::models::frames::expanded_frame::{ - par_read_and_expand_frames, DataReadingError, ExpandedFrameSlice, FrameProcessingConfig, - SortedState, + par_read_and_expand_frames, ExpandedFrameSlice, FrameProcessingConfig, SortedState, }; use crate::models::frames::peak_in_quad::PeakInQuad; use crate::models::frames::raw_peak::RawPeak; @@ -19,7 +19,7 @@ use serde::Serialize; use std::collections::HashMap; use std::hash::Hash; use std::time::Instant; -use timsrust::converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}; +use timsrust::converters::{Scan2ImConverter, Tof2MzConverter}; use timsrust::readers::{FrameReader, MetadataReader}; use tracing::info; use tracing::instrument; @@ -29,7 +29,6 @@ pub struct ExpandedRawFrameIndex { bundled_ms1_frames: ExpandedSliceBundle, bundled_frames: HashMap, flat_quad_settings: Vec, - rt_converter: Frame2RtConverter, pub mz_converter: Tof2MzConverter, pub im_converter: Scan2ImConverter, adapter: FragmentIndexAdapter, @@ -38,18 +37,15 @@ pub struct ExpandedRawFrameIndex { #[derive(Debug, Clone)] pub struct ExpandedSliceBundle { slices: Vec>, - rts: Vec, frame_indices: Vec, } impl ExpandedSliceBundle { pub fn new(mut slices: Vec>) -> Self { slices.sort_unstable_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); - let rts = slices.iter().map(|x| x.rt).collect(); let frame_indices = slices.iter().map(|x| x.frame_index).collect(); Self { slices, - rts, frame_indices, } } @@ -113,21 +109,18 @@ impl ExpandedRawFrameIndex { } #[instrument(name = "ExpandedRawFrameIndex::from_path_centroided")] - pub fn from_path_centroided(path: &str) -> Result { + pub fn from_path_centroided(path: &str) -> Result { let config = FrameProcessingConfig::default_centroided(); Self::from_path_base(path, config) } #[instrument(name = "ExpandedRawFrameIndex::from_path")] - pub fn from_path(path: &str) -> Result { + pub fn from_path(path: &str) -> Result { Self::from_path_base(path, FrameProcessingConfig::NotCentroided) } #[instrument(name = "ExpandedRawFrameIndex::from_path_base")] - pub fn from_path_base( - path: &str, - centroid_config: FrameProcessingConfig, - ) -> Result { + pub fn from_path_base(path: &str, centroid_config: FrameProcessingConfig) -> Result { info!( "Building ExpandedRawFrameIndex from path {} config {:?}", path, centroid_config, @@ -166,7 +159,6 @@ impl ExpandedRawFrameIndex { bundled_ms1_frames: out_ms1_frames.expect("At least one ms1 frame should be present"), bundled_frames: out_ms2_frames, flat_quad_settings, - rt_converter: meta_converters.rt_converter, mz_converter: meta_converters.mz_converter, im_converter: meta_converters.im_converter, adapter, diff --git a/src/models/indices/transposed_quad_index/quad_index.rs b/src/models/indices/transposed_quad_index/quad_index.rs index b5b601d..c5d203e 100644 --- a/src/models/indices/transposed_quad_index/quad_index.rs +++ b/src/models/indices/transposed_quad_index/quad_index.rs @@ -83,46 +83,6 @@ impl TransposedQuadIndex { .map(move |p| PeakInQuad::from_peak_in_bucket(p, *tof_index)) }) } - - fn convert_to_local_frame_range( - &self, - rt_range: Option, - ) -> Option<(f32, f32)> { - // TODO consider if I should allow only RT here, since it would in theory - // force me to to the repreatable work beforehand. - let frame_index_range = match rt_range { - Some(FrameRTTolerance::Seconds((rt_low, rt_high))) => { - Some((rt_low as f32, rt_high as f32)) - } - Some(FrameRTTolerance::FrameIndex((frame_low, frame_high))) => { - let frame_id_start = self - .frame_indices - .binary_search_by(|x| x.cmp(&frame_low)) - .unwrap_or_else(|x| x); - - let frame_id_end = self - .frame_indices - .binary_search_by(|x| x.cmp(&frame_high)) - .unwrap_or_else(|x| x); - - // TODO consider throwing a warning if we are - // out of bounds here. - Some(( - self.frame_rts[frame_id_start.min(self.frame_rts.len() - 1)] as f32, - self.frame_rts[frame_id_end.min(self.frame_rts.len() - 1)] as f32, - )) - } - None => None, - }; - - if cfg!(debug_assertions) { - if let Some((low, high)) = frame_index_range { - debug_assert!(low <= high); - } - } - - frame_index_range - } } impl PeakInQuad { diff --git a/src/models/indices/transposed_quad_index/quad_splitted_transposed_index.rs b/src/models/indices/transposed_quad_index/quad_splitted_transposed_index.rs index 0537410..2f003b9 100644 --- a/src/models/indices/transposed_quad_index/quad_splitted_transposed_index.rs +++ b/src/models/indices/transposed_quad_index/quad_splitted_transposed_index.rs @@ -1,9 +1,9 @@ use super::quad_index::{TransposedQuadIndex, TransposedQuadIndexBuilder}; +use crate::errors::Result; use crate::models::adapters::FragmentIndexAdapter; use crate::models::elution_group::ElutionGroup; use crate::models::frames::expanded_frame::{ - par_read_and_expand_frames, DataReadingError, ExpandedFrameSlice, FrameProcessingConfig, - SortingStateTrait, + par_read_and_expand_frames, ExpandedFrameSlice, FrameProcessingConfig, SortingStateTrait, }; use crate::models::frames::peak_in_quad::PeakInQuad; use crate::models::frames::raw_peak::RawPeak; @@ -22,10 +22,9 @@ use std::fmt::Debug; use std::fmt::Display; use std::hash::Hash; use std::time::Instant; -use timsrust::converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}; +use timsrust::converters::{Scan2ImConverter, Tof2MzConverter}; use timsrust::readers::{FrameReader, MetadataReader}; use timsrust::Metadata; -use timsrust::TimsRustError; use tracing::instrument; use tracing::{debug, info, trace}; @@ -36,7 +35,6 @@ pub struct QuadSplittedTransposedIndex { precursor_index: TransposedQuadIndex, fragment_indices: HashMap, flat_quad_settings: Vec, - rt_converter: Frame2RtConverter, pub mz_converter: Tof2MzConverter, pub im_converter: Scan2ImConverter, adapter: FragmentIndexAdapter, @@ -104,7 +102,6 @@ impl Display for QuadSplittedTransposedIndex { let mut disp_str = String::new(); disp_str.push_str("QuadSplittedTransposedIndex\n"); - disp_str.push_str("rt_converter: ... not showing ...\n"); disp_str.push_str(&format!("mz_converter: {:?}\n", self.mz_converter)); disp_str.push_str(&format!("im_converter: {:?}\n", self.im_converter)); disp_str.push_str("flat_quad_settings: \n"); @@ -139,7 +136,7 @@ impl Display for QuadSplittedTransposedIndex { impl QuadSplittedTransposedIndex { #[instrument(name = "QuadSplittedTransposedIndex::from_path")] - pub fn from_path(path: &str) -> Result { + pub fn from_path(path: &str) -> Result { let st = Instant::now(); info!("Building transposed quad index from path {}", path); let tmp = QuadSplittedTransposedIndexBuilder::from_path(path)?; @@ -151,7 +148,7 @@ impl QuadSplittedTransposedIndex { } #[instrument(name = "QuadSplittedTransposedIndex::from_path_centroided")] - pub fn from_path_centroided(path: &str) -> Result { + pub fn from_path_centroided(path: &str) -> Result { let st = Instant::now(); info!( "Building CENTROIDED transposed quad index from path {}", @@ -169,7 +166,6 @@ impl QuadSplittedTransposedIndex { #[derive(Debug, Clone, Default)] pub struct QuadSplittedTransposedIndexBuilder { indices: HashMap, TransposedQuadIndexBuilder>, - rt_converter: Option, mz_converter: Option, im_converter: Option, metadata: Option, @@ -200,12 +196,12 @@ impl QuadSplittedTransposedIndexBuilder { } #[instrument(name = "QuadSplittedTransposedIndexBuilder::from_path")] - fn from_path(path: &str) -> Result { + fn from_path(path: &str) -> Result { Self::from_path_base(path, FrameProcessingConfig::NotCentroided) } #[instrument(name = "QuadSplittedTransposedIndexBuilder::from_path_centroided")] - fn from_path_centroided(path: &str) -> Result { + fn from_path_centroided(path: &str) -> Result { let config = FrameProcessingConfig::default_centroided(); Self::from_path_base(path, config) } @@ -214,10 +210,7 @@ impl QuadSplittedTransposedIndexBuilder { // and one that adds the frameslices, maybe even have a config struct that dispatches // the right preprocessing steps. #[instrument(name = "QuadSplittedTransposedIndexBuilder::from_path_base")] - fn from_path_base( - path: &str, - centroid_config: FrameProcessingConfig, - ) -> Result { + fn from_path_base(path: &str, centroid_config: FrameProcessingConfig) -> Result { let file_reader = FrameReader::new(path)?; let sql_path = std::path::Path::new(path).join("analysis.tdf"); @@ -226,7 +219,6 @@ impl QuadSplittedTransposedIndexBuilder { let out_meta_converters = meta_converters.clone(); let mut final_out = Self { indices: HashMap::new(), - rt_converter: Some(meta_converters.rt_converter), mz_converter: Some(meta_converters.mz_converter), im_converter: Some(meta_converters.im_converter), metadata: Some(out_meta_converters), @@ -239,7 +231,7 @@ impl QuadSplittedTransposedIndexBuilder { let split_frames = par_read_and_expand_frames(&file_reader, centroid_config)?; // TODO use the rayon contructor to fold - let out2: Result, TimsRustError> = split_frames + let out2: Result> = split_frames .into_par_iter() .map(|(q, frameslices)| { // TODO:Refactor so the internal index is built first and then added. @@ -302,7 +294,6 @@ impl QuadSplittedTransposedIndexBuilder { precursor_index: precursor_index.expect("Precursor peaks should be present"), fragment_indices: indices, flat_quad_settings, - rt_converter: self.rt_converter.unwrap(), mz_converter: self.mz_converter.unwrap(), im_converter: self.im_converter.unwrap(), adapter: FragmentIndexAdapter::from(self.metadata.unwrap()),