Commit
Merge pull request #7 from TalusBio/feat/ms1_quant
Feat/ms1 quant
jspaezp authored Nov 18, 2024
2 parents 7dbf592 + 51feb9f commit c10fdd5
Showing 66 changed files with 22,198 additions and 1,341 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -3,4 +3,3 @@
/tmp
.env
results.json

31 changes: 31 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,31 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: trailing-whitespace
- repo: local
hooks:
- id: fmt
name: fmt
description: Format files with cargo fmt.
entry: cargo +nightly fmt
language: system
types: [rust]
args: ["--"]
- id: cargo-check
name: cargo check
description: Check the package for errors.
entry: cargo check
language: system
types: [rust]
pass_filenames: false
- id: clippy
name: clippy
description: Lint rust sources
entry: cargo clippy
language: system
args: ["--", "-D", "warnings"]
types: [rust]
pass_filenames: false
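
Note: with this hook configuration in place, contributors would typically enable it once with pre-commit install and can run every hook on demand with pre-commit run --all-files. Formatting relies on the nightly toolchain (cargo +nightly fmt), and clippy is invoked with -D warnings, so any lint warning fails the hook.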


9 changes: 8 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
@@ -1,12 +1,13 @@
[package]
name = "timsquery"
version = "0.5.0"
version = "0.9.0"
edition = "2021"
license = "Apache-2.0"

[dependencies]
timsrust = "0.4.1"
timsrust = "=0.4.1"
rayon = "1.5"
nohash-hasher = "=0.2.0"
indicatif = { version = "0.17.8", features = ["rayon"] }

clap = { version = "4.5.17", features = ["derive"], optional = true }
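
Note: the crate version jumps from 0.5.0 to 0.9.0, and timsrust is now pinned to exactly 0.4.1 — the = requirement tells Cargo to accept only that version, whereas the previous "0.4.1" would allow any semver-compatible 0.4.x release.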
4 changes: 3 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ your file, and you get back results that match those three things!
More explicitly:
- The main design is to have modular components:
- aggregators
- indices
- indices
- queries
- tolerances

@@ -37,3 +37,5 @@ sequential, use that).

- Add logging levels to instrumentations.
- Add missing_docks_in_private_items to clippy.
- Implement predicate pushdown on the setup of indices.
- Implement predicate pushdown on query execution for raw index.
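
Note: "predicate pushdown" in these new TODO items presumably means applying the tolerance filters (RT, ion mobility, quadrupole isolation, and m/z windows) while an index is being built or scanned, rather than collecting peaks first and filtering them afterwards.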
66 changes: 61 additions & 5 deletions Taskfile.yml
@@ -14,15 +14,27 @@ tasks:
sources:
- "src/**/*.rs"
cmds:
- task: build
- task: bin-build
- task: lib-build
- task: test
- task: fmt
- task: clippy

build:
bin-build:
cmds:
- cargo build $BIN_EXTRAS

bin-build-release:
deps: [test, fmt]
sources:
- "src/**/*.rs"
cmds:
- cargo b --release --features build-binary --bin timsquery

lib-build:
cmds:
- cargo build --lib

license_check:
cmds:
- cargo deny check
@@ -37,18 +49,62 @@

clippy:
cmds:
- cargo clippy
- cargo clippy {{.CLI_ARGS}}

bench:
cmds:
- task: bench-build
- task: bench-small-data
- task: bench-large-data

bench-build:
sources:
- "src/**/*.rs"
cmds:
- cargo b --release --features bench --bin benchmark_indices
- SKIP_SLOW=1 SKIP_BUILD=1 SKIP_HIGHMEM=1 RUST_BACKTRACE=full TIMS_DATA_FILE=./data/LFQ_timsTOFPro_diaPASEF_Condition_A_Sample_Alpha_02.d ./target/release/benchmark_indices

bench-small-data:
deps: [bench-build]
sources:
- "src/**/*.rs"
- "data/230510_PRTC_13_S1-B1_1_12817.d"
- "benches/plot_bench.py"

cmds:
- SKIP_SLOW=1 SKIP_BUILD=1 RUST_BACKTRACE=full TIMS_DATA_FILE=./data/230510_PRTC_13_S1-B1_1_12817.d ./target/release/benchmark_indices
- uv run benches/plot_bench.py data/benchmark_results_LFQ_timsTOFPro_diaPASEF_Condition_A_Sample_Alpha_02.json
- uv run benches/plot_bench.py data/benchmark_results_230510_PRTC_13_S1-B1_1_12817.json

bench-large-data:
deps: [bench-build]
sources:
- "src/**/*.rs"
- "data/LFQ_timsTOFPro_diaPASEF_Condition_A_Sample_Alpha_02.d"
- "benches/plot_bench.py"
cmds:
- SKIP_SLOW=1 SKIP_BUILD=1 SKIP_HIGHMEM=1 RUST_BACKTRACE=full TIMS_DATA_FILE=./data/LFQ_timsTOFPro_diaPASEF_Condition_A_Sample_Alpha_02.d ./target/release/benchmark_indices
- uv run benches/plot_bench.py data/benchmark_results_LFQ_timsTOFPro_diaPASEF_Condition_A_Sample_Alpha_02.json

plot:
deps: [bin-build-release]
sources:
- "data/sageresults/**/*.py"
- "src/**/*.rs"
cmds:
- for: ["expanded-raw-frame-index", "transposed-quad-index"]
cmd: ./target/release/timsquery query-index
--aggregator multi-cmg-stats --format pretty-json
--raw-file-path ./data/230510_PRTC_13_S1-B1_1_12817.d
--tolerance-settings-path "templates/tolerance_settings.json"
--elution-groups-path "./data/sageresults/ubb_elution_groups.json"
--output-path "./data/sageresults/{{ .ITEM }}_query_results"
--index {{ .ITEM }}
- for: ["expanded-raw-frame-index", "transposed-quad-index"]
cmd: cd data/sageresults &&
uv run plot.py
--query-results {{ .ITEM }}_query_results/results.json
--elution-groups ubb_elution_groups.json
--output ubb_peptide_plot{{ .ITEM }}.png

templates:
sources:
- "src/**/*.rs"
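Note: with go-task installed, the new targets are invoked as, for example, task bench-small-data, task bench-large-data, or task plot. The benchmark runs are gated by environment variables set in the task commands: TIMS_DATA_FILE selects the raw file, while SKIP_SLOW, SKIP_BUILD, and SKIP_HIGHMEM appear to skip the slower, index-building, and memory-heavy benchmark variants, respectively (an inference from their names; this diff does not document them). The plot task iterates over the two index types, expanded-raw-frame-index and transposed-quad-index, using go-task's for loop and {{ .ITEM }} templating.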
72 changes: 45 additions & 27 deletions benches/benchmark_indices.rs
@@ -1,29 +1,46 @@
use rand::{Rng, SeedableRng};
use rand::{
Rng,
SeedableRng,
};
use rand_chacha::ChaCha8Rng;
use serde::Serialize;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use timsquery::{
models::{
aggregators::RawPeakIntensityAggregator,
indices::{
expanded_raw_index::ExpandedRawFrameIndex, raw_file_index::RawFileIndex,
transposed_quad_index::QuadSplittedTransposedIndex,
},
},
queriable_tims_data::queriable_tims_data::query_multi_group,
traits::tolerance::{
DefaultTolerance, MobilityTolerance, MzToleramce, QuadTolerance, RtTolerance,
},
ElutionGroup,
use std::path::{
Path,
PathBuf,
};
use std::time::{
Duration,
Instant,
};
use timsquery::models::aggregators::RawPeakIntensityAggregator;
use timsquery::models::indices::expanded_raw_index::ExpandedRawFrameIndex;
use timsquery::models::indices::raw_file_index::RawFileIndex;
use timsquery::models::indices::transposed_quad_index::QuadSplittedTransposedIndex;
use timsquery::queriable_tims_data::queriable_tims_data::query_multi_group;
use timsquery::traits::tolerance::{
DefaultTolerance,
MobilityTolerance,
MzToleramce,
QuadTolerance,
RtTolerance,
};
use timsquery::ElutionGroup;
use tracing::subscriber::set_global_default;
use tracing_bunyan_formatter::{BunyanFormattingLayer, JsonStorageLayer};
use tracing_bunyan_formatter::{
BunyanFormattingLayer,
JsonStorageLayer,
};
use tracing_chrome::ChromeLayerBuilder;
use tracing_subscriber::{fmt, prelude::*, registry::Registry, EnvFilter, Layer};
use tracing_subscriber::prelude::*;
use tracing_subscriber::registry::Registry;
use tracing_subscriber::{
fmt,
EnvFilter,
Layer,
};

const NUM_ELUTION_GROUPS: usize = 1000;
const NUM_ITERATIONS: usize = 1;
@@ -164,9 +181,10 @@ fn build_elution_groups() -> Vec<ElutionGroup<u64>> {
id: i as u64,
rt_seconds: rt,
mobility,
precursor_mz: mz,
precursor_charge: 2,
precursor_mzs: vec![mz],
fragment_mzs,
expected_fragment_intensity: None,
expected_precursor_intensity: None,
});
}
out_egs
@@ -325,13 +343,13 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
ms: MzToleramce::Ppm((20.0, 20.0)),
rt: RtTolerance::Absolute((5.0, 5.0)),
mobility: MobilityTolerance::Pct((3.0, 3.0)),
quad: QuadTolerance::Absolute((0.1, 0.1, 1)),
quad: QuadTolerance::Absolute((0.1, 0.1)),
};
let tolerance_with_nort = DefaultTolerance {
ms: MzToleramce::Ppm((20.0, 20.0)),
rt: RtTolerance::None,
mobility: MobilityTolerance::Pct((3.0, 3.0)),
quad: QuadTolerance::Absolute((0.1, 0.1, 1)),
quad: QuadTolerance::Absolute((0.1, 0.1)),
};
let tolerances = [
(tolerance_with_rt, "narrow_rt"),
@@ -356,7 +374,7 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
index,
&tolerance,
&query_groups,
&RawPeakIntensityAggregator::new,
&RawPeakIntensityAggregator::new_with_elution_group,
);
let tot: u64 = tmp.into_iter().sum();
let out = format!("RawFileIndex::query_multi_group aggregated {} ", tot,);
@@ -377,7 +395,7 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
index,
&tolerance,
&query_groups,
&RawPeakIntensityAggregator::new,
&RawPeakIntensityAggregator::new_with_elution_group,
);
let tot: u64 = tmp.into_iter().sum();
let out = format!(
@@ -401,7 +419,7 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
index,
&tolerance,
&query_groups,
&RawPeakIntensityAggregator::new,
&RawPeakIntensityAggregator::new_with_elution_group,
);
let tot: u64 = tmp.into_iter().sum();
let out = format!(
@@ -425,7 +443,7 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
index,
&tolerance,
&query_groups,
&RawPeakIntensityAggregator::new,
&RawPeakIntensityAggregator::new_with_elution_group,
);
let tot: u64 = tmp.into_iter().sum();
let out = format!("TransposedQuadIndex::query_multi_group aggregated {} ", tot,);
@@ -446,7 +464,7 @@ fn run_batch_access_benchmark(raw_file_path: &Path, env_config: EnvConfig) -> Ve
index,
&tolerance,
&query_groups,
&RawPeakIntensityAggregator::new,
&RawPeakIntensityAggregator::new_with_elution_group,
);
let tot: u64 = tmp.into_iter().sum();
let out = format!("TransposedQuadIndex::query_multi_group aggregated {} ", tot,);
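Note: besides the import reflow (one use path per statement, items broken vertically — presumably the effect of the new cargo +nightly fmt hook), this file tracks three API changes: ElutionGroup replaces the single precursor_mz/precursor_charge pair with a precursor_mzs vector and gains optional expected_precursor_intensity/expected_fragment_intensity fields, QuadTolerance::Absolute drops its third tuple element, and aggregators are now constructed via RawPeakIntensityAggregator::new_with_elution_group. Below is a minimal sketch of how the updated tolerance and aggregator pieces fit together, assembled from lines in this diff; the exact signatures (reference-ness of the arguments, the index type used, and the element type of the results) are assumptions, not verified against the crate.

use timsquery::models::aggregators::RawPeakIntensityAggregator;
use timsquery::models::indices::raw_file_index::RawFileIndex;
use timsquery::queriable_tims_data::queriable_tims_data::query_multi_group;
use timsquery::traits::tolerance::{
    DefaultTolerance,
    MobilityTolerance,
    MzToleramce,
    QuadTolerance,
    RtTolerance,
};
use timsquery::ElutionGroup;

// Hypothetical helper (not part of the crate): sums raw peak intensities for a
// set of elution groups against an already-built RawFileIndex.
fn sum_raw_intensities(index: &RawFileIndex, query_groups: &[ElutionGroup<u64>]) -> u64 {
    let tolerance = DefaultTolerance {
        ms: MzToleramce::Ppm((20.0, 20.0)),
        rt: RtTolerance::Absolute((5.0, 5.0)),
        mobility: MobilityTolerance::Pct((3.0, 3.0)),
        // Previously Absolute((0.1, 0.1, 1)); the third element was removed in this PR.
        quad: QuadTolerance::Absolute((0.1, 0.1)),
    };
    let tmp = query_multi_group(
        index,
        &tolerance,
        query_groups,
        // Previously &RawPeakIntensityAggregator::new.
        &RawPeakIntensityAggregator::new_with_elution_group,
    );
    tmp.into_iter().sum()
}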
2 changes: 1 addition & 1 deletion data/.gitignore
@@ -1,4 +1,4 @@
*.d/
*.d.zip
*.d.tar

benchmark_results_*.json
(Five additional changed files could not be displayed in this view.)
5 changes: 5 additions & 0 deletions data/sageresults/.gitignore
@@ -0,0 +1,5 @@
lfq.tsv
results.sage.tsv
matched_fragments.sage.tsv
sage
log.log