Skip to content

Commit

Permalink
added unit testing to tdf reading
Browse files Browse the repository at this point in the history
  • Loading branch information
jspaezp committed Jul 6, 2024
1 parent 31e29c1 commit b70c05d
Show file tree
Hide file tree
Showing 19 changed files with 1,181 additions and 40 deletions.
1 change: 0 additions & 1 deletion src/ms/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
pub mod frames;
pub mod ms;
pub mod sorting;
pub mod tdf;
1 change: 0 additions & 1 deletion src/ms/ms.rs

This file was deleted.

48 changes: 10 additions & 38 deletions src/ms/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,14 @@ pub struct DIAFrameInfo {
pub retention_times: Vec<Option<f32>>,
pub grouping_level: GroupingLevel,
pub number_of_groups: usize,

/// `row_to_group` maps each `Isolation window row id` to the group
/// it belongs to at the chosen grouping level. For diaPASEF, since
/// every scan range has a different quad window, the number of distinct
/// groups is the number of scan ranges (window group + scan range
/// combinations). For the case of diagonal PASEF, the number of
/// groups is the number of window groups, since the scan ranges
/// are not independent from each other.
pub row_to_group: Vec<usize>,
}

Expand Down Expand Up @@ -184,10 +192,6 @@ impl DIAFrameInfo {
where
'a: 'b,
{
// let group = self
// .get_dia_frame_window_group(frame.index)
// .expect("Frame not in DIA group, non splittable frame passed to split_frame.");

let mut out_frames = Vec::new();
for scan_range in window_group.scan_ranges.iter() {
let slice_w_info: MsMsFrameSliceWindowInfo =
Expand All @@ -199,39 +203,6 @@ impl DIAFrameInfo {
Some(slice_w_info),
);
out_frames.push(frame_slice);

// TODO remove this old implementation
// for (i, scan_range) in window_group.scan_ranges.iter().enumerate() {

// scan_range.scan_start;
// scan_range.scan_end;

// let scan_offsets_use =
// &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)];
// let scan_start = scan_offsets_use[0];
// let mz_indptr_start = scan_offsets_use[0];
// let mz_indptr_end = *scan_offsets_use.last().unwrap();

// let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec();
// let intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec();

// let frame_window = FrameSlice {
// scan_offsets: scan_offsets_use
// .iter()
// .map(|x| (x - scan_start) as u64)
// .collect::<Vec<_>>(),
// tof_indices: tof_indices_keep,
// intensities: intensities_keep,
// index: frame.index,
// rt: frame.rt,
// frame_type: frame.frame_type,
// scan_start: scan_range.scan_start,
// group_id: window_group.id,
// quad_group_id: i,
// quad_row_id: scan_range.row_id,
// };

// out_frames.push(frame_window);
}

Ok(out_frames)
Expand Down Expand Up @@ -390,7 +361,8 @@ impl DiaFrameMsMsWindowInfo {
}
}

struct FrameInfoBuilder {
/// Bundles the path of a TDF file with a scan converter.
///
/// NOTE(review): presumably this is the input used to build the DIA frame
/// information from the file at `tdf_path`; the builder methods are not
/// visible in this hunk — confirm against the `impl` block.
#[derive(Debug)]
pub struct FrameInfoBuilder {
/// Path to the TDF file, stored as an owned string.
pub tdf_path: String,
/// Converter from the `timsrust` crate; going by the type name it maps
/// scan numbers to ion mobility — TODO confirm.
pub scan_converter: timsrust::Scan2ImConverter,
}
Expand Down
4 changes: 4 additions & 0 deletions tests/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

**.tdf
**.d
**.tdf_bin
11 changes: 11 additions & 0 deletions tests/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Build testing data

The main purpose of this is to have a reproducible way to generate
data for testing. This should keep the repo size small and allow for
ease of extension of the tests as we might see fit.

One of the things I am attempting to do is to keep all the data represented
in plain text and then build the binary formats that actually get used
from it. The main rationale for this is transparency, improved utility of
source control, and mild paranoia after the XZ exploit.
113 changes: 113 additions & 0 deletions tests/data/build.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/bin/bash
#
# Build the binary test fixtures from the plain-text TSV tables.
#
# For every `*_tdf` directory in the current working directory this script
# (re)creates `<dir>/data.d/`, builds `<dir>/data.d/analysis.tdf` (a SQLite
# database) from the TSV files stored in `<dir>/`, and creates an empty
# `<dir>/data.d/analysis.tdf_bin` next to it.
#
# Requirements: the `sqlite3` command-line shell, with an `.import` command
# that supports `--skip`.
# Usage: run from the directory that contains the `*_tdf` folders
# (tests/data in this repository).

# set -x # Display expansions
set -e
set -u
set -o pipefail

# Schema shared by every fixture. It does not depend on the directory being
# processed, so it is defined once instead of on every loop iteration.
# The backslash-newlines inside the double quotes are line continuations, so
# sqlite3 receives the whole schema as a single line.
#
# NOTE(review): diapasef_tdf/frames.tsv carries a trailing `Denoised` column
# that the Frames table below does not declare; sqlite3's `.import` is
# expected to drop the extra field with a warning — confirm this is intended.
tdf_create=" \
CREATE TABLE DiaFrameMsMsInfo ( \
Frame INTEGER PRIMARY KEY, \
WindowGroup INTEGER NOT NULL, \
FOREIGN KEY (Frame) REFERENCES Frames (Id), \
FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) \
); \
CREATE TABLE DiaFrameMsMsWindowGroups ( \
Id INTEGER PRIMARY KEY \
); \
CREATE TABLE DiaFrameMsMsWindows ( \
WindowGroup INTEGER NOT NULL, \
ScanNumBegin INTEGER NOT NULL, \
ScanNumEnd INTEGER NOT NULL, \
IsolationMz REAL NOT NULL, \
IsolationWidth REAL NOT NULL, \
CollisionEnergy REAL NOT NULL, \
PRIMARY KEY(WindowGroup, ScanNumBegin), \
FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) \
) WITHOUT ROWID; \
CREATE TABLE TimsCalibration ( \
Id INTEGER PRIMARY KEY, \
ModelType INTEGER NOT NULL, \
C0 \
, C1, C2, C3, C4, C5, C6, C7, C8, C9); \
CREATE TABLE MzCalibration ( \
Id INTEGER PRIMARY KEY, \
ModelType INTEGER NOT NULL, \
DigitizerTimebase REAL NOT NULL, \
DigitizerDelay REAL NOT NULL, \
T1 REAL NOT NULL, \
T2 REAL NOT NULL, \
dC1 REAL NOT NULL, \
dC2 REAL NOT NULL, \
C0 \
, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14); \
CREATE TABLE Frames ( \
Id INTEGER PRIMARY KEY, \
Time REAL NOT NULL, \
Polarity CHAR(1) CHECK (Polarity IN ('+', '-')) NOT NULL, \
ScanMode INTEGER NOT NULL, \
MsMsType INTEGER NOT NULL, \
TimsId INTEGER, \
MaxIntensity INTEGER NOT NULL, \
SummedIntensities INTEGER NOT NULL, \
NumScans INTEGER NOT NULL, \
NumPeaks INTEGER NOT NULL, \
MzCalibration INTEGER NOT NULL, \
T1 REAL NOT NULL, \
T2 REAL NOT NULL, \
TimsCalibration INTEGER NOT NULL, \
PropertyGroup INTEGER, \
AccumulationTime REAL NOT NULL, \
RampTime REAL NOT NULL, \
Pressure REAL, \
FOREIGN KEY (MzCalibration) REFERENCES MzCalibration (Id), \
FOREIGN KEY (TimsCalibration) REFERENCES TimsCalibration (Id), \
FOREIGN KEY (PropertyGroup) REFERENCES PropertyGroups (Id) \
); \
CREATE TABLE GlobalMetadata ( \
Key TEXT PRIMARY KEY, \
Value TEXT \
); \
"

# import_tsv LABEL TDF_FILE TSV_FILE TABLE
#
# Load a tab-separated file (skipping its header row) into TABLE of the
# SQLite database TDF_FILE, then print the first five rows as a sanity check.
import_tsv() {
    local label=$1
    local tdf=$2
    local tsv=$3
    local table=$4

    echo "${label} >>>"
    # The TSV path is single-quoted inside the dot-command so that a path
    # containing whitespace is still passed to .import as one argument.
    sqlite3 -cmd ".mode csv" -separator $'\t' "${tdf}" ".import --skip 1 '${tsv}' ${table}"
    sqlite3 "${tdf}" "SELECT * FROM ${table} LIMIT 5"
}

for x in *_tdf; do
    # Without nullglob an unmatched pattern stays literal ("*_tdf"); fail with
    # a clear message instead of an obscure mkdir error further down.
    if [ ! -d "$x" ]; then
        echo "No fixture directory found for '$x' (run this script from tests/data)" >&2
        exit 1
    fi

    echo "Processing $x"
    dotd_name=$x/data.d
    tdf_name=$dotd_name/analysis.tdf
    tdf_bin_name=$dotd_name/analysis.tdf_bin

    # Always start from a clean .d directory so stale tables never survive.
    if [ -d "$dotd_name" ]; then
        echo "Directory $dotd_name exists"
        rm -rf "$dotd_name"
    fi

    echo "Creating $dotd_name"
    mkdir "$dotd_name"

    echo "Creating tables"
    sqlite3 "${tdf_name}" "${tdf_create}"
    # Show schema
    sqlite3 "${tdf_name}" ".schema"

    import_tsv "DiaFrameMsMsWindowGroups" "${tdf_name}" "${x}/dia_frame_msms_window_groups.tsv" DiaFrameMsMsWindowGroups
    import_tsv "DiaFrameMsMsWindows" "${tdf_name}" "${x}/dia_frame_msms_windows.tsv" DiaFrameMsMsWindows
    import_tsv "DiaFrameMsMsInfo" "${tdf_name}" "${x}/dia_frame_msms_info.tsv" DiaFrameMsMsInfo
    import_tsv "Frames" "${tdf_name}" "${x}/frames.tsv" Frames
    import_tsv "Global Metadata" "${tdf_name}" "${x}/global_metadata.tsv" GlobalMetadata

    # The binary peak data is not needed by the tests; an empty placeholder
    # file is enough for the .d directory to have both expected files.
    echo "Creating tdf_bin"
    touch "${tdf_bin_name}"
done
5 changes: 5 additions & 0 deletions tests/data/diapasef_tdf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

# diaPASEF test data

This is a modified method that removes the intermediate window
from each window group (thus should not be used for acquisition of real data).
27 changes: 27 additions & 0 deletions tests/data/diapasef_tdf/dia_frame_msms_info.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Frame WindowGroup
2 1
3 2
4 3
5 4
6 5
7 6
8 7
9 8
11 1
12 2
13 3
14 4
15 5
16 6
17 7
18 8
20 1
21 2
22 3
23 4
24 5
25 6
26 7
27 8
29 1
30 2
9 changes: 9 additions & 0 deletions tests/data/diapasef_tdf/dia_frame_msms_window_groups.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Id
1
2
3
4
5
6
7
8
17 changes: 17 additions & 0 deletions tests/data/diapasef_tdf/dia_frame_msms_windows.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
WindowGroup ScanNumBegin ScanNumEnd IsolationMz IsolationWidth CollisionEnergy
1 100 397 812.5 25.0 42.9829890643986
1 545 701 412.5 25.0 25.2126366950182
2 100 372 837.5 25.0 43.5516403402187
2 529 701 437.5 25.0 25.591737545565
3 100 356 862.5 25.0 43.9307411907655
3 512 701 462.5 25.0 26.0182260024301
4 100 331 887.5 25.0 44.546780072904
4 487 701 487.5 25.0 26.5868772782503
5 100 315 912.5 25.0 44.9258809234508
5 471 701 512.5 25.0 26.9659781287971
6 100 298 937.5 25.0 45.3049817739976
6 455 701 537.5 25.0 27.3450789793439
7 100 273 962.5 25.0 45.9210206561361
7 430 701 562.5 25.0 27.9611178614824
8 100 257 987.5 25.0 46.3001215066829
8 413 701 587.5 25.0 28.3402187120292
31 changes: 31 additions & 0 deletions tests/data/diapasef_tdf/frames.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Id Time Polarity ScanMode MsMsType TimsId MaxIntensity SummedIntensities NumScans NumPeaks MzCalibration T1 T2 TimsCalibration PropertyGroup AccumulationTime RampTime Pressure Denoised
1 0.629856 + 9 0 64 5703 20584600 702 207045 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0
2 0.711303 + 9 9 529662 353 765269 702 8629 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0
3 0.793147 + 9 9 558593 333 810787 702 9216 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0
4 0.874307 + 9 9 589670 352 806706 702 9166 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0
5 0.955803 + 9 9 620635 361 870237 702 9841 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0
6 1.037344 + 9 9 653825 339 762751 702 8683 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
7 1.118971 + 9 9 683862 307 215401 702 2643 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
8 1.200507 + 9 9 694381 285 219393 702 2698 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
9 1.282113 + 9 9 705082 323 217661 702 2699 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
10 1.363679 + 9 0 715791 4700 14455769 702 137731 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
11 1.445201 + 9 9 1085008 335 182136 702 2171 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
12 1.526763 + 9 9 1093720 265 197002 702 2385 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
13 1.608378 + 9 9 1103213 335 194271 702 2390 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
14 1.689932 + 9 9 1112731 275 213692 702 2651 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
15 1.771684 + 9 9 1123167 291 194091 702 2387 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
16 1.853082 + 9 9 1132719 279 227903 702 2734 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
17 1.934854 + 9 9 1143499 339 218924 702 2678 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
18 2.01618 + 9 9 1154098 311 219963 702 2726 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0
19 2.098584 + 9 0 1164901 4334 14552572 702 138252 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
20 2.179965 + 9 9 1535400 313 180439 702 2208 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
21 2.261624 + 9 9 1544189 319 203342 702 2463 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
22 2.343267 + 9 9 1553941 281 192025 702 2387 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
23 2.424795 + 9 9 1563460 308 244388 702 2911 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
24 2.506324 + 9 9 1574834 260 211498 702 2571 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
25 2.587853 + 9 9 1585037 311 225892 702 2838 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
26 2.669553 + 9 9 1596209 273 202152 702 2507 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
27 2.751017 + 9 9 1606208 285 213091 702 2624 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
28 2.832589 + 9 0 1616628 4685 14300741 702 135361 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
29 2.914227 + 9 9 1979387 332 179155 702 2233 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
30 2.995541 + 9 9 1988354 284 182142 702 2284 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0
6 changes: 6 additions & 0 deletions tests/data/diapasef_tdf/global_metadata.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Key Value
OneOverK0AcqRangeLower 0.600000
OneOverK0AcqRangeUpper 1.600000
MzAcqRangeLower 50.000000
MzAcqRangeUpper 1700.000000
DigitizerNumSamples 434064
5 changes: 5 additions & 0 deletions tests/data/synchropasef_tdf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

# Synchro Pasef

This is a section of the data from the synchro-PASEF manuscript.
SPECIFICALLY, I am keeping only scan numbers 1-200 (1-918 in the original data).
31 changes: 31 additions & 0 deletions tests/data/synchropasef_tdf/dia_frame_msms_info.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Frame WindowGroup
2 1
3 2
4 3
5 4
7 1
8 2
9 3
10 4
12 1
13 2
14 3
15 4
17 1
18 2
19 3
20 4
22 1
23 2
24 3
25 4
27 1
28 2
29 3
30 4
32 1
33 2
34 3
35 4
37 1
38 2
5 changes: 5 additions & 0 deletions tests/data/synchropasef_tdf/dia_frame_msms_window_groups.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Id
1
2
3
4
Loading

0 comments on commit b70c05d

Please sign in to comment.