Skip to content

Commit

Permalink
start a qc command set
Browse files Browse the repository at this point in the history
  • Loading branch information
mrvollger committed Aug 19, 2024
1 parent caee220 commit b40b8a5
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 11 deletions.
2 changes: 2 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod footprint_opts;
mod nucleosome_opts;
mod pileup_opts;
mod predict_opts;
mod qc_opts;
mod strip_basemods_opts;

// include the subcommand modules as top level functions and structs in the cli module
Expand All @@ -27,6 +28,7 @@ pub use footprint_opts::*;
pub use nucleosome_opts::*;
pub use pileup_opts::*;
pub use predict_opts::*;
pub use qc_opts::*;
pub use strip_basemods_opts::*;

//
Expand Down
9 changes: 9 additions & 0 deletions src/cli/qc_opts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use crate::utils::input_bam::InputBam;
use clap::Args;
use std::fmt::Debug;

#[derive(Args, Debug)]
pub struct QcOpts {
#[clap(flatten)]
pub input: InputBam,
}
2 changes: 2 additions & 0 deletions src/subcommands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@ pub mod footprint;
pub mod pileup;
/// m6A prediction
pub mod predict_m6a;
/// Collect QC metrics
pub mod qc;
/// Remove base modifications from a bam record
pub mod strip_basemods;
108 changes: 108 additions & 0 deletions src/subcommands/qc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use crate::cli::QcOpts;
use crate::fiber;
use anyhow::Result;
use ordered_float::OrderedFloat;
use std::collections::HashMap;

pub struct QcStats {
pub fiber_count: i64,
// hashmap that stores lengths of fibers
pub fiber_lengths: HashMap<i64, i64>,
// length of msps
pub msp_lengths: HashMap<i64, i64>,
// lengths of nucleosomes
pub nuc_lengths: HashMap<i64, i64>,
// number of ccs passes per read
pub ccs_passes: HashMap<OrderedFloat<f32>, i64>,
/// m6as per read
pub m6a_count: HashMap<i64, i64>,
// m6as over total AT count
pub m6a_ratio: HashMap<OrderedFloat<f32>, i64>,
// add rq to stats
pub rq: HashMap<OrderedFloat<f32>, i64>,
}

impl QcStats {
pub fn new() -> Self {
Self {
fiber_count: 0,
fiber_lengths: HashMap::new(),
msp_lengths: HashMap::new(),
nuc_lengths: HashMap::new(),
ccs_passes: HashMap::new(),
m6a_count: HashMap::new(),
m6a_ratio: HashMap::new(),
rq: HashMap::new(),
}
}

pub fn add_read_to_stats(&mut self, fiber: &fiber::FiberseqData) {
self.full_read_stats(fiber);
self.add_m6a_stats(fiber);
Self::add_range_lengths(&mut self.msp_lengths, &fiber.msp);
Self::add_range_lengths(&mut self.nuc_lengths, &fiber.nuc);
}

fn full_read_stats(&mut self, fiber: &fiber::FiberseqData) {
self.fiber_count += 1;
self.fiber_lengths
.entry(fiber.record.seq_len() as i64)
.and_modify(|e| *e += 1)
.or_insert(1);
self.ccs_passes
.entry(OrderedFloat(fiber.ec))
.and_modify(|e| *e += 1)
.or_insert(1);
if let Some(rq) = fiber.get_rq() {
self.rq
.entry(OrderedFloat(rq))
.and_modify(|e| *e += 1)
.or_insert(1);
}
}

fn add_m6a_stats(&mut self, fiber: &fiber::FiberseqData) {
let m6a_count = fiber.m6a.starts.len() as i64;
self.m6a_count
.entry(m6a_count)
.and_modify(|e| *e += 1)
.or_insert(1);

// now get the ratio
let total_at = fiber
.record
.seq()
.as_bytes()
.iter()
.filter(|&b| *b == b'A' || *b == b'T')
.count();

let ratio = m6a_count as f32 / total_at as f32;
self.m6a_ratio
.entry(OrderedFloat(ratio))
.and_modify(|e| *e += 1)
.or_insert(1);
}

fn add_range_lengths(hashmap: &mut HashMap<i64, i64>, range: &crate::utils::bamranges::Ranges) {
for r in range.lengths.iter().flatten() {
hashmap.entry(*r).and_modify(|e| *e += 1).or_insert(1);
}
}
}

impl Default for QcStats {
fn default() -> Self {
Self::new()
}
}

pub fn run_qc(opts: &mut QcOpts) -> Result<(), anyhow::Error> {
let mut bam = opts.input.bam_reader();
let mut stats = QcStats::default();

for fiber in opts.input.fibers(&mut bam) {
stats.add_read_to_stats(&fiber);
}
Ok(())
}
3 changes: 1 addition & 2 deletions tests/basemods.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use env_logger::{Builder, Target};
use fibertools_rs::utils::basemods::*;
use log;
use rust_htslib::{bam, bam::Read};

#[test]
Expand All @@ -11,7 +10,7 @@ fn test_mods_do_not_change() {
.target(Target::Stderr)
.filter(None, log::LevelFilter::Debug)
.init();
let mut bam = bam::Reader::from_path(&"tests/data/all.bam").unwrap();
let mut bam = bam::Reader::from_path("tests/data/all.bam").unwrap();
for rec in bam.records() {
let mut rec = rec.unwrap();
let mods = BaseMods::new(&rec, 0);
Expand Down
3 changes: 1 addition & 2 deletions tests/bio_io.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use fibertools_rs::utils::bio_io;
use log;
use rust_htslib::{bam, bam::Read};

#[test]
pub fn test_msp_nuc_tags() {
let mut bam = bam::Reader::from_path(&"tests/data/all.bam").unwrap();
let mut bam = bam::Reader::from_path("tests/data/all.bam").unwrap();
for record in bam.records() {
let record = record.unwrap();
let _n_s = bio_io::get_u32_tag(&record, b"ns");
Expand Down
8 changes: 3 additions & 5 deletions tests/extract_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ fn get_fiber_data_from_test_bam(bam_file: &str) -> Vec<fibertools_rs::fiber::Fib
bam.records()
.map(|r| {
let record = r.unwrap();
let fiber_data =
fibertools_rs::fiber::FiberseqData::new(record, None, &FiberFilters::default());
fiber_data

fibertools_rs::fiber::FiberseqData::new(record, None, &FiberFilters::default())
})
.collect::<Vec<_>>()
}
Expand Down Expand Up @@ -44,8 +43,7 @@ fn test_many_msps() {
.msp
.starts
.iter()
.flatten()
.map(|&x| x)
.flatten().copied()
.chain(fiber_data.msp.ends.iter().flatten().map(|&x| x - 1))
.collect::<Vec<_>>();
eprintln!("m6a: {:?}", m6a);
Expand Down
3 changes: 1 addition & 2 deletions tests/m6a_prediction_test.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use fibertools_rs;
use fibertools_rs::utils::bio_io;
use fibertools_rs::utils::input_bam::FiberFilters;
use rust_htslib::bam::Reader;
Expand All @@ -25,7 +24,7 @@ fn run_prediction_and_count_qual(inbam: String) -> usize {
let named_tmp_bam_out = NamedTempFile::new().unwrap();
let out_str = named_tmp_bam_out.path().to_str().unwrap();
let mut predict_options = fibertools_rs::cli::PredictM6AOptions::default();
predict_options.input.bam = inbam.clone();
predict_options.input.bam.clone_from(&inbam);
predict_options.input.global.threads = 1;
predict_options.out = out_str.to_string();

Expand Down

0 comments on commit b40b8a5

Please sign in to comment.