From 68550723237be3674783a86d11b3277ec09339d3 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Fri, 27 Oct 2023 14:54:44 -0700 Subject: [PATCH] feat: add simplify options to center --- src/center.rs | 12 +++++++++++- src/cli.rs | 3 +++ src/main.rs | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/center.rs b/src/center.rs index 2cfd4173..954cbbd7 100644 --- a/src/center.rs +++ b/src/center.rs @@ -22,6 +22,7 @@ pub struct CenteredFiberData { center_position: CenterPosition, pub offset: i64, pub reference: bool, + pub simplify: bool, } impl CenteredFiberData { @@ -30,6 +31,7 @@ impl CenteredFiberData { center_position: CenterPosition, dist: Option, reference: bool, + simplify: bool, ) -> Option { let (ref_offset, mol_offset) = CenteredFiberData::find_offsets(&fiber.record, &center_position); @@ -43,6 +45,7 @@ impl CenteredFiberData { center_position, offset, reference, + simplify, }) } /// find both the ref and mol offsets @@ -75,6 +78,9 @@ impl CenteredFiberData { /// Get the sequence pub fn subset_sequence(&self) -> String { + if self.simplify { + return "N".to_string(); + } let dist = if let Some(dist) = self.dist { dist } else { 0 }; let seq = self.fiber.record.seq().as_bytes(); @@ -289,6 +295,7 @@ pub fn center( wide: bool, dist: Option, reference: bool, + simplify: bool, buffer: &mut Box, ) { let fiber_data = FiberseqData::from_records(records, header_view, min_ml_score); @@ -298,7 +305,8 @@ pub fn center( fiber_data .into_par_iter() .map(|fiber| { - match CenteredFiberData::new(fiber, center_position.clone(), dist, reference) { + match CenteredFiberData::new(fiber, center_position.clone(), dist, reference, simplify) + { Some(centered_fiber) => { if wide { centered_fiber.write() @@ -335,6 +343,7 @@ pub fn center_fiberdata( wide: bool, dist: Option, reference: bool, + simplify: bool, ) { // header needed for the contig name...
let header = bam::Header::from_template(bam.header()); @@ -378,6 +387,7 @@ pub fn center_fiberdata( wide, dist, reference, + simplify, &mut buffer, ); pb.inc(1); diff --git a/src/cli.rs b/src/cli.rs index 1b113245..e5cf4ef9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -231,6 +231,9 @@ pub enum Commands { /// Return relative reference position instead of relative molecular position #[clap(short, long)] reference: bool, + /// Replace the sequence output column with just "N". + #[clap(short, long)] + simplify: bool, }, /// Remove HiFi kinetics tags from the input bam file ClearKinetics { diff --git a/src/main.rs b/src/main.rs index b6adcacd..78c13378 100644 --- a/src/main.rs +++ b/src/main.rs @@ -106,6 +106,7 @@ pub fn main() -> Result<(), Error> { dist, wide, reference, + simplify, }) => { // read in the bam from stdin or from a file let mut bam = bam::IndexedReader::from_path(bam)?; @@ -118,6 +119,7 @@ pub fn main() -> Result<(), Error> { *wide, *dist, *reference, + *simplify, ); } #[cfg(feature = "predict")]