diff --git a/src/bio_io/mod.rs b/src/bio_io/mod.rs index 9b9d0c30..0cdb0e21 100644 --- a/src/bio_io/mod.rs +++ b/src/bio_io/mod.rs @@ -125,17 +125,14 @@ pub fn buffer_from>( BAM IO */ -/// Write to a bam file. -pub fn program_bam_writer( +pub fn program_bam_writer_from_header( out: &str, - template_bam: &bam::Reader, + mut header: bam::Header, threads: usize, program_name: &str, program_id: &str, program_version: &str, ) -> bam::Writer { - let mut header = bam::Header::from_template(template_bam.header()); - // add to the header let header_string = String::from_utf8_lossy(&header.to_bytes()).to_string(); let mut header_rec = bam::header::HeaderRecord::new(b"PG"); @@ -172,6 +169,26 @@ pub fn program_bam_writer( out } +/// Write to a bam file. +pub fn program_bam_writer( + out: &str, + template_bam: &bam::Reader, + threads: usize, + program_name: &str, + program_id: &str, + program_version: &str, +) -> bam::Writer { + let header = bam::Header::from_template(template_bam.header()); + program_bam_writer_from_header( + out, + header, + threads, + program_name, + program_id, + program_version, + ) +} + /// Open bam file pub fn bam_reader(bam: &str, threads: usize) -> bam::Reader { let mut bam = if bam == "-" { diff --git a/src/center.rs b/src/center.rs index cb52c069..52bebf38 100644 --- a/src/center.rs +++ b/src/center.rs @@ -350,7 +350,7 @@ pub fn center( } } -pub fn center_fiberdata(center_opts: &CenterOptions) -> anyhow::Result<()> { +pub fn center_fiberdata(center_opts: &mut CenterOptions) -> anyhow::Result<()> { let mut bam = center_opts.input.indexed_bam_reader(); let center_positions = center::read_center_positions(¢er_opts.bed)?; diff --git a/src/cli.rs b/src/cli.rs index e3acb545..c9165630 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -32,7 +32,7 @@ pub fn get_styles() -> clap::builder::Styles { .placeholder(placeholder_style) } -#[derive(Debug, Args, PartialEq, Eq)] +#[derive(Debug, Args)] pub struct InputBam { /// Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A calls. #[clap(default_value = "-", value_hint = ValueHint::AnyPath)] @@ -51,16 +51,21 @@ pub struct InputBam { pub min_ml_score: u8, #[clap(flatten)] pub global: GlobalOpts, + #[clap(skip)] + pub header: Option, } impl InputBam { - pub fn bam_reader(&self) -> bam::Reader { - bio_io::bam_reader(&self.bam, self.global.threads) + pub fn bam_reader(&mut self) -> bam::Reader { + let bam = bio_io::bam_reader(&self.bam, self.global.threads); + self.header = Some(bam::Header::from_template(bam.header())); + bam } - pub fn indexed_bam_reader(&self) -> bam::IndexedReader { + pub fn indexed_bam_reader(&mut self) -> bam::IndexedReader { let mut bam = bam::IndexedReader::from_path(&self.bam).expect("unable to open indexed bam file"); + self.header = Some(bam::Header::from_template(bam.header())); bam.set_threads(self.global.threads).unwrap(); bam } @@ -71,6 +76,24 @@ impl InputBam { fr.set_bit_flag_filter(self.bit_flag); fr } + + pub fn bam_writer(&self, out: &str) -> bam::Writer { + let v = bam::HeaderView::from_header(self.header.as_ref().expect( + "Input bam must be opened before creating a writer with the input bam as a template.", + )); + let header = bam::Header::from_template(&v); + let program_name = "fibertools-rs"; + let program_id = "ft"; + let program_version = super::VERSION; + bio_io::program_bam_writer_from_header( + out, + header, + self.global.threads, + program_name, + program_id, + program_version, + ) + } } impl std::default::Default for InputBam { @@ -80,6 +103,7 @@ impl std::default::Default for InputBam { bit_flag: 0, min_ml_score: MIN_ML_SCORE.parse().unwrap(), global: GlobalOpts::default(), + header: None, } } } @@ -194,7 +218,7 @@ pub fn make_cli_app() -> Command { Cli::command() } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct PredictM6AOptions { #[clap(flatten)] pub input: InputBam, @@ -233,7 +257,7 @@ impl std::default::Default for PredictM6AOptions { } } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct FireOptions { #[clap(flatten)] pub input: InputBam, @@ -280,7 +304,7 @@ pub struct FireOptions { pub feats_to_text: bool, } -#[derive(Args, Debug, PartialEq, Eq, Clone)] +#[derive(Args, Debug, Clone)] pub struct NucleosomeParameters { /// Minium nucleosome length #[clap(short, long, default_value = NUC_LEN)] @@ -311,7 +335,7 @@ impl std::default::Default for NucleosomeParameters { } } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct AddNucleosomeOptions { #[clap(flatten)] pub input: InputBam, @@ -334,7 +358,7 @@ pub struct DecoratorOptions { pub decorator: String, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct FootprintOptions { #[clap(flatten)] pub input: InputBam, @@ -349,7 +373,7 @@ pub struct FootprintOptions { pub out: String, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct CenterOptions { #[clap(flatten)] pub input: InputBam, @@ -371,7 +395,7 @@ pub struct CenterOptions { pub simplify: bool, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct ExtractOptions { #[clap(flatten)] pub input: InputBam, @@ -409,7 +433,7 @@ pub struct ExtractOptions { pub simplify: bool, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct ClearKineticsOptions { #[clap(flatten)] pub input: InputBam, @@ -418,7 +442,7 @@ pub struct ClearKineticsOptions { pub out: String, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct StripBasemodsOptions { #[clap(flatten)] pub input: InputBam, @@ -437,7 +461,7 @@ pub struct CompletionOptions { pub shell: Shell, } -#[derive(Args, Debug, PartialEq, Eq)] +#[derive(Args, Debug)] pub struct PileupOptions { #[clap(flatten)] pub input: InputBam, diff --git a/src/decorator.rs b/src/decorator.rs index 2587f0f7..be71743b 100644 --- a/src/decorator.rs +++ b/src/decorator.rs @@ -210,7 +210,7 @@ pub fn decorator_from_bam(fiber: &FiberseqData) -> (String, Vec) { (bed12_from_fiber(fiber), decorators) } -pub fn get_decorators_from_bam(dec_opts: &DecoratorOptions) -> Result<(), anyhow::Error> { +pub fn get_decorators_from_bam(dec_opts: &mut DecoratorOptions) -> Result<(), anyhow::Error> { let mut bam = dec_opts.input.bam_reader(); let mut bed12 = bio_io::writer(&dec_opts.bed12)?; let mut decorator = bio_io::writer(&dec_opts.decorator)?; diff --git a/src/extract.rs b/src/extract.rs index ab48bcdc..b8f95915 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -126,7 +126,7 @@ pub fn process_bam_chunk(fiber_data: Vec, out_files: &mut FiberOut } } -pub fn extract_contained(extract_opts: &ExtractOptions) { +pub fn extract_contained(extract_opts: &mut ExtractOptions) { let mut bam = extract_opts.input.bam_reader(); let mut out_files = FiberOut::new( &extract_opts.m6a, diff --git a/src/fire.rs b/src/fire.rs index 519bc9aa..5901aac6 100644 --- a/src/fire.rs +++ b/src/fire.rs @@ -508,7 +508,7 @@ pub fn add_fire_to_rec( log::trace!("precisions: {:?}", precisions); } -pub fn add_fire_to_bam(fire_opts: &FireOptions) -> Result<(), anyhow::Error> { +pub fn add_fire_to_bam(fire_opts: &mut FireOptions) -> Result<(), anyhow::Error> { let (model, precision_table) = get_model(fire_opts); let mut bam = fire_opts.input.bam_reader(); @@ -536,7 +536,7 @@ pub fn add_fire_to_bam(fire_opts: &FireOptions) -> Result<(), anyhow::Error> { } // add FIRE prediction to bam file else { - let mut out = bam_writer(&fire_opts.out, &bam, 8); + let mut out = fire_opts.input.bam_writer(&fire_opts.out); let fibers = fire_opts.input.fibers(&mut bam); let mut skip_because_no_m6a = 0; let mut skip_because_num_msp = 0; diff --git a/src/footprint.rs b/src/footprint.rs index 5da6722a..09ffb68b 100644 --- a/src/footprint.rs +++ b/src/footprint.rs @@ -321,7 +321,7 @@ pub fn define_footprint(fiber: FiberseqData, bed_rec: CenterPosition, _modules: {} } -pub fn start_finding_footprints(opts: &FootprintOptions) -> Result<(), anyhow::Error> { +pub fn start_finding_footprints(opts: &mut FootprintOptions) -> Result<(), anyhow::Error> { let yaml_buff = bio_io::buffer_from(&opts.yaml)?; let yaml: FootprintYaml = serde_yaml::from_reader(yaml_buff)?; yaml.check_for_valid_input()?; diff --git a/src/lib.rs b/src/lib.rs index 22f45bb9..5839429f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -109,9 +109,10 @@ pub fn join_by_str_option_can_skip(vals: &[Option], sep: &str, skip_none: b } /// clear kinetics from a hifi bam -pub fn clear_kinetics(opts: &cli::ClearKineticsOptions) { +pub fn clear_kinetics(opts: &mut cli::ClearKineticsOptions) { let mut bam = opts.input.bam_reader(); - let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads); + let mut out = opts.input.bam_writer(&opts.out); + //let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads); let bar = bio_io::no_length_progress_bar(); for rec in bam.records() { @@ -126,7 +127,7 @@ pub fn clear_kinetics(opts: &cli::ClearKineticsOptions) { } bar.finish(); } - +/* /// Write to a bam file. pub fn bam_writer(out: &str, template_bam: &bam::Reader, threads: usize) -> bam::Writer { let program_name = "fibertools-rs"; @@ -141,6 +142,7 @@ pub fn bam_writer(out: &str, template_bam: &bam::Reader, threads: usize) -> bam: program_version, ) } + */ pub fn region_parser(rgn: &str) -> (FetchDefinition<'_>, String) { if rgn.contains(':') { diff --git a/src/main.rs b/src/main.rs index 53497f3c..d7473bf5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ pub fn main() -> Result<(), Error> { colored::control::set_override(true); console::set_colors_enabled(true); let pg_start = Instant::now(); - let args = cli::make_cli_parse(); + let mut args = cli::make_cli_parse(); let matches = cli::make_cli_app().get_matches(); let subcommand = matches.subcommand_name().unwrap(); @@ -45,7 +45,7 @@ pub fn main() -> Result<(), Error> { #[cfg(feature = "tch")] tch::set_num_threads(args.global.threads.try_into().unwrap()); - match &args.command { + match &mut args.command { Some(Commands::Extract(extract_opts)) => { extract::extract_contained(extract_opts); } diff --git a/src/nucleosomes.rs b/src/nucleosomes.rs index dd76fe68..77b6cd68 100644 --- a/src/nucleosomes.rs +++ b/src/nucleosomes.rs @@ -229,9 +229,9 @@ pub fn add_nucleosomes_to_record( } } -pub fn add_nucleosomes_to_bam(nuc_opts: &AddNucleosomeOptions) { +pub fn add_nucleosomes_to_bam(nuc_opts: &mut AddNucleosomeOptions) { let mut bam = nuc_opts.input.bam_reader(); - let mut out = bam_writer(&nuc_opts.out, &bam, nuc_opts.input.global.threads); + let mut out = nuc_opts.input.bam_writer(&nuc_opts.out); // read in bam data let bam_chunk_iter = BamChunk::new(bam.records(), None); diff --git a/src/pileup.rs b/src/pileup.rs index b5c660d4..8bd0fdbf 100644 --- a/src/pileup.rs +++ b/src/pileup.rs @@ -14,7 +14,7 @@ use rust_htslib::bam::{FetchDefinition, IndexedReader}; const MIN_FIRE_COVERAGE: i32 = 4; const MIN_FIRE_QUAL: u8 = 229; // floor(255*0.9) -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub struct FireRow<'a> { pub coverage: &'a i32, pub fire_coverage: &'a i32, @@ -26,6 +26,18 @@ pub struct FireRow<'a> { pileup_opts: &'a PileupOptions, } +impl PartialEq for FireRow<'_> { + fn eq(&self, other: &Self) -> bool { + self.coverage == other.coverage + && self.fire_coverage == other.fire_coverage + && self.score == other.score + && self.nuc_coverage == other.nuc_coverage + && self.msp_coverage == other.msp_coverage + && self.cpg_coverage == other.cpg_coverage + && self.m6a_coverage == other.m6a_coverage + } +} + impl std::fmt::Display for FireRow<'_> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let mut rtn = format!( @@ -354,7 +366,7 @@ fn run_rgn( } /// extract existing fire calls into a bed9+ like file -pub fn pileup_track(pileup_opts: &PileupOptions) -> Result<(), anyhow::Error> { +pub fn pileup_track(pileup_opts: &mut PileupOptions) -> Result<(), anyhow::Error> { // read in the bam from stdin or from a file let mut bam = pileup_opts.input.indexed_bam_reader(); let header = bam.header().clone(); diff --git a/src/predict_m6a/mod.rs b/src/predict_m6a/mod.rs index ab440d48..9780407b 100644 --- a/src/predict_m6a/mod.rs +++ b/src/predict_m6a/mod.rs @@ -472,13 +472,9 @@ fn get_m6a_data_windows(record: &bam::Record) -> Option<(DataWidows, DataWidows) Some((a_data, t_data)) } -pub fn read_bam_into_fiberdata(predict_options: &PredictM6AOptions) { +pub fn read_bam_into_fiberdata(predict_options: &mut PredictM6AOptions) { let mut bam = predict_options.input.bam_reader(); - let mut out = bam_writer( - &predict_options.out, - &bam, - predict_options.input.global.threads, - ); + let mut out = predict_options.input.bam_writer(&predict_options.out); let header = bam::Header::from_template(bam.header()); // log the options log::info!( diff --git a/src/strip_basemods.rs b/src/strip_basemods.rs index 76312fec..79069d28 100644 --- a/src/strip_basemods.rs +++ b/src/strip_basemods.rs @@ -6,9 +6,9 @@ use rayon::prelude::IntoParallelRefMutIterator; use rust_htslib::bam::Read; use rust_htslib::bam::Record; -pub fn strip_base_mods(opts: &StripBasemodsOptions) { +pub fn strip_base_mods(opts: &mut StripBasemodsOptions) { let mut bam = opts.input.bam_reader(); - let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads); + let mut out = opts.input.bam_writer(&opts.out); // read in bam data let bam_chunk_iter = BamChunk::new(bam.records(), None); diff --git a/tests/m6a_prediction_test.rs b/tests/m6a_prediction_test.rs index 289729f5..6bcbafb5 100644 --- a/tests/m6a_prediction_test.rs +++ b/tests/m6a_prediction_test.rs @@ -30,7 +30,7 @@ fn run_prediction_and_count_qual(inbam: String) -> usize { { // run prediction - fibertools_rs::predict_m6a::read_bam_into_fiberdata(&predict_options); + fibertools_rs::predict_m6a::read_bam_into_fiberdata(&mut predict_options); } // read in the output bam and check the sum of the quality scores