Skip to content

Commit

Permalink
feat: InputBam now can make a template for an output bam
Browse files Browse the repository at this point in the history
  • Loading branch information
mrvollger committed Jun 2, 2024
1 parent 0a2b7ee commit 01bd5f7
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 43 deletions.
27 changes: 22 additions & 5 deletions src/bio_io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,14 @@ pub fn buffer_from<P: AsRef<Path>>(
BAM IO
*/

/// Write to a bam file.
pub fn program_bam_writer(
pub fn program_bam_writer_from_header(
out: &str,
template_bam: &bam::Reader,
mut header: bam::Header,
threads: usize,
program_name: &str,
program_id: &str,
program_version: &str,
) -> bam::Writer {
let mut header = bam::Header::from_template(template_bam.header());

// add to the header
let header_string = String::from_utf8_lossy(&header.to_bytes()).to_string();
let mut header_rec = bam::header::HeaderRecord::new(b"PG");
Expand Down Expand Up @@ -172,6 +169,26 @@ pub fn program_bam_writer(
out
}

/// Write to a bam file.
pub fn program_bam_writer(
out: &str,
template_bam: &bam::Reader,
threads: usize,
program_name: &str,
program_id: &str,
program_version: &str,
) -> bam::Writer {
let header = bam::Header::from_template(template_bam.header());
program_bam_writer_from_header(
out,
header,
threads,
program_name,
program_id,
program_version,
)
}

/// Open bam file
pub fn bam_reader(bam: &str, threads: usize) -> bam::Reader {
let mut bam = if bam == "-" {
Expand Down
2 changes: 1 addition & 1 deletion src/center.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ pub fn center(
}
}

pub fn center_fiberdata(center_opts: &CenterOptions) -> anyhow::Result<()> {
pub fn center_fiberdata(center_opts: &mut CenterOptions) -> anyhow::Result<()> {
let mut bam = center_opts.input.indexed_bam_reader();
let center_positions = center::read_center_positions(&center_opts.bed)?;

Expand Down
52 changes: 38 additions & 14 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub fn get_styles() -> clap::builder::Styles {
.placeholder(placeholder_style)
}

#[derive(Debug, Args, PartialEq, Eq)]
#[derive(Debug, Args)]
pub struct InputBam {
/// Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A calls.
#[clap(default_value = "-", value_hint = ValueHint::AnyPath)]
Expand All @@ -51,16 +51,21 @@ pub struct InputBam {
pub min_ml_score: u8,
#[clap(flatten)]
pub global: GlobalOpts,
#[clap(skip)]
pub header: Option<bam::Header>,
}

impl InputBam {
pub fn bam_reader(&self) -> bam::Reader {
bio_io::bam_reader(&self.bam, self.global.threads)
pub fn bam_reader(&mut self) -> bam::Reader {
let bam = bio_io::bam_reader(&self.bam, self.global.threads);
self.header = Some(bam::Header::from_template(bam.header()));
bam
}

pub fn indexed_bam_reader(&self) -> bam::IndexedReader {
pub fn indexed_bam_reader(&mut self) -> bam::IndexedReader {
let mut bam =
bam::IndexedReader::from_path(&self.bam).expect("unable to open indexed bam file");
self.header = Some(bam::Header::from_template(bam.header()));
bam.set_threads(self.global.threads).unwrap();
bam
}
Expand All @@ -71,6 +76,24 @@ impl InputBam {
fr.set_bit_flag_filter(self.bit_flag);
fr
}

pub fn bam_writer(&self, out: &str) -> bam::Writer {
let v = bam::HeaderView::from_header(self.header.as_ref().expect(
"Input bam must be opened before creating a writer with the input bam as a template.",
));
let header = bam::Header::from_template(&v);
let program_name = "fibertools-rs";
let program_id = "ft";
let program_version = super::VERSION;
bio_io::program_bam_writer_from_header(
out,
header,
self.global.threads,
program_name,
program_id,
program_version,
)
}
}

impl std::default::Default for InputBam {
Expand All @@ -80,6 +103,7 @@ impl std::default::Default for InputBam {
bit_flag: 0,
min_ml_score: MIN_ML_SCORE.parse().unwrap(),
global: GlobalOpts::default(),
header: None,
}
}
}
Expand Down Expand Up @@ -194,7 +218,7 @@ pub fn make_cli_app() -> Command {
Cli::command()
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct PredictM6AOptions {
#[clap(flatten)]
pub input: InputBam,
Expand Down Expand Up @@ -233,7 +257,7 @@ impl std::default::Default for PredictM6AOptions {
}
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct FireOptions {
#[clap(flatten)]
pub input: InputBam,
Expand Down Expand Up @@ -280,7 +304,7 @@ pub struct FireOptions {
pub feats_to_text: bool,
}

#[derive(Args, Debug, PartialEq, Eq, Clone)]
#[derive(Args, Debug, Clone)]
pub struct NucleosomeParameters {
/// Minium nucleosome length
#[clap(short, long, default_value = NUC_LEN)]
Expand Down Expand Up @@ -311,7 +335,7 @@ impl std::default::Default for NucleosomeParameters {
}
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct AddNucleosomeOptions {
#[clap(flatten)]
pub input: InputBam,
Expand All @@ -334,7 +358,7 @@ pub struct DecoratorOptions {
pub decorator: String,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct FootprintOptions {
#[clap(flatten)]
pub input: InputBam,
Expand All @@ -349,7 +373,7 @@ pub struct FootprintOptions {
pub out: String,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct CenterOptions {
#[clap(flatten)]
pub input: InputBam,
Expand All @@ -371,7 +395,7 @@ pub struct CenterOptions {
pub simplify: bool,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct ExtractOptions {
#[clap(flatten)]
pub input: InputBam,
Expand Down Expand Up @@ -409,7 +433,7 @@ pub struct ExtractOptions {
pub simplify: bool,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct ClearKineticsOptions {
#[clap(flatten)]
pub input: InputBam,
Expand All @@ -418,7 +442,7 @@ pub struct ClearKineticsOptions {
pub out: String,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct StripBasemodsOptions {
#[clap(flatten)]
pub input: InputBam,
Expand All @@ -437,7 +461,7 @@ pub struct CompletionOptions {
pub shell: Shell,
}

#[derive(Args, Debug, PartialEq, Eq)]
#[derive(Args, Debug)]
pub struct PileupOptions {
#[clap(flatten)]
pub input: InputBam,
Expand Down
2 changes: 1 addition & 1 deletion src/decorator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ pub fn decorator_from_bam(fiber: &FiberseqData) -> (String, Vec<Decorator>) {
(bed12_from_fiber(fiber), decorators)
}

pub fn get_decorators_from_bam(dec_opts: &DecoratorOptions) -> Result<(), anyhow::Error> {
pub fn get_decorators_from_bam(dec_opts: &mut DecoratorOptions) -> Result<(), anyhow::Error> {
let mut bam = dec_opts.input.bam_reader();
let mut bed12 = bio_io::writer(&dec_opts.bed12)?;
let mut decorator = bio_io::writer(&dec_opts.decorator)?;
Expand Down
2 changes: 1 addition & 1 deletion src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ pub fn process_bam_chunk(fiber_data: Vec<FiberseqData>, out_files: &mut FiberOut
}
}

pub fn extract_contained(extract_opts: &ExtractOptions) {
pub fn extract_contained(extract_opts: &mut ExtractOptions) {
let mut bam = extract_opts.input.bam_reader();
let mut out_files = FiberOut::new(
&extract_opts.m6a,
Expand Down
4 changes: 2 additions & 2 deletions src/fire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ pub fn add_fire_to_rec(
log::trace!("precisions: {:?}", precisions);
}

pub fn add_fire_to_bam(fire_opts: &FireOptions) -> Result<(), anyhow::Error> {
pub fn add_fire_to_bam(fire_opts: &mut FireOptions) -> Result<(), anyhow::Error> {
let (model, precision_table) = get_model(fire_opts);
let mut bam = fire_opts.input.bam_reader();

Expand Down Expand Up @@ -536,7 +536,7 @@ pub fn add_fire_to_bam(fire_opts: &FireOptions) -> Result<(), anyhow::Error> {
}
// add FIRE prediction to bam file
else {
let mut out = bam_writer(&fire_opts.out, &bam, 8);
let mut out = fire_opts.input.bam_writer(&fire_opts.out);
let fibers = fire_opts.input.fibers(&mut bam);
let mut skip_because_no_m6a = 0;
let mut skip_because_num_msp = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/footprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ pub fn define_footprint(fiber: FiberseqData, bed_rec: CenterPosition, _modules:
{}
}

pub fn start_finding_footprints(opts: &FootprintOptions) -> Result<(), anyhow::Error> {
pub fn start_finding_footprints(opts: &mut FootprintOptions) -> Result<(), anyhow::Error> {
let yaml_buff = bio_io::buffer_from(&opts.yaml)?;
let yaml: FootprintYaml = serde_yaml::from_reader(yaml_buff)?;
yaml.check_for_valid_input()?;
Expand Down
8 changes: 5 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,10 @@ pub fn join_by_str_option_can_skip(vals: &[Option<i64>], sep: &str, skip_none: b
}

/// clear kinetics from a hifi bam
pub fn clear_kinetics(opts: &cli::ClearKineticsOptions) {
pub fn clear_kinetics(opts: &mut cli::ClearKineticsOptions) {
let mut bam = opts.input.bam_reader();
let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads);
let mut out = opts.input.bam_writer(&opts.out);
//let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads);

let bar = bio_io::no_length_progress_bar();
for rec in bam.records() {
Expand All @@ -126,7 +127,7 @@ pub fn clear_kinetics(opts: &cli::ClearKineticsOptions) {
}
bar.finish();
}

/*
/// Write to a bam file.
pub fn bam_writer(out: &str, template_bam: &bam::Reader, threads: usize) -> bam::Writer {
let program_name = "fibertools-rs";
Expand All @@ -141,6 +142,7 @@ pub fn bam_writer(out: &str, template_bam: &bam::Reader, threads: usize) -> bam:
program_version,
)
}
*/

pub fn region_parser(rgn: &str) -> (FetchDefinition<'_>, String) {
if rgn.contains(':') {
Expand Down
4 changes: 2 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub fn main() -> Result<(), Error> {
colored::control::set_override(true);
console::set_colors_enabled(true);
let pg_start = Instant::now();
let args = cli::make_cli_parse();
let mut args = cli::make_cli_parse();
let matches = cli::make_cli_app().get_matches();
let subcommand = matches.subcommand_name().unwrap();

Expand Down Expand Up @@ -45,7 +45,7 @@ pub fn main() -> Result<(), Error> {
#[cfg(feature = "tch")]
tch::set_num_threads(args.global.threads.try_into().unwrap());

match &args.command {
match &mut args.command {
Some(Commands::Extract(extract_opts)) => {
extract::extract_contained(extract_opts);
}
Expand Down
4 changes: 2 additions & 2 deletions src/nucleosomes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ pub fn add_nucleosomes_to_record(
}
}

pub fn add_nucleosomes_to_bam(nuc_opts: &AddNucleosomeOptions) {
pub fn add_nucleosomes_to_bam(nuc_opts: &mut AddNucleosomeOptions) {
let mut bam = nuc_opts.input.bam_reader();
let mut out = bam_writer(&nuc_opts.out, &bam, nuc_opts.input.global.threads);
let mut out = nuc_opts.input.bam_writer(&nuc_opts.out);

// read in bam data
let bam_chunk_iter = BamChunk::new(bam.records(), None);
Expand Down
16 changes: 14 additions & 2 deletions src/pileup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use rust_htslib::bam::{FetchDefinition, IndexedReader};
const MIN_FIRE_COVERAGE: i32 = 4;
const MIN_FIRE_QUAL: u8 = 229; // floor(255*0.9)

#[derive(Debug, PartialEq)]
#[derive(Debug)]
pub struct FireRow<'a> {
pub coverage: &'a i32,
pub fire_coverage: &'a i32,
Expand All @@ -26,6 +26,18 @@ pub struct FireRow<'a> {
pileup_opts: &'a PileupOptions,
}

impl PartialEq for FireRow<'_> {
fn eq(&self, other: &Self) -> bool {
self.coverage == other.coverage
&& self.fire_coverage == other.fire_coverage
&& self.score == other.score
&& self.nuc_coverage == other.nuc_coverage
&& self.msp_coverage == other.msp_coverage
&& self.cpg_coverage == other.cpg_coverage
&& self.m6a_coverage == other.m6a_coverage
}
}

impl std::fmt::Display for FireRow<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let mut rtn = format!(
Expand Down Expand Up @@ -354,7 +366,7 @@ fn run_rgn(
}

/// extract existing fire calls into a bed9+ like file
pub fn pileup_track(pileup_opts: &PileupOptions) -> Result<(), anyhow::Error> {
pub fn pileup_track(pileup_opts: &mut PileupOptions) -> Result<(), anyhow::Error> {
// read in the bam from stdin or from a file
let mut bam = pileup_opts.input.indexed_bam_reader();
let header = bam.header().clone();
Expand Down
8 changes: 2 additions & 6 deletions src/predict_m6a/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,9 @@ fn get_m6a_data_windows(record: &bam::Record) -> Option<(DataWidows, DataWidows)
Some((a_data, t_data))
}

pub fn read_bam_into_fiberdata(predict_options: &PredictM6AOptions) {
pub fn read_bam_into_fiberdata(predict_options: &mut PredictM6AOptions) {
let mut bam = predict_options.input.bam_reader();
let mut out = bam_writer(
&predict_options.out,
&bam,
predict_options.input.global.threads,
);
let mut out = predict_options.input.bam_writer(&predict_options.out);
let header = bam::Header::from_template(bam.header());
// log the options
log::info!(
Expand Down
4 changes: 2 additions & 2 deletions src/strip_basemods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use rayon::prelude::IntoParallelRefMutIterator;
use rust_htslib::bam::Read;
use rust_htslib::bam::Record;

pub fn strip_base_mods(opts: &StripBasemodsOptions) {
pub fn strip_base_mods(opts: &mut StripBasemodsOptions) {
let mut bam = opts.input.bam_reader();
let mut out = bam_writer(&opts.out, &bam, opts.input.global.threads);
let mut out = opts.input.bam_writer(&opts.out);
// read in bam data
let bam_chunk_iter = BamChunk::new(bam.records(), None);

Expand Down
2 changes: 1 addition & 1 deletion tests/m6a_prediction_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn run_prediction_and_count_qual(inbam: String) -> usize {

{
// run prediction
fibertools_rs::predict_m6a::read_bam_into_fiberdata(&predict_options);
fibertools_rs::predict_m6a::read_bam_into_fiberdata(&mut predict_options);
}

// read in the output bam and check the sum of the quality scores
Expand Down

0 comments on commit 01bd5f7

Please sign in to comment.