Skip to content

Commit

Permalink
contig: Add strobealign-aemb method.
Browse files Browse the repository at this point in the history
  • Loading branch information
wwood committed Sep 28, 2024
1 parent 2cf3baa commit 6b39794
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 50 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ galah = "0.4.0"
bird_tool_utils-man = "0.4.0"
roff = "0.2.*"
needletail = "0.5.*"
csv = "1.*"

[dev-dependencies]
assert_cli = "0.6.*"
Expand Down
56 changes: 28 additions & 28 deletions src/bam_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,28 @@ impl NamedBamReaderGenerator<StreamingNamedBamReader> for StreamingNamedBamReade
}
}

pub fn name_stoit(
index_path: &str,
read1_path: &str,
include_reference_in_stoit_name: bool,
) -> String {
(match include_reference_in_stoit_name {
true => (std::path::Path::new(&index_path)
.file_name()
.expect("Unable to convert reference to file name")
.to_str()
.expect("Unable to covert file name into str")
.to_string()
+ "/")
.to_string(),
false => "".to_string(),
}) + std::path::Path::new(read1_path)
.file_name()
.expect("Unable to convert read1 name to file name")
.to_str()
.expect("Unable to covert file name into str")
}

pub fn complete_processes(
processes: Vec<std::process::Child>,
command_strings: Vec<String>,
Expand Down Expand Up @@ -412,22 +434,11 @@ pub fn generate_named_bam_readers_from_reads(
log_files.push(samtools_view_cache_log);
}

let stoit_name = match include_reference_in_stoit_name {
true => {
std::path::Path::new(&index.index_path())
.file_name()
.expect("Unable to convert reference to file name")
.to_str()
.expect("Unable to covert file name into str")
.to_string()
+ "/"
}
false => "".to_string(),
} + std::path::Path::new(read1_path)
.file_name()
.expect("Unable to convert read1 name to file name")
.to_str()
.expect("Unable to covert file name into str");
let stoit_name = name_stoit(
index.index_path(),
read1_path,
include_reference_in_stoit_name,
);

StreamingNamedBamReaderGenerator {
stoit_name,
Expand Down Expand Up @@ -797,18 +808,7 @@ pub fn generate_bam_maker_generator_from_reads(
let log_files = vec![mapping_log, samtools2_log, samtools_view_cache_log];

return NamedBamMakerGenerator {
stoit_name: std::path::Path::new(index.index_path())
.file_name()
.expect("Unable to convert reference to file name")
.to_str()
.expect("Unable to covert file name into str")
.to_string()
+ "/"
+ std::path::Path::new(read1_path)
.file_name()
.expect("Unable to convert read1 name to file name")
.to_str()
.expect("Unable to covert file name into str"),
stoit_name: name_stoit(index.index_path(), read1_path, true),
pre_processes: vec![cmd],
command_strings: vec![format!("bash -c \"{}\"", cmd_string)],
log_file_descriptions: log_descriptions,
Expand Down
27 changes: 26 additions & 1 deletion src/bin/coverm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use coverm::mapping_index_maintenance::check_reference_existence;
use coverm::mapping_parameters::*;
use coverm::mosdepth_genome_coverage_estimators::*;
use coverm::shard_bam_reader::*;
use coverm::strobealign_aemb::strobealign_aemb_coverage;
use coverm::FlagFilter;
use coverm::OutputWriter;
use coverm::CONCATENATED_FASTA_FILE_SEPARATOR;
Expand Down Expand Up @@ -505,7 +506,23 @@ fn main() {
estimators_and_taker =
estimators_and_taker.print_headers("Contig", print_stream.clone());

if m.contains_id("bam-files") {
if let CoverageEstimator::StrobealignAembEstimator {} =
estimators_and_taker.estimators[0]
{
let mapping_params =
MappingParameters::generate_from_clap(m, MappingProgram::STROBEALIGN, &None);
debug!(
"Running strobealign-aemb coverage with mapping parameters: {:?}",
mapping_params
);
strobealign_aemb_coverage(
mapping_params,
&mut estimators_and_taker.taker,
threads,
&mut estimators_and_taker.printer,
&mut print_stream,
);
} else if m.contains_id("bam-files") {
let bam_files: Vec<&str> = m
.get_many::<String>("bam-files")
.unwrap()
Expand Down Expand Up @@ -1029,6 +1046,13 @@ impl EstimatorsAndTaker {
"reads_per_base" => {
estimators.push(CoverageEstimator::new_estimator_reads_per_base());
}
"strobealign-aemb" => {
if methods.len() > 1 {
error!("Cannot (currently) specify the strobealign-aemb method with any other coverage methods");
process::exit(1);
}
estimators.push(CoverageEstimator::new_estimator_strobealign_aemb());
}
_ => unreachable!(),
};
}
Expand Down Expand Up @@ -1086,6 +1110,7 @@ impl EstimatorsAndTaker {
CoverageEstimator::ReadCountCalculator { .. } => die("counts"),
CoverageEstimator::ReferenceLengthCalculator { .. } => die("length"),
CoverageEstimator::ReadsPerBaseCalculator { .. } => die("reads_per_base"),
CoverageEstimator::StrobealignAembEstimator { .. } => die("strobealign-aemb"),
_ => {}
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,9 @@ pub fn contig_full_help() -> Manual {
&[&monospace_roff("trimmed_mean"), &format!("Average number of aligned reads overlapping each position after removing the most deeply and shallow-ly covered positions. See {}/{} to adjust.",
&monospace_roff("--trim-min"),
&monospace_roff("--trim-max"))],

&[&monospace_roff("strobealign-aemb"),
&format!("Mean coverage as estimated by strobealign --aemb, which is faster than the {} method, giving similar but not identical values. See https://github.com/ksahlin/strobealign for details. Cannot currently be used with other methods.",
&monospace_roff("mean"))],
&[&monospace_roff("coverage_histogram"), "Histogram of coverage depths"],
&[&monospace_roff("covered_fraction"), "Proportion of bases covered by 1 or more reads"],
&[&monospace_roff("covered_bases"), "Number of bases covered by 1 or more reads"],
Expand Down Expand Up @@ -1747,6 +1749,7 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
"reads_per_base",
"rpkm",
"tpm",
"strobealign-aemb",
])
.default_value("mean")
.action(clap::ArgAction::Append)
Expand Down
1 change: 0 additions & 1 deletion src/contig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ mod tests {
use mapping_index_maintenance::VanillaIndexStruct;
use mapping_parameters::*;
use shard_bam_reader::*;
use std::io::Read;
use std::str;
use OutputWriter;

Expand Down
1 change: 0 additions & 1 deletion src/coverage_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,6 @@ pub fn print_dense_cached_coverage_taker(
mod tests {
use super::*;
use std::io::Cursor;
use std::io::Read;
use std::str;
use OutputWriter;

Expand Down
1 change: 0 additions & 1 deletion src/genome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,6 @@ mod tests {
use rust_htslib::bam::Read;
use shard_bam_reader::*;
use std::collections::HashSet;
use std::io::Read as _;
use OutputWriter;

fn test_streaming_with_stream<R: NamedBamReader, G: NamedBamReaderGenerator<R>>(
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ extern crate clap_complete;
extern crate lazy_static;
extern crate bird_tool_utils;
extern crate bird_tool_utils_man;
extern crate csv;
extern crate galah;
extern crate needletail;
extern crate roff;
Expand Down
4 changes: 3 additions & 1 deletion src/mapping_parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ use tempfile::NamedTempFile;
use bam_generator::MappingProgram;
use mapping_index_maintenance::check_reference_existence;

#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum ReadFormat {
Coupled,
Interleaved,
Single,
}

#[derive(Debug)]
pub struct MappingParameters<'a> {
references: Vec<&'a str>,
threads: u16,
Expand Down Expand Up @@ -266,6 +267,7 @@ impl<'a> Iterator for SingleReferenceMappingParameters<'a> {
}
}

#[derive(Debug)]
pub struct OneSampleMappingParameters<'a> {
pub reference: &'a str,
pub read_format: ReadFormat,
Expand Down
20 changes: 18 additions & 2 deletions src/mosdepth_genome_coverage_estimators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ pub enum CoverageEstimator {
observed_contig_length: u64,
num_mapped_reads: u64,
},
StrobealignAembEstimator {},
}

impl CoverageEstimator {
Expand All @@ -93,6 +94,7 @@ impl CoverageEstimator {
CoverageEstimator::ReferenceLengthCalculator { .. } => vec!["Length"],
CoverageEstimator::ReadCountCalculator { .. } => vec!["Read Count"],
CoverageEstimator::ReadsPerBaseCalculator { .. } => vec!["Reads per base"],
CoverageEstimator::StrobealignAembEstimator { .. } => vec!["Strobealign aemb"],
}
}
}
Expand Down Expand Up @@ -206,6 +208,9 @@ impl CoverageEstimator {
num_mapped_reads: 0,
}
}
pub fn new_estimator_strobealign_aemb() -> CoverageEstimator {
CoverageEstimator::StrobealignAembEstimator {}
}

fn calculate_unobserved_bases(
unobserved_contig_lengths: &[u64],
Expand Down Expand Up @@ -330,6 +335,7 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
} => {
*num_mapped_reads = 0;
}
CoverageEstimator::StrobealignAembEstimator { .. } => panic!("Programming error"),
}
}

Expand Down Expand Up @@ -489,6 +495,7 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
} => {
*num_mapped_reads += num_mapped_reads_in_contig;
}
CoverageEstimator::StrobealignAembEstimator { .. } => unreachable!(),
}
}

Expand Down Expand Up @@ -798,6 +805,7 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
/ (*observed_contig_length + unobserved_contig_lengths.iter().sum::<u64>())
as f32
}
CoverageEstimator::StrobealignAembEstimator {} => unreachable!(),
}
}

Expand Down Expand Up @@ -887,6 +895,9 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
CoverageEstimator::ReadsPerBaseCalculator { .. } => {
CoverageEstimator::new_estimator_reads_per_base()
}
CoverageEstimator::StrobealignAembEstimator { .. } => {
CoverageEstimator::new_estimator_strobealign_aemb()
}
}
}

Expand All @@ -901,7 +912,8 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
| CoverageEstimator::VarianceGenomeCoverageEstimator { .. }
| CoverageEstimator::ReferenceLengthCalculator { .. }
| CoverageEstimator::ReadCountCalculator { .. }
| CoverageEstimator::ReadsPerBaseCalculator { .. } => {
| CoverageEstimator::ReadsPerBaseCalculator { .. }
| CoverageEstimator::StrobealignAembEstimator { .. } => {
coverage_taker.add_single_coverage(coverage);
}
CoverageEstimator::PileupCountsGenomeCoverageEstimator { counts, .. } => {
Expand Down Expand Up @@ -933,7 +945,8 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
| CoverageEstimator::TPMCoverageEstimator { .. }
| CoverageEstimator::VarianceGenomeCoverageEstimator { .. }
| CoverageEstimator::ReadCountCalculator { .. }
| CoverageEstimator::ReadsPerBaseCalculator { .. } => {
| CoverageEstimator::ReadsPerBaseCalculator { .. }
| CoverageEstimator::StrobealignAembEstimator { .. } => {
coverage_taker.add_single_coverage(0.0);
}
CoverageEstimator::PileupCountsGenomeCoverageEstimator { .. } => {}
Expand Down Expand Up @@ -1006,6 +1019,9 @@ impl MosdepthGenomeCoverageEstimator for CoverageEstimator {
observed_contig_length: _,
num_mapped_reads,
} => *num_mapped_reads,
CoverageEstimator::StrobealignAembEstimator { .. } => {
panic!("Strobealign AEMB does not calculate number of mapped reads")
}
}
}
}
61 changes: 51 additions & 10 deletions src/strobealign_aemb.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
use crate::bam_generator::complete_processes;
use crate::coverage_takers::*;
use crate::bam_generator::{complete_processes, name_stoit};
use crate::mapping_parameters::MappingParameters;
use crate::mosdepth_genome_coverage_estimators::*;
use crate::ReadsMapped;
use crate::{coverage_printer, coverage_takers::*, OutputWriter};
// use crate::mosdepth_genome_coverage_estimators::*;

pub fn strobealign_aemb_coverage<T: CoverageTaker>(
use csv;

struct MappingResultStruct {
tmpfile: tempfile::NamedTempFile,
stoit_name: String,
}

pub fn strobealign_aemb_coverage(
mapping_parameters: MappingParameters,
_coverage_taker: &mut T,
_coverage_estimators: &mut [CoverageEstimator],
coverage_taker: &mut CoverageTakerType,
// _coverage_estimators: &mut [CoverageEstimator],
// print_zero_coverage_contigs: bool,
threads: u16,
) -> Vec<ReadsMapped> {
coverage_printer: &mut coverage_printer::CoveragePrinter,
print_stream: &mut OutputWriter,
) {
let mut commands = vec![];
let mut log_files = vec![];

let mut mapping_results: Vec<MappingResultStruct> = vec![];
for p1 in mapping_parameters {
for p2 in p1 {
debug!("Processing mapping parameters: {:?}", p2);
let mapping_result = tempfile::Builder::new()
.prefix("coverm-strobealign-aemb-mapping-result")
.tempfile()
Expand All @@ -25,7 +35,7 @@ pub fn strobealign_aemb_coverage<T: CoverageTaker>(
.tempfile()
.unwrap_or_else(|_| panic!("Failed to create strobealign-aemb log tempfile"));
let cmd = format!(
"strobealign --aemb -t {} {} {} {} > {} 2> {}",
"strobealign --aemb -t {} '{}' '{}' '{}' > '{}' 2> '{}'",
threads,
p2.reference,
p2.read1,
Expand All @@ -41,6 +51,10 @@ pub fn strobealign_aemb_coverage<T: CoverageTaker>(
);
log_files.push(mapping_log);
commands.push(cmd);
mapping_results.push(MappingResultStruct {
tmpfile: mapping_result,
stoit_name: name_stoit(p2.reference, p2.read1, true),
});
}
}

Expand All @@ -63,5 +77,32 @@ pub fn strobealign_aemb_coverage<T: CoverageTaker>(
}

// Read the mapping results and pass them to the coverage taker
panic!();
for mapping_result_tempfile in mapping_results {
debug!("Starting stoid {}", mapping_result_tempfile.stoit_name);
coverage_taker.start_stoit(&mapping_result_tempfile.stoit_name);
// Read the TSV format
let file = std::fs::File::open(mapping_result_tempfile.tmpfile.path())
.expect("Failed to open strobealign-aemb mapping result");
let mut rdr = csv::ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(false)
.from_reader(file);
for (i, result) in rdr.records().enumerate() {
let record = result.expect("Failed to read strobealign-aemb mapping result");
if record.len() != 2 {
panic!(
"Unexpected number of columns in strobealign-aemb mapping result line {}: {:?}",
i, record
);
}
coverage_taker.start_entry(i, &record[0]);
coverage_taker.add_single_coverage(
record[1]
.parse()
.expect("Failed to parse strobealign-aemb coverage"),
);
coverage_taker.finish_entry();
}
}
coverage_printer.finalise_printing(coverage_taker, print_stream, None, &vec![], None, None);
}
Loading

0 comments on commit 6b39794

Please sign in to comment.