Skip to content

Commit

Permalink
mapq filtering: Implement it.
Browse files Browse the repository at this point in the history
Fixes #219.

Suggested by: @SDmetagenomics.
  • Loading branch information
wwood committed Sep 17, 2024
1 parent c1e225f commit 9cf7a00
Show file tree
Hide file tree
Showing 11 changed files with 312 additions and 14 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ exclude = [ # Max upload is 10MB, as of writing test data was 15MB
"tests/*",
]
default-run = "coverm"
edition = "2015"

[dependencies]
bio = "1.1.0"
Expand Down
6 changes: 6 additions & 0 deletions src/bam_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ pub fn generate_filtered_bam_readers_from_bam_files(
min_aligned_length_single: u32,
min_percent_identity_single: f32,
min_aligned_percent_single: f32,
min_mapq: u8,
min_aligned_length_pair: u32,
min_percent_identity_pair: f32,
min_aligned_percent_pair: f32,
Expand All @@ -511,6 +512,7 @@ pub fn generate_filtered_bam_readers_from_bam_files(
min_aligned_length_single,
min_percent_identity_single,
min_aligned_percent_single,
min_mapq,
min_aligned_length_pair,
min_percent_identity_pair,
min_aligned_percent_pair,
Expand Down Expand Up @@ -544,6 +546,7 @@ pub struct StreamingFilteredNamedBamReaderGenerator {
min_aligned_length_single: u32,
min_percent_identity_single: f32,
min_aligned_percent_single: f32,
min_mapq: u8,
min_aligned_length_pair: u32,
min_percent_identity_pair: f32,
min_aligned_percent_pair: f32,
Expand Down Expand Up @@ -584,6 +587,7 @@ impl NamedBamReaderGenerator<StreamingFilteredNamedBamReader>
self.min_aligned_length_single,
self.min_percent_identity_single,
self.min_aligned_percent_single,
self.min_mapq,
self.min_aligned_length_pair,
self.min_percent_identity_pair,
self.min_aligned_percent_pair,
Expand Down Expand Up @@ -651,6 +655,7 @@ pub fn generate_filtered_named_bam_readers_from_reads(
min_aligned_length_single: u32,
min_percent_identity_single: f32,
min_aligned_percent_single: f32,
min_mapq: u8,
min_aligned_length_pair: u32,
min_percent_identity_pair: f32,
min_aligned_percent_pair: f32,
Expand Down Expand Up @@ -682,6 +687,7 @@ pub fn generate_filtered_named_bam_readers_from_reads(
min_aligned_length_single,
min_percent_identity_single,
min_aligned_percent_single,
min_mapq,
min_aligned_length_pair,
min_percent_identity_pair,
min_aligned_percent_pair,
Expand Down
7 changes: 7 additions & 0 deletions src/bin/coverm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ fn main() {
filter_params.min_aligned_length_single,
filter_params.min_percent_identity_single,
filter_params.min_aligned_percent_single,
filter_params.min_mapq,
filter_params.min_aligned_length_pair,
filter_params.min_percent_identity_pair,
filter_params.min_aligned_percent_pair,
Expand Down Expand Up @@ -463,6 +464,7 @@ fn main() {
filter_params.min_aligned_length_single,
filter_params.min_percent_identity_single,
filter_params.min_aligned_percent_single,
filter_params.min_mapq,
filter_params.min_aligned_length_pair,
filter_params.min_percent_identity_pair,
filter_params.min_aligned_percent_pair,
Expand Down Expand Up @@ -519,6 +521,7 @@ fn main() {
filter_params.min_aligned_length_single,
filter_params.min_percent_identity_single,
filter_params.min_aligned_percent_single,
filter_params.min_mapq,
filter_params.min_aligned_length_pair,
filter_params.min_percent_identity_pair,
filter_params.min_aligned_percent_pair,
Expand Down Expand Up @@ -1209,6 +1212,7 @@ struct FilterParameters {
min_aligned_length_single: u32,
min_percent_identity_single: f32,
min_aligned_percent_single: f32,
min_mapq: u8, // 255 indicates no filtering
min_aligned_length_pair: u32,
min_percent_identity_pair: f32,
min_aligned_percent_pair: f32,
Expand All @@ -1224,6 +1228,7 @@ impl FilterParameters {
min_aligned_length_single: *m.get_one::<u32>("min-read-aligned-length").unwrap_or(&0),
min_percent_identity_single: parse_percentage(m, "min-read-percent-identity"),
min_aligned_percent_single: parse_percentage(m, "min-read-aligned-percent"),
min_mapq: *m.get_one::<u8>("min-mapq").unwrap_or(&255),
min_aligned_length_pair: *m
.get_one::<u32>("min-read-aligned-length-pair")
.unwrap_or(&0),
Expand Down Expand Up @@ -1253,6 +1258,7 @@ impl FilterParameters {
self.min_percent_identity_single > 0.0
|| self.min_percent_identity_pair > 0.0
|| self.min_aligned_percent_single > 0.0
|| self.min_mapq < 255
|| self.min_aligned_percent_pair > 0.0
|| self.min_aligned_length_single > 0
|| self.min_aligned_length_pair > 0
Expand Down Expand Up @@ -1578,6 +1584,7 @@ fn get_streamed_filtered_bam_readers(
filter_params.min_aligned_length_single,
filter_params.min_percent_identity_single,
filter_params.min_aligned_percent_single,
filter_params.min_mapq,
filter_params.min_aligned_length_pair,
filter_params.min_percent_identity_pair,
filter_params.min_aligned_percent_pair,
Expand Down
25 changes: 25 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,16 @@ fn add_thresholding_options(manual: Manual) -> Manual {
default_roff("0")
)),
)
.option(Opt::new("INT").long("--min-mapq").help(&format!(
"Exclude reads with a mapping quality \
below this value. An INT between 0 and 254. \
When thresholding pairs, both reads \
are excluded if either has a MAPQ below this value. \
If 0, \
reads with MAPQ of 255 (mapping quality unavailable) \
are excluded. {}",
default_roff("not set")
)))
.flag(
Flag::new()
.long("--proper-pairs-only")
Expand Down Expand Up @@ -1322,6 +1332,11 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
.long("min-read-aligned-percent")
.value_parser(clap::value_parser!(f32)),
)
.arg(
Arg::new("min-mapq")
.long("min-mapq")
.value_parser(clap::value_parser!(u8)),
)
.arg(
Arg::new("min-read-aligned-length-pair")
.long("min-read-aligned-length-pair")
Expand Down Expand Up @@ -1707,6 +1722,11 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
.long("min-read-aligned-percent")
.value_parser(clap::value_parser!(f32)),
)
.arg(
Arg::new("min-mapq")
.long("min-mapq")
.value_parser(clap::value_parser!(u8)),
)
.arg(
Arg::new("min-read-aligned-length-pair")
.long("min-read-aligned-length-pair")
Expand Down Expand Up @@ -1845,6 +1865,11 @@ Ben J. Woodcroft <benjwoodcroft near gmail.com>
.long("min-read-percent-identity")
.value_parser(clap::value_parser!(f32)),
)
.arg(
Arg::new("min-mapq")
.long("min-mapq")
.value_parser(clap::value_parser!(u8)),
)
.arg(
Arg::new("min-read-aligned-percent")
.long("min-read-aligned-percent")
Expand Down
1 change: 0 additions & 1 deletion src/contig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ mod tests {
use mapping_index_maintenance::VanillaIndexStruct;
use mapping_parameters::*;
use shard_bam_reader::*;
use std::io::Read;
use std::str;
use OutputWriter;

Expand Down
1 change: 0 additions & 1 deletion src/coverage_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,6 @@ pub fn print_dense_cached_coverage_taker(
mod tests {
use super::*;
use std::io::Cursor;
use std::io::Read;
use std::str;
use OutputWriter;

Expand Down
Loading

0 comments on commit 9cf7a00

Please sign in to comment.