diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index be7fab4..34295a0 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -11,12 +11,14 @@ jobs: os: ["ubuntu-latest"] python-version: ["3.11"] steps: + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: coverm.yml channels: conda-forge,defaults,bioconda + mamba-version: "*" - name: Conda info shell: bash -el {0} run: conda info @@ -24,6 +26,7 @@ jobs: shell: pwsh run: conda list - name: Run test + shell: bash -el {0} run: | cargo test miniconda_osx: @@ -35,12 +38,14 @@ jobs: os: ["macos-latest"] python-version: ["3.11"] steps: + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: coverm-osx.yml channels: conda-forge,defaults,bioconda + mamba-version: "*" - name: Conda info shell: bash -el {0} run: conda info @@ -48,5 +53,6 @@ jobs: shell: pwsh run: conda list - name: Run test + shell: bash -el {0} run: | cargo test -- --skip bwa_mem2 diff --git a/src/cli.rs b/src/cli.rs index ddd547a..87f76f9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -21,7 +21,7 @@ const MAPPING_SOFTWARE_LIST: &[&str] = &[ "minimap2-no-preset", "strobealign", ]; -const DEFAULT_MAPPING_SOFTWARE: &str = "minimap2-sr"; +const DEFAULT_MAPPING_SOFTWARE: &str = "strobealign"; lazy_static! { pub static ref COVERM_CLUSTER_COMMAND_DEFINITION: GalahClustererCommandDefinition = { @@ -50,12 +50,12 @@ fn add_mapping_options(manual: Manual) -> Manual { Section::new("Mapping algorithm options") .option(Opt::new("NAME").short("-p").long("--mapper").help(&format!( "Underlying mapping software used {}. One of: {}", - default_roff("minimap2-sr"), + default_roff("strobealign"), bird_tool_utils::clap_utils::table_roff(&[ &["name", "description"], &[ - &monospace_roff("minimap2-sr"), - &format!("minimap2 with '{}' option", &monospace_roff("-x sr")) + &monospace_roff("strobealign"), + "strobealign using default parameters" ], &[ &monospace_roff("bwa-mem"), @@ -65,6 +65,10 @@ fn add_mapping_options(manual: Manual) -> Manual { &monospace_roff("bwa-mem2"), "bwa-mem2 using default parameters" ], + &[ + &monospace_roff("minimap2-sr"), + &format!("minimap2 with '{}' option", &monospace_roff("-x sr")) + ], &[ &monospace_roff("minimap2-ont"), &format!("minimap2 with '{}' option", &monospace_roff("-x map-ont")) diff --git a/src/coverage_printer.rs b/src/coverage_printer.rs index 93c7ab6..74f6611 100644 --- a/src/coverage_printer.rs +++ b/src/coverage_printer.rs @@ -258,7 +258,7 @@ pub fn print_sparse_cached_coverage_taker( "{}\t{}", stoit, match &entry_names[*entry_i] { - Some(s) => s, + Some(s) => s.trim_end_matches('\r'), None => { error!("Didn't find entry name string as expected"); process::exit(1); @@ -471,6 +471,7 @@ pub fn print_dense_cached_coverage_taker( entry_names[stoit_by_entry_by_coverage[0][my_entry_i].entry_index] .as_ref() .unwrap() + .trim_end_matches('\r') ) .unwrap(); for (stoit_i, stoit_entries) in stoit_by_entry_by_coverage.iter().enumerate() { @@ -482,7 +483,7 @@ pub fn print_dense_cached_coverage_taker( print_stream, "\t{}", coverages[i] - // Divide first because then there is less + // Divide first because then there are fewer // rounding errors, particularly when // coverage == coverage_total /coverage_totals[ecs.stoit_index][i].unwrap() @@ -573,6 +574,31 @@ mod tests { ); } + #[test] + fn test_dense_cached_printer_newline() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1\r"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_dense_cached_coverage_taker( + "Contig", + &vec!["mean".to_string(), "std".to_string()], + &c, + &mut stream, + None, + &vec![], + None, + None, + ); + assert_eq!( + "Contig\tstoit1 mean\tstoit1 std\n\ + contig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } + #[test] fn test_dense_cached_printer_easy_normalised() { let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); @@ -643,4 +669,34 @@ mod tests { contig2\t1025\t12.1\t2.1\t2.2\t22.1\t22.2\n", std::fs::read_to_string(tf.path()).unwrap()); } + + #[test] + fn test_sparse_cached_printer_hello_world() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_sparse_cached_coverage_taker(&c, &mut stream, None, &vec![], None, None); + assert_eq!( + "stoit1\tcontig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } + + #[test] + fn test_sparse_cached_printer_newline() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1\r"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_sparse_cached_coverage_taker(&c, &mut stream, None, &vec![], None, None); + assert_eq!( + "stoit1\tcontig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } } diff --git a/tests/test_cmdline.rs b/tests/test_cmdline.rs index 9c8bf3a..2b9b7d2 100644 --- a/tests/test_cmdline.rs +++ b/tests/test_cmdline.rs @@ -191,6 +191,8 @@ mod tests { Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "--contig-end-exclusion", "0", "-r", @@ -240,6 +242,8 @@ mod tests { Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "--output-format", "sparse", "-r", @@ -1329,6 +1333,36 @@ genome6 26.697144 "--sharded", ]) .stdout() + .is( + "Contig shard1.fna|shard2.fna/7seqs.reads_for_7.1.fq|7seqs.reads_for_7.1.fq Mean\n\ + genome3~random_sequence_length_11001 0.11057869\n\ + genome4~random_sequence_length_11002 0.11056851\n\ + genome5~seq2 0\n\ + genome6~random_sequence_length_11003 0.110558316\n\ + genome1~random_sequence_length_11000 0.109943785\n\ + genome1~random_sequence_length_11010 0.110487066\n\ + genome2~seq1 0\n", + ) + .succeeds() + .unwrap() + } + + #[test] + fn test_sharded_contig_input_reads_minimap2() { + Assert::main_binary() + .with_args(&[ + "contig", + "--mapper", + "minimap2-sr", + "-c", + "tests/data/7seqs.reads_for_7.1.fq", + "tests/data/7seqs.reads_for_7.2.fq", + "-r", + "tests/data/shard1.fna", + "tests/data/shard2.fna", + "--sharded", + ]) + .stdout() .is( "Contig shard1.fna|shard2.fna/7seqs.reads_for_7.1.fq|7seqs.reads_for_7.1.fq Mean\n\ genome3~random_sequence_length_11001 0.110588886\n\ @@ -1794,6 +1828,8 @@ genome6 26.697144 Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "-m", "rpkm", "mean", @@ -2203,6 +2239,50 @@ genome6~random_sequence_length_11003 0 0 0 ]) .succeeds() .stdout() + .satisfies( + |observed| { + assert_equal_table( + "Genome 20120700_S3D.head100000.1.fq.gz Mean 20120700_S3D.head100000.1.fq.gz Covered Fraction\n\ + 73.20120700_S3D.10\t0.06966771\t0.06644242\n73.20120700_S3D.12\t0\t0\n73.20120700_S3D.15\t0\t0\n73.20120700_S3D.16\t0\t0\n73.20120700_S3D.34\t0.056637306\t0.054271795\n73.20120700_S3D.3\t0\t0\n73.20120700_S3D.5\t0.13356309\t0.12263384\n73.20120700_S3D.7\t0.097519465\t0.09129343\n\ + ", + observed, + ) + }, + "table incorrect", + ) + .unwrap(); + } + + #[test] + fn test_no_zeros_bug1_minimap2() { + Assert::main_binary() + .with_args(&[ + "genome", + "--mapper", + "minimap2-sr", + "-c", + "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", + "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", + "--genome-fasta-files", + "tests/data/rhys_bug/genomes/73.20120700_S3D.10.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.12.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.15.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.16.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.34.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.3.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.5.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.7.fna", + "-t", + "8", + "-m", + "mean", + "covered_fraction", + "--min-covered-fraction", + "0.05", + "--exclude-supplementary", + ]) + .succeeds() + .stdout() .satisfies( |observed| { assert_equal_table( @@ -2222,6 +2302,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2264,6 +2346,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2307,6 +2391,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2369,6 +2455,40 @@ genome6~random_sequence_length_11003 0 0 0 .is("") .unwrap(); + assert_eq!( + "Sample\tContig\tMean\n\ + 2seqs.fasta/bad_reads.interleaved.fq\tseq1\t0.895\n\ + 2seqs.fasta/bad_reads.interleaved.fq\tseq2\t0\n", + std::fs::read_to_string(tf.path()).unwrap() + ) + } + + #[test] + fn test_contig_output_file_minimap2() { + let tf: tempfile::NamedTempFile = tempfile::NamedTempFile::new().unwrap(); + let t = tf.path().to_str().unwrap(); + + Assert::main_binary() + .with_args(&[ + "contig", + "--contig-end-exclusion", + "0", + "--mapper", + "minimap2-sr", + "-r", + "tests/data/2seqs.fasta", + "--output-format", + "sparse", + "--interleaved", + "tests/data/bad_reads.interleaved.fq", + "-o", + t, + ]) + .succeeds() + .stdout() + .is("") + .unwrap(); + assert_eq!( "Sample\tContig\tMean\n\ 2seqs.fasta/bad_reads.interleaved.fq\tseq1\t0.899\n\ @@ -2538,6 +2658,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-m", "mean", "tpm", @@ -2601,6 +2723,57 @@ genome6~random_sequence_length_11003 0 0 0 ]) .succeeds() .stdout() + .satisfies( + |observed| { + assert_equal_table( + "Genome 20120700_S3D.stray_read1.1.fq Mean 20120700_S3D.stray_read1.1.fq Covered Fraction 20120700_S3D.stray_read1.1.fq Read Count\n\ + 73.20120700_S3D.10 0.000008399416 0.000024585164 2\n\ + 73.20120700_S3D.12 0 0 0\n\ + 73.20120700_S3D.15 0 0 0\n\ + 73.20120700_S3D.16 0 0 0\n\ + 73.20120700_S3D.34 0 0 0\n\ + 73.20120700_S3D.3 0 0 0\n\ + 73.20120700_S3D.5 0.000043860742 0.000043714655 2\n\ + 73.20120700_S3D.7 0 0 0\n\ + ", + observed, + ) + }, + "table incorrect", + ) + .stderr() + .contains("found 4 reads mapped out of 4 total (100.00%)") + .unwrap(); + } + + #[test] + fn test_genomes_and_contigs_with_supplementary_minimap2() { + Assert::main_binary() + .with_args(&[ + "genome", + "--mapper", + "minimap2-sr", + "-m", + "mean", + "covered_fraction", + "count", + "--genome-fasta-files", + "tests/data/rhys_bug/genomes/73.20120700_S3D.10.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.12.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.15.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.16.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.34.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.3.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.5.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.7.fna", + "-c", + "tests/data/rhys_bug/20120700_S3D.stray_read1.1.fq", + "tests/data/rhys_bug/20120700_S3D.stray_read1.2.fq", + "--min-covered-fraction", + "0", + ]) + .succeeds() + .stdout() .satisfies( |observed| { assert_equal_table( @@ -2679,6 +2852,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-m", "mean", "covered_fraction",