From 7dae171d6af4a0345acc69a27333071848d9cdc8 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 13:19:01 +1000 Subject: [PATCH 01/14] add strobealign to mappers in help --- src/cli.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cli.rs b/src/cli.rs index ddd547a..53f3613 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -57,6 +57,10 @@ fn add_mapping_options(manual: Manual) -> Manual { &monospace_roff("minimap2-sr"), &format!("minimap2 with '{}' option", &monospace_roff("-x sr")) ], + &[ + &monospace_roff("strobealign"), + "strobealign using default parameters" + ], &[ &monospace_roff("bwa-mem"), "bwa mem using default parameters" From f6d57fec493cc90c05c744b0913d069d372a19ef Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 13:38:35 +1000 Subject: [PATCH 02/14] change default mapper to strobealign --- src/cli.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 53f3613..87f76f9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -21,7 +21,7 @@ const MAPPING_SOFTWARE_LIST: &[&str] = &[ "minimap2-no-preset", "strobealign", ]; -const DEFAULT_MAPPING_SOFTWARE: &str = "minimap2-sr"; +const DEFAULT_MAPPING_SOFTWARE: &str = "strobealign"; lazy_static! { pub static ref COVERM_CLUSTER_COMMAND_DEFINITION: GalahClustererCommandDefinition = { @@ -50,13 +50,9 @@ fn add_mapping_options(manual: Manual) -> Manual { Section::new("Mapping algorithm options") .option(Opt::new("NAME").short("-p").long("--mapper").help(&format!( "Underlying mapping software used {}. One of: {}", - default_roff("minimap2-sr"), + default_roff("strobealign"), bird_tool_utils::clap_utils::table_roff(&[ &["name", "description"], - &[ - &monospace_roff("minimap2-sr"), - &format!("minimap2 with '{}' option", &monospace_roff("-x sr")) - ], &[ &monospace_roff("strobealign"), "strobealign using default parameters" @@ -69,6 +65,10 @@ fn add_mapping_options(manual: Manual) -> Manual { &monospace_roff("bwa-mem2"), "bwa-mem2 using default parameters" ], + &[ + &monospace_roff("minimap2-sr"), + &format!("minimap2 with '{}' option", &monospace_roff("-x sr")) + ], &[ &monospace_roff("minimap2-ont"), &format!("minimap2 with '{}' option", &monospace_roff("-x map-ont")) From 9d5b8b8e528c19cb0e80674b5e5297971a9e077f Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:07:03 +1000 Subject: [PATCH 03/14] fix tests --- tests/test_cmdline.rs | 175 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/tests/test_cmdline.rs b/tests/test_cmdline.rs index 9c8bf3a..2b9b7d2 100644 --- a/tests/test_cmdline.rs +++ b/tests/test_cmdline.rs @@ -191,6 +191,8 @@ mod tests { Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "--contig-end-exclusion", "0", "-r", @@ -240,6 +242,8 @@ mod tests { Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "--output-format", "sparse", "-r", @@ -1329,6 +1333,36 @@ genome6 26.697144 "--sharded", ]) .stdout() + .is( + "Contig shard1.fna|shard2.fna/7seqs.reads_for_7.1.fq|7seqs.reads_for_7.1.fq Mean\n\ + genome3~random_sequence_length_11001 0.11057869\n\ + genome4~random_sequence_length_11002 0.11056851\n\ + genome5~seq2 0\n\ + genome6~random_sequence_length_11003 0.110558316\n\ + genome1~random_sequence_length_11000 0.109943785\n\ + genome1~random_sequence_length_11010 0.110487066\n\ + genome2~seq1 0\n", + ) + .succeeds() + .unwrap() + } + + #[test] + fn test_sharded_contig_input_reads_minimap2() { + Assert::main_binary() + .with_args(&[ + "contig", + "--mapper", + "minimap2-sr", + "-c", + "tests/data/7seqs.reads_for_7.1.fq", + "tests/data/7seqs.reads_for_7.2.fq", + "-r", + "tests/data/shard1.fna", + "tests/data/shard2.fna", + "--sharded", + ]) + .stdout() .is( "Contig shard1.fna|shard2.fna/7seqs.reads_for_7.1.fq|7seqs.reads_for_7.1.fq Mean\n\ genome3~random_sequence_length_11001 0.110588886\n\ @@ -1794,6 +1828,8 @@ genome6 26.697144 Assert::main_binary() .with_args(&[ "contig", + "--mapper", + "minimap2-sr", "-m", "rpkm", "mean", @@ -2203,6 +2239,50 @@ genome6~random_sequence_length_11003 0 0 0 ]) .succeeds() .stdout() + .satisfies( + |observed| { + assert_equal_table( + "Genome 20120700_S3D.head100000.1.fq.gz Mean 20120700_S3D.head100000.1.fq.gz Covered Fraction\n\ + 73.20120700_S3D.10\t0.06966771\t0.06644242\n73.20120700_S3D.12\t0\t0\n73.20120700_S3D.15\t0\t0\n73.20120700_S3D.16\t0\t0\n73.20120700_S3D.34\t0.056637306\t0.054271795\n73.20120700_S3D.3\t0\t0\n73.20120700_S3D.5\t0.13356309\t0.12263384\n73.20120700_S3D.7\t0.097519465\t0.09129343\n\ + ", + observed, + ) + }, + "table incorrect", + ) + .unwrap(); + } + + #[test] + fn test_no_zeros_bug1_minimap2() { + Assert::main_binary() + .with_args(&[ + "genome", + "--mapper", + "minimap2-sr", + "-c", + "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", + "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", + "--genome-fasta-files", + "tests/data/rhys_bug/genomes/73.20120700_S3D.10.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.12.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.15.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.16.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.34.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.3.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.5.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.7.fna", + "-t", + "8", + "-m", + "mean", + "covered_fraction", + "--min-covered-fraction", + "0.05", + "--exclude-supplementary", + ]) + .succeeds() + .stdout() .satisfies( |observed| { assert_equal_table( @@ -2222,6 +2302,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2264,6 +2346,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2307,6 +2391,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-c", "tests/data/rhys_bug/20120700_S3D.head100000.1.fq.gz", "tests/data/rhys_bug/20120700_S3D.head100000.2.fq.gz", @@ -2369,6 +2455,40 @@ genome6~random_sequence_length_11003 0 0 0 .is("") .unwrap(); + assert_eq!( + "Sample\tContig\tMean\n\ + 2seqs.fasta/bad_reads.interleaved.fq\tseq1\t0.895\n\ + 2seqs.fasta/bad_reads.interleaved.fq\tseq2\t0\n", + std::fs::read_to_string(tf.path()).unwrap() + ) + } + + #[test] + fn test_contig_output_file_minimap2() { + let tf: tempfile::NamedTempFile = tempfile::NamedTempFile::new().unwrap(); + let t = tf.path().to_str().unwrap(); + + Assert::main_binary() + .with_args(&[ + "contig", + "--contig-end-exclusion", + "0", + "--mapper", + "minimap2-sr", + "-r", + "tests/data/2seqs.fasta", + "--output-format", + "sparse", + "--interleaved", + "tests/data/bad_reads.interleaved.fq", + "-o", + t, + ]) + .succeeds() + .stdout() + .is("") + .unwrap(); + assert_eq!( "Sample\tContig\tMean\n\ 2seqs.fasta/bad_reads.interleaved.fq\tseq1\t0.899\n\ @@ -2538,6 +2658,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-m", "mean", "tpm", @@ -2601,6 +2723,57 @@ genome6~random_sequence_length_11003 0 0 0 ]) .succeeds() .stdout() + .satisfies( + |observed| { + assert_equal_table( + "Genome 20120700_S3D.stray_read1.1.fq Mean 20120700_S3D.stray_read1.1.fq Covered Fraction 20120700_S3D.stray_read1.1.fq Read Count\n\ + 73.20120700_S3D.10 0.000008399416 0.000024585164 2\n\ + 73.20120700_S3D.12 0 0 0\n\ + 73.20120700_S3D.15 0 0 0\n\ + 73.20120700_S3D.16 0 0 0\n\ + 73.20120700_S3D.34 0 0 0\n\ + 73.20120700_S3D.3 0 0 0\n\ + 73.20120700_S3D.5 0.000043860742 0.000043714655 2\n\ + 73.20120700_S3D.7 0 0 0\n\ + ", + observed, + ) + }, + "table incorrect", + ) + .stderr() + .contains("found 4 reads mapped out of 4 total (100.00%)") + .unwrap(); + } + + #[test] + fn test_genomes_and_contigs_with_supplementary_minimap2() { + Assert::main_binary() + .with_args(&[ + "genome", + "--mapper", + "minimap2-sr", + "-m", + "mean", + "covered_fraction", + "count", + "--genome-fasta-files", + "tests/data/rhys_bug/genomes/73.20120700_S3D.10.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.12.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.15.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.16.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.34.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.3.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.5.fna", + "tests/data/rhys_bug/genomes/73.20120700_S3D.7.fna", + "-c", + "tests/data/rhys_bug/20120700_S3D.stray_read1.1.fq", + "tests/data/rhys_bug/20120700_S3D.stray_read1.2.fq", + "--min-covered-fraction", + "0", + ]) + .succeeds() + .stdout() .satisfies( |observed| { assert_equal_table( @@ -2679,6 +2852,8 @@ genome6~random_sequence_length_11003 0 0 0 Assert::main_binary() .with_args(&[ "genome", + "--mapper", + "minimap2-sr", "-m", "mean", "covered_fraction", From 3ab9af7ce81f9a7a0a56f1c1387087cfa38c2343 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:07:54 +1000 Subject: [PATCH 04/14] change env file path --- .github/workflows/test-coverm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index be7fab4..3eb454f 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -15,7 +15,7 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - environment-file: coverm.yml + environment-file: CoverM/coverm.yml channels: conda-forge,defaults,bioconda - name: Conda info shell: bash -el {0} From 334e9a80d05b9f3ec3c9d5a60b2deb0eaa2b185c Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:08:33 +1000 Subject: [PATCH 05/14] change env file path again --- .github/workflows/test-coverm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index 3eb454f..954b7da 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -15,7 +15,7 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - environment-file: CoverM/coverm.yml + environment-file: ./coverm.yml channels: conda-forge,defaults,bioconda - name: Conda info shell: bash -el {0} From 3a8990e31ba3dbcb329f859e2b5c4e1fd42d52ac Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:10:29 +1000 Subject: [PATCH 06/14] switch back to mamba --- .github/workflows/test-coverm.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index 954b7da..bcd9dee 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -17,6 +17,7 @@ jobs: python-version: ${{ matrix.python-version }} environment-file: ./coverm.yml channels: conda-forge,defaults,bioconda + mamba-version: "*" - name: Conda info shell: bash -el {0} run: conda info From c8971741df909929337d484409cfcdb755c5c9af Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:12:56 +1000 Subject: [PATCH 07/14] readd checkout step --- .github/workflows/test-coverm.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index bcd9dee..d0b8244 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -11,11 +11,12 @@ jobs: os: ["ubuntu-latest"] python-version: ["3.11"] steps: + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-version }} - environment-file: ./coverm.yml + environment-file: coverm.yml channels: conda-forge,defaults,bioconda mamba-version: "*" - name: Conda info From 56022d82f2c272b98a80d36c238a3609e96ca9b6 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 31 May 2024 14:25:00 +1000 Subject: [PATCH 08/14] fix macosx test workflow --- .github/workflows/test-coverm.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index d0b8244..c5e15a4 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -37,12 +37,14 @@ jobs: os: ["macos-latest"] python-version: ["3.11"] steps: + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: coverm-osx.yml channels: conda-forge,defaults,bioconda + mamba-version: "*" - name: Conda info shell: bash -el {0} run: conda info From b2a8cd34a873f81b2d96a668bcea8b013f0838b7 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Mon, 3 Jun 2024 09:23:47 +1000 Subject: [PATCH 09/14] extra options needed? --- .github/workflows/test-coverm.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index c5e15a4..3c465e6 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -14,9 +14,11 @@ jobs: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: + activate-environment: test auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: coverm.yml + auto-activate-base: false channels: conda-forge,defaults,bioconda mamba-version: "*" - name: Conda info From d2149e1ccc30acbb8d3120f47b823e16e5a10ee2 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Mon, 3 Jun 2024 09:33:58 +1000 Subject: [PATCH 10/14] revert previous commit --- .github/workflows/test-coverm.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index 3c465e6..c5e15a4 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -14,11 +14,9 @@ jobs: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: - activate-environment: test auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: coverm.yml - auto-activate-base: false channels: conda-forge,defaults,bioconda mamba-version: "*" - name: Conda info From acd9f6d3410d541935d88fa0c2466896415802be Mon Sep 17 00:00:00 2001 From: AroneyS Date: Mon, 3 Jun 2024 09:41:32 +1000 Subject: [PATCH 11/14] use bash -el shell for test --- .github/workflows/test-coverm.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index c5e15a4..b0786bc 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -26,6 +26,7 @@ jobs: shell: pwsh run: conda list - name: Run test + shell: bash -el {0} run: | cargo test miniconda_osx: From 411f5b254d0729fd7e9dce6b00844337b97ff773 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Mon, 3 Jun 2024 09:46:05 +1000 Subject: [PATCH 12/14] fix macosx runs too --- .github/workflows/test-coverm.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-coverm.yml b/.github/workflows/test-coverm.yml index b0786bc..34295a0 100644 --- a/.github/workflows/test-coverm.yml +++ b/.github/workflows/test-coverm.yml @@ -53,5 +53,6 @@ jobs: shell: pwsh run: conda list - name: Run test + shell: bash -el {0} run: | cargo test -- --skip bwa_mem2 From a80d4ebb06249f465b8ee338443075fe69dfbb29 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Tue, 11 Jun 2024 10:14:21 +1000 Subject: [PATCH 13/14] trim carriage return (\r) from contig names seems to happen with strobealign for some reason --- src/coverage_printer.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/coverage_printer.rs b/src/coverage_printer.rs index 93c7ab6..b13cbf1 100644 --- a/src/coverage_printer.rs +++ b/src/coverage_printer.rs @@ -471,6 +471,7 @@ pub fn print_dense_cached_coverage_taker( entry_names[stoit_by_entry_by_coverage[0][my_entry_i].entry_index] .as_ref() .unwrap() + .trim_end_matches('\r') ) .unwrap(); for (stoit_i, stoit_entries) in stoit_by_entry_by_coverage.iter().enumerate() { @@ -482,7 +483,7 @@ pub fn print_dense_cached_coverage_taker( print_stream, "\t{}", coverages[i] - // Divide first because then there is less + // Divide first because then there are fewer // rounding errors, particularly when // coverage == coverage_total /coverage_totals[ecs.stoit_index][i].unwrap() @@ -573,6 +574,31 @@ mod tests { ); } + #[test] + fn test_dense_cached_printer_newline() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1\r"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_dense_cached_coverage_taker( + "Contig", + &vec!["mean".to_string(), "std".to_string()], + &c, + &mut stream, + None, + &vec![], + None, + None, + ); + assert_eq!( + "Contig\tstoit1 mean\tstoit1 std\n\ + contig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } + #[test] fn test_dense_cached_printer_easy_normalised() { let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); From 42826ef3578f55fb3143e1197f319a92052b2def Mon Sep 17 00:00:00 2001 From: AroneyS Date: Tue, 11 Jun 2024 15:42:38 +1000 Subject: [PATCH 14/14] trim carriage return (\r) in print_sparse_cached_coverage_taker too --- src/coverage_printer.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/coverage_printer.rs b/src/coverage_printer.rs index b13cbf1..74f6611 100644 --- a/src/coverage_printer.rs +++ b/src/coverage_printer.rs @@ -258,7 +258,7 @@ pub fn print_sparse_cached_coverage_taker( "{}\t{}", stoit, match &entry_names[*entry_i] { - Some(s) => s, + Some(s) => s.trim_end_matches('\r'), None => { error!("Didn't find entry name string as expected"); process::exit(1); @@ -669,4 +669,34 @@ mod tests { contig2\t1025\t12.1\t2.1\t2.2\t22.1\t22.2\n", std::fs::read_to_string(tf.path()).unwrap()); } + + #[test] + fn test_sparse_cached_printer_hello_world() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_sparse_cached_coverage_taker(&c, &mut stream, None, &vec![], None, None); + assert_eq!( + "stoit1\tcontig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } + + #[test] + fn test_sparse_cached_printer_newline() { + let mut c = CoverageTakerType::new_cached_single_float_coverage_taker(2); + c.start_stoit("stoit1"); + c.start_entry(0, "contig1\r"); + c.add_single_coverage(1.1); + c.add_single_coverage(1.2); + let mut stream = Cursor::new(Vec::new()); + print_sparse_cached_coverage_taker(&c, &mut stream, None, &vec![], None, None); + assert_eq!( + "stoit1\tcontig1\t1.1\t1.2\n", + str::from_utf8(stream.get_ref()).unwrap() + ); + } }