Updated tests for autoqc review job creation.

wtsi-npg · Dec 11, 2024 · 60a011f · 60a011f
1 parent df64b11
commit 60a011f
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 15 deletions.
diff --git a/Changes b/Changes
@@ -1,6 +1,11 @@
 LIST OF CHANGES
 ---------------
 
+ - Changed/extended tests for npg_pipeline::function::autoqc to ensure that
+   the tests work with changes in https://github.com/wtsi-npg/npg_qc/pull/895
+   (study_specific assessment is disabled for lanes). Tested that a pool that
+   has samples from multiple studies does not cause problems.
+
 release 68.7.0 (2024-12-02)
  - npg_pipeline::function::autoqc
    - Simplified the flow of the code.

diff --git a/t/20-function-autoqc.t b/t/20-function-autoqc.t
@@ -8,9 +8,8 @@ use File::Slurp;
 use Log::Log4perl qw/:levels/;
 
 use t::util;
-
-use_ok('npg_pipeline::function::autoqc');
 use_ok('st::api::lims');
+use_ok('npg_pipeline::function::autoqc');
 use_ok('npg_tracking::glossary::composition');
 use_ok('npg_tracking::glossary::rpt');
 use_ok('npg_pipeline::product');
@@ -604,24 +603,16 @@ subtest 'review' => sub {
   );
 
   $da = $qc->create();
-  ok ($da && (@{$da} == 10), '10 definitions returned');
+  ok ($da && (@{$da} == 9), '9 definitions returned');
   my %definitions = map { $_->composition->freeze2rpt => $_ } @{$da};
   my @expected_rpt_lists = qw/ 8747:1:1  8747:1:2  8747:1:3
                                8747:2:4  8747:2:5  8747:2:6
-                               8747:3:7  8747:3:8  8747:3:9
-                               8747:7 /;
+                               8747:3:7  8747:3:8  8747:3:9 /;
   is_deeply ([sort keys %definitions], \@expected_rpt_lists,
     'definitions are for correct entities');
 
-  my $d = $definitions{'8747:7'};
-  my $expected_command = q{qc --check=review --rpt_list="8747:7" } .
-    qq{--filename_root=8747_7 --qc_out=$archive_dir/lane7/qc } .
-    qq{--qc_in=$archive_dir/lane7/qc --conf_path=t/data/release/config/qc_review } .
-    qq{--runfolder_path=$rf_path};
-  is ($d->command, $expected_command, 'correct command for lane-level job');
-
-  $d = $definitions{'8747:1:1'};
-  $expected_command = q{qc --check=review --rpt_list="8747:1:1" } .
+  my $d = $definitions{'8747:1:1'};
+  my $expected_command = q{qc --check=review --rpt_list="8747:1:1" } .
     qq{--filename_root=8747_1#1 --qc_out=$archive_dir/lane1/plex1/qc } .
     qq{--qc_in=$archive_dir/lane1/plex1/qc --conf_path=t/data/release/config/qc_review } .
     qq{--runfolder_path=$rf_path};
@@ -654,6 +645,13 @@ subtest 'review' => sub {
     resource          => $default
   );
 
+  # Lane 6 has samples from two studies - should not cause problems
+  # creating jobs.
+  my $with_control = 0;
+  is_deeply(
+    [st::api::lims->new(id_run => 8747, position => 6)->study_ids($with_control)],
+    [qw(2410 82)], 'lane 6 samples belong to two different studies'
+  );
   $da = $qc->create();
   ok ($da && (@{$da} == 14), '14 definitions returned');
   %definitions = map { $_->composition->freeze2rpt => $_ } @{$da};

diff --git a/t/data/samplesheet_8747.csv b/t/data/samplesheet_8747.csv
@@ -31,7 +31,7 @@ Lane,Sample_ID,Sample_Name,GenomeFolder,Index,bait_name,default_library_type,def
 6,6093527,ERS183152,,TATCTA,,qPCR only,TTAGGCAT,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected],[email protected],,0,0,,,dag1_mut2 6093527,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183152,,Danio rerio,,3%27 end enriched mRNA from morphologically abnormal embryos from dag1 knockout incross 2. A 6 base indexing sequence (TATCTA) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503351,dag1_mut2,Zebrafish dag1 mut2,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,3,
 6,6093528,ERS183153,,AAGTTA,,qPCR only,TGACCACT,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected],[email protected],,0,0,,,dag1_wt2 6093528,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183153,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 2. A 6 base indexing sequence (AAGTTA) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503352,dag1_wt2,Zebrafish dag1 wt2,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,4,
 6,6093529,ERS183154,,GTAGAC,,qPCR only,ACAGTGGT,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected],[email protected],,0,0,,,dag1_mut3 6093529,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183154,,Danio rerio,,3%27 end enriched mRNA from morphologically abnormal embryos from dag1 knockout incross 3. A 6 base indexing sequence (GTAGAC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503353,dag1_mut3,Zebrafish dag1 mut3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,5,
-6,6093530,ERS183155,,TTAATC,,qPCR only,GCCAATGT,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected],[email protected],,0,0,,,dag1_wt3 6093530,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183155,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 3. A 6 base indexing sequence (TTAATC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503354,dag1_wt3,Zebrafish dag1 wt3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,6,
+6,6093530,ERS183155,,TTAATC,,qPCR only,GCCAATGT,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected],[email protected],,0,0,,,dag1_wt3 6093530,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183155,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 3. A 6 base indexing sequence (TTAATC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503354,dag1_wt3,Zebrafish dag1 wt3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,2410,ZF molecular phenotype,,,Zebrafish transcript profiling,6,
 6,4405872,phiX_for_spiked_buffers,,ACAACGCAAT,,,ACAACGCAAT,,[email protected] [email protected],[email protected],[email protected],[email protected],,1,0,,,PhiX (10Jan12),,,,,,standard,,,,,,,,,,1255141,phiX_for_spiked_buffers,,,,168,,1,0,0,None,198,Illumina Controls,,,,168,
 7,6101639,EGAN00001085924,,,,Custom,,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected],[email protected],,0,0,6103057,0,PD4845a-RNA_rnaseq 6101639,Human,9606,S0814,1177,CGP Core Sequencing 10%2F12 to 09%2F13,standard,pass,4683991,from:175 to:225,EGAN00001085924,,Homo sapiens,,,,1505636,PD4845a-RNA_rnaseq,PD4845a-RNA,,,168,EGAS00001000377,1,0,0,We propose to definitively characterise the somatic genetics of triple negative breast cancer through generation of comprehensive catalogues of somatic mutations in breast cancer cases by high coverage genome sequencing coupled with integrated transcriptomic and methylation analyses.,2410,Triple Negative Breast Cancer RNA Sequencing,Homo_sapiens (CGP_GRCh37.NCBI.allchr_MT),,Triple Negative Breast Cancer RNA Sequencing,,
 8,6101640,EGAN00001085925,,,,Custom2,,,[email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected] [email protected],[email protected] [email protected] [email protected] [email protected],[email protected],,0,0,6103058,0,PD4845a-RNA2_rnaseq 6101640,Human,9606,S0814,1177,CGP Core Sequencing 10%2F12 to 09%2F13,standard,pass,4683992,from:175 to:225,EGAN00001085925,,Homo sapiens,,,,1505637,PD4845a-RNA2_rnaseq,PD4845a-RNA2,,,168,EGAS00001000377,1,0,0,We propose to definitively characterise the somatic genetics of triple negative breast cancer through generation of comprehensive catalogues of somatic mutations in breast cancer cases by high coverage genome sequencing coupled with integrated transcriptomic and methylation analyses.,2410,Triple Negative Breast Cancer RNA Sequencing,Homo_sapiens (CGP_GRCh37.NCBI.allchr_MT),,Triple Negative Breast Cancer RNA Sequencing,,