From 60a011faa9cef73a79b1ce4d75eebb2f938a9d9f Mon Sep 17 00:00:00 2001
From: Marina Gourtovaia <mg8@sanger.ac.uk>
Date: Tue, 10 Dec 2024 12:31:50 +0000
Subject: [PATCH] Updated tests for autoqc review job creation.

---
 Changes                     |  5 +++++
 t/20-function-autoqc.t      | 26 ++++++++++++--------------
 t/data/samplesheet_8747.csv |  2 +-
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/Changes b/Changes
index a6fa0556..340c2332 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,11 @@
 LIST OF CHANGES
 ---------------
 
+ - Changed/extended tests for npg_pipeline::function::autoqc to ensure that
+   the tests work with changes in https://github.com/wtsi-npg/npg_qc/pull/895
+   (study_specific assessment is disabled for lanes). Tested that a pool that
+   has samples from multiple studies does not cause problems.
+
 release 68.7.0 (2024-12-02)
  - npg_pipeline::function::autoqc
    - Simplified the flow of the code.
diff --git a/t/20-function-autoqc.t b/t/20-function-autoqc.t
index 4deb5e0a..4d9104ce 100644
--- a/t/20-function-autoqc.t
+++ b/t/20-function-autoqc.t
@@ -8,9 +8,8 @@ use File::Slurp;
 use Log::Log4perl qw/:levels/;
 
 use t::util;
-
-use_ok('npg_pipeline::function::autoqc');
 use_ok('st::api::lims');
+use_ok('npg_pipeline::function::autoqc');
 use_ok('npg_tracking::glossary::composition');
 use_ok('npg_tracking::glossary::rpt');
 use_ok('npg_pipeline::product');
@@ -604,24 +603,16 @@ subtest 'review' => sub {
   );
 
   $da = $qc->create();
-  ok ($da && (@{$da} == 10), '10 definitions returned');
+  ok ($da && (@{$da} == 9), '9 definitions returned');
   my %definitions = map { $_->composition->freeze2rpt => $_ } @{$da};
   my @expected_rpt_lists = qw/ 8747:1:1  8747:1:2  8747:1:3
                                8747:2:4  8747:2:5  8747:2:6
-                               8747:3:7  8747:3:8  8747:3:9
-                               8747:7 /;
+                               8747:3:7  8747:3:8  8747:3:9 /;
   is_deeply ([sort keys %definitions], \@expected_rpt_lists,
     'definitions are for correct entities');
 
-  my $d = $definitions{'8747:7'};
-  my $expected_command = q{qc --check=review --rpt_list="8747:7" } .
-    qq{--filename_root=8747_7 --qc_out=$archive_dir/lane7/qc } .
-    qq{--qc_in=$archive_dir/lane7/qc --conf_path=t/data/release/config/qc_review } .
-    qq{--runfolder_path=$rf_path};
-  is ($d->command, $expected_command, 'correct command for lane-level job');
-
-  $d = $definitions{'8747:1:1'};
-  $expected_command = q{qc --check=review --rpt_list="8747:1:1" } .
+  my $d = $definitions{'8747:1:1'};
+  my $expected_command = q{qc --check=review --rpt_list="8747:1:1" } .
     qq{--filename_root=8747_1#1 --qc_out=$archive_dir/lane1/plex1/qc } .
     qq{--qc_in=$archive_dir/lane1/plex1/qc --conf_path=t/data/release/config/qc_review } .
     qq{--runfolder_path=$rf_path};
@@ -654,6 +645,13 @@ subtest 'review' => sub {
     resource          => $default
   );
 
+  # Lane 6 has samples from two studies - should not cause problems
+  # creating jobs.
+  my $with_control = 0;
+  is_deeply(
+    [st::api::lims->new(id_run => 8747, position => 6)->study_ids($with_control)],
+    [qw(2410 82)], 'lane 6 samples belong to two different studies'
+  );
   $da = $qc->create();
   ok ($da && (@{$da} == 14), '14 definitions returned');
   %definitions = map { $_->composition->freeze2rpt => $_ } @{$da};
diff --git a/t/data/samplesheet_8747.csv b/t/data/samplesheet_8747.csv
index 6704cda4..e576a4a2 100644
--- a/t/data/samplesheet_8747.csv
+++ b/t/data/samplesheet_8747.csv
@@ -31,7 +31,7 @@ Lane,Sample_ID,Sample_Name,GenomeFolder,Index,bait_name,default_library_type,def
 6,6093527,ERS183152,,TATCTA,,qPCR only,TTAGGCAT,,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk ncb@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk sm11@sanger.ac.uk,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk,ncb@sanger.ac.uk sm11@sanger.ac.uk,jec@sanger.ac.uk,,0,0,,,dag1_mut2 6093527,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183152,,Danio rerio,,3%27 end enriched mRNA from morphologically abnormal embryos from dag1 knockout incross 2. A 6 base indexing sequence (TATCTA) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503351,dag1_mut2,Zebrafish dag1 mut2,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,3,
 6,6093528,ERS183153,,AAGTTA,,qPCR only,TGACCACT,,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk ncb@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk sm11@sanger.ac.uk,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk,ncb@sanger.ac.uk sm11@sanger.ac.uk,jec@sanger.ac.uk,,0,0,,,dag1_wt2 6093528,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183153,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 2. A 6 base indexing sequence (AAGTTA) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503352,dag1_wt2,Zebrafish dag1 wt2,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,4,
 6,6093529,ERS183154,,GTAGAC,,qPCR only,ACAGTGGT,,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk ncb@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk sm11@sanger.ac.uk,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk,ncb@sanger.ac.uk sm11@sanger.ac.uk,jec@sanger.ac.uk,,0,0,,,dag1_mut3 6093529,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183154,,Danio rerio,,3%27 end enriched mRNA from morphologically abnormal embryos from dag1 knockout incross 3. A 6 base indexing sequence (GTAGAC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503353,dag1_mut3,Zebrafish dag1 mut3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,5,
-6,6093530,ERS183155,,TTAATC,,qPCR only,GCCAATGT,,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk ncb@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk sm11@sanger.ac.uk,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk,ncb@sanger.ac.uk sm11@sanger.ac.uk,jec@sanger.ac.uk,,0,0,,,dag1_wt3 6093530,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183155,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 3. A 6 base indexing sequence (TTAATC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503354,dag1_wt3,Zebrafish dag1 wt3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,82,ZF molecular phenotype,,,Zebrafish transcript profiling,6,
+6,6093530,ERS183155,,TTAATC,,qPCR only,GCCAATGT,,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk ncb@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk sm11@sanger.ac.uk,is1@sanger.ac.uk jec@sanger.ac.uk kdj@sanger.ac.uk nw6@sanger.ac.uk rw4@sanger.ac.uk,ncb@sanger.ac.uk sm11@sanger.ac.uk,jec@sanger.ac.uk,,0,0,,,dag1_wt3 6093530,zebrafish,7955,S4301,82,ZF molecular phenotype,standard,,,from:70 to:270,ERS183155,,Danio rerio,,3%27 end enriched mRNA from morphologically normal sibling embryos from dag1 knockout incross 3. A 6 base indexing sequence (TTAATC) is bases 5 to 10 of read 1 followed by polyT.  More information describing the mutant phenotype can be found at the Wellcome Trust Sanger Institute Zebrafish Mutation Project website http%3A%2F%2Fwww.sanger.ac.uk%2Fcgi-bin%2FProjects%2FD_rerio%2Fzmp%2Fsearch.pl%3Fq%3Dzmp_phD,,1503354,dag1_wt3,Zebrafish dag1 wt3,Danio_rerio (zv9),,168,ERP001559,1,0,0,Total RNA was extracted from wild type and mutant zebrafish embryos.  Double stranded cDNA representing the 3%27 ends of transcripts was made by a variety of methods%2C including polyT priming and 3%27 pull down on magentic beads.   Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria)%2C please see http%3A%2F%2Fwww.sanger.ac.uk%2Fdatasharing%2F,2410,ZF molecular phenotype,,,Zebrafish transcript profiling,6,
 6,4405872,phiX_for_spiked_buffers,,ACAACGCAAT,,,ACAACGCAAT,,hps@sanger.ac.uk pc10@sanger.ac.uk,pc10@sanger.ac.uk,hps@sanger.ac.uk,hps@sanger.ac.uk,,1,0,,,PhiX (10Jan12),,,,,,standard,,,,,,,,,,1255141,phiX_for_spiked_buffers,,,,168,,1,0,0,None,198,Illumina Controls,,,,168,
 7,6101639,EGAN00001085924,,,,Custom,,,as16@sanger.ac.uk cdt@sanger.ac.uk cs4@sanger.ac.uk las@sanger.ac.uk lm5@sanger.ac.uk sd5@sanger.ac.uk sm2@sanger.ac.uk som@sanger.ac.uk,as16@sanger.ac.uk cs4@sanger.ac.uk las@sanger.ac.uk sd5@sanger.ac.uk som@sanger.ac.uk,cdt@sanger.ac.uk cs4@sanger.ac.uk lm5@sanger.ac.uk sm2@sanger.ac.uk,sm2@sanger.ac.uk,,0,0,6103057,0,PD4845a-RNA_rnaseq 6101639,Human,9606,S0814,1177,CGP Core Sequencing 10%2F12 to 09%2F13,standard,pass,4683991,from:175 to:225,EGAN00001085924,,Homo sapiens,,,,1505636,PD4845a-RNA_rnaseq,PD4845a-RNA,,,168,EGAS00001000377,1,0,0,We propose to definitively characterise the somatic genetics of triple negative breast cancer through generation of comprehensive catalogues of somatic mutations in breast cancer cases by high coverage genome sequencing coupled with integrated transcriptomic and methylation analyses.,2410,Triple Negative Breast Cancer RNA Sequencing,Homo_sapiens (CGP_GRCh37.NCBI.allchr_MT),,Triple Negative Breast Cancer RNA Sequencing,,
 8,6101640,EGAN00001085925,,,,Custom2,,,as16@sanger.ac.uk cdt@sanger.ac.uk cs4@sanger.ac.uk las@sanger.ac.uk lm5@sanger.ac.uk sd5@sanger.ac.uk sm2@sanger.ac.uk som@sanger.ac.uk,as16@sanger.ac.uk cs4@sanger.ac.uk las@sanger.ac.uk sd5@sanger.ac.uk som@sanger.ac.uk,cdt@sanger.ac.uk cs4@sanger.ac.uk lm5@sanger.ac.uk sm2@sanger.ac.uk,sm2@sanger.ac.uk,,0,0,6103058,0,PD4845a-RNA2_rnaseq 6101640,Human,9606,S0814,1177,CGP Core Sequencing 10%2F12 to 09%2F13,standard,pass,4683992,from:175 to:225,EGAN00001085925,,Homo sapiens,,,,1505637,PD4845a-RNA2_rnaseq,PD4845a-RNA2,,,168,EGAS00001000377,1,0,0,We propose to definitively characterise the somatic genetics of triple negative breast cancer through generation of comprehensive catalogues of somatic mutations in breast cancer cases by high coverage genome sequencing coupled with integrated transcriptomic and methylation analyses.,2410,Triple Negative Breast Cancer RNA Sequencing,Homo_sapiens (CGP_GRCh37.NCBI.allchr_MT),,Triple Negative Breast Cancer RNA Sequencing,,