diff --git a/lib/npg_pipeline/function/autoqc.pm b/lib/npg_pipeline/function/autoqc.pm index 87d3ba62c..12cc1cd42 100644 --- a/lib/npg_pipeline/function/autoqc.pm +++ b/lib/npg_pipeline/function/autoqc.pm @@ -210,12 +210,14 @@ sub _generate_command { my $check = $self->qc_to_run(); my $archive_path = $self->archive_path; + my $no_archive_path = $self->no_archive_path; my $recal_path = $self->recalibrated_path; my $dp_archive_path = $dp->path($self->archive_path); - my $cache10k_path = $dp->short_files_cache_path($archive_path); + my $dp_no_archive_path = $dp->path($self->no_archive_path); + my $cache10k_path = $dp->short_files_cache_path($no_archive_path); my $qc_out_path = $dp->qc_out_path($archive_path); - my $bamfile_path = $dp->file_path($dp_archive_path, ext => 'bam'); + my $bamfile_path = $dp->file_path($dp_no_archive_path, ext => 'bam'); my $cramfile_path = $dp->file_path($dp_archive_path, ext => 'cram'); my $fq1_filepath = $dp->file_path($cache10k_path, ext => 'fastq', suffix => '1'); diff --git a/lib/npg_pipeline/function/p4_stage1_analysis.pm b/lib/npg_pipeline/function/p4_stage1_analysis.pm index c314b8fba..3905e5bc0 100644 --- a/lib/npg_pipeline/function/p4_stage1_analysis.pm +++ b/lib/npg_pipeline/function/p4_stage1_analysis.pm @@ -285,13 +285,14 @@ sub _generate_command_params { my $archive_path = $self->archive_path; my $basecall_path = $self->basecall_path; my $no_cal_path = $self->recalibrated_path; + my $no_archive_path = $self->no_archive_path; my $bam_basecall_path = $self->bam_basecall_path; my $lp_archive_path = $lane_product->path($self->archive_path); - my $full_bam_name = $bam_basecall_path . q{/}. $id_run . q{_} .$position. q{.bam}; + my $full_bam_name = $no_archive_path . q{/}. $id_run . q{_} .$position. q{.bam}; $p4_params{qc_check_id_run} = $id_run; # used by tag_metrics qc check - $p4_params{qc_check_qc_in_dir} = $bam_basecall_path; # used by tag_metrics qc check + $p4_params{qc_check_qc_in_dir} = $no_archive_path; # used by tag_metrics qc check $p4_params{qc_check_qc_out_dir} = $lane_product->qc_out_path($self->archive_path); # used by tag_metrics qc check $p4_params{tileviz_dir} = $lane_product->tileviz_path_prefix($self->archive_path); # used for tileviz $p4_params{outdatadir} = $no_cal_path; # base for all (most?) outputs @@ -299,10 +300,10 @@ sub _generate_command_params { $p4_params{rpt_list} = $lane_product->rpt_list; $p4_params{subsetsubpath} = $lane_product->short_files_cache_path($archive_path); $p4_params{seqchksum_file} = $bam_basecall_path . q[/] . $id_run . q[_] . $position . q{.post_i2b.seqchksum}; # full name for the lane-level seqchksum file - $p4_params{filtered_bam} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.bam}; # full name for the spatially filtered lane-level file + $p4_params{filtered_bam} = $no_archive_path . q[/] . $id_run . q[_] . $position . q{.bam}; # full name for the spatially filtered lane-level file $p4_params{unfiltered_cram_file} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.unfiltered.cram}; # full name for spatially unfiltered lane-level cram file - $p4_params{md5filename} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.bam.md5}; # full name for the md5 for the spatially filtered lane-level file - $p4_params{split_prefix} = $no_cal_path; # location for split bam files + $p4_params{md5filename} = $no_archive_path . q[/] . $id_run . q[_] . $position . q{.bam.md5}; # full name for the md5 for the spatially filtered lane-level file + $p4_params{split_prefix} = $no_archive_path; # location for split bam files my $job_name = join q/_/, (q{p4_stage1}, $id_run, $position, $self->timestamp()); $job_name = q{'} . $job_name . q{'}; diff --git a/lib/npg_pipeline/function/seq_alignment.pm b/lib/npg_pipeline/function/seq_alignment.pm index 7741854db..39d3701aa 100644 --- a/lib/npg_pipeline/function/seq_alignment.pm +++ b/lib/npg_pipeline/function/seq_alignment.pm @@ -197,7 +197,9 @@ sub _alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity) my $is_plex = defined $tag_index; my $archive_path = $self->archive_path; + my $no_archive_path = $self->no_archive_path; my $dp_archive_path = $dp->path($archive_path); + my $dp_no_archive_path = $dp->path($no_archive_path); my $recal_path= $self->recalibrated_path; #? my $uses_patterned_flowcell = $self->uses_patterned_flowcell; @@ -232,7 +234,7 @@ sub _alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity) (join q{_}, q{tmp}, $self->_job_id), $name_root; - my $bfs_input_file = $dp_archive_path . q[/] . $dp->file_name(ext => 'bam'); + my $bfs_input_file = $dp_no_archive_path . q[/] . $dp->file_name(ext => 'bam'); my $cfs_input_file = $dp_archive_path . q[/] . $dp->file_name(ext => 'cram'); my $af_input_file = $dp->file_name(ext => 'json', suffix => 'bam_alignment_filter_metrics'); my $fq1_filepath = File::Spec->catdir($cache10k_path, $dp->file_name(ext => 'fastq', suffix => '1')); diff --git a/t/20-function-autoqc.t b/t/20-function-autoqc.t index a7b2d4497..b88a84250 100644 --- a/t/20-function-autoqc.t +++ b/t/20-function-autoqc.t @@ -37,6 +37,7 @@ my $hiseq_rf = $util->create_runfolder($tmp, analysis_path => 'BAM_basecalls_20180802'}); my $archive_dir = $hiseq_rf->{'archive_path'}; +my $no_archive_dir = $hiseq_rf->{'no_archive_path'}; my $rf_path = $hiseq_rf->{'runfolder_path'}; fcopy('t/data/run_params/runParameters.hiseq.xml', "$rf_path/runParameters.xml") or die 'Fail to copy run param file'; @@ -314,7 +315,7 @@ subtest 'ref_match' => sub { my $t = $d->composition->get_component(0)->tag_index; is ($d->command, sprintf( 'qc --check=ref_match --rpt_list=%s --filename_root=%s --qc_out=%s --input_files=%s --input_files=%s', - qq["1234:8:${t}"], "1234_8#${t}", "$archive_dir/lane8/plex${t}/qc", "$archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_1.fastq", "$archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_2.fastq"), + qq["1234:8:${t}"], "1234_8#${t}", "$archive_dir/lane8/plex${t}/qc", "$no_archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_1.fastq", "$no_archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_2.fastq"), "ref_match command for lane 8 tag $t"); } }; diff --git a/t/20-function-p4_stage1_analysis.t b/t/20-function-p4_stage1_analysis.t index 8b5b3561f..e0416362b 100644 --- a/t/20-function-p4_stage1_analysis.t +++ b/t/20-function-p4_stage1_analysis.t @@ -126,6 +126,7 @@ subtest 'check_save_arguments' => sub { my $h = from_json(slurp($pfname)); my $no_cal_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_cal'; + my $no_archive_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_archive'; $expected = { 'assign' => [ @@ -141,24 +142,24 @@ subtest 'check_save_arguments' => sub { 'qc_check_qc_out_dir' => $no_cal_path . '/archive/lane1/qc', 'i2b_lane' => '1', 'bwa_executable' => 'bwa0_6', - 'filtered_bam' => $no_cal_path . '/1234_1.bam', + 'filtered_bam' => $no_archive_path . '/1234_1.bam', 'samtools_executable' => 'samtools', 'i2b_library_name' => '51021', 'outdatadir' => $no_cal_path, 'subsetsubpath' => $no_cal_path . '/archive/lane1/.npg_cache_10000', 'i2b_run_path' => $dir . q[/nfs/sf45/IL2/analysis/123456_IL2_1234], 'teepot_tempdir' => '.', - 'split_prefix' => $no_cal_path, + 'split_prefix' => $no_archive_path, 'i2b_intensity_dir' => $intensities_dir, 'i2b_sample_aliases' => 'SRS000147', 'phix_alignment_method' => 'bwa_aln_se', - 'md5filename' => $no_cal_path . '/1234_1.bam.md5', + 'md5filename' => $no_archive_path . '/1234_1.bam.md5', 'teepot_mval' => '2G', 'i2b_runfolder' => '123456_IL2_1234', 'i2b_study_name' => '"SRP000031: 1000Genomes Project Pilot 1"', 'i2b_basecalls_dir' => $intensities_dir . '/BaseCalls', 'teepot_wval' => '500', - 'qc_check_qc_in_dir' => $intensities_dir . '/BAM_basecalls_09-07-2009', + 'qc_check_qc_in_dir' => $no_archive_path, 'qc_check_id_run' => '1234', 'cluster_count' => '500077065', 'seed_frac' => '1234.00002000', @@ -247,6 +248,7 @@ subtest 'check_save_arguments_minimap2' => sub { my $h = from_json(slurp($pfname)); my $no_cal_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_cal'; + my $no_archive_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_archive'; $expected = { 'assign' => [ @@ -262,18 +264,18 @@ subtest 'check_save_arguments_minimap2' => sub { 'qc_check_qc_out_dir' => $no_cal_path . '/archive/lane1/qc', 'i2b_lane' => '1', 'bwa_executable' => 'bwa0_6', - 'filtered_bam' => $no_cal_path . '/1234_1.bam', + 'filtered_bam' => $no_archive_path . '/1234_1.bam', 'samtools_executable' => 'samtools', 'i2b_library_name' => '51021', 'outdatadir' => $no_cal_path, 'subsetsubpath' => $no_cal_path . '/archive/lane1/.npg_cache_10000', 'i2b_run_path' => $dir . q[/nfs/sf45/IL2/analysis/123456_IL2_1234], 'teepot_tempdir' => '.', - 'split_prefix' => $no_cal_path, + 'split_prefix' => $no_archive_path, 'i2b_intensity_dir' => $intensities_dir, 'i2b_sample_aliases' => 'SRS000147', 'phix_alignment_method' => 'minimap2', - 'md5filename' => $no_cal_path . '/1234_1.bam.md5', + 'md5filename' => $no_archive_path . '/1234_1.bam.md5', 'teepot_mval' => '2G', 'i2b_runfolder' => '123456_IL2_1234', 'i2b_study_name' => '"SRP000031: 1000Genomes Project Pilot 1"', diff --git a/t/20-function-seq_alignment.t b/t/20-function-seq_alignment.t index d1fa58b93..4b9bd11e8 100644 --- a/t/20-function-seq_alignment.t +++ b/t/20-function-seq_alignment.t @@ -206,7 +206,7 @@ subtest 'basic functionality' => sub { qq{ && qc --check bam_flagstats --filename_root 12597_4#3 --qc_in $qc_in --qc_out $qc_out --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.cram} . qq{ && qc --check bam_flagstats --filename_root 12597_4#3_phix --qc_in $qc_in --qc_out $qc_out --rpt_list "12597:4:3" --subset phix --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.cram} . q{ && qc --check alignment_filter_metrics --filename_root 12597_4#3 --qc_in $PWD --qc_out }.$qc_out.q{ --rpt_list "12597:4:3" --input_files 12597_4#3_bam_alignment_filter_metrics.json} . - qq{ && qc --check rna_seqc --filename_root 12597_4#3 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.bam}. + qq{ && qc --check rna_seqc --filename_root 12597_4#3 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_archive/lane4/plex3/12597_4#3.bam}. q{ '}; my $mem = 32000; @@ -345,7 +345,7 @@ subtest 'RNASeq analysis' => sub { qq{ && qc --check bam_flagstats --filename_root 13066_8 --qc_in $qc_in --qc_out $qc_out --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.cram} . qq{ && qc --check bam_flagstats --filename_root 13066_8_phix --qc_in $qc_in --qc_out $qc_out --rpt_list "13066:8" --subset phix --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.cram} . q{ && qc --check alignment_filter_metrics --filename_root 13066_8 --qc_in $PWD --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files 13066_8_bam_alignment_filter_metrics.json} . - qq{ && qc --check rna_seqc --filename_root 13066_8 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.bam '}; + qq{ && qc --check rna_seqc --filename_root 13066_8 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_archive/lane8/13066_8.bam '}; is ($d->command, $command, 'correct command for lane 8'); is ($d->memory, 32000, 'memory'); diff --git a/t/util.pm b/t/util.pm index 7b3ea3764..3ab4d6709 100644 --- a/t/util.pm +++ b/t/util.pm @@ -82,9 +82,10 @@ sub create_runfolder { $paths->{'basecall_path'} = join q[/], $paths->{'intensity_path'}, q[BaseCalls]; if ($names->{'analysis_path'}) { - $paths->{'analysis_path'} = join q[/], $paths->{'intensity_path'}, $names->{'analysis_path'}; - $paths->{'nocal_path'} = join q[/], $paths->{'analysis_path'}, q[no_cal]; - $paths->{'archive_path'} = join q[/], $paths->{'nocal_path'}, q[archive]; + $paths->{'analysis_path'} = join q[/], $paths->{'intensity_path'}, $names->{'analysis_path'}; + $paths->{'nocal_path'} = join q[/], $paths->{'analysis_path'}, q[no_cal]; + $paths->{'archive_path'} = join q[/], $paths->{'nocal_path'}, q[archive]; + $paths->{'no_archive_path'} = join q[/], $paths->{'analysis_path'}, q[no_archive]; } make_path(values %{$paths});