Skip to content

Commit

Permalink
Move BAM to no_archive
Browse files Browse the repository at this point in the history
Move intermediate BAM files to the no_archive path: we do not add them to
iRODS, and this also makes them easier to delete if we urgently need disk space.
  • Loading branch information
mp15 committed Nov 20, 2019
1 parent d717d7f commit ed22a03
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 21 deletions.
6 changes: 4 additions & 2 deletions lib/npg_pipeline/function/autoqc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,14 @@ sub _generate_command {

my $check = $self->qc_to_run();
my $archive_path = $self->archive_path;
my $no_archive_path = $self->no_archive_path;
my $recal_path = $self->recalibrated_path;
my $dp_archive_path = $dp->path($self->archive_path);
my $cache10k_path = $dp->short_files_cache_path($archive_path);
my $dp_no_archive_path = $dp->path($self->no_archive_path);
my $cache10k_path = $dp->short_files_cache_path($no_archive_path);
my $qc_out_path = $dp->qc_out_path($archive_path);

my $bamfile_path = $dp->file_path($dp_archive_path, ext => 'bam');
my $bamfile_path = $dp->file_path($dp_no_archive_path, ext => 'bam');
my $cramfile_path = $dp->file_path($dp_archive_path, ext => 'cram');

my $fq1_filepath = $dp->file_path($cache10k_path, ext => 'fastq', suffix => '1');
Expand Down
11 changes: 6 additions & 5 deletions lib/npg_pipeline/function/p4_stage1_analysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -285,24 +285,25 @@ sub _generate_command_params {
my $archive_path = $self->archive_path;
my $basecall_path = $self->basecall_path;
my $no_cal_path = $self->recalibrated_path;
my $no_archive_path = $self->no_archive_path;
my $bam_basecall_path = $self->bam_basecall_path;
my $lp_archive_path = $lane_product->path($self->archive_path);

my $full_bam_name = $bam_basecall_path . q{/}. $id_run . q{_} .$position. q{.bam};
my $full_bam_name = $no_archive_path . q{/}. $id_run . q{_} .$position. q{.bam};

$p4_params{qc_check_id_run} = $id_run; # used by tag_metrics qc check
$p4_params{qc_check_qc_in_dir} = $bam_basecall_path; # used by tag_metrics qc check
$p4_params{qc_check_qc_in_dir} = $no_archive_path; # used by tag_metrics qc check
$p4_params{qc_check_qc_out_dir} = $lane_product->qc_out_path($self->archive_path); # used by tag_metrics qc check
$p4_params{tileviz_dir} = $lane_product->tileviz_path_prefix($self->archive_path); # used for tileviz
$p4_params{outdatadir} = $no_cal_path; # base for all (most?) outputs
$p4_params{lane_archive_path} = $lp_archive_path;
$p4_params{rpt_list} = $lane_product->rpt_list;
$p4_params{subsetsubpath} = $lane_product->short_files_cache_path($archive_path);
$p4_params{seqchksum_file} = $bam_basecall_path . q[/] . $id_run . q[_] . $position . q{.post_i2b.seqchksum}; # full name for the lane-level seqchksum file
$p4_params{filtered_bam} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.bam}; # full name for the spatially filtered lane-level file
$p4_params{filtered_bam} = $no_archive_path . q[/] . $id_run . q[_] . $position . q{.bam}; # full name for the spatially filtered lane-level file
$p4_params{unfiltered_cram_file} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.unfiltered.cram}; # full name for spatially unfiltered lane-level cram file
$p4_params{md5filename} = $no_cal_path . q[/] . $id_run . q[_] . $position . q{.bam.md5}; # full name for the md5 for the spatially filtered lane-level file
$p4_params{split_prefix} = $no_cal_path; # location for split bam files
$p4_params{md5filename} = $no_archive_path . q[/] . $id_run . q[_] . $position . q{.bam.md5}; # full name for the md5 for the spatially filtered lane-level file
$p4_params{split_prefix} = $no_archive_path; # location for split bam files

my $job_name = join q/_/, (q{p4_stage1}, $id_run, $position, $self->timestamp());
$job_name = q{'} . $job_name . q{'};
Expand Down
4 changes: 3 additions & 1 deletion lib/npg_pipeline/function/seq_alignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ sub _alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity)
my $is_plex = defined $tag_index;

my $archive_path = $self->archive_path;
my $no_archive_path = $self->no_archive_path;
my $dp_archive_path = $dp->path($archive_path);
my $dp_no_archive_path = $dp->path($no_archive_path);
my $recal_path= $self->recalibrated_path; #?
my $uses_patterned_flowcell = $self->uses_patterned_flowcell;

Expand Down Expand Up @@ -232,7 +234,7 @@ sub _alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity)
(join q{_}, q{tmp}, $self->_job_id),
$name_root;

my $bfs_input_file = $dp_archive_path . q[/] . $dp->file_name(ext => 'bam');
my $bfs_input_file = $dp_no_archive_path . q[/] . $dp->file_name(ext => 'bam');
my $cfs_input_file = $dp_archive_path . q[/] . $dp->file_name(ext => 'cram');
my $af_input_file = $dp->file_name(ext => 'json', suffix => 'bam_alignment_filter_metrics');
my $fq1_filepath = File::Spec->catdir($cache10k_path, $dp->file_name(ext => 'fastq', suffix => '1'));
Expand Down
3 changes: 2 additions & 1 deletion t/20-function-autoqc.t
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ my $hiseq_rf = $util->create_runfolder($tmp,
analysis_path => 'BAM_basecalls_20180802'});

my $archive_dir = $hiseq_rf->{'archive_path'};
my $no_archive_dir = $hiseq_rf->{'no_archive_path'};
my $rf_path = $hiseq_rf->{'runfolder_path'};
fcopy('t/data/run_params/runParameters.hiseq.xml', "$rf_path/runParameters.xml")
or die 'Fail to copy run param file';
Expand Down Expand Up @@ -314,7 +315,7 @@ subtest 'ref_match' => sub {
my $t = $d->composition->get_component(0)->tag_index;
is ($d->command, sprintf(
'qc --check=ref_match --rpt_list=%s --filename_root=%s --qc_out=%s --input_files=%s --input_files=%s',
qq["1234:8:${t}"], "1234_8#${t}", "$archive_dir/lane8/plex${t}/qc", "$archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_1.fastq", "$archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_2.fastq"),
qq["1234:8:${t}"], "1234_8#${t}", "$archive_dir/lane8/plex${t}/qc", "$no_archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_1.fastq", "$no_archive_dir/lane8/plex${t}/.npg_cache_10000/1234_8#${t}_2.fastq"),
"ref_match command for lane 8 tag $t");
}
};
Expand Down
16 changes: 9 additions & 7 deletions t/20-function-p4_stage1_analysis.t
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ subtest 'check_save_arguments' => sub {
my $h = from_json(slurp($pfname));

my $no_cal_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_cal';
my $no_archive_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_archive';

$expected = {
'assign' => [
Expand All @@ -141,24 +142,24 @@ subtest 'check_save_arguments' => sub {
'qc_check_qc_out_dir' => $no_cal_path . '/archive/lane1/qc',
'i2b_lane' => '1',
'bwa_executable' => 'bwa0_6',
'filtered_bam' => $no_cal_path . '/1234_1.bam',
'filtered_bam' => $no_archive_path . '/1234_1.bam',
'samtools_executable' => 'samtools',
'i2b_library_name' => '51021',
'outdatadir' => $no_cal_path,
'subsetsubpath' => $no_cal_path . '/archive/lane1/.npg_cache_10000',
'i2b_run_path' => $dir . q[/nfs/sf45/IL2/analysis/123456_IL2_1234],
'teepot_tempdir' => '.',
'split_prefix' => $no_cal_path,
'split_prefix' => $no_archive_path,
'i2b_intensity_dir' => $intensities_dir,
'i2b_sample_aliases' => 'SRS000147',
'phix_alignment_method' => 'bwa_aln_se',
'md5filename' => $no_cal_path . '/1234_1.bam.md5',
'md5filename' => $no_archive_path . '/1234_1.bam.md5',
'teepot_mval' => '2G',
'i2b_runfolder' => '123456_IL2_1234',
'i2b_study_name' => '"SRP000031: 1000Genomes Project Pilot 1"',
'i2b_basecalls_dir' => $intensities_dir . '/BaseCalls',
'teepot_wval' => '500',
'qc_check_qc_in_dir' => $intensities_dir . '/BAM_basecalls_09-07-2009',
'qc_check_qc_in_dir' => $no_archive_path,
'qc_check_id_run' => '1234',
'cluster_count' => '500077065',
'seed_frac' => '1234.00002000',
Expand Down Expand Up @@ -247,6 +248,7 @@ subtest 'check_save_arguments_minimap2' => sub {
my $h = from_json(slurp($pfname));

my $no_cal_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_cal';
my $no_archive_path = $intensities_dir . '/BAM_basecalls_09-07-2009/no_archive';

$expected = {
'assign' => [
Expand All @@ -262,18 +264,18 @@ subtest 'check_save_arguments_minimap2' => sub {
'qc_check_qc_out_dir' => $no_cal_path . '/archive/lane1/qc',
'i2b_lane' => '1',
'bwa_executable' => 'bwa0_6',
'filtered_bam' => $no_cal_path . '/1234_1.bam',
'filtered_bam' => $no_archive_path . '/1234_1.bam',
'samtools_executable' => 'samtools',
'i2b_library_name' => '51021',
'outdatadir' => $no_cal_path,
'subsetsubpath' => $no_cal_path . '/archive/lane1/.npg_cache_10000',
'i2b_run_path' => $dir . q[/nfs/sf45/IL2/analysis/123456_IL2_1234],
'teepot_tempdir' => '.',
'split_prefix' => $no_cal_path,
'split_prefix' => $no_archive_path,
'i2b_intensity_dir' => $intensities_dir,
'i2b_sample_aliases' => 'SRS000147',
'phix_alignment_method' => 'minimap2',
'md5filename' => $no_cal_path . '/1234_1.bam.md5',
'md5filename' => $no_archive_path . '/1234_1.bam.md5',
'teepot_mval' => '2G',
'i2b_runfolder' => '123456_IL2_1234',
'i2b_study_name' => '"SRP000031: 1000Genomes Project Pilot 1"',
Expand Down
4 changes: 2 additions & 2 deletions t/20-function-seq_alignment.t
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ subtest 'basic functionality' => sub {
qq{ && qc --check bam_flagstats --filename_root 12597_4#3 --qc_in $qc_in --qc_out $qc_out --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.cram} .
qq{ && qc --check bam_flagstats --filename_root 12597_4#3_phix --qc_in $qc_in --qc_out $qc_out --rpt_list "12597:4:3" --subset phix --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.cram} .
q{ && qc --check alignment_filter_metrics --filename_root 12597_4#3 --qc_in $PWD --qc_out }.$qc_out.q{ --rpt_list "12597:4:3" --input_files 12597_4#3_bam_alignment_filter_metrics.json} .
qq{ && qc --check rna_seqc --filename_root 12597_4#3 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4/plex3/12597_4#3.bam}.
qq{ && qc --check rna_seqc --filename_root 12597_4#3 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "12597:4:3" --input_files $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_archive/lane4/plex3/12597_4#3.bam}.
q{ '};

my $mem = 32000;
Expand Down Expand Up @@ -345,7 +345,7 @@ subtest 'RNASeq analysis' => sub {
qq{ && qc --check bam_flagstats --filename_root 13066_8 --qc_in $qc_in --qc_out $qc_out --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.cram} .
qq{ && qc --check bam_flagstats --filename_root 13066_8_phix --qc_in $qc_in --qc_out $qc_out --rpt_list "13066:8" --subset phix --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.cram} .
q{ && qc --check alignment_filter_metrics --filename_root 13066_8 --qc_in $PWD --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files 13066_8_bam_alignment_filter_metrics.json} .
qq{ && qc --check rna_seqc --filename_root 13066_8 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/lane8/13066_8.bam '};
qq{ && qc --check rna_seqc --filename_root 13066_8 --qc_in $qc_in --qc_out } . $qc_out . qq{ --rpt_list "13066:8" --input_files $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_archive/lane8/13066_8.bam '};

is ($d->command, $command, 'correct command for lane 8');
is ($d->memory, 32000, 'memory');
Expand Down
7 changes: 4 additions & 3 deletions t/util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ sub create_runfolder {
$paths->{'basecall_path'} = join q[/], $paths->{'intensity_path'}, q[BaseCalls];

if ($names->{'analysis_path'}) {
$paths->{'analysis_path'} = join q[/], $paths->{'intensity_path'}, $names->{'analysis_path'};
$paths->{'nocal_path'} = join q[/], $paths->{'analysis_path'}, q[no_cal];
$paths->{'archive_path'} = join q[/], $paths->{'nocal_path'}, q[archive];
$paths->{'analysis_path'} = join q[/], $paths->{'intensity_path'}, $names->{'analysis_path'};
$paths->{'nocal_path'} = join q[/], $paths->{'analysis_path'}, q[no_cal];
$paths->{'archive_path'} = join q[/], $paths->{'nocal_path'}, q[archive];
$paths->{'no_archive_path'} = join q[/], $paths->{'analysis_path'}, q[no_archive];
}

make_path(values %{$paths});
Expand Down

0 comments on commit ed22a03

Please sign in to comment.