Skip to content

Commit

Permalink
Merge pull request #815 from mgcam/use_single_aggregation_method
Browse files Browse the repository at this point in the history
Use one aggregation method for merges.
  • Loading branch information
dkj authored Feb 7, 2024
2 parents e1f8f79 + fd54779 commit fcdbf6c
Show file tree
Hide file tree
Showing 77 changed files with 145 additions and 123 deletions.
10 changes: 10 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
LIST OF CHANGES
---------------

- Use st::api::lims->aggregate_libraries() method for both 'merge_lanes' and
'merge_by_library' pipeline options. This is a breaking change as far as
archival and deletion of NovaSeq Standard workflow data is concerned.
Key change for lane merging for this data will be that tag 0 and tag 888 will
not be merged across the lanes.
The NovaSeq Standard workflow, where there is only one input port, is
different to the more general merging across lanes where the (claimed) same
library has been sequenced. But this is not a valid reason to maintain separate
code.

release 67.1.1
- Fixed correct pp collection root for MiSeq

Expand Down
114 changes: 44 additions & 70 deletions MANIFEST

Large diffs are not rendered by default.

97 changes: 61 additions & 36 deletions lib/npg_pipeline/base.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use namespace::autoclean;
use MooseX::Getopt::Meta::Attribute::Trait::NoGetopt;
use POSIX qw(strftime);
use Math::Random::Secure qw{irand};
use List::MoreUtils qw{any};
use List::MoreUtils qw{any uniq};
use File::Basename;
use Readonly;

Expand Down Expand Up @@ -358,49 +358,51 @@ sub _build_products {
if ($self->has_product_rpt_list) {
@data_lims = ($self->lims);
} else {
my @positions = $self->positions;
@lane_lims = map { $self->lims4lane($_) } @positions;
@lane_lims = map { $self->lims4lane($_) } $self->positions;

if ($self->merge_lanes) {
@data_lims = $self->lims->aggregate_xlanes(@positions);
} else {
my %tag0_lims = ();
if ($self->is_indexed) {
%tag0_lims = map { $_->position => $_->create_tag_zero_object() }
grep { $_->is_pool } @lane_lims;
}

if ($self->merge_lanes || $self->merge_by_library) {

my $all_lims = $self->lims->aggregate_libraries(\@lane_lims);
@data_lims = @{$all_lims->{'singles'}}; # Might be empty.

my %tag0_lims = ();
if ($self->is_indexed) {
%tag0_lims = map { $_->position => $_->create_tag_zero_object() }
grep { $_->is_pool } @lane_lims;
# merge_lanes option implies a merge across all lanes.
if ($self->merge_lanes && (@lane_lims > 1)) {
$self->_check_lane_merge_is_viable(
\@lane_lims, $all_lims->{'singles'}, $all_lims->{'merges'});
}

if ($self->merge_by_library) {
my $all_lims = $self->lims->aggregate_libraries(\@lane_lims);
@data_lims = @{$all_lims->{'singles'}}; # Might be empty.
# Tag zero LIMS objects for all lanes, merged or unmerged.
push @data_lims, map { $tag0_lims{$_} } (sort keys %tag0_lims);

if ( @{$all_lims->{'merges'}} ) {
# If the libraries are merged across a subset of lanes under analysis,
# the 'selected_lanes' flag needs to be flipped to true.
if (!$self->_selected_lanes) {
my $rpt_list = $all_lims->{'merges'}->[0]->rpt_list;;
my $num_components =
npg_tracking::glossary::composition::factory::rpt_list
->new(rpt_list => $rpt_list)
->create_composition()->num_components();
if ($num_components != scalar @lane_lims) {
$self->_set_selected_lanes(1);
}
# Tag zero LIMS objects for all pooled lanes, merged or unmerged.
push @data_lims, map { $tag0_lims{$_} } (sort keys %tag0_lims);

if ( @{$all_lims->{'merges'}} ) {
# If the libraries are merged across a subset of lanes under analysis,
# the 'selected_lanes' flag needs to be flipped to true.
if (!$self->_selected_lanes) {
my $rpt_list = $all_lims->{'merges'}->[0]->rpt_list;;
my $num_components =
npg_tracking::glossary::composition::factory::rpt_list
->new(rpt_list => $rpt_list)
->create_composition()->num_components();
if ($num_components != scalar @lane_lims) {
$self->_set_selected_lanes(1);
}
push @data_lims, @{$all_lims->{'merges'}};
}
push @data_lims, @{$all_lims->{'merges'}};
}

} else {
# To keep backward-compatible order of pipeline invocations, add
# tag zero LIMS object at the end of other objects for the lane.
@data_lims = map {
exists $tag0_lims{$_->position} ?
} else {
# To keep backward-compatible order of pipeline invocations, add
# tag zero LIMS object at the end of other objects for the lane.
@data_lims = map {
exists $tag0_lims{$_->position} ?
($_->children, $tag0_lims{$_->position}) : $_
} @lane_lims;
}
} @lane_lims;
}
}

Expand Down Expand Up @@ -442,6 +444,29 @@ sub _lims_object2product {
);
}

sub _check_lane_merge_is_viable {
my ($self, $lane_lims, $singles, $merges) = @_;

my @num_plexes = uniq
map { scalar @{$_} }
map { [grep { !$_->is_control } @{$_}] }
map { [$_->children()] } @{$lane_lims};

my $m = 'merge_lane option is not viable: ';
if (@num_plexes > 1) {
$self->logcroak($m . 'different number of samples in lanes');
}
if (any { !$_->is_control } @{$singles}) {
$self->logcroak($m . 'unmerged samples are present after aggregation');
}
if (@{$merges} != $num_plexes[0]) {
$self->logcroak($m . 'number of merged samples after aggregation ' .
'differs from the number of samples in a lane');
}

return 1;
}

__PACKAGE__->meta->make_immutable;

1;
Expand Down
3 changes: 2 additions & 1 deletion t/10-base.t
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ subtest 'products - merging (or not) lanes' => sub {

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';
cp 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
cp 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
$b = npg_pipeline::base->new(runfolder_path => $rf_path, id_run => 999);
ok ($b->merge_lanes, 'merge_lanes flag is set');
lives_ok {$products = $b->products} 'products hash created for NovaSeq run';
ok (exists $products->{'lanes'}, 'products lanes key exists');
is (scalar @{$products->{'lanes'}}, 4, 'four lane product');
ok (exists $products->{'data_products'}, 'products data_products key exists');
is (scalar @{$products->{'data_products'}}, 23, '23 data products');
is (scalar @{$products->{'data_products'}}, 29, '29 data products');

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_rapidrun_nopool.csv';
cp 't/data/run_params/runParameters.hiseq.rr.xml', "$rf_path/runParameters.xml";
Expand Down
24 changes: 15 additions & 9 deletions t/10-runfolder_scaffold.t
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ subtest 'top level scaffold' => sub {
};

subtest 'product level scaffold, NovaSeq all lanes' => sub {
plan tests => 99;
plan tests => 101;

my $util = t::util->new();
my $rfh = $util->create_runfolder();
my $rf_path = $rfh->{'runfolder_path'};
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';

my $rfs = Moose::Meta::Class->create_anon_class(
Expand All @@ -95,6 +96,8 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, 'tileviz_'.$_} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
push @dirs, (map {join q[/], $_, 'plex0/qc'} @original);
push @dirs, (map {join q[/], $_, 'plex888/qc'} @original);
map { ok (-d $_, "$_ created") } map {join q[/], $apath, $_} @dirs;

for my $lane (@original) {
Expand All @@ -106,7 +109,7 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
like ($content, qr/No tileviz data available for this lane/, 'info exists');
}

@original = map {'plex' . $_} (0 .. 21, 888);
@original = map {'plex' . $_} (1 .. 21);
@dirs = @original;
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
Expand All @@ -115,16 +118,17 @@ subtest 'product level scaffold, NovaSeq all lanes' => sub {
my $tileviz_index = join q[/], $apath, 'tileviz', 'index.html';
ok (-e $tileviz_index, 'tileviz index created');
my @lines = read_file($tileviz_index);
is (scalar @lines, 7, 'tileviz index contains sevel lines');
is (scalar @lines, 7, 'tileviz index contains seven lines');
};

subtest 'product level scaffold, NovaSeq selected lanes' => sub {
plan tests => 175;
plan tests => 165;

my $util = t::util->new();
my $rfh = $util->create_runfolder();
my $rf_path = $rfh->{'runfolder_path'};
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/run_params/runParameters.novaseq.xml', "$rf_path/runParameters.xml";
fcopy 't/data/novaseq/210111_A00513_0447_AHJ55JDSXY/RunInfo.xml', "$rf_path/RunInfo.xml";
local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/products/samplesheet_novaseq4lanes.csv';

my $rfs = Moose::Meta::Class->create_anon_class(
Expand All @@ -149,12 +153,14 @@ subtest 'product level scaffold, NovaSeq selected lanes' => sub {
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, 'tileviz_'.$_} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
push @dirs, (map {join q[/], $_, 'plex0/qc'} @original);
push @dirs, (map {join q[/], $_, 'plex888/qc'} @original);
map { ok (-d $_, "$_ created") } map {join q[/], $apath, $_} @dirs;

@dirs = qw/lane1 lane4/;
map { ok (!-e $_, "$_ not created") } map {join q[/], $apath, $_} @dirs;

@original = map {'lane2-3/plex' . $_} (0 .. 21, 888);
@original = map {'lane2-3/plex' . $_} (1 .. 21);
@dirs = @original;
push @dirs, (map {join q[/], $_, 'qc'} @original);
push @dirs, (map {join q[/], $_, '.npg_cache_10000'} @original);
Expand All @@ -172,7 +178,7 @@ subtest 'product level scaffold, NovaSeq selected lanes' => sub {
"link for lane $l file is not created");
}

for my $t ( (0 .. 21, 888) ) {
for my $t ( (1 .. 21) ) {
my $name = "999_2-3#${t}.cram";
my $file = "$napath/lane2-3/plex${t}/stage1/$name";
ok ((-l $file), "link for plex $t is created");
Expand Down Expand Up @@ -252,4 +258,4 @@ subtest 'product level scaffold, library merge for NovaSeqX' => sub {
}
};

1;
1;
12 changes: 7 additions & 5 deletions t/15-product-release.t
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ my $runfolder_path = 't/data/novaseq/180709_A00538_0010_BH3FCMDRXX';
my $timestamp = '20180701-123456';

subtest 'expected_files' => sub {
plan tests => 1;
plan tests => 2;

my $archiver = $cls->new_object
(conf_path => "t/data/release/config/archive_on",
Expand All @@ -52,10 +52,11 @@ subtest 'expected_files' => sub {
timestamp => $timestamp,
qc_schema => $qc);

my $product = shift @{$archiver->products->{data_products}};

my $path = "$runfolder_path/Data/Intensities/" .
'BAM_basecalls_20180805-013153/no_cal/archive/plex1';
my $product = $archiver->products->{data_products}->[4];
is ($product->rpt_list, '26291:1:1;26291:2:1', 'correct product');

my @expected = sort map { "$path/$_" }
('26291#1_F0x900.stats',
'26291#1_F0xB00.stats',
Expand Down Expand Up @@ -88,8 +89,7 @@ subtest 'expected_unaligned_files' => sub {
timestamp => $timestamp,
qc_schema => $qc);

my $product = shift @{$archiver->products->{data_products}};

my $product = $archiver->products->{data_products}->[4];
my $path = "$runfolder_path/Data/Intensities/" .
'BAM_basecalls_20180805-013153/no_cal/archive/plex1';
my @expected = sort map { "$path/$_" }
Expand All @@ -105,3 +105,5 @@ subtest 'expected_unaligned_files' => sub {
diag explain \@observed;
};

1;

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
5 January 2024

Both lane1 and lane2 directories are expected to contain
subdirectories plex0 and plex888 with all relevant files
and subdirectories. Only lane1/plex0 directory is present
in this testrunfolder as a representative example.
Empty file.
Empty file.
Empty file.
Empty file.

This file was deleted.

Empty file.
Empty file.
Empty file.

This file was deleted.

Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.

0 comments on commit fcdbf6c

Please sign in to comment.