Skip to content

Commit

Permalink
Merge pull request #3269 from rrahn/fix/issue_3266
Browse files Browse the repository at this point in the history
Fixes #3266: Incorrect begin/end of alignment for banded alignment
  • Loading branch information
rrahn authored Jul 16, 2024
2 parents ca4d668 + bfb1e32 commit 4cc708d
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 11 deletions.
16 changes: 12 additions & 4 deletions include/seqan3/alignment/pairwise/alignment_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,11 @@ class alignment_algorithm :
row_index_type{this->alignment_state.optimum.row_index}};
// At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
if constexpr (traits_t::is_banded)
{
res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
res.end_positions.first = this->to_original_sequence1_position(res.end_positions.first);
res.end_positions.second = this->to_original_sequence2_position(res.end_positions.second);
}
}

if constexpr (traits_t::compute_begin_positions)
Expand All @@ -631,8 +635,10 @@ class alignment_algorithm :
detail::row_index_type{this->alignment_state.optimum.row_index},
detail::column_index_type{this->alignment_state.optimum.column_index}};
auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
res.begin_positions.first =
this->to_original_sequence1_position(trace_res.first_sequence_slice_positions.first);
res.begin_positions.second =
this->to_original_sequence2_position(trace_res.second_sequence_slice_positions.first);

if constexpr (traits_t::compute_sequence_alignment)
res.alignment = std::move(trace_res.alignment);
Expand Down Expand Up @@ -697,8 +703,10 @@ class alignment_algorithm :

if constexpr (traits_t::compute_end_positions)
{
res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
res.end_positions.first =
this->to_original_sequence1_position(this->alignment_state.optimum.column_index[simd_index]);
res.end_positions.second =
this->to_original_sequence2_position(this->alignment_state.optimum.row_index[simd_index]);
}

callback(std::move(res));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,25 +140,24 @@ class alignment_matrix_policy
* band starts in the origin and ends in the sink.
*/
template <typename sequence1_t, typename sequence2_t>
constexpr auto slice_sequences(sequence1_t & sequence1,
sequence2_t & sequence2,
align_cfg::band_fixed_size const & band) const noexcept
constexpr auto
slice_sequences(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band) noexcept
{
size_t seq1_size = std::ranges::distance(sequence1);
size_t seq2_size = std::ranges::distance(sequence2);

auto trim_sequence1 = [&]() constexpr
{
size_t begin_pos = std::max<std::ptrdiff_t>(band.lower_diagonal - 1, 0);
seq1_slice_offset = std::max<std::ptrdiff_t>(band.lower_diagonal - 1, 0);
size_t end_pos = std::min<std::ptrdiff_t>(band.upper_diagonal + seq2_size, seq1_size);
return sequence1 | views::slice(begin_pos, end_pos);
return sequence1 | views::slice(seq1_slice_offset, end_pos);
};

auto trim_sequence2 = [&]() constexpr
{
size_t begin_pos = std::abs(std::min<std::ptrdiff_t>(band.upper_diagonal + 1, 0));
seq2_slice_offset = std::abs(std::min<std::ptrdiff_t>(band.upper_diagonal + 1, 0));
size_t end_pos = std::min<std::ptrdiff_t>(seq1_size - band.lower_diagonal, seq2_size);
return sequence2 | views::slice(begin_pos, end_pos);
return sequence2 | views::slice(seq2_slice_offset, end_pos);
};

return std::tuple{trim_sequence1(), trim_sequence2()};
Expand Down Expand Up @@ -192,10 +191,37 @@ class alignment_matrix_policy
++trace_matrix_iter;
}

/*!\brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 1.
*
* \param position The position in the sliced alignment matrix.
* \return The corresponding position in the original sequence 1.
*
* Only changes the position if the sequence was sliced due to band configuration.
*/
constexpr size_t to_original_sequence1_position(size_t position) const noexcept
{
return position + seq1_slice_offset;
}

/*!\brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 2.
*
* \param position The position in the sliced alignment matrix.
* \return The corresponding position in the original sequence 2.
*
* Only changes the position if the sequence was sliced due to band configuration.
*/
constexpr size_t to_original_sequence2_position(size_t position) const noexcept
{
return position + seq2_slice_offset;
}

score_matrix_t score_matrix{}; //!< The scoring matrix.
trace_matrix_t trace_matrix{}; //!< The trace matrix if needed.

typename score_matrix_t::iterator score_matrix_iter{}; //!< The matrix iterator over the score matrix.
typename trace_matrix_t::iterator trace_matrix_iter{}; //!< The matrix iterator over the trace matrix.

size_t seq1_slice_offset{}; //!< The offset of the first sequence slice.
size_t seq2_slice_offset{}; //!< The offset of the second sequence slice;
};
} // namespace seqan3::detail
32 changes: 32 additions & 0 deletions test/unit/alignment/pairwise/global_affine_banded_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,35 @@ TEST_F(pairwise_global_affine_banded, invalid_band_last_cell_not_covered)
fixture.config | seqan3::align_cfg::output_score{});
EXPECT_THROW(result_range.begin(), seqan3::invalid_alignment_configuration);
}

TEST(banded_alignment_issue3266_test, wrong_begin_and_end_position)
{
using namespace seqan3;
using namespace seqan3::literals;

auto const configGeneral = align_cfg::scoring_scheme{nucleotide_scoring_scheme{match_score{1}, mismatch_score{-1}}}
| align_cfg::gap_cost_affine{align_cfg::open_score{0}, align_cfg::extension_score{-1}}
| align_cfg::method_global{align_cfg::free_end_gaps_sequence1_leading{true},
align_cfg::free_end_gaps_sequence2_leading{true},
align_cfg::free_end_gaps_sequence1_trailing{true},
align_cfg::free_end_gaps_sequence2_trailing{true}};

auto const configBanded =
configGeneral | align_cfg::band_fixed_size{align_cfg::lower_diagonal{-40}, align_cfg::upper_diagonal{-20}};

//0 1 2 3 4
//01234567890123456789012345678901234567890
std::pair p{"CGTCTA"_dna4, "AAACCCGGGTTTAAACCCGGGTTTCGTGTACCCCCCCCCCC"_dna4};
// CGTCTA

auto general_results = align_pairwise(p, configGeneral);
auto general_res = *std::ranges::begin(general_results);
auto banded_results = align_pairwise(p, configBanded);
auto banded_res = *std::ranges::begin(banded_results);

EXPECT_EQ(general_res.score(), banded_res.score());
EXPECT_EQ(general_res.sequence1_begin_position(), banded_res.sequence1_begin_position());
EXPECT_EQ(general_res.sequence2_begin_position(), banded_res.sequence2_begin_position());
EXPECT_EQ(general_res.sequence1_end_position(), banded_res.sequence1_end_position());
EXPECT_EQ(general_res.sequence2_end_position(), banded_res.sequence2_end_position());
}

0 comments on commit 4cc708d

Please sign in to comment.