Skip to content

Commit

Permalink
Added tests for single-end diagnostics.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Oct 5, 2023
1 parent 01b2d40 commit 98bfd9b
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 9 deletions.
10 changes: 9 additions & 1 deletion include/kaori/handlers/CombinatorialBarcodesSingleEnd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,12 @@ class CombinatorialBarcodesSingleEnd {
* This should be less than or equal to `max_size`.
* @param barcode_pools Array containing the known barcode sequences for each of the variable regions, in the order of their appearance in the template sequence.
* @param options Optional parameters.
*
* @tparam BarcodePoolContainer Some iterable container of `BarcodePool` instances,
* usually either a `std::vector` or a `std::array`.
*/
CombinatorialBarcodesSingleEnd(const char* template_seq, size_t template_length, const std::array<BarcodePool, num_variable>& barcode_pools, const Options& options) :
template<class BarcodePoolContainer>
CombinatorialBarcodesSingleEnd(const char* template_seq, size_t template_length, const BarcodePoolContainer& barcode_pools, const Options& options) :
forward(search_forward(options.strand)),
reverse(search_reverse(options.strand)),
max_mm(options.max_mismatches),
Expand All @@ -75,6 +79,10 @@ class CombinatorialBarcodesSingleEnd {
if (regions.size() != num_variable) {
throw std::runtime_error("expected " + std::to_string(num_variable) + " variable regions in the constant template");
}
if (barcode_pools.size() != num_variable) {
throw std::runtime_error("length of 'barcode_pools' should be equal to the number of variable regions");
}

for (size_t i = 0; i < num_variable; ++i) {
size_t rlen = regions[i].second - regions[i].first;
size_t vlen = barcode_pools[i].length;
Expand Down
18 changes: 10 additions & 8 deletions include/kaori/handlers/DualBarcodesSingleEnd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class DualBarcodesSingleEnd {
}

private:
void process_first(State& state, const std::pair<const char*, const char*>& x) const {
bool process_first(State& state, const std::pair<const char*, const char*>& x) const {
auto deets = constant_matcher.initialize(x.first, x.second - x.first);

while (!deets.finished) {
Expand All @@ -181,21 +181,22 @@ class DualBarcodesSingleEnd {
auto id = forward_match(x.first, deets, state).first;
if (id >= 0) {
++state.counts[id];
return;
return true;
}
}

if (reverse && deets.reverse_mismatches <= max_mm) {
auto id = reverse_match(x.first, deets, state).first;
if (id >= 0) {
++state.counts[id];
return;
return true;
}
}
}
return false;
}

void process_best(State& state, const std::pair<const char*, const char*>& x) const {
bool process_best(State& state, const std::pair<const char*, const char*>& x) const {
auto deets = constant_matcher.initialize(x.first, x.second - x.first);
bool found = false;
int best_mismatches = max_mm + 1;
Expand Down Expand Up @@ -235,6 +236,7 @@ class DualBarcodesSingleEnd {
if (found) {
++state.counts[best_id];
}
return found;
}

public:
Expand Down Expand Up @@ -262,13 +264,13 @@ class DualBarcodesSingleEnd {
return;
}

void process(State& state, const std::pair<const char*, const char*>& x) const {
bool process(State& state, const std::pair<const char*, const char*>& x) const {
++state.total;
if (use_first) {
process_first(state, x);
return process_first(state, x);
} else {
process_best(state, x);
return process_best(state, x);
}
++state.total;
}

static constexpr bool use_names = false;
Expand Down
145 changes: 145 additions & 0 deletions include/kaori/handlers/DualBarcodesSingleEndWithDiagnostics.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#ifndef KAORI_DUAL_BARCODES_SINGLE_END_WITH_DIAGNOSTICS_HPP
#define KAORI_DUAL_BARCODES_SINGLE_END_WITH_DIAGNOSTICS_HPP

#include "DualBarcodesSingleEnd.hpp"
#include "CombinatorialBarcodesSingleEnd.hpp"
#include "../utils.hpp"

/**
* @file DualBarcodesSingleEndWithDiagnostics.hpp
*
* @brief Process dual barcodes with extra diagnostics.
*/

namespace kaori {

/**
* @brief Handler for dual barcodes with extra diagnostics.
*
* This provides the same information as `DualBarcodesSingleEnd` but also captures the frequency of the invalid combinations.
* These frequences can be helpful for diagnosing problems with library construction.
*
* @tparam max_size Maximum length of the template sequences on both reads.
* @tparam num_variable Number of the template sequences on both reads.
*/
template<size_t max_size, size_t num_variable>
class DualBarcodesSingleEndWithDiagnostics {
public:
/**
* @param[in] template_seq Pointer to a character array containing the template sequence.
* @param template_length Length of the template.
* This should be less than or equal to `max_size`.
* @param barcode_pools Pools of known barcode sequences for each variable region in the template.
* Each pool should have the same length, and corresponding values across pools define a specific combination of barcodes.
* @param options Optional parameters.
*/
DualBarcodesSingleEndWithDiagnostics(
const char* template_seq,
size_t template_length,
const std::vector<BarcodePool>& barcode_pools,
const typename DualBarcodesSingleEnd<max_size>::Options& options
) :
dual_handler(template_seq, template_length, barcode_pools, options),

combo_handler(template_seq, template_length, barcode_pools,
[&]{
typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::Options combopt;
combopt.use_first = options.use_first;

combopt.max_mismatches = options.max_mismatches;
combopt.strand = options.strand;

// we allow duplicates in the trie for each individual barcode, as only the combinations are unique in the dual barcode setup.
combopt.duplicates = DuplicateAction::FIRST;
return combopt;
}()
)
{}

private:
DualBarcodesSingleEnd<max_size> dual_handler;
CombinatorialBarcodesSingleEnd<max_size, num_variable> combo_handler;

public:
/**
*@cond
*/
struct State {
State() {}
State(typename DualBarcodesSingleEnd<max_size>::State ds, typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::State cs) : dual_state(std::move(ds)), combo_state(std::move(cs)) {}

/**
* @cond
*/
typename DualBarcodesSingleEnd<max_size>::State dual_state;
typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::State combo_state;
/**
* @endcond
*/
};

State initialize() const {
return State(dual_handler.initialize(), combo_handler.initialize());
}

void reduce(State& s) {
dual_handler.reduce(s.dual_state);
combo_handler.reduce(s.combo_state);
}

constexpr static bool use_names = false;
/**
*@endcond
*/

public:
/**
*@cond
*/
void process(State& state, const std::pair<const char*, const char*>& x) const {
// Only searching for combinations if we couldn't find a proper dual barcode match.
if (!dual_handler.process(state.dual_state, x)) {
combo_handler.process(state.combo_state, x);
}
}
/**
*@endcond
*/

public:
/**
* Sort the invalid combinations for easier frequency counting.
* Combinations are sorted by the first index, and then the second index.
*/
void sort() {
combo_handler.sort();
}

/**
* @return Vector containing the frequency of each valid combination.
* This has length equal to the number of valid dual barcode combinations (i.e., the length of `barcode_pool1` and `barcode_pool2` in the constructor).
* Each entry contains the count for the corresponding dual barcode combination.
*/
const std::vector<int>& get_counts() const {
return dual_handler.get_counts();
}

/**
* @return All invalid combinations encountered by the handler.
* In each array, the first and second element contains the indices of known barcodes in the first and second pools, respectively.
*/
const std::vector<std::array<int, num_variable> >& get_combinations() const {
return combo_handler.get_combinations();
}

/**
* @return Total number of reads processed by the handler.
*/
int get_total() const {
return dual_handler.get_total();
}
};

}

#endif
1 change: 1 addition & 0 deletions include/kaori/kaori.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "handlers/DualBarcodesPairedEnd.hpp"
#include "handlers/DualBarcodesPairedEndWithDiagnostics.hpp"
#include "handlers/DualBarcodesSingleEnd.hpp"
#include "handlers/DualBarcodesSingleEndWithDiagnostics.hpp"
#include "handlers/RandomBarcodeSingleEnd.hpp"
#include "handlers/SingleBarcodePairedEnd.hpp"
#include "handlers/SingleBarcodeSingleEnd.hpp"
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ add_executable(
src/handlers/DualBarcodesPairedEnd.cpp
src/handlers/DualBarcodesPairedEndWithDiagnostics.cpp
src/handlers/DualBarcodesSingleEnd.cpp
src/handlers/DualBarcodesSingleEndWithDiagnostics.cpp
src/handlers/RandomBarcodeSingleEnd.cpp
)

Expand Down
94 changes: 94 additions & 0 deletions tests/src/handlers/DualBarcodesSingleEndWithDiagnostics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#include <gtest/gtest.h>
#include "kaori/handlers/DualBarcodesSingleEndWithDiagnostics.hpp"
#include "kaori/process_data.hpp"
#include "byteme/RawBufferReader.hpp"
#include "../utils.h"
#include <string>

class DualBarcodesSingleEndWithDiagnosticsTest : public testing::Test {
protected:
DualBarcodesSingleEndWithDiagnosticsTest() :
constant("AAAA----CGGCAGCT------TTTT"),
variables1(std::vector<std::string>{ "AAAA", "CCCC", "GGGG", "TTTT" }),
variables2(std::vector<std::string>{ "ACACAC", "TGTGTG", "AGAGAG", "CTCTCT" })
{}

std::string constant1, constant;
std::vector<std::string> variables1;
std::vector<std::string> variables2;

template<size_t max_size>
using Options = typename kaori::DualBarcodesSingleEnd<max_size>::Options;
};

TEST_F(DualBarcodesSingleEndWithDiagnosticsTest, BasicFirst) {
std::vector<std::string> seq {
"cagcatcgatcgtgaAAAACCCCCGGCAGCTTGTGTGTTTTacggaggaga", // index 1
"AAAAGGGGCGGCAGCTAGAGAGTTTTaaaaccccggg", // index 2
"AAAAGGGGCGGCAGCTTGTGTGTTTTaaaaccccggg", // invalid: (2, 1)
"aaccaccaAAAATTTTCGGCAGCTACACACTTTTaaaaccccggg", // invalid (3, 0)
"cagacgagcagcgagcagcatcagca" // matches nothing.
};
std::string fq = convert_to_fastq(seq);

byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(fq.c_str()), fq.size());

kaori::DualBarcodesSingleEndWithDiagnostics<32, 2> stuff(
constant.c_str(), constant.size(),
{ kaori::BarcodePool(variables1), kaori::BarcodePool(variables2) },
Options<32>()
);
kaori::process_single_end_data(&reader, stuff);

EXPECT_EQ(stuff.get_total(), 5);
EXPECT_EQ(stuff.get_counts()[0], 0);
EXPECT_EQ(stuff.get_counts()[1], 1);
EXPECT_EQ(stuff.get_counts()[2], 1);
EXPECT_EQ(stuff.get_counts()[3], 0);

stuff.sort();
const auto& combos = stuff.get_combinations();
ASSERT_EQ(combos.size(), 2);
EXPECT_EQ(combos[0][0], 2);
EXPECT_EQ(combos[0][1], 1);

EXPECT_EQ(combos[1][0], 3);
EXPECT_EQ(combos[1][1], 0);
}

TEST_F(DualBarcodesSingleEndWithDiagnosticsTest, WithDuplicates) {
// Inserting duplicate entries, even though the combinations are unique.
variables1.push_back("AAAA");
EXPECT_EQ(variables1.front(), variables1.back());
variables2.push_back("CTCTCT");
EXPECT_EQ(variables2[3], variables2[4]);

std::vector<std::string> seq{
"cagcatcgatcgtgaAAAAAAAACGGCAGCTACACACTTTTcagcatcgatcgtga", // ok, index 1
"cagcatcgatcgtgaAAAAAAAACGGCAGCTCTCTCTTTTTcagcatcgatcgtga", // ok, index 4
"cagcatcgatcgtgaAAAAAAAACGGCAGCTTGTGTGTTTTcagcatcgatcgtga" // invalid (0, 1), as the first hit is reported.
};
std::string fq = convert_to_fastq(seq);

byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(fq.c_str()), fq.size());

kaori::DualBarcodesSingleEndWithDiagnostics<32, 2> stuff(
constant.c_str(), constant.size(),
{ kaori::BarcodePool(variables1), kaori::BarcodePool(variables2) },
Options<32>()
);
kaori::process_single_end_data(&reader, stuff);

EXPECT_EQ(stuff.get_total(), 3);
EXPECT_EQ(stuff.get_counts()[0], 1);
EXPECT_EQ(stuff.get_counts()[1], 0);
EXPECT_EQ(stuff.get_counts()[2], 0);
EXPECT_EQ(stuff.get_counts()[3], 0);
EXPECT_EQ(stuff.get_counts()[4], 1);

stuff.sort();
const auto& combos = stuff.get_combinations();
ASSERT_EQ(combos.size(), 1);
EXPECT_EQ(combos.front()[0], 0);
EXPECT_EQ(combos.front()[1], 1);
}

0 comments on commit 98bfd9b

Please sign in to comment.