-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added tests for single-end diagnostics.
- Loading branch information
Showing
6 changed files
with
260 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
145 changes: 145 additions & 0 deletions
145
include/kaori/handlers/DualBarcodesSingleEndWithDiagnostics.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#ifndef KAORI_DUAL_BARCODES_SINGLE_END_WITH_DIAGNOSTICS_HPP | ||
#define KAORI_DUAL_BARCODES_SINGLE_END_WITH_DIAGNOSTICS_HPP | ||
|
||
#include "DualBarcodesSingleEnd.hpp" | ||
#include "CombinatorialBarcodesSingleEnd.hpp" | ||
#include "../utils.hpp" | ||
|
||
/** | ||
* @file DualBarcodesSingleEndWithDiagnostics.hpp | ||
* | ||
* @brief Process dual barcodes with extra diagnostics. | ||
*/ | ||
|
||
namespace kaori { | ||
|
||
/** | ||
* @brief Handler for dual barcodes with extra diagnostics. | ||
* | ||
* This provides the same information as `DualBarcodesSingleEnd` but also captures the frequency of the invalid combinations. | ||
* These frequencies can be helpful for diagnosing problems with library construction. | ||
* | ||
* @tparam max_size Maximum length of the template sequence. | ||
* @tparam num_variable Number of variable regions in the template. | ||
*/ | ||
template<size_t max_size, size_t num_variable> | ||
class DualBarcodesSingleEndWithDiagnostics { | ||
public: | ||
/** | ||
* @param[in] template_seq Pointer to a character array containing the template sequence. | ||
* @param template_length Length of the template. | ||
* This should be less than or equal to `max_size`. | ||
* @param barcode_pools Pools of known barcode sequences for each variable region in the template. | ||
* Each pool should have the same length, and corresponding values across pools define a specific combination of barcodes. | ||
* @param options Optional parameters. | ||
* The `use_first`, `max_mismatches` and `strand` fields are also forwarded to the internal combinatorial handler. | ||
*/ | ||
DualBarcodesSingleEndWithDiagnostics( | ||
const char* template_seq, | ||
size_t template_length, | ||
const std::vector<BarcodePool>& barcode_pools, | ||
const typename DualBarcodesSingleEnd<max_size>::Options& options | ||
) : | ||
dual_handler(template_seq, template_length, barcode_pools, options), | ||
|
||
// The combinatorial handler is built from the same template and pools; | ||
// its options are derived from `options` by the immediately-invoked lambda below. | ||
combo_handler(template_seq, template_length, barcode_pools, | ||
[&]{ | ||
typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::Options combopt; | ||
combopt.use_first = options.use_first; | ||
|
||
combopt.max_mismatches = options.max_mismatches; | ||
combopt.strand = options.strand; | ||
|
||
// We allow duplicates in the trie for each individual barcode, as only the combinations are unique in the dual barcode setup. | ||
// The duplicate policy is therefore fixed to FIRST rather than being taken from `options`. | ||
combopt.duplicates = DuplicateAction::FIRST; | ||
return combopt; | ||
}() | ||
) | ||
{} | ||
|
||
private: | ||
// Handler that counts reads matching a valid dual barcode combination; it is given every read. | ||
DualBarcodesSingleEnd<max_size> dual_handler; | ||
// Handler that is only given the reads for which the dual handler reported no match (see process()). | ||
CombinatorialBarcodesSingleEnd<max_size, num_variable> combo_handler; | ||
|
||
public: | ||
/** | ||
*@cond | ||
*/ | ||
// Bundles the state objects of the two wrapped handlers so they can be passed around as one unit. | ||
struct State { | ||
State() {} | ||
State(typename DualBarcodesSingleEnd<max_size>::State ds, typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::State cs) : dual_state(std::move(ds)), combo_state(std::move(cs)) {} | ||
|
||
/** | ||
* @cond | ||
*/ | ||
typename DualBarcodesSingleEnd<max_size>::State dual_state; | ||
typename CombinatorialBarcodesSingleEnd<max_size, num_variable>::State combo_state; | ||
/** | ||
* @endcond | ||
*/ | ||
}; | ||
|
||
// Builds a State from the result of each wrapped handler's initialize(). | ||
State initialize() const { | ||
return State(dual_handler.initialize(), combo_handler.initialize()); | ||
} | ||
|
||
// Passes each sub-state to the reduce() of the handler that created it. | ||
void reduce(State& s) { | ||
dual_handler.reduce(s.dual_state); | ||
combo_handler.reduce(s.combo_state); | ||
} | ||
|
||
// NOTE(review): presumably signals to the processing loop that read names are not needed - confirm against process_data.hpp. | ||
constexpr static bool use_names = false; | ||
/** | ||
*@endcond | ||
*/ | ||
|
||
public: | ||
/** | ||
*@cond | ||
*/ | ||
// NOTE(review): `x` is forwarded untouched to both handlers; presumably it holds the start/end pointers of the read sequence - confirm. | ||
void process(State& state, const std::pair<const char*, const char*>& x) const { | ||
// Only searching for combinations if we couldn't find a proper dual barcode match. | ||
// This relies on dual_handler.process() returning a value that tests true on a match. | ||
if (!dual_handler.process(state.dual_state, x)) { | ||
combo_handler.process(state.combo_state, x); | ||
} | ||
} | ||
/** | ||
*@endcond | ||
*/ | ||
|
||
public: | ||
/** | ||
* Sort the invalid combinations for easier frequency counting. | ||
* Combinations are sorted by the first index, and then the second index. | ||
*/ | ||
void sort() { | ||
combo_handler.sort(); | ||
} | ||
|
||
/** | ||
* @return Vector containing the frequency of each valid combination. | ||
* This has length equal to the number of valid dual barcode combinations (i.e., the length of each pool in `barcode_pools` in the constructor). | ||
* Each entry contains the count for the corresponding dual barcode combination. | ||
*/ | ||
const std::vector<int>& get_counts() const { | ||
return dual_handler.get_counts(); | ||
} | ||
|
||
/** | ||
* @return All invalid combinations encountered by the handler. | ||
* In each array, the first and second element contains the indices of known barcodes in the first and second pools, respectively. | ||
*/ | ||
const std::vector<std::array<int, num_variable> >& get_combinations() const { | ||
return combo_handler.get_combinations(); | ||
} | ||
|
||
/** | ||
* @return Total number of reads processed by the handler. | ||
* This is taken from the dual handler, as that is the one that is given every read. | ||
*/ | ||
int get_total() const { | ||
return dual_handler.get_total(); | ||
} | ||
}; | ||
|
||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
94 changes: 94 additions & 0 deletions
94
tests/src/handlers/DualBarcodesSingleEndWithDiagnostics.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#include <gtest/gtest.h> | ||
#include "kaori/handlers/DualBarcodesSingleEndWithDiagnostics.hpp" | ||
#include "kaori/process_data.hpp" | ||
#include "byteme/RawBufferReader.hpp" | ||
#include "../utils.h" | ||
#include <string> | ||
|
||
// Test fixture: a template with two runs of '-' (4 and 6 characters long) and one pool of candidate barcodes for each run. | ||
// Entries at the same position in the two pools are treated by the tests as one valid dual barcode combination. | ||
class DualBarcodesSingleEndWithDiagnosticsTest : public testing::Test { | ||
protected: | ||
DualBarcodesSingleEndWithDiagnosticsTest() : | ||
constant("AAAA----CGGCAGCT------TTTT"), | ||
variables1(std::vector<std::string>{ "AAAA", "CCCC", "GGGG", "TTTT" }), | ||
variables2(std::vector<std::string>{ "ACACAC", "TGTGTG", "AGAGAG", "CTCTCT" }) | ||
{} | ||
|
||
// NOTE(review): constant1 is not set by the constructor and is not read by any test visible here; it looks like a leftover. | ||
std::string constant1, constant; | ||
std::vector<std::string> variables1; | ||
std::vector<std::string> variables2; | ||
|
||
// Shorthand for the options type accepted by the handler's constructor. | ||
template<size_t max_size> | ||
using Options = typename kaori::DualBarcodesSingleEnd<max_size>::Options; | ||
}; | ||
|
||
// Checks the handler with default options on a mix of valid, invalid and unmatched reads: | ||
// valid combinations go into the counts, invalid ones are reported as index pairs. | ||
TEST_F(DualBarcodesSingleEndWithDiagnosticsTest, BasicFirst) { | ||
std::vector<std::string> seq { | ||
"cagcatcgatcgtgaAAAACCCCCGGCAGCTTGTGTGTTTTacggaggaga", // index 1 | ||
"AAAAGGGGCGGCAGCTAGAGAGTTTTaaaaccccggg", // index 2 | ||
"AAAAGGGGCGGCAGCTTGTGTGTTTTaaaaccccggg", // invalid: (2, 1) | ||
"aaccaccaAAAATTTTCGGCAGCTACACACTTTTaaaaccccggg", // invalid (3, 0) | ||
"cagacgagcagcgagcagcatcagca" // matches nothing. | ||
}; | ||
std::string fq = convert_to_fastq(seq); | ||
|
||
// The reader works directly on the in-memory FASTQ text, so `fq` must outlive it. | ||
byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(fq.c_str()), fq.size()); | ||
|
||
kaori::DualBarcodesSingleEndWithDiagnostics<32, 2> stuff( | ||
constant.c_str(), constant.size(), | ||
{ kaori::BarcodePool(variables1), kaori::BarcodePool(variables2) }, | ||
Options<32>() | ||
); | ||
kaori::process_single_end_data(&reader, stuff); | ||
|
||
// All five reads contribute to the total, but only the two valid combinations (1 and 2) are counted. | ||
EXPECT_EQ(stuff.get_total(), 5); | ||
EXPECT_EQ(stuff.get_counts()[0], 0); | ||
EXPECT_EQ(stuff.get_counts()[1], 1); | ||
EXPECT_EQ(stuff.get_counts()[2], 1); | ||
EXPECT_EQ(stuff.get_counts()[3], 0); | ||
|
||
// Sorting first makes the order of the reported invalid combinations predictable. | ||
stuff.sort(); | ||
const auto& combos = stuff.get_combinations(); | ||
ASSERT_EQ(combos.size(), 2); | ||
EXPECT_EQ(combos[0][0], 2); | ||
EXPECT_EQ(combos[0][1], 1); | ||
|
||
EXPECT_EQ(combos[1][0], 3); | ||
EXPECT_EQ(combos[1][1], 0); | ||
} | ||
|
||
// Checks that a barcode may be repeated within a single pool as long as the combinations across pools stay unique. | ||
TEST_F(DualBarcodesSingleEndWithDiagnosticsTest, WithDuplicates) { | ||
// Inserting duplicate entries, even though the combinations are unique. | ||
variables1.push_back("AAAA"); | ||
EXPECT_EQ(variables1.front(), variables1.back()); | ||
variables2.push_back("CTCTCT"); | ||
EXPECT_EQ(variables2[3], variables2[4]); | ||
|
||
std::vector<std::string> seq{ | ||
"cagcatcgatcgtgaAAAAAAAACGGCAGCTACACACTTTTcagcatcgatcgtga", // ok, index 0 | ||
"cagcatcgatcgtgaAAAAAAAACGGCAGCTCTCTCTTTTTcagcatcgatcgtga", // ok, index 4 | ||
"cagcatcgatcgtgaAAAAAAAACGGCAGCTTGTGTGTTTTcagcatcgatcgtga" // invalid (0, 1), as the first hit is reported. | ||
}; | ||
std::string fq = convert_to_fastq(seq); | ||
|
||
// The reader works directly on the in-memory FASTQ text, so `fq` must outlive it. | ||
byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(fq.c_str()), fq.size()); | ||
|
||
kaori::DualBarcodesSingleEndWithDiagnostics<32, 2> stuff( | ||
constant.c_str(), constant.size(), | ||
{ kaori::BarcodePool(variables1), kaori::BarcodePool(variables2) }, | ||
Options<32>() | ||
); | ||
kaori::process_single_end_data(&reader, stuff); | ||
|
||
// (AAAA, ACACAC) is combination 0 and (AAAA, CTCTCT) is combination 4; the third read is not a valid combination. | ||
EXPECT_EQ(stuff.get_total(), 3); | ||
EXPECT_EQ(stuff.get_counts()[0], 1); | ||
EXPECT_EQ(stuff.get_counts()[1], 0); | ||
EXPECT_EQ(stuff.get_counts()[2], 0); | ||
EXPECT_EQ(stuff.get_counts()[3], 0); | ||
EXPECT_EQ(stuff.get_counts()[4], 1); | ||
|
||
// The duplicated AAAA is reported under its first position (0) in the invalid combination. | ||
stuff.sort(); | ||
const auto& combos = stuff.get_combinations(); | ||
ASSERT_EQ(combos.size(), 1); | ||
EXPECT_EQ(combos.front()[0], 0); | ||
EXPECT_EQ(combos.front()[1], 1); | ||
} | ||