Skip to content

Commit

Permalink
Issue-228: Update RunInfo.xml support for NextSeq2k (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
ezralanglois authored Aug 25, 2020
1 parent d2af397 commit 9a86237
Show file tree
Hide file tree
Showing 10 changed files with 435 additions and 50 deletions.
10 changes: 10 additions & 0 deletions docs/src/changes.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# Changes {#changes}


## v1.1.12

Date | Description
---------- | -----------
2020-08-25 | Issue-228: Update RunInfo.xml support for NextSeq2k
2020-08-25 | Issue-228: Add blue/green channel name support
2020-08-25 | Issue-228: Add reverse complement support
2020-08-25 | Issue-228: Fix memory corruption in RunInfo.xml parsing


## v1.1.11

Date | Description
Expand Down
29 changes: 10 additions & 19 deletions interop/logic/utils/channel.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,21 @@
#include <vector>
#include <iterator>
#include "interop/util/assert.h"
#include "interop/util/string.h"
#include "interop/util/exception.h"
#include "interop/constants/enums.h"
#include "interop/model/model_exceptions.h"

namespace illumina { namespace interop { namespace logic { namespace utils
{
namespace detail
{
    /** Convert a character to lower case
     *
     * @note workaround for MSVC warning C4244: '=': conversion from 'int' to 'char', possible loss of data
     * @note the argument is widened through unsigned char first, because ::tolower is only defined for
     *       values representable as unsigned char (or EOF); a plain char may be negative
     *
     * @param ch character
     * @return lowercase character
     */
    inline char tolower(const char ch)
    {
        return static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
    }
}

/** Normalize a channel name by making it lower case
*
* @note (review) The body below contains two implementations back to back: a copy filled through
*       std::transform that ends in `return channel_normalized;`, followed by `return util::to_lower(channel);`.
*       As written the second return statement is never reached. Presumably one of the two is a leftover
*       and should be removed - confirm against the actual header.
*
* @param channel channel name
* @return lowercase channel name
*/
inline std::string normalize(const std::string &channel)
{
std::string channel_normalized=channel;
std::transform(channel.begin(), channel.end(), channel_normalized.begin(), detail::tolower);
return channel_normalized;
return util::to_lower(channel);
}
/** Normalize a collection of channel names
*
Expand Down Expand Up @@ -114,7 +98,14 @@ namespace illumina { namespace interop { namespace logic { namespace utils
std::swap(expected[0], expected[1]);
return expected;
}
if(norm == "1,2") return expected;
if(norm == "1,2")
{
return expected;
}
if(norm == "blue,green")
{
return expected;
}
INTEROP_THROW( model::invalid_channel_exception, "Invalid channel names: " << norm);
}

Expand Down
8 changes: 6 additions & 2 deletions interop/model/run/info.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ namespace illumina { namespace interop { namespace model { namespace run
*/
bool is_indexed() const
{
// Returns true as soon as any entry of m_reads reports is_index(), false if none does.
// NOTE(review): two `for` headers appear on consecutive lines, so the second loop is the body of the first
// and redeclares `b`. Presumably only one of the two headers is meant to remain - confirm against the header file.
for (read_vector_t::const_iterator b = m_reads.begin(), e = m_reads.end(); b != e; ++b)
for (read_vector_t::const_iterator b = m_reads.begin(); b != m_reads.end(); ++b)
if (b->is_index()) return true;
return false;
}
Expand Down Expand Up @@ -377,14 +377,18 @@ namespace illumina { namespace interop { namespace model { namespace run
*
* @param filename xml file
*/
void write(const std::string &filename)const INTEROP_THROW_SPEC((xml::xml_file_not_found_exception,xml::bad_xml_format_exception));
void write(const std::string &filename)const INTEROP_THROW_SPEC((xml::xml_file_not_found_exception,
xml::bad_xml_format_exception));

/** String containing xml data
*
* @param out output stream
*/
void write(std::ostream& out)const INTEROP_THROW_SPEC((xml::bad_xml_format_exception));

private:
bool is_bool_attribute_valid(const char c) const;

private:
std::string m_name;
std::string m_date;
Expand Down
15 changes: 13 additions & 2 deletions interop/model/run/read_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ namespace illumina { namespace interop { namespace model { namespace run
// Constructor: forwards first_cycle/last_cycle to cycle_range and stores the remaining arguments in members.
// NOTE(review): `is_index` is declared on two consecutive parameter lines and the initializer list starts
// before the parameter list is closed a second time, so this does not parse as shown. Presumably the pair of
// lines ending in `m_is_index(is_index)` is the earlier signature and the lines that add
// `is_reverse_complement` replace it - confirm against the actual header.
read_info(const number_t number = 0,
const cycle_t first_cycle = 0,
const cycle_t last_cycle = 0,
const bool is_index = false) : cycle_range(first_cycle, last_cycle), m_number(number),
m_is_index(is_index)
const bool is_index = false,
const bool is_reverse_complement = false) : cycle_range(first_cycle, last_cycle), m_number(number),
m_is_index(is_index),
m_is_reverse_complement(is_reverse_complement)
{
}

Expand Down Expand Up @@ -65,6 +67,14 @@ namespace illumina { namespace interop { namespace model { namespace run
return m_is_index;
}

/** Report the reverse complement flag of this read
 *
 * @return value stored in m_is_reverse_complement
 */
bool is_reverse_complement() const { return m_is_reverse_complement; }
/** Get the total number of cycles in the read
*
* @return total number of cycles in read
Expand All @@ -89,6 +99,7 @@ namespace illumina { namespace interop { namespace model { namespace run
private:
size_t m_number;
bool m_is_index;
bool m_is_reverse_complement;

friend class info;

Expand Down
79 changes: 78 additions & 1 deletion interop/util/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,25 @@

#pragma once
#include <cctype>

#include <algorithm>
#include <istream>
namespace illumina { namespace interop { namespace util
{

namespace detail
{
    /** Convert a character to lower case
     *
     * @note workaround for MSVC warning C4244: '=': conversion from 'int' to 'char', possible loss of data
     * @note the argument is widened through unsigned char first, because ::tolower is only defined for
     *       values representable as unsigned char (or EOF); a plain char may be negative
     *
     * @param ch character
     * @return lowercase character
     */
    inline char tolower(const char ch)
    {
        return static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
    }
}
// replace, camel_to_space
//
/** Replace any first occurrence of substring from with substring to
Expand Down Expand Up @@ -68,6 +83,68 @@ namespace illumina { namespace interop { namespace util
else ++i;
}
}
/** Test whether every character of a string lies in the 7-bit ASCII range
 *
 * @param input string to check
 * @return true if no character has a value above 127 (an empty string yields true)
 */
inline bool contains_ASCII_only(const std::string& input)
{
    for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        const unsigned char value = static_cast<unsigned char>(*it);
        if (value >= 128)
        {
            return false;
        }
    }
    return true;
}
/** Create a lower-case copy of a string
 *
 * @param input string to convert
 * @return copy of input with every character passed through detail::tolower
 */
inline std::string to_lower(const std::string &input)
{
    std::string lowered;
    lowered.reserve(input.size());
    for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        lowered.push_back(detail::tolower(*it));
    }
    return lowered;
}

/** Test whether a string ends with a given suffix
 *
 * @param input string to inspect
 * @param ending suffix to look for
 * @param is_case_insensitive if true, both strings are lower-cased with to_lower before comparing
 * @return true if the last ending.length() characters of input match ending; false if input is shorter than ending
 */
inline bool ends_with(const std::string &input, const std::string &ending, const bool is_case_insensitive=false)
{
    if (input.length() < ending.length())
    {
        return false;
    }
    const std::string::size_type offset = input.length() - ending.length();
    if (!is_case_insensitive)
    {
        return input.compare(offset, ending.length(), ending) == 0;
    }
    const std::string input_lower = illumina::interop::util::to_lower(input);
    const std::string ending_lower = illumina::interop::util::to_lower(ending);
    return input_lower.compare(offset, ending.length(), ending_lower) == 0;
}
/** Less-than comparator that orders strings by their lower-cased forms
 */
struct StringCaseInsensitiveComparator
{
    /** Compare two strings after lower-casing both with to_lower
     *
     * @param a left-hand string
     * @param b right-hand string
     * @return true if lower-cased a sorts before lower-cased b
     */
    bool operator()(const std::string &a, const std::string &b) const
    {
        const std::string lhs = illumina::interop::util::to_lower(a);
        const std::string rhs = illumina::interop::util::to_lower(b);
        return lhs.compare(rhs) < 0;
    }
};

/** Read one line from a stream, accepting "\n", "\r\n" or a lone "\r" as the line terminator
 *
 * Function taken from https://gist.github.com/josephwb/df09e3a71679461fc104
 *
 * The terminator is consumed but not stored in line. If the input ends without a terminator, the characters
 * read so far are returned in line and no stream flag is set by this function; eofbit is set only when the end
 * of input is reached before any character was read.
 *
 * @param is input stream
 * @param line destination string, cleared before reading
 * @return the input stream
 */
inline std::istream& cross_platform_getline(std::istream &is, std::string& line)
{
    typedef std::streambuf::traits_type traits_t;

    line.clear();
    // The sentry is constructed with noskipws=true so leading whitespace is kept; its result is not checked.
    std::istream::sentry se(is, true);
    std::streambuf* sb = is.rdbuf();
    // Use the stream's own end-of-file value instead of the EOF macro, which requires <cstdio>.
    const traits_t::int_type eof = traits_t::eof();
    for (;;)
    {
        const traits_t::int_type c = sb->sbumpc();
        if (traits_t::eq_int_type(c, eof))
        {
            // Also handle the case when the last line has no line ending
            if (line.empty())
            {
                is.setstate(std::ios::eofbit);
            }
            return is;
        }
        if (c == '\n')
        {
            return is;
        }
        if (c == '\r')
        {
            // Treat "\r\n" as a single terminator by swallowing the following '\n'
            if (sb->sgetc() == '\n')
            {
                sb->sbumpc();
            }
            return is;
        }
        line += traits_t::to_char_type(c);
    }
}

}}}

Expand Down
4 changes: 2 additions & 2 deletions interop/util/xml_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <interop/external/rapidxml.hpp>
#include <interop/external/rapidxml_utils.hpp>
#include <interop/external/rapidxml_print.hpp>
#include <list>
#include "interop/util/assert.h"
#include "interop/util/xml_exceptions.h"
#include "interop/util/exception.h"
Expand All @@ -32,7 +33,6 @@ namespace illumina { namespace interop { namespace xml
xml_document()
{
// Create a declaration node, give it a version="1.0" attribute and append it to m_doc.
rapidxml::xml_node<>* decl = m_doc.allocate_node(rapidxml::node_declaration);
// NOTE(review): m_backing is declared both as std::vector<std::string> and as std::list<std::string> in this
// listing; std::list has no reserve(), so presumably this call belongs to the vector variant only - confirm.
m_backing.reserve(50);
decl->append_attribute(m_doc.allocate_attribute("version", "1.0"));
m_doc.append_node(decl);
}
Expand Down Expand Up @@ -99,7 +99,7 @@ namespace illumina { namespace interop { namespace xml

private:
rapidxml::xml_document<> m_doc;
std::vector<std::string> m_backing;
std::list<std::string> m_backing;
};
/** Check if the xml node matches the target, and, if so, set the value
*
Expand Down
28 changes: 24 additions & 4 deletions src/interop/model/run/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ namespace illumina { namespace interop { namespace model { namespace run
doc.add_attribute(read, "Number", rit->number());
doc.add_attribute(read, "NumCycles", rit->total_cycles());
doc.add_attribute(read, "IsIndexedRead", rit->is_index()?"Y":"N");
if (m_version == 6) {
doc.add_attribute(read, "IsReverseComplement", rit->m_is_reverse_complement?"Y":"N");
}
}
rapidxml::xml_node<>* flowcell = doc.add_node(run, "FlowcellLayout");
doc.add_attribute(flowcell, "LaneCount", m_flowcell.lane_count());
Expand Down Expand Up @@ -231,8 +234,6 @@ namespace illumina { namespace interop { namespace model { namespace run
}
else if (set_data(p_node, "ImageChannels", "Name", m_channels))
{
for (size_t i = 0; i < m_channels.size(); ++i)
m_channels[i] = logic::utils::normalize(m_channels[i]);// TODO: remove this
continue;
}
else if (p_node->name() == std::string("ImageDimensions"))
Expand All @@ -253,6 +254,7 @@ namespace illumina { namespace interop { namespace model { namespace run
read_info rinfo;
size_t cycle_count = 0;
char is_indexed;
char reverse_complement;
for (xml_attr_ptr attr = p_read->first_attribute();
attr; attr = attr->next_attribute())
{
Expand All @@ -262,8 +264,17 @@ namespace illumina { namespace interop { namespace model { namespace run
rinfo.m_last_cycle = first_cycle + cycle_count;
rinfo.m_first_cycle = first_cycle + 1;
}
if (set_data(attr, "IsIndexedRead", is_indexed))
if (set_data(attr, "IsIndexedRead", is_indexed)) {
if (!is_bool_attribute_valid(is_indexed))
INTEROP_THROW(xml::bad_xml_format_exception, "IsIndexedRead attribute of Reads tag must be Y or N");
rinfo.m_is_index = std::toupper(is_indexed) == 'Y';

}
if (set_data(attr, "IsReverseComplement", reverse_complement)) {
if (!is_bool_attribute_valid(reverse_complement))
INTEROP_THROW(xml::bad_xml_format_exception, "IsReverseComplement attribute of Reads tag must be Y or N");
rinfo.m_is_reverse_complement = std::toupper(reverse_complement) == 'Y';
}
}
first_cycle += cycle_count;
m_reads.push_back(rinfo);
Expand Down Expand Up @@ -320,6 +331,11 @@ namespace illumina { namespace interop { namespace model { namespace run
INTEROP_RANGE_CHECK_GT(read, m_reads.size(), invalid_run_info_exception,
"Read number exceeds number of reads in RunInfo.xml for record "
<< lane << "_" << tile << " @ " << read << " in file " << metric_name);
for (size_t r = 0; r < m_reads.size(); r++) {
if (m_reads[r].is_reverse_complement() && !m_reads[r].is_index()) {
INTEROP_THROW(model::invalid_run_info_exception, "Non-index read cannot be reverse complement");
}
}
}
/** Test if tile list matches flowcell layout
*
Expand All @@ -330,7 +346,7 @@ namespace illumina { namespace interop { namespace model { namespace run
* @throws invalid_run_info_exception
*/
void info::validate_cycle(const ::uint32_t lane, const ::uint32_t tile, const size_t cycle, const std::string& metric_name)const
INTEROP_THROW_SPEC((model::invalid_run_info_exception,
INTEROP_THROW_SPEC((model::invalid_run_info_exception,
model::invalid_run_info_cycle_exception))
{
validate(lane, tile, metric_name);
Expand Down Expand Up @@ -444,6 +460,10 @@ namespace illumina { namespace interop { namespace model { namespace run
}
}
}
/** Test whether a character is an accepted value for a Y/N attribute
 *
 * The comparison ignores case, so 'y', 'Y', 'n' and 'N' are all accepted.
 *
 * @param c attribute character
 * @return true if c is Y or N in either case
 */
bool info::is_bool_attribute_valid(const char c) const
{
    // std::toupper is only defined for values representable as unsigned char (or EOF),
    // so widen through unsigned char before converting; a plain char may be negative.
    const int upper = std::toupper(static_cast<unsigned char>(c));
    return upper == 'Y' || upper == 'N';
}

}}}}

Expand Down
Loading

0 comments on commit 9a86237

Please sign in to comment.