Skip to content

Commit

Permalink
Add missing summary metrics (#302)
Browse files Browse the repository at this point in the history
* Add missing read-level and run-level cluster count metrics

* Fix cluster count logic

* Update version

* Fix bug and update tests

* Fix warning

* Remove comment
  • Loading branch information
ezralanglois authored Oct 25, 2022
1 parent 09a5b40 commit 3178e08
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 24 deletions.
6 changes: 6 additions & 0 deletions docs/src/changes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changes {#changes}

## v1.2.0

| Date | Description |
|------------|------------------------------------------------------------|
| 2022-10-21 | Add missing read-level and run-level cluster count metrics |


## v1.1.28

Expand Down
11 changes: 11 additions & 0 deletions interop/logic/summary/summary_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,17 @@ namespace illumina { namespace interop { namespace logic { namespace summary
static const float eps = 1e-9f;
return (div < eps) ? 0 : num / div;
}
/** Divide two double-precision values, guarding against a tiny divisor
*
* Any divisor below 1e-9 (which includes zero and every negative value)
* short-circuits to 0 instead of performing the division.
*
* @param num numerator
* @param div divisor
* @return 0 when div is below 1e-9, otherwise num / div
*/
inline double divide(const double num, const double div)
{
    const double smallest_divisor = 1e-9;
    if (div < smallest_divisor)
    {
        return 0.0;
    }
    return num / div;
}

namespace op
{
Expand Down
38 changes: 30 additions & 8 deletions interop/logic/summary/tile_summary.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ namespace illumina { namespace interop { namespace logic { namespace summary
util::op::const_member_function_less(&model::metrics::tile_metric::percent_pf),
skip_median);
stat_summary.percent_pf(stat);
stat_summary.reads(std::accumulate(tile_data.begin(),
stat_summary.reads(nan_accumulate(tile_data.begin(),
tile_data.end(),
uint64_t(0),
util::op::const_member_function(
&model::metrics::tile_metric::cluster_count)));
stat_summary.reads_pf(std::accumulate(tile_data.begin(),
stat_summary.reads_pf(nan_accumulate(tile_data.begin(),
tile_data.end(),
uint64_t(0),
util::op::const_member_function(
Expand Down Expand Up @@ -254,6 +254,10 @@ namespace illumina { namespace interop { namespace logic { namespace summary
size_t total = 0;
float percent_aligned_nonindex = 0;
size_t total_nonindex = 0;
double cluster_count_raw = 0;
double cluster_count_pf = 0;
uint64_t total_reads_raw = 0;
uint64_t total_reads_pf = 0;
for (size_t read = 0; read < run.size(); ++read)
{
INTEROP_ASSERT(read < run.size());
Expand All @@ -262,6 +266,11 @@ namespace illumina { namespace interop { namespace logic { namespace summary
for (size_t lane = 0; lane < run[read].size(); ++lane)
{
INTEROP_ASSERT(lane < run[0].size());
if(read == 0)
{
cluster_count_pf += run[read][lane].reads_pf();
cluster_count_raw += run[read][lane].reads();
}
const size_t non_nan = update_read_summary(read_data_by_lane_read(read, lane),
run[read][lane],
skip_median);
Expand All @@ -277,6 +286,12 @@ namespace illumina { namespace interop { namespace logic { namespace summary
skip_median);
}
}
run[read].summary().reads(static_cast<uint64_t>(cluster_count_raw));
run[read].summary().reads_pf(static_cast<uint64_t>(cluster_count_pf));
run[read].summary().cluster_count(cluster_count_raw);
run[read].summary().cluster_count_pf(cluster_count_pf);
total_reads_raw = static_cast<uint64_t>(cluster_count_raw);
total_reads_pf = static_cast<uint64_t>(cluster_count_pf);
run[read].summary().percent_aligned(divide(percent_aligned_by_read, float(total_by_read)));
percent_aligned += percent_aligned_by_read;
total += total_by_read;
Expand All @@ -288,7 +303,14 @@ namespace illumina { namespace interop { namespace logic { namespace summary
}
run.nonindex_summary().percent_aligned(divide(percent_aligned_nonindex, static_cast<float>(total_nonindex)));
run.total_summary().percent_aligned(divide(percent_aligned, static_cast<float>(total)));

run.nonindex_summary().reads(total_reads_raw);
run.total_summary().reads(total_reads_raw);
run.nonindex_summary().reads_pf(total_reads_pf);
run.total_summary().reads_pf(total_reads_pf);
run.nonindex_summary().cluster_count(cluster_count_raw);
run.total_summary().cluster_count(cluster_count_raw);
run.nonindex_summary().cluster_count_pf(cluster_count_pf);
run.total_summary().cluster_count_pf(cluster_count_pf);
}

/** Summarize a collection extended tile metrics
Expand Down Expand Up @@ -332,8 +354,8 @@ namespace illumina { namespace interop { namespace logic { namespace summary


model::summary::metric_stat count_stat;
float total_cluster_occupied = 0;
float total_cluster_count = 0;
double total_cluster_occupied = 0;
double total_cluster_count = 0;
const bool skip_median=false;
for (size_t lane = 0; lane < run[0].size(); ++lane)
{
Expand Down Expand Up @@ -375,10 +397,10 @@ namespace illumina { namespace interop { namespace logic { namespace summary
}
for (size_t read = 0; read < run.size(); ++read)
{
run[read].summary().percent_occupied(divide(total_cluster_occupied, total_cluster_count)*100);
run[read].summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
}
run.nonindex_summary().percent_occupied(divide(total_cluster_occupied, total_cluster_count)*100);
run.total_summary().percent_occupied(divide(total_cluster_occupied, total_cluster_count)*100);
run.nonindex_summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
run.total_summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
}

}}}}
Expand Down
79 changes: 78 additions & 1 deletion interop/model/summary/metric_summary.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ namespace illumina { namespace interop { namespace model { namespace summary {
m_yield_g(std::numeric_limits<float>::quiet_NaN()),
m_projected_yield_g(0),
m_percent_occupied(std::numeric_limits<float>::quiet_NaN()),
m_percent_occupancy_proxy(std::numeric_limits<float>::quiet_NaN())
m_percent_occupancy_proxy(std::numeric_limits<float>::quiet_NaN()),
m_reads_raw(0),
m_reads_pf(0),
m_cluster_count_raw(std::numeric_limits<float>::quiet_NaN()),
m_cluster_count_pf(std::numeric_limits<float>::quiet_NaN())

{}
public:
Expand Down Expand Up @@ -114,6 +118,39 @@ namespace illumina { namespace interop { namespace model { namespace summary {
{
return m_percent_occupancy_proxy;
}
/** Get the raw cluster count held by this summary
*
* The value is kept as a double and is returned unmodified.
*
* @return raw cluster count (contents of m_cluster_count_raw)
*/
double cluster_count()const
{
return m_cluster_count_raw;
}

/** Get the PF cluster count held by this summary
*
* The value is kept as a double and is returned unmodified.
*
* @return cluster count PF (contents of m_cluster_count_pf)
*/
double cluster_count_pf()const
{
return m_cluster_count_pf;
}
/** Get the raw reads value held by this summary
*
* This is the raw count, not the PF count; use reads_pf() for the PF value.
*
* @return raw reads value (contents of m_reads_raw)
*/
uint64_t reads()const
{
return m_reads_raw;
}
/** Get the reads PF value held by this summary
*
* @return reads PF value (contents of m_reads_pf)
*/
uint64_t reads_pf()const
{
return m_reads_pf;
}
/** @} */
/** Set the first cycle intensity
*
Expand Down Expand Up @@ -189,6 +226,42 @@ namespace illumina { namespace interop { namespace model { namespace summary {
{
}

/** Set the raw cluster count held by this summary
*
* The value is stored as given; no validation or rounding is applied.
*
* @param val raw cluster count to store
*/
void cluster_count(const double val)
{
m_cluster_count_raw = val;
}

/** Set the PF cluster count held by this summary
*
* The value is stored as given; no validation or rounding is applied.
*
* @param val cluster count PF to store
*/
void cluster_count_pf(const double val)
{
m_cluster_count_pf = val;
}

/** Set the raw reads value held by this summary
*
* This sets the raw count only; the PF count is set separately via reads_pf().
*
* @param val raw reads value to store
*/
void reads(const uint64_t val)
{
m_reads_raw = val;
}

/** Set the reads PF value held by this summary
*
* @param val reads PF value to store
*/
void reads_pf(const uint64_t val)
{
m_reads_pf = val;
}

private:
float m_error_rate;
float m_percent_aligned;
Expand All @@ -198,6 +271,10 @@ namespace illumina { namespace interop { namespace model { namespace summary {
float m_projected_yield_g;
float m_percent_occupied;
float m_percent_occupancy_proxy;
uint64_t m_reads_raw;
uint64_t m_reads_pf;
double m_cluster_count_raw;
double m_cluster_count_pf;
template<class MetricType, int Version>
friend struct io::generic_layout;
};
Expand Down
30 changes: 15 additions & 15 deletions src/ext/python/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
>>> from interop import summary
>>> summary(run_metrics_example)
array([(0.36666667, 6.6666665, 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4')])
array([(0.36666667, 6.6666665, 0., 0., 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])
>>> from interop import indexing
>>> indexing(run_metrics_with_indexing)
Expand Down Expand Up @@ -233,21 +233,21 @@ def summary(run_metrics, level='Total', columns=None, dtype='f4', ignore_missing
>>> summary(run_metrics_example)
array([(0.36666667, 6.6666665, 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4')])
array([(0.36666667, 6.6666665, 0., 0., 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])
>>> summary(run_metrics_example, 'Total')
array([(0.36666667, 6.6666665, 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4')])
array([(0.36666667, 6.6666665, 0., 0., 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])
>>> summary(run_metrics_example, 'NonIndex')
array([(0.2, 10., 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4')])
array([(0.2, 10., 0., 0., 0.)],
dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])
>>> summary(run_metrics_example, 'Read')
array([(1, 78, 0.2, 10., 0.), (2, 89, 0.4, 5., 0.),
(3, 89, 0.5, 5., 0.)],
dtype=[('ReadNumber', '<u2'), ('IsIndex', 'u1'), ('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4')])
array([(1, 78, 0.2, 10., 0., 0., 0.), (2, 89, 0.4, 5., 0., 0., 0.),
(3, 89, 0.5, 5., 0., 0., 0.)],
dtype=[('ReadNumber', '<u2'), ('IsIndex', 'u1'), ('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])
>>> summary(run_metrics_example, 'Lane')
array([(1, 78, 1, 0.2, 10., 0., 0., 0., 1.),
Expand Down Expand Up @@ -443,13 +443,13 @@ def summary_columns(level='Total', ret_dict=False):
The default columns are for the Run/Read level
>>> summary_columns()
('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='Total')
('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='NonIndex')
('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='Read')
('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
The lane/surface level give another set of columns for the summary table
>>> summary_columns(level='Lane')
Expand Down

0 comments on commit 3178e08

Please sign in to comment.