diff --git a/opal.py b/opal.py
index 4f0fd70..39ef92f 100755
--- a/opal.py
+++ b/opal.py
@@ -175,7 +175,7 @@ def evaluate(gs_samples_list, profiles_list_to_samples_list, labels, filter_tail
             gs_rank_to_taxid_to_percentage = gs_id_to_rank_to_taxid_to_percentage[sample_id]
             gs_pf_profile = gs_id_to_pf_profile[sample_id]
         else:
-            sys.stderr.write("Skipping assessment of {} for sample {}. Make sure the SampleID of the gold standard and the profile are identical.\n".format(label, sample_id))
+            logging.getLogger('opal').warning("Skipping assessment of {} for sample {}. Make sure the SampleID of the gold standard and the profile are identical.\n".format(label, sample_id))
             continue
 
         rank_to_taxid_to_percentage = load_data.get_rank_to_taxid_to_percentage(profile)
@@ -303,7 +303,8 @@ def main():
     group2.add_argument('-m', '--memory', help='Comma-separated memory usages in gigabytes', required=False)
     group2.add_argument('-d', '--desc', help='Description for HTML page', required=False)
     group2.add_argument('-r', '--ranks', help='Highest and lowest taxonomic ranks to consider in performance rankings, comma-separated. Valid ranks: superkingdom, phylum, class, order, family, genus, species, strain (default:superkingdom,species)', required=False)
-    group2.add_argument('--metrics_plot', help='Metrics for spider plot of relative performances, first character, comma-separated. Valid metrics: w:weighted Unifrac, l:L1 norm, c:completeness, p:purity, f:false positives, t:true positives (default: w,l,c,p,f)', required=False)
+    group2.add_argument('--metrics_plot_rel', help='Metrics for spider plot of relative performances, first character, comma-separated. Valid metrics: w:weighted Unifrac, l:L1 norm, c:completeness, p:purity, f:false positives, t:true positives (default: w,l,c,p,f)', required=False)
+    group2.add_argument('--metrics_plot_abs', help='Metrics for spider plot of absolute performances, first character, comma-separated. Valid metrics: c:completeness, p:purity, b:Bray-Curtis (default: c,p)', required=False)
     group2.add_argument('--silent', help='Silent mode', action='store_true')
     group2.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
     group2.add_argument('-h', '--help', action='help', help='Show this help message and exit')
@@ -354,7 +355,7 @@ def main():
     logger.info('done')
 
     logger.info('Creating more plots...')
-    plots_list += pl.plot_all(pd_metrics, labels, output_dir, args.metrics_plot)
+    plots_list += pl.plot_all(pd_metrics, labels, output_dir, args.metrics_plot_rel, args.metrics_plot_abs)
     logger.info('done')
 
     logger.info('Computing rankings...')
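Note: the single --metrics_plot option is split into --metrics_plot_rel and --metrics_plot_abs, and the skipped-sample message now goes through the 'opal' logger instead of sys.stderr. As a minimal sketch of the new command-line interface, the stand-alone parser below mirrors only the two new options from the hunk above; it is illustrative and not OPAL's actual group2 setup:

```python
import argparse

# Stand-in parser mirroring the two new spider-plot options.
parser = argparse.ArgumentParser(prog='opal.py')
parser.add_argument('--metrics_plot_rel', required=False,
                    help='Metrics for spider plot of relative performances (default: w,l,c,p,f)')
parser.add_argument('--metrics_plot_abs', required=False,
                    help='Metrics for spider plot of absolute performances (default: c,p)')

args = parser.parse_args(['--metrics_plot_rel', 'w,l,c,p', '--metrics_plot_abs', 'c,p,b'])
print(args.metrics_plot_rel)  # w,l,c,p
print(args.metrics_plot_abs)  # c,p,b
```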
diff --git a/src/html_opal.py b/src/html_opal.py
index 317abf5..515719a 100644
--- a/src/html_opal.py
+++ b/src/html_opal.py
@@ -114,7 +114,7 @@ def get_rank_to_sample_pd(pd_metrics):
     return rank_to_sample_pd
 
 
-def get_formatted_pd_rankings(pd_rankings):
+def get_formatted_pd_rankings(pd_rankings, labels):
     df_list = []
     df_list_unsorted_pos = []
     metrics_list = []
@@ -126,7 +126,8 @@ def get_formatted_pd_rankings(pd_rankings):
         df_list_unsorted_pos.append(pd.DataFrame({metric: df2['tool'].tolist(), 'score' + metric: df2['position'].tolist()}))
 
     df_sum = pd_rankings.groupby(['tool'])['position'].sum().reset_index().sort_values('position')
-    df_sum_unsorted_pos = pd_rankings.groupby(['tool'])['position'].sum().reset_index()
+    df_sum_unsorted_pos = pd_rankings.groupby(['tool'])['position'].sum().loc[labels].reset_index()
+
     df_list.append(
         pd.DataFrame({SUM_OF_SCORES: df_sum['tool'].tolist(), 'score' + SUM_OF_SCORES: df_sum['position'].tolist()}))
     df_list_unsorted_pos.append(
@@ -137,8 +138,8 @@ def get_formatted_pd_rankings(pd_rankings):
     return pd_show, pd_show_unsorted_pos
 
 
-def create_rankings_html(pd_rankings, ranks_scored):
-    pd_show, pd_show_unsorted_pos = get_formatted_pd_rankings(pd_rankings)
+def create_rankings_html(pd_rankings, ranks_scored, labels):
+    pd_show, pd_show_unsorted_pos = get_formatted_pd_rankings(pd_rankings, labels)
 
     table_source = ColumnDataSource(pd_show)
 
@@ -196,7 +197,7 @@ def create_rankings_html(pd_rankings, ranks_scored):
     weight_unifrac = Slider(start=0, end=10, value=1, step=.1, title=c.UNIFRAC + " weight", callback=callback)
     callback.args["weight_unifrac"] = weight_unifrac
 
-    p = figure(x_range=pd_show_unsorted_pos[SUM_OF_SCORES].tolist(), plot_width=1000, plot_height=400, title=SUM_OF_SCORES + " - lower is better")
+    p = figure(x_range=pd_show_unsorted_pos[SUM_OF_SCORES].tolist(), plot_width=800, plot_height=400, title=SUM_OF_SCORES + " - lower is better")
     p.vbar(x='x', top='top', source=source, width=0.5, bottom=0, color="firebrick")
 
     col_rankings = column([Div(text="Hint 1: click on the columns of scores for sorting.", style={"width": "600px", "margin-bottom": "0px"}),
@@ -309,8 +310,8 @@ def create_metrics_table(pd_metrics, labels, sample_ids_list):
     alpha_diversity_metics = 'Alpha diversity'
     all_metrics_labels = [presence_metrics_label, estimates_metrics_label, alpha_diversity_metics]
 
-    styles = [{'selector': 'td', 'props': [('width', '100pt')]},
-              {'selector': 'th', 'props': [('width', '100pt'), ('text-align', 'left')]},
+    styles = [{'selector': 'td', 'props': [('width', '115pt')]},
+              {'selector': 'th', 'props': [('width', '115pt'), ('text-align', 'left')]},
               {'selector': 'th:nth-child(1)', 'props': [('width', '120pt'), ('font-weight', 'normal')]},
               {'selector': '', 'props': [('width', 'max-content'), ('width', '-moz-max-content'), ('border-top', '1px solid lightgray'), ('border-spacing', '0px')]},
               {'selector': 'expand-toggle:checked ~ * .data', 'props': [('background-color', 'white !important')]}]
@@ -418,10 +419,10 @@ def create_alpha_diversity_tab():
 def create_plots_html(plots_list):
     message_no_spdplot = 'Spider plots of performance require at least 3 profiles.'
 
-    text = '' if 'spider_plot' in plots_list else message_no_spdplot
+    text = '' if 'spider_plot_relative' in plots_list else message_no_spdplot
     plot1 = Panel(child=Div(text=text), title='Relative performance', width=780)
 
-    text = '' if 'spider_plot_recall_precision' in plots_list else message_no_spdplot
+    text = '' if 'spider_plot_absolute' in plots_list else message_no_spdplot
     plot2 = Panel(child=Div(text=text), title='Absolute performance')
 
     tabs_plots = Tabs(tabs=[plot1, plot2], width=780, css_classes=['bk-tabs-margin'])
@@ -501,7 +502,7 @@ def create_computing_efficiency_tab(pd_metrics, plots_list, tabs_list):
 
 
 def create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
-    col_rankings = create_rankings_html(pd_rankings, ranks_scored)
+    col_rankings = create_rankings_html(pd_rankings, ranks_scored, labels)
 
     create_heatmap_bar(output_dir)
 
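The functional change in this file is that get_formatted_pd_rankings and create_rankings_html now receive labels, and the summed ranking positions are reindexed with .loc[labels] so the bar chart keeps the tool order given on the command line instead of groupby's alphabetical order. A small pandas illustration of that idiom, using toy data rather than OPAL's actual frames:

```python
import pandas as pd

# Toy rankings: two tools with scores from two metrics each.
pd_rankings = pd.DataFrame({'tool': ['ToolB', 'ToolA', 'ToolB', 'ToolA'],
                            'position': [1, 2, 3, 1]})
labels = ['ToolB', 'ToolA']  # order in which the profiles were passed in

# groupby().sum() returns the tools sorted alphabetically;
# .loc[labels] restores the user-supplied order before reset_index().
df_sum_unsorted_pos = pd_rankings.groupby(['tool'])['position'].sum().loc[labels].reset_index()
print(df_sum_unsorted_pos)  # ToolB (4) first, then ToolA (3)
```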
diff --git a/src/plots.py b/src/plots.py
index 1ac7d40..f111272 100644
--- a/src/plots.py
+++ b/src/plots.py
@@ -356,7 +356,7 @@ def spider_plot(metrics, labels, rank_to_metric_to_toolvalues, output_dir, file_
         return []
     theta = spl.radar_factory(N, frame='polygon')
     fig, axes = plt.subplots(figsize=(9, 9), nrows=2, ncols=3, subplot_kw=dict(projection='radar'))
-    fig.subplots_adjust(wspace=1.0, hspace=0.0, top=0.87, bottom=0.45)
+    fig.subplots_adjust(wspace=.5, hspace=0.3, top=0.87, bottom=0.45)
 
     for ax, rank in zip(axes.flat, c.PHYLUM_SPECIES):
         if grid_points:
@@ -366,12 +366,8 @@ def spider_plot(metrics, labels, rank_to_metric_to_toolvalues, output_dir, file_
         ax.set_title(rank, weight='bold', size=9, position=(0.5, 1.1),
                      horizontalalignment='center', verticalalignment='center')
 
-        if absolute:
-            metric_suffix = 'absolute'
-        else:
-            metric_suffix = ''
         # select only metrics in metrics list
-        metrics_subdict = OrderedDict((metric, rank_to_metric_to_toolvalues[rank][metric + metric_suffix]) for metric in metrics)
+        metrics_subdict = OrderedDict((metric, rank_to_metric_to_toolvalues[rank][metric]) for metric in metrics)
         it = 1
         metric_to_toolindex = []
         for d, color in zip(metrics_subdict.values(), colors):
@@ -408,6 +404,9 @@ def spider_plot(metrics, labels, rank_to_metric_to_toolvalues, output_dir, file_
             xticklabel.set_position((0,.20))
             xticklabel.set_fontsize('x-small')
 
+    if absolute:
+        metrics = [metric[:-8] for metric in metrics]
+
     ax = axes[0, 0]
     ax.legend(metrics, loc=(2.0 - 0.353 * len(metrics), 1.25), labelspacing=0.1, fontsize='small', ncol=len(metrics), frameon=False)
     fig.savefig(os.path.join(output_dir, file_name + '.pdf'), dpi=100, format='pdf', bbox_inches='tight')
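With the suffix handling moved out of the plotting loop, spider_plot now receives metric keys that already carry the 'absolute' suffix for the absolute plot and only strips that suffix for the legend. Since len('absolute') == 8, metric[:-8] recovers the display name. A tiny sketch; the metric strings here are illustrative and not necessarily the exact values of c.RECALL and friends:

```python
# 'absolute' has 8 characters, so slicing off the last 8 characters
# turns the internal key back into the label shown in the legend.
metrics = ['Completenessabsolute', 'Purityabsolute', 'Bray-Curtis distanceabsolute']
legend_labels = [metric[:-8] for metric in metrics]
print(legend_labels)  # ['Completeness', 'Purity', 'Bray-Curtis distance']
```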
@@ -452,14 +451,25 @@ def plot_braycurtis_l1norm(braycurtis_list, l1norm_list, labels, output_dir):
     plt.close(fig)
 
 
-def get_metrics_for_spider_plot(metrics_plot):
-    initial_to_metric = {'w':c.UNIFRAC, 'l':c.L1NORM, 'c':c.RECALL, 'p':c.PRECISION, 'f':c.FP, 't':c.TP}
+def get_metrics_for_spider_plot(metrics_plot, absolute):
     metrics_initial = [x.strip() for x in metrics_plot.split(',')]
     metrics_list = []
+
+    if not absolute:
+        initial_to_metric = {'w':c.UNIFRAC, 'l':c.L1NORM, 'c':c.RECALL, 'p':c.PRECISION, 'f':c.FP, 't':c.TP}
+        for initial in metrics_initial:
+            if initial not in initial_to_metric:
+                logging.getLogger('opal').warning('Invalid metric initial {} provided with option --metrics_plot_rel. Defaults will be used.'.format(initial))
+                return [c.UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.FP]
+            else:
+                metrics_list.append(initial_to_metric[initial])
+        return metrics_list
+
+    initial_to_metric = {'c':c.RECALL+'absolute', 'p':c.PRECISION+'absolute', 'b':c.BRAY_CURTIS+'absolute'}
     for initial in metrics_initial:
         if initial not in initial_to_metric:
-            logging.getLogger('opal').warning('Invalid metric initial {} provided with option --metrics_plot. Defaults will be used.'.format(initial))
-            return [c.UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.FP]
+            logging.getLogger('opal').warning('Invalid metric initial {} provided with option --metrics_plot_abs. Defaults will be used.'.format(initial))
+            return [c.RECALL+'absolute', c.PRECISION+'absolute']
         else:
             metrics_list.append(initial_to_metric[initial])
     return metrics_list
@@ -576,8 +586,7 @@ def spider_plot_preprocess_metrics(pd_mean, labels):
     return tool_to_rank_to_metric_to_value
 
 
-def plot_all(pd_metrics, labels, output_dir, metrics_plot):
-    metrics = [c.UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.FP]
+def plot_all(pd_metrics, labels, output_dir, metrics_plot_rel, metrics_plot_abs):
     rank_to_metric_to_toolvalues = defaultdict(lambda : defaultdict(list))
 
     pd_copy = pd_metrics.copy()
@@ -593,7 +602,9 @@ def plot_all(pd_metrics, labels, output_dir, metrics_plot):
 
     tool_to_rank_to_metric_to_value = spider_plot_preprocess_metrics(pd_mean, labels)
 
-    metrics_for_plot = get_metrics_for_spider_plot(metrics_plot) if metrics_plot else metrics
+    metrics_for_plot_rel = get_metrics_for_spider_plot(metrics_plot_rel, absolute=False) if metrics_plot_rel else [c.UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.FP]
+    metrics_for_plot_abs = get_metrics_for_spider_plot(metrics_plot_abs, absolute=True) if metrics_plot_abs else [c.RECALL+'absolute', c.PRECISION+'absolute']
+
     present_labels = []
     for label in labels:
         if label not in tool_to_rank_to_metric_to_value:
@@ -601,25 +612,27 @@ def plot_all(pd_metrics, labels, output_dir, metrics_plot):
         else:
             present_labels.append(label)
         for rank in c.PHYLUM_SPECIES:
-            for metric in metrics_for_plot + [c.RECALL+'absolute', c.PRECISION+'absolute']:
+            for metric in metrics_for_plot_rel + metrics_for_plot_abs:
                 if metric in tool_to_rank_to_metric_to_value[label][rank]:
                     rank_to_metric_to_toolvalues[rank][metric].append(tool_to_rank_to_metric_to_value[label][rank][metric])
             rank_to_metric_to_toolvalues[rank][c.UNIFRAC].append(tool_to_rank_to_metric_to_value[label]['rank independent'][c.UNIFRAC])
 
     colors = [plt.cm.tab10(2), plt.cm.tab10(0), plt.cm.tab10(3), 'k', 'm', 'y']
-    plots_list = spider_plot(metrics_for_plot,
+    colors2 = ['r', 'k', 'olive']
+
+    plots_list = spider_plot(metrics_for_plot_rel,
                              present_labels,
                              rank_to_metric_to_toolvalues,
                              output_dir,
-                             'spider_plot',
-                             colors[:len(metrics_for_plot)])
+                             'spider_plot_relative',
+                             colors[:len(metrics_for_plot_rel)])
 
-    plots_list += spider_plot([c.RECALL, c.PRECISION],
+    plots_list += spider_plot(metrics_for_plot_abs,
                               present_labels,
                               rank_to_metric_to_toolvalues,
                               output_dir,
-                              'spider_plot_recall_precision',
-                              ['r', 'k'],
+                              'spider_plot_absolute',
+                              colors2[:len(metrics_for_plot_abs)],
                               grid_points=[0.2, 0.4, 0.6, 0.8, 1.0],
                               fill=True,
                               absolute=True)
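For context, the branching added to get_metrics_for_spider_plot can be condensed to the stand-alone sketch below. The c.* constants and the 'opal' logger are replaced with plain strings and a print so the snippet runs on its own, and pick_spider_plot_metrics is a hypothetical name, not OPAL's function:

```python
def pick_spider_plot_metrics(metrics_plot, absolute):
    # Maps of one-letter initials to metric names, mirroring the two
    # initial_to_metric dicts in the patch (plain strings instead of c.*).
    rel_map = {'w': 'UniFrac', 'l': 'L1 norm', 'c': 'Completeness',
               'p': 'Purity', 'f': 'False positives', 't': 'True positives'}
    abs_map = {'c': 'Completenessabsolute', 'p': 'Purityabsolute', 'b': 'Bray-Curtisabsolute'}
    mapping = abs_map if absolute else rel_map
    defaults = (['Completenessabsolute', 'Purityabsolute'] if absolute
                else ['UniFrac', 'L1 norm', 'Completeness', 'Purity', 'False positives'])
    metrics_list = []
    for initial in (x.strip() for x in metrics_plot.split(',')):
        if initial not in mapping:
            # Any unknown initial falls back to the defaults, as in the patch.
            print('Invalid metric initial {}. Defaults will be used.'.format(initial))
            return defaults
        metrics_list.append(mapping[initial])
    return metrics_list

print(pick_spider_plot_metrics('c,p,b', absolute=True))   # suffixed absolute metrics
print(pick_spider_plot_metrics('w,l,x', absolute=False))  # 'x' is invalid -> relative defaults
```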