From 48756c0b0df9d45cf279e86ce7bff5581a4afa8a Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 19 Jul 2024 07:31:24 -0400 Subject: [PATCH 1/3] add --no-x-labels and --no-y-labels to clustermap --- src/sourmash_plugin_betterplot.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/sourmash_plugin_betterplot.py b/src/sourmash_plugin_betterplot.py index d7676c2..c128eb1 100644 --- a/src/sourmash_plugin_betterplot.py +++ b/src/sourmash_plugin_betterplot.py @@ -846,6 +846,14 @@ def __init__(self, subparser): "--no-labels", action="store_true", help="disable X & Y axis labels" ) + subparser.add_argument( + "--no-x-labels", action="store_true", + help="disable X axis labels" + ) + subparser.add_argument( + "--no-y-labels", action="store_true", + help="disable Y axis labels" + ) def main(self, args): super().main(args) @@ -901,12 +909,15 @@ def main(self, args): if args.boolean: # turn off colorbar if boolean. kw_args['cbar_pos'] = None + yticklabels=sample_d_to_idents(query_d_items) + xticklabels=sample_d_to_idents(against_d_items) if args.no_labels: - xticklabels=[] - yticklabels=[] - else: - yticklabels=sample_d_to_idents(query_d_items) - xticklabels=sample_d_to_idents(against_d_items) + xticklabels = [] + yticklabels = [] + elif args.no_x_labels: + xticklabels = [] + elif args.no_y_labels: + yticklabels = [] # turn into dissimilarity matrix # plot! From 4a954e615839d937e8cb50a0d27ad88730189e74 Mon Sep 17 00:00:00 2001 From: "C. 
class Command_PresenceFilter(CommandLinePlugin):
    """Scatter-plot filtered 'gather' matches: detection or cANI vs. abundance.

    Reads a gather CSV, keeps the rows whose ``unique_intersect_bp`` is at
    least ``--min-num-hashes * scaled``, and plots either ``f_match_orig``
    (``--detection``, the default) or ``match_containment_ani`` (``--ani``)
    against ``average_abund`` on a log-scaled Y axis.

    NOTE: the change set that introduces this class also registers it in
    pyproject.toml as
    ``presence_filter = "sourmash_plugin_betterplot:Command_PresenceFilter"``
    and adds ``pandas`` to the package dependencies.
    """

    command = 'presence_filter'
    description = """\
Provide a filtered view of 'gather' output, plotting detection or ANI
against average abund for significant matches.
"""

    usage = """
   sourmash scripts presence_filter gather.csv -o presence.png
"""
    epilog = epilog
    formatter_class = argparse.RawTextHelpFormatter

    def __init__(self, subparser):
        """Register the command-line arguments for ``presence_filter``.

        Args:
            subparser: the argparse (sub)parser this command adds its
                arguments to.
        """
        super().__init__(subparser)
        subparser.add_argument('gather_csv')
        # required=True already guarantees a value, so no default is needed.
        subparser.add_argument('-o', '--output',
                               help="save image to this file",
                               required=True)
        # type=int is essential: without it a user-supplied value arrives as
        # a str, and `min_num_hashes * scaled` would repeat the string
        # instead of multiplying.
        subparser.add_argument('-N', '--min-num-hashes', type=int,
                               default=3, help='threshold (default: 3)')
        # --detection and --ani share one destination; the last one given
        # on the command line wins, and detection is the default.
        subparser.add_argument('--detection', action="store_true",
                               default=True,
                               help="plot fraction of genome detected "
                                    "on the X axis (default)")
        subparser.add_argument('--ani', dest='detection',
                               action="store_false",
                               help="plot containment ANI of the match "
                                    "on the X axis instead")

    def main(self, args):
        """Load the gather CSV, filter weak matches, and save the plot.

        Args:
            args: parsed argparse namespace providing ``gather_csv``,
                ``output``, ``min_num_hashes`` and ``detection``.

        Raises:
            ValueError: if the ``scaled`` column of the CSV does not hold
                exactly one distinct value.
        """
        # Mirrors the clustermap command's main() in this module.
        # NOTE(review): presumably applies the shared plugin options
        # (e.g. quiet/debug) - confirm against sourmash's CommandLinePlugin.
        super().main(args)

        df = pd.read_csv(args.gather_csv)
        notify(f"loaded {len(df)} rows from '{args.gather_csv}'")

        # The bp threshold is only meaningful when every row was computed at
        # the same 'scaled' value. Raise explicitly rather than `assert`, so
        # the check survives `python -O` and explains itself.
        scaled_values = set(df['scaled'])
        if len(scaled_values) != 1:
            raise ValueError(
                f"expected exactly one 'scaled' value in "
                f"'{args.gather_csv}', found {len(scaled_values)}"
            )
        scaled = scaled_values.pop()

        threshold = args.min_num_hashes * scaled
        df = df[df['unique_intersect_bp'] >= threshold]
        notify(f"filtered down to {len(df)} rows with unique_intersect_bp >= {threshold}")

        # Pick the X column and its axis label together so they cannot
        # drift apart.
        if args.detection:
            x_column, x_label = 'f_match_orig', 'fraction of genome detected'
        else:
            x_column, x_label = 'match_containment_ani', 'cANI of match'

        plt.plot(df[x_column], df['average_abund'], '.')
        ax = plt.gca()
        ax.set_xlabel(x_label)
        ax.set_ylabel('number of copies')
        ax.set_yscale('log')

        notify(f"saving figure to '{args.output}'")
        plt.savefig(args.output)