Skip to content

Commit

Permalink
MRG: support red, green, blue on ANI plot; fix upset stuff; bump version (#47)
Browse files Browse the repository at this point in the history

* support red, green, blue on ANI plot; fix upset stuff; bump version

* typo
  • Loading branch information
ctb authored Jul 22, 2024
1 parent a3a97aa commit b1bb6b0
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "sourmash_plugin_betterplot"
description = "sourmash plugin for improved plotting/viz and cluster examination."
readme = "README.md"
requires-python = ">=3.10"
version = "0.4.3"
version = "0.4.4"

dependencies = ["sourmash>=4.8.8,<5", "sourmash_utils>=0.2",
"matplotlib", "numpy", "scipy", "scikit-learn",
Expand Down
48 changes: 45 additions & 3 deletions src/sourmash_plugin_betterplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import argparse
import os
import csv
from collections import defaultdict
from collections import defaultdict, Counter
from itertools import chain, combinations
import pickle

Expand Down Expand Up @@ -1018,6 +1018,15 @@ def powerset(iterable, *, start=2):

notify(f"Loaded {len(siglist)} signatures & downsampled to scaled={scaled}")

names_check = [ ss.name for ss in siglist ]
if len(set(names_check)) != len(names_check):
notify("ERROR: duplicate names or sketches; please fix!!")
cnt = Counter(names_check)
for k, v in cnt.most_common():
if v > 1:
print(f"\t* {k} shows up {v} times")
sys.exit(-1)

# @CTB: check scaled, ksize, etc.

if not siglist:
Expand All @@ -1041,6 +1050,7 @@ def powerset(iterable, *, start=2):
truncate_name = lambda x: x[:truncate_at-3] + '...' if len(x) >= truncate_at else x
get_name = lambda x: [ truncate_name(ss.name) for ss in x ]
names = [ get_name(combo) for combo in pset ]

notify(f"powerset of distinct combinations: {len(pset)}")

# CTB: maybe turn the intersection code below into a class?
Expand Down Expand Up @@ -1511,6 +1521,12 @@ def __init__(self, subparser):
default=True)
subparser.add_argument('--ani', dest='detection',
action="store_false")
subparser.add_argument('--green-color',
help="color genomes with matching names green")
subparser.add_argument('--red-color',
help="color genomes with matching names red")
subparser.add_argument('--blue-color',
help="color genomes with matching names blue")

def main(self, args):
df = pd.read_csv(args.gather_csv)
Expand All @@ -1525,9 +1541,35 @@ def main(self, args):
notify(f"filtered down to {len(df)} rows with unique_intersect_bp >= {threshold}")

if args.detection:
plt.plot(df.f_match_orig, df.average_abund, '.')
plt.plot(df.f_match_orig, df.average_abund, 'k.')
else:
plt.plot(df.match_containment_ani, df.average_abund, '.')
plt.plot(df.match_containment_ani, df.average_abund, 'k.')

dfs = []
colors = []
if args.green_color:
df2 = df[df['match_name'].str.contains(args.green_color)]
notify(f"{len(df2)} matches to {args.green_color} => green circles")
dfs.append(df2)
colors.append('go')
if args.red_color:
df2 = df[df['match_name'].str.contains(args.red_color)]
notify(f"{len(df2)} matches to {args.red_color} => red crosses")

dfs.append(df2)
colors.append('r+')
if args.blue_color:
df2 = df[df['match_name'].str.contains(args.blue_color)]
notify(f"{len(df2)} matches to {args.blue_color} => blue triangles")
dfs.append(df2)
colors.append('bv')

for (df2, color) in zip(dfs, colors):
if args.detection:
plt.plot(df2.f_match_orig, df2.average_abund, color)
else:
plt.plot(df2.match_containment_ani, df2.average_abund, color)

ax = plt.gca()
ax.set_ylabel('number of copies')
ax.set_yscale('log')
Expand Down

0 comments on commit b1bb6b0

Please sign in to comment.