From 59b870da8d4c9ad21f96fd0dfeb2e4daf0affd72 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 20 May 2024 06:40:17 -0700 Subject: [PATCH] MRG: clean up code a bit; do cleanrun on CI (#27) * change CI to clean run * cleanup and commenting * clean up code a bit --- .github/workflows/build-test.yml | 2 +- src/sourmash_plugin_betterplot.py | 33 ++++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 086b1cd..2bf39a5 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -47,4 +47,4 @@ jobs: - name: build examples shell: bash -l {0} - run: make examples + run: make cleanrun diff --git a/src/sourmash_plugin_betterplot.py b/src/sourmash_plugin_betterplot.py index f4da939..623f903 100644 --- a/src/sourmash_plugin_betterplot.py +++ b/src/sourmash_plugin_betterplot.py @@ -25,10 +25,10 @@ from sourmash.plugins import CommandLinePlugin -### - +### utility functions def load_labelinfo_csv(filename): + "Load file output by 'sourmash compare --labels-to'" with sourmash_args.FileInputCSV(filename) as r: labelinfo = list(r) @@ -37,12 +37,15 @@ def load_labelinfo_csv(filename): def load_categories_csv(filename, labelinfo): + "Load categories file, integrate with labelinfo => colors" with sourmash_args.FileInputCSV(filename) as r: categories = list(r) category_map = {} colors = None if categories: + # first, figure out which column is matching between labelinfo + # and categories file. assert labelinfo keys = set(categories[0].keys()) keys -= {"category"} @@ -54,19 +57,27 @@ def load_categories_csv(filename, labelinfo): key = k break + # found one? awesome. load in all the categories & assign colors. + if key: - category_values = list(set([row["category"] for row in categories])) - category_values.sort() + # get distinct categories + category_values = set([row["category"] for row in categories]) + category_values = list(sorted(category_values)) + # map to colormap colors cat_colors = list(map(plt.cm.tab10, range(len(category_values)))) + + # build map of category => color category_map = {} for v, color in zip(category_values, cat_colors): category_map[v] = color + # build map of key => color category_map2 = {} for row in categories: category_map2[row[key]] = category_map[row["category"]] + # build list of colors colors = [] for row in labelinfo: value = row[key] @@ -82,7 +93,7 @@ def load_categories_csv(filename, labelinfo): def load_categories_csv_for_labels(filename, queries): - "Load a categories CSV that must use label name." + "Load a categories CSV that uses the 'label' column." with sourmash_args.FileInputCSV(filename) as r: categories = list(r) @@ -91,20 +102,24 @@ def load_categories_csv_for_labels(filename, queries): if categories: key = "label" + # load distinct categories category_values = list(set([row["category"] for row in categories])) category_values.sort() + # map categories to color cat_colors = list(map(plt.cm.tab10, range(len(category_values)))) category_map = {} for v, color in zip(category_values, cat_colors): category_map[v] = color + # map label to color category_map2 = {} for row in categories: label = row[key] cat = row["category"] category_map2[label] = category_map[cat] + # build list of colors colors = [] for label, idx in queries: color = category_map2[label] @@ -116,10 +131,9 @@ def load_categories_csv_for_labels(filename, queries): # -# CLI plugin - supports 'sourmash scripts plot2' +# CLI plugin code # - class Command_Plot2(CommandLinePlugin): command = "plot2" # 'scripts ' description = ( @@ -247,11 +261,12 @@ def plot_composite_matrix( no_labels=not show_labels, get_leaves=True, ) - # ax1.set_xticks([]) + # draw cut point if cut_point is not None: ax1.axvline(x=cut_point, c="red", linestyle="dashed") + # draw matrix xstart = 0.45 width = 0.45 if not show_labels: @@ -538,7 +553,7 @@ def main(self, args): plt.savefig(args.output_figure) -# @CTB unused again... +# @CTB unused code for sparse matrix foo. Revisit! def create_sparse_dissimilarity_matrix(tuples, num_objects): # Initialize matrix in LIL format for efficient setup similarity_matrix = lil_matrix((num_objects, num_objects))