From 59b870da8d4c9ad21f96fd0dfeb2e4daf0affd72 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Mon, 20 May 2024 06:40:17 -0700
Subject: [PATCH] MRG: clean up code a bit; do cleanrun on CI (#27)

* change CI to clean run

* cleanup and commenting

* clean up code a bit
---
 .github/workflows/build-test.yml  |  2 +-
 src/sourmash_plugin_betterplot.py | 33 ++++++++++++++++++++++---------
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 086b1cd..2bf39a5 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -47,4 +47,4 @@ jobs:
 
     - name: build examples
       shell: bash -l {0}
-      run: make examples
+      run: make cleanrun
diff --git a/src/sourmash_plugin_betterplot.py b/src/sourmash_plugin_betterplot.py
index f4da939..623f903 100644
--- a/src/sourmash_plugin_betterplot.py
+++ b/src/sourmash_plugin_betterplot.py
@@ -25,10 +25,10 @@
 from sourmash.plugins import CommandLinePlugin
 
 
-###
-
+### utility functions
 
 def load_labelinfo_csv(filename):
+    "Load file output by 'sourmash compare --labels-to'"
     with sourmash_args.FileInputCSV(filename) as r:
         labelinfo = list(r)
 
@@ -37,12 +37,15 @@ def load_labelinfo_csv(filename):
 
 
 def load_categories_csv(filename, labelinfo):
+    "Load categories file, integrate with labelinfo => colors"
     with sourmash_args.FileInputCSV(filename) as r:
         categories = list(r)
 
     category_map = {}
     colors = None
     if categories:
+        # first, figure out which column matches between labelinfo
+        # and the categories file.
         assert labelinfo
         keys = set(categories[0].keys())
         keys -= {"category"}
@@ -54,19 +57,27 @@ def load_categories_csv(filename, labelinfo):
                 key = k
                 break
 
+        # found one? awesome. load in all the categories & assign colors.
+
         if key:
-            category_values = list(set([row["category"] for row in categories]))
-            category_values.sort()
+            # get distinct categories
+            category_values = set([row["category"] for row in categories])
+            category_values = list(sorted(category_values))
 
+            # map to colormap colors
             cat_colors = list(map(plt.cm.tab10, range(len(category_values))))
+
+            # build map of category => color
             category_map = {}
             for v, color in zip(category_values, cat_colors):
                 category_map[v] = color
 
+            # build map of key => color
             category_map2 = {}
             for row in categories:
                 category_map2[row[key]] = category_map[row["category"]]
 
+            # build list of colors
             colors = []
             for row in labelinfo:
                 value = row[key]
@@ -82,7 +93,7 @@ def load_categories_csv(filename, labelinfo):
 
 
 def load_categories_csv_for_labels(filename, queries):
-    "Load a categories CSV that must use label name."
+    "Load a categories CSV that uses the 'label' column."
     with sourmash_args.FileInputCSV(filename) as r:
         categories = list(r)
 
@@ -91,20 +102,24 @@ def load_categories_csv_for_labels(filename, queries):
     if categories:
         key = "label"
 
+        # load distinct categories
         category_values = list(set([row["category"] for row in categories]))
         category_values.sort()
 
+        # map categories to color
         cat_colors = list(map(plt.cm.tab10, range(len(category_values))))
         category_map = {}
         for v, color in zip(category_values, cat_colors):
             category_map[v] = color
 
+        # map label to color
         category_map2 = {}
         for row in categories:
             label = row[key]
             cat = row["category"]
             category_map2[label] = category_map[cat]
 
+        # build list of colors
         colors = []
         for label, idx in queries:
             color = category_map2[label]
@@ -116,10 +131,9 @@ def load_categories_csv_for_labels(filename, queries):
 
 
 #
-# CLI plugin - supports 'sourmash scripts plot2'
+# CLI plugin code
 #
 
-
 class Command_Plot2(CommandLinePlugin):
     command = "plot2"  # 'scripts <command>'
     description = (
@@ -247,11 +261,12 @@ def plot_composite_matrix(
         no_labels=not show_labels,
         get_leaves=True,
     )
-    # ax1.set_xticks([])
 
+    # draw cut point
     if cut_point is not None:
         ax1.axvline(x=cut_point, c="red", linestyle="dashed")
 
+    # draw matrix
     xstart = 0.45
     width = 0.45
     if not show_labels:
@@ -538,7 +553,7 @@ def main(self, args):
         plt.savefig(args.output_figure)
 
 
-# @CTB unused again...
+# @CTB: the sparse matrix code below is currently unused. Revisit!
 def create_sparse_dissimilarity_matrix(tuples, num_objects):
     # Initialize matrix in LIL format for efficient setup
     similarity_matrix = lil_matrix((num_objects, num_objects))