API/BUG: removed default for with_replacement

- Removed the default value for the `with_replacement` parameter, now requiring users to specify whether to run with or without replacement. I don't think there is a good justification at this time for the use of one approach over the other, so this is intended to reflect that. - Renamed `with_replacement` to `replacement` throughout, to avoid the weird parameter name `--p-no-with-replacement` (now it's `--p-no-replacement`, which is slightly better). - Found and fixed a bug where the alpha suite Pipelines were always bootstrapping (i.e., not respecting the `--p-with-replacement` parameter setting).
caporaso-lab · Jul 26, 2024 · bc723cc · bc723cc
1 parent 6b71f74
commit bc723cc
Show file tree

Hide file tree

Showing 7 changed files with 31 additions and 33 deletions.
diff --git a/q2_boots/_normalize.py b/q2_boots/_normalize.py
@@ -20,8 +20,7 @@ def _bootstrap_iteration(table: biom.Table, sampling_depth: int) -> biom.Table:
     return table
 
 
-def resample(ctx, table, sampling_depth, n=1, with_replacement=True,
-             random_seed=None):
+def resample(ctx, table, sampling_depth, n, replacement, random_seed=None):
 
     if random_seed is not None:
         random.seed(random_seed)
@@ -32,6 +31,6 @@ def resample(ctx, table, sampling_depth, n=1, with_replacement=True,
 
     for i in range(n):
         tables.append(_iteration(table=table, sampling_depth=sampling_depth,
-                                 with_replacement=with_replacement)[0])
+                                 with_replacement=replacement)[0])
 
     return tables
diff --git a/q2_boots/alpha.py b/q2_boots/alpha.py
@@ -12,7 +12,7 @@
 
 
 def alpha_collection(ctx, table, sampling_depth, metric, phylogeny=None, n=1000,
-                     random_seed=None, with_replacement=False):
+                     random_seed=None, replacement=False):
 
     if phylogeny is None and (metric in METRICS['PHYLO']['IMPL'] or
                               metric in METRICS['PHYLO']['UNIMPL']):
@@ -28,7 +28,7 @@ def alpha_collection(ctx, table, sampling_depth, metric, phylogeny=None, n=1000,
     _alpha_phylogenetic = ctx.get_action("diversity", "alpha_phylogenetic")
 
     tables, = _bootstrap(table=table, sampling_depth=sampling_depth, n=n,
-                         random_seed=random_seed)
+                         random_seed=random_seed, replacement=replacement)
     diversified_tables = []
 
     for table in tables.values():
@@ -45,13 +45,13 @@ def alpha_collection(ctx, table, sampling_depth, metric, phylogeny=None, n=1000,
 
 
 def alpha(ctx, table, sampling_depth, metric, phylogeny=None,
-          n=1, average_method='median', random_seed=None, with_replacement=False):
+          n=1, average_method='median', random_seed=None, replacement=False):
 
     _alpha_bootstrap = ctx.get_action("boots", "alpha_collection")
     _alpha_average = ctx.get_action('boots', 'alpha_average')
     sample_data, = _alpha_bootstrap(table=table, sampling_depth=sampling_depth,
                                     phylogeny=phylogeny, metric=metric, n=n,
-                                    random_seed=random_seed)
+                                    random_seed=random_seed, replacement=replacement)
 
     result, = _alpha_average(sample_data, average_method)
 

diff --git a/q2_boots/beta.py b/q2_boots/beta.py
@@ -17,7 +17,7 @@
 
 def beta_collection(ctx, table, metric, sampling_depth, phylogeny=None,
                     bypass_tips=False, n_threads=1, n=1000, random_seed=None,
-                    with_replacement=True, pseudocount=1, alpha=None,
+                    replacement=True, pseudocount=1, alpha=None,
                     variance_adjusted=False):
 
     if phylogeny is None and (metric in METRICS['PHYLO']['IMPL'] or
@@ -34,7 +34,7 @@ def beta_collection(ctx, table, metric, sampling_depth, phylogeny=None,
     _beta_phylogenetic = ctx.get_action('diversity', 'beta_phylogenetic')
 
     tables, = _resample(table=table, sampling_depth=sampling_depth, n=n,
-                        random_seed=random_seed, with_replacement=with_replacement)
+                        random_seed=random_seed, replacement=replacement)
 
     dms = []
 
@@ -59,7 +59,7 @@ def beta_collection(ctx, table, metric, sampling_depth, phylogeny=None,
 
 def beta(ctx, table, metric, sampling_depth, representative, phylogeny=None,
          bypass_tips=False, n_threads=1, n=1000, random_seed=None,
-         with_replacement=True, pseudocount=1, alpha=None, variance_adjusted=False):
+         replacement=True, pseudocount=1, alpha=None, variance_adjusted=False):
 
     _beta = ctx.get_action('boots', 'beta_collection')
     _beta_avg = ctx.get_action('boots', 'beta_average')
@@ -70,7 +70,7 @@ def beta(ctx, table, metric, sampling_depth, representative, phylogeny=None,
                       sampling_depth=sampling_depth,
                       n=n,
                       pseudocount=pseudocount,
-                      with_replacement=with_replacement,
+                      replacement=replacement,
                       n_threads=n_threads,
                       variance_adjusted=variance_adjusted,
                       alpha=alpha,

diff --git a/q2_boots/core_metrics.py b/q2_boots/core_metrics.py
@@ -6,10 +6,9 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-def core_metrics(ctx, table, sampling_depth, metadata,
+def core_metrics(ctx, table, sampling_depth, metadata, replacement,
                  n_jobs=1, phylogeny=None, n=100, alpha_method='median',
-                 beta_method='non-metric-median', with_replacement=True,
-                 random_seed=None):
+                 beta_method='non-metric-median', random_seed=None):
 
     bootstrap = ctx.get_action('boots', 'resample')
     observed_features = ctx.get_action("diversity_lib", "observed_features")
@@ -27,7 +26,7 @@ def core_metrics(ctx, table, sampling_depth, metadata,
 
     bootstrapped_tables, = bootstrap(table=table,
                                      sampling_depth=sampling_depth,
-                                     n=n, with_replacement=with_replacement,
+                                     n=n, replacement=replacement,
                                      random_seed=random_seed)
 
     tables = bootstrapped_tables.values()

diff --git a/q2_boots/plugin_setup.py b/q2_boots/plugin_setup.py
@@ -67,7 +67,7 @@
     inputs={'table': FeatureTable[Frequency]},
     parameters={'sampling_depth': Int % Range(1, None),
                 'n': Int % Range(1, None),
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'random_seed': Int},
     outputs={'subsampled_tables': Collection[FeatureTable[Frequency]]},
     input_descriptions={'table': 'The table to be subsampled'},
@@ -77,7 +77,7 @@
                            'is less than the sampling depth will be not be '
                            'included in the resulting table.'),
         'n': 'The number of times to subsample the input table.',
-        'with_replacement': '',
+        'replacement': '',
         'random_seed': random_seed_description
     },
     output_descriptions={
@@ -100,7 +100,7 @@
                                         alpha_metrics['PHYLO']['IMPL'] |
                                         alpha_metrics['PHYLO']['UNIMPL']),
                 'n': Int % Range(1, None),
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'random_seed': Int},
     outputs={'sample_data': Collection[SampleData[AlphaDiversity]]},
     input_descriptions={'table': 'The table to be diversified',
@@ -133,7 +133,7 @@
                                         alpha_metrics['PHYLO']['UNIMPL']),
                 'n': Int % Range(1, None),
                 'average_method': Str % Choices(['median' , 'mean' , 'mode']),
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'random_seed': Int},
     outputs={'sample_data': SampleData[AlphaDiversity]},
     input_descriptions={'table': 'The table to be diversified',
@@ -169,7 +169,7 @@
                 'sampling_depth': Int % Range(1, None),
                 'random_seed': Int,
                 'bypass_tips': Bool,
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'variance_adjusted': Bool,
                 'representative': Str % Choices(['non-metric-mean',
                                                  'non-metric-median',
@@ -226,13 +226,13 @@
                                         beta_metrics['PHYLO']['IMPL'] |
                                         beta_metrics['PHYLO']['UNIMPL']),
                 'pseudocount': Int % Range(1, None),
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'n_threads': Int % Range(1, None) | Str % Choices(['auto']),
                 'n': Int % Range(1, None),
                 'sampling_depth': Int % Range(1, None),
                 'random_seed': Int,
                 'bypass_tips': Bool,
-                'with_replacement': Bool,
+                'replacement': Bool,
                 'variance_adjusted': Bool,
                 'alpha': Float % Range(0, 1, inclusive_end=True)},
     outputs={
@@ -295,7 +295,7 @@
         'beta_method': Str % Choices('non-metric-mean',
                                      'non-metric-median',
                                      'medoid'),
-        'with_replacement': Bool,
+        'replacement': Bool,
         'random_seed': Int
     },
     outputs=[

diff --git a/q2_boots/tests/test_alpha.py b/q2_boots/tests/test_alpha.py
@@ -55,7 +55,7 @@ def test_basic(self):
         output = self.alpha(table=t, sampling_depth=1,
                             metric='shannon',
                             random_seed=12,
-                            n=10)
+                            n=10, replacement=True)
 
         self.assertEqual(len(output), 1)
 
@@ -72,13 +72,13 @@ def test_range_non_phylo(self):
         output, = self.alpha(table=t, sampling_depth=1,
                              metric='shannon',
                              random_seed=12,
-                             n=10)
+                             n=10, replacement=True)
         output: pd.Series = Artifact.view(output, pd.Series)
 
         collection, = self.alpha_collection(
             table=t, sampling_depth=1,
             random_seed=12,
-            metric='shannon', n=10
+            metric='shannon', n=10, replacement=True
         )
 
         self.assertTrue(self.range_check(output, collection.values()))
@@ -99,14 +99,14 @@ def test_range_phylo(self):
                              metric='pielou_e',
                              phylogeny=phylogeny,
                              random_seed=12,
-                             n=10)
+                             n=10, replacement=True)
         output: pd.Series = Artifact.view(output, pd.Series)
 
         collection, = self.alpha_collection(
             table=t, sampling_depth=1,
             phylogeny=phylogeny,
             random_seed=12,
-            metric='pielou_e', n=10
+            metric='pielou_e', n=10, replacement=True
         )
 
         self.assertTrue(self.range_check(output, collection.values()))
@@ -120,7 +120,7 @@ def test_phylo_metric_no_phylo(self):
         with self.assertRaisesRegex(ValueError, 'You must use a non-phylogenic metric'):
             self.alpha(table=t, sampling_depth=1,
                        metric='faith_pd',
-                       n=10)
+                       n=10, replacement=True)
 
     def test_non_phylo_metric_with_phylo(self):
         with StringIO('(O1:0.3, O2:0.2, O3:0.1, O4:0.2)root;') as f:
@@ -139,7 +139,7 @@ def test_non_phylo_metric_with_phylo(self):
                    metric='shannon',
                    random_seed=12,
                    n=10,
-                   phylogeny=phylogeny)
+                   phylogeny=phylogeny, replacement=True)
         self.assertTrue(True)
 
 
@@ -163,7 +163,7 @@ def test_basic(self):
         t = Artifact.import_data('FeatureTable[Frequency]', t)
         output = self.alpha_collection(table=t, sampling_depth=1,
                                        metric='shannon',
-                                       n=10)
+                                       n=10, replacement=True)
 
         self.assertEqual(len(output[0]), 10)
         index = ['S1', 'S2', 'S3']

diff --git a/q2_boots/tests/test_core_metrics.py b/q2_boots/tests/test_core_metrics.py
@@ -39,7 +39,7 @@ def test_basic(self):
                                    sampling_depth=500,
                                    metadata=metadata,
                                    n_jobs=1,
-                                   with_replacement=True,
+                                   replacement=True,
                                    n=10
                                    )
         self.assertEqual(len(output[0]), 10)
@@ -72,7 +72,7 @@ def test_phylogeny(self):
                                    n_jobs=1,
                                    n=10,
                                    phylogeny=phylogeny,
-                                   with_replacement=True,
+                                   replacement=True,
                                    )
         self.assertEqual(len(output[0]), 10)