diff --git a/doc/command-line.md b/doc/command-line.md index edfbeed863..15f9591c44 100644 --- a/doc/command-line.md +++ b/doc/command-line.md @@ -1966,3 +1966,19 @@ situations where you have a **very large** collection of signatures in the collection (as you would have to, with a zipfile). This can be useful if you want to refer to different subsets of the collection without making multiple copies in a zip file. + +### Using sourmash plugins + +As of sourmash v4.7.0, sourmash has an experimental plugins interface! +The plugin interface supports extending sourmash to load and save +signatures in new ways, and also supports the addition of sourmash +subcommands via `sourmash scripts`. + +In order to use a plugin with sourmash, you will need to use `pip` +or `conda` to install the plugin in the same environment that sourmash +is installed in. + +In the future, we will include a list of available sourmash plugins in +the documentation, and also provide a way to list available plugins. + +You can list all installed plugins with `sourmash info -v`. diff --git a/doc/dev_plugins.md b/doc/dev_plugins.md index f8eddfee63..136e882308 100644 --- a/doc/dev_plugins.md +++ b/doc/dev_plugins.md @@ -2,7 +2,8 @@ As of version 4.7.0, sourmash has experimental support for Python plugins to load and save signatures in different ways (e.g. file -formats, RPC servers, databases, etc.). This support is provided via +formats, RPC servers, databases, etc.) and to run additional commands +via the command-line. This support is provided via the "entry points" mechanism supplied by [`importlib.metadata`](https://docs.python.org/3/library/importlib.metadata.html) and documented @@ -24,23 +25,38 @@ a_reader = "module_name:load_sketches" [project.entry-points."sourmash.save_to"] a_writer = "module_name:SaveSignatures_WriteFile" + +[project.entry-points."sourmash.cli_script"] +new_cli = "module_name:Command_NewCommand" ``` Here, `module_name` should be the name of the module to import. 
-`load_sketches` should be a function that takes a location along with + +* `load_sketches` should be a function that takes a location along with arbitrary keyword arguments and returns an `Index` object (e.g. `LinearIndex` for a collection of in-memory -signatures). `SaveSignatures_WriteFile` should be a class that +signatures). +* `SaveSignatures_WriteFile` should be a class that subclasses `BaseSave_SignaturesToLocation` and implements its own mechanisms of saving signatures. See the `sourmash.save_load` module for saving and loading code already used in sourmash. - -Note that if the function or class has a `priority` attribute, this will -be used to determine the order in which the plugins are called. - -The `name` attribute of the plugin (`a_reader` and `a_writer` in +* `Command_NewCommand` should be a class that subclasses + `plugins.CommandLinePlugin` and provides `__init__` and + `main` methods. + +Note that if the reader function or writer class has a `priority` +attribute, this will be used to determine the order in which the +plugins are called. Priorities lower than 10 will get called before +any internal load or save function, while priorities greater than 80 +will get called after almost all internal load/save functions; see +`src/sourmash/save_load.py` for details and the current priorities. + +The `name` attribute of the plugin (`a_reader`, `a_writer`, and `new_cli` in `pyproject.toml`, above) is only used in debugging. +You can provide zero or more plugins, and you can define just a reader, or +just a writer, or just a CLI plugin. + ## Templates and examples If you want to create your own plug-in, you can start with the @@ -53,15 +69,19 @@ Some (early stage) plugins are also available as examples: ## Debugging plugins +`sourmash info -v` will list all installed plugins. + `sourmash sig cat INPUT_SIG -o OUTPUT_SIG` is a simple way to invoke a `save_to` plugin. Use `-d` to turn on debugging output. 
`sourmash sig describe ` is a simple way to invoke a `load_from` plugin. Use `-d` to turn on debugging output. +`sourmash scripts` will list available command-line plugins. + ## Semantic versioning and listing sourmash as a dependency -Plugins should probably list sourmash as a dependency for installation. +Plugins should generally list sourmash as a dependency for installation. Once plugins are officially supported by sourmash, the plugin API will be under [semantic versioning constraints](https://semver.org/). That diff --git a/src/sourmash/__main__.py b/src/sourmash/__main__.py index ef6b8665c4..59e59eaa22 100644 --- a/src/sourmash/__main__.py +++ b/src/sourmash/__main__.py @@ -1,7 +1,12 @@ -import sourmash +""" +The main entry point for sourmash, defined in pyproject.toml. + +Can also be executed as 'python -m sourmash'. +""" def main(arglist=None): + import sourmash args = sourmash.cli.get_parser().parse_args(arglist) if hasattr(args, 'subcmd'): mod = getattr(sourmash.cli, args.cmd) @@ -10,7 +15,9 @@ def main(arglist=None): else: mod = getattr(sourmash.cli, args.cmd) mainmethod = getattr(mod, 'main') - return mainmethod(args) + + retval = mainmethod(args) + raise SystemExit(retval) if __name__ == '__main__': diff --git a/src/sourmash/cli/__init__.py b/src/sourmash/cli/__init__.py index f57fae2dc5..297693db45 100644 --- a/src/sourmash/cli/__init__.py +++ b/src/sourmash/cli/__init__.py @@ -38,6 +38,7 @@ from . import sketch from . import storage from . import tax +from . 
import scripts class SourmashParser(ArgumentParser): @@ -98,20 +99,25 @@ def get_parser(): 'sketch': 'Create signatures', 'sig': 'Manipulate signature files', 'storage': 'Operations on storage', + 'scripts': "Plug-ins", } alias = { - "sig": "signature" + "sig": "signature", + "ext": "scripts", } expert = set(['categorize', 'import_csv', 'migrate', 'multigather', 'sbt_combine', 'watch']) clidir = os.path.dirname(__file__) basic_ops = utils.command_list(clidir) - user_ops = [op for op in basic_ops if op not in expert] + + # provide a list of the basic operations - not expert, not submodules. + user_ops = [op for op in basic_ops if op not in expert and op not in module_descs] usage = ' Basic operations\n' for op in user_ops: docstring = getattr(sys.modules[__name__], op).__doc__ helpstring = 'sourmash {op:s} --help'.format(op=op) usage += ' {hs:25s} {ds:s}\n'.format(hs=helpstring, ds=docstring) + # next, all the subcommand ones - dive into subdirectories. cmd_group_dirs = next(os.walk(clidir))[1] cmd_group_dirs = filter(utils.opfilter, cmd_group_dirs) cmd_group_dirs = sorted(cmd_group_dirs) diff --git a/src/sourmash/cli/info.py b/src/sourmash/cli/info.py index 26211ecb84..b607112b7c 100644 --- a/src/sourmash/cli/info.py +++ b/src/sourmash/cli/info.py @@ -4,6 +4,7 @@ import screed import sourmash from sourmash.logging import notify +from sourmash.plugins import list_all_plugins def subparser(subparsers): subparser = subparsers.add_parser('info') @@ -26,6 +27,8 @@ def info(verbose=False): notify(f'screed version {screed.__version__}') notify(f'- loaded from path: {os.path.dirname(screed.__file__)}') + list_all_plugins() + def main(args): info(verbose=args.verbose) diff --git a/src/sourmash/cli/scripts/__init__.py b/src/sourmash/cli/scripts/__init__.py new file mode 100644 index 0000000000..7062ff6c71 --- /dev/null +++ b/src/sourmash/cli/scripts/__init__.py @@ -0,0 +1,48 @@ +"""Provide a mechanism to add CLI plugins to sourmash. 
+ +See https://sourmash.readthedocs.io/en/latest/dev_plugins.html for docs, +src/sourmash/plugins.py for core sourmash implementation code, and +https://github.com/sourmash-bio/sourmash_plugin_template for a template repo +for making new plugins. +""" + +# CTB TODO: +# * provide suggestions for documentation & metadata for authors: +# * provide guidance on how to test your CLI plugin at the CLI +# (minimal testing regime: sourmash scripts, look for description etc.) + +import argparse +import sourmash + +# Here, we decorate this module with the various extension objects +# e.g. 'sourmash scripts foo' will look up attribute 'scripts.foo' +# and we will return the extension class object, which will then +# be run by sourmash.__main__. This dictionary is loaded below +# by sourmash.plugins.add_cli_scripts. +_extension_dict = {} + +def __getattr__(name): + if name in _extension_dict: + return _extension_dict[name] + raise AttributeError(name) + +def subparser(subparsers): + subparser = subparsers.add_parser('scripts', + usage=argparse.SUPPRESS, + formatter_class=argparse.RawDescriptionHelpFormatter, + aliases=['ext']) + + # get individual help strings: + descrs = list(sourmash.plugins.get_cli_scripts_descriptions()) + if descrs: + description = "\n".join(descrs) + else: + description = "(No script plugins detected!)" + + s = subparser.add_subparsers(title="available plugin/extension commands", + dest='subcmd', + metavar='subcmd', + help=argparse.SUPPRESS, + description=description) + + _extension_dict.update(sourmash.plugins.add_cli_scripts(s)) diff --git a/src/sourmash/plugins.py b/src/sourmash/plugins.py index 2a786d6d24..ecbe691fd2 100644 --- a/src/sourmash/plugins.py +++ b/src/sourmash/plugins.py @@ -4,7 +4,7 @@ Plugin entry point names: * 'sourmash.load_from' - Index class loading. * 'sourmash.save_to' - Signature saving. -* 'sourmash.picklist_filters' - extended Picklist functionality. +* 'sourmash.cli_script' - command-line extension. 
CTB TODO: @@ -15,7 +15,9 @@ DEFAULT_LOAD_FROM_PRIORITY = 99 DEFAULT_SAVE_TO_PRIORITY = 99 -from .logging import debug_literal +import itertools + +from .logging import (debug_literal, error, notify, set_quiet) # cover for older versions of Python that don't support selection on load # (the 'group=' below). @@ -31,6 +33,11 @@ # load 'save_to' entry points as well. _plugin_save_to = entry_points(group='sourmash.save_to') +# aaaaand CLI entry points: +_plugin_cli = entry_points(group='sourmash.cli_script') +_plugin_cli_once = False + +### def get_load_from_functions(): "Load the 'load_from' plugins and yield tuples (priority, name, fn)." @@ -38,7 +45,11 @@ def get_load_from_functions(): # Load each plugin, for plugin in _plugin_load_from: - loader_fn = plugin.load() + try: + loader_fn = plugin.load() + except (ModuleNotFoundError, AttributeError) as e: + debug_literal(f"plugins.load_from_functions: got error loading {plugin.name}: {str(e)}") + continue # get 'priority' if it is available priority = getattr(loader_fn, 'priority', DEFAULT_LOAD_FROM_PRIORITY) @@ -55,7 +66,11 @@ def get_save_to_functions(): # Load each plugin, for plugin in _plugin_save_to: - save_cls = plugin.load() + try: + save_cls = plugin.load() + except (ModuleNotFoundError, AttributeError) as e: + debug_literal(f"plugins.save_to_functions: got error loading {plugin.name}: {str(e)}") + continue # get 'priority' if it is available priority = getattr(save_cls, 'priority', DEFAULT_SAVE_TO_PRIORITY) @@ -64,3 +79,104 @@ def get_save_to_functions(): name = plugin.name debug_literal(f"plugins.save_to_functions: got '{name}', priority={priority}") yield priority, save_cls + + +class CommandLinePlugin: + """ + Provide some minimal common CLI functionality - -q and -d. + + Subclasses should call super().__init__(parser) and super().main(args). 
+ """ + command = None + description = None + + def __init__(self, parser): + parser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + parser.add_argument( + '-d', '--debug', action='store_true', + help='provide debugging output' + ) + + def main(self, args): + set_quiet(args.quiet, args.debug) + + +def get_cli_script_plugins(): + global _plugin_cli_once + + x = [] + for plugin in _plugin_cli: + name = plugin.name + mod = plugin.module + try: + script_cls = plugin.load() + except (ModuleNotFoundError, AttributeError): + if _plugin_cli_once is False: + error(f"ERROR: cannot find or load module for cli_script plugin '{name}'") + continue + + command = getattr(script_cls, 'command', None) + if command is None: + # print error message only once... + if _plugin_cli_once is False: + error(f"ERROR: no command provided by cli_script plugin '{name}' from {mod}; skipping") + else: + x.append(plugin) + + _plugin_cli_once = True + return x + + +def get_cli_scripts_descriptions(): + "Build the descriptions for command-line plugins." + for plugin in get_cli_script_plugins(): + name = plugin.name + script_cls = plugin.load() + + command = getattr(script_cls, 'command') + description = getattr(script_cls, 'description', + f"(no description provided by plugin '{name}')") + yield f"sourmash scripts {command:16s} - {description}" + + +def add_cli_scripts(parser): + "Configure parsing for command-line plugins." 
+ d = {} + + for plugin in get_cli_script_plugins(): + name = plugin.name + script_cls = plugin.load() + + subparser = parser.add_parser(script_cls.command) + debug_literal(f"cli_script plugin '{name}' adding command '{script_cls.command}'") + obj = script_cls(subparser) + d[script_cls.command] = obj + + return d + + +def list_all_plugins(): + plugins = itertools.chain(_plugin_load_from, + _plugin_save_to, + _plugin_cli) + plugins = list(plugins) + + if not plugins: + notify("\n(no plugins detected)\n") + + notify("") + notify("the following plugins are installed:") + notify("") + notify(f"{'plugin type':<20s} {'from python module':<30s} {'v':<5s} {'entry point name':<20s}") + notify(f"{'-'*20} {'-'*30} {'-'*5} {'-'*20}") + + for plugin in plugins: + name = plugin.name + mod = plugin.module + version = plugin.dist.version + group = plugin.group + + notify(f"{group:<20s} {mod:<30s} {version:<5s} {name:<20s}") diff --git a/tests/test_plugin_framework.py b/tests/test_plugin_framework.py index 76b22bb730..06156e4d85 100644 --- a/tests/test_plugin_framework.py +++ b/tests/test_plugin_framework.py @@ -1,12 +1,12 @@ """ Test the plugin framework in sourmash.plugins, which uses importlib.metadata entrypoints. - -CTB TODO: -* check name? """ +import sys import pytest +import collections + import sourmash from sourmash.logging import set_quiet @@ -17,16 +17,25 @@ SaveSignaturesToLocation) +_Dist = collections.namedtuple('_Dist', ['version']) class FakeEntryPoint: """ A class that stores a name and an object to be returned on 'load()'. Mocks the EntryPoint class used by importlib.metadata. 
""" - def __init__(self, name, load_obj): + module = 'test_plugin_framework' + dist = _Dist('0.1') + group = 'groupfoo' + + def __init__(self, name, load_obj, *, + error_on_import=None): self.name = name self.load_obj = load_obj + self.error_on_import = error_on_import def load(self): + if self.error_on_import is not None: + raise self.error_on_import("as requested") return self.load_obj # @@ -50,7 +59,8 @@ def get_some_sigs(self, location, *args, **kwargs): def setup_method(self): self.saved_plugins = plugins._plugin_load_from - plugins._plugin_load_from = [FakeEntryPoint('test_load', self.get_some_sigs)] + plugins._plugin_load_from = [FakeEntryPoint('test_load', self.get_some_sigs), + FakeEntryPoint('test_load', self.get_some_sigs, error_on_import=ModuleNotFoundError)] def teardown_method(self): plugins._plugin_load_from = self.saved_plugins @@ -174,7 +184,8 @@ class Test_EntryPointBasics_SaveTo: # test the basics def setup_method(self): self.saved_plugins = plugins._plugin_save_to - plugins._plugin_save_to = [FakeEntryPoint('test_save', FakeSaveClass)] + plugins._plugin_save_to = [FakeEntryPoint('test_save', FakeSaveClass), + FakeEntryPoint('test_save', FakeSaveClass, error_on_import=ModuleNotFoundError)] def teardown_method(self): plugins._plugin_save_to = self.saved_plugins @@ -259,3 +270,272 @@ def test_save_2(self, runtmp): assert isinstance(x, FakeSaveClass_HighPriority) assert x.keep == [sig2, sig47, sig63] assert x.priority == 1 + + +# +# Test basic features of the save_to plugin hook. +# + +class FakeCommandClass(plugins.CommandLinePlugin): + """ + A fake CLI class. + """ + command = 'nifty' + description = "do somethin' nifty" + + def __init__(self, parser): + super().__init__(parser) + parser.add_argument('arg1') + parser.add_argument('--other', action='store_true') + parser.add_argument('--do-fail', action='store_true') + + def main(self, args): + super().main(args) + print(f"hello, world! 
argument is: {args.arg1}") + print(f"other is {args.other}") + + if args.do_fail: + return 1 + return 0 + + +class Test_EntryPointBasics_Command: + # test the basics + def setup_method(self): + _ = plugins.get_cli_script_plugins() + self.saved_plugins = plugins._plugin_cli + plugins._plugin_cli_once = False + plugins._plugin_cli = [FakeEntryPoint('test_command', + FakeCommandClass)] + + def teardown_method(self): + plugins._plugin_cli = self.saved_plugins + + def test_empty(self, runtmp): + # empty out script plugins... + plugins._plugin_cli = [] + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts') + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + assert '(No script plugins detected!)' in out + + def test_cmd_0(self, runtmp): + # test default output with some plugins + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts') + + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + assert "do somethin' nifty" in out + assert "sourmash scripts nifty" in out + + def test_cmd_1(self): + # test descriptions + ps = list(plugins.get_cli_scripts_descriptions()) + print(ps) + assert len(ps) == 1 + + descr0 = ps[0] + assert "do somethin' nifty" in descr0 + assert "sourmash scripts nifty" in descr0 + + def test_cmd_2(self): + # test get_cli_script_plugins function + ps = list(plugins.get_cli_script_plugins()) + print(ps) + assert len(ps) == 1 + + def test_cmd_3(self, runtmp): + # test ability to run 'nifty' ;) + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'nifty') + + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + + assert 'nifty: error: the following arguments are required: arg1' in err + assert 'usage: nifty [-h] [-q] [-d] [--other] [--do-fail] arg1' in err + + def test_cmd_4(self, runtmp): + # test basic argument parsing etc + runtmp.sourmash('scripts', 'nifty', '--other', 
'some arg') + + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + + assert 'other is True' in out + assert 'hello, world! argument is: some arg' in out + + def test_cmd_5(self, runtmp): + # test exit code passthru + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'nifty', '--do-fail', 'some arg') + + status = runtmp.last_result.status + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + print(status) + + assert 'other is False' in out + assert 'hello, world! argument is: some arg' in out + + +class FakeCommandClass_Second(plugins.CommandLinePlugin): + """ + A fake CLI class. + """ + command = 'more_nifty' + description = "do somethin' else nifty" + + def __init__(self, parser): + super().__init__(parser) + parser.add_argument('arg1') + parser.add_argument('--other', action='store_true') + parser.add_argument('--do-fail', action='store_true') + + def main(self, args): + super().main(args) + print(f"hello, world! argument is: {args.arg1}") + print(f"other is {args.other}") + + if args.do_fail: + return 1 + return 0 + + +class FakeCommandClass_Broken_1: + """ + A fake CLI class. + """ + # command = 'more_nifty' # no command + + def __init__(self, parser): + assert 0 + + def main(self, args): + assert 0 + + +class FakeCommandClass_Broken_2: + """ + A fake CLI class. 
+ """ + command = 'broken' + # no description + + def __init__(self, parser): + pass + + def main(self, args): + return 0 + + +class Test_EntryPointBasics_TwoCommands: + # test a second command + def setup_method(self): + _ = plugins.get_cli_script_plugins() + self.saved_plugins = plugins._plugin_cli + plugins._plugin_cli_once = False + plugins._plugin_cli = [FakeEntryPoint('test_command', + FakeCommandClass), + FakeEntryPoint('test_command2', + FakeCommandClass_Second), + FakeEntryPoint('test_command3', + FakeCommandClass_Broken_1), + FakeEntryPoint('test_command4', + FakeCommandClass_Broken_2), + FakeEntryPoint('error-on-import', + FakeCommandClass, + error_on_import=ModuleNotFoundError) + ] + + def teardown_method(self): + plugins._plugin_cli = self.saved_plugins + + def test_cmd_0(self, runtmp): + # test default output for a few plugins + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts') + + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + assert "do somethin' nifty" in out + assert "sourmash scripts nifty" in out + + assert "do somethin' else nifty" in out + assert "sourmash scripts more_nifty" in out + + def test_cmd_1(self, runtmp): + # test 'nifty' + runtmp.sourmash('scripts', 'nifty', 'some arg') + + status = runtmp.last_result.status + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + print(status) + + assert 'other is False' in out + assert 'hello, world! argument is: some arg' in out + + def test_cmd_2(self, runtmp): + # test 'more_nifty' + runtmp.sourmash('scripts', 'more_nifty', 'some arg') + + status = runtmp.last_result.status + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + print(status) + + assert 'other is False' in out + assert 'hello, world! 
argument is: some arg' in out + + def test_sourmash_info(self, runtmp): + # test 'sourmash info -v' => shows the plugins + runtmp.sourmash('info', '-v') + + out = runtmp.last_result.out + err = runtmp.last_result.err + print(out) + print(err) + + expected = """ +groupfoo test_plugin_framework 0.1 test_command +groupfoo test_plugin_framework 0.1 test_command2 +groupfoo test_plugin_framework 0.1 test_command3 +groupfoo test_plugin_framework 0.1 test_command4 +""".splitlines() + for line in expected: + assert line in err + + +def test_cli_scripts_getattr_fail(): + # test scripts.__getattr__ w/fail + from sourmash.cli import scripts + + with pytest.raises(AttributeError): + scripts.ThisAttrDoesNotExist + + +def test_cli_scripts_getattr_succ(): + # test scripts.__getattr__ w/success + from sourmash.cli import scripts + + scripts.subparser