diff --git a/lib/galaxy/tool_util/linters/inputs.py b/lib/galaxy/tool_util/linters/inputs.py index e4fd04694dec..815c1a6d0142 100644 --- a/lib/galaxy/tool_util/linters/inputs.py +++ b/lib/galaxy/tool_util/linters/inputs.py @@ -3,6 +3,7 @@ import ast import re import warnings +from copy import deepcopy from typing import ( Iterator, Optional, @@ -130,7 +131,6 @@ ] # TODO lint for valid param type - attribute combinations -# TODO check if dataset is available for filters referring other datasets # TODO check if ref input param is present for from_dataset @@ -490,6 +490,190 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): ) +FILTER_REQUIRED_ATTRIBUTES = { + "data_meta": ["type", "ref", "key"], # column needs special treatment + "param_value": ["type", "ref", "column"], + "static_value": ["type", "column", "value"], + "regexp": ["type", "column", "value"], + "unique_value": ["type", "column"], + "multiple_splitter": ["type", "column"], + "attribute_value_splitter": ["type", "column"], + "add_value": ["type", "value"], + "remove_value": ["type"], # this is handled separately in InputsOptionsRemoveValueFilterRequiredAttributes + "sort_by": ["type", "column"], + "data_table": ["type", "column", "table_name", "data_table_column"], +} + + +class InputsOptionsFiltersRequiredAttributes(Linter): + """ + check required attributes of filters + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + for param, param_name in _iter_param(tool_xml): + options = param.find("./options") + if options is None: + continue + for filter in param.findall("./options/filter"): + filter_type = filter.get("type", None) + if filter_type is None or filter_type not in FILTER_ALLOWED_ATTRIBUTES: + continue + for attrib in FILTER_REQUIRED_ATTRIBUTES[filter_type]: + if attrib not in filter.attrib: + lint_ctx.error( + f"Select parameter [{param_name}] '{filter_type}' filter misses required attribute '{attrib}'", + node=filter, + ) + + +class InputsOptionsRemoveValueFilterRequiredAttributes(Linter): + """ + check required attributes of remove_value filter + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + for param, param_name in _iter_param(tool_xml): + options = param.find("./options") + if options is None: + continue + for filter in param.findall("./options/filter"): + filter_type = filter.get("type", None) + # check for required attributes for filter (remove_value needs a bit more logic here) + if filter_type != "remove_value": + continue + if not ( + ( + "value" in filter.attrib + and "ref" not in filter.attrib + and "meta_ref" not in filter.attrib + and "key" not in filter.attrib + ) + or ( + "value" not in filter.attrib + and "ref" in filter.attrib + and "meta_ref" not in filter.attrib + and "key" not in filter.attrib + ) + or ( + "value" not in filter.attrib + and "ref" not in filter.attrib + and "meta_ref" in filter.attrib + and "key" in filter.attrib + ) + ): + lint_ctx.error( + f"Select parameter [{param_name}] '{filter_type}'' filter needs either the 'value'; 'ref'; or 'meta' and 'key' attribute(s)", + node=filter, + ) + + +FILTER_ALLOWED_ATTRIBUTES = deepcopy(FILTER_REQUIRED_ATTRIBUTES) +FILTER_ALLOWED_ATTRIBUTES["static_value"].append("keep") +FILTER_ALLOWED_ATTRIBUTES["regexp"].append("keep") +FILTER_ALLOWED_ATTRIBUTES["data_meta"].extend(["column", "multiple", "separator"]) +FILTER_ALLOWED_ATTRIBUTES["param_value"].extend(["keep", "ref_attribute"]) +FILTER_ALLOWED_ATTRIBUTES["multiple_splitter"].append("separator") +FILTER_ALLOWED_ATTRIBUTES["attribute_value_splitter"].extend(["pair_separator", "name_val_separator"]) +FILTER_ALLOWED_ATTRIBUTES["add_value"].extend(["name", "index"]) +FILTER_ALLOWED_ATTRIBUTES["remove_value"].extend(["value", "ref", "meta_ref", "key"]) +FILTER_ALLOWED_ATTRIBUTES["data_table"].append("keep") + + +class InputsOptionsFiltersAllowedAttributes(Linter): + """ + check allowed attributes of filters + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + + for param, param_name in _iter_param(tool_xml): + options = param.find("./options") + if options is None: + continue + for filter in param.findall("./options/filter"): + filter_type = filter.get("type", None) + if filter_type is None or filter_type not in FILTER_ALLOWED_ATTRIBUTES: + continue + for attrib in filter.attrib: + if attrib not in FILTER_ALLOWED_ATTRIBUTES[filter_type]: + lint_ctx.warn( + f"Select parameter [{param_name}] '{filter_type}' filter specifies unnecessary attribute '{attrib}'", + node=filter, + ) + + +class InputsOptionsRegexFilterExpression(Linter): + """ + Check the regular expression of regexp filters + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + + for param, param_name in _iter_param(tool_xml): + options = param.find("./options") + if options is None: + continue + for filter in param.findall("./options/filter"): + filter_type = filter.get("type", None) + if filter_type == "regexp" and "value" in filter.attrib: + try: + re.compile(filter.attrib["value"]) + except re.error as re_error: + lint_ctx.error( + f"Select parameter [{param_name}] '{filter_type}'' filter 'value' is not a valid regular expression ({re_error})'", + node=filter, + ) + + +class InputsOptionsFiltersCheckReferences(Linter): + """ + Check the references used in filters + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + + # get the set of param names + param_names = {param_name for _, param_name in _iter_param(tool_xml)} + + for param, param_name in _iter_param(tool_xml): + options = param.find("./options") + if options is None: + continue + for filter in param.findall("./options/filter"): + filter_type = filter.get("type", None) + if filter_type is not None: + # check for references to other inputs + # TODO: currently ref and metaref seem only to work for top level params, + # once this is fixed the linter needs to be extended, e.g. `f.attrib[ref_attrib].split('|')[-1]` + for ref_attrib in ["meta_ref", "ref"]: + if ref_attrib in filter.attrib and filter.attrib[ref_attrib] not in param_names: + lint_ctx.error( + f"Select parameter [{param_name}] '{filter_type}'' filter attribute '{ref_attrib}' refers to non existing parameter '{filter.attrib[ref_attrib]}'", + node=filter, + ) + + class InputsDataOptionsFiltersRef(Linter): """ Lint for set ref for filters of data parameters diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 0890052b56a0..45ff42bf1772 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -5650,8 +5650,9 @@ Currently the following filters are defined: * ``data_meta`` populate or filter options based on the metadata of another input parameter specified by ``ref``. If a ``column`` is given options are filtered for which the entry in this column ``column`` is equal to metadata of the input parameter specified by ``ref``. If no ``column`` is given the metadata value of the referenced input is added to the options list (in this case the corresponding ``options`` tag must not have the ``from_data_table`` or ``from_dataset`` attributes). In both cases the desired metadata is selected by ``key``. +* ``data_table`` remove values according to the entries of a data table. Remove options where the value in ``column`` appears in the data table ``table_name`` in column ``table_column``. Setting ``keep`` will to ``true`` will keep only entries also appearing in the data table. -The ``static_value`` and ``regexp`` filters can be inverted by setting ``keep`` to true. +The ``static_value``, ``regexp``, and ``data_table`` filters can be inverted by setting ``keep`` to true. * ``add_value``: add an option with a given ``name`` and ``value`` to the options. By default, the new option is appended, with ``index`` the insertion position can be specified. * ``remove_value``: remove a value from the options. Either specified explicitly with ``value``, the value of another input specified with ``ref``, or the metadata ``key`` of another input ``meta_ref``. @@ -5825,7 +5826,7 @@ only used if ``multiple`` is set to ``true``.]]> If ``true``, keep columns matching the value, if ``false`` discard columns matching the value. Used when ``type`` is -either ``static_value``, ``regexp`` or ``param_value``. Default: true +either ``static_value``, ``regexp``, ``param_value`` or ``data_table``. Default: true. @@ -5874,6 +5875,18 @@ from the list. Only used if ``type`` is ``attribute_value_splitter``. This is used to separate attributes and values from each other within an attribute-value pair, i.e. ``=`` if the target content is ``A=V; B=W; C=Y``. Defaults to whitespace. + + + Only used when ``type`` is +``data_table``. The name of the data table to use. + + + + + Only used when ``type`` is +``data_table``. The column of the data table to use (0 based index or column name). + + @@ -7926,6 +7939,7 @@ and ``bibtex`` are the only supported options. + diff --git a/lib/galaxy/tools/parameters/dynamic_options.py b/lib/galaxy/tools/parameters/dynamic_options.py index d40d328491f7..452e058ff263 100644 --- a/lib/galaxy/tools/parameters/dynamic_options.py +++ b/lib/galaxy/tools/parameters/dynamic_options.py @@ -538,6 +538,61 @@ def filter_options(self, options, trans, other_values): return sorted(options, key=lambda x: x[self.column], reverse=self.reverse) +class DataTableFilter(Filter): + """ + Filters a list of options by entries present in a data table, i.e. + option[column] needs to be in the specified data table column + + Type: data_table + + Required Attributes: + + - column: column in options to compare with + - table_name: data table to use + - data_table_column: data table column to use + + Optional Attributes: + + - keep: Keep options where option[column] is in the data table column (True) + Discard columns matching value (False) + + """ + + def __init__(self, d_option, elem): + Filter.__init__(self, d_option, elem) + self.table_name = elem.get("table_name", None) + assert self.table_name is not None, "Required 'table_name' attribute missing from filter" + column = elem.get("column", None) + assert column is not None, "Required 'column' attribute missing from filter" + self.column = d_option.column_spec_to_index(column) + self.data_table_column = elem.get("data_table_column", None) + assert self.data_table_column is not None, "Required 'data_table_column' attribute missing from filter" + self.keep = string_as_bool(elem.get("keep", "True")) + + def filter_options(self, options, trans, other_values): + # get column from data table, by index or column name + entries = None + try: + entries = {f[int(self.data_table_column)] for f in trans.app.tool_data_tables[self.table_name].get_fields()} + except ValueError: + pass + try: + entries = { + f[self.data_table_column] for f in trans.app.tool_data_tables[self.table_name].get_named_fields_list() + } + except KeyError: + pass + if entries is None: + log.error(f"could not get data from column {self.data_table_column} from data_table {self.table_name}") + return options + + rval = [] + for o in options: + if self.keep == (o[self.column] in entries): + rval.append(o) + return rval + + filter_types = dict( data_meta=DataMetaFilter, param_value=ParamValueFilter, @@ -549,6 +604,7 @@ def filter_options(self, options, trans, other_values): add_value=AdditionalValueFilter, remove_value=RemoveValueFilter, sort_by=SortByColumnFilter, + data_table=DataTableFilter, ) diff --git a/test/functional/tools/filter_data_table.xml b/test/functional/tools/filter_data_table.xml new file mode 100644 index 000000000000..b30c5a201bd2 --- /dev/null +++ b/test/functional/tools/filter_data_table.xml @@ -0,0 +1,42 @@ + + Filter on datatable entries + '$output' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml index bb9d7568f600..0918d7dc92b4 100644 --- a/test/functional/tools/sample_tool_conf.xml +++ b/test/functional/tools/sample_tool_conf.xml @@ -66,6 +66,7 @@ + diff --git a/test/unit/tool_util/test_tool_linters.py b/test/unit/tool_util/test_tool_linters.py index fc99e74ed963..939545c8a078 100644 --- a/test/unit/tool_util/test_tool_linters.py +++ b/test/unit/tool_util/test_tool_linters.py @@ -230,6 +230,7 @@ INPUTS_DATA_PARAM_OPTIONS = """ + @@ -242,6 +243,7 @@ INPUTS_DATA_PARAM_OPTIONS_FILTER_ATTRIBUTE = """ + @@ -257,7 +259,7 @@ - + @@ -548,6 +550,48 @@ """ +INPUTS_FILTER_INCORRECT = """ + + + + + + + + + + + + + + + +""" + +INPUTS_FILTER_CORRECT = """ + + + + + + + + + + + + + + + + + + + + + + +""" # test tool xml for outputs linter OUTPUTS_MISSING = """ @@ -1345,7 +1389,7 @@ def test_inputs_data_param_options(lint_ctx): tool_source = get_xml_tool_source(INPUTS_DATA_PARAM_OPTIONS) run_lint_module(lint_ctx, inputs, tool_source) assert not lint_ctx.valid_messages - assert "Found 1 input parameters." in lint_ctx.info_messages + assert "Found 2 input parameters." in lint_ctx.info_messages assert len(lint_ctx.info_messages) == 1 assert not lint_ctx.warn_messages assert not lint_ctx.error_messages @@ -1355,7 +1399,7 @@ def test_inputs_data_param_options_filter_attribute(lint_ctx): tool_source = get_xml_tool_source(INPUTS_DATA_PARAM_OPTIONS_FILTER_ATTRIBUTE) run_lint_module(lint_ctx, inputs, tool_source) assert not lint_ctx.valid_messages - assert "Found 1 input parameters." in lint_ctx.info_messages + assert "Found 2 input parameters." in lint_ctx.info_messages assert len(lint_ctx.info_messages) == 1 assert not lint_ctx.warn_messages assert not lint_ctx.error_messages @@ -1507,7 +1551,7 @@ def test_inputs_select_filter(lint_ctx): assert "Found 1 input parameters." in lint_ctx.info_messages assert "Invalid XML: Element 'filter': The attribute 'type' is required but missing." in lint_ctx.error_messages assert ( - "Invalid XML: Element 'filter', attribute 'type': [facet 'enumeration'] The value 'unknown_filter_type' is not an element of the set {'data_meta', 'param_value', 'static_value', 'regexp', 'unique_value', 'multiple_splitter', 'attribute_value_splitter', 'add_value', 'remove_value', 'sort_by'}." + "Invalid XML: Element 'filter', attribute 'type': [facet 'enumeration'] The value 'unknown_filter_type' is not an element of the set {'data_meta', 'param_value', 'static_value', 'regexp', 'unique_value', 'multiple_splitter', 'attribute_value_splitter', 'add_value', 'remove_value', 'sort_by', 'data_table'}." in lint_ctx.error_messages ) assert len(lint_ctx.info_messages) == 1 @@ -1610,6 +1654,49 @@ def test_inputs_duplicate_names(lint_ctx): assert len(lint_ctx.error_messages) == 2 +def test_inputs_filter_correct(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_FILTER_CORRECT) + run_lint_module(lint_ctx, inputs, tool_source) + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert not lint_ctx.error_messages + + +def test_inputs_filter_incorrect(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_FILTER_INCORRECT) + run_lint_module(lint_ctx, inputs, tool_source) + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert ( + "Select parameter [select_param] 'regexp' filter specifies unnecessary attribute 'multiple'" + in lint_ctx.warn_messages + ) + assert len(lint_ctx.warn_messages) == 1 + assert "Invalid XML: Element 'filter': The attribute 'type' is required but missing." in lint_ctx.error_messages + assert ( + "Invalid XML: Element 'filter', attribute 'type': [facet 'enumeration'] The value 'typo' is not an element of the set {'data_meta', 'param_value', 'static_value', 'regexp', 'unique_value', 'multiple_splitter', 'attribute_value_splitter', 'add_value', 'remove_value', 'sort_by', 'data_table'}." + in lint_ctx.error_messages + ) + assert ( + "Select parameter [select_param] 'static_value' filter misses required attribute 'column'" + in lint_ctx.error_messages + ) + assert ( + "Select parameter [select_param] 'remove_value'' filter needs either the 'value'; 'ref'; or 'meta' and 'key' attribute(s)" + in lint_ctx.error_messages + ) + assert ( + "Select parameter [select_param] 'data_meta'' filter attribute 'ref' refers to non existing parameter 'input'" + in lint_ctx.error_messages + ) + assert ( + "Select parameter [select_param] 'regexp'' filter 'value' is not a valid regular expression (nothing to repeat at position 1)'" + in lint_ctx.error_messages + ) + assert len(lint_ctx.error_messages) == 6 + + def test_inputs_repeats(lint_ctx): """ this was previously covered by a linter in inputs @@ -2237,7 +2324,7 @@ def test_skip_by_module(lint_ctx): def test_list_linters(): linter_names = Linter.list_listers() # make sure to add/remove a test for new/removed linters if this number changes - assert len(linter_names) == 135 + assert len(linter_names) == 140 assert "Linter" not in linter_names # make sure that linters from all modules are available for prefix in [