Skip to content

Commit

Permalink
Use resource_path() to access datatypes_conf.xml.sample as a pack…
Browse files Browse the repository at this point in the history
…age resource

This required other changes included here as well:

1) Extend `parse_xml()` to accept a `Traversable` (i.e. the output of
   `resource_path()`.

Open the file before passing it to `etree.parse()`, since lxml's
implementation probably doesn't support a `Traversable`. This also
simplifies the exception handling, since the workaround for the missing
`errno` in the `OSError` exception is not needed any more.

------

2) Fix Galaxy startup when the ``data_manager_config_file`` config option
   is set to an empty value

When does this happen?
----------------------

planemo (which we use in the converters tests) set this option to an
empty value when not testing data managers:

https://github.com/galaxyproject/planemo/blob/master/planemo/galaxy/config.py#L452

This empty config value was resolved to the `path_resolves_to` directory
for this option.
With the change in 1) above, this results in the following traceback at
Galaxy startup:

```
Traceback (most recent call last):
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/webapps/galaxy/buildapp.py", line 68, in app_pair
    app = galaxy.app.UniverseApplication(global_conf=global_conf, is_webapp=True, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/app.py", line 777, in __init__
    self.data_managers = self._register_singleton(DataManagers)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/di.py", line 27, in _register_singleton
    instance = self[dep_type]
               ~~~~^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 381, in __getitem__
    return self.resolve(dep)
           ^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 265, in resolve
    return self._reflection_build_with_err_handling(dep_type, suppress_error)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 392, in _reflection_build_with_err_handling
    return self._reflection_build(dep_type)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 408, in _reflection_build
    return dep_type(**sub_deps)  # type: ignore
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/tools/data_manager/manager.py", line 44, in __init__
    self.load_from_xml(filename)
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/tools/data_manager/manager.py", line 58, in load_from_xml
    tree = util.parse_xml(xml_filename)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/util/__init__.py", line 360, in parse_xml
    with source.open("rb") as f:
         ^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pathlib.py", line 1013, in open
    return io.open(self, mode, buffering, encoding, errors, newline)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IsADirectoryError: [Errno 21] Is a directory: '/tmp/tmps2qq5htq'
```

Why is this a problem now?
--------------------------

Before this commit, `util.parse_xml()` was raising a `XMLSyntaxError`
exception if passed a directory, but now it raises an `IsADirectoryError`,
which is a subclass of `OSError` with errno 21.

As a consequence, when `load_from_xml()` in
lib/galaxy/tools/data_manager/manager.py
calls `util.parse_xml()` passing a directory, the resulting exception
is now re-raised in the `except OSError as e:` branch instead of
just being logged, which is actually good since this is a
misconfiguration.

Solutions
--------

- Do not resolve an empty/None path to the `path_resolves_to` directory.
- Do not check empty/None paths against root
- Adjst unit tests accordingly
- Raise an OSError if the `job_config_file` config option is set to null
  (previously this would be resolved to a directory, also raising an
  OSError when trying to open that as a file).

N.B.: This will also need to be fixed in Planemo by not setting the
`data_manager_config_file` option when not needed.
  • Loading branch information
nsoranzo committed Dec 16, 2024
1 parent 3c82972 commit 6f1164d
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 20 deletions.
8 changes: 5 additions & 3 deletions lib/galaxy/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,10 @@ def resolve(key):
if not parent: # base case: nothing else needs resolving
return path
parent_path = resolve(parent) # recursively resolve parent path
if path is not None:
if path:
path = os.path.join(parent_path, path) # resolve path
else:
path = parent_path # or use parent path
log.warning("Trying to resolve path for the '%s' option but it's empty/None", key)

setattr(self, key, path) # update property
_cache[key] = path # cache it!
Expand All @@ -539,7 +539,7 @@ def resolve(key):
if self.is_set(key) and self.paths_to_check_against_root and key in self.paths_to_check_against_root:
self._check_against_root(key)

def _check_against_root(self, key):
def _check_against_root(self, key: str):
def get_path(current_path, initial_path):
# if path does not exist and was set as relative:
if not self._path_exists(current_path) and not os.path.isabs(initial_path):
Expand All @@ -558,6 +558,8 @@ def get_path(current_path, initial_path):
return current_path

current_value = getattr(self, key) # resolved path or list of resolved paths
if not current_value:
return
if isinstance(current_value, list):
initial_paths = listify(self._raw_config[key], do_strip=True) # initial unresolved paths
updated_paths = []
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,8 @@ def __init__(self, app: MinimalManagerApp):
f" release of Galaxy. Please convert to YAML at {self.app.config.job_config_file} or"
f" explicitly set `job_config_file` to {job_config_file} to remove this message"
)
if not job_config_file:
raise OSError()
if ".xml" in job_config_file:
tree = load(job_config_file)
job_config_dict = self.__parse_job_conf_xml(tree)
Expand Down
7 changes: 5 additions & 2 deletions lib/galaxy/tool_util/linters/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import (
Set,
TYPE_CHECKING,
Union,
)

# from galaxy import config
Expand All @@ -10,15 +11,17 @@
listify,
parse_xml,
)
from galaxy.util.resources import resource_path

if TYPE_CHECKING:
from galaxy.tool_util.lint import LintContext
from galaxy.tool_util.parser import ToolSource
from galaxy.util.resources import Traversable

DATATYPES_CONF = os.path.join(os.path.dirname(__file__), "datatypes_conf.xml.sample")
DATATYPES_CONF = resource_path(__package__, "datatypes_conf.xml.sample")


def _parse_datatypes(datatype_conf_path: str) -> Set[str]:
def _parse_datatypes(datatype_conf_path: Union[str, "Traversable"]) -> Set[str]:
datatypes = set()
tree = parse_xml(datatype_conf_path)
root = tree.getroot()
Expand Down
24 changes: 16 additions & 8 deletions lib/galaxy/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
Optional,
overload,
Tuple,
TYPE_CHECKING,
TypeVar,
Union,
)
Expand Down Expand Up @@ -75,6 +76,7 @@
LXML_AVAILABLE = True
try:
from lxml import etree
from lxml.etree import DocumentInvalid

# lxml.etree.Element is a function that returns a new instance of the
# lxml.etree._Element class. This class doesn't have a proper __init__()
Expand Down Expand Up @@ -123,6 +125,9 @@ def XML(text: Union[str, bytes]) -> Element:
XML,
)

class DocumentInvalid(Exception): # type: ignore[no-redef]
pass

from . import requests
from .custom_logging import get_logger
from .inflection import Inflector
Expand All @@ -142,6 +147,9 @@ def shlex_join(split_command):
return " ".join(map(shlex.quote, split_command))


if TYPE_CHECKING:
from galaxy.util.resources import Traversable

inflector = Inflector()

log = get_logger(__name__)
Expand Down Expand Up @@ -333,7 +341,10 @@ def unique_id(KEY_SIZE=128):


def parse_xml(
fname: StrPath, strip_whitespace=True, remove_comments=True, schemafname: Union[StrPath, None] = None
fname: Union[StrPath, "Traversable"],
strip_whitespace: bool = True,
remove_comments: bool = True,
schemafname: Union[StrPath, None] = None,
) -> ElementTree:
"""Returns a parsed xml tree"""
parser = None
Expand All @@ -348,8 +359,10 @@ def parse_xml(
schema_root = etree.XML(schema_file.read())
schema = etree.XMLSchema(schema_root)

source = Path(fname) if isinstance(fname, (str, os.PathLike)) else fname
try:
tree = cast(ElementTree, etree.parse(str(fname), parser=parser))
with source.open("rb") as f:
tree = cast(ElementTree, etree.parse(f, parser=parser))
root = tree.getroot()
if strip_whitespace:
for elem in root.iter("*"):
Expand All @@ -359,15 +372,10 @@ def parse_xml(
elem.tail = elem.tail.strip()
if schema:
schema.assertValid(tree)
except OSError as e:
if e.errno is None and not os.path.exists(fname): # type: ignore[unreachable]
# lxml doesn't set errno
e.errno = errno.ENOENT # type: ignore[unreachable]
raise
except etree.ParseError:
log.exception("Error parsing file %s", fname)
raise
except etree.DocumentInvalid:
except DocumentInvalid:
log.exception("Validation of file %s failed", fname)
raise
return tree
Expand Down
6 changes: 3 additions & 3 deletions test/unit/config/test_path_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_resolves_to_invalid_type(monkeypatch):


def test_resolves_with_empty_component(monkeypatch):
# A path can be None (root path is never None; may be asigned elsewhere)
# A path can be None (root path is never None; may be assigned elsewhere)
mock_schema = {
"path0": {
"type": "str",
Expand All @@ -155,13 +155,13 @@ def test_resolves_with_empty_component(monkeypatch):
"path2": {
"type": "str",
"default": "value2",
"path_resolves_to": "path1",
"path_resolves_to": "path0",
},
}
monkeypatch.setattr(AppSchema, "_read_schema", lambda a, b: get_schema(mock_schema))
monkeypatch.setattr(BaseAppConfiguration, "_load_schema", lambda a: AppSchema(Path("no path"), "_"))

config = BaseAppConfiguration()
assert config.path0 == "value0"
assert config.path1 == "value0"
assert config.path1 is None
assert config.path2 == "value0/value2"
8 changes: 4 additions & 4 deletions test/unit/config/test_path_resolves_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,21 @@ def test_kwargs_relative_path_old_prefix_for_other_option(mock_init):

def test_kwargs_relative_path_old_prefix_empty_after_strip(mock_init):
# Expect: use value from kwargs, strip old prefix, then resolve
new_path1 = "old-config"
new_path1 = "old-config/foo"
config = BaseAppConfiguration(path1=new_path1)

assert config.path1 == "my-config/" # stripped of old prefix, then resolved
assert config.path1 == "my-config/foo" # stripped of old prefix, then resolved
assert config.path2 == "my-data/my-data-files" # stripped of old prefix, then resolved
assert config.path3 == "my-other-files" # no change


def test_kwargs_set_to_null(mock_init):
# Expected: allow overriding with null, then resolve
# Expected: allow overriding with null
# This is not a common scenario, but it does happen: one example is
# `job_config` set to `None` when testing
config = BaseAppConfiguration(path1=None)

assert config.path1 == "my-config" # resolved
assert config.path1 is None
assert config.path2 == "my-data/my-data-files" # resolved
assert config.path3 == "my-other-files" # no change

Expand Down

0 comments on commit 6f1164d

Please sign in to comment.