From c675897cacd4eb20c22cd80b0dce9904a97774ac Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 30 Sep 2024 10:01:12 -0400 Subject: [PATCH 1/3] More ability to set global defaults for file source plugin types. --- lib/galaxy/config/__init__.py | 5 +++++ lib/galaxy/config/schemas/config_schema.yml | 20 +++++++++++++++++ lib/galaxy/files/plugins.py | 20 +++++++++++++++++ lib/galaxy/files/sources/s3fs.py | 14 +++++++++++- lib/galaxy/files/sources/webdav.py | 25 ++++++++++++++++++--- lib/galaxy/model/unittest_utils/data_app.py | 4 ++++ 6 files changed, 84 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index ecf8f8aebcfb..80d57ab04bf6 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -704,6 +704,7 @@ class GalaxyAppConfiguration(BaseAppConfiguration, CommonConfigurationMixin): drmaa_external_runjob_script: str email_from: Optional[str] enable_tool_shed_check: bool + file_source_temp_dir: str galaxy_data_manager_data_path: str galaxy_infrastructure_url: str hours_between_check: int @@ -1236,6 +1237,9 @@ def _load_theme(path: str, theme_dict: dict): else: _load_theme(self.themes_config_file, self.themes) + if self.file_source_temp_dir: + self.file_source_temp_dir = os.path.abspath(self.file_source_temp_dir) + def _process_celery_config(self): if self.celery_conf and self.celery_conf.get("result_backend") is None: # If the result_backend is not set, use a SQLite database in the data directory @@ -1348,6 +1352,7 @@ def check(self): self.template_cache_path, self.tool_data_path, self.user_library_import_dir, + self.file_source_temp_dir, ] for path in paths_to_check: self._ensure_directory(path) diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index afcb6ec9c3ed..4b8519a6624f 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -4104,3 +4104,23 @@ mapping: per_host: true desc: | Enable the integration of the Galaxy Help Forum in the tool panel. This requires the help_forum_api_url to be set. + + file_source_temp_dir: + type: str + required: false + desc: | + Directory to store temporary files for file sources. This defaults to new_file_path if not set. + + file_source_webdav_use_temp_files: + type: bool + default: true + desc: | + Default value for use_temp_files for webdav plugins that don't explicitly declare this. + + file_source_listings_expiry_time: + type: int + default: 60 + desc: | + Number of seconds before file source content listings are refreshed. Shorter times will result in more + queries while browsing a file sources. Longer times will result in fewer requests to file sources but + outdated contents might be displayed to the user. Currently only affects s3fs file sources. diff --git a/lib/galaxy/files/plugins.py b/lib/galaxy/files/plugins.py index 4f9c182f846a..fcbee3fdeaf8 100644 --- a/lib/galaxy/files/plugins.py +++ b/lib/galaxy/files/plugins.py @@ -24,6 +24,9 @@ class FileSourcePluginsConfig: user_library_import_dir: Optional[str] ftp_upload_dir: Optional[str] ftp_upload_purge: bool + tmp_dir: Optional[str] + webdav_use_temp_files: Optional[bool] + listings_expiry_time: Optional[int] def __init__( self, @@ -33,6 +36,9 @@ def __init__( user_library_import_dir=None, ftp_upload_dir=None, ftp_upload_purge=True, + tmp_dir=None, + webdav_use_temp_files=None, + listings_expiry_time=None, ): symlink_allowlist = symlink_allowlist or [] fetch_url_allowlist = fetch_url_allowlist or [] @@ -42,6 +48,9 @@ def __init__( self.user_library_import_dir = user_library_import_dir self.ftp_upload_dir = ftp_upload_dir self.ftp_upload_purge = ftp_upload_purge + self.tmp_dir = tmp_dir + self.webdav_use_temp_files = webdav_use_temp_files + self.listings_expiry_time = listings_expiry_time @staticmethod def from_app_config(config): @@ -54,6 +63,10 @@ def from_app_config(config): kwds["user_library_import_dir"] = config.user_library_import_dir kwds["ftp_upload_dir"] = config.ftp_upload_dir kwds["ftp_upload_purge"] = config.ftp_upload_purge + kwds["tmp_dir"] = config.file_source_temp_dir + kwds["webdav_use_temp_files"] = config.file_source_webdav_use_temp_files + kwds["listings_expiry_time"] = config.file_source_listings_expiry_time + return FileSourcePluginsConfig(**kwds) def to_dict(self): @@ -64,6 +77,9 @@ def to_dict(self): "user_library_import_dir": self.user_library_import_dir, "ftp_upload_dir": self.ftp_upload_dir, "ftp_upload_purge": self.ftp_upload_purge, + "tmp_dir": self.tmp_dir, + "webdav_use_temp_files": self.webdav_use_temp_files, + "listings_expiry_time": self.listings_expiry_time, } @staticmethod @@ -75,6 +91,10 @@ def from_dict(as_dict): user_library_import_dir=as_dict["user_library_import_dir"], ftp_upload_dir=as_dict["ftp_upload_dir"], ftp_upload_purge=as_dict["ftp_upload_purge"], + # Always provided for new jobs, remove in 25.0 + tmp_dir=as_dict.get("tmp_dir"), + webdav_use_temp_files=as_dict.get("webdav_use_temp_files"), + listings_expiry_time=as_dict.get("listings_expiry_time"), ) diff --git a/lib/galaxy/files/sources/s3fs.py b/lib/galaxy/files/sources/s3fs.py index 6967f4a5b6a7..c6a7dedc96d3 100644 --- a/lib/galaxy/files/sources/s3fs.py +++ b/lib/galaxy/files/sources/s3fs.py @@ -8,7 +8,10 @@ Tuple, ) -from typing_extensions import Unpack +from typing_extensions import ( + NotRequired, + Unpack, +) from galaxy.files import OptionalUserContext from . import ( @@ -35,6 +38,7 @@ class S3FsFilesSourceProperties(FilesSourceProperties, total=False): endpoint_url: int user: str passwd: str + listings_expiry_time: NotRequired[Optional[int]] client_kwargs: dict # internally computed. Should not be specified in config file @@ -45,6 +49,14 @@ def __init__(self, **kwd: Unpack[S3FsFilesSourceProperties]): if s3fs is None: raise Exception("Package s3fs unavailable but required for this file source plugin.") props: S3FsFilesSourceProperties = cast(S3FsFilesSourceProperties, self._parse_common_config_opts(kwd)) + file_sources_config = self._file_sources_config + if ( + props.get("listings_expiry_time") is None + and file_sources_config + and file_sources_config.listings_expiry_time + ): + if file_sources_config.listings_expiry_time: + props["listings_expiry_time"] = file_sources_config.listings_expiry_time # There is a possibility that the bucket name could be parameterized: e.g. # bucket: ${user.preferences['generic_s3|bucket']} # that's ok, because we evaluate the bucket name again later. The bucket property here will only diff --git a/lib/galaxy/files/sources/webdav.py b/lib/galaxy/files/sources/webdav.py index 2e11a51d98b2..0205d7ecc32d 100644 --- a/lib/galaxy/files/sources/webdav.py +++ b/lib/galaxy/files/sources/webdav.py @@ -5,10 +5,13 @@ import tempfile from typing import ( + cast, Optional, Union, ) +from typing_extensions import NotRequired + from . import ( FilesSourceOptions, FilesSourceProperties, @@ -16,18 +19,34 @@ from ._pyfilesystem2 import PyFilesystem2FilesSource +class WebDavFilesSourceProperties(FilesSourceProperties, total=False): + use_temp_files: NotRequired[Optional[bool]] + temp_path: NotRequired[Optional[str]] + + class WebDavFilesSource(PyFilesystem2FilesSource): plugin_type = "webdav" required_module = WebDAVFS required_package = "fs.webdavfs" def _open_fs(self, user_context=None, opts: Optional[FilesSourceOptions] = None): - props = self._serialization_props(user_context) + props = cast(WebDavFilesSourceProperties, self._serialization_props(user_context)) + file_sources_config = self._file_sources_config use_temp_files = props.pop("use_temp_files", None) + if use_temp_files is None and file_sources_config and file_sources_config.webdav_use_temp_files is not None: + use_temp_files = file_sources_config.webdav_use_temp_files if use_temp_files is None: # Default to True to avoid memory issues with large files. - props["use_temp_files"] = True - props["temp_path"] = props.get("temp_path", tempfile.TemporaryDirectory(prefix="webdav_")) + use_temp_files = True + + if use_temp_files: + temp_path = props.get("temp_path") + if temp_path is None and file_sources_config and file_sources_config.tmp_dir: + temp_path = file_sources_config.tmp_dir + if temp_path is None: + temp_path = tempfile.mkdtemp(prefix="webdav_") + props["temp_path"] = temp_path + props["use_temp_files"] = use_temp_files extra_props: Union[FilesSourceProperties, dict] = opts.extra_props or {} if opts else {} handle = WebDAVFS(**{**props, **extra_props}) return handle diff --git a/lib/galaxy/model/unittest_utils/data_app.py b/lib/galaxy/model/unittest_utils/data_app.py index 75e5101eca80..0d424cca3623 100644 --- a/lib/galaxy/model/unittest_utils/data_app.py +++ b/lib/galaxy/model/unittest_utils/data_app.py @@ -79,6 +79,10 @@ def __init__(self, root=None, **kwd): self.ftp_upload_dir = None self.ftp_upload_purge = False + self.file_source_temp_dir = None + self.file_source_webdav_use_temp_files = False + self.file_source_listings_expiry_time = 60 + def __del__(self): if self._remove_root: shutil.rmtree(self.root) From eea3909cb04a917b62e36dff092a5d4c80ba5330 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 1 Oct 2024 12:12:15 -0400 Subject: [PATCH 2/3] Fixes and expansion of webdav unit tests. --- lib/galaxy/files/sources/_pyfilesystem2.py | 13 ++++-- lib/galaxy/files/sources/webdav.py | 1 + test/unit/files/_util.py | 6 ++- test/unit/files/test_webdav.py | 43 +++++++++++++++---- test/unit/files/webdav_file_sources_conf.yml | 6 +++ ...dav_file_sources_without_use_temp_conf.yml | 7 +++ 6 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 test/unit/files/webdav_file_sources_conf.yml create mode 100644 test/unit/files/webdav_file_sources_without_use_temp_conf.yml diff --git a/lib/galaxy/files/sources/_pyfilesystem2.py b/lib/galaxy/files/sources/_pyfilesystem2.py index afcff2884961..5b3add6e6119 100644 --- a/lib/galaxy/files/sources/_pyfilesystem2.py +++ b/lib/galaxy/files/sources/_pyfilesystem2.py @@ -38,6 +38,7 @@ class PyFilesystem2FilesSource(BaseFilesSource): required_package: ClassVar[str] supports_pagination = True supports_search = True + allow_key_error_on_empty_directories = False # work around a bug in webdav def __init__(self, **kwd: Unpack[FilesSourceProperties]): if self.required_module is None: @@ -65,10 +66,14 @@ def _list( with self._open_fs(user_context=user_context, opts=opts) as h: if recursive: recursive_result: List[AnyRemoteEntry] = [] - for p, dirs, files in h.walk(path, namespaces=["details"]): - to_dict = functools.partial(self._resource_info_to_dict, p) - recursive_result.extend(map(to_dict, dirs)) - recursive_result.extend(map(to_dict, files)) + try: + for p, dirs, files in h.walk(path, namespaces=["details"]): + to_dict = functools.partial(self._resource_info_to_dict, p) + recursive_result.extend(map(to_dict, dirs)) + recursive_result.extend(map(to_dict, files)) + except KeyError: + if not self.allow_key_error_on_empty_directories: + raise return recursive_result, len(recursive_result) else: page = self._to_page(limit, offset) diff --git a/lib/galaxy/files/sources/webdav.py b/lib/galaxy/files/sources/webdav.py index 0205d7ecc32d..9aec17fdbdc4 100644 --- a/lib/galaxy/files/sources/webdav.py +++ b/lib/galaxy/files/sources/webdav.py @@ -28,6 +28,7 @@ class WebDavFilesSource(PyFilesystem2FilesSource): plugin_type = "webdav" required_module = WebDAVFS required_package = "fs.webdavfs" + allow_key_error_on_empty_directories = True def _open_fs(self, user_context=None, opts: Optional[FilesSourceOptions] = None): props = cast(WebDavFilesSourceProperties, self._serialization_props(user_context)) diff --git a/test/unit/files/_util.py b/test/unit/files/_util.py index 16ad933affa4..2bf1274fcee5 100644 --- a/test/unit/files/_util.py +++ b/test/unit/files/_util.py @@ -2,6 +2,7 @@ import os import tempfile +from typing import Optional from galaxy.files import ( ConfiguredFileSources, @@ -156,8 +157,9 @@ def write_from( file_source_path.file_source.write_from(file_source_path.path, f.name, user_context=user_context) -def configured_file_sources(conf_file): - file_sources_config = FileSourcePluginsConfig() +def configured_file_sources(conf_file, file_sources_config: Optional[FileSourcePluginsConfig] = None): + file_sources_config = file_sources_config or FileSourcePluginsConfig() + assert file_sources_config if isinstance(conf_file, str): conf = ConfiguredFileSourcesConf(conf_file=conf_file) else: diff --git a/test/unit/files/test_webdav.py b/test/unit/files/test_webdav.py index 071281e1bc88..9294881ec247 100644 --- a/test/unit/files/test_webdav.py +++ b/test/unit/files/test_webdav.py @@ -1,9 +1,10 @@ # docker run -v `pwd`/test/integration/webdav/data:/media -e WEBDAV_USERNAME=alice -e WEBDAV_PASSWORD=secret1234 -p 7083:7083 jmchilton/webdavdev -# GALAXY_TEST_WEBDAV=1 pytest test/unit/files/webdav.py +# GALAXY_TEST_WEBDAV=1 pytest test/unit/files/test_webdav.py import os import pytest +from galaxy.files.plugins import FileSourcePluginsConfig from ._util import ( configured_file_sources, find, @@ -17,6 +18,7 @@ SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__)) FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "webdav_file_sources_conf.yml") +FILE_SOURCES_CONF_NO_USE_TEMP_FILES = os.path.join(SCRIPT_DIRECTORY, "webdav_file_sources_without_use_temp_conf.yml") USER_FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "webdav_user_file_sources_conf.yml") skip_if_no_webdav = pytest.mark.skipif(not os.environ.get("GALAXY_TEST_WEBDAV"), reason="GALAXY_TEST_WEBDAV not set") @@ -62,17 +64,40 @@ def test_sniff_to_tmp(): @skip_if_no_webdav def test_serialization(): - # serialize the configured file sources and rematerialize them, - # ensure they still function. This is needed for uploading files. - file_sources = serialize_and_recover(configured_file_sources(FILE_SOURCES_CONF)) + configs = [FILE_SOURCES_CONF_NO_USE_TEMP_FILES, FILE_SOURCES_CONF] + for config in configs: + # serialize the configured file sources and rematerialize them, + # ensure they still function. This is needed for uploading files. + file_sources = serialize_and_recover(configured_file_sources(config)) - res = list_root(file_sources, "gxfiles://test1", recursive=True) - assert find_file_a(res) + res = list_root(file_sources, "gxfiles://test1", recursive=True) + assert find_file_a(res) - res = list_root(file_sources, "gxfiles://test1", recursive=False) - assert find_file_a(res) + res = list_root(file_sources, "gxfiles://test1", recursive=False) + assert find_file_a(res) - _download_and_check_file(file_sources) + _download_and_check_file(file_sources) + + +@skip_if_no_webdav +def test_config_options(): + file_sources = configured_file_sources(FILE_SOURCES_CONF) + fs = file_sources._file_sources[0] + user_context = user_context_fixture() + assert fs._open_fs(user_context).use_temp_files + + file_sources = configured_file_sources(FILE_SOURCES_CONF_NO_USE_TEMP_FILES) + fs = file_sources._file_sources[0] + user_context = user_context_fixture() + assert not fs._open_fs(user_context).use_temp_files + + disable_default_use_temp = FileSourcePluginsConfig( + webdav_use_temp_files=False, + ) + file_sources = configured_file_sources(FILE_SOURCES_CONF, disable_default_use_temp) + fs = file_sources._file_sources[0] + user_context = user_context_fixture() + assert not fs._open_fs(user_context).use_temp_files @skip_if_no_webdav diff --git a/test/unit/files/webdav_file_sources_conf.yml b/test/unit/files/webdav_file_sources_conf.yml new file mode 100644 index 000000000000..6116edd6de1f --- /dev/null +++ b/test/unit/files/webdav_file_sources_conf.yml @@ -0,0 +1,6 @@ +- type: webdav + id: test1 + doc: Test WebDAV server. + url: http://127.0.0.1:7083 + login: alice + password: secret1234 diff --git a/test/unit/files/webdav_file_sources_without_use_temp_conf.yml b/test/unit/files/webdav_file_sources_without_use_temp_conf.yml new file mode 100644 index 000000000000..916f8068f459 --- /dev/null +++ b/test/unit/files/webdav_file_sources_without_use_temp_conf.yml @@ -0,0 +1,7 @@ +- type: webdav + id: test1 + doc: Test WebDAV server. + url: http://127.0.0.1:7083 + login: alice + password: secret1234 + use_temp_files: false From ab70b5be6ab697d99dc828569a1059f4b568b011 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 2 Oct 2024 10:31:15 -0400 Subject: [PATCH 3/3] Rebuild schema... --- doc/source/admin/galaxy_options.rst | 59 ++++++++++++++++++++++ lib/galaxy/config/sample/galaxy.yml.sample | 29 +++++++++++ 2 files changed, 88 insertions(+) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 8f4aef21d667..45386d72d861 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -5115,6 +5115,26 @@ :Type: str +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``workflow_scheduling_separate_materialization_iteration`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Workflows launched with URI/URL inputs that are not marked as + 'deferred' are "materialized" (or undeferred) by the workflow + scheduler. This might be a lengthy process. Setting this to 'True' + will place the invocation back in the queue after materialization + before scheduling the workflow so it is less likely to starve + other workflow scheduling. Ideally, Galaxy would allow more fine + grain control of handlers but until then, this provides a way to + tip the balance between "doing more work" and "being more fair". + The default here is pretty arbitrary - it has been to False to + optimize Galaxy for automated, single user applications where + "fairness" is mostly irrelevant. +:Default: ``false`` +:Type: bool + + ~~~~~~~~~~~~~~~~~~~~~~~~ ``cache_user_job_count`` ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -5602,3 +5622,42 @@ This requires the help_forum_api_url to be set. :Default: ``false`` :Type: bool + + +~~~~~~~~~~~~~~~~~~~~~~~~ +``file_source_temp_dir`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Directory to store temporary files for file sources. This defaults + to new_file_path if not set. +:Default: ``None`` +:Type: str + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``file_source_webdav_use_temp_files`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Default value for use_temp_files for webdav plugins that don't + explicitly declare this. +:Default: ``true`` +:Type: bool + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``file_source_listings_expiry_time`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Number of seconds before file source content listings are + refreshed. Shorter times will result in more queries while + browsing a file sources. Longer times will result in fewer + requests to file sources but outdated contents might be displayed + to the user. Currently only affects s3fs file sources. +:Default: ``60`` +:Type: int + + + diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 7c3e326a0a0c..d6718cf176e7 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -2748,6 +2748,19 @@ galaxy: # . #workflow_schedulers_config_file: workflow_schedulers_conf.xml + # Workflows launched with URI/URL inputs that are not marked as + # 'deferred' are "materialized" (or undeferred) by the workflow + # scheduler. This might be a lengthy process. Setting this to 'True' + # will place the invocation back in the queue after materialization + # before scheduling the workflow so it is less likely to starve other + # workflow scheduling. Ideally, Galaxy would allow more fine grain + # control of handlers but until then, this provides a way to tip the + # balance between "doing more work" and "being more fair". The default + # here is pretty arbitrary - it has been to False to optimize Galaxy + # for automated, single user applications where "fairness" is mostly + # irrelevant. + #workflow_scheduling_separate_materialization_iteration: false + # If using job concurrency limits (configured in job_config_file), # several extra database queries must be performed to determine the # number of jobs a user has dispatched to a given destination. By @@ -2978,3 +2991,19 @@ galaxy: # Enable the integration of the Galaxy Help Forum in the tool panel. # This requires the help_forum_api_url to be set. #enable_help_forum_tool_panel_integration: false + + # Directory to store temporary files for file sources. This defaults + # to new_file_path if not set. + #file_source_temp_dir: null + + # Default value for use_temp_files for webdav plugins that don't + # explicitly declare this. + #file_source_webdav_use_temp_files: true + + # Number of seconds before file source content listings are refreshed. + # Shorter times will result in more queries while browsing a file + # sources. Longer times will result in fewer requests to file sources + # but outdated contents might be displayed to the user. Currently only + # affects s3fs file sources. + #file_source_listings_expiry_time: 60 +