diff --git a/config/galaxy.yml.interactivetools b/config/galaxy.yml.interactivetools index 562d53a70e26..add807ec9e5d 100644 --- a/config/galaxy.yml.interactivetools +++ b/config/galaxy.yml.interactivetools @@ -2,18 +2,29 @@ gravity: gx_it_proxy: enable: true port: 4002 + + #handlers: + # handler: + # processes: 3 + # pools: + # - job-handlers + # - workflow-schedulers + galaxy: interactivetools_enable: true + interactivetools_map: database/interactivetools_map.sqlite + # outputs_to_working_directory will provide you with a better level of isolation. It is highly recommended to set # this parameter with InteractiveTools. outputs_to_working_directory: true - interactivetools_map: database/interactivetools_map.sqlite + # `galaxy_infrastructure_url` needs to be reachable from IT containers. - # For local development you can map arbitrary hostnames. See `job_conf.xml.interactivetools` + # For local development you can map arbitrary hostnames. See `job_conf.yml.interactivetools` # for an example. # In the local development case you should use the `http` protocol (e.g http://localhost:8080) to access # your Galaxy, so saving notebooks doesn't fail due to invalid certificates. galaxy_infrastructure_url: http://localhost:8080 + # Do not set the following 2 options if you are using an upstream proxy server like nginx interactivetools_upstream_proxy: false interactivetools_proxy_host: localhost:4002 diff --git a/config/job_conf.yml.interactivetools b/config/job_conf.yml.interactivetools new file mode 100644 index 000000000000..dfc00fb46b46 --- /dev/null +++ b/config/job_conf.yml.interactivetools @@ -0,0 +1,58 @@ +## A sample job config for InteractiveTools using local runner. 
## + +runners: + local: + load: galaxy.jobs.runners.local:LocalJobRunner + workers: 4 + +# Uncomment if dynamic handlers are defined in "gravity:handlers" section in galaxy.yml +# +#handling: +# assign: +# - db-skip-locked + +execution: + default: docker_dispatch + environments: + local: + runner: local + + docker_local: + runner: local + docker_enabled: true + #docker_volumes: $defaults,/mnt/galaxyData/libraries:ro,/mnt/galaxyData/indices:ro + #docker_volumes_from: parent_container_name + #docker_memory: 24G + #docker_sudo: false + #docker_sudo_cmd: /usr/bin/sudo -extra_param + #docker_net: bridge + #docker_auto_rm: true + #docker_set_user: $UID + docker_set_user: + + # InteractiveTools do need real hostnames or URLs to work - simply specifying IPs will not work. + # If you develop interactive tools on your 'localhost' and don't have a proper domain name + # you need to tell all Docker containers a hostname where Galaxy is running. + # This can be done via the add-host parameter during the `docker run` command. + # 'localhost' here is an arbitrary hostname that matches the IP address of your + # Galaxy host. Make sure this hostname ('localhost') is also set in your galaxy.yml file, e.g. + # `galaxy_infrastructure_url: http://localhost:8080`. 
+ #docker_run_extra_arguments: --add-host localhost:host-gateway + + #docker_cmd: /usr/local/custom_docker/docker + #docker_host: + #docker_container_id_override: busybox:ubuntu-14.04 + #docker_default_container_id: busybox:ubuntu-14.04 + #require_container: true + #container_monitor: true + #container_monitor_result: file + #container_monitor_command: python /path/to/galaxy/lib/galaxy_ext/container_monitor/monitor.py + #container_monitor_get_ip_method: null + #container_resolvers_config_file: null + #container_resolvers: + + docker_dispatch: + runner: dynamic + type: docker_dispatch + docker_destination_id: docker_local + default_destination_id: local \ No newline at end of file diff --git a/config/job_conf.yml.interactivetools.podman b/config/job_conf.yml.interactivetools.podman new file mode 100644 index 000000000000..b377c45f2c21 --- /dev/null +++ b/config/job_conf.yml.interactivetools.podman @@ -0,0 +1,38 @@ +## A sample job config for InteractiveTools using local runner, configured for Podman ## + +runners: + local: + load: galaxy.jobs.runners.local:LocalJobRunner + workers: 4 + +# Uncomment if dynamic handlers are defined in "gravity:handlers" section in galaxy.yml +# +#handling: +# assign: +# - db-skip-locked + +execution: + default: docker_dispatch + environments: + local: + runner: local + + docker_local: + runner: local + docker_enabled: true + + docker_set_user: + + # For containers running as root (on the inside) + docker_run_extra_arguments: --security-opt label=disable + # Should work for containers with non-root user (on the inside) + #docker_run_extra_arguments: --userns=keep-id --security-opt label=disable + + # Change to home directory of the galaxy user, not the directory of the galaxy installation + docker_cmd: HOME="/home/galaxy"; podman + + docker_dispatch: + runner: dynamic + type: docker_dispatch + docker_destination_id: docker_local + default_destination_id: local \ No newline at end of file diff --git a/doc/source/admin/galaxy_options.rst
b/doc/source/admin/galaxy_options.rst index 44db78d7dd32..ca5b0730582a 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -2023,18 +2023,6 @@ :Type: str -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``interactivetools_shorten_url`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Description: - Shorten the uuid portion of the subdomain or path for interactive - tools. Especially useful for avoiding the need for wildcard - certificates by keeping subdomain under 63 chars -:Default: ``false`` -:Type: bool - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``retry_interactivetool_metadata_internally`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/admin/special_topics/interactivetools.rst b/doc/source/admin/special_topics/interactivetools.rst index 9dd18eac2bf8..698c2d1d3d73 100644 --- a/doc/source/admin/special_topics/interactivetools.rst +++ b/doc/source/admin/special_topics/interactivetools.rst @@ -26,7 +26,6 @@ entry point information: **Note** that name, port, and url are each able to be templated from the InteractiveTool's parameter dictionary. - Some important benefits of using Galaxy InteractiveTools -------------------------------------------------------- @@ -37,49 +36,294 @@ Some important benefits of using Galaxy InteractiveTools - **InteractiveTools** are **bonafide Galaxy Tools**; just specify **tool_type as "interactive"** and list the ports you want to expose - **InteractiveTools** can be **added** to and **installed from the ToolShed**. - **R Shiny apps**, **Javascript-based VNC** access to desktop environments, **genome-browsers-in-a-box**, **interactive notebook environments**, etc, are all possible with **InteractiveTools** +- **InteractiveTools** typically run as software (e.g. 
Docker) containers in an isolated environment Server-side configuration of Galaxy InteractiveTools ---------------------------------------------------- + +Basic configuration +^^^^^^^^^^^^^^^^^^^ + For production deployments and additional considerations please see the `Galaxy Interactive Tools Tutorial `__. The ``galaxy.yml`` file will need to be populated as seen in ``config/galaxy.yml.interactivetools``. -Galaxy InteractiveTool routing relies on wildcard subdomain routes and a proxy server that forwards requests to a running container. +Galaxy InteractiveTool routing by default relies on wildcard subdomain routes and a proxy server that forwards requests to a running container. For users who manage their own DNS, you can set the appropriate A records to redirect ``*.interactivetool.yourdomain``. -`gravity` will automatically start the needed proxy server. +``gravity`` will automatically start the needed proxy server. The following configuration is only recommended for local testing, as users will directly connect to the InteractiveTool Proxy. In a production setup an upstream proxy should route requests to the proxy via the ``*.interactivetool.yourdomain`` subdomain, or use path-based proxying for interactive tools that support it (``requires_domain=False``, see below for more details). -Set these values in `galaxy.yml`: +Set these values in ``galaxy.yml``: .. code-block:: yaml - gravity: - # ... - gx_it_proxy: - enable: true - port: 4002 - galaxy: - # ... - interactivetools_enable: true - interactivetools_map: database/interactivetools_map.sqlite - galaxy_infrastructure_url: http://localhost:8080 - # Do not set the following 2 options if you are using an upstream proxy server like nginx - interactivetools_upstream_proxy: false - interactivetools_proxy_host: localhost:4002 - # ... 
- - -If you do want to use nginx as an upstream proxy server you can use the following server section to route requests to -the InteractiveTool proxy: + gravity: + gx_it_proxy: + enable: true + port: 4002 + + #handlers: + # handler: + # processes: 3 + # pools: + # - job-handlers + # - workflow-schedulers + + galaxy: + interactivetools_enable: true + interactivetools_map: database/interactivetools_map.sqlite + + # outputs_to_working_directory will provide you with a better level of isolation. It is highly recommended to set + # this parameter with InteractiveTools. + outputs_to_working_directory: true + + # `galaxy_infrastructure_url` needs to be reachable from IT containers. + # For local development you can map arbitrary hostnames. See `job_conf.yml.interactivetools` + # for an example. + # In the local development case you should use the `http` protocol (e.g http://localhost:8080) to access + # your Galaxy, so saving notebooks doesn't fail due to invalid certificates. + galaxy_infrastructure_url: http://localhost:8080 + + # Do not set the following 2 options if you are using an upstream proxy server like nginx + interactivetools_upstream_proxy: false + interactivetools_proxy_host: localhost:4002 + + +The ``gx-it-proxy`` config relates to an important service in the InteractiveTool infrastructure: the InteractiveTool +proxy. ``gx-it-proxy`` runs as a separate process listening at port 4002 (by default). HTTP requests are decoded based on +the URL and headers, then somewhat massaged, and finally forwarded to the correct entry point port of the target InteractiveTool. + +.. note:: + + A previous config option ``interactivetools_shorten_url`` was removed in commit `#73100de `_ + since similar functionality is now default behavior. Setting ``interactivetools_shorten_url`` to ``true`` shortened + long interactive tool URLs (then default) from e.g. 
+ + ``8c24e5aaae1db3a3-d0fc9f05229e40259142c4d8b5829797.interactivetoolentrypoint.interactivetool.mygalaxy.org`` + + down to + + ``8c24e5aaae1db3a3-d0fc9f0522.interactivetool.mygalaxy.org`` + + Now, all interactive tool URLs are similarly short, e.g. + + ``24q1dbzrknq1v-1a1p13jnahscj.ep.interactivetool.mygalaxy.org`` + + Note that the previous ``.interactivetoolentrypoint`` part has been shortened down to ``.ep``, but this is now always included. + For this reason, URLs are now up to ``3`` characters longer than was previously the case when ``interactivetools_shorten_url`` + was set to ``true``. For deployments that require URLs to be shorter than a specific limit (for example ``63`` characters for some kubernetes + setups), this slight ``3`` character increase could cause the URLs to break the limit. If so, please adjust the + ``interactivetools_prefix`` config (default value: ``interactivetool``) to counter this. + +You will also need to enable a docker destination in the ``job_conf.yml`` file. +An example ``job_conf.yml`` file as seen in ``config/job_conf.yml.interactivetools``: + +.. code-block:: yaml + + ## A sample job config for InteractiveTools using local runner. ## + + runners: + local: + load: galaxy.jobs.runners.local:LocalJobRunner + workers: 4 + + # Uncomment if dynamic handlers are defined in "gravity:handlers" section in galaxy.yml + # + #handling: + # assign: + # - db-skip-locked + + execution: + default: docker_dispatch + environments: + local: + runner: local + + docker_local: + runner: local + docker_enabled: true + #docker_volumes: $defaults,/mnt/galaxyData/libraries:ro,/mnt/galaxyData/indices:ro + #docker_volumes_from: parent_container_name + #docker_memory: 24G + #docker_sudo: false + #docker_sudo_cmd: /usr/bin/sudo -extra_param + #docker_net: bridge + #docker_auto_rm: true + #docker_set_user: $UID + docker_set_user: + + # InteractiveTools do need real hostnames or URLs to work - simply specifying IPs will not work.
+ # If you develop interactive tools on your 'localhost' and don't have a proper domain name + # you need to tell all Docker containers a hostname where Galaxy is running. + # This can be done via the add-host parameter during the `docker run` command. + # 'localhost' here is an arbitrary hostname that matches the IP address of your + # Galaxy host. Make sure this hostname ('localhost') is also set in your galaxy.yml file, e.g. + # `galaxy_infrastructure_url: http://localhost:8080`. + #docker_run_extra_arguments: --add-host localhost:host-gateway + + #docker_cmd: /usr/local/custom_docker/docker + #docker_host: + #docker_container_id_override: busybox:ubuntu-14.04 + #docker_default_container_id: busybox:ubuntu-14.04 + #require_container: true + #container_monitor: true + #container_monitor_result: file + #container_monitor_command: python /path/to/galaxy/lib/galaxy_ext/container_monitor/monitor.py + #container_monitor_get_ip_method: null + #container_resolvers_config_file: null + #container_resolvers: + + docker_dispatch: + runner: dynamic + type: docker_dispatch + docker_destination_id: docker_local + default_destination_id: local + + +Galaxy currently contains a sizable collection of **InteractiveTools** directly in the +code base. To be enabled, they need to be uncommented in or added to the ``config/tool_conf.xml``: + +.. code-block:: xml + + + + + + + +An InteractiveTool is defined in the same familiar way as standard Galaxy Tools, +but is specified with ``tool_type="interactive"``, and providing additional +entry point information: + +.. code-block:: xml + + + + 80 + + + + + +**Note** that name, port, and url are each able to be templated from the InteractiveTool's parameter dictionary.
+ + +Path-based InteractiveTools +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As will become clear in the NGINX tutorial below, the default configuration of InteractiveTools in a production setting gives rise +to some complications - in particular the need to set up a wildcard DNS entry and procure a wildcard SSL certificate. +This is necessary to support unique URLs for InteractiveTool instances using only the domain part of the URL, +e.g. ``https://24q1dbzrknq1v-1a1p13jnahscj.ep.interactivetool.myserver.net/``. Wildcard SSL certificates are less convenient +than regular certificates and are inherently less safe and thus prohibited at many institutions. Hence, +path-based interactive tools were implemented as an alternative way to configure InteractiveTools. Path-based URLs to +InteractiveTools look something like this: ``https://myserver.net/interactivetool/ep/24q1dbzrknq1v/1a1p13jnahscj/``. +To enable path-based InteractiveTools, set ``requires_domain="False"`` in the relevant ``entry_point`` tag in the tool XML: + +.. code-block:: xml + + + + 80 + + + + +Path-based InteractiveTools are somewhat more difficult to configure than domain-based ITs. This is due to the fact that the web +server within an InteractiveTool container now must serve the contents under a path prefix. There are two main ways this can be solved: + +1. Relative links. If the web server embedded in the InteractiveTool only serves HTML pages with relative links then the + contents can be served at any level in the path hierarchy. The InteractiveTool proxy then strips away the "path prefix" + or "entry point path" part of the URL (e.g. ``interactivetool/ep/24q1dbzrknq1v/1a1p13jnahscj/``) from forwarded HTTP requests + so that the InteractiveTool web server operates as if it were served at the top level (directly under ``/``). Since all + links are relative, the web browser will automatically handle merging of the path prefix with the relative path appended + by the InteractiveTool.
+ + This setup is the default setup provided by the tool XML example above, but to be more explicit one can also set + ``requires_path_in_url="False"`` in the ``entry_point`` tag. As the web service operates with relative links it does + not need to know the entry point path under which it is served. + +2. Absolute links. Unfortunately many relevant services are implemented with absolute links, i.e. starting + at the top-level ``/``. For such InteractiveTools to work with path-based URLs the contained web server + needs to be configured with the path prefix/entry point path under which the content should be served. Two issues then + need to be considered: + + a. How to inject the path prefix into the InteractiveTool at run-time? + + Two injection mechanisms are provided, injecting the path prefix as an environment variable or as an HTTP header. + + i. Injecting the path prefix as an environment variable: + + .. code-block:: xml + + + + 80 + + + + + mytool + + + + Here, the entry point is given a ``label="mytool"`` attribute. This label is then used by the ``entry_point_path_for_label`` + injection mechanism to identify the entry point whose path shall be injected into the environment variable, here ``EP_PATH``. + This environment variable must then be mobilized in the InteractiveTool tool XML to properly configure the contained web server, + such as in the ``command`` tag of the JupyTool InteractiveTool: + + .. code-block:: xml + + + + + + If we follow the same entry point path example as above, the ``PROXY_PREFIX`` variable will in this case be set to the value + ``interactivetool/ep/24q1dbzrknq1v/1a1p13jnahscj/ipython``. This variable is further parsed by the Jupyter Notebook software + as a configuration of the path prefix under which the contents will be served. + + ii. Injecting the path prefix as an HTTP header: + + .. code-block:: xml + + + + 80 + + + + Here, the InteractiveTool proxy service is instructed to inject the path prefix as an HTTP header, e.g.
+ ``X-My-Header="interactivetool/ep/24q1dbzrknq1v/1a1p13jnahscj/"`` in the proxied requests to the InteractiveTool server. + + b. Does the InteractiveTool service require that the full path is provided in the URL? + + When ``requires_path_in_url="True"`` in the ``entry_point`` tag, the InteractiveTool proxy service forwards the HTTP requests + with the full path intact. + + Both values of ``requires_path_in_url`` can be combined with both injection mechanisms, leading to four configuration variants + for path-based InteractiveTools. Choosing the correct one depends on the implementation of the web server contained in the + InteractiveTool and can be a bit tricky to get correct. In some cases, none of these options will work. One solution can then + be to configure another highly customized proxy web server within the InteractiveTool, e.g. using NGINX. + + +NGINX proxy server configuration (in production) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you want to use nginx as a proxy server upstream of a Galaxy installation (in a production setting), you can use the following +server section to route domain-based requests to the InteractiveTool proxy: .. code-block:: nginx @@ -105,11 +349,11 @@ the InteractiveTool proxy: Note that this nginx example uses https, so you need to have a wildcard certificate for your domain, and you need to adjust ``galaxy_infrastructure_url`` as appropriate. -It is also possible to set up nginx to route path-based interactive tool URLs to the InteractiveTool proxy. -Path-based interactive tool URLs will only be created for tools that have defined ``requires_domain=False`` in the tool -XML file (which signals that the web server running on the container makes use of relative links and can serve -content starting from any path). A tool config variable will be added in the next version to simplify this for -tools that need to know the path to where it is served.
+You should also set up nginx to route path-based InteractiveTool URLs to the InteractiveTool proxy. +Path-based InteractiveTool URLs will only be created for tools that have defined ``requires_domain=False`` in the tool +XML file (which signals that the web server running on the container is configured to operate at a subpath under the main +Galaxy installation). Hence, no wildcard DNS configuration or wildcard SSL certificates are needed for path-based +interactive tools. To support path-based interactive tools through nginx proxy, add the following to the main Galaxy "server" section (serving port 443): @@ -117,73 +361,36 @@ section (serving port 443): .. code-block:: nginx # Route all path-based interactive tool requests to the InteractiveTool proxy application - location ~* ^/(interactivetool/access/.+)$ { + location ~* ^/(interactivetool/.+)$ { proxy_redirect off; proxy_http_version 1.1; + proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; proxy_pass http://localhost:4002; } + This example config works for default values of ``interactivetools_base_path`` and ``interactivetools_prefix`` as defined in -``galaxy.yml``. For other values, you will need to adjust the location patterns accordingly. This solution also -requires ``interactivetools_shorten_url`` to be set to ``false`` (default). +``galaxy.yml``. For other values, you will need to adjust the location patterns accordingly. In both nginx config examples, you might want to replace localhost with your server domain (or possibly ``127.0.0.1``), depending on your server setup. -You will also need to enable a docker destination in the job_conf.xml file. -An example ``job_conf.xml`` file as seen in ``config/job_conf.xml.interactivetools``: - -..
code-block:: xml - - - - - - - - - - - true - - $galaxy_root:ro,$tool_directory:ro,$job_directory:rw,$working_directory:rw,$default_file_path:ro - false - bridge - true - true - true - - - --add-host localhost:host-gateway - - - docker_dispatch - docker_local - local - - - +Job runner configuration in production +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ InteractiveTools have been enabled for the Condor, Slurm, Pulsar and Kuberneters job runner. A destination configuration for Condor may look like this: .. code-block:: xml - - true - false - + condor: + runner: condor + docker_enabled: true + docker_sudo: false **Note on resource consumption:** Keep in mind that Distributed Resource @@ -193,13 +400,3 @@ user desires, this may not be advisable and an admin may want to restrict the runtime of InteractiveTools *(and jobs in general)*. However, if the job is killed by the DRM, the user is not informed beforehand and data in the container could be discarded. - -Some **example test InteractiveTools** have been defined, and can be added to -the ``config/tool_conf.xml``: - -.. code-block:: xml - - - - - diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index a181d693c415..2431efd7181c 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -333,9 +333,15 @@ gravity: # names. # environment: {} - # Configure dynamic handlers in this section. + # Configure dynamic handlers in this section. Below is a simple example # See https://docs.galaxyproject.org/en/latest/admin/scaling.html#dynamically-defined-handlers for details. - # handlers: {} + #handlers: + # handler: + # processes: 3 + # pools: + # - job-handlers + # - workflow-schedulers + galaxy: # The directory that will be prepended to relative paths in options @@ -1288,16 +1294,14 @@ galaxy: # The value of this option will be resolved with respect to # . 
#interactivetools_map: interactivetools_map.sqlite + # Note: the following config should still be used due to lack of + # support of data_dir resolution in gx-it-proxy and gravity: + #interactivetools_map: database/interactivetools_map.sqlite # Prefix to use in the formation of the subdomain or path for # interactive tools #interactivetools_prefix: interactivetool - # Shorten the uuid portion of the subdomain or path for interactive - # tools. Especially useful for avoiding the need for wildcard - # certificates by keeping subdomain under 63 chars - #interactivetools_shorten_url: false - # Galaxy Interactive Tools (GxITs) can be stopped from within the # Galaxy interface, killing the GxIT job without completing its # metadata setting post-job steps. In such a case it may be desirable diff --git a/lib/galaxy/config/sample/tool_conf.xml.sample b/lib/galaxy/config/sample/tool_conf.xml.sample index 91aac849e9fd..31b0bb4e6ef5 100644 --- a/lib/galaxy/config/sample/tool_conf.xml.sample +++ b/lib/galaxy/config/sample/tool_conf.xml.sample @@ -133,11 +133,14 @@ + + + --> diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 649dec68c1d5..7d12d6ce3a99 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -1450,15 +1450,6 @@ mapping: desc: | Prefix to use in the formation of the subdomain or path for interactive tools - interactivetools_shorten_url: - type: bool - default: false - required: false - desc: | - Shorten the uuid portion of the subdomain or path for interactive tools. 
- Especially useful for avoiding the need for wildcard certificates by keeping - subdomain under 63 chars - retry_interactivetool_metadata_internally: type: bool default: true diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index e2d9cf909222..aae0f8486de6 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1249,6 +1249,9 @@ def get_special(): tool_evaluator = self._get_tool_evaluator(job) compute_environment = compute_environment or self.default_compute_environment(job) + if hasattr(self.app, "interactivetool_manager"): + self.interactivetools = tool_evaluator.populate_interactivetools() + self.app.interactivetool_manager.create_interactivetool(job, self.tool, self.interactivetools) tool_evaluator.set_compute_environment(compute_environment, get_special=get_special) ( self.command_line, @@ -1257,9 +1260,6 @@ def get_special(): self.environment_variables, ) = tool_evaluator.build() job.command_line = self.command_line - if hasattr(self.app, "interactivetool_manager"): - self.interactivetools = tool_evaluator.populate_interactivetools() - self.app.interactivetool_manager.create_interactivetool(job, self.tool, self.interactivetools) # Ensure galaxy_lib_dir is set in case there are any later chdirs self.galaxy_lib_dir # noqa: B018 diff --git a/lib/galaxy/managers/interactivetool.py b/lib/galaxy/managers/interactivetool.py index d35f5d7d8a31..ac05892bef7c 100644 --- a/lib/galaxy/managers/interactivetool.py +++ b/lib/galaxy/managers/interactivetool.py @@ -1,5 +1,10 @@ +import json import logging import sqlite3 +from urllib.parse import ( + urlsplit, + urlunsplit, +) from sqlalchemy import ( or_, @@ -12,6 +17,7 @@ Job, ) from galaxy.model.base import transaction +from galaxy.security.idencoding import IdAsLowercaseAlphanumEncodingHelper from galaxy.util.filelock import FileLock log = logging.getLogger(__name__) @@ -138,7 +144,12 @@ def save_entry_point(self, entry_point): entry_point.token, entry_point.host, 
entry_point.port, - None, + json.dumps( + { + "requires_path_in_url": entry_point.requires_path_in_url, + "requires_path_in_header_named": entry_point.requires_path_in_header_named, + } + ), ) def remove_entry_point(self, entry_point): @@ -157,7 +168,8 @@ def __init__(self, app): self.security = app.security self.sa_session = app.model.context self.job_manager = app.job_manager - self.propagator = InteractiveToolSqlite(app.config.interactivetools_map, app.security.encode_id) + self.encoder = IdAsLowercaseAlphanumEncodingHelper(app.security) + self.propagator = InteractiveToolSqlite(app.config.interactivetools_map, self.encoder.encode_id) def create_entry_points(self, job, tool, entry_points=None, flush=True): entry_points = entry_points or tool.ports @@ -167,8 +179,10 @@ def create_entry_points(self, job, tool, entry_points=None, flush=True): tool_port=entry["port"], entry_url=entry["url"], name=entry["name"], + label=entry["label"], requires_domain=entry["requires_domain"], - short_token=self.app.config.interactivetools_shorten_url, + requires_path_in_url=entry["requires_path_in_url"], + requires_path_in_header_named=entry["requires_path_in_header_named"], ) self.sa_session.add(ep) if flush: @@ -287,44 +301,55 @@ def remove_entry_point(self, entry_point, flush=True): def target_if_active(self, trans, entry_point): if entry_point.active and not entry_point.deleted: - request_host = trans.request.host - if not self.app.config.interactivetools_upstream_proxy and self.app.config.interactivetools_proxy_host: - request_host = self.app.config.interactivetools_proxy_host - protocol = trans.request.host_url.split("//", 1)[0] + use_it_proxy_host_cfg = ( + not self.app.config.interactivetools_upstream_proxy and self.app.config.interactivetools_proxy_host + ) + + url_parts = urlsplit(trans.request.host_url) + url_host = self.app.config.interactivetools_proxy_host if use_it_proxy_host_cfg else trans.request.host + url_path = url_parts.path + if entry_point.requires_domain: - 
rval = f"{protocol}//{self.get_entry_point_subdomain(trans, entry_point)}.{request_host}/" + url_host = f"{self.get_entry_point_subdomain(trans, entry_point)}.{url_host}" if entry_point.entry_url: - rval = "{}/{}".format(rval.rstrip("/"), entry_point.entry_url.lstrip("/")) + url_path = f"{url_path.rstrip('/')}/{entry_point.entry_url.lstrip('/')}" else: - rval = self.get_entry_point_path(trans, entry_point) - if not self.app.config.interactivetools_upstream_proxy and self.app.config.interactivetools_proxy_host: - rval = f"{protocol}//{request_host}{rval}" - return rval + url_path = self.get_entry_point_path(trans, entry_point) + if not use_it_proxy_host_cfg: + return url_path + + return urlunsplit((url_parts.scheme, url_host, url_path, "", "")) + + def _get_entry_point_url_elements(self, trans, entry_point): + encoder = IdAsLowercaseAlphanumEncodingHelper(trans.security) + ep_encoded_id = encoder.encode_id(entry_point.id) + ep_class_id = entry_point.class_id + ep_prefix = self.app.config.interactivetools_prefix + ep_token = entry_point.token + return ep_encoded_id, ep_class_id, ep_prefix, ep_token def get_entry_point_subdomain(self, trans, entry_point): - entry_point_encoded_id = trans.security.encode_id(entry_point.id) - entry_point_class = entry_point.__class__.__name__.lower() - entry_point_prefix = self.app.config.interactivetools_prefix - entry_point_token = entry_point.token - if self.app.config.interactivetools_shorten_url: - return f"{entry_point_encoded_id}-{entry_point_token[:10]}.{entry_point_prefix}" - return f"{entry_point_encoded_id}-{entry_point_token}.{entry_point_class}.{entry_point_prefix}" + ep_encoded_id, ep_class_id, ep_prefix, ep_token = self._get_entry_point_url_elements(trans, entry_point) + return f"{ep_encoded_id}-{ep_token}.{ep_class_id}.{ep_prefix}" def get_entry_point_path(self, trans, entry_point): - entry_point_encoded_id = trans.security.encode_id(entry_point.id) - entry_point_class = entry_point.__class__.__name__.lower() - 
entry_point_prefix = self.app.config.interactivetools_prefix - rval = "/" + url_path = "/" if not entry_point.requires_domain: - rval = str(self.app.config.interactivetools_base_path).rstrip("/").lstrip("/") - if self.app.config.interactivetools_shorten_url: - rval = f"/{rval}/{entry_point_prefix}/{entry_point_encoded_id}/{entry_point.token[:10]}/" - else: - rval = f"/{rval}/{entry_point_prefix}/access/{entry_point_class}/{entry_point_encoded_id}/{entry_point.token}/" + ep_encoded_id, ep_class_id, ep_prefix, ep_token = self._get_entry_point_url_elements(trans, entry_point) + path_parts = [ + part.strip("/") + for part in ( + str(self.app.config.interactivetools_base_path), + ep_prefix, + ep_class_id, + ep_encoded_id, + ep_token, + ) + ] + url_path += "/".join(part for part in path_parts if part) + "/" if entry_point.entry_url: - rval = f"{rval.rstrip('/')}/{entry_point.entry_url.lstrip('/')}" - rval = "/" + rval.lstrip("/") - return rval + url_path += entry_point.entry_url.lstrip("/") + return url_path def access_entry_point_target(self, trans, entry_point_id): entry_point = trans.sa_session.get(InteractiveToolEntryPoint, entry_point_id) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index c437cb1ee65a..503dd31b6d8c 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -19,6 +19,7 @@ from collections.abc import Callable from datetime import timedelta from enum import Enum +from secrets import token_hex from string import Template from typing import ( Any, @@ -149,6 +150,7 @@ from galaxy.util import ( directory_hash_id, enum_values, + hex_to_lowercase_alphanum, listify, ready_name_for_url, unicodify, @@ -2684,17 +2686,21 @@ class InteractiveToolEntryPoint(Base, Dictifiable, RepresentById): protocol = Column(TEXT) entry_url = Column(TEXT) requires_domain = Column(Boolean, default=True) + requires_path_in_url = Column(Boolean, default=False) + requires_path_in_header_named = Column(TEXT) info = 
Column(MutableJSONType, nullable=True) configured = Column(Boolean, default=False) deleted = Column(Boolean, default=False) created_time = Column(DateTime, default=now) modified_time = Column(DateTime, default=now, onupdate=now) + label = Column(TEXT) job = relationship("Job", back_populates="interactivetool_entry_points", uselist=False) dict_collection_visible_keys = [ "id", "job_id", "name", + "label", "active", "created_time", "modified_time", @@ -2704,21 +2710,27 @@ class InteractiveToolEntryPoint(Base, Dictifiable, RepresentById): "id", "job_id", "name", + "label", "active", "created_time", "modified_time", "output_datasets_ids", ] - def __init__(self, requires_domain=True, configured=False, deleted=False, short_token=False, **kwd): + def __init__( + self, + requires_domain=True, + requires_path_in_url=False, + configured=False, + deleted=False, + **kwd, + ): super().__init__(**kwd) self.requires_domain = requires_domain + self.requires_path_in_url = requires_path_in_url self.configured = configured self.deleted = deleted - if short_token: - self.token = (self.token or uuid4().hex)[:10] - else: - self.token = self.token or uuid4().hex + self.token = self.token or hex_to_lowercase_alphanum(token_hex(8)) self.info = self.info or {} @property @@ -2728,6 +2740,10 @@ def active(self): return not self.job.finished return False + @property + def class_id(self): + return "ep" + @property def output_datasets_ids(self): return [da.dataset.id for da in self.job.output_datasets] diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/8a19186a6ee7_add_columns_to_interactivetool_entry_point.py b/lib/galaxy/model/migrations/alembic/versions_gxy/8a19186a6ee7_add_columns_to_interactivetool_entry_point.py new file mode 100644 index 000000000000..e2876e5bdf6e --- /dev/null +++ b/lib/galaxy/model/migrations/alembic/versions_gxy/8a19186a6ee7_add_columns_to_interactivetool_entry_point.py @@ -0,0 +1,41 @@ +"""add label, requires_path_in_url and requires_path_in_header_named 
class IdAsLowercaseAlphanumEncodingHelper:
    """
    Helper class to encode IDs as lowercase alphanumeric strings, and vice versa.

    Wraps an ``IdEncodingHelper``: its hexadecimal encoding is converted to a
    base-36 string on the way out and back to hexadecimal on the way in.
    """

    # Width, in hex digits, that decode_id pads to. The original code padded to
    # exactly this width. NOTE(review): presumably the wrapped encoder emits hex
    # in multiples of 16 digits (8-byte cipher blocks) -- confirm against
    # IdEncodingHelper.
    _HEX_BLOCK_LENGTH = 16

    def __init__(self, security: IdEncodingHelper):
        self.security = security

    def encode_id(self, id: int) -> str:
        """Encode ``id`` with the wrapped helper and convert the hex result to base 36."""
        return hex_to_lowercase_alphanum(self.security.encode_id(id))

    def decode_id(self, id: str) -> int:
        """
        Convert a base-36 encoded id back to hexadecimal and decode it.

        The base-36 round trip goes through an integer, so leading zeros of the
        hexadecimal form are lost. They are restored by left-padding with "0"
        up to the next multiple of ``_HEX_BLOCK_LENGTH`` (never fewer than one
        block), instead of to a fixed 16 digits, so encodings longer than one
        block that start with a zero digit are also restored to full width.
        """
        hex_id = lowercase_alphanum_to_hex(id)
        block = self._HEX_BLOCK_LENGTH
        # Ceiling division: number of whole blocks needed to hold hex_id.
        padded_length = max(block, -(-len(hex_id) // block) * block)
        return self.security.decode_id(hex_id.rjust(padded_length, "0"))
definition = { "name": environment_variable_el.get("name"), "template": template, @@ -305,8 +310,25 @@ def parse_interactivetool(self): name = ep_el.get("name", None) if name: name = name.strip() + label = ep_el.get("label", None) + if label: + label = label.strip() requires_domain = string_as_bool(ep_el.attrib.get("requires_domain", False)) - rtt.append(dict(port=port, url=url, name=name, requires_domain=requires_domain)) + requires_path_in_url = string_as_bool(ep_el.attrib.get("requires_path_in_url", False)) + requires_path_in_header_named = ep_el.get("requires_path_in_header_named", None) + if requires_path_in_header_named: + requires_path_in_header_named = requires_path_in_header_named.strip() + rtt.append( + dict( + port=port, + url=url, + name=name, + label=label, + requires_domain=requires_domain, + requires_path_in_url=requires_path_in_url, + requires_path_in_header_named=requires_path_in_header_named, + ) + ) return rtt def parse_hidden(self): diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 1a6649becbd8..4994e5e7b7ee 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -370,7 +370,7 @@ to provide access to graphical tools in real-time. ```xml - + 80 landing/${template_enabled}/index.html @@ -385,12 +385,43 @@ to provide access to graphical tools in real-time. - This value defines the name of the entry point. + The name of the entry point. - + - This value declares if domain-based proxying is required. Default is False. Currently only works when True. + A unique label to identify the entry point. Used by interactive client tools to connect. + + + + + Whether domain-based proxying is required for the entry point. Default is True. + + + + + + + + + + + + @@ -6242,6 +6273,7 @@ define. 
+ diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index d1c02a980301..d06af3b01d53 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -521,7 +521,15 @@ def populate_interactivetools(self): it = [] for ep in getattr(self.tool, "ports", []): ep_dict = {} - for key in "port", "name", "url", "requires_domain": + for key in ( + "port", + "name", + "label", + "url", + "requires_domain", + "requires_path_in_url", + "requires_path_in_header_named", + ): val = ep.get(key, None) if val is not None and not isinstance(val, bool): val = fill_template( @@ -654,6 +662,18 @@ def _build_environment_variables(self): elif inject and inject.startswith("oidc_"): environment_variable_template = self.get_oidc_token(inject) is_template = False + elif inject and inject == "entry_point_path_for_label" and environment_variable_template: + from galaxy.managers.interactivetool import InteractiveToolManager + + entry_point_label = environment_variable_template + matching_eps = [ep for ep in self.job.interactivetool_entry_points if ep.label == entry_point_label] + if matching_eps: + entry_point = matching_eps[0] + entry_point_path = InteractiveToolManager(self.app).get_entry_point_path(self.app, entry_point) + environment_variable_template = entry_point_path.rstrip("/") + else: + environment_variable_template = "" + is_template = False else: is_template = True with tempfile.NamedTemporaryFile(dir=directory, prefix="tool_env_", delete=False) as temp: diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py index afeb7307e54f..2db3e454bb22 100644 --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -1882,3 +1882,23 @@ def enum_values(enum_class): Values are in member definition order. 
""" return [value.value for value in enum_class.__members__.values()] + + +def hex_to_lowercase_alphanum(hex_string: str) -> str: + """ + Convert a hexadecimal string encoding into a lowercase 36-base alphanumeric string using the + characters a-z and 0-9 + """ + import numpy as np + + return np.base_repr(int(hex_string, 16), 36).lower() + + +def lowercase_alphanum_to_hex(lowercase_alphanum: str) -> str: + """ + Convert a lowercase 36-base alphanumeric string encoding using the characters a-z and 0-9 to a + hexadecimal string + """ + import numpy as np + + return np.base_repr(int(lowercase_alphanum, 36), 16).lower() diff --git a/lib/galaxy/webapps/galaxy/api/tool_entry_points.py b/lib/galaxy/webapps/galaxy/api/tool_entry_points.py index d2d43031dfb3..bf494970d93f 100644 --- a/lib/galaxy/webapps/galaxy/api/tool_entry_points.py +++ b/lib/galaxy/webapps/galaxy/api/tool_entry_points.py @@ -12,6 +12,7 @@ InteractiveToolEntryPoint, Job, ) +from galaxy.security.idencoding import IdAsLowercaseAlphanumEncodingHelper from galaxy.structured_app import StructuredApp from galaxy.web import expose_api_anonymous_and_sessionless from . 
import BaseGalaxyAPIController @@ -60,7 +61,11 @@ def index(self, trans: ProvidesUserContext, running=False, job_id=None, **kwd): rval = [] for entry_point in entry_points: - as_dict = self.encode_all_ids(trans, entry_point.to_dict(), True) + entrypoint_id_encoder = IdAsLowercaseAlphanumEncodingHelper(trans.security) + as_dict = entry_point.to_dict() + as_dict["id"] = entrypoint_id_encoder.encode_id(as_dict["id"]) + as_dict_no_id = {k: v for k, v in as_dict.items() if k != "id"} + as_dict.update(self.encode_all_ids(trans, as_dict_no_id, True)) target = self.interactivetool_manager.target_if_active(trans, entry_point) if target: as_dict["target"] = target @@ -82,7 +87,8 @@ def access_entry_point(self, trans: ProvidesUserContext, id, **kwd): # Because of auto id encoding needed for link from grid, the item.id keyword must be 'id' if not id: raise exceptions.RequestParameterMissingException("Must supply entry point ID.") - entry_point_id = self.decode_id(id) + entrypoint_id_encoder = IdAsLowercaseAlphanumEncodingHelper(trans.security) + entry_point_id = entrypoint_id_encoder.decode_id(id) return {"target": self.interactivetool_manager.access_entry_point_target(trans, entry_point_id)} @expose_api_anonymous_and_sessionless @@ -93,7 +99,8 @@ def stop_entry_point(self, trans: ProvidesUserContext, id, **kwds): if not id: raise exceptions.RequestParameterMissingException("Must supply entry point id") try: - entry_point_id = self.decode_id(id) + entrypoint_id_encoder = IdAsLowercaseAlphanumEncodingHelper(trans.security) + entry_point_id = entrypoint_id_encoder.decode_id(id) entry_point = trans.sa_session.get(InteractiveToolEntryPoint, entry_point_id) except Exception: raise exceptions.RequestParameterInvalidException("entry point invalid") diff --git a/pyproject.toml b/pyproject.toml index ed8f9cb93a85..ec98c6f49ef2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ fs = "*" future = "*" galaxy_sequence_utils = "*" graphene-sqlalchemy = "3.0.0b3" # need a 
beta release to be compat. with starlette plugin -gravity = ">=1.0" +gravity = ">=1.0.4" gunicorn = "*" gxformat2 = "*" h5grove = ">=1.2.1" diff --git a/test/integration/test_interactivetools_api.py b/test/integration/test_interactivetools_api.py index 96d5c8d5882e..5d59c9f07557 100644 --- a/test/integration/test_interactivetools_api.py +++ b/test/integration/test_interactivetools_api.py @@ -156,7 +156,6 @@ class TestInteractiveToolsShortURLIntegration(AbstractTestCases.BaseInteractiveT @classmethod def handle_galaxy_config_kwds(cls, config): super().handle_galaxy_config_kwds(config) - config["interactivetools_shorten_url"] = True config["job_config_file"] = DOCKERIZED_JOB_CONFIG_FILE diff --git a/tools/interactive/interactivetool_guacamole_desktop.xml b/tools/interactive/interactivetool_guacamole_desktop.xml index 665f4d33eb81..dacf1589b276 100644 --- a/tools/interactive/interactivetool_guacamole_desktop.xml +++ b/tools/interactive/interactivetool_guacamole_desktop.xml @@ -3,7 +3,7 @@ quay.io/bgruening/guacamole-desktop - + 8080 diff --git a/tools/interactive/interactivetool_jupyter_notebook_1.0.0.xml b/tools/interactive/interactivetool_jupyter_notebook_1.0.0.xml index c7e4c9bb8508..22d22b749f51 100644 --- a/tools/interactive/interactivetool_jupyter_notebook_1.0.0.xml +++ b/tools/interactive/interactivetool_jupyter_notebook_1.0.0.xml @@ -3,7 +3,7 @@ quay.io/bgruening/docker-jupyter-notebook:2021-03-05 - + 8888 ipython/lab @@ -14,6 +14,7 @@ 8080 $__galaxy_url__ + jupytool @@ -80,6 +81,7 @@ except FileNotFoundError: export GALAXY_WORKING_DIR=`pwd` && mkdir -p ./jupyter/outputs/collection && mkdir -p ./jupyter/galaxy_inputs && + export PROXY_PREFIX=\${EP_PATH%/ipython*} && ## change into the directory where the notebooks are located cd ./jupyter/ && diff --git a/tools/interactive/interactivetool_openrefine.xml b/tools/interactive/interactivetool_openrefine.xml index c1772cbb52e5..1c8d9d96fb89 100644 --- a/tools/interactive/interactivetool_openrefine.xml +++ 
b/tools/interactive/interactivetool_openrefine.xml @@ -4,7 +4,7 @@ ylebras/openrefine-docker - + 3333 @@ -81,4 +81,3 @@ Example input file (TAB separated):: ]]> - diff --git a/tools/interactive/interactivetool_rstudio.xml b/tools/interactive/interactivetool_rstudio.xml index f7f5a7fc0aaf..041399ba59a5 100644 --- a/tools/interactive/interactivetool_rstudio.xml +++ b/tools/interactive/interactivetool_rstudio.xml @@ -4,14 +4,13 @@ quay.io/erasche/docker-rstudio-notebook:ie2 - + 8787 / ${__app__.security.encode_id($jupyter_notebook.history_id)} - ${__app__.config.galaxy_infrastructure_url} 8080 $__galaxy_url__ true diff --git a/tools/interactive/interactivetool_wallace.xml b/tools/interactive/interactivetool_wallace.xml index 5133cf9d5a3d..c161798bc3b5 100644 --- a/tools/interactive/interactivetool_wallace.xml +++ b/tools/interactive/interactivetool_wallace.xml @@ -12,7 +12,7 @@ ${__app__.security.encode_id($outfile.history_id)} - ${__app__.config.galaxy_infrastructure_url} + $__galaxy_url__ 8080 $__galaxy_url__