Skip to content

Commit

Permalink
Merge pull request #18841 from mvdbeek/tighten_trs_url_regex
Browse files Browse the repository at this point in the history
[24.1] Tighten TRS url check
  • Loading branch information
mvdbeek authored Sep 19, 2024
2 parents 4a3e6dd + 64e74b8 commit 73b94df
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
6 changes: 4 additions & 2 deletions lib/galaxy/webapps/galaxy/api/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,17 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd):
trs_version_id = None
import_source = None
if "trs_url" in payload:
parts = self.app.trs_proxy.match_url(payload["trs_url"])
parts = self.app.trs_proxy.match_url(
payload["trs_url"], trans.app.config.fetch_url_allowlist_ips
)
if parts:
server = self.app.trs_proxy.server_from_url(parts["trs_base_url"])
trs_tool_id = parts["tool_id"]
trs_version_id = parts["version_id"]
payload["trs_tool_id"] = trs_tool_id
payload["trs_version_id"] = trs_version_id
else:
raise exceptions.MessageException("Invalid TRS URL.")
raise exceptions.RequestParameterInvalidException(f"Invalid TRS URL {payload['trs_url']}.")
else:
trs_server = payload.get("trs_server")
server = self.app.trs_proxy.get_server(trs_server)
Expand Down
18 changes: 16 additions & 2 deletions lib/galaxy/workflow/trs_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@
import os
import re
import urllib.parse
from typing import List

import yaml

from galaxy.exceptions import MessageException
from galaxy.exceptions import (
MessageException,
RequestParameterInvalidException,
)
from galaxy.files.uris import validate_non_local
from galaxy.util import (
asbool,
DEFAULT_SOCKET_TIMEOUT,
requests,
)
from galaxy.util.config_parsers import IpAllowedListEntryT
from galaxy.util.search import parse_filters

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -73,7 +79,15 @@ def get_server(self, trs_server):
def server_from_url(self, trs_url):
return TrsServer(trs_url)

def match_url(self, url):
def match_url(self, url, ip_allowlist: List[IpAllowedListEntryT]):
if url.lstrip().startswith("file://"):
# requests doesn't know what to do with file:// anyway, but just in case we swap
# out the implementation
raise RequestParameterInvalidException("Invalid TRS URL %s", url)
validate_non_local(url, ip_allowlist=ip_allowlist or [])
return self._match_url(url)

def _match_url(self, url):
if matches := re.match(TRS_URL_REGEX, url):
match_dict = matches.groupdict()
match_dict["tool_id"] = urllib.parse.unquote(match_dict["tool_id"])
Expand Down
32 changes: 20 additions & 12 deletions test/unit/workflows/test_trs_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
import yaml

from galaxy.exceptions import ConfigDoesNotAllowException
from galaxy.workflow.trs_proxy import (
GA4GH_GALAXY_DESCRIPTOR,
parse_search_kwds,
Expand Down Expand Up @@ -49,19 +50,19 @@ def test_proxy():

def test_match_url():
proxy = TrsProxy()
valid_dockstore = proxy.match_url(
valid_dockstore = proxy._match_url(
"https://dockstore.org/api/ga4gh/trs/v2/tools/"
"quay.io%2Fcollaboratory%2Fdockstore-tool-bedtools-genomecov/versions/0.3"
"quay.io%2Fcollaboratory%2Fdockstore-tool-bedtools-genomecov/versions/0.3",
)
assert valid_dockstore
assert valid_dockstore["trs_base_url"] == "https://dockstore.org/api"
# Should unquote
assert valid_dockstore["tool_id"] == "quay.io/collaboratory/dockstore-tool-bedtools-genomecov"
assert valid_dockstore["version_id"] == "0.3"

valid_dockstore_unescaped = proxy.match_url(
valid_dockstore_unescaped = proxy._match_url(
"https://dockstore.org/api/ga4gh/trs/v2/tools/"
"#workflow/github.com/jmchilton/galaxy-workflow-dockstore-example-1/mycoolworkflow/versions/master"
"#workflow/github.com/jmchilton/galaxy-workflow-dockstore-example-1/mycoolworkflow/versions/master",
)
assert valid_dockstore_unescaped
assert valid_dockstore_unescaped["trs_base_url"] == "https://dockstore.org/api"
Expand All @@ -71,40 +72,47 @@ def test_match_url():
)
assert valid_dockstore_unescaped["version_id"] == "master"

valid_workflow_hub = proxy.match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1")
valid_workflow_hub = proxy._match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1")
assert valid_workflow_hub
assert valid_workflow_hub["trs_base_url"] == "https://workflowhub.eu"
assert valid_workflow_hub["tool_id"] == "344"
assert valid_workflow_hub["version_id"] == "1"

valid_arbitrary_trs = proxy.match_url(
valid_arbitrary_trs = proxy._match_url(
"https://my-trs-server.golf/stuff/ga4gh/trs/v2/tools/hello-world/versions/version-1"
)
assert valid_arbitrary_trs
assert valid_arbitrary_trs["trs_base_url"] == "https://my-trs-server.golf/stuff"
assert valid_arbitrary_trs["tool_id"] == "hello-world"
assert valid_arbitrary_trs["version_id"] == "version-1"

ignore_extra = proxy.match_url(
"https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1/CWL/descriptor/ro-crate-metadata.json"
ignore_extra = proxy._match_url(
"https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1/CWL/descriptor/ro-crate-metadata.json",
)
assert ignore_extra
assert ignore_extra["trs_base_url"] == "https://workflowhub.eu"
assert ignore_extra["tool_id"] == "344"
assert ignore_extra["version_id"] == "1"

invalid = proxy.match_url("https://workflowhub.eu/workflows/1")
invalid = proxy._match_url("https://workflowhub.eu/workflows/1")
assert invalid is None

missing_version = proxy.match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344")
missing_version = proxy._match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344")
assert missing_version is None

blank = proxy.match_url("")
blank = proxy.match_url("", [])
assert blank is None

not_url = proxy.match_url("1234")
not_url = proxy.match_url("1234", [])
assert not_url is None

expected_exception = None
try:
proxy.match_url("https://localhost", [])
except ConfigDoesNotAllowException as e:
expected_exception = e
assert expected_exception, "matching url against localhost should fail"


def test_server_from_url():
proxy = TrsProxy()
Expand Down

0 comments on commit 73b94df

Please sign in to comment.