Skip to content

Commit

Permalink
Support CWL 1.2.1 (#4682)
Browse files Browse the repository at this point in the history
* cwl: use the latest commit from the proposed CWL v1.2.1 branch
* Double default CWL conformance test timeout
* Support abs path for directory outputs
* Better comment for why local paths are permitted
* add relax-path-checks to CI tests

---------

Co-authored-by: Michael R. Crusoe <[email protected]>
Co-authored-by: Michael R. Crusoe <[email protected]>
  • Loading branch information
3 people authored Dec 5, 2023
1 parent d906619 commit deddac4
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 29 deletions.
13 changes: 5 additions & 8 deletions src/toil/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
Union,
cast,
overload)
from urllib.parse import urlparse
from urllib.parse import urlparse, unquote, quote

import requests
from configargparse import ArgParser, YAMLConfigFileParser
Expand Down Expand Up @@ -2167,10 +2167,8 @@ def normalize_uri(uri: str, check_existence: bool = False) -> str:
:param check_existence: If set, raise FileNotFoundError if a URI points to
a local file that does not exist.
"""
if urlparse(uri).scheme == "file":
uri = urlparse(
uri
).path # this should strip off the local file scheme; it will be added back
if urlparse(uri).scheme == 'file':
uri = unquote(urlparse(uri).path) # this should strip off the local file scheme; it will be added back

# account for the scheme-less case, which should be coerced to a local absolute path
if urlparse(uri).scheme == "":
Expand All @@ -2179,9 +2177,8 @@ def normalize_uri(uri: str, check_existence: bool = False) -> str:
raise FileNotFoundError(
f'Could not find local file "{abs_path}" when importing "{uri}".\n'
f'Make sure paths are relative to "{os.getcwd()}" or use absolute paths.\n'
f"If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.)."
)
return f"file://{abs_path}"
f'If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.).')
return f'file://{quote(abs_path)}'
return uri

def _setBatchSystemEnvVars(self) -> None:
Expand Down
25 changes: 20 additions & 5 deletions src/toil/cwl/cwltoil.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
TypeVar,
Union,
cast,
Sequence,
)
from urllib.parse import quote, unquote, urlparse, urlsplit

Expand Down Expand Up @@ -689,6 +690,8 @@ def __init__(
streaming on, and returns a file: URI to where the file or
directory has been downloaded to. Meant to be a partially-bound
version of toil_get_file().
:param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
virtualized and absolute local paths
"""
self.get_file = get_file
self.stage_listing = stage_listing
Expand All @@ -710,8 +713,9 @@ def visit(
This is called on each File or Directory CWL object. The Files and
Directories all have "location" fields. For the Files, these are from
upload_file(), and for the Directories, these are from
upload_directory(), with their children being assigned
locations based on listing the Directories using ToilFsAccess.
upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
paths.
:param obj: The CWL File or Directory to process
Expand Down Expand Up @@ -842,6 +846,14 @@ def visit(
# We can't really make the directory. Maybe we are
# exporting from the leader and it doesn't matter.
resolved = location
elif location.startswith("/"):
# Test if path is an absolute local path
# Does not check if the path is relative
# While Toil encodes paths into a URL with ToilPathMapper,
# something called internally in cwltool may return an absolute path
# ex: if cwltool calls itself internally in command_line_tool.py,
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
resolved = location
else:
raise RuntimeError("Unsupported location: " + location)

Expand Down Expand Up @@ -918,7 +930,6 @@ def visit(
)
else:
deref = ab

if deref.startswith("file:"):
deref = schema_salad.ref_resolver.uri_file_path(deref)
if urlsplit(deref).scheme in ["http", "https"]:
Expand Down Expand Up @@ -2075,7 +2086,7 @@ def _realpath(
"WritableDirectory",
]:
os.makedirs(p.target)
if not os.path.exists(p.target) and p.type in ["File", "WritableFile"]:
if p.type in ["File", "WritableFile"]:
if p.resolved.startswith("toilfile:"):
# We can actually export this
os.makedirs(os.path.dirname(p.target), exist_ok=True)
Expand All @@ -2088,7 +2099,7 @@ def _realpath(
os.makedirs(os.path.dirname(p.target), exist_ok=True)
shutil.copyfile(p.resolved, p.target)
# TODO: can a toildir: "file" get here?
if not os.path.exists(p.target) and p.type in [
if p.type in [
"CreateFile",
"CreateWritableFile",
]:
Expand All @@ -2111,6 +2122,7 @@ def _check_adjust(f: CWLObjectType) -> CWLObjectType:
# Make the location point to the place we put this thing on the
# local filesystem.
f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
f["path"] = mapped_location.target

if "contents" in f:
del f["contents"]
Expand Down Expand Up @@ -3214,6 +3226,8 @@ def determine_load_listing(
class NoAvailableJobStoreException(Exception):
"""Indicates that no job store name is available."""

pass


def generate_default_job_store(
batch_system_name: Optional[str],
Expand Down Expand Up @@ -3307,6 +3321,7 @@ def add_cwl_options(parser: argparse.ArgumentParser) -> None:
'For example: "%(prog)s workflow.cwl --file1 file". '
"If an input has the same name as a Toil option, pass '--' before it.",
)

parser.add_argument("--not-strict", action="store_true")
parser.add_argument(
"--enable-dev",
Expand Down
13 changes: 7 additions & 6 deletions src/toil/jobStores/fileJobStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,14 @@ def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, sym
# symlink argument says whether the caller can take symlinks or not
# ex: if false, it implies the workflow cannot work with symlinks and thus will hardlink imports
# default is true since symlinking everything is ideal
uri_path = unquote(uri.path)
if issubclass(otherCls, FileJobStore):
if os.path.isdir(uri.path):
if os.path.isdir(uri_path):
# Don't allow directories (unless someone is racing us)
raise IsADirectoryError(f"URI {uri} points to a directory but a file was expected")
if shared_file_name is None:
executable = os.stat(uri.path).st_mode & stat.S_IXUSR != 0
absPath = self._get_unique_file_path(uri.path) # use this to get a valid path to write to in job store
executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
absPath = self._get_unique_file_path(uri_path) # use this to get a valid path to write to in job store
with self.optional_hard_copy(hardlink):
self._copy_or_link(uri, absPath, symlink=symlink)
# TODO: os.stat(absPath).st_size consistently gives values lower than
Expand Down Expand Up @@ -819,7 +820,7 @@ def _get_file_path_from_id(self, jobStoreFileID):
"""

# We just make the file IDs paths under the job store overall.
absPath = os.path.join(self.jobStoreDir, jobStoreFileID)
absPath = os.path.join(self.jobStoreDir, unquote(jobStoreFileID))

# Don't validate here, we are called by the validation logic

Expand All @@ -832,14 +833,14 @@ def _get_file_id_from_path(self, absPath):
:rtype : string, string is the file ID.
"""

return absPath[len(self.jobStoreDir)+1:]
return quote(absPath[len(self.jobStoreDir)+1:])

def _check_job_store_file_id(self, jobStoreFileID):
"""
:raise NoSuchFileException: if the file with ID jobStoreFileID does
not exist or is not a file
"""
if not self.file_exists(jobStoreFileID):
if not self.file_exists(unquote(jobStoreFileID)):
raise NoSuchFileException(jobStoreFileID)

def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
Expand Down
38 changes: 28 additions & 10 deletions src/toil/test/cwl/cwlTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
from toil.test.provisioners.clusterTest import AbstractClusterTest

log = logging.getLogger(__name__)
CONFORMANCE_TEST_TIMEOUT = 5000
CONFORMANCE_TEST_TIMEOUT = 10000


def run_conformance_tests(
Expand Down Expand Up @@ -134,10 +134,10 @@ def run_conformance_tests(
"--logDebug",
"--statusWait=10",
"--retryCount=2",
"--relax-path-checks",
f"--caching={caching}"
]

args_passed_directly_to_runner.append(f"--caching={caching}")

if extra_args:
args_passed_directly_to_runner += extra_args

Expand Down Expand Up @@ -385,6 +385,7 @@ def test_run_colon_output(self):
)

def test_glob_dir_bypass_file_store(self):
self.maxDiff = 1000
try:
# We need to output to the current directory to make sure that
# works.
Expand Down Expand Up @@ -609,10 +610,12 @@ def test_preemptible_expression(self):

@staticmethod
def _expected_seqtk_output(outDir):
loc = "file://" + os.path.join(outDir, "out")
path = os.path.join(outDir, "out")
loc = "file://" + path
return {
"output1": {
"location": loc,
"path": path,
"checksum": "sha1$322e001e5a99f19abdce9f02ad0f02a17b5066c2",
"basename": "out",
"class": "File",
Expand All @@ -622,10 +625,12 @@ def _expected_seqtk_output(outDir):

@staticmethod
def _expected_revsort_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 1111,
"class": "File",
Expand All @@ -635,10 +640,12 @@ def _expected_revsort_output(outDir):

@staticmethod
def _expected_revsort_nochecksum_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 1111,
"class": "File",
Expand All @@ -647,24 +654,29 @@ def _expected_revsort_nochecksum_output(outDir):

@staticmethod
def _expected_download_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"basename": "output.txt",
"size": 0,
"class": "File",
"checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
"path": path
}
}

@staticmethod
def _expected_glob_dir_output(out_dir):
dir_loc = "file://" + os.path.join(out_dir, "shouldmake")
dir_path = os.path.join(out_dir, "shouldmake")
dir_loc = "file://" + dir_path
file_path = os.path.join(dir_path, "test.txt")
file_loc = os.path.join(dir_loc, "test.txt")
return {
"shouldmake": {
"location": dir_loc,
"path": dir_path,
"basename": "shouldmake",
"nameroot": "shouldmake",
"nameext": "",
Expand All @@ -673,6 +685,7 @@ def _expected_glob_dir_output(out_dir):
{
"class": "File",
"location": file_loc,
"path": file_path,
"basename": "test.txt",
"checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
"size": 0,
Expand All @@ -695,10 +708,12 @@ def _expected_load_contents_output(cls, out_dir):

@staticmethod
def _expected_colon_output(outDir):
path = os.path.join(outDir, "A:Gln2Cys_result")
loc = "file://" + os.path.join(outDir, "A%3AGln2Cys_result")
return {
"result": {
"location": loc,
"path": path,
"basename": "A:Gln2Cys_result",
"class": "Directory",
"listing": [
Expand All @@ -710,16 +725,19 @@ def _expected_colon_output(outDir):
"size": 1111,
"nameroot": "whale",
"nameext": ".txt",
"path": f"{path}/whale.txt"
}
],
}
}

def _expected_streaming_output(self, outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 24,
"class": "File",
Expand Down Expand Up @@ -926,7 +944,7 @@ def setUpClass(cls):
cls.test_yaml = os.path.join(cls.cwlSpec, "conformance_tests.yaml")
# TODO: Use a commit zip in case someone decides to rewrite master's history?
url = "https://github.com/common-workflow-language/cwl-v1.2.git"
commit = "8c3fd9d9f0209a51c5efacb1c7bc02a1164688d6"
commit = "0d538a0dbc5518f3c6083ce4571926f65cb84f76"
p = subprocess.Popen(
f"git clone {url} {cls.cwlSpec} && cd {cls.cwlSpec} && git checkout {commit}",
shell=True,
Expand Down

0 comments on commit deddac4

Please sign in to comment.