diff --git a/client/src/components/DatasetInformation/DatasetHash.vue b/client/src/components/DatasetInformation/DatasetHash.vue
new file mode 100644
index 000000000000..cff960b49b0a
--- /dev/null
+++ b/client/src/components/DatasetInformation/DatasetHash.vue
@@ -0,0 +1,16 @@
+
+
+ {{ hash.hash_value }}
+
+
+
+
diff --git a/client/src/components/DatasetInformation/DatasetHashes.vue b/client/src/components/DatasetInformation/DatasetHashes.vue
new file mode 100644
index 000000000000..be4420236909
--- /dev/null
+++ b/client/src/components/DatasetInformation/DatasetHashes.vue
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
diff --git a/client/src/components/DatasetInformation/DatasetInformation.vue b/client/src/components/DatasetInformation/DatasetInformation.vue
index 77f6f4f4c8a4..f34f7a0b8e9d 100644
--- a/client/src/components/DatasetInformation/DatasetInformation.vue
+++ b/client/src/components/DatasetInformation/DatasetInformation.vue
@@ -54,6 +54,22 @@
Full Path |
{{ dataset.file_name }} |
+
+ Originally Created From a File Named |
+ {{ dataset.created_from_basename }} |
+
+
+ Sources |
+
+
+ |
+
+
+ Hashes |
+
+
+ |
+
@@ -65,6 +81,8 @@ import Utils from "utils/utils";
import UtcDate from "components/UtcDate";
import DecodedId from "../DecodedId";
import { DatasetProvider } from "components/providers";
+import DatasetSources from "./DatasetSources";
+import DatasetHashes from "./DatasetHashes";
export default {
props: {
@@ -74,7 +92,9 @@ export default {
},
},
components: {
+ DatasetHashes,
DatasetProvider,
+ DatasetSources,
DecodedId,
UtcDate,
},
diff --git a/client/src/components/DatasetInformation/DatasetSource.vue b/client/src/components/DatasetInformation/DatasetSource.vue
new file mode 100644
index 000000000000..d7c210976c5d
--- /dev/null
+++ b/client/src/components/DatasetInformation/DatasetSource.vue
@@ -0,0 +1,51 @@
+
+
+
+ {{ source.source_uri }}
+
+
+
+ {{ source.source_uri }}
+
+
+
+
+
+
+
+
diff --git a/client/src/components/DatasetInformation/DatasetSourceTransform.vue b/client/src/components/DatasetInformation/DatasetSourceTransform.vue
new file mode 100644
index 000000000000..d4f15b4d701c
--- /dev/null
+++ b/client/src/components/DatasetInformation/DatasetSourceTransform.vue
@@ -0,0 +1,94 @@
+
+
+
+ Upon ingestion into Galaxy, the following {{ actions }} were performed that modified the dataset
+ contents:
+
+ -
+
+ {{ actionShortDescription(transformAction) }}
+
+
+
+
+
+
+
+
+
+
diff --git a/client/src/components/DatasetInformation/DatasetSources.vue b/client/src/components/DatasetInformation/DatasetSources.vue
new file mode 100644
index 000000000000..61b43da2aa02
--- /dev/null
+++ b/client/src/components/DatasetInformation/DatasetSources.vue
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
diff --git a/lib/galaxy/actions/library.py b/lib/galaxy/actions/library.py
index 3a11508dea03..04be31f36abb 100644
--- a/lib/galaxy/actions/library.py
+++ b/lib/galaxy/actions/library.py
@@ -74,7 +74,7 @@ class LibraryActions:
Mixin for controllers that provide library functionality.
"""
- def _upload_dataset(self, trans, library_id: str, folder_id: str, replace_dataset: Optional[LibraryDataset] = None, **kwd):
+ def _upload_dataset(self, trans, folder_id: str, replace_dataset: Optional[LibraryDataset] = None, **kwd):
# Set up the traditional tool state/params
cntrller = 'api'
tool_id = 'upload1'
diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py
index fcfcedfc19f6..8fce7b491914 100644
--- a/lib/galaxy/datatypes/sniff.py
+++ b/lib/galaxy/datatypes/sniff.py
@@ -14,7 +14,7 @@
import tempfile
import urllib.request
import zipfile
-from typing import Callable, Dict, IO, NamedTuple, Optional
+from typing import Dict, IO, NamedTuple, Optional
from typing_extensions import Protocol
@@ -83,12 +83,27 @@ def handle_composite_file(datatype, src_path, extra_files, name, is_binary, tmp_
datatype.groom_dataset_content(file_output_path)
-def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024, regexp=None):
+class ConvertResult(NamedTuple):
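+ """Outcome of a single conversion pass over an uploaded file.
+
+ line_count is the number of lines written; converted_path is the temporary
+ output path when in_place is False (otherwise None); converted_newlines is
+ True when CR/CRLF line endings were rewritten or a trailing newline was
+ appended; converted_regex is True when the separator regexp matched and
+ fields were re-joined with tabs.
+ """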
+ line_count: int
+ converted_path: Optional[str]
+ converted_newlines: bool
+ converted_regex: bool
+
+
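+# Shared call signature of convert_newlines, convert_sep2tabs and
+# convert_newlines_sep2tabs, so convert_function() below can return any of them.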
+class ConvertFunction(Protocol):
+
+ def __call__(self, fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult:
+ ...
+
+
+def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024, regexp=None) -> ConvertResult:
"""
Converts in place a file from universal line endings
to Posix line endings.
"""
i = 0
+ converted_newlines = False
+ converted_regex = False
NEWLINE_BYTE = 10
CR_BYTE = 13
with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi:
@@ -102,28 +117,63 @@ def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] =
block = block[1:]
if block:
last_char = block[-1]
- block = block.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
+ if b"\r" in block:
+ block = block.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
+ converted_newlines = True
if regexp:
- block = b"\t".join(regexp.split(block))
+ split_block = regexp.split(block)
+ if len(split_block) > 1:
+ converted_regex = True
+ block = b"\t".join(split_block)
fp.write(block)
i += block.count(b"\n")
last_block = block
block = fi.read(block_size)
if last_block and last_block[-1] != NEWLINE_BYTE:
+ converted_newlines = True
i += 1
fp.write(b"\n")
if in_place:
shutil.move(fp.name, fname)
# Return number of lines in file.
- return (i, None)
+ return ConvertResult(i, None, converted_newlines, converted_regex)
+ else:
+ return ConvertResult(i, fp.name, converted_newlines, converted_regex)
+
+
+def convert_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024) -> ConvertResult:
+ """
+ Transforms in place a whitespace-separated ('sep') file into a tab-separated one, leaving newlines untouched.
+ """
+ patt: bytes = br"[^\S\r\n]+"
+ regexp = re.compile(patt)
+ i = 0
+ converted_newlines = False
+ converted_regex = False
+ with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi:
+ block = fi.read(block_size)
+ while block:
+ if block:
+ split_block = regexp.split(block)
+ if len(split_block) > 1:
+ converted_regex = True
+ block = b"\t".join(split_block)
+ fp.write(block)
+ i += block.count(b"\n") or block.count(b"\r")
+ block = fi.read(block_size)
+ if in_place:
+ shutil.move(fp.name, fname)
+ # Return the line count along with the conversion flags.
+ return ConvertResult(i, None, converted_newlines, converted_regex)
else:
- return (i, fp.name)
+ return ConvertResult(i, fp.name, converted_newlines, converted_regex)
-def convert_newlines_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload"):
+def convert_newlines_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult:
"""
Converts newlines in a file to posix newlines and replaces spaces with tabs.
"""
+ patt: bytes = br"[^\S\n]+"
regexp = re.compile(patt)
return convert_newlines(fname, in_place, tmp_dir, tmp_prefix, regexp=regexp)
@@ -703,6 +753,19 @@ class HandleUploadedDatasetFileInternalResponse(NamedTuple):
ext: str
converted_path: str
compressed_type: Optional[str]
+ converted_newlines: bool
+ converted_spaces: bool
+
+
+def convert_function(convert_to_posix_lines, convert_spaces_to_tabs) -> ConvertFunction:
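+ """Pick the converter for the requested upload transformations.
+
+ Both flags select convert_newlines_sep2tabs, posix lines alone selects
+ convert_newlines, and spaces-to-tabs alone selects convert_sep2tabs;
+ at least one flag must be set.
+ """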
+ assert convert_to_posix_lines or convert_spaces_to_tabs
+ if convert_spaces_to_tabs and convert_to_posix_lines:
+ convert_fxn = convert_newlines_sep2tabs
+ elif convert_to_posix_lines:
+ convert_fxn = convert_newlines
+ else:
+ convert_fxn = convert_sep2tabs
+ return convert_fxn
def handle_uploaded_dataset_file_internal(
@@ -729,6 +792,8 @@ def handle_uploaded_dataset_file_internal(
check_content=check_content,
auto_decompress=auto_decompress,
)
+ converted_newlines = False
+ converted_spaces = False
try:
if not is_valid:
if is_tar(converted_path):
@@ -748,15 +813,12 @@ def handle_uploaded_dataset_file_internal(
if not is_binary and (convert_to_posix_lines or convert_spaces_to_tabs):
# Convert universal line endings to Posix line endings, spaces to tabs (if desired)
- convert_fxn: Callable
- if convert_spaces_to_tabs:
- convert_fxn = convert_newlines_sep2tabs
- else:
- convert_fxn = convert_newlines
- line_count, _converted_path = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix)
+ convert_fxn = convert_function(convert_to_posix_lines, convert_spaces_to_tabs)
+ line_count, _converted_path, converted_newlines, converted_spaces = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix)
if not in_place:
if converted_path and filename != converted_path:
os.unlink(converted_path)
+ assert _converted_path
converted_path = _converted_path
if ext in AUTO_DETECT_EXTENSIONS:
ext = guess_ext(converted_path, sniff_order=datatypes_registry.sniff_order, is_binary=is_binary)
@@ -769,7 +831,7 @@ def handle_uploaded_dataset_file_internal(
if filename != converted_path:
os.unlink(converted_path)
raise
- return HandleUploadedDatasetFileInternalResponse(ext, converted_path, compressed_type)
+ return HandleUploadedDatasetFileInternalResponse(ext, converted_path, compressed_type, converted_newlines, converted_spaces)
AUTO_DETECT_EXTENSIONS = ['auto'] # should 'data' also cause auto detect?
diff --git a/lib/galaxy/datatypes/upload_util.py b/lib/galaxy/datatypes/upload_util.py
index 02f240858567..963e2bfb96bb 100644
--- a/lib/galaxy/datatypes/upload_util.py
+++ b/lib/galaxy/datatypes/upload_util.py
@@ -19,6 +19,8 @@ class HandleUploadResponse(NamedTuple):
datatype: data.Data
is_binary: bool
converted_path: Optional[str]
+ converted_newlines: bool
+ converted_spaces: bool
def handle_upload(
@@ -42,13 +44,15 @@ def handle_upload(
# Does the first 1K contain a null?
is_binary = check_binary(path)
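+ # Default to no newline/space conversions; updated below if the file contents are converted.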
+ converted_newlines, converted_spaces = False, False
+
# Decompress if needed/desired and determine/validate filetype. If a keep-compressed datatype is explicitly selected
# or if autodetection is selected and the file sniffs as a keep-compressed datatype, it will not be decompressed.
if not link_data_only:
if auto_decompress and is_zip(path) and not is_single_file_zip(path):
multi_file_zip = True
try:
- ext, converted_path, compression_type = sniff.handle_uploaded_dataset_file_internal(
+ ext, converted_path, compression_type, converted_newlines, converted_spaces = sniff.handle_uploaded_dataset_file_internal(
path,
registry,
ext=requested_ext,
@@ -96,4 +100,4 @@ def handle_upload(
if multi_file_zip and not getattr(datatype, 'compressed', False):
stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
- return HandleUploadResponse(stdout, ext, datatype, is_binary, converted_path)
+ return HandleUploadResponse(stdout, ext, datatype, is_binary, converted_path, converted_newlines, converted_spaces)
diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py
index ef8ee5b94108..fd59c1585a93 100644
--- a/lib/galaxy/managers/hdas.py
+++ b/lib/galaxy/managers/hdas.py
@@ -326,6 +326,8 @@ def __init__(self, app: StructuredApp):
'api_type',
'created_from_basename',
+ 'hashes',
+ 'sources',
], include_keys_from='summary')
self.add_view('extended', [
@@ -384,6 +386,8 @@ def __init__(self, app: StructuredApp):
'uuid',
'validated_state',
'validated_state_message',
+ 'hashes',
+ 'sources',
])
def serialize_copied_from_ldda_id(self, item, key, **context):
@@ -436,6 +440,8 @@ def add_serializers(self):
'api_type': lambda item, key, **context: 'file',
'type': lambda item, key, **context: 'file',
'created_from_basename': lambda item, key, **context: item.created_from_basename,
+ 'hashes': lambda item, key, **context: [h.to_dict() for h in item.hashes],
+ 'sources': lambda item, key, **context: [s.to_dict() for s in item.sources],
}
self.serializers.update(serializers)
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index a4eef6da371c..ae93c59c3775 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -3384,13 +3384,14 @@ def to_int(n):
total_size=to_int(self.total_size),
created_from_basename=self.created_from_basename,
uuid=str(self.uuid or '') or None,
- hashes=list(map(lambda h: h.serialize(id_encoder, serialization_options), self.hashes))
+ hashes=list(map(lambda h: h.serialize(id_encoder, serialization_options), self.hashes)),
+ sources=list(map(lambda s: s.serialize(id_encoder, serialization_options), self.sources)),
)
serialization_options.attach_identifier(id_encoder, self, rval)
return rval
-class DatasetSource(Base, Serializable):
+class DatasetSource(Base, Dictifiable, Serializable):
__tablename__ = 'dataset_source'
id = Column(Integer, primary_key=True)
@@ -3400,6 +3401,8 @@ class DatasetSource(Base, Serializable):
transform = Column(MutableJSONType)
dataset = relationship('Dataset', back_populates='sources')
hashes = relationship('DatasetSourceHash', back_populates='source')
+ dict_collection_visible_keys = ['id', 'source_uri', 'extra_files_path', 'transform']
+ dict_element_visible_keys = ['id', 'source_uri', 'extra_files_path', 'transform'] # TODO: implement to_dict and add hashes...
def _serialize(self, id_encoder, serialization_options):
rval = dict_for(
@@ -3432,7 +3435,7 @@ def _serialize(self, id_encoder, serialization_options):
return rval
-class DatasetHash(Base, Serializable):
+class DatasetHash(Base, Dictifiable, Serializable):
__tablename__ = 'dataset_hash'
id = Column(Integer, primary_key=True)
@@ -3441,6 +3444,8 @@ class DatasetHash(Base, Serializable):
hash_value = Column(TEXT)
extra_files_path = Column(TEXT)
dataset = relationship('Dataset', back_populates='hashes')
+ dict_collection_visible_keys = ['id', 'hash_function', 'hash_value', 'extra_files_path']
+ dict_element_visible_keys = ['id', 'hash_function', 'hash_value', 'extra_files_path']
def _serialize(self, id_encoder, serialization_options):
rval = dict_for(
@@ -3672,6 +3677,14 @@ def set_created_from_basename(self, created_from_basename):
created_from_basename = property(get_created_from_basename, set_created_from_basename)
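+ # Expose the underlying Dataset's sources and hashes on the dataset instance
+ # so serializers (e.g. the HDA 'sources' and 'hashes' keys) can surface them.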
+ @property
+ def sources(self):
+ return self.dataset.sources
+
+ @property
+ def hashes(self):
+ return self.dataset.hashes
+
def get_raw_data(self):
"""Returns the full data. To stream it open the file_name and read/write as needed"""
return self.datatype.get_raw_data(self)
diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py
index 3cfd8a19e229..e491f1d70f6b 100644
--- a/lib/galaxy/model/store/discover.py
+++ b/lib/galaxy/model/store/discover.py
@@ -122,6 +122,7 @@ def create_dataset(
for source_dict in sources:
source = galaxy.model.DatasetSource()
source.source_uri = source_dict["source_uri"]
+ source.transform = source_dict.get("transform")
primary_data.dataset.sources.append(source)
for hash_dict in hashes:
diff --git a/lib/galaxy/selenium/components.py b/lib/galaxy/selenium/components.py
index 1c9a72cb4628..d99b1181a573 100644
--- a/lib/galaxy/selenium/components.py
+++ b/lib/galaxy/selenium/components.py
@@ -4,6 +4,7 @@
ABCMeta,
abstractproperty,
)
+from typing import Union
from selenium.webdriver.common.by import By
@@ -142,6 +143,9 @@ def element_locator(self):
return (By.PARTIAL_LINK_TEXT, self.text)
+HasText = Union[Label, Text]
+
+
class Component:
def __init__(self, name, sub_components, selectors, labels, text):
diff --git a/lib/galaxy/selenium/navigates_galaxy.py b/lib/galaxy/selenium/navigates_galaxy.py
index d58774bcac5a..3c9a27f11799 100644
--- a/lib/galaxy/selenium/navigates_galaxy.py
+++ b/lib/galaxy/selenium/navigates_galaxy.py
@@ -10,7 +10,7 @@
import time
from abc import abstractmethod
from functools import partial, wraps
-from typing import Union
+from typing import cast, Union
import requests
import yaml
@@ -19,7 +19,7 @@
from galaxy.util import DEFAULT_SOCKET_TIMEOUT
from . import sizzle
-from .components import Component
+from .components import Component, HasText
from .data import (
load_root_component,
)
@@ -1673,12 +1673,18 @@ def assert_absent_or_hidden_after_transitions(self, selector):
"""
return self.assert_absent_or_hidden(selector)
- def assert_tooltip_text(self, element, expected, sleep=0, click_away=True):
+ def assert_tooltip_text(self, element, expected: Union[str, HasText], sleep: int = 0, click_away: bool = True):
if hasattr(expected, "text"):
- expected = expected.text
+ expected = cast(HasText, expected).text
text = self.get_tooltip_text(element, sleep=sleep, click_away=click_away)
assert text == expected, f"Tooltip text [{text}] was not expected text [{expected}]."
+ def assert_tooltip_text_contains(self, element, expected: Union[str, HasText], sleep: int = 0, click_away: bool = True):
+ if hasattr(expected, "text"):
+ expected = cast(HasText, expected).text
+ text = self.get_tooltip_text(element, sleep=sleep, click_away=click_away)
+ assert expected in text, f"Tooltip text [{text}] did not contain expected text [{expected}]."
+
def assert_error_message(self, contains=None):
self.components._.messages.error.wait_for_visible()
elements = self.find_elements(self.components._.messages.selectors.error)
diff --git a/lib/galaxy/selenium/navigation.yml b/lib/galaxy/selenium/navigation.yml
index a4999e42472e..5fe56d813a35 100644
--- a/lib/galaxy/selenium/navigation.yml
+++ b/lib/galaxy/selenium/navigation.yml
@@ -111,6 +111,7 @@ dataset_details:
selectors:
_: 'table#dataset-details'
tool_parameters: 'table#tool-parameters'
+ transform_action: '[data-transform-action="${action}"]'
history_panel:
menu:
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index 57c8630fa98a..8ac788775890 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -201,8 +201,9 @@ def _resolve_item_with_primary(item):
sources = []
url = item.get("url")
+ source_dict = {"source_uri": url}
if url:
- sources.append({"source_uri": url})
+ sources.append(source_dict)
hashes = item.get("hashes", [])
for hash_dict in hashes:
hash_function = hash_dict.get("hash_function")
@@ -232,7 +233,7 @@ def _resolve_item_with_primary(item):
registry = upload_config.registry
check_content = upload_config.check_content
- stdout, ext, datatype, is_binary, converted_path = handle_upload(
+ stdout, ext, datatype, is_binary, converted_path, converted_newlines, converted_spaces = handle_upload(
registry=registry,
path=path,
requested_ext=requested_ext,
@@ -246,7 +247,11 @@ def _resolve_item_with_primary(item):
convert_to_posix_lines=to_posix_lines,
convert_spaces_to_tabs=space_to_tab,
)
-
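+ # Record which upload-time transformations were applied (posix newline
+ # conversion, space-to-tab, datatype grooming below) so they can be stored
+ # on the dataset source and surfaced in the dataset details view.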
+ transform = []
+ if converted_newlines:
+ transform.append({"action": "to_posix_lines"})
+ if converted_spaces:
+ transform.append({"action": "spaces_to_tabs"})
if link_data_only:
# Never alter a file that will not be copied to Galaxy's local file store.
if datatype.dataset_content_needs_grooming(path):
@@ -293,10 +298,20 @@ def walk_extra_files(items, prefix=""):
# TODO:
# in galaxy json add 'extra_files' and point at target derived from extra_files:
- if not link_data_only and datatype and datatype.dataset_content_needs_grooming(path):
+
+ needs_grooming = not link_data_only and datatype and datatype.dataset_content_needs_grooming(path)
+ if needs_grooming:
# Groom the dataset content if necessary
+ transform.append({
+ "action": "datatype_groom",
+ "datatype_ext": ext,
+ "datatype_class": datatype.__class__.__name__
+ })
datatype.groom_dataset_content(path)
+ if len(transform) > 0:
+ source_dict["transform"] = transform
+
rval = {"name": name, "filename": path, "dbkey": dbkey, "ext": ext, "link_data_only": link_data_only, "sources": sources, "hashes": hashes, "info": f"uploaded {ext} file"}
if staged_extra_files:
rval["extra_files"] = os.path.abspath(staged_extra_files)
diff --git a/lib/galaxy/webapps/galaxy/api/library_contents.py b/lib/galaxy/webapps/galaxy/api/library_contents.py
index 2e7e1a97a4f1..f9c4b0924499 100644
--- a/lib/galaxy/webapps/galaxy/api/library_contents.py
+++ b/lib/galaxy/webapps/galaxy/api/library_contents.py
@@ -333,7 +333,6 @@ def _upload_library_dataset(self, trans, library_id, folder_id, **kwd):
return 400, message
else:
created_outputs_dict = self._upload_dataset(trans,
- library_id=trans.security.encode_id(library.id),
folder_id=trans.security.encode_id(folder.id),
replace_dataset=replace_dataset,
**kwd)
diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py
index 98169da517f5..e6f26a619153 100644
--- a/lib/galaxy_test/api/test_tools_upload.py
+++ b/lib/galaxy_test/api/test_tools_upload.py
@@ -8,6 +8,7 @@
from galaxy_test.base.constants import (
ONE_TO_SIX_ON_WINDOWS,
ONE_TO_SIX_WITH_SPACES,
+ ONE_TO_SIX_WITH_SPACES_ON_WINDOWS,
ONE_TO_SIX_WITH_TABS,
ONE_TO_SIX_WITH_TABS_NO_TRAILING_NEWLINE,
)
@@ -111,6 +112,11 @@ def test_fetch_tab_to_space(self):
result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True)
self.assertEqual(result_content, ONE_TO_SIX_WITH_TABS)
+ def test_fetch_tab_to_space_doesnt_swap_newlines(self):
+ table = ONE_TO_SIX_WITH_SPACES_ON_WINDOWS
+ result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True)
+ self.assertEqual(result_content, ONE_TO_SIX_ON_WINDOWS)
+
def test_fetch_compressed_with_explicit_type(self):
fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
with open(fastqgz_path, "rb") as fh:
@@ -329,25 +335,15 @@ def test_upload_multiple_mixed_success(self, history_id):
@uses_test_history(require_new=False)
@skip_if_github_down
def test_fetch_bam_file_from_url_with_extension_set(self, history_id):
- destination = {"type": "hdas"}
- targets = [{
- "destination": destination,
- "items": [
- {
- "src": "url",
- "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam",
- "ext": "bam"
- },
- ]
- }]
- payload = {
- "history_id": history_id,
- "targets": json.dumps(targets),
+ item = {
+ "src": "url",
+ "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam",
+ "ext": "bam"
}
- fetch_response = self.dataset_populator.fetch(payload)
- self._assert_status_code_is(fetch_response, 200)
- outputs = fetch_response.json()["outputs"]
- self.dataset_populator.get_history_dataset_details(history_id, dataset=outputs[0], assert_ok=True)
+ output = self.dataset_populator.fetch_hda(
+ history_id, item
+ )
+ self.dataset_populator.get_history_dataset_details(history_id, dataset=output, assert_ok=True)
@uses_test_history(require_new=False)
@skip_if_github_down
@@ -392,31 +388,18 @@ def test_composite_datatype(self):
@skip_without_datatype("velvet")
@uses_test_history(require_new=False)
def test_composite_datatype_fetch(self, history_id):
- destination = {"type": "hdas"}
- targets = [{
- "destination": destination,
- "items": [{
- "src": "composite",
- "ext": "velvet",
- "composite": {
- "items": [
- {"src": "pasted", "paste_content": "sequences content"},
- {"src": "pasted", "paste_content": "roadmaps content"},
- {"src": "pasted", "paste_content": "log content"},
- ]
- },
- }],
- }]
- payload = {
- "history_id": history_id,
- "targets": json.dumps(targets),
+ item = {
+ "src": "composite",
+ "ext": "velvet",
+ "composite": {
+ "items": [
+ {"src": "pasted", "paste_content": "sequences content"},
+ {"src": "pasted", "paste_content": "roadmaps content"},
+ {"src": "pasted", "paste_content": "log content"},
+ ]
+ },
}
- fetch_response = self.dataset_populator.fetch(payload)
- self._assert_status_code_is(fetch_response, 200)
- outputs = fetch_response.json()["outputs"]
- assert len(outputs) == 1
- output = outputs[0]
-
+ output = self.dataset_populator.fetch_hda(history_id, item)
roadmaps_content = self._get_roadmaps_content(history_id, output)
assert roadmaps_content.strip() == "roadmaps content", roadmaps_content
diff --git a/lib/galaxy_test/base/constants.py b/lib/galaxy_test/base/constants.py
index 40c3dbf72778..dd2a23f4c391 100644
--- a/lib/galaxy_test/base/constants.py
+++ b/lib/galaxy_test/base/constants.py
@@ -4,4 +4,5 @@
ONE_TO_SIX_WITH_SPACES = "1 2 3\n4 5 6\n"
ONE_TO_SIX_WITH_TABS = "1\t2\t3\n4\t5\t6\n"
ONE_TO_SIX_ON_WINDOWS = "1\t2\t3\r4\t5\t6\r"
+ONE_TO_SIX_WITH_SPACES_ON_WINDOWS = "1 2 3\r4 5 6\r"
ONE_TO_SIX_WITH_TABS_NO_TRAILING_NEWLINE = "1\t2\t3\n4\t5\t6"
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index 8f386a98bbe5..355709106e65 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -54,6 +54,7 @@
List,
NamedTuple,
Optional,
+ Set,
)
import requests
@@ -276,6 +277,26 @@ def fetch(self, payload: dict, assert_ok: bool = True, timeout: timeout_type = D
return tool_response
+ def fetch_hdas(self, history_id: str, items: List[Dict[str, Any]], wait: bool = True) -> List[Dict[str, Any]]:
+ destination = {"type": "hdas"}
+ targets = [{
+ "destination": destination,
+ "items": items,
+ }]
+ payload = {
+ "history_id": history_id,
+ "targets": json.dumps(targets),
+ }
+ fetch_response = self.fetch(payload, wait=wait)
+ api_asserts.assert_status_code_is(fetch_response, 200)
+ outputs = fetch_response.json()["outputs"]
+ return outputs
+
+ def fetch_hda(self, history_id, item: Dict[str, Any], wait: bool = True) -> Dict[str, Any]:
+ hdas = self.fetch_hdas(history_id, [item], wait=wait)
+ assert len(hdas) == 1
+ return hdas[0]
+
def wait_for_tool_run(self, history_id: str, run_response: requests.Response, timeout: timeout_type = DEFAULT_TIMEOUT, assert_ok: bool = True):
job = self.check_run(run_response)
self.wait_for_job(job["id"], timeout=timeout)
@@ -504,6 +525,23 @@ def get_history_dataset_content(self, history_id: str, wait=True, filename=None,
else:
return display_response.content
+ def get_history_dataset_source_transform_actions(self, history_id: str, **kwd) -> Set[str]:
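+ """Return the set of transform 'action' names recorded on the dataset's single source (empty set if none)."""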
+ details = self.get_history_dataset_details(history_id, **kwd)
+ if "sources" not in details:
+ return set([])
+ sources = details["sources"]
+ assert len(sources) <= 1 # We don't handle this use case yet.
+ if len(sources) == 0:
+ return set([])
+
+ source_0 = sources[0]
+ assert "transform" in source_0
+ transform = source_0["transform"]
+ if transform is None:
+ return set([])
+ assert isinstance(transform, list)
+ return set([t["action"] for t in transform])
+
def get_history_dataset_details(self, history_id: str, **kwds) -> dict:
dataset_id = self.__history_content_id(history_id, **kwds)
details_response = self.get_history_dataset_details_raw(history_id, dataset_id)
diff --git a/lib/galaxy_test/driver/integration_setup.py b/lib/galaxy_test/driver/integration_setup.py
index 04905729473a..1fa2742789e0 100644
--- a/lib/galaxy_test/driver/integration_setup.py
+++ b/lib/galaxy_test/driver/integration_setup.py
@@ -4,6 +4,7 @@
import os
import shutil
from tempfile import mkdtemp
+from typing import ClassVar
from .driver_util import GalaxyTestDriver
@@ -11,8 +12,8 @@
REQUIRED_GROUP = "fs_test_group"
-def get_posix_file_source_config(root_dir: str, roles: str, groups: str) -> str:
- return f"""
+def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include_test_data_dir: bool) -> str:
+ rval = f"""
- type: posix
id: posix_test
label: Posix
@@ -21,12 +22,21 @@ def get_posix_file_source_config(root_dir: str, roles: str, groups: str) -> str:
writable: true
requires_roles: {roles}
requires_groups: {groups}
-
"""
+ if include_test_data_dir:
+ rval += """
+- type: posix
+ id: testdatafiles
+ label: Galaxy Stock Test Data
+ doc: Galaxy's test-data directory.
+ root: test-data
+ writable: false
+"""
+ return rval
-def create_file_source_config_file_on(temp_dir, root_dir):
- file_contents = get_posix_file_source_config(root_dir, REQUIRED_ROLE, REQUIRED_GROUP)
+def create_file_source_config_file_on(temp_dir, root_dir, include_test_data_dir):
+ file_contents = get_posix_file_source_config(root_dir, REQUIRED_ROLE, REQUIRED_GROUP, include_test_data_dir)
file_path = os.path.join(temp_dir, "file_sources_conf_posix.yml")
with open(file_path, "w") as f:
f.write(file_contents)
@@ -36,6 +46,7 @@ def create_file_source_config_file_on(temp_dir, root_dir):
class PosixFileSourceSetup:
_test_driver: GalaxyTestDriver
root_dir: str
+ include_test_data_dir: ClassVar[bool] = False
@classmethod
def handle_galaxy_config_kwds(cls, config):
@@ -43,7 +54,7 @@ def handle_galaxy_config_kwds(cls, config):
cls._test_driver.temp_directories.append(temp_dir)
cls.root_dir = os.path.join(temp_dir, "root")
- file_sources_config_file = create_file_source_config_file_on(temp_dir, cls.root_dir)
+ file_sources_config_file = create_file_source_config_file_on(temp_dir, cls.root_dir, cls.include_test_data_dir)
config["file_sources_config_file"] = file_sources_config_file
# Disable all stock plugins
diff --git a/test-data/simple_line_x2_windows.txt b/test-data/simple_line_x2_windows.txt
new file mode 100644
index 000000000000..04f9feb51ab7
--- /dev/null
+++ b/test-data/simple_line_x2_windows.txt
@@ -0,0 +1 @@
+This is a line of text.
This is a line of text.
\ No newline at end of file
diff --git a/test/integration_selenium/test_dataset_details_source_transforms.py b/test/integration_selenium/test_dataset_details_source_transforms.py
new file mode 100644
index 000000000000..d2bc3fed6c99
--- /dev/null
+++ b/test/integration_selenium/test_dataset_details_source_transforms.py
@@ -0,0 +1,82 @@
+from galaxy_test.driver.integration_setup import (
+ PosixFileSourceSetup,
+)
+from .framework import (
+ selenium_test,
+ SeleniumIntegrationTestCase,
+)
+
+
+class DatasetSourceTransformSeleniumIntegrationTestCase(PosixFileSourceSetup, SeleniumIntegrationTestCase):
+ ensure_registered = True
+ include_test_data_dir = True
+
+ @selenium_test
+ def test_displaying_source_transformations_posixlines(self):
+ history_id = self.current_history_id()
+ item = {
+ "src": "url",
+ "url": "gxfiles://testdatafiles/simple_line_no_newline.txt",
+ "ext": "txt",
+ "to_posix_lines": True,
+ }
+ output = self.dataset_populator.fetch_hda(
+ history_id, item
+ )
+ actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True)
+ assert actions == {"to_posix_lines"}
+ details = self._display_first_hid_details()
+ transform_element = details.transform_action(action="to_posix_lines").wait_for_visible()
+ self.assert_tooltip_text_contains(transform_element, "newline characters in text files")
+ self.screenshot("dataset_details_source_transform_to_posix_lines")
+
+ @selenium_test
+ def test_displaying_source_transformations_spaces_to_tabs(self):
+ history_id = self.current_history_id()
+ item = {
+ "src": "url",
+ "url": "gxfiles://testdatafiles/simple_line_x2_windows.txt",
+ "ext": "txt",
+ "to_posix_lines": True,
+ "space_to_tab": True,
+ }
+ output = self.dataset_populator.fetch_hda(
+ history_id, item
+ )
+ actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True)
+ assert actions == {"spaces_to_tabs", "to_posix_lines"}
+ details = self._display_first_hid_details()
+ transform_element = details.transform_action(action="spaces_to_tabs").wait_for_visible()
+ self.assert_tooltip_text_contains(transform_element, "referenced data source to tabular data", click_away=False)
+ self.screenshot("dataset_details_source_transform_spaces_to_tabs")
+
+ @selenium_test
+ def test_displaying_source_transformations_grooming(self):
+ history_id = self.current_history_id()
+ item = {
+ "src": "url",
+ "url": "gxfiles://testdatafiles/qname_sorted.bam",
+ "ext": "bam",
+ }
+ output = self.dataset_populator.fetch_hda(
+ history_id, item
+ )
+ actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True)
+ assert actions == {"datatype_groom"}
+ details = self._display_first_hid_details()
+ transform_element = details.transform_action(action="datatype_groom").wait_for_visible()
+ self.assert_tooltip_text_contains(transform_element, "sorted", click_away=False)
+ self.screenshot("dataset_details_source_transform_bam_grooming")
+ self.click_center()
+ self.assert_tooltip_text_contains(transform_element, "Galaxy applied datatype specific cleaning of the supplied data", click_away=False)
+
+ def _display_first_hid_details(self):
+ self.home()
+ self.history_panel_wait_for_hid_ok(1)
+ self.history_panel_click_item_title(hid=1, wait=True)
+ self.history_panel_item_view_dataset_details(1)
+ return self.components.dataset_details
+
+ def setUp(self):
+ super().setUp()
+ self._write_file_fixtures()
diff --git a/test/unit/data/datatypes/test_sniff.py b/test/unit/data/datatypes/test_sniff.py
index fab864ae22be..42c794ac3fdc 100644
--- a/test/unit/data/datatypes/test_sniff.py
+++ b/test/unit/data/datatypes/test_sniff.py
@@ -5,6 +5,7 @@
from galaxy.datatypes.sniff import (
convert_newlines,
convert_newlines_sep2tabs,
+ convert_sep2tabs,
get_test_fname,
)
@@ -14,7 +15,7 @@ def assert_converts_to_1234_convert_sep2tabs(content, expected='1\t2\n3\t4\n'):
tf.write(content)
rval = convert_newlines_sep2tabs(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir())
assert expected == open(tf.name).read()
- assert rval == (2, None), rval
+ assert rval[0:2] == (2, None), rval
def assert_converts_to_1234_convert(content, block_size=1024):
@@ -23,7 +24,14 @@ def assert_converts_to_1234_convert(content, block_size=1024):
rval = convert_newlines(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir(), block_size=block_size)
actual_contents = open(tf.name).read()
assert '1 2\n3 4\n' == actual_contents, actual_contents
- assert rval == (2, None), f"rval != {rval} for {content}"
+ assert rval[0:2] == (2, None), f"rval != {rval} for {content}"
+
+
+def assert_converts_to_1234_convert_sep2tabs_only(content: bytes, expected: bytes):
+ with tempfile.NamedTemporaryFile(delete=False, mode='wb') as tf:
+ tf.write(content)
+ convert_sep2tabs(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir())
+ assert expected == open(tf.name, "rb").read()
@pytest.mark.parametrize('source,block_size', [
@@ -77,3 +85,9 @@ def test_convert_sep2tabs(source, expected):
assert_converts_to_1234_convert_sep2tabs(source, expected=expected)
else:
assert_converts_to_1234_convert_sep2tabs(source)
+
+
+def test_convert_sep2tabs_only():
+ assert_converts_to_1234_convert_sep2tabs_only(b"1 2\r3 4", b"1\t2\r3\t4")
+ assert_converts_to_1234_convert_sep2tabs_only(b"1 2\n3 4", b"1\t2\n3\t4")
+ assert_converts_to_1234_convert_sep2tabs_only(b"1 2\n3 4", b"1\t2\n3\t4")
diff --git a/tools/data_source/upload.py b/tools/data_source/upload.py
index d652b66f9fff..174fe412a35f 100644
--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -130,7 +130,7 @@ def add_file(dataset, registry, output_path: str) -> Dict[str, str]:
if not os.path.exists(dataset.path):
raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)
- stdout, ext, datatype, is_binary, converted_path = handle_upload(
+ stdout, ext, datatype, is_binary, converted_path, _, _ = handle_upload(
registry=registry,
path=dataset.path,
requested_ext=dataset.file_type,