From 88650448e91c0429e1decc05bfb8932c97ed923d Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 2 Nov 2021 10:30:19 -0400 Subject: [PATCH 1/9] Fix when setting sep2tabs but not to_posix_lines. Previously sep2tabs would imply to_posix_lines even if that was explicitly set to false. I think this new behavior is what I intended and is more correct. --- lib/galaxy/datatypes/sniff.py | 42 ++++++++++++++++++++---- lib/galaxy_test/api/test_tools_upload.py | 6 ++++ lib/galaxy_test/base/constants.py | 1 + test/unit/data/datatypes/test_sniff.py | 14 ++++++++ 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py index fcfcedfc19f6..e4ce71937840 100644 --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -83,7 +83,12 @@ def handle_composite_file(datatype, src_path, extra_files, name, is_binary, tmp_ datatype.groom_dataset_content(file_output_path) -def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024, regexp=None): +class ConvertResult(NamedTuple): + line_count: int + converted_path: Optional[str] + + +def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024, regexp=None) -> ConvertResult: """ Converts in place a file from universal line endings to Posix line endings. @@ -115,12 +120,35 @@ def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = if in_place: shutil.move(fp.name, fname) # Return number of lines in file. - return (i, None) + return ConvertResult(i, None) + else: + return ConvertResult(i, fp.name) + + +def convert_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\r\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024): + """ + Transforms in place a 'sep' separated file to a tab separated one + """ + regexp = re.compile(patt) + i = 0 + with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi: + block = fi.read(block_size) + while block: + if block: + split_block = regexp.split(block) + block = b"\t".join(split_block) + fp.write(block) + i += block.count(b"\n") or block.count(b"\r") + block = fi.read(block_size) + if in_place: + shutil.move(fp.name, fname) + # Return number of lines in file. + return ConvertResult(i, None) else: - return (i, fp.name) + return ConvertResult(i, fp.name) -def convert_newlines_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload"): +def convert_newlines_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult: """ Converts newlines in a file to posix newlines and replaces spaces with tabs. 
""" @@ -749,10 +777,12 @@ def handle_uploaded_dataset_file_internal( if not is_binary and (convert_to_posix_lines or convert_spaces_to_tabs): # Convert universal line endings to Posix line endings, spaces to tabs (if desired) convert_fxn: Callable - if convert_spaces_to_tabs: + if convert_spaces_to_tabs and convert_to_posix_lines: convert_fxn = convert_newlines_sep2tabs - else: + elif convert_to_posix_lines: convert_fxn = convert_newlines + else: + convert_fxn = convert_sep2tabs line_count, _converted_path = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix) if not in_place: if converted_path and filename != converted_path: diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py index 98169da517f5..d5cd73f3b2b8 100644 --- a/lib/galaxy_test/api/test_tools_upload.py +++ b/lib/galaxy_test/api/test_tools_upload.py @@ -8,6 +8,7 @@ from galaxy_test.base.constants import ( ONE_TO_SIX_ON_WINDOWS, ONE_TO_SIX_WITH_SPACES, + ONE_TO_SIX_WITH_SPACES_ON_WINDOWS, ONE_TO_SIX_WITH_TABS, ONE_TO_SIX_WITH_TABS_NO_TRAILING_NEWLINE, ) @@ -111,6 +112,11 @@ def test_fetch_tab_to_space(self): result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True) self.assertEqual(result_content, ONE_TO_SIX_WITH_TABS) + def test_fetch_tab_to_space_doesnt_swap_newlines(self): + table = ONE_TO_SIX_WITH_SPACES_ON_WINDOWS + result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True) + self.assertEqual(result_content, ONE_TO_SIX_ON_WINDOWS) + def test_fetch_compressed_with_explicit_type(self): fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz") with open(fastqgz_path, "rb") as fh: diff --git a/lib/galaxy_test/base/constants.py b/lib/galaxy_test/base/constants.py index 40c3dbf72778..dd2a23f4c391 100644 --- a/lib/galaxy_test/base/constants.py +++ b/lib/galaxy_test/base/constants.py @@ -4,4 +4,5 @@ ONE_TO_SIX_WITH_SPACES = "1 2 3\n4 5 6\n" ONE_TO_SIX_WITH_TABS = "1\t2\t3\n4\t5\t6\n" ONE_TO_SIX_ON_WINDOWS = "1\t2\t3\r4\t5\t6\r" +ONE_TO_SIX_WITH_SPACES_ON_WINDOWS = "1 2 3\r4 5 6\r" ONE_TO_SIX_WITH_TABS_NO_TRAILING_NEWLINE = "1\t2\t3\n4\t5\t6" diff --git a/test/unit/data/datatypes/test_sniff.py b/test/unit/data/datatypes/test_sniff.py index fab864ae22be..3182a48cb184 100644 --- a/test/unit/data/datatypes/test_sniff.py +++ b/test/unit/data/datatypes/test_sniff.py @@ -5,6 +5,7 @@ from galaxy.datatypes.sniff import ( convert_newlines, convert_newlines_sep2tabs, + convert_sep2tabs, get_test_fname, ) @@ -26,6 +27,13 @@ def assert_converts_to_1234_convert(content, block_size=1024): assert rval == (2, None), f"rval != {rval} for {content}" +def assert_converts_to_1234_convert_sep2tabs_only(content: bytes, expected: bytes): + with tempfile.NamedTemporaryFile(delete=False, mode='wb') as tf: + tf.write(content) + convert_sep2tabs(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir()) + assert expected == open(tf.name, "rb").read() + + @pytest.mark.parametrize('source,block_size', [ ("1 2\r3 4", None), ("1 2\n3 4", None), @@ -77,3 +85,9 @@ def test_convert_sep2tabs(source, expected): assert_converts_to_1234_convert_sep2tabs(source, expected=expected) else: assert_converts_to_1234_convert_sep2tabs(source) + + +def test_convert_sep2tabs_only(): + assert_converts_to_1234_convert_sep2tabs_only(b"1 2\r3 4", b"1\t2\r3\t4") + assert_converts_to_1234_convert_sep2tabs_only(b"1 2\n3 4", b"1\t2\n3\t4") + assert_converts_to_1234_convert_sep2tabs_only(b"1 2\n3 4", b"1\t2\n3\t4") From 
9d83e09ca9f35f9e9d9bec47ce318a735307621c Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 2 Nov 2021 13:58:03 -0400 Subject: [PATCH 2/9] Even more typing in sniff.py. --- lib/galaxy/datatypes/sniff.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py index e4ce71937840..20661695c0f4 100644 --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -14,7 +14,7 @@ import tempfile import urllib.request import zipfile -from typing import Callable, Dict, IO, NamedTuple, Optional +from typing import Dict, IO, NamedTuple, Optional from typing_extensions import Protocol @@ -88,6 +88,12 @@ class ConvertResult(NamedTuple): converted_path: Optional[str] +class ConvertFunction(Protocol): + + def __call__(self, fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult: + ... + + def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024, regexp=None) -> ConvertResult: """ Converts in place a file from universal line endings @@ -125,10 +131,11 @@ def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = return ConvertResult(i, fp.name) -def convert_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\r\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024): +def convert_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024): """ Transforms in place a 'sep' separated file to a tab separated one """ + patt: bytes = br"[^\S\r\n]+" regexp = re.compile(patt) i = 0 with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi: @@ -148,10 +155,11 @@ def convert_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\r\ return ConvertResult(i, fp.name) -def convert_newlines_sep2tabs(fname: str, in_place: bool = True, patt: bytes = br"[^\S\n]+", tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult: +def convert_newlines_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult: """ Converts newlines in a file to posix newlines and replaces spaces with tabs. 
""" + patt: bytes = br"[^\S\n]+" regexp = re.compile(patt) return convert_newlines(fname, in_place, tmp_dir, tmp_prefix, regexp=regexp) @@ -776,7 +784,7 @@ def handle_uploaded_dataset_file_internal( if not is_binary and (convert_to_posix_lines or convert_spaces_to_tabs): # Convert universal line endings to Posix line endings, spaces to tabs (if desired) - convert_fxn: Callable + convert_fxn: ConvertFunction if convert_spaces_to_tabs and convert_to_posix_lines: convert_fxn = convert_newlines_sep2tabs elif convert_to_posix_lines: @@ -787,6 +795,7 @@ def handle_uploaded_dataset_file_internal( if not in_place: if converted_path and filename != converted_path: os.unlink(converted_path) + assert _converted_path converted_path = _converted_path if ext in AUTO_DETECT_EXTENSIONS: ext = guess_ext(converted_path, sniff_order=datatypes_registry.sniff_order, is_binary=is_binary) From 4fe7109ab10e7358aed68dcdf1e9eba0d00c197f Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 3 Nov 2021 12:56:28 -0400 Subject: [PATCH 3/9] Track transformations applied during uploads. --- lib/galaxy/datatypes/sniff.py | 34 ++++++++++++++++++++------ lib/galaxy/datatypes/upload_util.py | 8 ++++-- lib/galaxy/model/store/discover.py | 1 + lib/galaxy/tools/data_fetch.py | 23 ++++++++++++++--- test/unit/data/datatypes/test_sniff.py | 4 +-- tools/data_source/upload.py | 2 +- 6 files changed, 55 insertions(+), 17 deletions(-) diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py index 20661695c0f4..b7016d67d13d 100644 --- a/lib/galaxy/datatypes/sniff.py +++ b/lib/galaxy/datatypes/sniff.py @@ -86,6 +86,8 @@ def handle_composite_file(datatype, src_path, extra_files, name, is_binary, tmp_ class ConvertResult(NamedTuple): line_count: int converted_path: Optional[str] + converted_newlines: bool + converted_regex: bool class ConvertFunction(Protocol): @@ -100,6 +102,8 @@ def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = to Posix line endings. """ i = 0 + converted_newlines = False + converted_regex = False NEWLINE_BYTE = 10 CR_BYTE = 13 with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi: @@ -113,22 +117,28 @@ def convert_newlines(fname: str, in_place: bool = True, tmp_dir: Optional[str] = block = block[1:] if block: last_char = block[-1] - block = block.replace(b"\r\n", b"\n").replace(b"\r", b"\n") + if b"\r" in block: + block = block.replace(b"\r\n", b"\n").replace(b"\r", b"\n") + converted_newlines = True if regexp: - block = b"\t".join(regexp.split(block)) + split_block = regexp.split(block) + if len(split_block) > 1: + converted_regex = True + block = b"\t".join(split_block) fp.write(block) i += block.count(b"\n") last_block = block block = fi.read(block_size) if last_block and last_block[-1] != NEWLINE_BYTE: + converted_newlines = True i += 1 fp.write(b"\n") if in_place: shutil.move(fp.name, fname) # Return number of lines in file. 
- return ConvertResult(i, None) + return ConvertResult(i, None, converted_newlines, converted_regex) else: - return ConvertResult(i, fp.name) + return ConvertResult(i, fp.name, converted_newlines, converted_regex) def convert_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload", block_size: int = 128 * 1024): @@ -138,11 +148,15 @@ def convert_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = patt: bytes = br"[^\S\r\n]+" regexp = re.compile(patt) i = 0 + converted_newlines = False + converted_regex = False with tempfile.NamedTemporaryFile(mode='wb', prefix=tmp_prefix, dir=tmp_dir, delete=False) as fp, open(fname, mode='rb') as fi: block = fi.read(block_size) while block: if block: split_block = regexp.split(block) + if len(split_block) > 1: + converted_regex = True block = b"\t".join(split_block) fp.write(block) i += block.count(b"\n") or block.count(b"\r") @@ -150,9 +164,9 @@ def convert_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = if in_place: shutil.move(fp.name, fname) # Return number of lines in file. - return ConvertResult(i, None) + return ConvertResult(i, None, converted_newlines, converted_regex) else: - return ConvertResult(i, fp.name) + return ConvertResult(i, fp.name, converted_newlines, converted_regex) def convert_newlines_sep2tabs(fname: str, in_place: bool = True, tmp_dir: Optional[str] = None, tmp_prefix: Optional[str] = "gxupload") -> ConvertResult: @@ -739,6 +753,8 @@ class HandleUploadedDatasetFileInternalResponse(NamedTuple): ext: str converted_path: str compressed_type: Optional[str] + converted_newlines: bool + converted_spaces: bool def handle_uploaded_dataset_file_internal( @@ -765,6 +781,8 @@ def handle_uploaded_dataset_file_internal( check_content=check_content, auto_decompress=auto_decompress, ) + converted_newlines = False + converted_spaces = False try: if not is_valid: if is_tar(converted_path): @@ -791,7 +809,7 @@ def handle_uploaded_dataset_file_internal( convert_fxn = convert_newlines else: convert_fxn = convert_sep2tabs - line_count, _converted_path = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix) + line_count, _converted_path, converted_newlines, converted_spaces = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix) if not in_place: if converted_path and filename != converted_path: os.unlink(converted_path) @@ -808,7 +826,7 @@ def handle_uploaded_dataset_file_internal( if filename != converted_path: os.unlink(converted_path) raise - return HandleUploadedDatasetFileInternalResponse(ext, converted_path, compressed_type) + return HandleUploadedDatasetFileInternalResponse(ext, converted_path, compressed_type, converted_newlines, converted_spaces) AUTO_DETECT_EXTENSIONS = ['auto'] # should 'data' also cause auto detect? diff --git a/lib/galaxy/datatypes/upload_util.py b/lib/galaxy/datatypes/upload_util.py index 02f240858567..963e2bfb96bb 100644 --- a/lib/galaxy/datatypes/upload_util.py +++ b/lib/galaxy/datatypes/upload_util.py @@ -19,6 +19,8 @@ class HandleUploadResponse(NamedTuple): datatype: data.Data is_binary: bool converted_path: Optional[str] + converted_newlines: bool + converted_spaces: bool def handle_upload( @@ -42,13 +44,15 @@ def handle_upload( # Does the first 1K contain a null? is_binary = check_binary(path) + converted_newlines, converted_spaces = False, False + # Decompress if needed/desired and determine/validate filetype. 
If a keep-compressed datatype is explicitly selected # or if autodetection is selected and the file sniffs as a keep-compressed datatype, it will not be decompressed. if not link_data_only: if auto_decompress and is_zip(path) and not is_single_file_zip(path): multi_file_zip = True try: - ext, converted_path, compression_type = sniff.handle_uploaded_dataset_file_internal( + ext, converted_path, compression_type, converted_newlines, converted_spaces = sniff.handle_uploaded_dataset_file_internal( path, registry, ext=requested_ext, @@ -96,4 +100,4 @@ def handle_upload( if multi_file_zip and not getattr(datatype, 'compressed', False): stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' - return HandleUploadResponse(stdout, ext, datatype, is_binary, converted_path) + return HandleUploadResponse(stdout, ext, datatype, is_binary, converted_path, converted_newlines, converted_spaces) diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 3cfd8a19e229..e491f1d70f6b 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -122,6 +122,7 @@ def create_dataset( for source_dict in sources: source = galaxy.model.DatasetSource() source.source_uri = source_dict["source_uri"] + source.transform = source_dict.get("transform") primary_data.dataset.sources.append(source) for hash_dict in hashes: diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py index 57c8630fa98a..8ac788775890 100644 --- a/lib/galaxy/tools/data_fetch.py +++ b/lib/galaxy/tools/data_fetch.py @@ -201,8 +201,9 @@ def _resolve_item_with_primary(item): sources = [] url = item.get("url") + source_dict = {"source_uri": url} if url: - sources.append({"source_uri": url}) + sources.append(source_dict) hashes = item.get("hashes", []) for hash_dict in hashes: hash_function = hash_dict.get("hash_function") @@ -232,7 +233,7 @@ def _resolve_item_with_primary(item): registry = upload_config.registry check_content = upload_config.check_content - stdout, ext, datatype, is_binary, converted_path = handle_upload( + stdout, ext, datatype, is_binary, converted_path, converted_newlines, converted_spaces = handle_upload( registry=registry, path=path, requested_ext=requested_ext, @@ -246,7 +247,11 @@ def _resolve_item_with_primary(item): convert_to_posix_lines=to_posix_lines, convert_spaces_to_tabs=space_to_tab, ) - + transform = [] + if converted_newlines: + transform.append({"action": "to_posix_lines"}) + if converted_spaces: + transform.append({"action": "spaces_to_tabs"}) if link_data_only: # Never alter a file that will not be copied to Galaxy's local file store. 
if datatype.dataset_content_needs_grooming(path): @@ -293,10 +298,20 @@ def walk_extra_files(items, prefix=""): # TODO: # in galaxy json add 'extra_files' and point at target derived from extra_files: - if not link_data_only and datatype and datatype.dataset_content_needs_grooming(path): + + needs_grooming = not link_data_only and datatype and datatype.dataset_content_needs_grooming(path) + if needs_grooming: # Groom the dataset content if necessary + transform.append({ + "action": "datatype_groom", + "datatype_ext": ext, + "datatype_class": datatype.__class__.__name__ + }) datatype.groom_dataset_content(path) + if len(transform) > 0: + source_dict["transform"] = transform + rval = {"name": name, "filename": path, "dbkey": dbkey, "ext": ext, "link_data_only": link_data_only, "sources": sources, "hashes": hashes, "info": f"uploaded {ext} file"} if staged_extra_files: rval["extra_files"] = os.path.abspath(staged_extra_files) diff --git a/test/unit/data/datatypes/test_sniff.py b/test/unit/data/datatypes/test_sniff.py index 3182a48cb184..42c794ac3fdc 100644 --- a/test/unit/data/datatypes/test_sniff.py +++ b/test/unit/data/datatypes/test_sniff.py @@ -15,7 +15,7 @@ def assert_converts_to_1234_convert_sep2tabs(content, expected='1\t2\n3\t4\n'): tf.write(content) rval = convert_newlines_sep2tabs(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir()) assert expected == open(tf.name).read() - assert rval == (2, None), rval + assert rval[0:2] == (2, None), rval def assert_converts_to_1234_convert(content, block_size=1024): @@ -24,7 +24,7 @@ def assert_converts_to_1234_convert(content, block_size=1024): rval = convert_newlines(tf.name, tmp_prefix="gxtest", tmp_dir=tempfile.gettempdir(), block_size=block_size) actual_contents = open(tf.name).read() assert '1 2\n3 4\n' == actual_contents, actual_contents - assert rval == (2, None), f"rval != {rval} for {content}" + assert rval[0:2] == (2, None), f"rval != {rval} for {content}" def assert_converts_to_1234_convert_sep2tabs_only(content: bytes, expected: bytes): diff --git a/tools/data_source/upload.py b/tools/data_source/upload.py index d652b66f9fff..174fe412a35f 100644 --- a/tools/data_source/upload.py +++ b/tools/data_source/upload.py @@ -130,7 +130,7 @@ def add_file(dataset, registry, output_path: str) -> Dict[str, str]: if not os.path.exists(dataset.path): raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path) - stdout, ext, datatype, is_binary, converted_path = handle_upload( + stdout, ext, datatype, is_binary, converted_path, _, _ = handle_upload( registry=registry, path=dataset.path, requested_ext=dataset.file_type, From b8408768814b3a532432218cee60a8795e3f9b6c Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 2 Nov 2021 14:02:12 -0400 Subject: [PATCH 4/9] Reuse convert functions. 
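
Pull the posix-newline/space-to-tab dispatch out of
handle_uploaded_dataset_file_internal into a small convert_function() helper
so other callers can reuse the same selection logic. A rough sketch of the
resulting call pattern (the file name below is illustrative only):

    # Pick a converter; the helper asserts that at least one flag is set.
    convert_fxn = convert_function(convert_to_posix_lines=True, convert_spaces_to_tabs=False)
    # Every converter satisfies the ConvertFunction protocol and returns a ConvertResult.
    line_count, converted_path, converted_newlines, converted_spaces = convert_fxn(
        "upload.dat", in_place=True, tmp_dir=None, tmp_prefix="gxupload"
    )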
---
 lib/galaxy/datatypes/sniff.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/lib/galaxy/datatypes/sniff.py b/lib/galaxy/datatypes/sniff.py
index b7016d67d13d..8fce7b491914 100644
--- a/lib/galaxy/datatypes/sniff.py
+++ b/lib/galaxy/datatypes/sniff.py
@@ -757,6 +757,17 @@ class HandleUploadedDatasetFileInternalResponse(NamedTuple):
     converted_spaces: bool
 
 
+def convert_function(convert_to_posix_lines, convert_spaces_to_tabs) -> ConvertFunction:
+    assert convert_to_posix_lines or convert_spaces_to_tabs
+    if convert_spaces_to_tabs and convert_to_posix_lines:
+        convert_fxn = convert_newlines_sep2tabs
+    elif convert_to_posix_lines:
+        convert_fxn = convert_newlines
+    else:
+        convert_fxn = convert_sep2tabs
+    return convert_fxn
+
+
 def handle_uploaded_dataset_file_internal(
     filename: str,
     datatypes_registry,
@@ -802,13 +813,7 @@ def handle_uploaded_dataset_file_internal(
 
         if not is_binary and (convert_to_posix_lines or convert_spaces_to_tabs):
             # Convert universal line endings to Posix line endings, spaces to tabs (if desired)
-            convert_fxn: ConvertFunction
-            if convert_spaces_to_tabs and convert_to_posix_lines:
-                convert_fxn = convert_newlines_sep2tabs
-            elif convert_to_posix_lines:
-                convert_fxn = convert_newlines
-            else:
-                convert_fxn = convert_sep2tabs
+            convert_fxn = convert_function(convert_to_posix_lines, convert_spaces_to_tabs)
             line_count, _converted_path, converted_newlines, converted_spaces = convert_fxn(converted_path, in_place=in_place, tmp_dir=tmp_dir, tmp_prefix=tmp_prefix)
             if not in_place:
                 if converted_path and filename != converted_path:

From 43c464bc1785d831b4fb4d337b9798ef45f05817 Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Sun, 26 Sep 2021 11:51:49 -0400
Subject: [PATCH 5/9] Show created_from_basename in the dataset information
 panel in the GUI.

I think it is useful provenance information, and surfacing it should
encourage better tracking, since people will complain if it is wrong.
---
 .../src/components/DatasetInformation/DatasetInformation.vue | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/client/src/components/DatasetInformation/DatasetInformation.vue b/client/src/components/DatasetInformation/DatasetInformation.vue
index 77f6f4f4c8a4..48bc5785fb30 100644
--- a/client/src/components/DatasetInformation/DatasetInformation.vue
+++ b/client/src/components/DatasetInformation/DatasetInformation.vue
@@ -54,6 +54,10 @@
 Full Path
 {{ dataset.file_name }}
+
+Originally Created From a File Named
+{{ dataset.created_from_basename }}
+

From ada2784f740e022386ee5f5baeb401838d67e6e5 Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Sun, 26 Sep 2021 12:02:06 -0400
Subject: [PATCH 6/9] Integrate hashes and sources into various APIs for UI
 display.
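
Dataset-level hashes and sources (including any recorded transform actions)
are now dictified and attached to HDA serialization so the client can render
them in the dataset information panel. A sketch of the extra entries a
dataset details response picks up (the values here are hypothetical):

    # Illustrative fragment of a dataset details payload:
    details = {
        "sources": [{
            "id": 5,
            "source_uri": "https://example.org/1.bam",
            "extra_files_path": None,
            "transform": [{"action": "to_posix_lines"}],
        }],
        "hashes": [{
            "id": 7,
            "hash_function": "MD5",
            "hash_value": "0123456789abcdef0123456789abcdef",
            "extra_files_path": None,
        }],
    }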
--- .../DatasetInformation/DatasetHash.vue | 16 +++++++ .../DatasetInformation/DatasetHashes.vue | 30 ++++++++++++ .../DatasetInformation/DatasetInformation.vue | 16 +++++++ .../DatasetInformation/DatasetSource.vue | 48 +++++++++++++++++++ .../DatasetInformation/DatasetSources.vue | 30 ++++++++++++ lib/galaxy/managers/hdas.py | 6 +++ lib/galaxy/model/__init__.py | 19 ++++++-- 7 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 client/src/components/DatasetInformation/DatasetHash.vue create mode 100644 client/src/components/DatasetInformation/DatasetHashes.vue create mode 100644 client/src/components/DatasetInformation/DatasetSource.vue create mode 100644 client/src/components/DatasetInformation/DatasetSources.vue diff --git a/client/src/components/DatasetInformation/DatasetHash.vue b/client/src/components/DatasetInformation/DatasetHash.vue new file mode 100644 index 000000000000..cff960b49b0a --- /dev/null +++ b/client/src/components/DatasetInformation/DatasetHash.vue @@ -0,0 +1,16 @@ + + + diff --git a/client/src/components/DatasetInformation/DatasetHashes.vue b/client/src/components/DatasetInformation/DatasetHashes.vue new file mode 100644 index 000000000000..be4420236909 --- /dev/null +++ b/client/src/components/DatasetInformation/DatasetHashes.vue @@ -0,0 +1,30 @@ + + + + + diff --git a/client/src/components/DatasetInformation/DatasetInformation.vue b/client/src/components/DatasetInformation/DatasetInformation.vue index 48bc5785fb30..f34f7a0b8e9d 100644 --- a/client/src/components/DatasetInformation/DatasetInformation.vue +++ b/client/src/components/DatasetInformation/DatasetInformation.vue @@ -58,6 +58,18 @@ Originally Created From a File Named {{ dataset.created_from_basename }} + + Sources + + + + + + Hashes + + + + @@ -69,6 +81,8 @@ import Utils from "utils/utils"; import UtcDate from "components/UtcDate"; import DecodedId from "../DecodedId"; import { DatasetProvider } from "components/providers"; +import DatasetSources from "./DatasetSources"; +import DatasetHashes from "./DatasetHashes"; export default { props: { @@ -78,7 +92,9 @@ export default { }, }, components: { + DatasetHashes, DatasetProvider, + DatasetSources, DecodedId, UtcDate, }, diff --git a/client/src/components/DatasetInformation/DatasetSource.vue b/client/src/components/DatasetInformation/DatasetSource.vue new file mode 100644 index 000000000000..84f8e6a3eba3 --- /dev/null +++ b/client/src/components/DatasetInformation/DatasetSource.vue @@ -0,0 +1,48 @@ + + + diff --git a/client/src/components/DatasetInformation/DatasetSources.vue b/client/src/components/DatasetInformation/DatasetSources.vue new file mode 100644 index 000000000000..61b43da2aa02 --- /dev/null +++ b/client/src/components/DatasetInformation/DatasetSources.vue @@ -0,0 +1,30 @@ + + + + + diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index ef8ee5b94108..fd59c1585a93 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -326,6 +326,8 @@ def __init__(self, app: StructuredApp): 'api_type', 'created_from_basename', + 'hashes', + 'sources', ], include_keys_from='summary') self.add_view('extended', [ @@ -384,6 +386,8 @@ def __init__(self, app: StructuredApp): 'uuid', 'validated_state', 'validated_state_message', + 'hashes', + 'sources', ]) def serialize_copied_from_ldda_id(self, item, key, **context): @@ -436,6 +440,8 @@ def add_serializers(self): 'api_type': lambda item, key, **context: 'file', 'type': lambda item, key, **context: 'file', 'created_from_basename': lambda item, key, **context: 
item.created_from_basename, + 'hashes': lambda item, key, **context: [h.to_dict() for h in item.hashes], + 'sources': lambda item, key, **context: [s.to_dict() for s in item.sources], } self.serializers.update(serializers) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index a4eef6da371c..ae93c59c3775 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -3384,13 +3384,14 @@ def to_int(n): total_size=to_int(self.total_size), created_from_basename=self.created_from_basename, uuid=str(self.uuid or '') or None, - hashes=list(map(lambda h: h.serialize(id_encoder, serialization_options), self.hashes)) + hashes=list(map(lambda h: h.serialize(id_encoder, serialization_options), self.hashes)), + sources=list(map(lambda s: s.serialize(id_encoder, serialization_options), self.sources)), ) serialization_options.attach_identifier(id_encoder, self, rval) return rval -class DatasetSource(Base, Serializable): +class DatasetSource(Base, Dictifiable, Serializable): __tablename__ = 'dataset_source' id = Column(Integer, primary_key=True) @@ -3400,6 +3401,8 @@ class DatasetSource(Base, Serializable): transform = Column(MutableJSONType) dataset = relationship('Dataset', back_populates='sources') hashes = relationship('DatasetSourceHash', back_populates='source') + dict_collection_visible_keys = ['id', 'source_uri', 'extra_files_path', "transform"] + dict_element_visible_keys = ['id', 'source_uri', 'extra_files_path', 'transform'] # TODO: implement to_dict and add hashes... def _serialize(self, id_encoder, serialization_options): rval = dict_for( @@ -3432,7 +3435,7 @@ def _serialize(self, id_encoder, serialization_options): return rval -class DatasetHash(Base, Serializable): +class DatasetHash(Base, Dictifiable, Serializable): __tablename__ = 'dataset_hash' id = Column(Integer, primary_key=True) @@ -3441,6 +3444,8 @@ class DatasetHash(Base, Serializable): hash_value = Column(TEXT) extra_files_path = Column(TEXT) dataset = relationship('Dataset', back_populates='hashes') + dict_collection_visible_keys = ['id', 'hash_function', 'hash_value', 'extra_files_path'] + dict_element_visible_keys = ['id', 'hash_function', 'hash_value', 'extra_files_path'] def _serialize(self, id_encoder, serialization_options): rval = dict_for( @@ -3672,6 +3677,14 @@ def set_created_from_basename(self, created_from_basename): created_from_basename = property(get_created_from_basename, set_created_from_basename) + @property + def sources(self): + return self.dataset.sources + + @property + def hashes(self): + return self.dataset.hashes + def get_raw_data(self): """Returns the full data. To stream it open the file_name and read/write as needed""" return self.datatype.get_raw_data(self) From c65a6c5bc4ac3e9cdcacbf11bac550a3f71cec10 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 1 Nov 2021 14:00:00 -0400 Subject: [PATCH 7/9] Abstraction for easier uploads using the fetch API. 
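
Adds fetch_hdas() and a single-item fetch_hda() convenience to
DatasetPopulator so tests no longer need to hand-build destination/targets
payloads for the fetch API. Typical usage, as in the updated BAM upload test
below:

    item = {
        "src": "url",
        "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam",
        "ext": "bam",
    }
    output = self.dataset_populator.fetch_hda(history_id, item)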
--- lib/galaxy_test/api/test_tools_upload.py | 61 ++++++++---------------- lib/galaxy_test/base/populators.py | 20 ++++++++ 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py index d5cd73f3b2b8..e6f26a619153 100644 --- a/lib/galaxy_test/api/test_tools_upload.py +++ b/lib/galaxy_test/api/test_tools_upload.py @@ -335,25 +335,15 @@ def test_upload_multiple_mixed_success(self, history_id): @uses_test_history(require_new=False) @skip_if_github_down def test_fetch_bam_file_from_url_with_extension_set(self, history_id): - destination = {"type": "hdas"} - targets = [{ - "destination": destination, - "items": [ - { - "src": "url", - "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", - "ext": "bam" - }, - ] - }] - payload = { - "history_id": history_id, - "targets": json.dumps(targets), + item = { + "src": "url", + "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam", + "ext": "bam" } - fetch_response = self.dataset_populator.fetch(payload) - self._assert_status_code_is(fetch_response, 200) - outputs = fetch_response.json()["outputs"] - self.dataset_populator.get_history_dataset_details(history_id, dataset=outputs[0], assert_ok=True) + output = self.dataset_populator.fetch_hda( + history_id, item + ) + self.dataset_populator.get_history_dataset_details(history_id, dataset=output, assert_ok=True) @uses_test_history(require_new=False) @skip_if_github_down @@ -398,31 +388,18 @@ def test_composite_datatype(self): @skip_without_datatype("velvet") @uses_test_history(require_new=False) def test_composite_datatype_fetch(self, history_id): - destination = {"type": "hdas"} - targets = [{ - "destination": destination, - "items": [{ - "src": "composite", - "ext": "velvet", - "composite": { - "items": [ - {"src": "pasted", "paste_content": "sequences content"}, - {"src": "pasted", "paste_content": "roadmaps content"}, - {"src": "pasted", "paste_content": "log content"}, - ] - }, - }], - }] - payload = { - "history_id": history_id, - "targets": json.dumps(targets), + item = { + "src": "composite", + "ext": "velvet", + "composite": { + "items": [ + {"src": "pasted", "paste_content": "sequences content"}, + {"src": "pasted", "paste_content": "roadmaps content"}, + {"src": "pasted", "paste_content": "log content"}, + ] + }, } - fetch_response = self.dataset_populator.fetch(payload) - self._assert_status_code_is(fetch_response, 200) - outputs = fetch_response.json()["outputs"] - assert len(outputs) == 1 - output = outputs[0] - + output = self.dataset_populator.fetch_hda(history_id, item) roadmaps_content = self._get_roadmaps_content(history_id, output) assert roadmaps_content.strip() == "roadmaps content", roadmaps_content diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 8f386a98bbe5..6322d9c5fd65 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -276,6 +276,26 @@ def fetch(self, payload: dict, assert_ok: bool = True, timeout: timeout_type = D return tool_response + def fetch_hdas(self, history_id: str, items: List[Dict[str, Any]], wait: bool = True) -> List[Dict[str, Any]]: + destination = {"type": "hdas"} + targets = [{ + "destination": destination, + "items": items, + }] + payload = { + "history_id": history_id, + "targets": json.dumps(targets), + } + fetch_response = self.fetch(payload, wait=wait) + api_asserts.assert_status_code_is(fetch_response, 200) + outputs = 
fetch_response.json()["outputs"] + return outputs + + def fetch_hda(self, history_id, item: Dict[str, Any], wait: bool = True) -> Dict[str, Any]: + hdas = self.fetch_hdas(history_id, [item], wait=wait) + assert len(hdas) == 1 + return hdas[0] + def wait_for_tool_run(self, history_id: str, run_response: requests.Response, timeout: timeout_type = DEFAULT_TIMEOUT, assert_ok: bool = True): job = self.check_run(run_response) self.wait_for_job(job["id"], timeout=timeout) From 9d18c91aca157fa770bb9eb19281ece97dcde25b Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 8 Nov 2021 12:00:33 -0500 Subject: [PATCH 8/9] Display dataset source transform information. --- .../DatasetInformation/DatasetSource.vue | 5 +- .../DatasetSourceTransform.vue | 94 +++++++++++++++++++ lib/galaxy/selenium/components.py | 4 + lib/galaxy/selenium/navigates_galaxy.py | 14 ++- lib/galaxy/selenium/navigation.yml | 1 + lib/galaxy_test/base/populators.py | 18 ++++ lib/galaxy_test/driver/integration_setup.py | 23 +++-- test-data/simple_line_x2_windows.txt | 1 + .../test_dataset_details_source_transforms.py | 82 ++++++++++++++++ 9 files changed, 231 insertions(+), 11 deletions(-) create mode 100644 client/src/components/DatasetInformation/DatasetSourceTransform.vue create mode 100644 test-data/simple_line_x2_windows.txt create mode 100644 test/integration_selenium/test_dataset_details_source_transforms.py diff --git a/client/src/components/DatasetInformation/DatasetSource.vue b/client/src/components/DatasetInformation/DatasetSource.vue index 84f8e6a3eba3..d7c210976c5d 100644 --- a/client/src/components/DatasetInformation/DatasetSource.vue +++ b/client/src/components/DatasetInformation/DatasetSource.vue @@ -9,6 +9,7 @@
+ @@ -17,11 +18,13 @@ import { FontAwesomeIcon } from "@fortawesome/vue-fontawesome"; import { library } from "@fortawesome/fontawesome-svg-core"; import { faCopy, faExternalLinkAlt } from "@fortawesome/free-solid-svg-icons"; import { copy } from "utils/clipboard"; +import DatasetSourceTransform from "./DatasetSourceTransform"; library.add(faCopy, faExternalLinkAlt); export default { components: { + DatasetSourceTransform, FontAwesomeIcon, }, props: { @@ -33,7 +36,7 @@ export default { computed: { browserCompatUri() { const sourceUri = this.sourceUri; - return sourceUri && (sourceUri.indexOf("http") == 0 || sourceUri.indexOf("ftp")); + return sourceUri && (sourceUri.indexOf("http") == 0 || sourceUri.indexOf("ftp") == 0); }, sourceUri() { return this.source.source_uri; diff --git a/client/src/components/DatasetInformation/DatasetSourceTransform.vue b/client/src/components/DatasetInformation/DatasetSourceTransform.vue new file mode 100644 index 000000000000..d4f15b4d701c --- /dev/null +++ b/client/src/components/DatasetInformation/DatasetSourceTransform.vue @@ -0,0 +1,94 @@ + + + + + diff --git a/lib/galaxy/selenium/components.py b/lib/galaxy/selenium/components.py index 1c9a72cb4628..d99b1181a573 100644 --- a/lib/galaxy/selenium/components.py +++ b/lib/galaxy/selenium/components.py @@ -4,6 +4,7 @@ ABCMeta, abstractproperty, ) +from typing import Union from selenium.webdriver.common.by import By @@ -142,6 +143,9 @@ def element_locator(self): return (By.PARTIAL_LINK_TEXT, self.text) +HasText = Union[Label, Text] + + class Component: def __init__(self, name, sub_components, selectors, labels, text): diff --git a/lib/galaxy/selenium/navigates_galaxy.py b/lib/galaxy/selenium/navigates_galaxy.py index d58774bcac5a..3c9a27f11799 100644 --- a/lib/galaxy/selenium/navigates_galaxy.py +++ b/lib/galaxy/selenium/navigates_galaxy.py @@ -10,7 +10,7 @@ import time from abc import abstractmethod from functools import partial, wraps -from typing import Union +from typing import cast, Union import requests import yaml @@ -19,7 +19,7 @@ from galaxy.util import DEFAULT_SOCKET_TIMEOUT from . import sizzle -from .components import Component +from .components import Component, HasText from .data import ( load_root_component, ) @@ -1673,12 +1673,18 @@ def assert_absent_or_hidden_after_transitions(self, selector): """ return self.assert_absent_or_hidden(selector) - def assert_tooltip_text(self, element, expected, sleep=0, click_away=True): + def assert_tooltip_text(self, element, expected: Union[str, HasText], sleep: int = 0, click_away: bool = True): if hasattr(expected, "text"): - expected = expected.text + expected = cast(HasText, expected).text text = self.get_tooltip_text(element, sleep=sleep, click_away=click_away) assert text == expected, f"Tooltip text [{text}] was not expected text [{expected}]." + def assert_tooltip_text_contains(self, element, expected: Union[str, HasText], sleep: int = 0, click_away: bool = True): + if hasattr(expected, "text"): + expected = cast(HasText, expected).text + text = self.get_tooltip_text(element, sleep=sleep, click_away=click_away) + assert expected in text, f"Tooltip text [{text}] was not expected text [{expected}]." 
+ def assert_error_message(self, contains=None): self.components._.messages.error.wait_for_visible() elements = self.find_elements(self.components._.messages.selectors.error) diff --git a/lib/galaxy/selenium/navigation.yml b/lib/galaxy/selenium/navigation.yml index a4999e42472e..5fe56d813a35 100644 --- a/lib/galaxy/selenium/navigation.yml +++ b/lib/galaxy/selenium/navigation.yml @@ -111,6 +111,7 @@ dataset_details: selectors: _: 'table#dataset-details' tool_parameters: 'table#tool-parameters' + transform_action: '[data-transform-action="${action}"]' history_panel: menu: diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 6322d9c5fd65..355709106e65 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -54,6 +54,7 @@ List, NamedTuple, Optional, + Set, ) import requests @@ -524,6 +525,23 @@ def get_history_dataset_content(self, history_id: str, wait=True, filename=None, else: return display_response.content + def get_history_dataset_source_transform_actions(self, history_id: str, **kwd) -> Set[str]: + details = self.get_history_dataset_details(history_id, **kwd) + if "sources" not in details: + return set([]) + sources = details["sources"] + assert len(sources) <= 1 # We don't handle this use case yet. + if len(sources) == 0: + return set([]) + + source_0 = sources[0] + assert "transform" in source_0 + transform = source_0["transform"] + if transform is None: + return set([]) + assert isinstance(transform, list) + return set([t["action"] for t in transform]) + def get_history_dataset_details(self, history_id: str, **kwds) -> dict: dataset_id = self.__history_content_id(history_id, **kwds) details_response = self.get_history_dataset_details_raw(history_id, dataset_id) diff --git a/lib/galaxy_test/driver/integration_setup.py b/lib/galaxy_test/driver/integration_setup.py index 04905729473a..1fa2742789e0 100644 --- a/lib/galaxy_test/driver/integration_setup.py +++ b/lib/galaxy_test/driver/integration_setup.py @@ -4,6 +4,7 @@ import os import shutil from tempfile import mkdtemp +from typing import ClassVar from .driver_util import GalaxyTestDriver @@ -11,8 +12,8 @@ REQUIRED_GROUP = "fs_test_group" -def get_posix_file_source_config(root_dir: str, roles: str, groups: str) -> str: - return f""" +def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include_test_data_dir: bool) -> str: + rval = f""" - type: posix id: posix_test label: Posix @@ -21,12 +22,21 @@ def get_posix_file_source_config(root_dir: str, roles: str, groups: str) -> str: writable: true requires_roles: {roles} requires_groups: {groups} - """ + if include_test_data_dir: + rval += """ +- type: posix + id: testdatafiles + label: Galaxy Stock Test Data + doc: Galaxy's test-data directory. 
+ root: test-data + writable: false +""" + return rval -def create_file_source_config_file_on(temp_dir, root_dir): - file_contents = get_posix_file_source_config(root_dir, REQUIRED_ROLE, REQUIRED_GROUP) +def create_file_source_config_file_on(temp_dir, root_dir, include_test_data_dir): + file_contents = get_posix_file_source_config(root_dir, REQUIRED_ROLE, REQUIRED_GROUP, include_test_data_dir) file_path = os.path.join(temp_dir, "file_sources_conf_posix.yml") with open(file_path, "w") as f: f.write(file_contents) @@ -36,6 +46,7 @@ def create_file_source_config_file_on(temp_dir, root_dir): class PosixFileSourceSetup: _test_driver: GalaxyTestDriver root_dir: str + include_test_data_dir: ClassVar[bool] = False @classmethod def handle_galaxy_config_kwds(cls, config): @@ -43,7 +54,7 @@ def handle_galaxy_config_kwds(cls, config): cls._test_driver.temp_directories.append(temp_dir) cls.root_dir = os.path.join(temp_dir, "root") - file_sources_config_file = create_file_source_config_file_on(temp_dir, cls.root_dir) + file_sources_config_file = create_file_source_config_file_on(temp_dir, cls.root_dir, cls.include_test_data_dir) config["file_sources_config_file"] = file_sources_config_file # Disable all stock plugins diff --git a/test-data/simple_line_x2_windows.txt b/test-data/simple_line_x2_windows.txt new file mode 100644 index 000000000000..04f9feb51ab7 --- /dev/null +++ b/test-data/simple_line_x2_windows.txt @@ -0,0 +1 @@ +This is a line of text. This is a line of text. \ No newline at end of file diff --git a/test/integration_selenium/test_dataset_details_source_transforms.py b/test/integration_selenium/test_dataset_details_source_transforms.py new file mode 100644 index 000000000000..d2bc3fed6c99 --- /dev/null +++ b/test/integration_selenium/test_dataset_details_source_transforms.py @@ -0,0 +1,82 @@ +from galaxy_test.driver.integration_setup import ( + PosixFileSourceSetup, +) +from .framework import ( + selenium_test, + SeleniumIntegrationTestCase, +) + + +class DatasetSourceTransformSeleniumIntegrationTestCase(PosixFileSourceSetup, SeleniumIntegrationTestCase): + ensure_registered = True + include_test_data_dir = True + + @selenium_test + def test_displaying_source_transformations_posixlines(self): + history_id = self.current_history_id() + item = { + "src": "url", + "url": "gxfiles://testdatafiles/simple_line_no_newline.txt", + "ext": "txt", + "to_posix_lines": True, + } + output = self.dataset_populator.fetch_hda( + history_id, item + ) + actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True) + assert actions == {"to_posix_lines"} + details = self._display_first_hid_details() + transform_element = details.transform_action(action="to_posix_lines").wait_for_visible() + self.assert_tooltip_text_contains(transform_element, "newline characters in text files") + self.screenshot("dataset_details_source_transform_to_posix_lines") + + @selenium_test + def test_displaying_source_transformations_spaces_to_tabs(self): + history_id = self.current_history_id() + item = { + "src": "url", + "url": "gxfiles://testdatafiles/simple_line_x2_windows.txt", + "ext": "txt", + "to_posix_lines": True, + "space_to_tab": True, + } + output = self.dataset_populator.fetch_hda( + history_id, item + ) + actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True) + assert actions == {"spaces_to_tabs", "to_posix_lines"} + details = self._display_first_hid_details() + transform_element = 
details.transform_action(action="spaces_to_tabs").wait_for_visible() + self.assert_tooltip_text_contains(transform_element, "referenced data source to tabular data", click_away=False) + self.screenshot("dataset_details_source_transform_spaces_to_tabs") + + @selenium_test + def test_displaying_source_transformations_grooming(self): + history_id = self.current_history_id() + item = { + "src": "url", + "url": "gxfiles://testdatafiles/qname_sorted.bam", + "ext": "bam", + } + output = self.dataset_populator.fetch_hda( + history_id, item + ) + actions = self.dataset_populator.get_history_dataset_source_transform_actions(history_id, dataset=output, assert_ok=True) + assert actions == {"datatype_groom"} + details = self._display_first_hid_details() + transform_element = details.transform_action(action="datatype_groom").wait_for_visible() + self.assert_tooltip_text_contains(transform_element, "sorted", click_away=False) + self.screenshot("dataset_details_source_transform_bam_grooming") + self.click_center() + self.assert_tooltip_text_contains(transform_element, "Galaxy applied datatype specific cleaning of the supplied data", click_away=False) + + def _display_first_hid_details(self): + self.home() + self.history_panel_wait_for_hid_ok(1) + self.history_panel_click_item_title(hid=1, wait=True) + self.history_panel_item_view_dataset_details(1) + return self.components.dataset_details + + def setUp(self): + super().setUp() + self._write_file_fixtures() From b03d7d0b8f0149af58644f15593b5cb3fd1129db Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 8 Nov 2021 12:43:15 -0500 Subject: [PATCH 9/9] Remove unneeded argument. --- lib/galaxy/actions/library.py | 2 +- lib/galaxy/webapps/galaxy/api/library_contents.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/galaxy/actions/library.py b/lib/galaxy/actions/library.py index 3a11508dea03..04be31f36abb 100644 --- a/lib/galaxy/actions/library.py +++ b/lib/galaxy/actions/library.py @@ -74,7 +74,7 @@ class LibraryActions: Mixin for controllers that provide library functionality. """ - def _upload_dataset(self, trans, library_id: str, folder_id: str, replace_dataset: Optional[LibraryDataset] = None, **kwd): + def _upload_dataset(self, trans, folder_id: str, replace_dataset: Optional[LibraryDataset] = None, **kwd): # Set up the traditional tool state/params cntrller = 'api' tool_id = 'upload1' diff --git a/lib/galaxy/webapps/galaxy/api/library_contents.py b/lib/galaxy/webapps/galaxy/api/library_contents.py index 2e7e1a97a4f1..f9c4b0924499 100644 --- a/lib/galaxy/webapps/galaxy/api/library_contents.py +++ b/lib/galaxy/webapps/galaxy/api/library_contents.py @@ -333,7 +333,6 @@ def _upload_library_dataset(self, trans, library_id, folder_id, **kwd): return 400, message else: created_outputs_dict = self._upload_dataset(trans, - library_id=trans.security.encode_id(library.id), folder_id=trans.security.encode_id(folder.id), replace_dataset=replace_dataset, **kwd)