From 8aa562110275678724eaa4ab4d22cc19ab95237a Mon Sep 17 00:00:00 2001 From: "Yakubov, Sergey" Date: Mon, 30 Jan 2023 09:58:10 -0500 Subject: [PATCH] implement display function for all datatypes display the first 100K of a binary file or a file whose type is unknown to Galaxy as ASCII text instead of downloading this file. --- lib/galaxy/datatypes/data.py | 146 ++++++++++++------ .../webapps/galaxy/dataset/binary_file.mako | 17 ++ 2 files changed, 115 insertions(+), 48 deletions(-) create mode 100644 templates/webapps/galaxy/dataset/binary_file.mako diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index ba1235ce4972..723581f07b1c 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -157,6 +157,36 @@ def __init__(cls, name, bases, dict_): metadata.Statement.process(cls) +def _is_binary_file(data): + from galaxy.datatypes import binary + + return isinstance(data.datatype, binary.Binary) or type(data.datatype) is Data + + +def _get_max_peak_size(data): + from galaxy.datatypes import ( + binary, + text, + ) + + max_peek_size = DEFAULT_MAX_PEEK_SIZE # 1 MB + if isinstance(data.datatype, text.Html): + max_peek_size = 10000000 # 10 MB for html + elif isinstance(data.datatype, binary.Binary): + max_peek_size = 100000 # 100 KB for binary + return max_peek_size + + +def _get_file_size(data): + file_size = int(data.dataset.file_size or 0) + if file_size == 0: + if data.dataset.object_store: + file_size = data.dataset.object_store.size(data.dataset) + else: + file_size = os.stat(data.file_name).st_size + return file_size + + @p_dataproviders.decorators.has_dataproviders class Data(metaclass=DataMeta): """ @@ -430,6 +460,57 @@ def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable: file_paths.append(dataset.file_name) return zip(file_paths, rel_paths) + def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd): + composite_extensions = trans.app.datatypes_registry.get_composite_extensions() + composite_extensions.append("html") # for archiving composite datatypes + composite_extensions.append("data_manager_json") # for downloading bundles if bundled. + + if data.extension in composite_extensions: + return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip")) + else: + headers["Content-Length"] = str(file_size) + filename = self._download_filename( + data, + to_ext, + hdca=kwd.get("hdca"), + element_identifier=kwd.get("element_identifier"), + filename_pattern=kwd.get("filename_pattern"), + ) + headers[ + "content-type" + ] = "application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename + headers["Content-Disposition"] = f'attachment; filename="{filename}"' + return open(data.file_name, "rb"), headers + + def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size): + headers["content-type"] = "text/html" + return ( + trans.fill_template_mako( + "/dataset/binary_file.mako", + data=data, + file_contents=open(data.file_name, "rb").read(max_peek_size), + file_size=util.nice_size(file_size), + truncated=file_size > max_peek_size, + ), + headers, + ) + + def _serve_file_contents(self, trans, data, headers, preview, file_size, max_peek_size): + from galaxy.datatypes import images + + preview = util.string_as_bool(preview) + if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size: + return self._yield_user_file_content(trans, data, data.file_name, headers), headers + + # preview large text file + headers["content-type"] = "text/html" + return ( + trans.fill_template_mako( + "/dataset/large_file.mako", truncated_data=open(data.file_name, "rb").read(max_peek_size), data=data + ), + headers, + ) + def display_data( self, trans, @@ -449,13 +530,10 @@ def display_data( providers?). """ headers = kwd.get("headers", {}) - # Relocate all composite datatype display to a common location. - composite_extensions = trans.app.datatypes_registry.get_composite_extensions() - composite_extensions.append("html") # for archiving composite datatypes - composite_extensions.append("data_manager_json") # for downloading bundles if bundled. # Prevent IE8 from sniffing content type since we're explicit about it. This prevents intentionally text/plain # content from being rendered in the browser headers["X-Content-Type-Options"] = "nosniff" + if filename and filename != "index": # For files in extra_files_path extra_dir = dataset.dataset.extra_files_path_name @@ -500,52 +578,24 @@ def display_data( raise ObjectNotFound(f"Could not find '{filename}' on the extra files path {file_path}.") self._clean_and_set_mime_type(trans, dataset.get_mime(), headers) - trans.log_event(f"Display dataset id: {str(dataset.id)}") - from galaxy.datatypes import ( # DBTODO REMOVE THIS AT REFACTOR - binary, - images, - text, - ) + downloading = to_ext is not None + file_size = _get_file_size(dataset) - if to_ext or isinstance(dataset.datatype, binary.Binary): # Saving the file, or binary file - if dataset.extension in composite_extensions: - return self._archive_composite_dataset(trans, dataset, headers, do_action=kwd.get("do_action", "zip")) - else: - headers["Content-Length"] = str(os.stat(dataset.file_name).st_size) - filename = self._download_filename( - dataset, - to_ext, - hdca=kwd.get("hdca"), - element_identifier=kwd.get("element_identifier"), - filename_pattern=kwd.get("filename_pattern"), - ) - headers[ - "content-type" - ] = "application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename - headers["Content-Disposition"] = f'attachment; filename="{filename}"' - return open(dataset.file_name, "rb"), headers if not os.path.exists(dataset.file_name): raise ObjectNotFound(f"File Not Found ({dataset.file_name}).") - max_peek_size = DEFAULT_MAX_PEEK_SIZE # 1 MB - if isinstance(dataset.datatype, text.Html): - max_peek_size = 10000000 # 10 MB for html - preview = util.string_as_bool(preview) - if ( - not preview - or isinstance(dataset.datatype, images.Image) - or os.stat(dataset.file_name).st_size < max_peek_size - ): - return self._yield_user_file_content(trans, dataset, dataset.file_name, headers), headers - else: - headers["content-type"] = "text/html" - return ( - trans.fill_template_mako( - "/dataset/large_file.mako", - truncated_data=open(dataset.file_name, "rb").read(max_peek_size), - data=dataset, - ), - headers, - ) + + if downloading: + trans.log_event(f"Download dataset id: {str(dataset.id)}") + return self._serve_file_download(headers, dataset, trans, to_ext, file_size, **kwd) + else: # displaying + trans.log_event(f"Display dataset id: {str(dataset.id)}") + max_peek_size = _get_max_peak_size(dataset) + if ( + _is_binary_file(dataset) and preview and hasattr(trans, "fill_template_mako") + ): # preview file which format is unknown (to Galaxy), we still try to display this as text + return self._serve_binary_file_contents_as_text(trans, dataset, headers, file_size, max_peek_size) + else: # text/html, or image, or display was called without preview flag + return self._serve_file_contents(trans, dataset, headers, preview, file_size, max_peek_size) def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str: """Prepare for embedding dataset into a basic Markdown document. @@ -554,7 +604,7 @@ def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str: on datatypes not tightly tied to a Galaxy version (e.g. datatypes in the Tool Shed). - Speaking very losely - the datatype should should load a bounded amount + Speaking very losely - the datatype should load a bounded amount of data from the supplied dataset instance and prepare for embedding it into Markdown. This should be relatively vanilla Markdown - the result of this is bleached and it should not contain nested Galaxy Markdown diff --git a/templates/webapps/galaxy/dataset/binary_file.mako b/templates/webapps/galaxy/dataset/binary_file.mako new file mode 100644 index 000000000000..508281a0eb69 --- /dev/null +++ b/templates/webapps/galaxy/dataset/binary_file.mako @@ -0,0 +1,17 @@ +<%inherit file="/base.mako"/> +<%namespace file="/dataset/display.mako" import="render_deleted_data_message" /> + +${ render_deleted_data_message( data ) } + +
+ This is a binary (or unknown to Galaxy) dataset of size ${ file_size }. Preview is not implemented for this filetype. Displaying + %if truncated: +first 100KB + %endif + as ASCII text
+ Download +
+ +
+${ util.unicodify( file_contents ) | h }
+