Skip to content

Commit

Permalink
Merge pull request galaxyproject#18503 from mvdbeek/error_on_binary_data_display
Browse files Browse the repository at this point in the history

Raise Message Exception when displaying binary data
  • Loading branch information
bgruening authored Jul 6, 2024
2 parents 723e796 + 149ab23 commit 0beacf5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
15 changes: 9 additions & 6 deletions lib/galaxy/datatypes/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
get_headers,
iter_headers,
)
from galaxy.exceptions import InvalidFileFormatError
from galaxy.util import (
compression_utils,
nice_size,
Expand Down Expand Up @@ -775,15 +776,17 @@ def display_data(
headers = kwd.get("headers", {})
if preview:
with compression_utils.get_fileobj(dataset.get_file_name()) as fh:
max_peek_size = 1000000 # 1 MB
if os.stat(dataset.get_file_name()).st_size < max_peek_size:
max_peek_size = 100000
try:
chunk = fh.read(max_peek_size + 1)
except UnicodeDecodeError:
raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.")
if len(chunk) <= max_peek_size:
mime = "text/plain"
self._clean_and_set_mime_type(trans, mime, headers)
return fh.read(), headers
return chunk[:-1], headers
return (
trans.fill_template_mako(
"/dataset/large_file.mako", truncated_data=fh.read(max_peek_size), data=dataset
),
trans.fill_template_mako("/dataset/large_file.mako", truncated_data=chunk[:-1], data=dataset),
headers,
)
else:
Expand Down
14 changes: 9 additions & 5 deletions lib/galaxy/datatypes/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
iter_headers,
validate_tabular,
)
from galaxy.exceptions import InvalidFileFormatError
from galaxy.util import compression_utils
from galaxy.util.compression_utils import (
FileObjType,
Expand Down Expand Up @@ -156,12 +157,15 @@ def get_chunk(self, trans, dataset: HasFileName, offset: int = 0, ck_size: Optio
def _read_chunk(self, trans, dataset: HasFileName, offset: int, ck_size: Optional[int] = None):
with compression_utils.get_fileobj(dataset.get_file_name()) as f:
f.seek(offset)
ck_data = f.read(ck_size or trans.app.config.display_chunk_size)
if ck_data and ck_data[-1] != "\n":
cursor = f.read(1)
while cursor and cursor != "\n":
ck_data += cursor
try:
ck_data = f.read(ck_size or trans.app.config.display_chunk_size)
if ck_data and ck_data[-1] != "\n":
cursor = f.read(1)
while cursor and cursor != "\n":
ck_data += cursor
cursor = f.read(1)
except UnicodeDecodeError:
raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.")
last_read = f.tell()
return ck_data, last_read

Expand Down

0 comments on commit 0beacf5

Please sign in to comment.