Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

faster uploads by avoiding unneed large blob reads #166

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/167.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Adjusted schema validation to prevent file/image loading during upload/import
19 changes: 17 additions & 2 deletions plone/namedfile/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from plone.namedfile.interfaces import INamedFileField
from plone.namedfile.interfaces import INamedImage
from plone.namedfile.interfaces import INamedImageField
from plone.namedfile.interfaces import INamedTyped
from plone.namedfile.interfaces import IPluggableFileFieldValidation
from plone.namedfile.interfaces import IPluggableImageFieldValidation
from plone.namedfile.utils import get_contenttype
Expand Down Expand Up @@ -138,7 +139,16 @@ class NamedBlobFile(NamedField):
accept = ()

def validate(self, value):
super().validate(value, IPluggableFileFieldValidation)
# Bit of a hack but we avoid loading the .data into memory
# because schema validation checks the property exists
# which loads the entire file into memory without checking the data.
# This can slow down imports and uploads a lot.
# TODO: mighe be better fixed in zope.schema - https://github.com/zopefoundation/zope.schema/issues/127
self.schema = INamedTyped
try:
super().validate(value, IPluggableFileFieldValidation)
finally:
self.schema = INamedBlobFile


@implementer(INamedBlobImageField)
Expand All @@ -150,4 +160,9 @@ class NamedBlobImage(NamedField):
accept = ("image/*",)

def validate(self, value):
super().validate(value, IPluggableImageFieldValidation)
# see NamedBlobFile comment
self.schema = INamedTyped
try:
super().validate(value, IPluggableImageFieldValidation)
finally:
self.schema = INamedBlobImage
5 changes: 3 additions & 2 deletions plone/namedfile/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ def __init__(self, data=b"", contentType="", filename=None):
# Allow override of the image sniffer
if contentType:
self.contentType = contentType
exif_data = get_exif(self.data)
with self.open("r") as fp:
exif_data = get_exif(fp, self.contentType, self._width, self._height)
if exif_data is not None:
log.debug(
"Image contains Exif Informations. "
Expand Down Expand Up @@ -428,7 +429,7 @@ def _setData(self, data):
res = getImageInfo(firstbytes)
contentType, self._width, self._height = res
if contentType:
self.contentType = contentType
self.contentType = contentType

data = property(NamedBlobFile._getData, _setData)

Expand Down
12 changes: 9 additions & 3 deletions plone/namedfile/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
HAVE_BLOBS = True


class IFile(Interface):
class ITyped(Interface):

contentType = schema.NativeStringLine(
title="Content Type",
Expand All @@ -21,6 +21,9 @@ class IFile(Interface):
missing_value="",
)


class IFile(Interface):

data = schema.Bytes(
title="Data",
description="The actual content of the object.",
Expand Down Expand Up @@ -83,12 +86,15 @@ class INamed(Interface):

filename = schema.TextLine(title="Filename", required=False, default=None)

class INamedTyped(INamed, ITyped):
"""An item with a filename and contentType"""


class INamedFile(INamed, IFile):
class INamedFile(INamedTyped, IFile):
"""A non-BLOB file with a filename"""


class INamedImage(INamed, IImage):
class INamedImage(INamedTyped, IImage):
"""A non-BLOB image with a filename"""


Expand Down
61 changes: 61 additions & 0 deletions plone/namedfile/tests/test_storable.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@
from OFS.Image import Pdata
from plone.namedfile.file import FileChunk
from plone.namedfile.file import NamedBlobImage
from plone.namedfile.file import NamedBlobFile
from plone.namedfile import field
from plone.namedfile.testing import PLONE_NAMEDFILE_FUNCTIONAL_TESTING
from plone.namedfile.tests import getFile

import os
import tempfile
import unittest
from unittest.mock import patch
from ZODB.blob import Blob
from ZODB.blob import BlobFile


class TestStorable(unittest.TestCase):
Expand Down Expand Up @@ -56,3 +61,59 @@ def test_opened_file_storable(self):
if os.path.exists(path):
os.remove(path)
self.assertEqual(303, fi.getSize())

def test_upload_no_read(self):
# ensure we don't read the whole file into memory

old_open = Blob.open
blob_read = 0
blob_write = 0
old_read = BlobFile.read
read_bytes = 0

def count_open(self, mode="r"):
nonlocal blob_read, blob_write
blob_read += 1 if "r" in mode else 0
blob_write += 1 if "w" in mode else 0
return old_open(self, mode)

def count_reads(self, size=-1):
nonlocal read_bytes
res = old_read(self, size)
read_bytes += len(res)
return res

with patch.object(Blob, 'open', count_open), patch.object(BlobFile, 'read', count_reads):
data = getFile("image.jpg")
f = tempfile.NamedTemporaryFile(delete=False)
try:
path = f.name
f.write(data)
f.close()
with open(path, "rb") as f:
fi = NamedBlobImage(f, filename="image.gif")
finally:
if os.path.exists(path):
os.remove(path)
self.assertEqual(3641, fi.getSize())
self.assertIn('Exif', fi.exif)
self.assertEqual(500, fi._width)
self.assertEqual(200, fi._height)
self.assertLess(read_bytes, fi.getSize(), "Images should not need to read all data to get exif, dimensions")
self.assertEqual(blob_read, 3, "blob opening for getsize, get_exif and getImageInfo only")
self.assertEqual(
blob_write,
1,
"Slow write to blob instead of os rename. Should be only 1 on __init__ to create empty file",
)

# Test validation which should also not read all the data.
blob_read = read_bytes = 0

blob_field = field.NamedBlobImage()
blob_field.validate(fi)
blob_field = field.NamedBlobFile() # Image is subclass of file
blob_field.validate(fi)

self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory")
self.assertEqual(read_bytes, 0, "Validation is reading the whole blob in memory")
11 changes: 7 additions & 4 deletions plone/namedfile/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,20 @@ def getImageInfo(data):
return content_type, width, height


def get_exif(image):
def get_exif(image, content_type=None, width=None, height=None):
#
exif_data = None
image_data = _ensure_data(image)

content_type, width, height = getImageInfo(image_data)
if None in (content_type, width, height):
# if we already got the image info don't read the while file into memory
image = _ensure_data(image)
content_type, width, height = getImageInfo(image)
if content_type in ["image/jpeg", "image/tiff"]:
# Only this two Image Types could have Exif informations
# see http://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf
try:
exif_data = piexif.load(image_data)
# if possible pass filename in instead to prevent reading all data into memory
exif_data = piexif.load(image.name if getattr(image, "name") else _ensure_data(image))
except Exception as e:
# TODO: determ wich error really happens
# Should happen if data is to short --> first_bytes
Expand Down
Loading