Skip to content

Commit

Permalink
Merge pull request #719 from Amsterdam/feature/sig-3510-extra-filetyp…
Browse files Browse the repository at this point in the history
…e-check

SIG-3510 add image filetype check to Sigmax PDF rendering
  • Loading branch information
vanbuiten authored Feb 19, 2021
2 parents bfb4476 + dd0ce22 commit 1011077
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 169 deletions.
13 changes: 4 additions & 9 deletions api/app/signals/apps/api/templates/api/pdf/print_signal.html
Original file line number Diff line number Diff line change
Expand Up @@ -180,15 +180,10 @@ <h2>Melder</h2>

<div class="divider">&nbsp;</div>
<h2>Foto's</h2>
{% if images %}
{% for image in images %}
<p><img src="{{ image.file.url }}" style="width:680px" alt=""></p>
{% endfor %}
<br>
{% elif jpg_data_urls %} {# HOTFIX SIG-1473 #}
{% for data_url in jpg_data_urls %}
{% if data_url %}
<p><img src="{{ data_url|safe }}" style="width:680px" alt=""></p>
{% if jpg_data_uris %}
{% for data_uri in jpg_data_uris %}
{% if data_uri %}
<p><img src="{{ data_uri|safe }}" style="width:680px" alt=""></p>
{% else %}
<p>Image not available</p>
{% endif %}
Expand Down
59 changes: 3 additions & 56 deletions api/app/signals/apps/api/v1/views/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
from django.conf import settings
from django.contrib.staticfiles import finders
from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import default_storage
from django.utils import timezone
from django.views.generic.detail import SingleObjectMixin
from PIL import Image, UnidentifiedImageError

from signals.apps.api.generics.permissions import SIAPermissions, SignalViewObjectPermission
from signals.apps.api.pdf.views import PDFTemplateView # TODO: move these
from signals.apps.services.domain.images import DataUriImageEncodeService
from signals.apps.signals.models import Signal
from signals.apps.signals.utils.map import MapGenerator
from signals.auth.backend import JWTAuthBackend
Expand Down Expand Up @@ -81,58 +80,6 @@ def get_object(self):
self.check_object_permissions(request=self.request, obj=obj)
return obj

def _resize(self, image):
# Consider image orientation:
if image.width > image.height:
# landscape
width = self.max_size
height = int((self.max_size / image.width) * image.height)
else:
# portrait
width = int((self.max_size / image.height) * image.width)
height = self.max_size

return image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')

def _get_context_data_images(self, signal):
jpg_data_urls = []
for att in signal.attachments.all():
# Attachment is_image property is currently not reliable
_, ext = os.path.splitext(att.file.name)
if ext not in ['.gif', '.jpg', '.jpeg', '.png']:
continue # unsupported image format, or not image format

# Since we want a PDF to be output, we catch, log and ignore errors
# while opening attachments. A missing image is not as bad as a
# complete failure to render the requested PDF.
with io.BytesIO() as buffer:
try:
with default_storage.open(att.file.name) as file:
buffer.write(file.read())
image = Image.open(buffer)
except UnidentifiedImageError:
# PIL cannot open the attached file it is probably not an image.
msg = f'Cannot open image attachment pk={att.pk}'
logger.warning(msg)
continue
except: # noqa:E722
# Attachment cannot be opened - log the exception.
msg = f'Cannot open image attachment pk={att.pk}'
logger.warning(msg, exc_info=True)
continue

if image.width > self.max_size or image.height > self.max_size:
image = self._resize(image)

with io.BytesIO() as new_buffer:
new_buffer = io.BytesIO()
image.save(new_buffer, format='JPEG')
encoded = f'data:image/jpg;base64,{base64.b64encode(new_buffer.getvalue()).decode("utf-8")}'

jpg_data_urls.append(encoded)

return jpg_data_urls

def get_context_data(self, **kwargs):
self.object = self.get_object()
logo_src = _get_data_uri(settings.API_PDF_LOGO_STATIC_FILE)
Expand Down Expand Up @@ -160,12 +107,12 @@ def get_context_data(self, **kwargs):
rd_coordinates.x + 340.00,
rd_coordinates.y + 125.00,
)
jpg_data_urls = self._get_context_data_images(self.object)
jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.object, self.max_size)

return super(GeneratePdfView, self).get_context_data(
bbox=bbox,
img_data_uri=img_data_uri,
jpg_data_urls=jpg_data_urls,
jpg_data_uris=jpg_data_uris,
user=self.request.user,
logo_src=logo_src,
)
62 changes: 62 additions & 0 deletions api/app/signals/apps/services/domain/images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import base64
import io
import logging
import os

from django.core.files.storage import default_storage
from PIL import Image, UnidentifiedImageError

logger = logging.getLogger(__name__)


class DataUriImageEncodeService:
    """Turn the image attachments of a signal into JPEG data URIs.

    The resulting strings are meant to be embedded directly in the ``src``
    attribute of ``<img>`` tags of the PDF templates.
    """

    # File extensions (compared lower-cased) that are treated as images.
    SUPPORTED_EXTENSIONS = ('.gif', '.jpg', '.jpeg', '.png')

    @staticmethod
    def resize(image, max_size):
        """Scale `image` so that its longest side equals `max_size`.

        The aspect ratio is preserved and the result is converted to RGB.

        :param image: PIL image (anything exposing ``width``, ``height`` and
            ``resize``).
        :param max_size: length in pixels of the longest side of the result.
        :returns: the resized image in RGB mode.
        """
        # Consider image orientation. The shorter side is clamped to at least
        # one pixel: int() truncates, and a zero dimension is not a valid size.
        if image.width > image.height:
            # landscape
            width = max_size
            height = max(1, int((max_size / image.width) * image.height))
        else:
            # portrait (or square)
            width = max(1, int((max_size / image.height) * image.width))
            height = max_size

        return image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')

    @staticmethod
    def _to_jpeg_data_uri(image, max_size):
        """Return `image` as a base64 encoded JPEG data URI.

        Images larger than `max_size` in either dimension are scaled down
        first. Whatever the source format, the image is saved as RGB because
        the JPEG writer cannot store palette or alpha modes.
        """
        if image.width > max_size or image.height > max_size:
            image = DataUriImageEncodeService.resize(image, max_size)

        # Small images skip resize() and therefore its RGB conversion; without
        # this an RGBA PNG or palette GIF makes image.save() raise.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        with io.BytesIO() as jpeg_buffer:
            image.save(jpeg_buffer, format='JPEG')
            payload = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')

        # 'image/jpg' is kept as-is because templates and tests rely on this
        # exact prefix.
        return f'data:image/jpg;base64,{payload}'

    @staticmethod
    def get_context_data_images(signal, max_size):
        """Collect JPEG data URIs for all usable image attachments of `signal`.

        Attachments that do not have a supported image extension, cannot be
        read from storage, or cannot be decoded are logged and skipped: a
        missing image is preferable to failing to render the document.

        :param signal: object whose ``attachments.all()`` yields attachments
            with ``pk`` and ``file.name``.
        :param max_size: maximum width and height in pixels of the images.
        :returns: list of ``data:image/jpg;base64,...`` strings.
        """
        jpg_data_uris = []
        for att in signal.attachments.all():
            # Attachment is_image property is currently not reliable
            _, ext = os.path.splitext(att.file.name)
            if ext.lower() not in DataUriImageEncodeService.SUPPORTED_EXTENSIONS:
                continue  # unsupported image format, or not image format

            with io.BytesIO() as buffer:
                try:
                    with default_storage.open(att.file.name) as file:
                        buffer.write(file.read())
                    image = Image.open(buffer)
                except UnidentifiedImageError:
                    # PIL cannot open the attached file; it is probably not an image.
                    msg = f'Cannot open image attachment pk={att.pk}'
                    logger.warning(msg)
                    continue
                except Exception:
                    # Attachment cannot be opened - log the exception.
                    msg = f'Cannot open image attachment pk={att.pk}'
                    logger.warning(msg, exc_info=True)
                    continue

                # Image.open() is lazy: truncated or corrupt files only fail
                # once pixel data is needed, so guard the conversion as well.
                try:
                    encoded = DataUriImageEncodeService._to_jpeg_data_uri(image, max_size)
                except Exception:
                    msg = f'Cannot convert image attachment pk={att.pk} to JPEG'
                    logger.warning(msg, exc_info=True)
                    continue

            jpg_data_uris.append(encoded)

        return jpg_data_uris
51 changes: 4 additions & 47 deletions api/app/signals/apps/sigmax/stuf_protocol/outgoing/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,20 @@
"""
import base64
import logging
from io import BytesIO

import requests
import weasyprint
from django.template.loader import render_to_string
from django.utils import timezone
from PIL import Image
from requests.exceptions import RequestException

from signals.apps.signals.models import Attachment, Signal
from signals.apps.services.domain.images import DataUriImageEncodeService
from signals.apps.signals.models import Signal

# Because weasyprint can produce a lot of warnings (unsupported
# CSS etc.) we ignore them.
logging.getLogger('weasyprint').setLevel(100)
logger = logging.getLogger(__name__)


def _get_jpg_data_url(attachment: Attachment):
"""
Download image, resize it, base 64 encode it, and finally create a image data URL from it.
"""
# HOTFIX for SIG-1473
# - Weazyprint JPG support assumes GDK Bixbuf - https://github.com/Kozea/WeasyPrint/issues/428)
# - long term solution must use a image resizing micro-service

MAX_SIZE = 800 # width and height no larger than this value

try:
data = BytesIO(requests.get(attachment.file.url).content) # try/except network stuff
except RequestException:
logger.warning('Could not access: {} for resizing.'.format(attachment.file.url))
return None

image = Image.open(data)

# Consider image orientation:
if image.width > image.height:
# landscape
width = MAX_SIZE
height = int((MAX_SIZE / image.width) * image.height)
else:
# portrait
width = int((MAX_SIZE / image.height) * image.width)
height = MAX_SIZE

resized = image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')

buffer = BytesIO()
resized.save(buffer, format='JPEG')

return 'data:image/jpg;base64,' + base64.b64encode(buffer.getvalue()).decode('utf-8')


def _render_html(signal: Signal):
"""Render given `Signal` with HTML template used for PDF generation.
Expand All @@ -70,18 +31,14 @@ def _render_html(signal: Signal):
rd_coordinates.y + 125.00,
)
# HOTFIX for SIG-1473
jpg_data_urls = []
for attachment in signal.attachments.all():
data_url = _get_jpg_data_url(attachment)
jpg_data_urls.append(data_url)
assert data_url is None or data_url.startswith('data:image/jpg')
jpg_data_uris = DataUriImageEncodeService.get_context_data_images(signal, 800)

context = {
'signal': signal,
'now': timezone.datetime.now(),
'bbox': bbox,
'user': None,
'jpg_data_urls': jpg_data_urls,
'jpg_data_uris': jpg_data_uris,
}
return render_to_string('api/pdf/print_signal.html', context=context)

Expand Down
59 changes: 2 additions & 57 deletions api/app/tests/apps/api/v1/test_pdf.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,7 @@
from io import BytesIO
from unittest.mock import MagicMock

from django.contrib.auth.models import Permission
from django.test import override_settings
from PIL import Image

from signals.apps.api.v1.views import GeneratePdfView
from signals.apps.signals.factories import (
AttachmentFactory,
CategoryFactory,
DepartmentFactory,
SignalFactory
)

from signals.apps.signals.factories import CategoryFactory, DepartmentFactory, SignalFactory
from tests.test import SIAReadWriteUserMixin, SignalsBaseApiTestCase


Expand Down Expand Up @@ -46,51 +36,6 @@ def test_get_pdf_signal_not_loggedin(self):

self.assertEqual(response.status_code, 401)

@override_settings(API_PDF_RESIZE_IMAGES_TO=800)
def test_resize_image_too_wide(self):
too_wide = MagicMock()
too_wide.width = 1600
too_wide.height = 800
gpv = GeneratePdfView()
gpv._resize(too_wide)

too_wide.resize.assert_called_with(size=(800, 400), resample=Image.LANCZOS)

@override_settings(API_PDF_RESIZE_IMAGES_TO=800)
def test_resize_iamge_too_heigh(self):
too_heigh = MagicMock()
too_heigh.width = 800
too_heigh.height = 1600
gpv = GeneratePdfView()
gpv._resize(too_heigh)

too_heigh.resize.assert_called_with(size=(400, 800), resample=Image.LANCZOS)

def test_get_context_data_no_images(self):
AttachmentFactory(_signal=self.signal, file__filename='blah.txt', file__data=b'blah', is_image=False)
gpv = GeneratePdfView()
jpg_data_urls = gpv._get_context_data_images(self.signal)
self.assertEqual(len(jpg_data_urls), 0)

def test_get_context_data_invalid_images(self):
AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=b'blah', is_image=True)
gpv = GeneratePdfView()
jpg_data_urls = gpv._get_context_data_images(self.signal)
self.assertEqual(len(jpg_data_urls), 0)

@override_settings(API_PDF_RESIZE_IMAGES_TO=80)
def test_get_context_data_valid_image(self):
image = Image.new("RGB", (100, 100), (0, 0, 0))
buffer = BytesIO()
image.save(buffer, format='JPEG')

AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=buffer.getvalue())
gpv = GeneratePdfView()
jpg_data_urls = gpv._get_context_data_images(self.signal)
self.assertEqual(len(jpg_data_urls), 1)
self.assertEqual(jpg_data_urls[0][:22], 'data:image/jpg;base64,')
self.assertGreater(len(jpg_data_urls[0]), 22)


class TestPDFPermissions(SIAReadWriteUserMixin, SignalsBaseApiTestCase):
# Accessing PDFs must follow the same access rules as the signals.
Expand Down
Empty file.
Empty file.
48 changes: 48 additions & 0 deletions api/app/tests/apps/services/domain/images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# NOTE(review): this module is named images.py; the default unittest/pytest
# discovery patterns match test*.py - confirm that it is actually collected.
from io import BytesIO
from unittest.mock import MagicMock

from PIL import Image

from signals.apps.services.domain.images import DataUriImageEncodeService
from signals.apps.signals.factories import AttachmentFactory, SignalFactory
from tests.test import SIAReadWriteUserMixin, SignalsBaseApiTestCase


class TestImagesService(SIAReadWriteUserMixin, SignalsBaseApiTestCase):
    """Tests for DataUriImageEncodeService (resizing and data URI encoding)."""

    def setUp(self):
        self.signal = SignalFactory.create()

    def test_resize_image_too_wide(self):
        """A landscape image is scaled so that its width equals the maximum."""
        too_wide = MagicMock()
        too_wide.width = 1600
        too_wide.height = 800
        DataUriImageEncodeService.resize(too_wide, 800)
        too_wide.resize.assert_called_with(size=(800, 400), resample=Image.LANCZOS)

    def test_resize_image_too_high(self):
        """A portrait image is scaled so that its height equals the maximum."""
        too_high = MagicMock()
        too_high.width = 800
        too_high.height = 1600
        DataUriImageEncodeService.resize(too_high, 800)
        too_high.resize.assert_called_with(size=(400, 800), resample=Image.LANCZOS)

    def test_get_context_data_no_images(self):
        """Attachments without an image extension are ignored."""
        AttachmentFactory(_signal=self.signal, file__filename='blah.txt', file__data=b'blah', is_image=False)
        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 800)
        self.assertEqual(len(jpg_data_uris), 0)

    def test_get_context_data_invalid_images(self):
        """Files with an image extension that PIL cannot open are skipped."""
        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=b'blah', is_image=True)
        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 800)
        self.assertEqual(len(jpg_data_uris), 0)

    def test_get_context_data_valid_image(self):
        """A valid JPEG larger than the maximum size yields one data URI."""
        image = Image.new("RGB", (100, 100), (0, 0, 0))
        buffer = BytesIO()
        image.save(buffer, format='JPEG')

        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=buffer.getvalue())
        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 80)
        self.assertEqual(len(jpg_data_uris), 1)
        self.assertEqual(jpg_data_uris[0][:22], 'data:image/jpg;base64,')
        self.assertGreater(len(jpg_data_uris[0]), 22)

0 comments on commit 1011077

Please sign in to comment.