Merge pull request #719 from Amsterdam/feature/sig-3510-extra-filetype-check

SIG-3510 add image filetype check to Sigmax PDF rendering
Amsterdam · Feb 19, 2021 · 1011077 · 1011077
2 parents bfb4476 + dd0ce22
commit 1011077
Show file tree

Hide file tree

Showing 8 changed files with 123 additions and 169 deletions.
diff --git a/api/app/signals/apps/api/templates/api/pdf/print_signal.html b/api/app/signals/apps/api/templates/api/pdf/print_signal.html
@@ -180,15 +180,10 @@ <h2>Melder</h2>
 
     <div class="divider">&nbsp;</div>
     <h2>Foto's</h2>
-    {% if images %}
-        {% for image in images %}
-            <p><img src="{{ image.file.url }}" style="width:680px" alt=""></p>
-        {% endfor %}
-        <br>
-    {% elif jpg_data_urls %}  {# HOTFIX SIG-1473 #}
-        {% for data_url in jpg_data_urls %}
-            {% if data_url %}
-                <p><img src="{{ data_url|safe }}" style="width:680px" alt=""></p>
+    {% if jpg_data_uris %}
+        {% for data_uri in jpg_data_uris %}
+            {% if data_uri %}
+                <p><img src="{{ data_uri|safe }}" style="width:680px" alt=""></p>
             {% else %}
                 <p>Image not available</p>
             {% endif %}

diff --git a/api/app/signals/apps/api/v1/views/pdf.py b/api/app/signals/apps/api/v1/views/pdf.py
@@ -6,13 +6,12 @@
 from django.conf import settings
 from django.contrib.staticfiles import finders
 from django.core.exceptions import SuspiciousFileOperation
-from django.core.files.storage import default_storage
 from django.utils import timezone
 from django.views.generic.detail import SingleObjectMixin
-from PIL import Image, UnidentifiedImageError
 
 from signals.apps.api.generics.permissions import SIAPermissions, SignalViewObjectPermission
 from signals.apps.api.pdf.views import PDFTemplateView  # TODO: move these
+from signals.apps.services.domain.images import DataUriImageEncodeService
 from signals.apps.signals.models import Signal
 from signals.apps.signals.utils.map import MapGenerator
 from signals.auth.backend import JWTAuthBackend
@@ -81,58 +80,6 @@ def get_object(self):
         self.check_object_permissions(request=self.request, obj=obj)
         return obj
 
-    def _resize(self, image):
-        # Consider image orientation:
-        if image.width > image.height:
-            # landscape
-            width = self.max_size
-            height = int((self.max_size / image.width) * image.height)
-        else:
-            # portrait
-            width = int((self.max_size / image.height) * image.width)
-            height = self.max_size
-
-        return image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')
-
-    def _get_context_data_images(self, signal):
-        jpg_data_urls = []
-        for att in signal.attachments.all():
-            # Attachment is_image property is currently not reliable
-            _, ext = os.path.splitext(att.file.name)
-            if ext not in ['.gif', '.jpg', '.jpeg', '.png']:
-                continue  # unsupported image format, or not image format
-
-            # Since we want a PDF to be output, we catch, log and ignore errors
-            # while opening attachments. A missing image is not as bad as a
-            # complete failure to render the requested PDF.
-            with io.BytesIO() as buffer:
-                try:
-                    with default_storage.open(att.file.name) as file:
-                        buffer.write(file.read())
-                        image = Image.open(buffer)
-                except UnidentifiedImageError:
-                    # PIL cannot open the attached file it is probably not an image.
-                    msg = f'Cannot open image attachment pk={att.pk}'
-                    logger.warning(msg)
-                    continue
-                except:  # noqa:E722
-                    # Attachment cannot be opened - log the exception.
-                    msg = f'Cannot open image attachment pk={att.pk}'
-                    logger.warning(msg, exc_info=True)
-                    continue
-
-                if image.width > self.max_size or image.height > self.max_size:
-                    image = self._resize(image)
-
-                with io.BytesIO() as new_buffer:
-                    new_buffer = io.BytesIO()
-                    image.save(new_buffer, format='JPEG')
-                    encoded = f'data:image/jpg;base64,{base64.b64encode(new_buffer.getvalue()).decode("utf-8")}'
-
-            jpg_data_urls.append(encoded)
-
-        return jpg_data_urls
-
     def get_context_data(self, **kwargs):
         self.object = self.get_object()
         logo_src = _get_data_uri(settings.API_PDF_LOGO_STATIC_FILE)
@@ -160,12 +107,12 @@ def get_context_data(self, **kwargs):
                 rd_coordinates.x + 340.00,
                 rd_coordinates.y + 125.00,
             )
-        jpg_data_urls = self._get_context_data_images(self.object)
+        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.object, self.max_size)
 
         return super(GeneratePdfView, self).get_context_data(
             bbox=bbox,
             img_data_uri=img_data_uri,
-            jpg_data_urls=jpg_data_urls,
+            jpg_data_uris=jpg_data_uris,
             user=self.request.user,
             logo_src=logo_src,
         )
diff --git a/api/app/signals/apps/services/domain/images.py b/api/app/signals/apps/services/domain/images.py
@@ -0,0 +1,62 @@
+import base64
+import io
+import logging
+import os
+
+from django.core.files.storage import default_storage
+from PIL import Image, UnidentifiedImageError
+
+logger = logging.getLogger(__name__)
+
+
+class DataUriImageEncodeService:
+    """
+    Encode the image attachments of a signal as base64 JPEG data URIs, so they
+    can be embedded directly in the HTML that is rendered to PDF.
+    """
+
+    @staticmethod
+    def resize(image, max_size):
+        """
+        Scale `image` so that its longest side becomes `max_size` pixels while
+        keeping the aspect ratio, and return the result converted to RGB.
+        """
+        # Consider image orientation:
+        if image.width > image.height:
+            # landscape
+            width = max_size
+            # Never let the short side collapse to 0 pixels for extreme aspect ratios.
+            height = max(1, int((max_size / image.width) * image.height))
+        else:
+            # portrait
+            width = max(1, int((max_size / image.height) * image.width))
+            height = max_size
+
+        return image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')
+
+    @staticmethod
+    def get_context_data_images(signal, max_size):
+        """
+        Return a list with one JPEG data URI per usable image attachment of `signal`.
+
+        Attachments are selected on file extension (case-insensitive). Images
+        wider or higher than `max_size` are scaled down. Attachments that cannot
+        be opened, decoded or encoded are logged and left out of the result: a
+        missing image is not as bad as a complete failure to render the PDF.
+        """
+        jpg_data_uris = []
+        for att in signal.attachments.all():
+            # Attachment is_image property is currently not reliable
+            _, ext = os.path.splitext(att.file.name)
+            if ext.lower() not in ['.gif', '.jpg', '.jpeg', '.png']:
+                continue  # unsupported image format, or not image format
+
+            try:
+                with default_storage.open(att.file.name) as file:
+                    raw = file.read()
+
+                # PIL decodes lazily, so resizing and saving must also happen
+                # inside the try block and while the source buffer is still open.
+                with io.BytesIO(raw) as buffer, io.BytesIO() as new_buffer:
+                    image = Image.open(buffer)
+
+                    if image.width > max_size or image.height > max_size:
+                        image = DataUriImageEncodeService.resize(image, max_size)
+                    elif image.mode != 'RGB':
+                        # JPEG cannot store alpha or palette data; without this
+                        # small PNG/GIF attachments fail on save.
+                        image = image.convert('RGB')
+
+                    image.save(new_buffer, format='JPEG')
+                    payload = base64.b64encode(new_buffer.getvalue()).decode('utf-8')
+            except UnidentifiedImageError:
+                # PIL cannot open the attached file it is probably not an image.
+                msg = f'Cannot open image attachment pk={att.pk}'
+                logger.warning(msg)
+                continue
+            except Exception:
+                # Attachment cannot be opened or processed - log the exception.
+                msg = f'Cannot open image attachment pk={att.pk}'
+                logger.warning(msg, exc_info=True)
+                continue
+
+            jpg_data_uris.append(f'data:image/jpg;base64,{payload}')
+
+        return jpg_data_uris
diff --git a/api/app/signals/apps/sigmax/stuf_protocol/outgoing/pdf.py b/api/app/signals/apps/sigmax/stuf_protocol/outgoing/pdf.py
@@ -3,59 +3,20 @@
 """
 import base64
 import logging
-from io import BytesIO
 
-import requests
 import weasyprint
 from django.template.loader import render_to_string
 from django.utils import timezone
-from PIL import Image
-from requests.exceptions import RequestException
 
-from signals.apps.signals.models import Attachment, Signal
+from signals.apps.services.domain.images import DataUriImageEncodeService
+from signals.apps.signals.models import Signal
 
 # Because weasyprint can produce a lot of warnings (unsupported
 # CSS etc.) we ignore them.
 logging.getLogger('weasyprint').setLevel(100)
 logger = logging.getLogger(__name__)
 
 
-def _get_jpg_data_url(attachment: Attachment):
-    """
-    Download image, resize it, base 64 encode it, and finally create a image data URL from it.
-    """
-    # HOTFIX for SIG-1473
-    # - Weazyprint JPG support assumes GDK Bixbuf - https://github.com/Kozea/WeasyPrint/issues/428)
-    # - long term solution must use a image resizing micro-service
-
-    MAX_SIZE = 800  # width and height no larger than this value
-
-    try:
-        data = BytesIO(requests.get(attachment.file.url).content)  # try/except network stuff
-    except RequestException:
-        logger.warning('Could not access: {} for resizing.'.format(attachment.file.url))
-        return None
-
-    image = Image.open(data)
-
-    # Consider image orientation:
-    if image.width > image.height:
-        # landscape
-        width = MAX_SIZE
-        height = int((MAX_SIZE / image.width) * image.height)
-    else:
-        # portrait
-        width = int((MAX_SIZE / image.height) * image.width)
-        height = MAX_SIZE
-
-    resized = image.resize(size=(width, height), resample=Image.LANCZOS).convert('RGB')
-
-    buffer = BytesIO()
-    resized.save(buffer, format='JPEG')
-
-    return 'data:image/jpg;base64,' + base64.b64encode(buffer.getvalue()).decode('utf-8')
-
-
 def _render_html(signal: Signal):
     """Render given `Signal` with HTML template used for PDF generation.
 
@@ -70,18 +31,14 @@ def _render_html(signal: Signal):
         rd_coordinates.y + 125.00,
     )
     # HOTFIX for SIG-1473
-    jpg_data_urls = []
-    for attachment in signal.attachments.all():
-        data_url = _get_jpg_data_url(attachment)
-        jpg_data_urls.append(data_url)
-        assert data_url is None or data_url.startswith('data:image/jpg')
+    jpg_data_uris = DataUriImageEncodeService.get_context_data_images(signal, 800)
 
     context = {
         'signal': signal,
         'now': timezone.datetime.now(),
         'bbox': bbox,
         'user': None,
-        'jpg_data_urls': jpg_data_urls,
+        'jpg_data_uris': jpg_data_uris,
     }
     return render_to_string('api/pdf/print_signal.html', context=context)
 

diff --git a/api/app/tests/apps/api/v1/test_pdf.py b/api/app/tests/apps/api/v1/test_pdf.py
@@ -1,17 +1,7 @@
-from io import BytesIO
-from unittest.mock import MagicMock
 
 from django.contrib.auth.models import Permission
-from django.test import override_settings
-from PIL import Image
-
-from signals.apps.api.v1.views import GeneratePdfView
-from signals.apps.signals.factories import (
-    AttachmentFactory,
-    CategoryFactory,
-    DepartmentFactory,
-    SignalFactory
-)
+
+from signals.apps.signals.factories import CategoryFactory, DepartmentFactory, SignalFactory
 from tests.test import SIAReadWriteUserMixin, SignalsBaseApiTestCase
 
 
@@ -46,51 +36,6 @@ def test_get_pdf_signal_not_loggedin(self):
 
         self.assertEqual(response.status_code, 401)
 
-    @override_settings(API_PDF_RESIZE_IMAGES_TO=800)
-    def test_resize_image_too_wide(self):
-        too_wide = MagicMock()
-        too_wide.width = 1600
-        too_wide.height = 800
-        gpv = GeneratePdfView()
-        gpv._resize(too_wide)
-
-        too_wide.resize.assert_called_with(size=(800, 400), resample=Image.LANCZOS)
-
-    @override_settings(API_PDF_RESIZE_IMAGES_TO=800)
-    def test_resize_iamge_too_heigh(self):
-        too_heigh = MagicMock()
-        too_heigh.width = 800
-        too_heigh.height = 1600
-        gpv = GeneratePdfView()
-        gpv._resize(too_heigh)
-
-        too_heigh.resize.assert_called_with(size=(400, 800), resample=Image.LANCZOS)
-
-    def test_get_context_data_no_images(self):
-        AttachmentFactory(_signal=self.signal, file__filename='blah.txt', file__data=b'blah', is_image=False)
-        gpv = GeneratePdfView()
-        jpg_data_urls = gpv._get_context_data_images(self.signal)
-        self.assertEqual(len(jpg_data_urls), 0)
-
-    def test_get_context_data_invalid_images(self):
-        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=b'blah', is_image=True)
-        gpv = GeneratePdfView()
-        jpg_data_urls = gpv._get_context_data_images(self.signal)
-        self.assertEqual(len(jpg_data_urls), 0)
-
-    @override_settings(API_PDF_RESIZE_IMAGES_TO=80)
-    def test_get_context_data_valid_image(self):
-        image = Image.new("RGB", (100, 100), (0, 0, 0))
-        buffer = BytesIO()
-        image.save(buffer, format='JPEG')
-
-        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=buffer.getvalue())
-        gpv = GeneratePdfView()
-        jpg_data_urls = gpv._get_context_data_images(self.signal)
-        self.assertEqual(len(jpg_data_urls), 1)
-        self.assertEqual(jpg_data_urls[0][:22], 'data:image/jpg;base64,')
-        self.assertGreater(len(jpg_data_urls[0]), 22)
-
 
 class TestPDFPermissions(SIAReadWriteUserMixin, SignalsBaseApiTestCase):
     # Accessing PDFs must follow the same access rules as the signals.

diff --git a/api/app/tests/apps/services/__init__.py b/api/app/tests/apps/services/__init__.py
diff --git a/api/app/tests/apps/services/domain/__init__.py b/api/app/tests/apps/services/domain/__init__.py
diff --git a/api/app/tests/apps/services/domain/images.py b/api/app/tests/apps/services/domain/images.py
@@ -0,0 +1,48 @@
+from io import BytesIO
+from unittest.mock import MagicMock
+
+from PIL import Image
+
+# The service lives in signals/apps/services/domain/images.py
+from signals.apps.services.domain.images import DataUriImageEncodeService
+from signals.apps.signals.factories import AttachmentFactory, SignalFactory
+from tests.test import SIAReadWriteUserMixin, SignalsBaseApiTestCase
+
+
+class TestImagesService(SIAReadWriteUserMixin, SignalsBaseApiTestCase):
+    """
+    Tests for DataUriImageEncodeService: resizing and data URI encoding of
+    signal attachments.
+    """
+
+    def setUp(self):
+        self.signal = SignalFactory.create()
+
+    def test_resize_image_too_wide(self):
+        # Landscape: the width is capped, the height is scaled proportionally.
+        too_wide = MagicMock()
+        too_wide.width = 1600
+        too_wide.height = 800
+        DataUriImageEncodeService.resize(too_wide, 800)
+        too_wide.resize.assert_called_with(size=(800, 400), resample=Image.LANCZOS)
+
+    def test_resize_iamge_too_heigh(self):
+        # Portrait: the height is capped, the width is scaled proportionally.
+        too_high = MagicMock()
+        too_high.width = 800
+        too_high.height = 1600
+        DataUriImageEncodeService.resize(too_high, 800)
+        too_high.resize.assert_called_with(size=(400, 800), resample=Image.LANCZOS)
+
+    def test_get_context_data_no_images(self):
+        # Attachments without an image file extension are skipped.
+        AttachmentFactory(_signal=self.signal, file__filename='blah.txt', file__data=b'blah', is_image=False)
+        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 800)
+        self.assertEqual(len(jpg_data_uris), 0)
+
+    def test_get_context_data_invalid_images(self):
+        # An image extension with non-image content is skipped, not raised.
+        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=b'blah', is_image=True)
+        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 800)
+        self.assertEqual(len(jpg_data_uris), 0)
+
+    def test_get_context_data_valid_image(self):
+        # A real 100x100 JPEG, larger than max_size=80, is resized and encoded.
+        image = Image.new("RGB", (100, 100), (0, 0, 0))
+        buffer = BytesIO()
+        image.save(buffer, format='JPEG')
+
+        AttachmentFactory.create(_signal=self.signal, file__filename='blah.jpg', file__data=buffer.getvalue())
+        jpg_data_uris = DataUriImageEncodeService.get_context_data_images(self.signal, 80)
+        self.assertEqual(len(jpg_data_uris), 1)
+        self.assertEqual(jpg_data_uris[0][:22], 'data:image/jpg;base64,')
+        self.assertGreater(len(jpg_data_uris[0]), 22)