From 298355cf82cd29f360ec63741cf57e32cc6a5742 Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Fri, 28 Apr 2023 08:07:30 -0600 Subject: [PATCH] media/{download,thumbnail}: support timeout_ms parameter Signed-off-by: Sumner Evans --- synapse/api/errors.py | 1 + synapse/media/_base.py | 6 ++ synapse/media/media_repository.py | 98 ++++++++++++++++++++---- synapse/rest/media/download_resource.py | 27 +++++-- synapse/rest/media/thumbnail_resource.py | 65 +++++++++++----- 5 files changed, 154 insertions(+), 43 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index f2d6f9ab2d9e..6961a013ad16 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -80,6 +80,7 @@ class Codes(str, Enum): WEAK_PASSWORD = "M_WEAK_PASSWORD" INVALID_SIGNATURE = "M_INVALID_SIGNATURE" USER_DEACTIVATED = "M_USER_DEACTIVATED" + NOT_YET_UPLOADED = "M_NOT_YET_UPLOADED" # Part of MSC3848 # https://github.com/matrix-org/matrix-spec-proposals/pull/3848 diff --git a/synapse/media/_base.py b/synapse/media/_base.py index ef8334ae2586..93c93998dcc6 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -50,6 +50,12 @@ "text/xml", ] +# Default timeout_ms for download and thumbnail requests +DEFAULT_MAX_TIMEOUT_MS = 20_000 + +# Maximum allowed timeout_ms for download and thumbnail requests +MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000 + def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: """Parses the server name, media ID and optional file name from the request URI diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 86ef3b5ec297..1c35b3de48d3 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -17,7 +17,7 @@ import os import shutil from io import BytesIO -from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple +from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple from matrix_common.types.mxc_uri import MXCUri @@ -32,8 +32,10 @@ 
NotFoundError, RequestSendFailed, SynapseError, + cs_error, ) from synapse.config.repository import ThumbnailRequirement +from synapse.http.server import respond_with_json from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.media._base import ( @@ -300,8 +302,62 @@ async def create_content( return MXCUri(self.server_name, media_id) + def respond_not_yet_uploaded(self, request: SynapseRequest) -> None: + respond_with_json( + request, + 404, + cs_error("Media has not been uploaded yet", code=Codes.NOT_YET_UPLOADED), + send_cors=True, + ) + + async def get_local_media_info( + self, request: SynapseRequest, media_id: str, max_timeout_ms: int + ) -> Optional[Dict[str, Any]]: + """Gets the info dictionary for the given local media ID. If the media has + not been uploaded yet, this function will wait up to ``max_timeout_ms`` + milliseconds for the media to be uploaded. + Args: + request: The incoming request. + media_id: The media ID of the content. (This is the same as + the file_id for local content.) + max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. + Returns: + Either the info dictionary for the given local media ID or + ``None``. If ``None``, then no further processing is necessary as + this function will send the necessary JSON response.
+ """ + wait_until = self.clock.time_msec() + max_timeout_ms + while True: + # Get the info for the media + media_info = await self.store.get_local_media(media_id) + if not media_info: + respond_404(request) + return None + + if media_info["quarantined_by"]: + logger.info("Media is quarantined") + respond_404(request) + return None + + # The file has been uploaded, so stop looping + if media_info.get("media_length") is not None: + return media_info + + if self.clock.time_msec() >= wait_until: + break + + await self.clock.sleep(0.5) + + self.respond_not_yet_uploaded(request) + return None + async def get_local_media( - self, request: SynapseRequest, media_id: str, name: Optional[str] + self, + request: SynapseRequest, + media_id: str, + name: Optional[str], + max_timeout_ms: int, ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -311,13 +367,14 @@ async def get_local_media( the file_id for local content.) name: Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. + max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. Returns: Resolves once a response has successfully been written to request """ - media_info = await self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - respond_404(request) + media_info = await self.get_local_media_info(request, media_id, max_timeout_ms) + if not media_info: return self.mark_recently_accessed(None, media_id) @@ -342,6 +399,7 @@ async def get_remote_media( server_name: str, media_id: str, name: Optional[str], + max_timeout_ms: int, ) -> None: """Respond to requests for remote media. @@ -351,6 +409,8 @@ async def get_remote_media( media_id: The media ID of the content (as defined by the remote server). name: Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. 
+ max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. Returns: Resolves once a response has successfully been written to request @@ -368,11 +428,11 @@ async def get_remote_media( key = (server_name, media_id) async with self.remote_media_linearizer.queue(key): responder, media_info = await self._get_remote_media_impl( - server_name, media_id + server_name, media_id, max_timeout_ms ) # We deliberately stream the file outside the lock - if responder: + if responder and media_info: media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] @@ -380,15 +440,19 @@ async def get_remote_media( request, responder, media_type, media_length, upload_name ) else: - respond_404(request) + self.respond_not_yet_uploaded(request) - async def get_remote_media_info(self, server_name: str, media_id: str) -> dict: + async def get_remote_media_info( + self, server_name: str, media_id: str, max_timeout_ms: int + ) -> dict: """Gets the media info associated with the remote file, downloading if necessary. Args: server_name: Remote server_name where the media originated. media_id: The media ID of the content (as defined by the remote server). + max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. 
Returns: The media info of the file @@ -404,7 +468,7 @@ async def get_remote_media_info(self, server_name: str, media_id: str) -> dict: key = (server_name, media_id) async with self.remote_media_linearizer.queue(key): responder, media_info = await self._get_remote_media_impl( - server_name, media_id + server_name, media_id, max_timeout_ms ) # Ensure we actually use the responder so that it releases resources @@ -415,7 +479,7 @@ async def get_remote_media_info(self, server_name: str, media_id: str) -> dict: return media_info async def _get_remote_media_impl( - self, server_name: str, media_id: str + self, server_name: str, media_id: str, max_timeout_ms: int ) -> Tuple[Optional[Responder], dict]: """Looks for media in local cache, if not there then attempt to download from remote server. @@ -424,6 +488,8 @@ async def _get_remote_media_impl( server_name: Remote server_name where the media originated. media_id: The media ID of the content (as defined by the remote server). + max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. Returns: A tuple of responder and the media info of the file. @@ -454,8 +520,7 @@ async def _get_remote_media_impl( try: media_info = await self._download_remote_file( - server_name, - media_id, + server_name, media_id, max_timeout_ms ) except SynapseError: raise @@ -488,6 +553,7 @@ async def _download_remote_file( self, server_name: str, media_id: str, + max_timeout_ms: int, ) -> dict: """Attempt to download the remote file from the given server name, using the given file_id as the local id. @@ -497,7 +563,8 @@ async def _download_remote_file( media_id: The media ID of the content (as defined by the remote server). This is different than the file_id, which is locally generated. - file_id: Local file ID + max_timeout_ms: the maximum number of milliseconds to wait for the + media to be uploaded. Returns: The media info of the file. 
@@ -521,7 +588,8 @@ async def _download_remote_file( # tell the remote server to 404 if it doesn't # recognise the server_name, to make sure we don't # end up with a routing loop. - "allow_remote": "false" + "allow_remote": "false", + "timeout_ms": str(max_timeout_ms), }, ) except RequestSendFailed as e: diff --git a/synapse/rest/media/download_resource.py b/synapse/rest/media/download_resource.py index 8f270cf4ccb8..f9fc61aafeda 100644 --- a/synapse/rest/media/download_resource.py +++ b/synapse/rest/media/download_resource.py @@ -20,9 +20,14 @@ set_corp_headers, set_cors_headers, ) -from synapse.http.servlet import parse_boolean +from synapse.http.servlet import parse_boolean, parse_integer from synapse.http.site import SynapseRequest -from synapse.media._base import parse_media_id, respond_404 +from synapse.media._base import ( + DEFAULT_MAX_TIMEOUT_MS, + MAXIMUM_ALLOWED_MAX_TIMEOUT_MS, + parse_media_id, + respond_404, +) if TYPE_CHECKING: from synapse.media.media_repository import MediaRepository @@ -54,13 +59,17 @@ async def _async_render_GET(self, request: SynapseRequest) -> None: ) # Limited non-standard form of CSP for IE11 request.setHeader(b"X-Content-Security-Policy", b"sandbox;") - request.setHeader( - b"Referrer-Policy", - b"no-referrer", - ) + request.setHeader(b"Referrer-Policy", b"no-referrer") server_name, media_id, name = parse_media_id(request) + max_timeout_ms = parse_integer( + request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS + ) + max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS) + if server_name == self.server_name: - await self.media_repo.get_local_media(request, media_id, name) + await self.media_repo.get_local_media( + request, media_id, name, max_timeout_ms + ) else: allow_remote = parse_boolean(request, "allow_remote", default=True) if not allow_remote: @@ -72,4 +81,6 @@ async def _async_render_GET(self, request: SynapseRequest) -> None: respond_404(request) return - await self.media_repo.get_remote_media(request, 
server_name, media_id, name) + await self.media_repo.get_remote_media( + request, server_name, media_id, name, max_timeout_ms + ) diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py index 4ee2a0dbda79..900f6714b3f6 100644 --- a/synapse/rest/media/thumbnail_resource.py +++ b/synapse/rest/media/thumbnail_resource.py @@ -28,6 +28,8 @@ from synapse.http.servlet import parse_integer, parse_string from synapse.http.site import SynapseRequest from synapse.media._base import ( + DEFAULT_MAX_TIMEOUT_MS, + MAXIMUM_ALLOWED_MAX_TIMEOUT_MS, FileInfo, ThumbnailInfo, parse_media_id, @@ -70,26 +72,37 @@ async def _async_render_GET(self, request: SynapseRequest) -> None: method = parse_string(request, "method", "scale") # TODO Parse the Accept header to get an prioritised list of thumbnail types. m_type = "image/png" + max_timeout_ms = parse_integer( + request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS + ) + max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS) if server_name == self.server_name: if self.dynamic_thumbnails: await self._select_or_generate_local_thumbnail( - request, media_id, width, height, method, m_type + request, media_id, width, height, method, m_type, max_timeout_ms ) else: await self._respond_local_thumbnail( - request, media_id, width, height, method, m_type + request, media_id, width, height, method, m_type, max_timeout_ms ) self.media_repo.mark_recently_accessed(None, media_id) else: - if self.dynamic_thumbnails: - await self._select_or_generate_remote_thumbnail( - request, server_name, media_id, width, height, method, m_type - ) - else: - await self._respond_remote_thumbnail( - request, server_name, media_id, width, height, method, m_type - ) + remote_resp_function = ( + self._select_or_generate_remote_thumbnail + if self.dynamic_thumbnails + else self._respond_remote_thumbnail + ) + await remote_resp_function( + request, + server_name, + media_id, + width, + height, + method, + m_type, + 
max_timeout_ms, + ) self.media_repo.mark_recently_accessed(server_name, media_id) async def _respond_local_thumbnail( @@ -100,15 +113,12 @@ async def _respond_local_thumbnail( height: int, method: str, m_type: str, + max_timeout_ms: int, ) -> None: - media_info = await self.store.get_local_media(media_id) - + media_info = await self.media_repo.get_local_media_info( + request, media_id, max_timeout_ms + ) if not media_info: - respond_404(request) - return - if media_info["quarantined_by"]: - logger.info("Media is quarantined") - respond_404(request) return thumbnail_infos = await self.store.get_local_media_thumbnails(media_id) @@ -133,8 +143,13 @@ async def _select_or_generate_local_thumbnail( desired_height: int, desired_method: str, desired_type: str, + max_timeout_ms: int, ) -> None: - media_info = await self.store.get_local_media(media_id) + media_info = await self.media_repo.get_local_media_info( + request, media_id, max_timeout_ms + ) + if not media_info: + return if not media_info: respond_404(request) @@ -199,8 +214,13 @@ async def _select_or_generate_remote_thumbnail( desired_height: int, desired_method: str, desired_type: str, + max_timeout_ms: int, ) -> None: - media_info = await self.media_repo.get_remote_media_info(server_name, media_id) + media_info = await self.media_repo.get_remote_media_info( + server_name, media_id, max_timeout_ms + ) + if not media_info: + return thumbnail_infos = await self.store.get_remote_media_thumbnails( server_name, media_id @@ -262,11 +282,16 @@ async def _respond_remote_thumbnail( height: int, method: str, m_type: str, + max_timeout_ms: int, ) -> None: # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. 
- media_info = await self.media_repo.get_remote_media_info(server_name, media_id) + media_info = await self.media_repo.get_remote_media_info( + server_name, media_id, max_timeout_ms + ) + if not media_info: + return thumbnail_infos = await self.store.get_remote_media_thumbnails( server_name, media_id