From 660c30bb2ba8fc0f151d7b42403e016ac13db449 Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Mon, 30 Sep 2024 14:09:47 +0000 Subject: [PATCH] refactor: Fix content server methods and tests (finish method lift) Some lingering cleanup from the transition from a middleware to a view. See https://github.com/openedx/edx-platform/issues/34702 for context. - Remove IMPL and self/StaticContentServer references - Add newlines to satisfy code style linter - Fix test references - Update module docstring --- .../contentserver/test/test_contentserver.py | 20 +++---- .../core/djangoapps/contentserver/views.py | 59 ++++++++----------- 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/openedx/core/djangoapps/contentserver/test/test_contentserver.py b/openedx/core/djangoapps/contentserver/test/test_contentserver.py index e1c5e01c0a7f..02899334eed4 100644 --- a/openedx/core/djangoapps/contentserver/test/test_contentserver.py +++ b/openedx/core/djangoapps/contentserver/test/test_contentserver.py @@ -1,5 +1,5 @@ """ -Tests for StaticContentServer +Tests for content server. """ @@ -27,7 +27,7 @@ from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, SharedModuleStoreTestCase from xmodule.modulestore.xml_importer import import_course_from_xml -from ..views import HTTP_DATE_FORMAT, StaticContentServer, parse_range_header +from .. import views log = logging.getLogger(__name__) @@ -356,8 +356,8 @@ def test_cache_headers_without_ttl_locked(self, mock_get_cache_ttl): assert 'private, no-cache, no-store' == resp['Cache-Control'] def test_get_expiration_value(self): - start_dt = datetime.datetime.strptime("Thu, 01 Dec 1983 20:00:00 GMT", HTTP_DATE_FORMAT) - near_expire_dt = StaticContentServer.get_expiration_value(start_dt, 55) + start_dt = datetime.datetime.strptime("Thu, 01 Dec 1983 20:00:00 GMT", views.HTTP_DATE_FORMAT) + near_expire_dt = views.get_expiration_value(start_dt, 55) assert 'Thu, 01 Dec 1983 20:00:55 GMT' == near_expire_dt @patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') @@ -371,7 +371,7 @@ def test_cache_is_cdn_with_normal_request(self, mock_get_cdn_user_agents): request_factory = RequestFactory() browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Chrome 1234') - is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + is_from_cdn = views.is_cdn_request(browser_request) assert is_from_cdn is False @patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') @@ -385,7 +385,7 @@ def test_cache_is_cdn_with_cdn_request(self, mock_get_cdn_user_agents): request_factory = RequestFactory() browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront') - is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + is_from_cdn = views.is_cdn_request(browser_request) assert is_from_cdn is True @patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') @@ -400,7 +400,7 @@ def test_cache_is_cdn_with_cdn_request_multiple_user_agents(self, mock_get_cdn_u request_factory = RequestFactory() browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront') - is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + is_from_cdn = views.is_cdn_request(browser_request) assert is_from_cdn is True @@ -415,7 +415,7 @@ def setUp(self): self.content_length = 10000 def test_bytes_unit(self): - unit, __ = parse_range_header('bytes=100-', self.content_length) + unit, __ = views.parse_range_header('bytes=100-', self.content_length) assert unit == 'bytes' @ddt.data( @@ -428,7 +428,7 @@ def test_bytes_unit(self): ) @ddt.unpack def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges): - __, ranges = parse_range_header(header_value, self.content_length) + __, ranges = views.parse_range_header(header_value, self.content_length) assert len(ranges) == excepted_ranges_length assert ranges == expected_ranges @@ -446,5 +446,5 @@ def test_valid_syntax(self, header_value, excepted_ranges_length, expected_range @ddt.unpack def test_invalid_syntax(self, header_value, exception_class, exception_message_regex): self.assertRaisesRegex( - exception_class, exception_message_regex, parse_range_header, header_value, self.content_length + exception_class, exception_message_regex, views.parse_range_header, header_value, self.content_length ) diff --git a/openedx/core/djangoapps/contentserver/views.py b/openedx/core/djangoapps/contentserver/views.py index 096302f61d7d..49bb0897d9b9 100644 --- a/openedx/core/djangoapps/contentserver/views.py +++ b/openedx/core/djangoapps/contentserver/views.py @@ -1,22 +1,9 @@ """ Views for serving course assets. -Historically, this was implemented as a *middleware* (StaticContentServer) that -intercepted requests with paths matching certain patterns, rather than using -urlpatterns and a view. There wasn't any good reason for this, as far as I can -tell. It causes some problems for telemetry: When the code-owner middleware asks -Django what view handled the request, it does so by looking at the result of the -`resolve` utility, but these URLs get a Resolver404 (because there's no -registered urlpattern). - -We've turned it into a proper view, with a few warts remaining: - -- The view implementation is all bundled into a StaticContentServer class that - doesn't appear to have any state. The methods could likely just be extracted - as top-level functions. -- All three urlpatterns are registered to the same view, which then has to - re-parse the URL to determine which pattern is in effect. We should probably - have 3 views as entry points. +For historical reasons, this is just one view that matches to three different URL patterns, and then has to +re-parse the URL to determine which pattern is in effect. We should probably +have 3 views as entry points. """ import datetime import logging @@ -51,7 +38,7 @@ def course_assets_view(request): """ Serve course assets to end users. Colloquially referred to as "contentserver." """ - return IMPL.process_request(request) + return process_request(request) log = logging.getLogger(__name__) @@ -62,7 +49,7 @@ def course_assets_view(request): HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT" -def is_asset_request(self, request): +def is_asset_request(request): """Determines whether the given request is an asset request""" # Don't change this without updating urls.py! See docstring of views.py. return ( @@ -73,12 +60,13 @@ def is_asset_request(self, request): StaticContent.is_versioned_asset_path(request.path) ) + # pylint: disable=too-many-statements -def process_request(self, request): +def process_request(request): """Process the given request""" asset_path = request.path - if self.is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks + if is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks # Make sure we can convert this request into a location. if AssetLocator.CANONICAL_NAMESPACE in asset_path: asset_path = asset_path.replace('block/', 'block@', 1) @@ -98,7 +86,7 @@ def process_request(self, request): # if we're able to load it. actual_digest = None try: - content = self.load_asset_from_location(loc) + content = load_asset_from_location(loc) actual_digest = getattr(content, "content_digest", None) except (ItemNotFoundError, NotFoundError): return HttpResponseNotFound() @@ -121,15 +109,15 @@ def process_request(self, request): set_custom_attribute('contentserver.path', loc.path) # Figure out if this is a CDN using us as the origin. - is_from_cdn = StaticContentServer.is_cdn_request(request) + is_from_cdn = is_cdn_request(request) set_custom_attribute('contentserver.from_cdn', is_from_cdn) # Check if this content is locked or not. - locked = self.is_content_locked(content) + locked = is_content_locked(content) set_custom_attribute('contentserver.locked', locked) # Check that user has access to the content. - if not self.is_user_authorized(request, content, loc): + if not is_user_authorized(request, content, loc): return HttpResponseForbidden('Unauthorized') # Figure out if the client sent us a conditional request, and let them know @@ -209,11 +197,12 @@ def process_request(self, request): # middleware we have in place, there's no easy way to use the built-in Django # utilities and properly sanitize and modify a response to ensure that it is as # cacheable as possible, which is why we do it ourselves. - self.set_caching_headers(content, response) + set_caching_headers(content, response) return response -def set_caching_headers(self, content, response): + +def set_caching_headers(content, response): """ Sets caching headers based on whether or not the asset is locked. """ @@ -229,7 +218,7 @@ def set_caching_headers(self, content, response): if cache_ttl > 0 and not is_locked: set_custom_attribute('contentserver.cacheable', True) - response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl) + response['Expires'] = get_expiration_value(datetime.datetime.utcnow(), cache_ttl) response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl) elif is_locked: set_custom_attribute('contentserver.cacheable', False) @@ -243,6 +232,7 @@ def set_caching_headers(self, content, response): # caches a version of the response without CORS headers, in turn breaking XHR requests. force_header_for_response(response, 'Vary', 'Origin') + @staticmethod def is_cdn_request(request): """ @@ -259,23 +249,26 @@ def is_cdn_request(request): return False + @staticmethod def get_expiration_value(now, cache_ttl): """Generates an RFC1123 datetime string based on a future offset.""" expire_dt = now + datetime.timedelta(seconds=cache_ttl) return expire_dt.strftime(HTTP_DATE_FORMAT) -def is_content_locked(self, content): + +def is_content_locked(content): """ Determines whether or not the given content is locked. """ return bool(getattr(content, "locked", False)) -def is_user_authorized(self, request, content, location): + +def is_user_authorized(request, content, location): """ Determines whether or not the user for this request is authorized to view the given asset. """ - if not self.is_content_locked(content): + if not is_content_locked(content): return True if not hasattr(request, "user") or not request.user.is_authenticated: @@ -290,7 +283,8 @@ def is_user_authorized(self, request, content, location): return True -def load_asset_from_location(self, location): + +def load_asset_from_location(location): """ Loads an asset based on its location, either retrieving it from a cache or loading it directly from the contentstore. @@ -315,9 +309,6 @@ def load_asset_from_location(self, location): return content -IMPL = StaticContentServer() - - def parse_range_header(header_value, content_length): """ Returns the unit and a list of (start, end) tuples of ranges.