diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index 5562a8c97806..7c3055992533 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -1060,7 +1060,11 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1") add_library_block_static_asset_file(video_block, "subtitles-en.srt", subtitles.encode('utf-8')) """ - # File path validations copied over from v1 library logic... + # File path validations copied over from v1 library logic. This can't really + # hurt us inside our system because we never use these paths in an actual + # file system–they're just string keys that point to hash-named data files + # in a common library (learning package) level directory. But it might + # become a security issue during import/export serialization. if file_path != file_path.strip().strip('/'): raise InvalidNameError("file_path cannot start/end with / or whitespace.") if '//' in file_path or '..' 
in file_path: @@ -1069,10 +1073,10 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user component = get_component_from_usage_key(usage_key) media_type_str, _encoding = mimetypes.guess_type(file_path) - media_type = authoring_api.get_or_create_media_type(media_type_str) now = datetime.now(tz=timezone.utc) with transaction.atomic(): + media_type = authoring_api.get_or_create_media_type(media_type_str) content = authoring_api.get_or_create_file_content( component.publishable_entity.learning_package.id, media_type.id, diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 132b8cff1e14..f82fa28d7b1b 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -8,6 +8,7 @@ import html import logging import os +import pathlib import re from functools import wraps @@ -16,9 +17,11 @@ from django.conf import settings from lxml import etree from opaque_keys.edx.keys import UsageKeyV2 +from openedx_learning.api import authoring from pysrt import SubRipFile, SubRipItem, SubRipTime from pysrt.srtexc import Error +from openedx.core.djangoapps.xblock.api import get_component_from_usage_key from xmodule.contentstore.content import StaticContent from xmodule.contentstore.django import contentstore from xmodule.exceptions import NotFoundError @@ -1041,6 +1044,8 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran """ Get video transcript from Learning Core. + Limitation: This is only going to grab from the Draft version. + HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`: Blockstore expects video transcripts to be placed into the 'static/' @@ -1072,9 +1077,59 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran Returns: tuple containing content, filename, mimetype """ - # TODO: Update to use Learning Core data models once static assets support - # has been added. 
-    raise NotFoundError("No transcript - transcripts not supported yet by learning core components.")
+    usage_key = video_block.scope_ids.usage_id
+
+    # Validate that the format is something we even support...
+    if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
+        raise NotFoundError(f'Invalid transcript format `{output_format}`')
+
+    # See if the requested language exists.
+    transcripts = transcripts_info['transcripts']
+    if language not in transcripts:
+        raise NotFoundError(
+            f"Video {usage_key} does not have a transcript file defined for the "
+            f"'{language}' language in its OLX."
+        )
+
+    # Grab the underlying Component. There's no version parameter to this call,
+    # so we're just going to grab the file associated with the latest draft
+    # version for now.
+    component = get_component_from_usage_key(usage_key)
+    component_version = component.versioning.draft
+    if not component_version:
+        raise NotFoundError(
+            f"No transcript for {usage_key}: Component {component.uuid} was soft-deleted."
+        )
+
+    file_path = pathlib.Path(f"static/{transcripts[language]}")
+    if file_path.suffix != '.srt':
+        # We want to standardize on .srt
+        raise NotFoundError("Video XBlocks in Content Libraries only support .srt transcript files.")
+
+    # TODO: There should be a Learning Core API call for this. The query
+    # matches the ComponentVersionContent row for this path, so follow its
+    # `content` relation (preloaded by select_related) to the file-backed object.
+    content = (
+        component_version
+        .componentversioncontent_set
+        .filter(content__has_file=True)
+        .select_related('content')
+        .get(key=file_path)
+        .content
+    )
+    data = content.read_file().read()
+
+    # Now convert the transcript data to the requested format:
+    output_filename = f'{file_path.stem}.{output_format}'
+    output_transcript = Transcript.convert(
+        data.decode('utf-8'),
+        input_format=Transcript.SRT,
+        output_format=output_format,
+    )
+    if not output_transcript.strip():
+        raise NotFoundError('No transcript content')
+
+    return output_transcript, output_filename, Transcript.mime_types[output_format]
 
 
 def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):