From 7f07ee3e85c2723f27813cd88c156a21db4fe11a Mon Sep 17 00:00:00 2001
From: David Ormsbee <dave@axim.org>
Date: Fri, 11 Oct 2024 17:13:25 -0400
Subject: [PATCH] fix: get some VideoBlock support for Learning Core assets

---
 .../core/djangoapps/content_libraries/api.py  |  8 ++-
 xmodule/video_block/transcripts_utils.py      | 61 ++++++++++++++++++-
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py
index 5562a8c97806..7c3055992533 100644
--- a/openedx/core/djangoapps/content_libraries/api.py
+++ b/openedx/core/djangoapps/content_libraries/api.py
@@ -1060,7 +1060,11 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user
         video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1")
         add_library_block_static_asset_file(video_block, "subtitles-en.srt", subtitles.encode('utf-8'))
     """
-    # File path validations copied over from v1 library logic...
+    # File path validations copied over from v1 library logic. A bad path can't
+    # really hurt us inside our system because we never use these paths in an
+    # actual file system -- they're just string keys that point to hash-named
+    # data files in a common library (learning package) level directory. But it
+    # might become a security issue during import/export serialization.
     if file_path != file_path.strip().strip('/'):
         raise InvalidNameError("file_path cannot start/end with / or whitespace.")
     if '//' in file_path or '..' in file_path:
@@ -1069,10 +1073,10 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user
     component = get_component_from_usage_key(usage_key)
 
     media_type_str, _encoding = mimetypes.guess_type(file_path)
-    media_type = authoring_api.get_or_create_media_type(media_type_str)
     now = datetime.now(tz=timezone.utc)
 
     with transaction.atomic():
+        media_type = authoring_api.get_or_create_media_type(media_type_str)
         content = authoring_api.get_or_create_file_content(
             component.publishable_entity.learning_package.id,
             media_type.id,
diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py
index 132b8cff1e14..f82fa28d7b1b 100644
--- a/xmodule/video_block/transcripts_utils.py
+++ b/xmodule/video_block/transcripts_utils.py
@@ -8,6 +8,7 @@
 import html
 import logging
 import os
+import pathlib
 import re
 from functools import wraps
 
@@ -16,9 +17,11 @@
 from django.conf import settings
 from lxml import etree
 from opaque_keys.edx.keys import UsageKeyV2
+from openedx_learning.api import authoring
 from pysrt import SubRipFile, SubRipItem, SubRipTime
 from pysrt.srtexc import Error
 
+from openedx.core.djangoapps.xblock.api import get_component_from_usage_key
 from xmodule.contentstore.content import StaticContent
 from xmodule.contentstore.django import contentstore
 from xmodule.exceptions import NotFoundError
@@ -1041,6 +1044,8 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran
     """
     Get video transcript from Learning Core.
 
+    Limitation: This only reads from the Component's latest Draft version.
+
     HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`:
 
       Blockstore expects video transcripts to be placed into the 'static/'
@@ -1072,9 +1077,59 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran
     Returns:
         tuple containing content, filename, mimetype
     """
-    # TODO: Update to use Learning Core data models once static assets support
-    # has been added.
-    raise NotFoundError("No transcript - transcripts not supported yet by learning core components.")
+    usage_key = video_block.scope_ids.usage_id
+
+    # Validate that the format is something we even support...
+    if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
+        raise NotFoundError(f'Invalid transcript format `{output_format}`')
+
+    # See if the requested language exists.
+    transcripts = transcripts_info['transcripts']
+    if language not in transcripts:
+        raise NotFoundError(
+            f"Video {usage_key} does not have a transcript file defined for the "
+            f"'{language}' language in its OLX."
+        )
+
+    # Grab the underlying Component. There's no version parameter to this call,
+    # so we're just going to grab the file associated with the latest draft
+    # version for now.
+    component = get_component_from_usage_key(usage_key)
+    component_version = component.versioning.draft
+    if not component_version:
+        raise NotFoundError(
+            f"No transcript for {usage_key}: Component {component.uuid} was soft-deleted."
+        )
+
+    file_path = pathlib.Path(f"static/{transcripts[language]}")
+    if file_path.suffix != '.srt':
+        # We want to standardize on .srt
+        raise NotFoundError("Video XBlocks in Content Libraries only support .srt transcript files.")
+
+    # TODO: Add a Learning Core API call for this; drop the debug print() below.
+    print(
+        [(cvc.key, cvc.content.has_file) for cvc in component_version.componentversioncontent_set.all()]
+    )
+    content = (
+        component_version
+        .componentversioncontent_set
+        .filter(content__has_file=True)
+        .select_related('content')
+        .get(key=file_path)
+    )
+    data = content.read_file().read()
+
+    # Now convert the transcript data to the requested format:
+    output_filename = f'{file_path.stem}.{output_format}'
+    output_transcript = Transcript.convert(
+        data.decode('utf-8'),
+        input_format=Transcript.SRT,
+        output_format=output_format,
+    )
+    if not output_transcript.strip():
+        raise NotFoundError('No transcript content')
+
+    return output_transcript, output_filename, Transcript.mime_types[output_format]
 
 
 def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):