def generate_broken_links_descriptor(json_content, request_user):
    """
    Returns a Data Transfer Object for frontend given a list of broken links.

    Arguments:
        json_content: iterable of ``[block_id, link, is_locked]`` entries.
            ``is_locked`` is true if the link is a studio link and returns
            403 on request. It may be omitted, in which case the link is
            recorded as broken rather than locked.
        request_user: user passed to ``get_xblock`` when loading each block.

    Returns:
        dict | None: the DTO described below, or None when no links were given.

    ** Example DTO structure **
    {
        'sections': [
            {
                'id': 'section_id',
                'displayName': 'section name',
                'subsections': [
                    {
                        'id': 'subsection_id',
                        'displayName': 'subsection name',
                        'units': [
                            {
                                'id': 'unit_id',
                                'displayName': 'unit name',
                                'blocks': [
                                    {
                                        'id': 'block_id',
                                        'displayName': 'block name',
                                        'url': 'url/to/block',
                                        'brokenLinks': [],
                                        'lockedLinks': [],
                                    },
                                    ...,
                                ]
                            },
                            ...,
                        ]
                    },
                    ...,
                ]
            },
            ...,
        ]
    }
    """
    xblock_node_tree = {}  # tree representation of xblock relationships
    xblock_dictionary = {}  # dictionary of xblock attributes

    for item in json_content:
        block_id, link, *rest = item
        # The third element is optional; indexing rest[0] unconditionally
        # raised IndexError for two-element entries.
        is_locked_flag = bool(rest[0]) if rest else False

        usage_key = usage_key_with_run(block_id)
        block = get_xblock(usage_key, request_user)
        _update_node_tree_and_dictionary(
            block=block,
            link=link,
            is_locked=is_locked_flag,
            node_tree=xblock_node_tree,
            dictionary=xblock_dictionary,
        )

    return _create_dto_from_node_tree_recursive(xblock_node_tree, xblock_dictionary)


def _update_node_tree_and_dictionary(block, link, is_locked, node_tree, dictionary):
    """
    Inserts a block into the node tree and adds its attributes to the dictionary.

    Both ``node_tree`` and ``dictionary`` are mutated in place. A block that
    has no parent (empty path) is ignored.

    ** Example node tree structure **
    {
        'section_id1': {
            'subsection_id1': {
                'unit_id1': {
                    'block_id1': {},
                    'block_id2': {},
                    ...,
                },
                'unit_id2': {
                    'block_id3': {},
                    ...,
                },
                ...,
            },
            ...,
        },
        ...,
    }

    ** Example dictionary structure **
    {
        'xblock_id': {
            'display_name': 'xblock name',
            'category': 'html',
        },
        ...,
    }
    """
    path = _get_node_path(block)
    if not path:
        # Nothing to insert; previously this fell through to dictionary['']
        # and raised KeyError.
        return

    current_node = node_tree
    xblock_id = ''

    # Traverse the path and build the tree structure
    for xblock in path:
        xblock_id = xblock.location.block_id
        dictionary.setdefault(
            xblock_id,
            {
                'display_name': xblock.display_name,
                'category': getattr(xblock, 'category', ''),
            },
        )
        # Sets new current node and creates the node if it doesn't exist
        current_node = current_node.setdefault(xblock_id, {})

    # Add block-level details for the last xblock in the path (URL and broken/locked links)
    block_details = dictionary[xblock_id]
    block_details.setdefault(
        'url',
        f'/course/{block.course_id}/editor/{block.category}/{block.location}'
    )
    links_key = 'locked_links' if is_locked else 'broken_links'
    block_details.setdefault(links_key, []).append(link)


def _get_node_path(block):
    """
    Retrieves the path from the course root node to a specific block, excluding the root.

    A node is considered the root when ``get_parent()`` returns a falsy value.

    ** Example Path structure **
    [chapter_node, sequential_node, vertical_node, html_node]
    """
    path = []
    current_node = block
    # Look up each parent only once per level.
    parent = current_node.get_parent()

    while parent:
        path.append(current_node)
        current_node = parent
        parent = current_node.get_parent()

    path.reverse()
    return path


# Maps the category of an xblock to the DTO key under which its siblings are listed.
CATEGORY_TO_LEVEL_MAP = {
    "chapter": "sections",
    "sequential": "subsections",
    "vertical": "units"
}


def _create_dto_from_node_tree_recursive(xblock_node, xblock_dictionary):
    """
    Recursively build the Data Transfer Object from the node tree and dictionary.

    Returns a single-key dict ``{level: [entries]}`` for the children of
    ``xblock_node``, or None when the node has no children or the level of
    its children cannot be determined from CATEGORY_TO_LEVEL_MAP.
    """
    # Exit condition when there are no more child nodes (at block level)
    if not xblock_node:
        return None

    level = None
    xblock_children = []

    for xblock_id, node in xblock_node.items():
        child_blocks = _create_dto_from_node_tree_recursive(node, xblock_dictionary)
        xblock_data = xblock_dictionary.get(xblock_id, {})

        xblock_entry = {
            'id': xblock_id,
            'displayName': xblock_data.get('display_name', ''),
        }
        if child_blocks is None:  # Leaf node
            level = 'blocks'
            xblock_entry.update({
                'url': xblock_data.get('url', ''),
                'brokenLinks': xblock_data.get('broken_links', []),
                'lockedLinks': xblock_data.get('locked_links', []),
            })
        else:  # Non-leaf node
            level = CATEGORY_TO_LEVEL_MAP.get(xblock_data.get('category'))
            xblock_entry.update(child_blocks)

        xblock_children.append(xblock_entry)

    return {level: xblock_children} if level else None
class LinkCheckBlockSerializer(serializers.Serializer):
    """ Serializer for broken links block model data """
    id = serializers.CharField(required=True, allow_null=False, allow_blank=False)
    displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True)
    url = serializers.CharField(required=True, allow_null=False, allow_blank=False)
    brokenLinks = serializers.ListField(required=False)
    lockedLinks = serializers.ListField(required=False)


class LinkCheckUnitSerializer(serializers.Serializer):
    """ Serializer for broken links unit model data """
    id = serializers.CharField(required=True, allow_null=False, allow_blank=False)
    displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True)
    blocks = LinkCheckBlockSerializer(many=True)


class LinkCheckSubsectionSerializer(serializers.Serializer):
    """ Serializer for broken links subsection model data """
    id = serializers.CharField(required=True, allow_null=False, allow_blank=False)
    displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True)
    units = LinkCheckUnitSerializer(many=True)


class LinkCheckSectionSerializer(serializers.Serializer):
    """ Serializer for broken links section model data """
    id = serializers.CharField(required=True, allow_null=False, allow_blank=False)
    displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True)
    subsections = LinkCheckSubsectionSerializer(many=True)


class LinkCheckOutputSerializer(serializers.Serializer):
    """ Serializer for broken links output model data """
    sections = LinkCheckSectionSerializer(many=True)


class LinkCheckSerializer(serializers.Serializer):
    """
    Serializer for the link check status response.

    Only ``LinkCheckStatus`` is always present. ``LinkCheckOutput`` is only
    supplied once a scan has produced results and ``LinkCheckError`` only when
    a scan failed, so both are optional; requiring the output made validation
    fail for every status that carries no results.
    """
    LinkCheckStatus = serializers.CharField(required=True)
    LinkCheckOutput = LinkCheckOutputSerializer(required=False)
    # The error may be a plain string or decoded JSON, hence JSONField.
    LinkCheckError = serializers.JSONField(required=False)
""" from .advanced_settings import AdvancedCourseSettingsView +from .api_heartbeat import APIHeartBeatView from .authoring_grading import AuthoringGradingView +from .course_optimizer import LinkCheckView, LinkCheckStatusView from .tabs import CourseTabSettingsView, CourseTabListView, CourseTabReorderView from .transcripts import TranscriptView, YoutubeTranscriptCheckView, YoutubeTranscriptUploadView -from .api_heartbeat import APIHeartBeatView diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py new file mode 100644 index 000000000000..a72c3407e28d --- /dev/null +++ b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py @@ -0,0 +1,241 @@ +""" API Views for Course Optimizer. """ + +import json +import edx_api_doc_tools as apidocs +from django.conf import settings +from opaque_keys.edx.keys import CourseKey +from rest_framework.views import APIView +from rest_framework.request import Request +from rest_framework.response import Response +from rest_framework import status +from user_tasks.conf import settings as user_tasks_settings +from user_tasks.models import UserTaskArtifact, UserTaskStatus + +from cms.djangoapps.contentstore.core.course_optimizer_provider import generate_broken_links_descriptor +from cms.djangoapps.contentstore.rest_api.v0.serializers.course_optimizer import LinkCheckSerializer +from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, check_broken_links +from common.djangoapps.student.auth import has_course_author_access, has_studio_read_access +from common.djangoapps.util.json_request import JsonResponse +from common.djangoapps.util.views import ensure_valid_course_key +from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, verify_course_exists, view_auth_classes +from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order + + +# Restricts status in the REST API to only those 
# Restricts status in the REST API to only those which the requesting user has permission to view.
#   These can be overwritten in django settings.
#   By default, these should be the UserTaskStatus statuses:
#   'Pending', 'In Progress', 'Succeeded', 'Failed', 'Canceled', 'Retrying'
STATUS_FILTERS = user_tasks_settings.USER_TASKS_STATUS_FILTERS


@view_auth_classes(is_authenticated=True)
class LinkCheckView(DeveloperErrorViewMixin, APIView):
    """
    View for queueing a celery task to scan a course for broken links.
    """
    @apidocs.schema(
        parameters=[
            apidocs.string_parameter("course_id", apidocs.ParameterLocation.PATH, description="Course ID"),
        ],
        responses={
            200: "Celery task queued.",
            401: "The requester is not authenticated.",
            403: "The requester cannot access the specified course.",
            404: "The requested course does not exist.",
        },
    )
    @verify_course_exists()
    def post(self, request: Request, course_id: str):
        """
        Queue celery task to scan a course for broken links.

        **Example Request**
            POST /api/contentstore/v0/link_check/{course_id}

        **Example Response**
        ```json
        {
            "LinkCheckStatus": "Pending"
        }
        ```
        """
        course_key = CourseKey.from_string(course_id)

        # Same access check as the status endpoint, so whoever queues a scan
        # is also allowed to read its result.
        if not has_course_author_access(request.user, course_key):
            self.permission_denied(request)

        check_broken_links.delay(request.user.id, course_id, request.LANGUAGE_CODE)
        return JsonResponse({'LinkCheckStatus': UserTaskStatus.PENDING})


@view_auth_classes()
class LinkCheckStatusView(DeveloperErrorViewMixin, APIView):
    """
    View for checking the status of the celery task and returning the results.
    """
    @apidocs.schema(
        parameters=[
            apidocs.string_parameter("course_id", apidocs.ParameterLocation.PATH, description="Course ID"),
        ],
        responses={
            200: "OK",
            401: "The requester is not authenticated.",
            403: "The requester cannot access the specified course.",
            404: "The requested course does not exist.",
        },
    )
    @verify_course_exists()
    def get(self, request: Request, course_id: str):
        """
        GET handler to return the status of the link_check task from UserTaskStatus.
        If no task has been started for the course, return 'Uninitiated'.
        If link_check task was successful, an output result is also returned.
        If it failed or was canceled and an error artifact exists, the error is returned.

        For reference, the following status are in UserTaskStatus:
            'Pending', 'In Progress', 'Succeeded', 'Failed', 'Canceled', 'Retrying'
        This function adds a status for when status from UserTaskStatus is None:
            'Uninitiated'

        **Example Request**
            GET /api/contentstore/v0/link_check_status/{course_id}

        **Example Response**
        ```json
        {
            "LinkCheckStatus": "Succeeded",
            "LinkCheckOutput": {
                "sections": [
                    {
                        "id": "<section id>",
                        "displayName": "<section name>",
                        "subsections": [
                            {
                                "id": "<subsection id>",
                                "displayName": "<subsection name>",
                                "units": [
                                    {
                                        "id": "<unit id>",
                                        "displayName": "<unit name>",
                                        "blocks": [
                                            {
                                                "id": "<block id>",
                                                "displayName": "<block name>",
                                                "url": "<url to block>",
                                                "brokenLinks": ["<link>", "..."],
                                                "lockedLinks": ["<link>", "..."]
                                            }
                                        ]
                                    }
                                ]
                            }
                        ]
                    }
                ]
            }
        }
        ```
        """
        course_key = CourseKey.from_string(course_id)
        if not has_course_author_access(request.user, course_key):
            self.permission_denied(request)

        task_status = _latest_task_status(request, course_id)
        # Named link_check_status so the imported rest_framework `status`
        # module is not shadowed inside this method.
        link_check_status = None
        broken_links_dto = None
        error = None
        if task_status is None:
            # The task hasn't been initialized yet; did we store info in the session already?
            try:
                session_status = request.session['link_check_status']
                link_check_status = session_status[course_id]
            except KeyError:
                link_check_status = 'Uninitiated'
        else:
            link_check_status = task_status.state
            if task_status.state == UserTaskStatus.SUCCEEDED:
                artifact = UserTaskArtifact.objects.get(status=task_status, name='BrokenLinks')
                with artifact.file as file:
                    content = file.read()
                    json_content = json.loads(content)
                    broken_links_dto = generate_broken_links_descriptor(json_content, request.user)
            elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED):
                errors = UserTaskArtifact.objects.filter(status=task_status, name='Error')
                if errors:
                    error = errors[0].text
                    try:
                        error = json.loads(error)
                    except ValueError:
                        # Wasn't JSON, just use the value as a string
                        pass

        # Optional keys are only included when they carry a value.
        data = {
            'LinkCheckStatus': link_check_status,
            **({'LinkCheckOutput': broken_links_dto} if broken_links_dto else {}),
            **({'LinkCheckError': error} if error else {})
        }

        serializer = LinkCheckSerializer(data=data)
        serializer.is_valid(raise_exception=True)

        return Response(serializer.data)


def _latest_task_status(request, course_key_string, view_func=None):
    """
    Get the most recent link check status update for the specified course
    key.

    The task name is rebuilt with CourseLinkCheckTask.generate_name, every
    filter in STATUS_FILTERS is applied to the matching UserTaskStatus rows,
    and the newest remaining row (by ``created``) is returned, or None.
    """
    args = {'course_key_string': course_key_string}
    name = CourseLinkCheckTask.generate_name(args)
    task_status = UserTaskStatus.objects.filter(name=name)
    for status_filter in STATUS_FILTERS:
        task_status = status_filter().filter_queryset(request, task_status, view_func)
    return task_status.order_by('-created').first()
class CourseLinkCheckTask(UserTask):  # pylint: disable=abstract-method
    """
    Base class for course link check tasks.
    """

    @staticmethod
    def calculate_total_steps(arguments_dict):
        """
        Get the number of in-progress steps in the link check process, as shown in the UI.

        For reference, these are:
        1. Scanning
        """
        return 1

    @classmethod
    def generate_name(cls, arguments_dict):
        """
        Create a name for this particular task instance.

        Arguments:
            arguments_dict (dict): The arguments given to the task function

        Returns:
            str: The generated name
        """
        key = arguments_dict['course_key_string']
        return f'Broken link check of {key}'


@shared_task(base=CourseLinkCheckTask, bind=True)
def check_broken_links(self, user_id, course_key_string, language):
    """
    Checks for broken links in a course. Store the results in a file.

    The result is a JSON list of ``[block_id, link, is_locked]`` entries saved
    as a ``BrokenLinks`` UserTaskArtifact on this task's status.

    Arguments:
        user_id: primary key of the user who requested the scan.
        course_key_string (str): serialized key of the course to scan.
        language: language activated while recording a user-facing failure.
    """
    URL_STATUS = {
        'success': '200 OK',
        'forbidden': '403 Forbidden',
        'failure': 'Request Failed',
        'error': 'Request Error'
    }

    def validate_user():
        """Return the requesting user, or fail the task and return None if unknown."""
        try:
            return User.objects.get(pk=user_id)
        except User.DoesNotExist:
            with translation_language(language):
                self.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id))
            return None

    def get_urls(content):
        """Returns all urls after href and src in content."""
        regex = r'\s+(?:href|src)=["\']([^"\']*)["\']'
        return re.findall(regex, content)

    def is_studio_url(url):
        """Returns True if url is a studio url (anything not starting with http:// or https://)."""
        return not url.startswith(('http://', 'https://'))

    def convert_to_standard_url(url, course_key):
        """
        Returns standard urls when given studio urls. Otherwise return url as is.

        Example urls:
            /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
            /static/getting-started_x250.png
            /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
        """
        if not is_studio_url(url):
            return url

        if url.startswith('/static/'):
            # replace_static_urls works on quoted urls; strip the quotes again afterwards.
            processed_url = replace_static_urls(f'\"{url}\"', course_id=course_key)[1:-1]
            return 'http://' + settings.CMS_BASE + processed_url
        if url.startswith('/'):
            return 'http://' + settings.CMS_BASE + url
        return 'http://' + settings.CMS_BASE + '/container/' + url

    def validate_url_access(url):
        """Returns status of a url request."""
        try:
            response = requests.get(url, timeout=5)
        except requests.exceptions.RequestException:
            return URL_STATUS['error']

        if response.status_code == 200:
            return URL_STATUS['success']
        if response.status_code == 403:
            return URL_STATUS['forbidden']
        return URL_STATUS['failure']

    def scan_course_for_links(course_key):
        """
        Returns a list of links that are broken or locked.
        [block_id, link, is_locked]
        """
        links = []
        verticals = modulestore().get_items(
            course_key,
            qualifiers={'category': 'vertical'},
            revision=ModuleStoreEnum.RevisionOption.published_only,
        )
        blocks = []

        for vertical in verticals:
            blocks.extend(vertical.get_children())

        for block in blocks:
            usage_key = block.usage_key
            block_info = get_block_info(block)
            block_data = block_info['data']
            urls = get_urls(block_data)

            for url in urls:
                if url == '#':  # do not evaluate these 'url'
                    # Skip only this url; `break` dropped every remaining
                    # url of the block from the scan.
                    continue

                standardized_url = convert_to_standard_url(url, course_key)
                status = validate_url_access(standardized_url)

                if status == URL_STATUS['failure']:
                    links.append([str(usage_key), url, False])
                if status == URL_STATUS['forbidden'] and is_studio_url(url):
                    links.append([str(usage_key), url, True])

        return links

    if validate_user() is None:
        # The status has already been marked as failed; do not start scanning.
        return

    self.status.set_state('Scanning')
    courselike_key = CourseKey.from_string(course_key_string)

    try:
        data = scan_course_for_links(courselike_key)
        self.status.increment_completed_steps()

        file_name = str(courselike_key)
        # The context manager closes (and removes) the temporary file once the
        # artifact has been saved.
        with NamedTemporaryFile(prefix=file_name + '.', suffix='.json') as links_file:
            LOGGER.debug('json file being generated at %s', links_file.name)

            links_file.write(json.dumps(data, indent=4).encode('utf-8'))
            links_file.flush()
            links_file.seek(0)

            artifact = UserTaskArtifact(status=self.status, name='BrokenLinks')
            artifact.file.save(name=os.path.basename(links_file.name), content=File(links_file))
            artifact.save()

    # catch all exceptions so we can record useful error messages
    except Exception as exception:  # pylint: disable=broad-except
        LOGGER.exception('Error checking links for course %s', courselike_key, exc_info=True)
        if self.status.state != UserTaskStatus.FAILED:
            self.status.fail({'raw_error_msg': str(exception)})
        return