Skip to content

Commit

Permalink
fix: reindex_studio was crashing if instance had too many courses (#3…
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenmacdonald authored Jun 7, 2024
1 parent 9bc0f85 commit 98689ab
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 29 deletions.
59 changes: 30 additions & 29 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.core.paginator import Paginator
from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchError
from meilisearch.models.task import TaskInfo
Expand All @@ -21,10 +22,9 @@
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request
from common.djangoapps.student.role_helpers import get_course_roles
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import get_access_ids_for_request

from openedx.core.djangoapps.content_libraries import api as lib_api
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore

from .documents import (
Expand Down Expand Up @@ -292,9 +292,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:

# Count the courses (used below for progress reporting)
status_cb("Counting courses...")
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred):
all_courses = store.get_courses()
num_courses = len(all_courses)
num_courses = CourseOverview.objects.count()

# Some counters so we can track our progress as indexing progresses:
num_contexts = num_courses + num_libraries
Expand Down Expand Up @@ -358,30 +356,33 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:

############## Courses ##############
status_cb("Indexing courses...")
for course in all_courses:
status_cb(
f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
)
docs = []

# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)

def add_with_children(block):
""" Recursively index the given XBlock/component """
doc = searchable_doc_for_course_block(block)
doc.update(searchable_doc_tags(block.usage_key))
docs.append(doc) # pylint: disable=cell-var-from-loop
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop

# Index course children
_recurse_children(course, add_with_children)

if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
num_contexts_done += 1
num_blocks_done += len(docs)
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
for p in paginator.page_range:
for course in paginator.page(p).object_list:
status_cb(
f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
)
docs = []

# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)

def add_with_children(block):
    """
    Build the search document for ``block`` (its own fields merged with its
    tag fields), queue it in the enclosing ``docs`` list, then pass this
    function to ``_recurse_children`` for the same block.
    """
    block_doc = searchable_doc_for_course_block(block)
    tag_fields = searchable_doc_tags(block.usage_key)
    block_doc.update(tag_fields)
    # `docs` is rebound for every course by the surrounding loop; this closure
    # is only used within the iteration that defines it.
    docs.append(block_doc)  # pylint: disable=cell-var-from-loop
    _recurse_children(block, add_with_children)  # pylint: disable=cell-var-from-loop

# Index course children
_recurse_children(course, add_with_children)

if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
num_contexts_done += 1
num_blocks_done += len(docs)

status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.")

Expand Down
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from common.djangoapps.student.tests.factories import UserFactory
from openedx.core.djangoapps.content_libraries import api as library_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.content.course_overviews.api import CourseOverview
from openedx.core.djangolib.testing.utils import skip_unless_cms
from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase

Expand Down Expand Up @@ -106,6 +107,8 @@ def setUp(self):
"content": {},
"access_id": course_access.id,
}
# Make sure the CourseOverview for the course is created:
CourseOverview.get_from_id(self.course.id)

# Create a content library:
self.library = library_api.create_library(
Expand Down

0 comments on commit 98689ab

Please sign in to comment.