Skip to content

Commit

Permalink
fix: Issues when importing a large taxonomy (#189)
Browse files Browse the repository at this point in the history
* feat: Add elpased time to import taxonomy process
* Refactor execute() of TagImportPlan() to avoid save logs in every action. This causes slow execution and memory overflow.
  • Loading branch information
ChrisChV authored May 17, 2024
1 parent 8ad5329 commit d229afb
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 21 deletions.
2 changes: 1 addition & 1 deletion openedx_learning/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
Open edX Learning ("Learning Core").
"""
__version__ = "0.9.3"
__version__ = "0.9.4"
12 changes: 4 additions & 8 deletions openedx_tagging/core/tagging/import_export/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,13 @@ def execute(self) -> None:
"""
Creates a Tag
"""
parent = None
if self.tag.parent_id:
parent = self.taxonomy.tag_set.get(external_id=self.tag.parent_id)
taxonomy_tag = Tag(
Tag.objects.create(
taxonomy=self.taxonomy,
parent=parent,
parent=self.taxonomy.tag_set.get(external_id=self.tag.parent_id)
if self.tag.parent_id is not None else None,
value=self.tag.value,
external_id=self.tag.id,
)
taxonomy_tag.save()


class UpdateParentTag(ImportAction):
Expand Down Expand Up @@ -409,8 +406,7 @@ def execute(self) -> None:
Delete a tag
"""
try:
taxonomy_tag = self._get_tag()
taxonomy_tag.delete()
self._get_tag().delete()
except Tag.DoesNotExist:
pass # The tag may be already cascade deleted if the parent tag was deleted

Expand Down
38 changes: 34 additions & 4 deletions openedx_tagging/core/tagging/import_export/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"""
from __future__ import annotations

import time
from typing import BinaryIO

from django.utils.translation import gettext as _
Expand Down Expand Up @@ -77,6 +78,7 @@ def import_tags(
Set `plan_only` to True to only generate the actions and not execute them.
"""
global_start_time = time.time()
_import_validations(taxonomy)

# Checks that exists only one task import in progress at a time per taxonomy
Expand All @@ -92,7 +94,10 @@ def import_tags(
task = TagImportTask.create(taxonomy)

try:
# Start of parsing

# Get the parser and parse the file
start_time = time.time()
task.log_parser_start()
parser = get_parser(parser_format)
tags, errors = parser.parse_import(file)
Expand All @@ -102,23 +107,48 @@ def import_tags(
task.handle_parser_errors(errors)
return False, task, None

task.log_parser_end()
end_time = time.time()
elapsed_time = end_time - start_time
elapsed_time = round(elapsed_time, 5)
task.log_parser_end(elapsed_time)

# End of parsing

# Start of generate actions

start_time = time.time()

# Generate actions
task.log_start_planning()
tag_import_plan = TagImportPlan(taxonomy)
tag_import_plan.generate_actions(tags, replace)
task.log_plan(tag_import_plan)

end_time = time.time()
elapsed_time = end_time - start_time
elapsed_time = round(elapsed_time, 5)
task.log_plan(tag_import_plan, elapsed_time)

# End of generate actions

if tag_import_plan.errors:
task.handle_plan_errors()
return False, task, tag_import_plan

if not plan_only:
# Start of execute
start_time = time.time()
task.log_start_execute()
tag_import_plan.execute(task)
end_time = time.time()
elapsed_time = end_time - start_time
elapsed_time = round(elapsed_time, 5)
task.log_end_execute(elapsed_time)
# End of execute

global_end_time = time.time()
global_elapsed_time = global_end_time - global_start_time
global_elapsed_time = round(global_elapsed_time, 5)

task.end_success()
task.end_success(global_elapsed_time)

return True, task, tag_import_plan
except Exception as exception:
Expand Down
8 changes: 6 additions & 2 deletions openedx_tagging/core/tagging/import_export/import_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,12 @@ def execute(self, task: TagImportTask | None = None):
if self.errors:
return
for action in self.actions:
# Avoid to save each log because it is slow and costs a lot in memory
# It is necessary to save at the end.
if task:
task.add_log(f"#{action.index}: {str(action)} [Started]")
task.add_log(f"#{action.index}: {str(action)} [Started]", save=False)
action.execute()
if task:
task.add_log("Success")
task.add_log("Success", save=False)
if task:
task.save()
15 changes: 9 additions & 6 deletions openedx_tagging/core/tagging/models/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ def log_parser_start(self):
"""
self.add_log(_("Starting to load data from file"))

def log_parser_end(self):
def log_parser_end(self, elapsed_time):
"""
Logs the parser finished event.
"""
self.add_log(_("Load data finished"))
self.add_log(_("Load data finished. Time elapsed: ") + str(elapsed_time) + _(" seconds"))

def handle_parser_errors(self, errors):
"""
Expand All @@ -113,11 +113,11 @@ def log_start_planning(self):
self.status = TagImportTaskState.PLANNING.value
self.save()

def log_plan(self, plan):
def log_plan(self, plan, elapsed_time):
"""
Logs the task plan.
"""
self.add_log(_("Plan finished"))
self.add_log(_("Plan finished. Time elapsed: ") + str(elapsed_time) + _(" seconds"))
plan_str = plan.plan()
self.log += f"\n{plan_str}\n"
self.save()
Expand All @@ -138,10 +138,13 @@ def log_start_execute(self):
self.status = TagImportTaskState.EXECUTING.value
self.save()

def end_success(self):
def log_end_execute(self, elapsed_time):
self.add_log(_("Execute actions finished. Time elapsed: ") + str(elapsed_time) + _(" seconds"))

def end_success(self, elapsed_time):
"""
Completes task execution with a log message, and moves the task status to SUCCESS.
"""
self.add_log(_("Execution finished"), save=False)
self.add_log(_("Execution finished. Total time elapsed: ") + str(elapsed_time) + _("seconds"), save=False)
self.status = TagImportTaskState.SUCCESS.value
self.save()

0 comments on commit d229afb

Please sign in to comment.