diff --git a/changelog.d/17962.misc b/changelog.d/17962.misc new file mode 100644 index 00000000000..adf63487079 --- /dev/null +++ b/changelog.d/17962.misc @@ -0,0 +1 @@ +Fix new scheduled tasks jumping the queue. diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 7d51441e914..6ab53566600 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -495,7 +495,7 @@ def to_line(self) -> str: class NewActiveTaskCommand(_SimpleCommand): - """Sent to inform instance handling background tasks that a new active task is available to run. + """Sent to inform instance handling background tasks that a new task is ready to run. Format:: diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 61012269380..1fafbb48c3e 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -727,7 +727,7 @@ def on_NEW_ACTIVE_TASK( ) -> None: """Called when get a new NEW_ACTIVE_TASK command.""" if self._task_scheduler: - self._task_scheduler.launch_task_by_id(cmd.data) + self._task_scheduler.on_new_task(cmd.data) def new_connection(self, connection: IReplicationConnection) -> None: """Called when we have a new connection.""" diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py index 448960b2978..14ce2699ef6 100644 --- a/synapse/util/task_scheduler.py +++ b/synapse/util/task_scheduler.py @@ -174,9 +174,10 @@ async def schedule_task( The id of the scheduled task """ status = TaskStatus.SCHEDULED + start_now = False if timestamp is None or timestamp < self._clock.time_msec(): timestamp = self._clock.time_msec() - status = TaskStatus.ACTIVE + start_now = True task = ScheduledTask( random_string(16), @@ -190,9 +191,11 @@ async def schedule_task( ) await self._store.insert_scheduled_task(task) - if status == TaskStatus.ACTIVE: + # If the task is ready to run immediately, run the scheduling algorithm now + # rather than waiting + if start_now: if self._run_background_tasks: - await self._launch_task(task) + self._launch_scheduled_tasks() else: self._hs.get_replication_command_handler().send_new_active_task(task.id) @@ -300,23 +303,14 @@ async def delete_task(self, id: str) -> None: raise Exception(f"Task {id} is currently ACTIVE and can't be deleted") await self._store.delete_scheduled_task(id) - def launch_task_by_id(self, id: str) -> None: - """Try launching the task with the given ID.""" - # Don't bother trying to launch new tasks if we're already at capacity. - if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS: - return - - run_as_background_process("launch_task_by_id", self._launch_task_by_id, id) - - async def _launch_task_by_id(self, id: str) -> None: - """Helper async function for `launch_task_by_id`.""" - task = await self.get_task(id) - if task: - await self._launch_task(task) + def on_new_task(self, task_id: str) -> None: + """Handle a notification that a new ready-to-run task has been added to the queue""" + # Just run the scheduler + self._launch_scheduled_tasks() @wrap_as_background_process("launch_scheduled_tasks") async def _launch_scheduled_tasks(self) -> None: - """Retrieve and launch scheduled tasks that should be running at that time.""" + """Retrieve and launch scheduled tasks that should be running.""" # Don't bother trying to launch new tasks if we're already at capacity. if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS: return @@ -330,6 +324,8 @@ async def _launch_scheduled_tasks(self) -> None: for task in await self.get_tasks( statuses=[TaskStatus.ACTIVE], limit=self.MAX_CONCURRENT_RUNNING_TASKS ): + # _launch_task will ignore tasks that we're already running, and + # will also do nothing if we're already at the maximum capacity. await self._launch_task(task) for task in await self.get_tasks( statuses=[TaskStatus.SCHEDULED], diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py index 30f0510c9f2..9e403b948be 100644 --- a/tests/util/test_task_scheduler.py +++ b/tests/util/test_task_scheduler.py @@ -18,8 +18,7 @@ # [This file includes modifications made by New Vector Limited] # # - -from typing import Optional, Tuple +from typing import List, Optional, Tuple from twisted.internet.task import deferLater from twisted.test.proto_helpers import MemoryReactor @@ -104,33 +103,43 @@ def test_schedule_lot_of_tasks(self) -> None: ) ) - # This is to give the time to the active tasks to finish + def get_tasks_of_status(status: TaskStatus) -> List[ScheduledTask]: + tasks = ( + self.get_success(self.task_scheduler.get_task(task_id)) + for task_id in task_ids + ) + return [t for t in tasks if t is not None and t.status == status] + + # At this point, there should be MAX_CONCURRENT_RUNNING_TASKS active tasks and + # one scheduled task. + self.assertEquals( + len(get_tasks_of_status(TaskStatus.ACTIVE)), + TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS, + ) + self.assertEquals( + len(get_tasks_of_status(TaskStatus.SCHEDULED)), + 1, + ) + + # Give the time to the active tasks to finish self.reactor.advance(1) - # Check that only MAX_CONCURRENT_RUNNING_TASKS tasks has run and that one + # Check that MAX_CONCURRENT_RUNNING_TASKS tasks have run and that one # is still scheduled. - tasks = [ - self.get_success(self.task_scheduler.get_task(task_id)) - for task_id in task_ids - ] - self.assertEquals( - len( - [t for t in tasks if t is not None and t.status == TaskStatus.COMPLETE] - ), + len(get_tasks_of_status(TaskStatus.COMPLETE)), TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS, ) - - scheduled_tasks = [ - t for t in tasks if t is not None and t.status == TaskStatus.ACTIVE - ] + scheduled_tasks = get_tasks_of_status(TaskStatus.SCHEDULED) self.assertEquals(len(scheduled_tasks), 1) - # We need to wait for the next run of the scheduler loop - self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000)) - self.reactor.advance(1) + # The scheduled task should start 0.1s after the first of the active tasks + # finishes + self.reactor.advance(0.1) + self.assertEquals(len(get_tasks_of_status(TaskStatus.ACTIVE)), 1) - # Check that the last task has been properly executed after the next scheduler loop run + # ... and should finally complete after another second + self.reactor.advance(1) prev_scheduled_task = self.get_success( self.task_scheduler.get_task(scheduled_tasks[0].id) )