From 73ab93898633a7d50c1cc7148351e5d13e6c0cd2 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 2 Oct 2023 13:33:40 -0400 Subject: [PATCH 1/3] Add documentation on background updates. --- docs/development/database_schema.md | 61 +++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md index 675080ae1b79..29ceae37cdc9 100644 --- a/docs/development/database_schema.md +++ b/docs/development/database_schema.md @@ -150,6 +150,67 @@ def run_upgrade( ... ``` +## Background updates + +It is sometimes appropriate to perform database migrations as part of a background +process (instead of blocking Synapse until the migration is done). In particular, +this is useful for migrating data when adding new columns or tables. + +Pending background updates are stored in the `background_updates` table and are denoted +by a unique name, the current status (stored in JSON), and some dependency information: + +* Whether the update requires a previous update to be complete. +* A rough ordering in which to complete updates. 
+ +A new background update needs to be added to the `background_updates` table: + +```sql +INSERT INTO background_updates (ordering, update_name, depends_on, progress_json) VALUES + (7706, 'my_background_update', 'a_previous_background_update', '{}'); +``` + +And then needs an associated handler in the appropriate datastore: + +```python +self.db_pool.updates.register_background_update_handler( + "my_background_update", + update_handler=self._my_background_update, +) +``` + +There are a few types of updates that can be performed, see the `BackgroundUpdater`: + +* `register_background_update_handler`: A generic handler for custom SQL +* `register_background_index_update`: Create an index in the background +* `register_background_validate_constraint`: Validate a constraint in the background + (PostgreSQL-only) +* `register_background_validate_constraint_and_delete_rows`: Similar to + `register_background_validate_constraint`, but deletes rows which don't fit + the constraint. + +For `register_background_update_handler`, the generic handler must track progress +and then finalize the background update: + +```python +async def _my_background_update(self, progress: JsonDict, batch_size: int) -> int: + def _do_something(txn: LoggingTransaction) -> int: + ... + self.db_pool.updates._background_update_progress_txn( + txn, "my_background_update", {"last_processed": last_processed} + ) + return last_processed - prev_last_processed + + num_processed = await self.db_pool.runInteraction("_do_something", _do_something) + await self.db_pool.updates._end_background_update("my_background_update") + + return num_processed +``` + +Synapse will attempt to rate-limit how often background updates are run via the +given batch-size and the returned number of processed entries (and how long the +function took to run). See +[background update controller callbacks](modules/background_update_controller_callbacks.md). 
+ ## Boolean columns Boolean columns require special treatment, since SQLite treats booleans the From 89a6cfa9fbfc3420e75f6ba200b488552e779ba6 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 2 Oct 2023 13:34:20 -0400 Subject: [PATCH 2/3] Newsfragment --- changelog.d/16420.doc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/16420.doc diff --git a/changelog.d/16420.doc b/changelog.d/16420.doc new file mode 100644 index 000000000000..1c0c6b957738 --- /dev/null +++ b/changelog.d/16420.doc @@ -0,0 +1 @@ +Document internal background update mechanism. From 777420628c2a70e1adba55c20c89c778832d622f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 2 Oct 2023 13:38:00 -0400 Subject: [PATCH 3/3] Fix-up links. --- docs/development/database_schema.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md index 29ceae37cdc9..37a06acc1282 100644 --- a/docs/development/database_schema.md +++ b/docs/development/database_schema.md @@ -209,7 +209,7 @@ async def _my_background_update(self, progress: JsonDict, batch_size: int) -> in Synapse will attempt to rate-limit how often background updates are run via the given batch-size and the returned number of processed entries (and how long the function took to run). See -[background update controller callbacks](modules/background_update_controller_callbacks.md). +[background update controller callbacks](../modules/background_update_controller_callbacks.md). ## Boolean columns