From cf100560e7712dcc387e286057ffeeccb23a944e Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Thu, 25 Apr 2024 18:26:46 -0400 Subject: [PATCH] [Bug] Use a list instead of a set for index changes to perserve order (#73) Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> --- .../unreleased/Fixes-20240425-133401.yaml | 6 ++ dbt/adapters/postgres/relation.py | 20 ++++--- .../relation_configs/materialized_view.py | 4 +- tests/unit/test_materialized_view.py | 60 +++++++++++++++++++ 4 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240425-133401.yaml create mode 100644 tests/unit/test_materialized_view.py diff --git a/.changes/unreleased/Fixes-20240425-133401.yaml b/.changes/unreleased/Fixes-20240425-133401.yaml new file mode 100644 index 00000000..cb6d14da --- /dev/null +++ b/.changes/unreleased/Fixes-20240425-133401.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Replace usage of `Set` with `List` to fix issue with index updates intermittently happening out of order +time: 2024-04-25T13:34:01.018399-04:00 +custom: + Author: mikealfare + Issue: "72" diff --git a/dbt/adapters/postgres/relation.py b/dbt/adapters/postgres/relation.py index e546eab4..e8128c46 100644 --- a/dbt/adapters/postgres/relation.py +++ b/dbt/adapters/postgres/relation.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import FrozenSet, Optional, Set +from typing import FrozenSet, List, Optional from dbt.adapters.base.relation import BaseRelation from dbt.adapters.contracts.relation import RelationConfig, RelationType @@ -79,7 +79,7 @@ def _get_index_config_changes( self, existing_indexes: FrozenSet[PostgresIndexConfig], new_indexes: FrozenSet[PostgresIndexConfig], - ) -> Set[PostgresIndexConfigChange]: + ) -> List[PostgresIndexConfigChange]: """ Get the index updates that will occur as a result of a new run @@ -90,18 +90,22 @@ def _get_index_config_changes( 3. Index is old -> drop these 4. Indexes are not equal -> drop old, create new -> two actions - Returns: a set of index updates in the form {"action": "drop/create", "context": } + *Note:* + The order of the operations matters here because if the same index is dropped and recreated + (e.g. via --full-refresh) then we need to drop it first, then create it. + + Returns: an ordered list of index updates in the form {"action": "drop/create", "context": } """ - drop_changes = set( + drop_changes = [ PostgresIndexConfigChange.from_dict( {"action": RelationConfigChangeAction.drop, "context": index} ) for index in existing_indexes.difference(new_indexes) - ) - create_changes = set( + ] + create_changes = [ PostgresIndexConfigChange.from_dict( {"action": RelationConfigChangeAction.create, "context": index} ) for index in new_indexes.difference(existing_indexes) - ) - return set().union(drop_changes, create_changes) # type: ignore + ] + return drop_changes + create_changes diff --git a/dbt/adapters/postgres/relation_configs/materialized_view.py b/dbt/adapters/postgres/relation_configs/materialized_view.py index 3563833e..8eaccbbf 100644 --- a/dbt/adapters/postgres/relation_configs/materialized_view.py +++ b/dbt/adapters/postgres/relation_configs/materialized_view.py @@ -101,7 +101,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: @dataclass class PostgresMaterializedViewConfigChangeCollection: - indexes: Set[PostgresIndexConfigChange] = field(default_factory=set) + indexes: List[PostgresIndexConfigChange] = field(default_factory=list) @property def requires_full_refresh(self) -> bool: @@ -109,4 +109,4 @@ def requires_full_refresh(self) -> bool: @property def has_changes(self) -> bool: - return self.indexes != set() + return self.indexes != [] diff --git a/tests/unit/test_materialized_view.py b/tests/unit/test_materialized_view.py new file mode 100644 index 00000000..dc4f822b --- /dev/null +++ b/tests/unit/test_materialized_view.py @@ -0,0 +1,60 @@ +from copy import deepcopy + +from dbt.adapters.contracts.relation import RelationType +from dbt.adapters.relation_configs.config_change import RelationConfigChangeAction + +from dbt.adapters.postgres.relation import PostgresRelation +from dbt.adapters.postgres.relation_configs import PostgresIndexConfig + + +def test_index_config_changes(): + index_0_old = { + "name": "my_index_0", + "column_names": {"column_0"}, + "unique": True, + "method": "btree", + } + index_1_old = { + "name": "my_index_1", + "column_names": {"column_1"}, + "unique": True, + "method": "btree", + } + index_2_old = { + "name": "my_index_2", + "column_names": {"column_2"}, + "unique": True, + "method": "btree", + } + existing_indexes = frozenset( + PostgresIndexConfig.from_dict(index) for index in [index_0_old, index_1_old, index_2_old] + ) + + index_0_new = deepcopy(index_0_old) + index_2_new = deepcopy(index_2_old) + index_2_new.update(method="hash") + index_3_new = { + "name": "my_index_3", + "column_names": {"column_3"}, + "unique": True, + "method": "hash", + } + new_indexes = frozenset( + PostgresIndexConfig.from_dict(index) for index in [index_0_new, index_2_new, index_3_new] + ) + + relation = PostgresRelation.create( + database="my_database", + schema="my_schema", + identifier="my_materialized_view", + type=RelationType.MaterializedView, + ) + + index_changes = relation._get_index_config_changes(existing_indexes, new_indexes) + + assert isinstance(index_changes, list) + assert len(index_changes) == len(["drop 1", "drop 2", "create 2", "create 3"]) + assert index_changes[0].action == RelationConfigChangeAction.drop + assert index_changes[1].action == RelationConfigChangeAction.drop + assert index_changes[2].action == RelationConfigChangeAction.create + assert index_changes[3].action == RelationConfigChangeAction.create