Skip to content

Commit

Permalink
Add "object" mergebehavior to MergeBehavior processing (#192)
Browse files Browse the repository at this point in the history
* Add MergeBehavior.Object

* change name to snapshot_meta_column_names

* Update empty dictionary in merge_config_dicts

* Fix test

* Changie
  • Loading branch information
gshank authored Sep 18, 2024
1 parent e5dff92 commit 16b683e
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 19 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240918-114903.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add MergeBehavior.Object for snapshot column feature
time: 2024-09-18T11:49:03.257411-04:00
custom:
Author: gshank
Issue: "191"
90 changes: 83 additions & 7 deletions dbt_common/contracts/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@ def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> boo
return False
return True

# This is used in 'add_config_call' to create the combined config_call_dict.
# 'meta' moved here from node
# This is used in 'merge_config_dicts' to create the combined orig_dict.
# Note: "clobber" fields aren't defined, because that's the default.
# "access" is currently the only Clobber field.
# This shouldn't really be defined here. It would be better to have it
# associated with the config definitions, but at the point we use it, we
# don't know which config we're dealing with.
mergebehavior = {
"append": ["pre-hook", "pre_hook", "post-hook", "post_hook", "tags"],
"append": ["pre-hook", "pre_hook", "post-hook", "post_hook", "tags", "packages"],
"update": [
"quoting",
"column_types",
Expand All @@ -108,6 +112,7 @@ def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> boo
"contract",
],
"dict_key_append": ["grants"],
"object": ["snapshot_meta_column_names"],
}

@classmethod
Expand Down Expand Up @@ -180,6 +185,7 @@ class MergeBehavior(Metadata):
Update = 2
Clobber = 3
DictKeyAppend = 4
Object = 5

@classmethod
def default_field(cls) -> "MergeBehavior":
Expand Down Expand Up @@ -215,8 +221,10 @@ def _listify(value: Any) -> List[Any]:


# There are two versions of this code. The one here is for config
# objects, the one in _add_config_call in core context_config.py is for
# config_call_dict dictionaries.
# objects which can get the "MergeBehavior" from the field in the class,
# the one below in 'merge_config_dicts' (formerly in
# _add_config_call in core context_config.py) is for config_call dictionaries
# where we need to get the MergeBehavior from someplace else.
def _merge_field_value(
merge_behavior: MergeBehavior,
self_value: Any,
Expand All @@ -225,7 +233,8 @@ def _merge_field_value(
if merge_behavior == MergeBehavior.Clobber:
return other_value
elif merge_behavior == MergeBehavior.Append:
return _listify(self_value) + _listify(other_value)
new_value = _listify(self_value) + _listify(other_value)
return new_value
elif merge_behavior == MergeBehavior.Update:
if not isinstance(self_value, dict):
raise DbtInternalError(f"expected dict, got {self_value}")
Expand Down Expand Up @@ -258,6 +267,73 @@ def _merge_field_value(
# clobber the list
new_dict[new_key] = _listify(other_value[key])
return new_dict

elif merge_behavior == MergeBehavior.Object:
# All fields in classes with MergeBehavior.Object should have a default of None
if not type(self_value).__name__ == type(other_value).__name__:
raise DbtInternalError(
f"got conflicting types: {type(self_value).__name__} and {type(other_value).__name__}"
)
new_value = self_value.copy()
new_value.update(other_value)
return new_value
else:
raise DbtInternalError(f"Got an invalid merge_behavior: {merge_behavior}")


# This is used in ContextConfig._add_config_call. It updates the orig_dict in place.
def merge_config_dicts(orig_dict: Dict[str, Any], new_dict: Dict[str, Any]) -> None:
# orig_dict is already encountered configs, new_dict is new
# This mirrors code in _merge_field_value in model_config.py which is similar but
# operates on config objects.
if orig_dict == {}:
orig_dict.update(new_dict)
return
for k, v in new_dict.items():
# MergeBehavior for post-hook and pre-hook is to collect all
# values, instead of overwriting
if k in BaseConfig.mergebehavior["append"]:
if k in orig_dict: # should always be a list here
orig_dict[k] = _listify(orig_dict[k]) + _listify(v)
else:
orig_dict[k] = _listify(v)
elif k in BaseConfig.mergebehavior["update"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in orig_dict and isinstance(orig_dict[k], dict):
orig_dict[k].update(v)
else:
orig_dict[k] = v
elif k in BaseConfig.mergebehavior["dict_key_append"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in orig_dict: # should always be a dict
for key in orig_dict[k].keys():
orig_dict[k][key] = _listify(orig_dict[k][key])
for key, value in v.items():
extend = False
# This might start with a +, to indicate we should extend the list
# instead of just clobbering it. We don't want to remove the + here
# (like in the other method) because we want it preserved
if key.startswith("+"):
extend = True
if key in orig_dict[k] and extend:
# extend the list
orig_dict[k][key].extend(_listify(value))
else:
# clobber the list
orig_dict[k][key] = _listify(value)
else:
# This is always a dictionary
orig_dict[k] = v
# listify everything
for key, value in orig_dict[k].items():
orig_dict[k][key] = _listify(value)
elif k in BaseConfig.mergebehavior["object"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k not in orig_dict:
orig_dict[k] = {}
for obj_k, obj_v in v.items():
orig_dict[k][obj_k] = obj_v
else: # Clobber
orig_dict[k] = v
108 changes: 96 additions & 12 deletions tests/unit/test_model_config.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
from dataclasses import dataclass, field
from dbt_common.dataclass_schema import dbtClassMixin
from typing import List, Dict
from typing import List, Dict, Optional, Any
from dbt_common.contracts.config.metadata import ShowBehavior
from dbt_common.contracts.config.base import MergeBehavior, CompareBehavior
from dbt_common.contracts.config.base import (
MergeBehavior,
CompareBehavior,
BaseConfig,
merge_config_dicts,
)


@dataclass
class ThingWithMergeBehavior(dbtClassMixin):
default_behavior: int
appended: List[str] = field(metadata={"merge": MergeBehavior.Append})
updated: Dict[str, int] = field(metadata={"merge": MergeBehavior.Update})
clobbered: str = field(metadata={"merge": MergeBehavior.Clobber})
keysappended: Dict[str, int] = field(metadata={"merge": MergeBehavior.DictKeyAppend})
class TableColumnNames(dbtClassMixin):
first_column: Optional[str] = None
second_column: Optional[str] = None
third_column: Optional[str] = None


@dataclass
class SubstituteAdapterConfig(BaseConfig):
pass


@dataclass
class ThingWithMergeBehavior(BaseConfig):
default_behavior: Optional[int] = None
tags: List[str] = field(metadata={"merge": MergeBehavior.Append}, default_factory=list)
meta: Dict[str, int] = field(metadata={"merge": MergeBehavior.Update}, default_factory=dict)
clobbered: Optional[str] = field(metadata={"merge": MergeBehavior.Clobber}, default=None)
grants: Dict[str, Any] = field(
metadata={"merge": MergeBehavior.DictKeyAppend}, default_factory=dict
)
snapshot_table_column_names: TableColumnNames = field(
metadata={"merge": MergeBehavior.Object}, default_factory=TableColumnNames
)


def test_merge_behavior_meta() -> None:
Expand All @@ -22,6 +44,7 @@ def test_merge_behavior_meta() -> None:
MergeBehavior.Update,
MergeBehavior.Clobber,
MergeBehavior.DictKeyAppend,
MergeBehavior.Object,
}
for behavior in MergeBehavior:
assert behavior.meta() == {"merge": behavior}
Expand All @@ -31,12 +54,73 @@ def test_merge_behavior_meta() -> None:

def test_merge_behavior_from_field() -> None:
fields2 = {name: f for f, name in ThingWithMergeBehavior._get_fields()}
assert set(fields2) == {"default_behavior", "appended", "updated", "clobbered", "keysappended"}
assert set(fields2) == {
"default_behavior",
"tags",
"meta",
"clobbered",
"grants",
"snapshot_table_column_names",
}
assert MergeBehavior.from_field(fields2["default_behavior"]) == MergeBehavior.Clobber
assert MergeBehavior.from_field(fields2["appended"]) == MergeBehavior.Append
assert MergeBehavior.from_field(fields2["updated"]) == MergeBehavior.Update
assert MergeBehavior.from_field(fields2["tags"]) == MergeBehavior.Append
assert MergeBehavior.from_field(fields2["meta"]) == MergeBehavior.Update
assert MergeBehavior.from_field(fields2["clobbered"]) == MergeBehavior.Clobber
assert MergeBehavior.from_field(fields2["keysappended"]) == MergeBehavior.DictKeyAppend
assert MergeBehavior.from_field(fields2["grants"]) == MergeBehavior.DictKeyAppend
assert MergeBehavior.from_field(fields2["snapshot_table_column_names"]) == MergeBehavior.Object


def test_update_from() -> None:
initial_dct = {
"default_behavior": 4,
"tags": ["one", "two"],
"meta": {"one": 1, "two": 2},
"clobbered": "initial",
"grants": {"one": "alt"},
"snapshot_table_column_names": {"second_column": "dbt_something"},
}
initial_obj = ThingWithMergeBehavior.from_dict(initial_dct.copy())
assert initial_obj
assert isinstance(initial_obj.snapshot_table_column_names, TableColumnNames)
update_dct = {
"default_behavior": 3,
"tags": ["five"],
"meta": {"two": 2, "three": 3},
"clobbered": "later",
"grants": {"two": "fine", "+one": "some"},
"snapshot_table_column_names": {"first_column": "dbt_ack", "second_column": "dbt_more"},
}
updated_obj = initial_obj.update_from(update_dct.copy(), SubstituteAdapterConfig)

assert updated_obj.default_behavior == 3
assert updated_obj.tags == ["one", "two", "five"]
assert updated_obj.meta == {"one": 1, "two": 2, "three": 3}
assert updated_obj.clobbered == "later"
assert updated_obj.grants == {"one": ["alt", "some"], "two": ["fine"]}
assert updated_obj.snapshot_table_column_names.to_dict() == {
"first_column": "dbt_ack",
"second_column": "dbt_more",
"third_column": None,
}
assert updated_obj.snapshot_table_column_names.first_column == "dbt_ack"
assert updated_obj.snapshot_table_column_names.second_column == "dbt_more"
assert updated_obj.snapshot_table_column_names.third_column is None

# If we want to merge both grants, they need to both have a "+"
initial_dct["grants"] = {"+one": "alt"}

merge_config_dicts(initial_dct, update_dct)

expected = {
"default_behavior": 3,
"tags": ["one", "two", "five"],
"meta": {"one": 1, "two": 2, "three": 3},
"clobbered": "later",
"grants": {"+one": ["alt", "some"], "two": ["fine"]},
"snapshot_table_column_names": {"first_column": "dbt_ack", "second_column": "dbt_more"},
}

assert initial_dct == expected


@dataclass
Expand Down

0 comments on commit 16b683e

Please sign in to comment.