Skip to content

Commit

Permalink
Split files
Browse files Browse the repository at this point in the history
  • Loading branch information
syou6162 committed Sep 17, 2023
1 parent b2a0433 commit b9b2a31
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 77 deletions.
94 changes: 94 additions & 0 deletions src/dbt_osmosis/core/column_level_knowledge_propagator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import (
Any,
Dict,
List,
Optional,
)

from dbt_osmosis.vendored.dbt_core_interface.project import (
ManifestNode,
)


# Knowledge gathered for one column: maps a property name (for example
# "description", "tags", "meta", plus the bookkeeping keys "progenitor" and
# "generation") to its value.
ColumnLevelKnowledge = Dict[str, Any]
# Knowledge gathered for a whole node: maps a column name to that column's knowledge.
Knowledge = Dict[str, ColumnLevelKnowledge]


def _build_node_ancestor_tree(
    manifest: ManifestNode,
    node: ManifestNode,
    family_tree: Optional[Dict[str, List[str]]] = None,
    members_found: Optional[List[str]] = None,
    depth: int = 0,
) -> Dict[str, List[str]]:
    """Collect the ancestors of ``node`` into per-generation buckets.

    The walk is depth-first over ``node.depends_on.nodes``. Every parent id that
    resolves in ``manifest.nodes`` (or, failing that, ``manifest.sources``) is
    appended to the ``"generation_<depth>"`` bucket of the depth at which it is
    first reached; ids already recorded are never revisited.

    Args:
        manifest: Object exposing ``nodes`` and ``sources`` mappings keyed by id.
        node: Node whose ancestors are collected.
        family_tree: Accumulator shared across the recursion; created when omitted.
        members_found: Ids recorded so far, shared across the recursion; created
            when omitted.
        depth: Generation index for the direct parents of ``node``.

    Returns:
        The (mutated) ``family_tree`` mapping of generation key to parent ids.
    """
    family_tree = {} if family_tree is None else family_tree
    members_found = [] if members_found is None else members_found

    # A node without ``depends_on`` contributes no parents, so the loop is skipped.
    parent_ids = getattr(getattr(node, "depends_on", None), "nodes", [])
    for parent_id in parent_ids:
        ancestor_node = manifest.nodes.get(parent_id, manifest.sources.get(parent_id))
        if not ancestor_node or parent_id in members_found:
            continue
        family_tree.setdefault(f"generation_{depth}", []).append(parent_id)
        members_found.append(parent_id)
        # The recursion mutates and returns the same dict, so its result is not rebound.
        _build_node_ancestor_tree(
            manifest, ancestor_node, family_tree, members_found, depth + 1
        )
    return family_tree


def _inherit_column_level_knowledge(
    manifest: ManifestNode,
    family_tree: Dict[str, Any],
    placeholders: List[str],
) -> Knowledge:
    """Merge column metadata from ancestors, letting the closest ancestor win.

    Generations are visited in reverse insertion order of ``family_tree``, so
    entries inserted first are applied last and override what was inherited
    from entries inserted later.

    Args:
        manifest: Object exposing ``nodes`` and ``sources`` mappings keyed by id.
        family_tree: Mapping of generation key to the ancestor ids in it.
        placeholders: Description values that count as "not documented"; such a
            description never overrides one that was already inherited.

    Returns:
        Mapping of column name to its accumulated knowledge. Each entry carries
        ``"progenitor"`` and ``"generation"`` for the first ancestor visited that
        declares the column, plus the merged column properties.
    """
    knowledge: Knowledge = {}
    for generation in reversed(family_tree):
        for ancestor in family_tree[generation]:
            member = manifest.nodes.get(ancestor, manifest.sources.get(ancestor))
            if not member:
                continue
            for name, info in member.columns.items():
                inherited = knowledge.setdefault(
                    name, {"progenitor": ancestor, "generation": generation}
                )
                column_info = info.to_dict()
                # Handle Info:
                # 1. tags are additive
                # 2. descriptions are overridden
                # 3. meta is merged
                # 4. tests are ignored until I am convinced those shouldn't be
                #    hand curated with love
                # `.get` keeps a column without a description from raising KeyError.
                if column_info.get("description") in placeholders:
                    column_info.pop("description", None)
                # dict.fromkeys de-duplicates while keeping first-seen order, so the
                # result does not depend on per-process string hash randomization.
                merged_tags = list(
                    dict.fromkeys(
                        [*(column_info.pop("tags", None) or []), *inherited.get("tags", [])]
                    )
                )
                if merged_tags:
                    column_info["tags"] = merged_tags
                # A missing or None meta is treated as empty instead of crashing.
                merged_meta = {
                    **inherited.get("meta", {}),
                    **(column_info.pop("meta", None) or {}),
                }
                if merged_meta:
                    column_info["meta"] = merged_meta
                inherited.update(column_info)
    return knowledge


class ColumnLevelKnowledgePropagator:
    """Entry point for computing the column knowledge a node inherits."""

    @staticmethod
    def get_node_columns_with_inherited_knowledge(
        manifest: ManifestNode,
        node: ManifestNode,
        placeholders: List[str],
    ) -> Knowledge:
        """Return the knowledge base for ``node`` derived from its ancestors.

        The ancestor tree of ``node`` is built first and then handed, together
        with ``manifest`` and ``placeholders``, to the inheritance step.
        """
        return _inherit_column_level_knowledge(
            manifest,
            _build_node_ancestor_tree(manifest, node),
            placeholders,
        )
81 changes: 4 additions & 77 deletions src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
MissingOsmosisConfig,
)
from dbt_osmosis.core.log_controller import logger
from dbt_osmosis.core.column_level_knowledge_propagator import ColumnLevelKnowledgePropagator
from dbt_osmosis.vendored.dbt_core_interface.project import (
ColumnInfo,
DbtProject,
Expand Down Expand Up @@ -762,82 +763,6 @@ def pretty_print_restructure_plan(blueprint: Dict[Path, SchemaFileMigration]) ->
)
)

@staticmethod
def build_node_ancestor_tree(
    node: ManifestNode,
    manifest: ManifestNode,
    family_tree: Optional[Dict[str, List[str]]] = None,
    members_found: Optional[List[str]] = None,
    depth: int = 0,
) -> Dict[str, List[str]]:
    """Collect the ancestors of ``node`` into per-generation buckets.

    Walks ``node.depends_on.nodes`` depth-first. Each parent id that resolves in
    ``manifest.nodes`` (or, failing that, ``manifest.sources``) is appended to the
    ``"generation_<depth>"`` bucket of the depth at which it is first reached;
    ids already listed in ``members_found`` are skipped.

    Returns:
        The (mutated) ``family_tree`` mapping of generation key to parent ids.
    """
    family_tree = {} if family_tree is None else family_tree
    members_found = [] if members_found is None else members_found

    # A node without ``depends_on`` contributes no parents, so the loop is skipped.
    parent_ids = getattr(getattr(node, "depends_on", None), "nodes", [])
    for parent_id in parent_ids:
        ancestor_node = manifest.nodes.get(parent_id, manifest.sources.get(parent_id))
        if not ancestor_node or parent_id in members_found:
            continue
        family_tree.setdefault(f"generation_{depth}", []).append(parent_id)
        members_found.append(parent_id)
        # The recursion mutates and returns the same dict, so its result is not rebound.
        DbtYamlManager.build_node_ancestor_tree(
            ancestor_node, manifest, family_tree, members_found, depth + 1
        )
    return family_tree

def inherit_column_level_knowledge(
    self,
    family_tree: Dict[str, Any],
) -> Dict[str, Dict[str, Any]]:
    """Merge column metadata from ancestors, letting the closest ancestor win.

    Generations are visited in reverse insertion order of ``family_tree``, so
    entries inserted first are applied last and override what was inherited
    from entries inserted later. Ancestors are resolved through
    ``self.manifest``; a description found in ``self.placeholders`` counts as
    "not documented" and never overrides one that was already inherited.

    Args:
        family_tree: Mapping of generation key to the ancestor ids in it.

    Returns:
        Mapping of column name to its accumulated knowledge. Each entry carries
        ``"progenitor"`` and ``"generation"`` for the first ancestor visited that
        declares the column, plus the merged column properties.
    """
    knowledge: Dict[str, Dict[str, Any]] = {}
    for generation in reversed(family_tree):
        for ancestor in family_tree[generation]:
            member = self.manifest.nodes.get(ancestor, self.manifest.sources.get(ancestor))
            if not member:
                continue
            for name, info in member.columns.items():
                inherited = knowledge.setdefault(
                    name, {"progenitor": ancestor, "generation": generation}
                )
                column_info = info.to_dict()
                # Handle Info:
                # 1. tags are additive
                # 2. descriptions are overridden
                # 3. meta is merged
                # 4. tests are ignored until I am convinced those shouldn't be
                #    hand curated with love
                # `.get` keeps a column without a description from raising KeyError.
                if column_info.get("description") in self.placeholders:
                    column_info.pop("description", None)
                # dict.fromkeys de-duplicates while keeping first-seen order, so the
                # result does not depend on per-process string hash randomization.
                merged_tags = list(
                    dict.fromkeys(
                        [*(column_info.pop("tags", None) or []), *inherited.get("tags", [])]
                    )
                )
                if merged_tags:
                    column_info["tags"] = merged_tags
                # A missing or None meta is treated as empty instead of crashing.
                merged_meta = {
                    **inherited.get("meta", {}),
                    **(column_info.pop("meta", None) or {}),
                }
                if merged_meta:
                    column_info["meta"] = merged_meta
                inherited.update(column_info)
    return knowledge

def get_node_columns_with_inherited_knowledge(
    self,
    node: ManifestNode,
) -> Dict[str, Dict[str, Any]]:
    """Return the knowledge base for ``node`` derived from its ancestors.

    The ancestor tree is built from ``node`` and ``self.manifest`` and then
    passed to ``inherit_column_level_knowledge``.
    """
    return self.inherit_column_level_knowledge(
        self.build_node_ancestor_tree(node, self.manifest)
    )

@staticmethod
def get_column_sets(
database_columns: Iterable[str],
Expand Down Expand Up @@ -1065,7 +990,9 @@ def update_undocumented_columns_with_prior_knowledge(
) -> int:
"""Update undocumented columns with prior knowledge in node and model simultaneously
THIS MUTATES THE NODE AND MODEL OBJECTS so that state is always accurate"""
knowledge = self.get_node_columns_with_inherited_knowledge(node)
knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge(
self.manifest, node, self.placeholders
)

inheritables = ["description"]
if not self.skip_add_tags:
Expand Down

0 comments on commit b9b2a31

Please sign in to comment.