Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring to make testing easier & adding tests #96

Merged
merged 13 commits into from
Sep 21, 2023
36 changes: 36 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Tests

on:
- push
- pull_request

jobs:
tests:
name: Run pytest
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Upgrade pip
run: |
pip install --constraint=.github/workflows/constraints.txt pip
pip --version

- name: Install Poetry
run: |
pip install --constraint=.github/workflows/constraints.txt poetry
poetry --version

- name: Install required packages
run: |
poetry install

- name: Run pytest
run: |
poetry run python -m pytest
64 changes: 63 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,6 @@ dbt-duckdb = { version = ">=1.0.0", optional = true }
dbt-sqlite = { version = ">=1.0.0", optional = true }
dbt-postgres = { version = ">=1.0.0", optional = true }

[tool.poetry.dev-dependencies]
z3z1ma marked this conversation as resolved.
Show resolved Hide resolved
black = ">=21.9b0"
mypy = ">=0.910"
pylint = ">=2.11.1"

[tool.poetry.extras]
duckdb = ["dbt-duckdb"]
sqlite = ["dbt-sqlite"]
Expand All @@ -56,6 +51,12 @@ workbench = [
"streamlit-elements-fluence",
]

[tool.poetry.group.dev.dependencies]
black = ">=21.9b0"
mypy = ">=0.910"
pylint = ">=2.11.1"
pytest = "^7.4.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Expand Down
94 changes: 94 additions & 0 deletions src/dbt_osmosis/core/column_level_knowledge_propagator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import (
Any,
Dict,
List,
Optional,
)

from dbt_osmosis.vendored.dbt_core_interface.project import (
ManifestNode,
)


ColumnLevelKnowledge = Dict[str, Any]
Knowledge = Dict[str, ColumnLevelKnowledge]


def _build_node_ancestor_tree(
manifest: ManifestNode,
z3z1ma marked this conversation as resolved.
Show resolved Hide resolved
node: ManifestNode,
family_tree: Optional[Dict[str, List[str]]] = None,
members_found: Optional[List[str]] = None,
depth: int = 0,
) -> Dict[str, List[str]]:
"""Recursively build dictionary of parents in generational order"""
if family_tree is None:
family_tree = {}
if members_found is None:
members_found = []
if not hasattr(node, "depends_on"):
return family_tree
for parent in getattr(node.depends_on, "nodes", []):
member = manifest.nodes.get(parent, manifest.sources.get(parent))
if member and parent not in members_found:
family_tree.setdefault(f"generation_{depth}", []).append(parent)
members_found.append(parent)
# Recursion
family_tree = _build_node_ancestor_tree(
manifest, member, family_tree, members_found, depth + 1
)
return family_tree


def _inherit_column_level_knowledge(
manifest: ManifestNode,
family_tree: Dict[str, Any],
placeholders: List[str],
) -> Knowledge:
"""Inherit knowledge from ancestors in reverse insertion order to ensure that the most
recent ancestor is always the one to inherit from
"""
knowledge: Knowledge = {}
for generation in reversed(family_tree):
for ancestor in family_tree[generation]:
member: ManifestNode = manifest.nodes.get(ancestor, manifest.sources.get(ancestor))
if not member:
continue
for name, info in member.columns.items():
knowledge_default = {"progenitor": ancestor, "generation": generation}
knowledge.setdefault(name, knowledge_default)
deserialized_info = info.to_dict()
# Handle Info:
# 1. tags are additive
# 2. descriptions are overriden
# 3. meta is merged
# 4. tests are ignored until I am convinced those shouldn't be
# hand curated with love
if deserialized_info["description"] in placeholders:
deserialized_info.pop("description", None)
deserialized_info["tags"] = list(
set(deserialized_info.pop("tags", []) + knowledge[name].get("tags", []))
)
if not deserialized_info["tags"]:
deserialized_info.pop("tags") # poppin' tags like Macklemore
deserialized_info["meta"] = {
**knowledge[name].get("meta", {}),
**deserialized_info["meta"],
}
if not deserialized_info["meta"]:
deserialized_info.pop("meta")
knowledge[name].update(deserialized_info)
return knowledge


class ColumnLevelKnowledgePropagator:
@staticmethod
def get_node_columns_with_inherited_knowledge(
manifest: ManifestNode,
node: ManifestNode,
placeholders: List[str],
) -> Knowledge:
"""Build a knowledgebase for the model based on iterating through ancestors"""
family_tree = _build_node_ancestor_tree(manifest, node)
knowledge = _inherit_column_level_knowledge(manifest, family_tree, placeholders)
return knowledge
90 changes: 11 additions & 79 deletions src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
MissingOsmosisConfig,
)
from dbt_osmosis.core.log_controller import logger
from dbt_osmosis.core.column_level_knowledge_propagator import (
ColumnLevelKnowledgePropagator,
ColumnLevelKnowledge,
Knowledge,
)
from dbt_osmosis.vendored.dbt_core_interface.project import (
ColumnInfo,
DbtProject,
Expand Down Expand Up @@ -762,81 +767,6 @@ def pretty_print_restructure_plan(blueprint: Dict[Path, SchemaFileMigration]) ->
)
)

def build_node_ancestor_tree(
self,
node: ManifestNode,
family_tree: Optional[Dict[str, List[str]]] = None,
members_found: Optional[List[str]] = None,
depth: int = 0,
) -> Dict[str, List[str]]:
"""Recursively build dictionary of parents in generational order"""
if family_tree is None:
family_tree = {}
if members_found is None:
members_found = []
if not hasattr(node, "depends_on"):
return family_tree
for parent in getattr(node.depends_on, "nodes", []):
member = self.manifest.nodes.get(parent, self.manifest.sources.get(parent))
if member and parent not in members_found:
family_tree.setdefault(f"generation_{depth}", []).append(parent)
members_found.append(parent)
# Recursion
family_tree = self.build_node_ancestor_tree(
member, family_tree, members_found, depth + 1
)
return family_tree

def inherit_column_level_knowledge(
self,
family_tree: Dict[str, Any],
) -> Dict[str, Dict[str, Any]]:
"""Inherit knowledge from ancestors in reverse insertion order to ensure that the most
recent ancestor is always the one to inherit from
"""
knowledge: Dict[str, Dict[str, Any]] = {}
for generation in reversed(family_tree):
for ancestor in family_tree[generation]:
member: ManifestNode = self.manifest.nodes.get(
ancestor, self.manifest.sources.get(ancestor)
)
if not member:
continue
for name, info in member.columns.items():
knowledge_default = {"progenitor": ancestor, "generation": generation}
knowledge.setdefault(name, knowledge_default)
deserialized_info = info.to_dict()
# Handle Info:
# 1. tags are additive
# 2. descriptions are overriden
# 3. meta is merged
# 4. tests are ignored until I am convinced those shouldn't be
# hand curated with love
if deserialized_info["description"] in self.placeholders:
deserialized_info.pop("description", None)
deserialized_info["tags"] = list(
set(deserialized_info.pop("tags", []) + knowledge[name].get("tags", []))
)
if not deserialized_info["tags"]:
deserialized_info.pop("tags") # poppin' tags like Macklemore
deserialized_info["meta"] = {
**knowledge[name].get("meta", {}),
**deserialized_info["meta"],
}
if not deserialized_info["meta"]:
deserialized_info.pop("meta")
knowledge[name].update(deserialized_info)
return knowledge

def get_node_columns_with_inherited_knowledge(
self,
node: ManifestNode,
) -> Dict[str, Dict[str, Any]]:
"""Build a knowledgebase for the model based on iterating through ancestors"""
family_tree = self.build_node_ancestor_tree(node)
knowledge = self.inherit_column_level_knowledge(family_tree)
return knowledge

@staticmethod
def get_column_sets(
database_columns: Iterable[str],
Expand Down Expand Up @@ -1032,9 +962,9 @@ def remove_columns_not_in_database(

@staticmethod
def get_prior_knowledge(
knowledge: Dict[str, Dict[str, Any]],
knowledge: Knowledge,
column: str,
) -> Dict[str, Any]:
) -> ColumnLevelKnowledge:
camel_column = re.sub("_(.)", lambda m: m.group(1).upper(), column)
prior_knowledge_candidates = list(filter(lambda k: k, [
knowledge.get(column),
Expand Down Expand Up @@ -1064,7 +994,9 @@ def update_undocumented_columns_with_prior_knowledge(
) -> int:
"""Update undocumented columns with prior knowledge in node and model simultaneously
THIS MUTATES THE NODE AND MODEL OBJECTS so that state is always accurate"""
knowledge = self.get_node_columns_with_inherited_knowledge(node)
knowledge: Knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge(
self.manifest, node, self.placeholders
)

inheritables = ["description"]
if not self.skip_add_tags:
Expand All @@ -1074,7 +1006,7 @@ def update_undocumented_columns_with_prior_knowledge(

changes_committed = 0
for column in undocumented_columns:
prior_knowledge = self.get_prior_knowledge(knowledge, column)
prior_knowledge: ColumnLevelKnowledge = self.get_prior_knowledge(knowledge, column)
progenitor = prior_knowledge.pop("progenitor", "Unknown")
prior_knowledge = {k: v for k, v in prior_knowledge.items() if k in inheritables}
if not prior_knowledge:
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading