Skip to content

Commit

Permalink
Filter out empty nodes after graph selection (#10580)
Browse files Browse the repository at this point in the history
* Add unit test

* Filter out empty nodes after graph selection

* Add changie

* Add --indirect-selection empty check to unit test
  • Loading branch information
jtcohen6 authored Sep 3, 2024
1 parent 9b7f4ff commit 37d382c
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240816-140807.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Filter out empty nodes after graph selection to support consistent selection of nodes that depend on upstream public models
time: 2024-08-16T14:08:07.426235-07:00
custom:
Author: jtcohen6
Issue: "8987"
26 changes: 19 additions & 7 deletions core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,15 @@ def get_nodes_from_criteria(
)
return set(), set()

neighbors = self.collect_specified_neighbors(spec, collected)
selected = collected | neighbors

# if --indirect-selection EMPTY, do not expand to adjacent tests
if spec.indirect_selection == IndirectSelection.Empty:
return collected, set()
return selected, set()
else:
neighbors = self.collect_specified_neighbors(spec, collected)
direct_nodes, indirect_nodes = self.expand_selection(
selected=(collected | neighbors), indirect_selection=spec.indirect_selection
selected=selected, indirect_selection=spec.indirect_selection
)
return direct_nodes, indirect_nodes

Expand Down Expand Up @@ -177,10 +180,14 @@ def _is_graph_member(self, unique_id: UniqueId) -> bool:

node = self.manifest.nodes[unique_id]

if self.include_empty_nodes:
return node.config.enabled
return node.config.enabled

def _is_empty_node(self, unique_id: UniqueId) -> bool:
if unique_id in self.manifest.nodes:
node = self.manifest.nodes[unique_id]
return node.empty
else:
return not node.empty and node.config.enabled
return False

def node_is_match(self, node: GraphMemberNode) -> bool:
"""Determine if a node is a match for the selector. Non-match nodes
Expand Down Expand Up @@ -212,7 +219,12 @@ def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
"""Return the subset of selected nodes that is a match for this
selector.
"""
return {unique_id for unique_id in selected if self._is_match(unique_id)}
return {
unique_id
for unique_id in selected
if self._is_match(unique_id)
and (self.include_empty_nodes or not self._is_empty_node(unique_id))
}

def expand_selection(
self,
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/graph/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,29 @@ def test_dependency_list(self, runtime_config: RuntimeConfig):
queue.get(block=False)
queue.mark_done(got.unique_id)
assert queue.empty()

def test_select_downstream_of_empty_model(self, runtime_config: RuntimeConfig):
    """Check that ``model_one+`` selects only the downstream ``model_two``.

    ``model_one`` is built with an empty ``code`` string and ``model_two``
    refs it. The selection result is asserted twice: once with the spec as
    parsed, and once after switching the spec to ``IndirectSelection.Empty``.
    """
    # Upstream model: lives in package "other" and has no SQL body.
    upstream = make_model(pkg="other", name="model_one", code="")
    # Downstream model: has a SQL body that refs the upstream model.
    downstream = make_model(
        pkg="pkg",
        name="model_two",
        code="""select * from {{ref('model_one')}}""",
        refs=[upstream],
    )
    manifest = make_manifest(nodes=[upstream, downstream])

    # Compile the manifest into a graph and build a selector over it.
    graph = dbt.compilation.Compiler(runtime_config).compile(manifest)
    selector = NodeSelector(graph, manifest)

    expected = {"model.pkg.model_two"}
    spec = graph_selector.SelectionCriteria.from_single_spec("model_one+")

    # model_two must be picked up as downstream of model_one.
    assert selector.get_selected(spec) == expected

    # --indirect-selection empty must yield the same result.
    spec.indirect_selection = graph_selector.IndirectSelection.Empty
    assert selector.get_selected(spec) == expected

0 comments on commit 37d382c

Please sign in to comment.