diff --git a/.changes/unreleased/Fixes-20240925-160543.yaml b/.changes/unreleased/Fixes-20240925-160543.yaml new file mode 100644 index 00000000000..68af365c820 --- /dev/null +++ b/.changes/unreleased/Fixes-20240925-160543.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Improve performance of infer primary key +time: 2024-09-25T16:05:43.59536-04:00 +custom: + Author: gshank + Issue: "10781" diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index e265408602b..5536bf39270 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -1150,10 +1150,17 @@ def process_saved_queries(self, config: RuntimeConfig): def process_model_inferred_primary_keys(self): """Processes Model nodes to populate their `primary_key`.""" + model_to_generic_test_map: Dict[str, List[GenericTestNode]] = {} for node in self.manifest.nodes.values(): if not isinstance(node, ModelNode): continue - generic_tests = self._get_generic_tests_for_model(node) + if node.created_at < self.started_at: + continue + if not model_to_generic_test_map: + model_to_generic_test_map = self.build_model_to_generic_tests_map() + generic_tests: List[GenericTestNode] = [] + if node.unique_id in model_to_generic_test_map: + generic_tests = model_to_generic_test_map[node.unique_id] primary_key = node.infer_primary_key(generic_tests) node.primary_key = sorted(primary_key) @@ -1425,23 +1432,21 @@ def write_perf_info(self, target_path: str): write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4)) fire_event(ParsePerfInfoPath(path=path)) - def _get_generic_tests_for_model( - self, - model: ModelNode, - ) -> List[GenericTestNode]: + def build_model_to_generic_tests_map(self) -> Dict[str, List[GenericTestNode]]: """Return a list of generic tests that are attached to the given model, including disabled tests""" - tests = [] + model_to_generic_tests_map: Dict[str, List[GenericTestNode]] = {} for _, node in self.manifest.nodes.items(): - if isinstance(node, GenericTestNode) and node.attached_node == model.unique_id: - tests.append(node) + if isinstance(node, GenericTestNode) and node.attached_node: + if node.attached_node not in model_to_generic_tests_map: + model_to_generic_tests_map[node.attached_node] = [] + model_to_generic_tests_map[node.attached_node].append(node) for _, nodes in self.manifest.disabled.items(): for disabled_node in nodes: - if ( - isinstance(disabled_node, GenericTestNode) - and disabled_node.attached_node == model.unique_id - ): - tests.append(disabled_node) - return tests + if isinstance(disabled_node, GenericTestNode) and disabled_node.attached_node: + if disabled_node.attached_node not in model_to_generic_tests_map: + model_to_generic_tests_map[disabled_node.attached_node] = [] + model_to_generic_tests_map[disabled_node.attached_node].append(disabled_node) + return model_to_generic_tests_map def invalid_target_fail_unless_test(