From deedeeb9cecad7069152740079eda8315d7fa420 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 7 Mar 2024 06:30:01 -0800 Subject: [PATCH] Stop trying to parse deleted schema files (#9722) * Add test around deleting a YAML file containing semantic models and metrics It was raised in https://github.com/dbt-labs/dbt-core/issues/8860 that an error is being raised during partial parsing when files containing metrics/semantic models are deleted. In further testing it looks like this error specifically happens when a file containing both semantic models and metrics is deleted. If the deleted file contains just semantic models or metrics there seems to be no issue. The next commit should contain the fix. * Skip deleted schema files when scheduling files during partial parsing Waaaay back (in 7563b99) deleted schema files started being separated out from deleted non-schema files. However ever since, when it came to scheduling files for reparsing, we've only done so for deleted non-schema files. We even missed this when we refactored the scheduling code in b37e5b5. This change updates `_schedule_for_parsing` which is used by `schedule_nodes_for_parsing` to begin skipping deleted schema files in addition to deleted non schema files. * Update `add_to_pp_files` to ignore `deleted_schema_files` As noted in the previous commit, we started separating out deleted schema files from deleted non-schema files a looong time ago. However, this whole time we've been adding `deleted_schema_files` to the list of files to be parsed. This change corrects for that. * Add changie doc for partial parsing KeyError fix --- .../unreleased/Fixes-20240301-135536.yaml | 6 +++ core/dbt/parser/partial.py | 7 ++- tests/functional/partial_parsing/fixtures.py | 54 +++++++++++++++++++ .../partial_parsing/test_pp_metrics.py | 31 ++++++++++- 4 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240301-135536.yaml diff --git a/.changes/unreleased/Fixes-20240301-135536.yaml b/.changes/unreleased/Fixes-20240301-135536.yaml new file mode 100644 index 00000000000..2a96bd7eeec --- /dev/null +++ b/.changes/unreleased/Fixes-20240301-135536.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix partial parsing `KeyError` on deleted schema files +time: 2024-03-01T13:55:36.533176-08:00 +custom: + Author: QMalcolm + Issue: "8860" diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 32b4760f5a8..f9c558be6ba 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -211,6 +211,7 @@ def add_to_pp_files(self, source_file): if ( file_id not in self.project_parser_files[project_name][parser_name] and file_id not in self.file_diff["deleted"] + and file_id not in self.file_diff["deleted_schema_files"] ): self.project_parser_files[project_name][parser_name].append(file_id) @@ -467,7 +468,11 @@ def schedule_nodes_for_parsing(self, unique_ids): def _schedule_for_parsing(self, dict_key: str, element, name, delete: Callable) -> None: file_id = element.file_id - if file_id in self.saved_files and file_id not in self.file_diff["deleted"]: + if ( + file_id in self.saved_files + and file_id not in self.file_diff["deleted"] + and file_id not in self.file_diff["deleted_schema_files"] + ): schema_file = self.saved_files[file_id] elements = [] assert isinstance(schema_file, SchemaSourceFile) diff --git a/tests/functional/partial_parsing/fixtures.py b/tests/functional/partial_parsing/fixtures.py index f76d90ad216..c7a53982ec3 100644 --- a/tests/functional/partial_parsing/fixtures.py +++ b/tests/functional/partial_parsing/fixtures.py @@ -452,6 +452,60 @@ agg_time_dimension: created_at """ +people_sl_yml = """ +version: 2 + +semantic_models: + - name: semantic_people + model: ref('people') + dimensions: + - name: favorite_color + type: categorical + - name: created_at + type: TIME + type_params: + time_granularity: day + measures: + - name: years_tenure + agg: SUM + expr: tenure + - name: people + agg: count + expr: id + entities: + - name: id + type: primary + defaults: + agg_time_dimension: created_at + +metrics: + + - name: number_of_people + description: Total count of people + label: "Number of people" + type: simple + type_params: + measure: people + meta: + my_meta: 'testing' + + - name: collective_tenure + description: Total number of years of team experience + label: "Collective tenure" + type: simple + type_params: + measure: + name: years_tenure + filter: "{{ Dimension('id__loves_dbt') }} is true" + + - name: average_tenure + label: Average Tenure + type: ratio + type_params: + numerator: collective_tenure + denominator: number_of_people +""" + env_var_metrics_yml = """ metrics: diff --git a/tests/functional/partial_parsing/test_pp_metrics.py b/tests/functional/partial_parsing/test_pp_metrics.py index da994e09808..19da625604b 100644 --- a/tests/functional/partial_parsing/test_pp_metrics.py +++ b/tests/functional/partial_parsing/test_pp_metrics.py @@ -1,6 +1,8 @@ import pytest -from dbt.tests.util import run_dbt, write_file, get_manifest +from dbt.cli.main import dbtRunner +from dbt.contracts.graph.manifest import Manifest +from dbt.tests.util import run_dbt, rm_file, write_file, get_manifest from tests.functional.partial_parsing.fixtures import ( people_sql, metricflow_time_spine_sql, @@ -9,6 +11,7 @@ people_metrics2_yml, metric_model_a_sql, people_metrics3_yml, + people_sl_yml, ) from dbt.exceptions import CompilationError @@ -84,3 +87,29 @@ def test_metrics(self, project): # We use "parse" here and not "run" because we're checking that the CompilationError # occurs at parse time, not compilation results = run_dbt(["parse"]) + + +class TestDeleteFileWithMetricsAndSemanticModels: + @pytest.fixture(scope="class") + def models(self): + return { + "people.sql": people_sql, + "metricflow_time_spine.sql": metricflow_time_spine_sql, + "people_sl.yml": people_sl_yml, + } + + def test_metrics(self, project): + # Initial parsing + runner = dbtRunner() + result = runner.invoke(["parse"]) + assert result.success + manifest = result.result + assert isinstance(manifest, Manifest) + assert len(manifest.metrics) == 3 + + # Remove metric file + rm_file(project.project_root, "models", "people_sl.yml") + + # Rerun parse, shouldn't fail + result = runner.invoke(["parse"]) + assert result.exception is None, result.exception