Stop trying to parse deleted schema files (#9722)
* Add test around deleting a YAML file containing semantic models and metrics

It was reported in #8860 that an
error is raised during partial parsing when files containing
metrics/semantic models are deleted. Further testing showed that the error
specifically occurs when a file containing both semantic models and metrics is
deleted; if the deleted file contains only semantic models or only metrics,
there is no issue. The next commit contains the fix.

* Skip deleted schema files when scheduling files during partial parsing

Way back (in 7563b99) deleted schema files started being separated out
from deleted non-schema files. Ever since, though, when scheduling files for
reparsing we have only skipped deleted non-schema files. We even missed this
when we refactored the scheduling code in b37e5b5. This change updates
`_schedule_for_parsing`, which is used by `schedule_nodes_for_parsing`, to skip
deleted schema files in addition to deleted non-schema files (a rough sketch
follows these commit notes).

* Update `add_to_pp_files` to ignore `deleted_schema_files`

As noted in the previous commit, we started separating deleted schema
files from deleted non-schema files a long time ago. However, this whole
time files in `deleted_schema_files` have still been added to the list of
files to be parsed. This change corrects that.

* Add changie doc for partial parsing KeyError fix
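
For context, here is a rough sketch of the problem the fixes above address. It uses made-up file ids and data rather than dbt's real internals, and the helper name below is illustrative only: partial parsing tracks deleted schema (YAML) files in a separate `deleted_schema_files` bucket of the file diff, so any check that only consults the `deleted` bucket still treats a deleted YAML file as parseable.

# Illustrative sketch only -- not dbt's actual implementation.
# Partial parsing keeps deleted YAML ("schema") files in a separate bucket
# from other deleted files, roughly like this:
file_diff = {
    "deleted": ["my_test://models/old_model.sql"],
    "deleted_schema_files": ["my_test://models/people_sl.yml"],
}
# saved_files reflects the previously saved manifest, so it still contains
# the schema file that was just deleted on disk.
saved_files = {"my_test://models/people_sl.yml": "<SchemaSourceFile>"}

def safe_to_reparse(file_id: str) -> bool:
    # Before the fix, only file_diff["deleted"] was consulted here, so a
    # deleted schema file (still present in saved_files) could be scheduled
    # for reparsing and later trigger the KeyError reported in #8860.
    return (
        file_id in saved_files
        and file_id not in file_diff["deleted"]
        and file_id not in file_diff["deleted_schema_files"]
    )

assert not safe_to_reparse("my_test://models/people_sl.yml")
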
QMalcolm authored Mar 7, 2024
1 parent 6fd0a94 commit deedeeb
Showing 4 changed files with 96 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240301-135536.yaml
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix partial parsing `KeyError` on deleted schema files
time: 2024-03-01T13:55:36.533176-08:00
custom:
  Author: QMalcolm
  Issue: "8860"
7 changes: 6 additions & 1 deletion core/dbt/parser/partial.py
@@ -211,6 +211,7 @@ def add_to_pp_files(self, source_file):
         if (
             file_id not in self.project_parser_files[project_name][parser_name]
             and file_id not in self.file_diff["deleted"]
+            and file_id not in self.file_diff["deleted_schema_files"]
         ):
             self.project_parser_files[project_name][parser_name].append(file_id)

@@ -467,7 +468,11 @@ def schedule_nodes_for_parsing(self, unique_ids):

     def _schedule_for_parsing(self, dict_key: str, element, name, delete: Callable) -> None:
         file_id = element.file_id
-        if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
+        if (
+            file_id in self.saved_files
+            and file_id not in self.file_diff["deleted"]
+            and file_id not in self.file_diff["deleted_schema_files"]
+        ):
             schema_file = self.saved_files[file_id]
             elements = []
             assert isinstance(schema_file, SchemaSourceFile)
54 changes: 54 additions & 0 deletions tests/functional/partial_parsing/fixtures.py
@@ -452,6 +452,60 @@
agg_time_dimension: created_at
"""

people_sl_yml = """
version: 2
semantic_models:
- name: semantic_people
model: ref('people')
dimensions:
- name: favorite_color
type: categorical
- name: created_at
type: TIME
type_params:
time_granularity: day
measures:
- name: years_tenure
agg: SUM
expr: tenure
- name: people
agg: count
expr: id
entities:
- name: id
type: primary
defaults:
agg_time_dimension: created_at
metrics:
- name: number_of_people
description: Total count of people
label: "Number of people"
type: simple
type_params:
measure: people
meta:
my_meta: 'testing'
- name: collective_tenure
description: Total number of years of team experience
label: "Collective tenure"
type: simple
type_params:
measure:
name: years_tenure
filter: "{{ Dimension('id__loves_dbt') }} is true"
- name: average_tenure
label: Average Tenure
type: ratio
type_params:
numerator: collective_tenure
denominator: number_of_people
"""

env_var_metrics_yml = """
metrics:
31 changes: 30 additions & 1 deletion tests/functional/partial_parsing/test_pp_metrics.py
@@ -1,6 +1,8 @@
 import pytest

-from dbt.tests.util import run_dbt, write_file, get_manifest
+from dbt.cli.main import dbtRunner
+from dbt.contracts.graph.manifest import Manifest
+from dbt.tests.util import run_dbt, rm_file, write_file, get_manifest
 from tests.functional.partial_parsing.fixtures import (
     people_sql,
     metricflow_time_spine_sql,
@@ -9,6 +11,7 @@
     people_metrics2_yml,
     metric_model_a_sql,
     people_metrics3_yml,
+    people_sl_yml,
 )

 from dbt.exceptions import CompilationError
@@ -84,3 +87,29 @@ def test_metrics(self, project):
        # We use "parse" here and not "run" because we're checking that the CompilationError
        # occurs at parse time, not compilation
        results = run_dbt(["parse"])


class TestDeleteFileWithMetricsAndSemanticModels:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "people.sql": people_sql,
            "metricflow_time_spine.sql": metricflow_time_spine_sql,
            "people_sl.yml": people_sl_yml,
        }

    def test_metrics(self, project):
        # Initial parsing
        runner = dbtRunner()
        result = runner.invoke(["parse"])
        assert result.success
        manifest = result.result
        assert isinstance(manifest, Manifest)
        assert len(manifest.metrics) == 3

        # Remove metric file
        rm_file(project.project_root, "models", "people_sl.yml")

        # Rerun parse, shouldn't fail
        result = runner.invoke(["parse"])
        assert result.exception is None, result.exception
