Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the arg dbt_project_path in the ProjectConfig optional #581

Merged
merged 8 commits into from
Oct 13, 2023
67 changes: 46 additions & 21 deletions cosmos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import contextlib
import tempfile
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
from typing import Iterator


from cosmos.constants import TestBehavior, ExecutionMode, LoadMode
from cosmos.dbt.executable import get_system_dbt
from cosmos.exceptions import CosmosValueError
Expand Down Expand Up @@ -46,39 +48,67 @@ class ProjectConfig:
"""
Class for setting project config.

:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project
:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project. Defaults to None
:param models_relative_path: The relative path to the dbt models directory within the project. Defaults to models
:param seeds_relative_path: The relative path to the dbt seeds directory within the project. Defaults to seeds
:param snapshots_relative_path: The relative path to the dbt snapshots directory within the project. Defaults to
snapshots
:param manifest_path: The absolute path to the dbt manifest file. Defaults to None
:param project_name: Allows the user to define the project name.
Required if dbt_project_path is not defined. Defaults to the folder name of dbt_project_path.
"""

dbt_project_path: str | Path
dbt_project_path: str | Path | None = None
models_relative_path: str | Path = "models"
seeds_relative_path: str | Path = "seeds"
snapshots_relative_path: str | Path = "snapshots"
manifest_path: str | Path | None = None
project_name: str | None = None

@cached_property
def parsed_dbt_project_path(self) -> Path | None:
    """
    The configured ``dbt_project_path`` as a ``Path`` object.

    :returns: ``Path(self.dbt_project_path)`` when a truthy value was supplied, otherwise ``None``.
    """
    # Any falsy value (None, empty string) means no project path was configured.
    if not self.dbt_project_path:
        return None
    return Path(self.dbt_project_path)

parsed_manifest_path: Path | None = None
@cached_property
def parsed_manifest_path(self) -> Path | None:
    """
    The configured ``manifest_path`` as a ``Path`` object.

    :returns: ``Path(self.manifest_path)`` when a truthy value was supplied, otherwise ``None``.
    """
    # Any falsy value (None, empty string) means no manifest was configured.
    if not self.manifest_path:
        return None
    return Path(self.manifest_path)

def __post_init__(self) -> None:
"Converts paths to `Path` objects."
self.dbt_project_path = Path(self.dbt_project_path)
self.models_relative_path = self.dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.dbt_project_path / Path(self.snapshots_relative_path)

if self.manifest_path:
self.parsed_manifest_path = Path(self.manifest_path)
if self.parsed_dbt_project_path:
self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path)
if not self.project_name:
self.project_name = self.parsed_dbt_project_path.stem

def validate_project(self) -> None:
"Validates that the project, models, and seeds directories exist."
project_yml_path = Path(self.dbt_project_path) / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
"""
Validates necessary context is present for a project.
There are 2 cases we need to account for
1 - the entire dbt project
2 - the dbt manifest
Here, we can assume if the project path is provided, we have scenario 1.
If the project path is not provided, we have a scenario 2
"""

mandatory_paths = {}

if self.parsed_dbt_project_path:
project_yml_path = self.parsed_dbt_project_path / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
elif self.parsed_manifest_path:
if not self.project_name:
raise CosmosValueError(
"project_name required when manifest_path is present and dbt_project_path is not."
)
mandatory_paths = {"manifest file": self.parsed_manifest_path}
else:
raise CosmosValueError("dbt_project_path or manifest_path are required parameters.")

for name, path in mandatory_paths.items():
if path is None or not Path(path).exists():
raise CosmosValueError(f"Could not find {name} at {path}")
Expand All @@ -92,11 +122,6 @@ def is_manifest_available(self) -> bool:

return self.parsed_manifest_path.exists()

@property
def project_name(self) -> str:
"The name of the dbt project."
return Path(self.dbt_project_path).stem


@dataclass
class ProfileConfig:
Expand Down
5 changes: 4 additions & 1 deletion docs/configuration/project-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Project Config
The ``cosmos.config.ProjectConfig`` allows you to specify information about where your dbt project is located. It
takes the following arguments:

- ``dbt_project_path`` (required): The full path to your dbt project. This directory should have a ``dbt_project.yml`` file
- ``dbt_project_path``: The full path to your dbt project. This directory should have a ``dbt_project.yml`` file
- ``models_relative_path``: The path to your models directory, relative to the ``dbt_project_path``. This defaults to
``models/``
- ``seeds_relative_path``: The path to your seeds directory, relative to the ``dbt_project_path``. This defaults to
Expand All @@ -13,6 +13,9 @@ takes the following arguments:
to ``snapshots/``
- ``manifest_path``: The absolute path to your dbt manifest file. This is only required if you're using Cosmos' manifest
parsing mode
- ``project_name``: The name of the project. If ``dbt_project_path`` is provided, ``project_name`` defaults to the
name of the folder containing ``dbt_project.yml``. If ``dbt_project_path`` is not provided and ``manifest_path`` is provided,
``project_name`` is required, as the name cannot be inferred from ``dbt_project_path``.


Project Config Example
Expand Down
63 changes: 58 additions & 5 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,82 @@
PIPELINE_FOLDER = "jaffle_shop"


# Tests that a ProjectConfig object can be created with valid parameters
def test_valid_parameters():
project_config = ProjectConfig(dbt_project_path="path/to/dbt/project")
assert project_config.dbt_project_path == Path("path/to/dbt/project")
assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project")
assert project_config.models_relative_path == Path("path/to/dbt/project/models")
assert project_config.seeds_relative_path == Path("path/to/dbt/project/seeds")
assert project_config.snapshots_relative_path == Path("path/to/dbt/project/snapshots")
assert project_config.manifest_path is None


def test_init_with_manifest():
def test_init_with_manifest_path_and_project_path_succeeds():
    """
    Constructing a config with both a manifest path and a project path should keep working as it did before.
    """
    manifest_location = "target/manifest.json"
    config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path=manifest_location)
    assert config.parsed_manifest_path == Path(manifest_location)


def test_validate_project_succeeds():
def test_init_with_manifest_path_and_not_project_path_succeeds():
    """
    dbt_project_path is optional, so a config built from a manifest path alone must be constructible.
    """
    manifest_location = "target/manifest.json"
    config = ProjectConfig(manifest_path=manifest_location)
    assert config.parsed_manifest_path == Path(manifest_location)


def test_validate_with_project_path_and_manifest_path_succeeds():
    """
    A config carrying both a project path and a manifest path should still pass validation.
    """
    manifest_file = DBT_PROJECTS_ROOT_DIR / "manifest.json"
    config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=manifest_file)
    outcome = config.validate_project()
    assert outcome is None


def test_validate_project_fails():
def test_validate_with_project_path_and_not_manifest_path_succeeds():
    """
    A project path on its own, with no manifest, should pass validation.
    """
    config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR)
    outcome = config.validate_project()
    assert outcome is None


def test_validate_with_manifest_path_and_not_project_path_and_not_project_name_fails():
    """
    Passing a manifest alone should fail since we also require a project_name
    """
    project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json")
    # validate_project() is expected to raise here, so it is called bare:
    # asserting on its return value inside pytest.raises would never be evaluated.
    with pytest.raises(CosmosValueError) as err_info:
        project_config.validate_project()
    assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not."


def test_validate_with_manifest_path_and_project_name_and_not_project_path_succeeds():
    """
    A manifest path accompanied by an explicit project name should pass validation.
    """
    manifest_file = DBT_PROJECTS_ROOT_DIR / "manifest.json"
    config = ProjectConfig(manifest_path=manifest_file, project_name="test-project")
    outcome = config.validate_project()
    assert outcome is None


def test_validate_no_paths_fails():
    """
    Passing no manifest and no project directory should fail.
    """
    project_config = ProjectConfig()
    # validate_project() is expected to raise here, so it is called bare:
    # asserting on its return value inside pytest.raises would never be evaluated.
    with pytest.raises(CosmosValueError) as err_info:
        project_config.validate_project()
    assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters."


def test_validate_project_missing_fails():
"""
Passing a project dir that does not exist where specified should fail
"""
project_config = ProjectConfig(dbt_project_path=Path("/tmp"))
with pytest.raises(CosmosValueError) as err_info:
assert project_config.validate_project() is None
Expand Down
Loading