Skip to content

Commit

Permalink
feat: first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
suryaiyer95 committed Jan 25, 2024
1 parent 1e8c7f6 commit 926e66e
Show file tree
Hide file tree
Showing 103 changed files with 48,030 additions and 41 deletions.
12 changes: 7 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import re
from pathlib import Path

from setuptools import find_packages
from setuptools import setup
from setuptools import find_packages, setup


def read(*names, **kwargs):
Expand Down Expand Up @@ -62,8 +61,11 @@ def read(*names, **kwargs):
],
python_requires=">=3.8",
install_requires=[
"click",
# eg: "aspectlib==1.1.1", "six>=1.7",
"click==8.1.7",
"sqlglot==18.3.0",
"dbt-artifacts-parser==0.5.1",
"configtree==0.6",
"tabulate==0.9.0",
],
extras_require={
# eg:
Expand All @@ -72,7 +74,7 @@ def read(*names, **kwargs):
},
entry_points={
"console_scripts": [
"datapilot = datapilot.cli:main",
"datapilot = datapilot.cli.main:datapilot",
]
},
)
4 changes: 2 additions & 2 deletions src/datapilot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
- https://docs.python.org/2/using/cmdline.html#cmdoption-m
- https://docs.python.org/3/using/cmdline.html#cmdoption-m
"""
from datapilot.cli import main
from datapilot.cli.main import datapilot

if __name__ == "__main__":
main()
datapilot()
23 changes: 0 additions & 23 deletions src/datapilot/cli.py

This file was deleted.

Empty file added src/datapilot/cli/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions src/datapilot/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import click

from datapilot.core.platforms.dbt.cli import dbt


@click.group()
def datapilot():
    """Altimate CLI for DBT project management."""
    # Root command group: it does no work itself and only hosts sub-commands.
    # The docstring above is what click shows as the help text, so it is kept
    # exactly as written.


# Attach the dbt command group to the root group.
datapilot.add_command(dbt)
Empty file.
14 changes: 14 additions & 0 deletions src/datapilot/config/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from typing import Text

from configtree import Loader
from configtree.tree import Tree


def load_config(config_file_path: Text) -> Tree:
    """Load the configuration found at ``config_file_path``.

    :param config_file_path: Location of the configuration; passed unchanged
        to a default-constructed ``configtree.Loader``.
    :return: The object produced by calling the loader (annotated ``Tree``).
    """
    return Loader()(config_file_path)


if __name__ == "__main__":
    # Manual smoke test: print the parsed configuration for the file named on
    # the command line. The path used to be hard-coded to a location on one
    # developer's machine, which cannot work anywhere else.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} CONFIG_FILE")
    print(load_config(sys.argv[1]))
17 changes: 17 additions & 0 deletions src/datapilot/config/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import Dict, Optional, Text

from datapilot.core.platforms.dbt.constants import FOLDER, MODEL
from datapilot.schemas.constants import (CONFIG_FOLDER_TYPE_PATTERNS,
CONFIG_MODEL_TYPE_PATTERNS)


def get_regex_configuration(
    config: Optional[Dict],
) -> Dict[Text, Optional[Dict[Text, Text]]]:
    """Extract the model-type and folder-type pattern settings from ``config``.

    :param config: Parsed configuration mapping, or ``None`` when no
        configuration was supplied.
    :return: Mapping with exactly two keys, ``MODEL`` and ``FOLDER``. Each
        value is the matching entry of ``config``, or ``None`` when that entry
        is not set.
    """
    # The parameter is annotated Optional, so a missing config is treated as
    # empty rather than failing with AttributeError on ``None.get``.
    settings = config or {}
    return {
        MODEL: settings.get(CONFIG_MODEL_TYPE_PATTERNS),
        # Folder patterns live under their own key; reading the model key here
        # made the folder entry a copy of the model patterns.
        FOLDER: settings.get(CONFIG_FOLDER_TYPE_PATTERNS),
    }
Empty file added src/datapilot/core/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions src/datapilot/core/insights/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Module-level lists, both created empty here.
# NOTE(review): presumably registries of the available DBT / SQL insights that
# are populated elsewhere — confirm against the modules that import them.
DBT = []
SQL = []
Empty file.
24 changes: 24 additions & 0 deletions src/datapilot/core/insights/base/insight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import logging
from abc import ABC, abstractmethod
from typing import Dict, Optional, Text, Tuple

from datapilot.core.insights.schema import InsightResponse


class Insight(ABC):
    """Abstract base for insights: keeps the configuration and a logger.

    Concrete subclasses must implement :meth:`generate`.
    """

    def __init__(self, config: Optional[Dict] = None, *args, **kwargs):
        """Store the constructor inputs on the instance.

        :param config: Optional configuration mapping; any falsy value is
            replaced by an empty dict.
        :param args: Extra positional arguments, kept on ``self.args``.
        :param kwargs: Extra keyword arguments, kept on ``self.kwargs``.
        """
        self.config = config if config else {}
        self.args, self.kwargs = args, kwargs
        # The logger carries the name of the concrete class, not of this base.
        logger_name = self.__class__.__name__
        self.logger = logging.getLogger(logger_name)

    @abstractmethod
    def generate(self, *args, **kwargs) -> InsightResponse:
        """Produce the insight; every concrete subclass has to override this."""

    @classmethod
    def has_all_required_data(cls, **kwargs) -> Tuple[bool, Text]:
        """Base implementation of the required-data check.

        It ignores ``kwargs`` and always answers negatively.
        NOTE(review): subclasses are presumably expected to override this —
        confirm.

        :return: The pair ``(False, "Not implemented")``.
        """
        answer = (False, "Not implemented")
        return answer
16 changes: 16 additions & 0 deletions src/datapilot/core/insights/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
def generate_ci_cd_report(insights_data):
    """
    Print a plain-text report, one delimited section per insight.

    Each section is framed by an 80-dash rule and lists the project, model id,
    model name (taken from ``metadata['model']``), message, reason and
    recommendation of the insight. Nothing is returned.

    :param insights_data: Iterable of insight objects exposing the attributes
        ``package_name``, ``model_unique_id``, ``metadata``, ``message``,
        ``reason_to_flag`` and ``recommendation``.
    """
    rule = "-" * 80
    # (label, accessor) pairs in output order; accessors are evaluated lazily,
    # one line at a time, right before the line is printed.
    rows = (
        ("Project", lambda item: item.package_name),
        ("Model ID", lambda item: item.model_unique_id),
        ("Name", lambda item: item.metadata["model"]),
        ("Message", lambda item: item.message),
        ("Reason", lambda item: item.reason_to_flag),
        ("Recommendation", lambda item: item.recommendation),
    )
    for entry in insights_data:
        print(rule)
        for label, read in rows:
            print(f"{label}: {read(entry)}")
        print(rule)
24 changes: 24 additions & 0 deletions src/datapilot/core/insights/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from enum import Enum
from typing import Dict, Text

from pydantic import BaseModel


class Severity(Enum):
    """Severity levels for an insight; each member's value equals its name."""

    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"


class InsightResult(BaseModel):
    """Pydantic model holding the details of a single insight finding."""

    # Every field below is required: none declares a default.
    name: Text
    type: Text
    message: Text
    recommendation: Text
    reason_to_flag: Text
    # Free-form dictionary; its keys are not constrained by this model.
    metadata: Dict


class InsightResponse(BaseModel):
    """Pydantic model pairing an ``InsightResult`` with a ``Severity``."""

    insight: InsightResult
    # Falls back to ERROR when the caller does not provide a severity.
    severity: Severity = Severity.ERROR
Empty file.
Empty file.
18 changes: 18 additions & 0 deletions src/datapilot/core/insights/sql/base/insight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from abc import abstractmethod
from typing import Optional, Text

from datapilot.core.insights.base.insight import Insight
from datapilot.schemas.sql import Dialect


class SqlInsight(Insight):
    """Abstract base for insights that work on a SQL string."""

    NAME = "SqlInsight"

    def __init__(self, sql: Text, dialect: Optional[Dialect], *args, **kwargs):
        """Keep the SQL text and dialect, then run the ``Insight`` initialiser.

        :param sql: SQL text, stored on ``self.sql``.
        :param dialect: Dialect of ``sql`` or ``None``, stored on
            ``self.dialect``.
        :param args: Forwarded positionally to ``Insight.__init__``.
        :param kwargs: Forwarded by keyword to ``Insight.__init__``.
        """
        self.sql, self.dialect = sql, dialect
        super().__init__(*args, **kwargs)

    @abstractmethod
    def generate(self, *args, **kwargs) -> dict:
        """Produce the insight; concrete subclasses must override this."""
        # NOTE(review): annotated ``dict`` here, while ``Insight.generate`` is
        # annotated ``InsightResponse`` — confirm which one is intended.
Empty file.
Empty file.
20 changes: 20 additions & 0 deletions src/datapilot/core/insights/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import Optional

from configtree.tree import Tree

from datapilot.core.insights.schema import Severity
from datapilot.schemas.constants import CONFIG_METRICS, CONFIG_SEVERITY


def get_severity(
    config: Optional[Tree],
    alias: str,
    default_severity: Severity,
):
    """Look up the severity configured for the insight called ``alias``.

    :param config: Configuration to search, or ``None``.
    :param alias: Key of the insight inside the ``CONFIG_METRICS`` section.
    :param default_severity: Value returned when ``config`` is ``None`` or
        holds no severity for ``alias``.
    :return: The value stored under ``CONFIG_SEVERITY`` for ``alias``, exactly
        as found in the configuration (no conversion is applied), otherwise
        ``default_severity``.
    """
    if config is not None:
        # Each level falls back to an empty mapping so a missing section or
        # alias simply yields the default.
        alias_settings = config.get(CONFIG_METRICS, {}).get(alias, {})
        return alias_settings.get(CONFIG_SEVERITY, default_severity)
    return default_severity
Empty file.
Empty file.
1 change: 1 addition & 0 deletions src/datapilot/core/platforms/dbt/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .cli import dbt
66 changes: 66 additions & 0 deletions src/datapilot/core/platforms/dbt/cli/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import logging

import click

from datapilot.config.config import load_config
from datapilot.core.platforms.dbt.constants import MODEL, PROJECT
from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
from datapilot.core.platforms.dbt.formatting import (
generate_model_insights_table, generate_project_insights_table)
from datapilot.utils.formatting.utils import tabulate_data

# Runs at import time: requests INFO-level configuration of the root logger as
# a side effect of importing this module.
logging.basicConfig(level=logging.INFO)


# Command group that collects the dbt-specific sub-commands.
@click.group()
def dbt():
    """DBT specific commands."""
    # Nothing to execute here: the group only hosts the commands registered on
    # it. The docstring above is the help text click displays.


@dbt.command("project-health")
@click.option(
    "--manifest-path",
    required=True,
    help="Path to the DBT manifest file",
)
@click.option(
    "--catalog-path",
    required=False,
    help="Path to the DBT catalog file",
)
@click.option(
    "--config-path",
    required=False,
    help="Path to the DBT config file",
)
def project_health(manifest_path, catalog_path, config_path=None):
    """
    Validate the DBT project's configuration and structure.
    :param manifest_path: Path to the DBT manifest file.
    """
    # The docstring above is displayed by click as the command help, so it is
    # left verbatim. In addition to the manifest path, the command receives an
    # optional catalog path and an optional config path.
    banner = "--" * 50

    # A configuration is loaded only when a path was given.
    config = load_config(config_path) if config_path else None

    generator = DBTInsightGenerator(manifest_path, catalog_path=catalog_path, config=config)
    reports = generator.run()
    package_insights, model_insights = reports[PROJECT], reports[MODEL]

    # Per-model section: printed only when there is at least one model entry.
    model_report = generate_model_insights_table(model_insights)
    if len(model_report) > 0:
        for heading_line in (banner, "Model Insights", banner):
            click.echo(heading_line)
        for model_id, report in model_report.items():
            click.echo(f"Model: {model_id}")
            click.echo(f"File path: {report['path']}")
            click.echo(tabulate_data(report["table"], headers="keys"))
            click.echo("\n")

    # Project-level section: always printed.
    project_report = generate_project_insights_table(package_insights)
    for heading_line in (banner, "Project Insights", banner):
        click.echo(heading_line)
    click.echo(tabulate_data(project_report, headers="keys"))
34 changes: 34 additions & 0 deletions src/datapilot/core/platforms/dbt/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# String constants shared by the dbt platform code.
# NOTE(review): the first group presumably mirrors dbt resource type names —
# confirm against the manifest handling code.
SEED = "seed"
MACRO = "macro"
TEST = "test"
MODEL = "model"
SOURCE = "source"


# PROJECT (together with MODEL) is used as a key of the report mapping read by
# the `project-health` command.
PROJECT = "project"
SQL = "sql"

# Model Types
MART: str = "mart"
STAGING = "staging"
INTERMEDIATE = "intermediate"
BASE = "base"
OTHER = "other"

# MATERIALIZATION
TABLE = "table"
INCREMENTAL = "incremental"
VIEW = "view"
EPHEMERAL = "ephemeral"


# Two groupings of the materialization names defined above.
MATERIALIZED = [TABLE, INCREMENTAL]
NON_MATERIALIZED = [VIEW, EPHEMERAL]


# NOTE(review): presumably the kinds of test nodes — confirm with the code
# that classifies tests.
GENERIC = "generic"
SINGULAR = "singular"
OTHER_TEST_NODE = "other_test_node"


# FOLDER (together with MODEL) keys the mapping returned by
# get_regex_configuration.
FOLDER = "folder"
2 changes: 2 additions & 0 deletions src/datapilot/core/platforms/dbt/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class AltimateInvalidManifestError(Exception):
    """Exception type for an invalid manifest.

    NOTE(review): meaning taken from the class name; no raise site is visible
    in this module. Adds nothing on top of ``Exception``.
    """
Loading

0 comments on commit 926e66e

Please sign in to comment.