From 20c36cb5fa824bfe55e314c052e04360b032a3fb Mon Sep 17 00:00:00 2001
From: Pavel Mackarichev
Date: Wed, 20 Sep 2023 17:17:20 +0300
Subject: [PATCH] feat: add columns to seeds

---
Reviewer note: this patch was re-flowed from a copy whose line breaks had been
collapsed. Blank lines and context indentation were reconstructed from the hunk
counts, and the index hashes are those of the original commit; verify the
context lines against the tree before applying.

Changes relative to the original submission:
- map_column now looks up the column's declared data_type in DBT_TO_ODD
  (falling back to UNKNOWN) instead of always emitting UNKNOWN.
- lineage.py imports keep their stdlib / third-party / local grouping.
- ColumnEntity no longer carries an __init__ that only forwarded to super().
- metadata.py ends with a newline; new definitions are documented.

 odd_dbt/domain/model.py    | 10 +++++++++-
 odd_dbt/mapper/lineage.py  | 37 +++++++++++++++++++++++++++++++++----
 odd_dbt/mapper/metadata.py |  8 +++++++-
 odd_dbt/mapper/types.py    | 31 +++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 6 deletions(-)
 create mode 100644 odd_dbt/mapper/types.py

diff --git a/odd_dbt/domain/model.py b/odd_dbt/domain/model.py
index 731d440..e4a0996 100644
--- a/odd_dbt/domain/model.py
+++ b/odd_dbt/domain/model.py
@@ -1,4 +1,4 @@
-from odd_models import DataEntity, DataTransformer, DataSet, DataInput
+from odd_models import DataEntity, DataTransformer, DataSet, DataInput, DataSetField
 import abc
 
 
@@ -37,6 +37,10 @@ def add_output(self, oddrn: str) -> None:
             self.data_transformer.outputs.append(oddrn)
 
 
+class ColumnEntity(DataSetField):
+    """A single seed column, stored as an ODD ``DataSetField``."""
+
+
 class SeedEntity(NodeEntity):
     def __init__(self, **data: dict):
         super().__init__(**data)
@@ -52,3 +56,7 @@ def add_input(self, oddrn: str) -> None:
     def add_output(self, oddrn: str) -> None:
         if oddrn not in self.data_input.outputs:
             self.data_input.outputs.append(oddrn)
+
+    def add_column(self, column: ColumnEntity) -> None:
+        """Append ``column`` to this seed's ``dataset.field_list``."""
+        self.dataset.field_list.append(column)
diff --git a/odd_dbt/mapper/lineage.py b/odd_dbt/mapper/lineage.py
index 4f3331f..0da8542 100644
--- a/odd_dbt/mapper/lineage.py
+++ b/odd_dbt/mapper/lineage.py
@@ -1,16 +1,18 @@
 import traceback
 from typing import Optional, Union
 
-from dbt.contracts.graph.nodes import ModelNode, SeedNode
+from dbt.contracts.graph.nodes import ModelNode, SeedNode, ColumnInfo
+from odd_models import DataSetFieldType
 from odd_models.models import (
     DataEntityList,
     DataEntityType,
 )
 from oddrn_generator import DbtGenerator
 
 from odd_dbt import logger
 from odd_dbt.domain.context import DbtContext
-from odd_dbt.domain.model import ModelEntity, SeedEntity, NodeEntity
+from odd_dbt.domain.model import ModelEntity, SeedEntity, NodeEntity, ColumnEntity
 from odd_dbt.mapper.generator import create_generator
 from odd_dbt.mapper.metadata import get_model_metadata
+from odd_dbt.mapper.types import DBT_TO_ODD
 
@@ -64,14 +66,41 @@ def map_node(self, node: Union[ModelNode, SeedNode]) -> Optional[NodeEntity]:
         return self.map_seed(node)
 
     def map_seed(self, node: SeedNode) -> SeedEntity:
-        return SeedEntity(
+        """Map a dbt seed node to a ``SeedEntity`` including its columns."""
+        self._generator.set_oddrn_paths(seeds=node.unique_id)
+
+        seed_entity = SeedEntity(
             name=node.unique_id,
-            oddrn=self._generator.get_oddrn_by_path("seeds", node.unique_id),
+            oddrn=self._generator.get_oddrn_by_path("seeds"),
             owner=None,
             type=DataEntityType.FILE,
             metadata=[get_model_metadata(node)],
         )
 
+        for column in node.columns.values():
+            seed_entity.add_column(self.map_column(column, generator=self._generator))
+
+        return seed_entity
+
+    def map_column(self, column: ColumnInfo, generator: DbtGenerator) -> ColumnEntity:
+        """Map a dbt column to a ``ColumnEntity`` under the current seed oddrn.
+
+        The ODD type is looked up from the column's declared ``data_type`` and
+        falls back to ``UNKNOWN`` when it is missing or not in ``DBT_TO_ODD``.
+        """
+        oddrn = generator.get_oddrn_by_path("seeds") + f"/columns/{column.name}"
+        # data_type is optional and may carry a size/precision suffix such as
+        # "varchar(255)"; keep only the bare, upper-cased type name.
+        declared_type = (column.data_type or "UNKNOWN").split("(")[0].strip().upper()
+        return ColumnEntity(
+            name=column.name,
+            oddrn=oddrn,
+            type=DataSetFieldType(
+                type=DBT_TO_ODD.get(declared_type, DBT_TO_ODD["UNKNOWN"]),
+                is_nullable=True,
+            ),
+        )
+
     def map_model(self, node: ModelNode) -> ModelEntity:
         self._generator.set_oddrn_paths(models=node.unique_id)
         model_entity = ModelEntity(
diff --git a/odd_dbt/mapper/metadata.py b/odd_dbt/mapper/metadata.py
index db5f4ae..17bd2a2 100644
--- a/odd_dbt/mapper/metadata.py
+++ b/odd_dbt/mapper/metadata.py
@@ -1,4 +1,4 @@
-from dbt.contracts.graph.nodes import ModelNode, TestNode
+from dbt.contracts.graph.nodes import ModelNode, TestNode, ColumnInfo
 from odd_models import MetadataExtension
 
 
@@ -29,3 +29,9 @@ def get_metadata(test_node: TestNode) -> MetadataExtension:
 def get_model_metadata(model_node: ModelNode) -> MetadataExtension:
     schema_url = "https://raw.githubusercontent.com/opendatadiscovery/opendatadiscovery-specification/main/specification/extensions/dbt.json#/definitions/DataTransformer"
     return MetadataExtension(schema_url=schema_url, metadata=model_node.to_dict())
+
+
+def get_column_metadata(column_info: ColumnInfo) -> MetadataExtension:
+    """Wrap the column's ``to_dict()`` output in an ODD ``MetadataExtension``."""
+    schema_url = "https://raw.githubusercontent.com/opendatadiscovery/opendatadiscovery-specification/main/specification/extensions/dbt.json#/definitions/DataSetField"
+    return MetadataExtension(schema_url=schema_url, metadata=column_info.to_dict())
diff --git a/odd_dbt/mapper/types.py b/odd_dbt/mapper/types.py
new file mode 100644
index 0000000..a5aaae2
--- /dev/null
+++ b/odd_dbt/mapper/types.py
@@ -0,0 +1,31 @@
+from odd_models.models import Type
+
+# Maps upper-cased dbt column type names to ODD field types.
+DBT_TO_ODD: dict[str, Type] = {
+    "INT": Type.TYPE_INTEGER,
+    "INTEGER": Type.TYPE_INTEGER,
+    "SMALLINT": Type.TYPE_INTEGER,
+    "BIGINT": Type.TYPE_INTEGER,
+    "NUMBER": Type.TYPE_NUMBER,
+    "DECIMAL": Type.TYPE_NUMBER,
+    "NUMERIC": Type.TYPE_NUMBER,
+    "DOUBLE": Type.TYPE_NUMBER,
+    "REAL": Type.TYPE_NUMBER,
+    "FLOAT": Type.TYPE_NUMBER,
+    "FIXED": Type.TYPE_NUMBER,
+    "STRING": Type.TYPE_STRING,
+    "TEXT": Type.TYPE_STRING,
+    "VARCHAR": Type.TYPE_STRING,
+    "CHAR": Type.TYPE_CHAR,
+    "CHARACTER": Type.TYPE_CHAR,
+    "BOOLEAN": Type.TYPE_BOOLEAN,
+    "DATETIME": Type.TYPE_DATETIME,
+    "DATE": Type.TYPE_DATETIME,
+    "TIMESTAMP": Type.TYPE_DATETIME,
+    "TIMESTAMP_LTZ": Type.TYPE_DATETIME,
+    "TIMESTAMP_NTZ": Type.TYPE_DATETIME,
+    "TIMESTAMP_TZ": Type.TYPE_DATETIME,
+    "ARRAY": Type.TYPE_LIST,
+    "VARIANT": Type.TYPE_LIST,
+    "UNKNOWN": Type.TYPE_UNKNOWN,
+}