Skip to content

Commit

Permalink
feat: allow setting sort-by to choose alphabetical yaml col sorting o…
Browse files Browse the repository at this point in the history
…n a per node/directory basis
  • Loading branch information
z3z1ma committed Jan 3, 2025
1 parent d89a6fd commit a78514f
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "dbt-osmosis"
version = "1.1.3"
version = "1.1.4"
description = "A dbt utility for managing YAML to make developing with dbt more delightful."
readme = "README.md"
license = { text = "Apache-2.0" }
Expand Down
6 changes: 3 additions & 3 deletions src/dbt_osmosis/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
inherit_upstream_column_knowledge,
inject_missing_columns,
remove_columns_not_in_database,
sort_columns_as_in_database,
sort_columns_as_configured,
sync_node_to_yaml,
synchronize_data_types,
synthesize_missing_documentation_with_openai,
Expand Down Expand Up @@ -259,7 +259,7 @@ def refactor(
inject_missing_columns(context=context)
remove_columns_not_in_database(context=context)
inherit_upstream_column_knowledge(context=context)
sort_columns_as_in_database(context=context)
sort_columns_as_configured(context=context)
synchronize_data_types(context=context)
if synthesize:
synthesize_missing_documentation_with_openai(context=context)
Expand Down Expand Up @@ -432,7 +432,7 @@ def document(

inject_missing_columns(context=context)
inherit_upstream_column_knowledge(context=context)
sort_columns_as_in_database(context=context)
sort_columns_as_configured(context=context)
if synthesize:
synthesize_missing_documentation_with_openai(context=context)
sync_node_to_yaml(context=context)
Expand Down
33 changes: 29 additions & 4 deletions src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
"remove_columns_not_in_database",
"sort_columns_as_in_database",
"sort_columns_alphabetically",
"sort_columns_as_configured",
"synchronize_data_types",
]

Expand Down Expand Up @@ -876,6 +877,9 @@ def process_column(col: BaseColumn | ColumnMetadata):
return normalized_cols


# TODO: instead of getting specific keys, perhaps we get a NodeConfigContext object scoped to a node / node+column
# and internally the __getitem__ or similar handles the complex resolution of keys (under the hood, we can
# probably use a ChainMap)
def _get_setting_for_node(
opt: str,
/,
Expand Down Expand Up @@ -1900,6 +1904,27 @@ def sort_columns_alphabetically(
node.columns = {k: v for k, v in sorted(node.columns.items(), key=lambda i: i[0])}


def sort_columns_as_configured(
    context: YamlRefactorContext, node: ResultNode | None = None
) -> None:
    """Sort a node's columns using the strategy selected by its ``sort-by`` setting.

    The strategy is resolved per node through ``_get_setting_for_node`` and
    falls back to ``"database"`` when the setting is absent:

    * ``"database"``     -> ``sort_columns_as_in_database``
    * ``"alphabetical"`` -> ``sort_columns_alphabetically``

    Args:
        context: The refactor context; its ``pool`` is used to fan the work out
            when no specific node is given.
        node: The node to sort. When ``None``, every node yielded by
            ``_iter_candidate_nodes(context)`` is processed.

    Raises:
        ValueError: If the resolved ``sort-by`` value is not a supported strategy.
    """
    if node is None:
        logger.info(":wave: Sorting columns as configured across all matched nodes.")
        # Dispatch back into this function (not straight to a fixed sorter) so
        # each node's own `sort-by` setting is resolved and honored.
        for _ in context.pool.map(
            partial(sort_columns_as_configured, context),
            (n for _, n in _iter_candidate_nodes(context)),
        ):
            ...
        return

    sort_by = _get_setting_for_node("sort-by", node, fallback="database")
    logger.info(
        ":alphabet_white: Sorting columns as configured (sort-by=%s) => %s",
        sort_by,
        node.unique_id,
    )
    if sort_by == "database":
        sort_columns_as_in_database(context, node)
    elif sort_by == "alphabetical":
        sort_columns_alphabetically(context, node)
    else:
        raise ValueError(f"Invalid sort-by value: {sort_by} for node: {node.unique_id}")


def synchronize_data_types(context: YamlRefactorContext, node: ResultNode | None = None) -> None:
"""Populate data types for columns in a dbt node and its corresponding yaml section. Changes are implicitly buffered until commit_yamls is called."""
if node is None:
Expand Down Expand Up @@ -2004,16 +2029,16 @@ def synthesize_missing_documentation_with_openai(
table_name=node.relation_name or node.name,
upstream_docs=upstream_docs,
)
for column_name, col in node.columns.items():
if not col.description or col.description in context.placeholders:
for column_name, column in node.columns.items():
if not column.description or column.description in context.placeholders:
logger.info(
":robot: Synthesizing documentation for column => %s in node => %s",
column_name,
node.unique_id,
)
col.description = generate_column_doc(
column.description = generate_column_doc(
column_name,
existing_context=f"DataType={col.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
existing_context=f"DataType={column.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
table_name=node.relation_name or node.name,
upstream_docs=upstream_docs,
temperature=0.7,
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a78514f

Please sign in to comment.