Skip to content

Commit

Permalink
fix(ingest): consider sql parsing fallback as failure (datahub-projec…
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Nov 19, 2024
1 parent 44affd7 commit 85c8e60
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 1 deletion.
4 changes: 3 additions & 1 deletion metadata-ingestion/src/datahub/cli/check_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,9 @@ def sql_lineage(
)

logger.debug("Sql parsing debug info: %s", lineage.debug_info)
if lineage.debug_info.error:
if lineage.debug_info.table_error:
raise lineage.debug_info.table_error
elif lineage.debug_info.error:
logger.debug("Sql parsing error details", exc_info=lineage.debug_info.error)

click.echo(lineage.json(indent=4))
Expand Down
9 changes: 9 additions & 0 deletions metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,15 @@ def _sqlglot_lineage_inner(
logger.debug("Parsing lineage from sql statement: %s", sql)
statement = parse_statement(sql, dialect=dialect)

if isinstance(statement, sqlglot.exp.Command):
# For unsupported syntax, sqlglot will usually fall back to parsing as a Command.
# This is effectively a parsing error, and we won't get any lineage from it.
# See https://github.com/tobymao/sqlglot/commit/3a13fdf4e597a2f0a3f9fc126a129183fe98262f
# and https://github.com/tobymao/sqlglot/pull/2874
raise UnsupportedStatementTypeError(
f"Got unsupported syntax for statement: {sql}"
)

original_statement, statement = statement, statement.copy()
# logger.debug(
# "Formatted sql statement: %s",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"query_type": "UNKNOWN",
"query_type_props": {},
"query_fingerprint": null,
"in_tables": [],
"out_tables": [],
"column_lineage": null,
"debug_info": {
"confidence": 0.0,
"generalized_statement": null
}
}
11 changes: 11 additions & 0 deletions metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1268,3 +1268,14 @@ def test_bigquery_subquery_column_inference() -> None:
dialect="bigquery",
expected_file=RESOURCE_DIR / "test_bigquery_subquery_column_inference.json",
)


def test_sqlite_attach_database() -> None:
    """Check a SQLite ``ATTACH DATABASE`` statement against its golden file.

    The statement is passed to ``assert_sql_result`` with the ``sqlite``
    dialect and ``allow_table_error=True``; the result is compared with
    ``test_sqlite_attach_database.json`` under ``RESOURCE_DIR``.
    """
    # The statement text is kept at column 0 so the literal carries no
    # leading indentation into the SQL handed to the parser.
    attach_sql = """\
ATTACH DATABASE ':memory:' AS aux1
"""
    golden_file = RESOURCE_DIR / "test_sqlite_attach_database.json"

    assert_sql_result(
        attach_sql,
        dialect="sqlite",
        expected_file=golden_file,
        allow_table_error=True,
    )

0 comments on commit 85c8e60

Please sign in to comment.