From e8d7304f0b490359f9e52603d3b0e451a359be03 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 11 Aug 2023 22:28:48 -0400 Subject: [PATCH 1/4] add TestLargeEphemeralCompilation --- tests/functional/materializations/fixtures.py | 245 ++++++++++++++++++ .../test_ephemeral_compilation.py | 83 +++--- 2 files changed, 285 insertions(+), 43 deletions(-) create mode 100644 tests/functional/materializations/fixtures.py diff --git a/tests/functional/materializations/fixtures.py b/tests/functional/materializations/fixtures.py new file mode 100644 index 00000000000..b799d08e611 --- /dev/null +++ b/tests/functional/materializations/fixtures.py @@ -0,0 +1,245 @@ +fct_eph_first_sql = """ +-- fct_eph_first.sql +{{ config(materialized='ephemeral') }} + +with int_eph_first as( + select * from {{ ref('int_eph_first') }} +) + +select * from int_eph_first +""" + +int_eph_first_sql = """ +-- int_eph_first.sql +{{ config(materialized='ephemeral') }} + +select + 1 as first_column, + 2 as second_column +""" + +schema_yml = """ +version: 2 + +models: + - name: int_eph_first + columns: + - name: first_column + tests: + - not_null + - name: second_column + tests: + - not_null + + - name: fct_eph_first + columns: + - name: first_column + tests: + - not_null + - name: second_column + tests: + - not_null + +""" + +bar_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar1_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar2_sql = """ +{{ config(materialized = 'table') }} + +WITH foo 
AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar3_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar4_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar5_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +baz_sql = """ +{{ config(materialized = 'table') }} +SELECT * FROM {{ ref('bar') }} +""" + +baz1_sql = """ +{{ config(materialized = 'table') }} +SELECT * FROM {{ ref('bar_1') }} +""" + +foo_sql = """ +{{ config(materialized = 'ephemeral') }} + +with source as ( + + select 1 as id + +), renamed as ( + + select id as uid from source + +) + +select * from renamed +""" + +foo1_sql = """ +{{ config(materialized = 'ephemeral') }} + +WITH source AS ( + + SELECT 1 AS id + +), RENAMED as ( + + SELECT id as UID FROM source + +) + +SELECT * FROM renamed +""" + +foo2_sql = """ +{{ config(materialized = 'ephemeral') }} + +WITH source AS ( + + SELECT 1 AS id + +), RENAMED as ( + + SELECT id as UID FROM source + +) + +SELECT * FROM renamed +""" diff --git a/tests/functional/materializations/test_ephemeral_compilation.py 
b/tests/functional/materializations/test_ephemeral_compilation.py index 56f49928756..744d11beb7e 100644 --- a/tests/functional/materializations/test_ephemeral_compilation.py +++ b/tests/functional/materializations/test_ephemeral_compilation.py @@ -7,51 +7,23 @@ # fails fairly regularly if that is broken, but does occasionally work (depending # on the order in which things are compiled). It requires multi-threading to fail. - -fct_eph_first_sql = """ --- fct_eph_first.sql -{{ config(materialized='ephemeral') }} - -with int_eph_first as( - select * from {{ ref('int_eph_first') }} +from tests.functional.materializations.fixtures import ( + fct_eph_first_sql, + int_eph_first_sql, + schema_yml, + bar_sql, + bar1_sql, + bar2_sql, + bar3_sql, + bar4_sql, + bar5_sql, + baz_sql, + baz1_sql, + foo_sql, + foo1_sql, + foo2_sql, ) -select * from int_eph_first -""" - -int_eph_first_sql = """ --- int_eph_first.sql -{{ config(materialized='ephemeral') }} - -select - 1 as first_column, - 2 as second_column -""" - -schema_yml = """ -version: 2 - -models: - - name: int_eph_first - columns: - - name: first_column - tests: - - not_null - - name: second_column - tests: - - not_null - - - name: fct_eph_first - columns: - - name: first_column - tests: - - not_null - - name: second_column - tests: - - not_null - -""" - class TestEphemeralCompilation: @pytest.fixture(scope="class") @@ -69,3 +41,28 @@ def test_ephemeral_compilation(self, project): results = run_dbt(["test"]) len(results) == 4 + + +# From: https://github.com/jeremyyeo/ephemeral-invalid-sql-repro/tree/main/models +class TestLargeEphemeralCompilation: + @pytest.fixture(scope="class") + def models(self): + + return { + "bar.sql": bar_sql, + "bar_1.sql": bar1_sql, + "bar_2.sql": bar2_sql, + "bar_3.sql": bar3_sql, + "bar_4.sql": bar4_sql, + "bar_5.sql": bar5_sql, + "baz.sql": baz_sql, + "baz_1.sql": baz1_sql, + "foo.sql": foo_sql, + "foo_1.sql": foo1_sql, + "foo_2.sql": foo2_sql, + } + + def test_ephemeral_compilation(self, 
project): + # 8 of the 11 models are tables (the 3 foo* models are ephemeral); expect 8 results and no compilation errors + results = run_dbt(["compile"]) + assert len(results) == 8 From c7c2e1cf3d9cf6805de5f89bcc26415423bec05f Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 12 Sep 2023 22:26:01 +0100 Subject: [PATCH 2/4] for testing - revert changes in fdeccfa --- core/dbt/compilation.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index f37e248515a..1ee5d8aa8f4 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -382,16 +382,17 @@ def _recursively_prepend_ctes( _add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql)) + injected_sql = inject_ctes_into_sql( + model.compiled_code, + prepended_ctes, + ) + # Check again before updating for multi-threading if not model.extra_ctes_injected: - injected_sql = inject_ctes_into_sql( - model.compiled_code, - prepended_ctes, - ) - model.extra_ctes_injected = True model._pre_injected_sql = model.compiled_code model.compiled_code = injected_sql model.extra_ctes = prepended_ctes + model.extra_ctes_injected = True # if model.extra_ctes is not set to prepended ctes, something went wrong return model, model.extra_ctes @@ -528,10 +529,10 @@ def compile_node( recursive method to "prepend" the ctes. 
""" # Make sure Lexer for sqlparse 0.4.4 is initialized - from sqlparse.lexer import Lexer # type: ignore + # from sqlparse.lexer import Lexer # type: ignore - if hasattr(Lexer, "get_default_instance"): - Lexer.get_default_instance() + # if hasattr(Lexer, "get_default_instance"): + # Lexer.get_default_instance() node = self._compile_code(node, manifest, extra_context) From e8710ef5ed35cb936e377f81344455637d2ab2f6 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 12 Sep 2023 22:59:32 +0100 Subject: [PATCH 3/4] undo testing setup --- core/dbt/compilation.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index 1ee5d8aa8f4..f37e248515a 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -382,17 +382,16 @@ def _recursively_prepend_ctes( _add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql)) - injected_sql = inject_ctes_into_sql( - model.compiled_code, - prepended_ctes, - ) - # Check again before updating for multi-threading if not model.extra_ctes_injected: + injected_sql = inject_ctes_into_sql( + model.compiled_code, + prepended_ctes, + ) + model.extra_ctes_injected = True model._pre_injected_sql = model.compiled_code model.compiled_code = injected_sql model.extra_ctes = prepended_ctes - model.extra_ctes_injected = True # if model.extra_ctes is not set to prepended ctes, something went wrong return model, model.extra_ctes @@ -529,10 +528,10 @@ def compile_node( recursive method to "prepend" the ctes. 
""" # Make sure Lexer for sqlparse 0.4.4 is initialized - # from sqlparse.lexer import Lexer # type: ignore + from sqlparse.lexer import Lexer # type: ignore - # if hasattr(Lexer, "get_default_instance"): - # Lexer.get_default_instance() + if hasattr(Lexer, "get_default_instance"): + Lexer.get_default_instance() node = self._compile_code(node, manifest, extra_context) From 4d19f4c240c2620279759f55a06347b83de7ffe0 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 12 Sep 2023 23:06:25 +0100 Subject: [PATCH 4/4] changelog entry --- .changes/unreleased/Under the Hood-20230912-230619.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20230912-230619.yaml diff --git a/.changes/unreleased/Under the Hood-20230912-230619.yaml b/.changes/unreleased/Under the Hood-20230912-230619.yaml new file mode 100644 index 00000000000..bc4936730fd --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230912-230619.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: add a test for ephemeral cte injection +time: 2023-09-12T23:06:19.938207+01:00 +custom: + Author: michelleark + Issue: "8376"