From 6b17896e70072eb8f6def16aceb97e2bcbe46e37 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Wed, 13 Sep 2023 19:30:38 -0400 Subject: [PATCH 01/10] Add new get_catalog_relations macro, allowing dbt to specify which relations in a schema the adapter should return data about --- core/dbt/adapters/base/impl.py | 20 +++++++++++++++---- core/dbt/adapters/base/relation.py | 2 +- .../macros/adapters/metadata.sql | 16 +++++++++++++++ tests/functional/artifacts/test_override.py | 2 +- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index dd147dce845..21475d9c966 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -73,7 +73,7 @@ from dbt.adapters.cache import RelationsCache, _make_ref_key_dict from dbt import deprecations -GET_CATALOG_MACRO_NAME = "get_catalog" +GET_CATALOG_RELATIONS_MACRO_NAME = "get_catalog_relations" FRESHNESS_MACRO_NAME = "collect_freshness" @@ -1080,12 +1080,24 @@ def _get_one_catalog( information_schema: InformationSchema, schemas: Set[str], manifest: Manifest, + relations_by_schema: Optional[Dict[str, Optional[List[BaseRelation]]]] = None, ) -> agate.Table: - kwargs = {"information_schema": information_schema, "schemas": schemas} + + if relations_by_schema is None: + # The caller has not specified which relations they would like included + # in the results, so we specify None for each schema, which indicates + # to the get_catalog_relations macro that all relations for the schemas + # in the map should be returned. 
+ relations_by_schema = {schema: None for schema in schemas} + + kwargs = { + "information_schema": information_schema, + "relations_by_schema": relations_by_schema, + } table = self.execute_macro( - GET_CATALOG_MACRO_NAME, + GET_CATALOG_RELATIONS_MACRO_NAME, kwargs=kwargs, - # pass in the full manifest so we get any local project + # pass in the full manifest, so we get any local project # overrides manifest=manifest, ) diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py index d8768c44f0b..5c6bc9ee106 100644 --- a/core/dbt/adapters/base/relation.py +++ b/core/dbt/adapters/base/relation.py @@ -454,7 +454,7 @@ def search(self) -> Iterator[Tuple[InformationSchema, Optional[str]]]: for schema in schemas: yield information_schema_name, schema - def flatten(self, allow_multiple_databases: bool = False): + def flatten(self, allow_multiple_databases: bool = False) -> "SchemaSearchMap": new = self.__class__() # make sure we don't have multiple databases if allow_multiple_databases is set to False diff --git a/core/dbt/include/global_project/macros/adapters/metadata.sql b/core/dbt/include/global_project/macros/adapters/metadata.sql index 9e45c500a3f..0e7d6126373 100644 --- a/core/dbt/include/global_project/macros/adapters/metadata.sql +++ b/core/dbt/include/global_project/macros/adapters/metadata.sql @@ -1,3 +1,19 @@ +{% macro get_catalog_relations(information_schema, relations_by_schema) -%} + {{ return(adapter.dispatch('get_catalog_relations', 'dbt')(information_schema, relations_by_schema)) }} +{%- endmacro %} + +{# + The following default implementation simply relies on the more general + get_catalog macro. This is potentially less efficient than returning only the + relations the caller has asked for, but returning more results than were + requested is intentionally permitted by dbt, as long as the requested + relations are present. 
+#} +{% macro default__get_catalog_relations(information_schema, relations_by_schema) -%} + {%- set schemas = relations_by_schema | list -%} + {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }} +{% endmacro %} + {% macro get_catalog(information_schema, schemas) -%} {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }} {%- endmacro %} diff --git a/tests/functional/artifacts/test_override.py b/tests/functional/artifacts/test_override.py index a7b689a3670..32fb5fecec9 100644 --- a/tests/functional/artifacts/test_override.py +++ b/tests/functional/artifacts/test_override.py @@ -7,7 +7,7 @@ """ fail_macros__failure_sql = """ -{% macro get_catalog(information_schema, schemas) %} +{% macro get_catalog_relations(information_schema, relations_by_schema) %} {% do exceptions.raise_compiler_error('rejected: no catalogs for you') %} {% endmacro %} From 22edf72c219f989553951d5a7fe54d5210faa17f Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Thu, 14 Sep 2023 17:06:27 -0400 Subject: [PATCH 02/10] Implement postgres adapter support for relation filtering on catalog queries --- .../dbt/include/postgres/macros/catalog.sql | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index f0d68e1741c..2154b10e6c6 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -1,6 +1,5 @@ -{% macro postgres__get_catalog(information_schema, schemas) -%} - +{% macro postgres__get_catalog_relations(information_schema, schemas_by_relation) -%} {%- call statement('catalog', fetch_result=True) -%} {# If the user has multiple databases set and the first one is wrong, this will fail. 
@@ -29,12 +28,7 @@ join pg_catalog.pg_attribute col on col.attrelid = tbl.oid left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0) left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum) - - where ( - {%- for schema in schemas -%} - upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) + {{ postgres__get_catalog_where_clause(schemas_by_relation) }} and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session and tbl.relpersistence in ('p', 'u') -- [p]ermanent table or [u]nlogged table. Exclude [t]emporary tables and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view @@ -49,5 +43,31 @@ {%- endcall -%} {{ return(load_result('catalog').table) }} +{%- endmacro %} + +{% macro postgres__get_catalog(information_schema, schemas) -%} + {%- set relations_by_schema = dict() -%} + {%- for schema in schemas -%} + {%- set dummy = relations_by_schema.update({schema: None}) -%} + {%- endfor -%} + {{ return(postgres__get_catalog_relations(information_schema, relations_by_schema)) }} +{%- endmacro %} + + +{% macro postgres__get_catalog_where_clause(relations_by_schema) %} + where ( + {%- for schema, relations in relations_by_schema.items() -%} + {%- if relations == None -%} + upper(sch.nspname) = upper('{{ schema }}') + {%- elif len(relations) > 0 -%} + (upper(sch.nspname) = upper('{{ schema }}') and ( + {%- for relation in relations -%} + upper(sch.relname) = upper('{{ relation }}') {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + {%- endif -%} + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) {%- endmacro %} From 4489822a7a7dfd5cadbd345dcfbeaba8abd0eb79 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Mon, 18 Sep 
2023 14:35:54 -0400 Subject: [PATCH 03/10] Code review changes adding feature flag for catalog-by-relation-list support --- core/dbt/adapters/base/impl.py | 97 ++++++++++++++----- core/dbt/adapters/base/relation.py | 4 +- .../macros/adapters/metadata.sql | 23 ++--- .../postgres/dbt/adapters/postgres/impl.py | 6 +- .../dbt/include/postgres/macros/catalog.sql | 28 +++--- tests/functional/artifacts/test_override.py | 2 +- tests/unit/test_postgres_adapter.py | 14 ++- 7 files changed, 110 insertions(+), 64 deletions(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 21475d9c966..9eede91c2c4 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -73,6 +73,7 @@ from dbt.adapters.cache import RelationsCache, _make_ref_key_dict from dbt import deprecations +GET_CATALOG_MACRO_NAME = "get_catalog" GET_CATALOG_RELATIONS_MACRO_NAME = "get_catalog_relations" FRESHNESS_MACRO_NAME = "collect_freshness" @@ -222,6 +223,8 @@ class BaseAdapter(metaclass=AdapterMeta): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } + CATALOG_BY_RELATION_SUPPORT = False + def __init__(self, config): self.config = config self.cache = RelationsCache() @@ -415,6 +418,29 @@ def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap: lowercase strings. 
""" info_schema_name_map = SchemaSearchMap() + relations = self._get_catalog_relations(manifest) + for relation in relations: + info_schema_name_map.add(relation) + # result is a map whose keys are information_schema Relations without + # identifiers that have appropriate database prefixes, and whose values + # are sets of lowercase schema names that are valid members of those + # databases + return info_schema_name_map + + def _get_catalog_relations_by_info_schema( + self, manifest: Manifest + ) -> Dict[InformationSchema, List[BaseRelation]]: + relations = self._get_catalog_relations(manifest) + relations_by_info_schema: Dict[InformationSchema, List[BaseRelation]] = dict() + for relation in relations: + info_schema = relation.information_schema_only() + if info_schema not in relations_by_info_schema: + relations_by_info_schema[info_schema] = [] + relations_by_info_schema[info_schema].append(relation) + + return relations_by_info_schema + + def _get_catalog_relations(self, manifest: Manifest) -> List[BaseRelation]: nodes: Iterator[ResultNode] = chain( [ node @@ -423,14 +449,9 @@ def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap: ], manifest.sources.values(), ) - for node in nodes: - relation = self.Relation.create_from(self.config, node) - info_schema_name_map.add(relation) - # result is a map whose keys are information_schema Relations without - # identifiers that have appropriate database prefixes, and whose values - # are sets of lowercase schema names that are valid members of those - # databases - return info_schema_name_map + + relations = [self.Relation.create_from(self.config, n) for n in nodes] + return relations def _relations_cache_for_schemas( self, manifest: Manifest, cache_schemas: Optional[Set[BaseRelation]] = None @@ -1080,19 +1101,29 @@ def _get_one_catalog( information_schema: InformationSchema, schemas: Set[str], manifest: Manifest, - relations_by_schema: Optional[Dict[str, Optional[List[BaseRelation]]]] = None, ) -> 
agate.Table: + kwargs = {"information_schema": information_schema, "schemas": schemas} + table = self.execute_macro( + GET_CATALOG_MACRO_NAME, + kwargs=kwargs, + # pass in the full manifest so we get any local project + # overrides + manifest=manifest, + ) - if relations_by_schema is None: - # The caller has not specified which relations they would like included - # in the results, so we specify None for each schema, which indicates - # to the get_catalog_relations macro that all relations for the schemas - # in the map should be returned. - relations_by_schema = {schema: None for schema in schemas} + results = self._catalog_filter_table(table, manifest) # type: ignore[arg-type] + return results + + def _get_one_catalog_by_relations( + self, + information_schema: InformationSchema, + relations: List[BaseRelation], + manifest: Manifest, + ) -> agate.Table: kwargs = { "information_schema": information_schema, - "relations_by_schema": relations_by_schema, + "relations": relations, } table = self.execute_macro( GET_CATALOG_RELATIONS_MACRO_NAME, @@ -1106,19 +1137,33 @@ def _get_one_catalog( return results def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]]: - schema_map = self._get_catalog_schemas(manifest) with executor(self.config) as tpe: futures: List[Future[agate.Table]] = [] - for info, schemas in schema_map.items(): - if len(schemas) == 0: - continue - name = ".".join([str(info.database), "information_schema"]) - - fut = tpe.submit_connected( - self, name, self._get_one_catalog, info, schemas, manifest - ) - futures.append(fut) + if self.CATALOG_BY_RELATION_SUPPORT: + relations_by_schema = self._get_catalog_relations_by_info_schema(manifest) + for info_schema in relations_by_schema: + name = ".".join([str(info_schema.database), "information_schema"]) + relations = relations_by_schema[info_schema] + fut = tpe.submit_connected( + self, + name, + self._get_one_catalog_by_relations, + info_schema, + relations, + manifest, + ) + 
futures.append(fut) + else: + schema_map: SchemaSearchMap = self._get_catalog_schemas(manifest) + for info, schemas in schema_map.items(): + if len(schemas) == 0: + continue + name = ".".join([str(info.database), "information_schema"]) + fut = tpe.submit_connected( + self, name, self._get_one_catalog, info, schemas, manifest + ) + futures.append(fut) catalogs, exceptions = catch_as_completed(futures) diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py index 2eb6e660cce..67a50d9061f 100644 --- a/core/dbt/adapters/base/relation.py +++ b/core/dbt/adapters/base/relation.py @@ -459,9 +459,9 @@ def add(self, relation: BaseRelation): self[key].add(schema) def search(self) -> Iterator[Tuple[InformationSchema, Optional[str]]]: - for information_schema_name, schemas in self.items(): + for information_schema, schemas in self.items(): for schema in schemas: - yield information_schema_name, schema + yield information_schema, schema def flatten(self, allow_multiple_databases: bool = False) -> "SchemaSearchMap": new = self.__class__() diff --git a/core/dbt/include/global_project/macros/adapters/metadata.sql b/core/dbt/include/global_project/macros/adapters/metadata.sql index 0e7d6126373..bdbb910ff51 100644 --- a/core/dbt/include/global_project/macros/adapters/metadata.sql +++ b/core/dbt/include/global_project/macros/adapters/metadata.sql @@ -1,18 +1,15 @@ -{% macro get_catalog_relations(information_schema, relations_by_schema) -%} - {{ return(adapter.dispatch('get_catalog_relations', 'dbt')(information_schema, relations_by_schema)) }} +{% macro get_catalog_relations(information_schema, relations) -%} + {{ return(adapter.dispatch('get_catalog_relations', 'dbt')(information_schema, relations)) }} {%- endmacro %} -{# - The following default implementation simply relies on the more general - get_catalog macro. 
This is potentially less efficient than returning only the - relations the caller has asked for, but returning more results than were - requested is intentionally permitted by dbt, as long as the requested - relations are present. -#} -{% macro default__get_catalog_relations(information_schema, relations_by_schema) -%} - {%- set schemas = relations_by_schema | list -%} - {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }} -{% endmacro %} +{% macro default__get_catalog_relations(information_schema, relations) -%} + {% set typename = adapter.type() %} + {% set msg -%} + get_catalog_relations not implemented for {{ typename }} + {%- endset %} + + {{ exceptions.raise_compiler_error(msg) }} +{%- endmacro %} {% macro get_catalog(information_schema, schemas) -%} {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }} diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index adffc4d3a62..6b8f1d756ca 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -73,6 +73,8 @@ class PostgresAdapter(SQLAdapter): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } + CATALOG_BY_RELATION_SUPPORT = True + @classmethod def date_function(cls): return "now()" @@ -113,9 +115,9 @@ def _link_cached_database_relations(self, schemas: Set[str]): def _get_catalog_schemas(self, manifest): # postgres only allow one database (the main one) - schemas = super()._get_catalog_schemas(manifest) + schema_search_map = super()._get_catalog_schemas(manifest) try: - return schemas.flatten() + return schema_search_map.flatten() except DbtRuntimeError as exc: raise CrossDbReferenceProhibitedError(self.type(), exc.msg) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 2154b10e6c6..81b507f953a 100644 --- 
a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -1,6 +1,7 @@ -{% macro postgres__get_catalog_relations(information_schema, schemas_by_relation) -%} +{% macro postgres__get_catalog_relations(information_schema, relations) -%} {%- call statement('catalog', fetch_result=True) -%} + {# If the user has multiple databases set and the first one is wrong, this will fail. But we won't fail in the case where there are multiple quoting-difference-only dbs, which is better. @@ -28,7 +29,7 @@ join pg_catalog.pg_attribute col on col.attrelid = tbl.oid left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0) left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum) - {{ postgres__get_catalog_where_clause(schemas_by_relation) }} + {{ postgres__get_catalog_where_clause(relations) }} and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session and tbl.relpersistence in ('p', 'u') -- [p]ermanent table or [u]nlogged table. Exclude [t]emporary tables and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. 
Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view @@ -47,25 +48,22 @@ {% macro postgres__get_catalog(information_schema, schemas) -%} - {%- set relations_by_schema = dict() -%} + {%- set relations = [] -%} {%- for schema in schemas -%} - {%- set dummy = relations_by_schema.update({schema: None}) -%} + {%- set dummy = relations.append({'schema': schema}) -%} {%- endfor -%} - {{ return(postgres__get_catalog_relations(information_schema, relations_by_schema)) }} + {{ return(postgres__get_catalog_relations(information_schema, relations)) }} {%- endmacro %} -{% macro postgres__get_catalog_where_clause(relations_by_schema) %} +{% macro postgres__get_catalog_where_clause(relations) %} where ( - {%- for schema, relations in relations_by_schema.items() -%} - {%- if relations == None -%} - upper(sch.nspname) = upper('{{ schema }}') - {%- elif len(relations) > 0 -%} - (upper(sch.nspname) = upper('{{ schema }}') and ( - {%- for relation in relations -%} - upper(sch.relname) = upper('{{ relation }}') {%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) + {%- for relation in relations -%} + {%- if relations.identifier -%} + (upper(sch.nspname) = upper('{{ relation.schema }}') and + upper(sch.relname) = upper('{{ relation.identifier }}')) + {%- else-%} + upper(sch.nspname) = upper('{{ relation.schema }}') {%- endif -%} {%- if not loop.last %} or {% endif -%} {%- endfor -%} diff --git a/tests/functional/artifacts/test_override.py b/tests/functional/artifacts/test_override.py index 32fb5fecec9..1d4f32030bd 100644 --- a/tests/functional/artifacts/test_override.py +++ b/tests/functional/artifacts/test_override.py @@ -7,7 +7,7 @@ """ fail_macros__failure_sql = """ -{% macro get_catalog_relations(information_schema, relations_by_schema) %} +{% macro get_catalog_relations(information_schema, relations) %} {% do exceptions.raise_compiler_error('rejected: no catalogs for you') %} {% endmacro %} diff --git 
a/tests/unit/test_postgres_adapter.py b/tests/unit/test_postgres_adapter.py index 0127d0c6398..80b8d61b9b4 100644 --- a/tests/unit/test_postgres_adapter.py +++ b/tests/unit/test_postgres_adapter.py @@ -322,8 +322,8 @@ def test_set_zero_keepalive(self, psycopg2): ) @mock.patch.object(PostgresAdapter, "execute_macro") - @mock.patch.object(PostgresAdapter, "_get_catalog_schemas") - def test_get_catalog_various_schemas(self, mock_get_schemas, mock_execute): + @mock.patch.object(PostgresAdapter, "_get_catalog_relations_by_info_schema") + def test_get_catalog_various_schemas(self, mock_get_relations, mock_execute): column_names = ["table_database", "table_schema", "table_name"] rows = [ ("dbt", "foo", "bar"), @@ -334,9 +334,13 @@ def test_get_catalog_various_schemas(self, mock_get_schemas, mock_execute): ] mock_execute.return_value = agate.Table(rows=rows, column_names=column_names) - mock_get_schemas.return_value.items.return_value = [ - (mock.MagicMock(database="dbt"), {"foo", "FOO", "quux"}) - ] + mock_get_relations.return_value = { + mock.MagicMock(database="dbt"): [ + mock.MagicMock(schema="foo"), + mock.MagicMock(schema="FOO"), + mock.MagicMock(schema="quux"), + ] + } mock_manifest = mock.MagicMock() mock_manifest.get_used_schemas.return_value = {("dbt", "foo"), ("dbt", "quux")} From 6e2e4fcb8c2092c7c463e8c5bb842e2336341f22 Mon Sep 17 00:00:00 2001 From: ezraerb Date: Fri, 15 Sep 2023 11:14:53 -0400 Subject: [PATCH 04/10] Use profile specified in --profile with dbt init (#7450) * Use profile specified in --profile with dbt init * Update .changes/unreleased/Fixes-20230424-161642.yaml Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * Refactor run() method into functions, replace exit() calls with exceptions * Update help text for profile option --------- Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- .../unreleased/Fixes-20230424-161642.yaml | 7 + core/dbt/cli/params.py | 2 +- core/dbt/task/init.py | 82 
+++++++--- tests/functional/init/test_init.py | 147 ++++++++++++++++++ 4 files changed, 214 insertions(+), 24 deletions(-) create mode 100644 .changes/unreleased/Fixes-20230424-161642.yaml diff --git a/.changes/unreleased/Fixes-20230424-161642.yaml b/.changes/unreleased/Fixes-20230424-161642.yaml new file mode 100644 index 00000000000..1eae7a7bd32 --- /dev/null +++ b/.changes/unreleased/Fixes-20230424-161642.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: If --profile specified with dbt-init, create the project with the specified + profile +time: 2023-04-24T16:16:42.994547-04:00 +custom: + Author: ezraerb + Issue: "6154" diff --git a/core/dbt/cli/params.py b/core/dbt/cli/params.py index f7a7365a07e..b8231058531 100644 --- a/core/dbt/cli/params.py +++ b/core/dbt/cli/params.py @@ -315,7 +315,7 @@ profile = click.option( "--profile", envvar=None, - help="Which profile to load. Overrides setting in dbt_project.yml.", + help="Which existing profile to load. Overrides setting in dbt_project.yml.", ) profiles_dir = click.option( diff --git a/core/dbt/task/init.py b/core/dbt/task/init.py index 4f7509bc708..39d18b8dcb2 100644 --- a/core/dbt/task/init.py +++ b/core/dbt/task/init.py @@ -10,6 +10,8 @@ import dbt.config import dbt.clients.system +from dbt.config.profile import read_profile +from dbt.exceptions import DbtRuntimeError from dbt.flags import get_flags from dbt.version import _get_adapter_plugin_names from dbt.adapters.factory import load_plugin, get_include_paths @@ -188,6 +190,15 @@ def create_profile_from_target(self, adapter: str, profile_name: str): # sample_profiles.yml self.create_profile_from_sample(adapter, profile_name) + def check_if_profile_exists(self, profile_name: str) -> bool: + """ + Validate that the specified profile exists. 
Can't use the regular profile validation + routine because it assumes the project file exists + """ + profiles_dir = get_flags().PROFILES_DIR + raw_profiles = read_profile(profiles_dir) + return profile_name in raw_profiles + def check_if_can_write_profile(self, profile_name: Optional[str] = None) -> bool: """Using either a provided profile name or that specified in dbt_project.yml, check if the profile already exists in profiles.yml, and if so ask the @@ -233,6 +244,25 @@ def ask_for_adapter_choice(self) -> str: numeric_choice = click.prompt(prompt_msg, type=click.INT) return available_adapters[numeric_choice - 1] + def setup_profile(self, profile_name: str) -> None: + """Set up a new profile for a project""" + fire_event(SettingUpProfile()) + if not self.check_if_can_write_profile(profile_name=profile_name): + return + # If a profile_template.yml exists in the project root, that effectively + # overrides the profile_template.yml for the given target. + profile_template_path = Path("profile_template.yml") + if profile_template_path.exists(): + try: + # This relies on a valid profile_template.yml from the user, + # so use a try: except to fall back to the default on failure + self.create_profile_using_project_profile_template(profile_name) + return + except Exception: + fire_event(InvalidProfileTemplateYAML()) + adapter = self.ask_for_adapter_choice() + self.create_profile_from_target(adapter, profile_name=profile_name) + def get_valid_project_name(self) -> str: """Returns a valid project name, either from CLI arg or user prompt.""" name = self.args.project_name @@ -247,11 +277,11 @@ def get_valid_project_name(self) -> str: return name - def create_new_project(self, project_name: str): + def create_new_project(self, project_name: str, profile_name: str): self.copy_starter_repo(project_name) os.chdir(project_name) with open("dbt_project.yml", "r") as f: - content = f"{f.read()}".format(project_name=project_name, profile_name=project_name) + content = 
f"{f.read()}".format(project_name=project_name, profile_name=profile_name) with open("dbt_project.yml", "w") as f: f.write(content) fire_event( @@ -274,9 +304,18 @@ def run(self): in_project = False if in_project: + # If --profile was specified, it means use an existing profile, which is not + # applicable to this case + if self.args.profile: + raise DbtRuntimeError( + msg="Can not init existing project with specified profile, edit dbt_project.yml instead" + ) + # When dbt init is run inside an existing project, # just setup the user's profile. - profile_name = self.get_profile_name_from_current_project() + if not self.args.skip_profile_setup: + profile_name = self.get_profile_name_from_current_project() + self.setup_profile(profile_name) else: # When dbt init is run outside of an existing project, # create a new project and set up the user's profile. @@ -285,24 +324,21 @@ def run(self): if project_path.exists(): fire_event(ProjectNameAlreadyExists(name=project_name)) return - self.create_new_project(project_name) - profile_name = project_name - # Ask for adapter only if skip_profile_setup flag is not provided. - if not self.args.skip_profile_setup: - fire_event(SettingUpProfile()) - if not self.check_if_can_write_profile(profile_name=profile_name): - return - # If a profile_template.yml exists in the project root, that effectively - # overrides the profile_template.yml for the given target. 
- profile_template_path = Path("profile_template.yml") - if profile_template_path.exists(): - try: - # This relies on a valid profile_template.yml from the user, - # so use a try: except to fall back to the default on failure - self.create_profile_using_project_profile_template(profile_name) - return - except Exception: - fire_event(InvalidProfileTemplateYAML()) - adapter = self.ask_for_adapter_choice() - self.create_profile_from_target(adapter, profile_name=profile_name) + # If the user specified an existing profile to use, use it instead of generating a new one + user_profile_name = self.args.profile + if user_profile_name: + if not self.check_if_profile_exists(user_profile_name): + raise DbtRuntimeError( + msg="Could not find profile named '{}'".format(user_profile_name) + ) + self.create_new_project(project_name, user_profile_name) + else: + profile_name = project_name + # Create the profile after creating the project to avoid leaving a random profile + # if the former fails. + self.create_new_project(project_name, profile_name) + + # Ask for adapter only if skip_profile_setup flag is not provided + if not self.args.skip_profile_setup: + self.setup_profile(profile_name) diff --git a/tests/functional/init/test_init.py b/tests/functional/init/test_init.py index 8c0444bd0b2..9ac821d7c26 100644 --- a/tests/functional/init/test_init.py +++ b/tests/functional/init/test_init.py @@ -1,10 +1,13 @@ import click import os +import yaml import pytest from pathlib import Path from unittest import mock from unittest.mock import Mock, call +from dbt.exceptions import DbtRuntimeError + from dbt.tests.util import run_dbt @@ -84,6 +87,11 @@ def test_init_task_in_project_with_existing_profiles_yml( """ ) + def test_init_task_in_project_specifying_profile_errors(self): + with pytest.raises(DbtRuntimeError) as error: + run_dbt(["init", "--profile", "test"], expect_pass=False) + assert "Can not init existing project with specified profile" in str(error) + class 
TestInitProjectWithoutExistingProfilesYml: @mock.patch("dbt.task.init._get_adapter_plugin_names") @@ -159,6 +167,20 @@ def exists_side_effect(path): """ ) + @mock.patch.object(Path, "exists", autospec=True) + def test_init_task_in_project_without_profile_yml_specifying_profile_errors(self, exists): + def exists_side_effect(path): + # Override responses on specific files, default to 'real world' if not overriden + return {"profiles.yml": False}.get(path.name, os.path.exists(path)) + + exists.side_effect = exists_side_effect + + # Even through no profiles.yml file exists, the init will not modify project.yml, + # so this errors + with pytest.raises(DbtRuntimeError) as error: + run_dbt(["init", "--profile", "test"], expect_pass=False) + assert "Could not find profile named test" in str(error) + class TestInitProjectWithoutExistingProfilesYmlOrTemplate: @mock.patch("dbt.task.init._get_adapter_plugin_names") @@ -708,3 +730,128 @@ def test_init_inside_project_and_skip_profile_setup( # skip interactive profile setup run_dbt(["init", "--skip-profile-setup"]) assert len(manager.mock_calls) == 0 + + +class TestInitOutsideOfProjectWithSpecifiedProfile(TestInitOutsideOfProjectBase): + @mock.patch("dbt.task.init._get_adapter_plugin_names") + @mock.patch("click.prompt") + def test_init_task_outside_of_project_with_specified_profile( + self, mock_prompt, mock_get_adapter, project, project_name, unique_schema, dbt_profile_data + ): + manager = Mock() + manager.attach_mock(mock_prompt, "prompt") + manager.prompt.side_effect = [ + project_name, + ] + mock_get_adapter.return_value = [project.adapter.type()] + run_dbt(["init", "--profile", "test"]) + + manager.assert_has_calls( + [ + call.prompt("Enter a name for your project (letters, digits, underscore)"), + ] + ) + + # profiles.yml is NOT overwritten, so assert that the text matches that of the + # original fixture + with open(os.path.join(project.profiles_dir, "profiles.yml"), "r") as f: + assert f.read() == 
yaml.safe_dump(dbt_profile_data) + + with open(os.path.join(project.project_root, project_name, "dbt_project.yml"), "r") as f: + assert ( + f.read() + == f""" +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: '{project_name}' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'test' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{{{ config(...) }}}}` macro. 
+models:
+  {project_name}:
+    # Config indicated by + and applies to all files under models/example/
+    example:
+      +materialized: view
+"""
+        )
+
+
+class TestInitOutsideOfProjectSpecifyingInvalidProfile(TestInitOutsideOfProjectBase):
+    @mock.patch("dbt.task.init._get_adapter_plugin_names")
+    @mock.patch("click.prompt")
+    def test_init_task_outside_project_specifying_invalid_profile_errors(
+        self, mock_prompt, mock_get_adapter, project, project_name
+    ):
+        manager = Mock()
+        manager.attach_mock(mock_prompt, "prompt")
+        manager.prompt.side_effect = [
+            project_name,
+        ]
+        mock_get_adapter.return_value = [project.adapter.type()]
+
+        with pytest.raises(DbtRuntimeError) as error:
+            run_dbt(["init", "--profile", "invalid"], expect_pass=False)
+            assert "Could not find profile named invalid" in str(error)
+
+        manager.assert_has_calls(
+            [
+                call.prompt("Enter a name for your project (letters, digits, underscore)"),
+            ]
+        )
+
+
+class TestInitOutsideOfProjectSpecifyingProfileNoProfilesYml(TestInitOutsideOfProjectBase):
+    @mock.patch("dbt.task.init._get_adapter_plugin_names")
+    @mock.patch("click.prompt")
+    def test_init_task_outside_project_specifying_profile_no_profiles_yml_errors(
+        self, mock_prompt, mock_get_adapter, project, project_name
+    ):
+        manager = Mock()
+        manager.attach_mock(mock_prompt, "prompt")
+        manager.prompt.side_effect = [
+            project_name,
+        ]
+        mock_get_adapter.return_value = [project.adapter.type()]
+
+        # Override responses on specific files, default to 'real world' if not overridden
+        original_isfile = os.path.isfile
+        with mock.patch(
+            "os.path.isfile",
+            new=lambda path: {"profiles.yml": False}.get(
+                os.path.basename(path), original_isfile(path)
+            ),
+        ):
+            with pytest.raises(DbtRuntimeError) as error:
+                run_dbt(["init", "--profile", "test"], expect_pass=False)
+            assert "Could not find profile named test" in str(error)
+
+        manager.assert_has_calls(
+            [
+                call.prompt("Enter a name for your project (letters, digits, underscore)"),
+            ]
+        )
From 
9b8e15d247dd3b0385df12bfe71764679f0348cd Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 18 Sep 2023 15:00:10 +0100 Subject: [PATCH 05/10] add TestLargeEphemeralCompilation (#8376) --- .../Under the Hood-20230912-230619.yaml | 6 + tests/functional/materializations/fixtures.py | 245 ++++++++++++++++++ .../test_ephemeral_compilation.py | 83 +++--- 3 files changed, 291 insertions(+), 43 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20230912-230619.yaml create mode 100644 tests/functional/materializations/fixtures.py diff --git a/.changes/unreleased/Under the Hood-20230912-230619.yaml b/.changes/unreleased/Under the Hood-20230912-230619.yaml new file mode 100644 index 00000000000..bc4936730fd --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230912-230619.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: add a test for ephemeral cte injection +time: 2023-09-12T23:06:19.938207+01:00 +custom: + Author: michelleark + Issue: "8376" diff --git a/tests/functional/materializations/fixtures.py b/tests/functional/materializations/fixtures.py new file mode 100644 index 00000000000..b799d08e611 --- /dev/null +++ b/tests/functional/materializations/fixtures.py @@ -0,0 +1,245 @@ +fct_eph_first_sql = """ +-- fct_eph_first.sql +{{ config(materialized='ephemeral') }} + +with int_eph_first as( + select * from {{ ref('int_eph_first') }} +) + +select * from int_eph_first +""" + +int_eph_first_sql = """ +-- int_eph_first.sql +{{ config(materialized='ephemeral') }} + +select + 1 as first_column, + 2 as second_column +""" + +schema_yml = """ +version: 2 + +models: + - name: int_eph_first + columns: + - name: first_column + tests: + - not_null + - name: second_column + tests: + - not_null + + - name: fct_eph_first + columns: + - name: first_column + tests: + - not_null + - name: second_column + tests: + - not_null + +""" + +bar_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * 
FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar1_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar2_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar3_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar4_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +bar5_sql = """ +{{ config(materialized = 'table') }} + +WITH foo AS ( + + SELECT * FROM {{ ref('foo') }} + +), foo_1 AS ( + + SELECT * FROM {{ ref('foo_1') }} + +), foo_2 AS ( + + SELECT * FROM {{ ref('foo_2') }} + +) + +SELECT * FROM foo +UNION ALL +SELECT * FROM foo_1 +UNION ALL +SELECT * FROM foo_2 +""" + +baz_sql = """ +{{ config(materialized = 'table') }} +SELECT * FROM {{ ref('bar') }} +""" + +baz1_sql = """ +{{ config(materialized = 'table') }} +SELECT * FROM {{ ref('bar_1') }} +""" + +foo_sql = """ +{{ config(materialized = 'ephemeral') }} + +with source as ( + + 
select 1 as id + +), renamed as ( + + select id as uid from source + +) + +select * from renamed +""" + +foo1_sql = """ +{{ config(materialized = 'ephemeral') }} + +WITH source AS ( + + SELECT 1 AS id + +), RENAMED as ( + + SELECT id as UID FROM source + +) + +SELECT * FROM renamed +""" + +foo2_sql = """ +{{ config(materialized = 'ephemeral') }} + +WITH source AS ( + + SELECT 1 AS id + +), RENAMED as ( + + SELECT id as UID FROM source + +) + +SELECT * FROM renamed +""" diff --git a/tests/functional/materializations/test_ephemeral_compilation.py b/tests/functional/materializations/test_ephemeral_compilation.py index c9f17d3e00c..f8419e40fd5 100644 --- a/tests/functional/materializations/test_ephemeral_compilation.py +++ b/tests/functional/materializations/test_ephemeral_compilation.py @@ -9,51 +9,23 @@ # fails fairly regularly if that is broken, but does occasionally work (depending # on the order in which things are compiled). It requires multi-threading to fail. - -fct_eph_first_sql = """ --- fct_eph_first.sql -{{ config(materialized='ephemeral') }} - -with int_eph_first as( - select * from {{ ref('int_eph_first') }} +from tests.functional.materializations.fixtures import ( + fct_eph_first_sql, + int_eph_first_sql, + schema_yml, + bar_sql, + bar1_sql, + bar2_sql, + bar3_sql, + bar4_sql, + bar5_sql, + baz_sql, + baz1_sql, + foo_sql, + foo1_sql, + foo2_sql, ) -select * from int_eph_first -""" - -int_eph_first_sql = """ --- int_eph_first.sql -{{ config(materialized='ephemeral') }} - -select - 1 as first_column, - 2 as second_column -""" - -schema_yml = """ -version: 2 - -models: - - name: int_eph_first - columns: - - name: first_column - tests: - - not_null - - name: second_column - tests: - - not_null - - - name: fct_eph_first - columns: - - name: first_column - tests: - - not_null - - name: second_column - tests: - - not_null - -""" - SUPPRESSED_CTE_EXPECTED_OUTPUT = """-- fct_eph_first.sql @@ -89,3 +61,28 @@ def test__suppress_injected_ctes(self, project): node = 
node_result.node assert isinstance(node, ModelNode) assert node.compiled_code == SUPPRESSED_CTE_EXPECTED_OUTPUT + + +# From: https://github.com/jeremyyeo/ephemeral-invalid-sql-repro/tree/main/models +class TestLargeEphemeralCompilation: + @pytest.fixture(scope="class") + def models(self): + + return { + "bar.sql": bar_sql, + "bar_1.sql": bar1_sql, + "bar_2.sql": bar2_sql, + "bar_3.sql": bar3_sql, + "bar_4.sql": bar4_sql, + "bar_5.sql": bar5_sql, + "baz.sql": baz_sql, + "baz_1.sql": baz1_sql, + "foo.sql": foo_sql, + "foo_1.sql": foo1_sql, + "foo_2.sql": foo2_sql, + } + + def test_ephemeral_compilation(self, project): + # 8/11 table models are built as expected. no compilation errors + results = run_dbt(["build"]) + assert len(results) == 8 From aedc0bc4c9d3ce666650f5663317f64c3ae3b13e Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Mon, 25 Sep 2023 14:27:24 -0400 Subject: [PATCH 06/10] Fix a couple of issues in the postgres implementation of get_catalog_relations --- plugins/postgres/dbt/include/postgres/macros/catalog.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 81b507f953a..0afd3716808 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -59,9 +59,9 @@ {% macro postgres__get_catalog_where_clause(relations) %} where ( {%- for relation in relations -%} - {%- if relations.identifier -%} + {%- if relation.identifier -%} (upper(sch.nspname) = upper('{{ relation.schema }}') and - upper(sch.relname) = upper('{{ relation.identifier }}')) + upper(tbl.relname) = upper('{{ relation.identifier }}')) {%- else-%} upper(sch.nspname) = upper('{{ relation.schema }}') {%- endif -%} From 38b356dea840c1d64899ef89065ffca7bdfe77be Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Tue, 26 Sep 2023 14:19:50 -0400 Subject: [PATCH 07/10] Add relation 
count limit at which to fall back to batch retrieval --- core/dbt/adapters/base/impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 9eede91c2c4..7682a74f849 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -1140,7 +1140,8 @@ def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]] with executor(self.config) as tpe: futures: List[Future[agate.Table]] = [] - if self.CATALOG_BY_RELATION_SUPPORT: + relation_count = len(self._get_catalog_relations(manifest)) + if relation_count <= 100 and self.CATALOG_BY_RELATION_SUPPORT: relations_by_schema = self._get_catalog_relations_by_info_schema(manifest) for info_schema in relations_by_schema: name = ".".join([str(info_schema.database), "information_schema"]) From a9addc0c1c2f26dc7fe793ed1e821d4e40e50c36 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Thu, 28 Sep 2023 17:33:45 -0400 Subject: [PATCH 08/10] Better feature detection mechanism for adapters. 
--- core/dbt/adapters/base/impl.py | 18 +++++++++++++++--- plugins/postgres/dbt/adapters/postgres/impl.py | 7 ++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 88ab4c5ff95..9a9fb55e562 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -162,6 +162,14 @@ def submit(self, compiled_code: str) -> Any: raise NotImplementedError("PythonJobHelper submit function is not implemented yet") +class AdapterFeature(str, Enum): + """Enumeration of optional adapter features which can be probed using BaseAdapter.has_feature()""" + + CatalogByRelations = "GetCatalogByRelations" + """Flags support for retrieving catalog information using a list of relations, rather than always retrieving all + the relations in a schema """ + + class BaseAdapter(metaclass=AdapterMeta): """The BaseAdapter provides an abstract base class for adapters. @@ -223,8 +231,6 @@ class BaseAdapter(metaclass=AdapterMeta): ConstraintType.foreign_key: ConstraintSupport.ENFORCED, } - CATALOG_BY_RELATION_SUPPORT = False - def __init__(self, config) -> None: self.config = config self.cache = RelationsCache() @@ -1141,7 +1147,7 @@ def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]] with executor(self.config) as tpe: futures: List[Future[agate.Table]] = [] relation_count = len(self._get_catalog_relations(manifest)) - if relation_count <= 100 and self.CATALOG_BY_RELATION_SUPPORT: + if relation_count <= 100 and self.has_feature(AdapterFeature.CatalogByRelations): relations_by_schema = self._get_catalog_relations_by_info_schema(manifest) for info_schema in relations_by_schema: name = ".".join([str(info_schema.database), "information_schema"]) @@ -1495,6 +1501,12 @@ def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[s else: return None + def has_feature(self, feature: AdapterFeature) -> bool: + # The base adapter implementation does not implement 
any optional + # features, so always return false. Adapters which wish to provide + # optional features will have to override this function. + return False + COLUMNS_EQUAL_SQL = """ with diff_count as ( diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index 6b8f1d756ca..c9a77a945de 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -3,7 +3,7 @@ from typing import Optional, Set, List, Any from dbt.adapters.base.meta import available -from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport +from dbt.adapters.base.impl import AdapterConfig, AdapterFeature, ConstraintSupport from dbt.adapters.sql import SQLAdapter from dbt.adapters.postgres import PostgresConnectionManager from dbt.adapters.postgres.column import PostgresColumn @@ -75,6 +75,8 @@ class PostgresAdapter(SQLAdapter): CATALOG_BY_RELATION_SUPPORT = True + SUPPORTED_FEATURES: Set[AdapterFeature] = frozenset(AdapterFeature.CatalogByRelations) + @classmethod def date_function(cls): return "now()" @@ -145,3 +147,6 @@ def valid_incremental_strategies(self): def debug_query(self): self.execute("select 1 as id") + + def has_feature(self, feature: AdapterFeature) -> bool: + return feature in self.SUPPORTED_FEATURES From 703faff874e529619567ffc16cb0dc9f9165c317 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Fri, 29 Sep 2023 15:38:35 -0400 Subject: [PATCH 09/10] Code review changes to get_catalog_relations and adapter feature checking --- core/dbt/adapters/base/impl.py | 5 ++-- .../postgres/dbt/adapters/postgres/impl.py | 7 ++--- .../dbt/include/postgres/macros/catalog.sql | 27 ++++++++----------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 9a9fb55e562..f8103009d76 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -165,7 +165,7 @@ def submit(self, 
compiled_code: str) -> Any: class AdapterFeature(str, Enum): """Enumeration of optional adapter features which can be probed using BaseAdapter.has_feature()""" - CatalogByRelations = "GetCatalogByRelations" + CatalogByRelations = "CatalogByRelations" """Flags support for retrieving catalog information using a list of relations, rather than always retrieving all the relations in a schema """ @@ -1501,7 +1501,8 @@ def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[s else: return None - def has_feature(self, feature: AdapterFeature) -> bool: + @classmethod + def has_feature(cls, feature: AdapterFeature) -> bool: # The base adapter implementation does not implement any optional # features, so always return false. Adapters which wish to provide # optional features will have to override this function. diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index c9a77a945de..a312a35ea7e 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -75,7 +75,7 @@ class PostgresAdapter(SQLAdapter): CATALOG_BY_RELATION_SUPPORT = True - SUPPORTED_FEATURES: Set[AdapterFeature] = frozenset(AdapterFeature.CatalogByRelations) + SUPPORTED_FEATURES: Set[AdapterFeature] = frozenset([AdapterFeature.CatalogByRelations]) @classmethod def date_function(cls): @@ -148,5 +148,6 @@ def valid_incremental_strategies(self): def debug_query(self): self.execute("select 1 as id") - def has_feature(self, feature: AdapterFeature) -> bool: - return feature in self.SUPPORTED_FEATURES + @classmethod + def has_feature(cls, feature: AdapterFeature) -> bool: + return feature in cls.SUPPORTED_FEATURES diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 0afd3716808..f2b66c8cbf9 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ 
b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -29,7 +29,17 @@ join pg_catalog.pg_attribute col on col.attrelid = tbl.oid left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0) left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum) - {{ postgres__get_catalog_where_clause(relations) }} + where ( + {%- for relation in relations -%} + {%- if relation.identifier -%} + (upper(sch.nspname) = upper('{{ relation.schema }}') and + upper(tbl.relname) = upper('{{ relation.identifier }}')) + {%- else-%} + upper(sch.nspname) = upper('{{ relation.schema }}') + {%- endif -%} + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session and tbl.relpersistence in ('p', 'u') -- [p]ermanent table or [u]nlogged table. Exclude [t]emporary tables and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. 
Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view @@ -54,18 +64,3 @@ {%- endfor -%} {{ return(postgres__get_catalog_relations(information_schema, relations)) }} {%- endmacro %} - - -{% macro postgres__get_catalog_where_clause(relations) %} - where ( - {%- for relation in relations -%} - {%- if relation.identifier -%} - (upper(sch.nspname) = upper('{{ relation.schema }}') and - upper(tbl.relname) = upper('{{ relation.identifier }}')) - {%- else-%} - upper(sch.nspname) = upper('{{ relation.schema }}') - {%- endif -%} - {%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) -{%- endmacro %} From d10e802dab79a5a17a1295d337f1e8e4302d34b8 Mon Sep 17 00:00:00 2001 From: Peter Allen Webb Date: Fri, 29 Sep 2023 15:48:02 -0400 Subject: [PATCH 10/10] Add changelog entry --- .changes/unreleased/Features-20230929-154743.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20230929-154743.yaml diff --git a/.changes/unreleased/Features-20230929-154743.yaml b/.changes/unreleased/Features-20230929-154743.yaml new file mode 100644 index 00000000000..b3acb9a034c --- /dev/null +++ b/.changes/unreleased/Features-20230929-154743.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Added support for retrieving partial catalog information from a schema +time: 2023-09-29T15:47:43.612438-04:00 +custom: + Author: peterallenwebb + Issue: "8521"