dbt-labs · peterallenwebb · Sep 29, 2023 · Sep 13, 2023 · Sep 14, 2023 · Sep 14, 2023
@@ -74,6 +74,7 @@
 from dbt import deprecations
 
 GET_CATALOG_MACRO_NAME = "get_catalog"
+GET_CATALOG_RELATIONS_MACRO_NAME = "get_catalog_relations"
 FRESHNESS_MACRO_NAME = "collect_freshness"
 
 
@@ -222,6 +223,8 @@
         ConstraintType.foreign_key: ConstraintSupport.ENFORCED,
     }
 
+    # Opt-in flag read by get_catalog: the catalog is only queried relation by
+    # relation (via the get_catalog_relations macro) when this is True and the
+    # manifest yields at most 100 relations; otherwise it is queried by schema.
+    CATALOG_BY_RELATION_SUPPORT = False
+
     def __init__(self, config) -> None:
         self.config = config
         self.cache = RelationsCache()
@@ -415,6 +418,29 @@
         lowercase strings.
         """
         info_schema_name_map = SchemaSearchMap()
+        relations = self._get_catalog_relations(manifest)
+        for relation in relations:
+            info_schema_name_map.add(relation)
+        # result is a map whose keys are information_schema Relations without
+        # identifiers that have appropriate database prefixes, and whose values
+        # are sets of lowercase schema names that are valid members of those
+        # databases
+        return info_schema_name_map
+
+    def _get_catalog_relations_by_info_schema(
+        self, manifest: Manifest
+    ) -> Dict[InformationSchema, List[BaseRelation]]:
+        """Group the manifest's catalog relations by information schema.
+
+        Each key is the value of ``relation.information_schema_only()``; each
+        value lists the relations sharing that key, in the order in which
+        ``_get_catalog_relations`` returned them.
+        """
+        grouped: Dict[InformationSchema, List[BaseRelation]] = {}
+        for rel in self._get_catalog_relations(manifest):
+            # setdefault creates the bucket on first sight of an info schema
+            grouped.setdefault(rel.information_schema_only(), []).append(rel)
+        return grouped
+
+    def _get_catalog_relations(self, manifest: Manifest) -> List[BaseRelation]:
         nodes: Iterator[ResultNode] = chain(
             [
                 node
@@ -423,14 +449,9 @@
             ],
             manifest.sources.values(),
         )
-        for node in nodes:
-            relation = self.Relation.create_from(self.config, node)
-            info_schema_name_map.add(relation)
-        # result is a map whose keys are information_schema Relations without
-        # identifiers that have appropriate database prefixes, and whose values
-        # are sets of lowercase schema names that are valid members of those
-        # databases
-        return info_schema_name_map
+
+        relations = [self.Relation.create_from(self.config, n) for n in nodes]
+        return relations
 
     def _relations_cache_for_schemas(
         self, manifest: Manifest, cache_schemas: Optional[Set[BaseRelation]] = None
@@ -1093,20 +1114,57 @@
         results = self._catalog_filter_table(table, manifest)  # type: ignore[arg-type]
         return results
 
+    def _get_one_catalog_by_relations(
+        self,
+        information_schema: InformationSchema,
+        relations: List[BaseRelation],
+        manifest: Manifest,
+    ) -> agate.Table:
+        """Fetch catalog rows for an explicit list of relations.
+
+        Executes the macro named by GET_CATALOG_RELATIONS_MACRO_NAME with the
+        given information schema and relations, then passes the resulting
+        table through ``_catalog_filter_table`` before returning it.
+        """
+        catalog_table = self.execute_macro(
+            GET_CATALOG_RELATIONS_MACRO_NAME,
+            kwargs=dict(information_schema=information_schema, relations=relations),
+            # the full manifest is handed over so that any local project
+            # overrides of the macro are picked up
+            manifest=manifest,
+        )
+        return self._catalog_filter_table(catalog_table, manifest)  # type: ignore[arg-type]
+
     def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]]:
-        schema_map = self._get_catalog_schemas(manifest)
 
         with executor(self.config) as tpe:
             futures: List[Future[agate.Table]] = []
-            for info, schemas in schema_map.items():
-                if len(schemas) == 0:
-                    continue
-                name = ".".join([str(info.database), "information_schema"])
-
-                fut = tpe.submit_connected(
-                    self, name, self._get_one_catalog, info, schemas, manifest
-                )
-                futures.append(fut)
+            relation_count = len(self._get_catalog_relations(manifest))
+            if relation_count <= 100 and self.CATALOG_BY_RELATION_SUPPORT:
+                relations_by_schema = self._get_catalog_relations_by_info_schema(manifest)
+                for info_schema in relations_by_schema:
+                    name = ".".join([str(info_schema.database), "information_schema"])
+                    relations = relations_by_schema[info_schema]
+                    fut = tpe.submit_connected(
+                        self,
+                        name,
+                        self._get_one_catalog_by_relations,
+                        info_schema,
+                        relations,
+                        manifest,
+                    )
+                    futures.append(fut)
+            else:
+                schema_map: SchemaSearchMap = self._get_catalog_schemas(manifest)
+                for info, schemas in schema_map.items():
+                    if len(schemas) == 0:
+                        continue
+                    name = ".".join([str(info.database), "information_schema"])
+                    fut = tpe.submit_connected(
+                        self, name, self._get_one_catalog, info, schemas, manifest
+                    )
+                    futures.append(fut)
 
             catalogs, exceptions = catch_as_completed(futures)
 

@@ -459,11 +459,11 @@
         self[key].add(schema)
 
     def search(self) -> Iterator[Tuple[InformationSchema, Optional[str]]]:
+        """Yield one (information schema, schema) pair for every schema stored in the map."""
-        for information_schema_name, schemas in self.items():
+        for information_schema, schemas in self.items():
             for schema in schemas:
-                yield information_schema_name, schema
+                yield information_schema, schema
 
-    def flatten(self, allow_multiple_databases: bool = False):
+    def flatten(self, allow_multiple_databases: bool = False) -> "SchemaSearchMap":
         new = self.__class__()
 
         # make sure we don't have multiple databases if allow_multiple_databases is set to False

@@ -1,3 +1,16 @@
+{# Dispatches get_catalog_relations through adapter.dispatch and returns that implementation's result. #}
+{% macro get_catalog_relations(information_schema, relations) -%}
+  {{ return(adapter.dispatch('get_catalog_relations', 'dbt')(information_schema, relations)) }}
+{%- endmacro %}
+
+{#
+  Default implementation: always raises a compiler error stating that
+  get_catalog_relations is not implemented for the current adapter type.
+#}
+{% macro default__get_catalog_relations(information_schema, relations) -%}
+  {% set typename = adapter.type() %}
+  {% set msg -%}
+    get_catalog_relations not implemented for {{ typename }}
+  {%- endset %}
+
+  {{ exceptions.raise_compiler_error(msg) }}
+{%- endmacro %}
+
+{# Dispatches get_catalog through adapter.dispatch and returns that implementation's result. #}
 {% macro get_catalog(information_schema, schemas) -%}
   {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
 {%- endmacro %}

@@ -73,6 +73,8 @@ class PostgresAdapter(SQLAdapter):
         ConstraintType.foreign_key: ConstraintSupport.ENFORCED,
     }
 
+    # Postgres ships a postgres__get_catalog_relations macro, so it opts in to
+    # the per-relation catalog query path.
+    CATALOG_BY_RELATION_SUPPORT = True
+
     @classmethod
     def date_function(cls):
         return "now()"
@@ -113,9 +115,9 @@ def _link_cached_database_relations(self, schemas: Set[str]):
 
     def _get_catalog_schemas(self, manifest):
+        """Return the parent's schema search map after calling ``flatten()`` on it.
+
+        If flattening raises DbtRuntimeError, it is re-raised as
+        CrossDbReferenceProhibitedError built from this adapter's type and the
+        original error message.
+        """
         # postgres only allow one database (the main one)
-        schemas = super()._get_catalog_schemas(manifest)
+        schema_search_map = super()._get_catalog_schemas(manifest)
         try:
-            return schemas.flatten()
+            return schema_search_map.flatten()
         except DbtRuntimeError as exc:
             raise CrossDbReferenceProhibitedError(self.type(), exc.msg)
 

@@ -1,7 +1,7 @@
 
-{% macro postgres__get_catalog(information_schema, schemas) -%}
-
+{% macro postgres__get_catalog_relations(information_schema, relations) -%}
   {%- call statement('catalog', fetch_result=True) -%}
+
     {#
       If the user has multiple databases set and the first one is wrong, this will fail.
       But we won't fail in the case where there are multiple quoting-difference-only dbs, which is better.
@@ -29,12 +29,7 @@
     join pg_catalog.pg_attribute col on col.attrelid = tbl.oid
     left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0)
     left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum)
-
-    where (
-        {%- for schema in schemas -%}
-          upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
-        {%- endfor -%}
-      )
+    {{ postgres__get_catalog_where_clause(relations) }}
       and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session
       and tbl.relpersistence in ('p', 'u') -- [p]ermanent table or [u]nlogged table. Exclude [t]emporary tables
       and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view
@@ -49,5 +44,28 @@
   {%- endcall -%}
 
   {{ return(load_result('catalog').table) }}
+{%- endmacro %}
 
+
+{#
+  Schema-based entry point: wraps every schema name in a dict that has only a
+  'schema' key and delegates to postgres__get_catalog_relations. Because those
+  dicts carry no identifier, the generated where clause filters on schema alone.
+#}
+{% macro postgres__get_catalog(information_schema, schemas) -%}
+  {%- set relations = [] -%}
+  {%- for schema in schemas -%}
+    {%- set dummy = relations.append({'schema': schema}) -%}
+  {%- endfor -%}
+  {{ return(postgres__get_catalog_relations(information_schema, relations)) }}
+{%- endmacro %}
+
+
+{#
+  Renders the opening "where ( ... )" of the catalog query: one condition per
+  relation, joined with "or" and compared case-insensitively through upper().
+  A relation with an identifier matches that schema and table name; a relation
+  without one matches every table in its schema.
+  NOTE(review): an empty relations list renders an empty parenthesised
+  condition, which is not valid SQL; confirm callers never pass an empty list.
+  NOTE(review): schema and identifier values are interpolated into SQL string
+  literals without escaping; confirm they can never contain a single quote.
+#}
+{% macro postgres__get_catalog_where_clause(relations) %}
+    where (
+      {%- for relation in relations -%}
+        {%- if relation.identifier -%}
+          (upper(sch.nspname) = upper('{{ relation.schema }}') and
+           upper(tbl.relname) = upper('{{ relation.identifier }}'))
+        {%- else-%}
+          upper(sch.nspname) = upper('{{ relation.schema }}')
+        {%- endif -%}
+        {%- if not loop.last %} or {% endif -%}
+      {%- endfor -%}
+    )
 {%- endmacro %}
@@ -7,7 +7,7 @@
 """
 
 fail_macros__failure_sql = """
-{% macro get_catalog(information_schema, schemas) %}
+{% macro get_catalog_relations(information_schema, relations) %}
     {% do exceptions.raise_compiler_error('rejected: no catalogs for you') %}
 {% endmacro %}
 

@@ -322,8 +322,8 @@ def test_set_zero_keepalive(self, psycopg2):
         )
 
     @mock.patch.object(PostgresAdapter, "execute_macro")
-    @mock.patch.object(PostgresAdapter, "_get_catalog_schemas")
-    def test_get_catalog_various_schemas(self, mock_get_schemas, mock_execute):
+    @mock.patch.object(PostgresAdapter, "_get_catalog_relations_by_info_schema")
+    def test_get_catalog_various_schemas(self, mock_get_relations, mock_execute):
         column_names = ["table_database", "table_schema", "table_name"]
         rows = [
             ("dbt", "foo", "bar"),
@@ -334,9 +334,13 @@ def test_get_catalog_various_schemas(self, mock_get_schemas, mock_execute):
         ]
         mock_execute.return_value = agate.Table(rows=rows, column_names=column_names)
 
-        mock_get_schemas.return_value.items.return_value = [
-            (mock.MagicMock(database="dbt"), {"foo", "FOO", "quux"})
-        ]
+        mock_get_relations.return_value = {
+            mock.MagicMock(database="dbt"): [
+                mock.MagicMock(schema="foo"),
+                mock.MagicMock(schema="FOO"),
+                mock.MagicMock(schema="quux"),
+            ]
+        }
 
         mock_manifest = mock.MagicMock()
         mock_manifest.get_used_schemas.return_value = {("dbt", "foo"), ("dbt", "quux")}