From 63a486135c42c3ff93920e51e746bea6d03a2f8e Mon Sep 17 00:00:00 2001
From: Peter Webb <peter.webb@dbtlabs.com>
Date: Wed, 24 Jan 2024 10:30:50 -0500
Subject: [PATCH 1/4] Inject TagIterator into BlockIterator (#39)

* Inject tag iterator for greater flexibility. Fix some names.

* Inject TagIterator into BlockIterator for greater flexibility.

* Refine names.

* Fix name mismatch.

* Add type annotations

* Fix formatting for black.

* Tweak type annotation.

* Use Union type to make earlier python versions happy.
---
 .../Under the Hood-20240123-161107.yaml       |   6 +
 dbt_common/clients/_jinja_blocks.py           | 107 +++++++++---------
 dbt_common/clients/jinja.py                   |   7 +-
 3 files changed, 64 insertions(+), 56 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240123-161107.yaml
diff --git a/.changes/unreleased/Under the Hood-20240123-161107.yaml b/.changes/unreleased/Under the Hood-20240123-161107.yaml
new file mode 100644
index 00000000..68a83ab4
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240123-161107.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Inject TagIterator into BlockIterator for greater flexibility.
+time: 2024-01-23T16:11:07.24321-05:00
+custom:
+  Author: peterallenwebb
+  Issue: "38"
diff --git a/dbt_common/clients/_jinja_blocks.py b/dbt_common/clients/_jinja_blocks.py
index 9a830570..c6058bfa 100644
--- a/dbt_common/clients/_jinja_blocks.py
+++ b/dbt_common/clients/_jinja_blocks.py
@@ -1,5 +1,6 @@
 import re
 from collections import namedtuple
+from typing import Iterator, List, Optional, Set, Union
 
 from dbt_common.exceptions import (
     BlockDefinitionNotAtTopError,
@@ -12,40 +13,42 @@
 )
 
 
-def regex(pat):
+def regex(pat: str) -> re.Pattern:
     return re.compile(pat, re.DOTALL | re.MULTILINE)
 
 
 class BlockData:
     """raw plaintext data from the top level of the file."""
 
-    def __init__(self, contents):
+    def __init__(self, contents: str) -> None:
         self.block_type_name = "__dbt__data"
-        self.contents = contents
+        self.contents: str = contents
         self.full_block = contents
 
 
 class BlockTag:
-    def __init__(self, block_type_name, block_name, contents=None, full_block=None, **kw):
+    def __init__(
+        self, block_type_name: str, block_name: str, contents: Optional[str] = None, full_block: Optional[str] = None
+    ) -> None:
         self.block_type_name = block_type_name
         self.block_name = block_name
         self.contents = contents
         self.full_block = full_block
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "BlockTag({!r}, {!r})".format(self.block_type_name, self.block_name)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return str(self)
 
     @property
-    def end_block_type_name(self):
+    def end_block_type_name(self) -> str:
         return "end{}".format(self.block_type_name)
 
-    def end_pat(self):
+    def end_pat(self) -> re.Pattern:
         # we don't want to use string formatting here because jinja uses most
         # of the string formatting operators in its syntax...
-        pattern = "".join(
+        pattern: str = "".join(
             (
                 r"(?P<endblock>((?:\s*\{\%\-|\{\%)\s*",
                 self.end_block_type_name,
@@ -98,44 +101,38 @@ def end_pat(self):
 
 
 class TagIterator:
-    def __init__(self, data):
-        self.data = data
-        self.blocks = []
-        self._parenthesis_stack = []
-        self.pos = 0
-
-    def linepos(self, end=None) -> str:
-        """Given an absolute position in the input data, return a pair of
+    def __init__(self, text: str) -> None:
+        self.text: str = text
+        self.pos: int = 0
+
+    def linepos(self, end: Optional[int] = None) -> str:
+        """Given an absolute position in the input text, return a pair of
         line number + relative position to the start of the line.
         """
         end_val: int = self.pos if end is None else end
-        data = self.data[:end_val]
+        text = self.text[:end_val]
         # if not found, rfind returns -1, and -1+1=0, which is perfect!
-        last_line_start = data.rfind("\n") + 1
+        last_line_start = text.rfind("\n") + 1
         # it's easy to forget this, but line numbers are 1-indexed
-        line_number = data.count("\n") + 1
+        line_number = text.count("\n") + 1
         return f"{line_number}:{end_val - last_line_start}"
 
-    def advance(self, new_position):
+    def advance(self, new_position: int) -> None:
         self.pos = new_position
 
-    def rewind(self, amount=1):
+    def rewind(self, amount: int = 1) -> None:
         self.pos -= amount
 
-    def _search(self, pattern):
-        return pattern.search(self.data, self.pos)
+    def _search(self, pattern: re.Pattern) -> Optional[re.Match]:
+        return pattern.search(self.text, self.pos)
 
-    def _match(self, pattern):
-        return pattern.match(self.data, self.pos)
+    def _match(self, pattern: re.Pattern) -> Optional[re.Match]:
+        return pattern.match(self.text, self.pos)
 
-    def _first_match(self, *patterns, **kwargs):
+    def _first_match(self, *patterns) -> Optional[re.Match]:  # type: ignore
         matches = []
         for pattern in patterns:
-            # default to 'search', but sometimes we want to 'match'.
-            if kwargs.get("method", "search") == "search":
-                match = self._search(pattern)
-            else:
-                match = self._match(pattern)
+            match = self._search(pattern)
             if match:
                 matches.append(match)
         if not matches:
@@ -144,13 +141,13 @@ def _first_match(self, *patterns, **kwargs):
         # TODO: do I need to account for m.start(), or is this ok?
         return min(matches, key=lambda m: m.end())
 
-    def _expect_match(self, expected_name, *patterns, **kwargs):
-        match = self._first_match(*patterns, **kwargs)
+    def _expect_match(self, expected_name: str, *patterns) -> re.Match:  # type: ignore
+        match = self._first_match(*patterns)
         if match is None:
-            raise UnexpectedMacroEOFError(expected_name, self.data[self.pos :])
+            raise UnexpectedMacroEOFError(expected_name, self.text[self.pos :])
         return match
 
-    def handle_expr(self, match):
+    def handle_expr(self, match: re.Match) -> None:
         """Handle an expression. At this point we're at a string like:
             {{ 1 + 2 }}
             ^ right here
@@ -176,12 +173,12 @@ def handle_expr(self, match):
 
         self.advance(match.end())
 
-    def handle_comment(self, match):
+    def handle_comment(self, match: re.Match) -> None:
         self.advance(match.end())
         match = self._expect_match("#}", COMMENT_END_PATTERN)
         self.advance(match.end())
 
-    def _expect_block_close(self):
+    def _expect_block_close(self) -> None:
         """Search for the tag close marker.
         To the right of the type name, there are a few possiblities:
            - a name (handled by the regex's 'block_name')
@@ -203,13 +200,13 @@ def _expect_block_close(self):
             string_match = self._expect_match("string", STRING_PATTERN)
             self.advance(string_match.end())
 
-    def handle_raw(self):
+    def handle_raw(self) -> int:
         # raw blocks are super special, they are a single complete regex
         match = self._expect_match("{% raw %}...{% endraw %}", RAW_BLOCK_PATTERN)
         self.advance(match.end())
         return match.end()
 
-    def handle_tag(self, match):
+    def handle_tag(self, match: re.Match) -> Tag:
         """The tag could be one of a few things:
 
             {% mytag %}
@@ -234,7 +231,7 @@ def handle_tag(self, match):
             self._expect_block_close()
         return Tag(block_type_name=block_type_name, block_name=block_name, start=start_pos, end=self.pos)
 
-    def find_tags(self):
+    def find_tags(self) -> Iterator[Tag]:
         while True:
             match = self._first_match(BLOCK_START_PATTERN, COMMENT_START_PATTERN, EXPR_START_PATTERN)
             if match is None:
@@ -259,7 +256,7 @@ def find_tags(self):
                     "Invalid regex match in next_block, expected block start, " "expr start, or comment start"
                 )
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[Tag]:
         return self.find_tags()
 
 
@@ -272,31 +269,33 @@ def __iter__(self):
 
 
 class BlockIterator:
-    def __init__(self, data):
-        self.tag_parser = TagIterator(data)
-        self.current = None
-        self.stack = []
-        self.last_position = 0
+    def __init__(self, tag_iterator: TagIterator) -> None:
+        self.tag_parser = tag_iterator
+        self.current: Optional[Tag] = None
+        self.stack: List[str] = []
+        self.last_position: int = 0
 
     @property
-    def current_end(self):
+    def current_end(self) -> int:
         if self.current is None:
             return 0
         else:
             return self.current.end
 
     @property
-    def data(self):
-        return self.tag_parser.data
+    def data(self) -> str:
+        return self.tag_parser.text
 
-    def is_current_end(self, tag):
+    def is_current_end(self, tag: Tag) -> bool:
         return (
             tag.block_type_name.startswith("end")
             and self.current is not None
             and tag.block_type_name[3:] == self.current.block_type_name
         )
 
-    def find_blocks(self, allowed_blocks=None, collect_raw_data=True):
+    def find_blocks(
+        self, allowed_blocks: Optional[Set[str]] = None, collect_raw_data: bool = True
+    ) -> Iterator[Union[BlockData, BlockTag]]:
         """Find all top-level blocks in the data."""
         if allowed_blocks is None:
             allowed_blocks = {"snapshot", "macro", "materialization", "docs"}
@@ -347,5 +346,7 @@ def find_blocks(self, allowed_blocks=None, collect_raw_data=True):
             if raw_data:
                 yield BlockData(raw_data)
 
-    def lex_for_blocks(self, allowed_blocks=None, collect_raw_data=True):
+    def lex_for_blocks(
+        self, allowed_blocks: Optional[Set[str]] = None, collect_raw_data: bool = True
+    ) -> List[Union[BlockData, BlockTag]]:
         return list(self.find_blocks(allowed_blocks=allowed_blocks, collect_raw_data=collect_raw_data))
diff --git a/dbt_common/clients/jinja.py b/dbt_common/clients/jinja.py
index ca9a4b55..b8c0d03a 100644
--- a/dbt_common/clients/jinja.py
+++ b/dbt_common/clients/jinja.py
@@ -22,7 +22,7 @@
     get_materialization_macro_name,
     get_test_macro_name,
 )
-from dbt_common.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
+from dbt_common.clients._jinja_blocks import BlockIterator, BlockData, BlockTag, TagIterator
 
 from dbt_common.exceptions import (
     CompilationError,
@@ -516,7 +516,7 @@ def render_template(template, ctx: Dict[str, Any], node=None) -> str:
 
 
 def extract_toplevel_blocks(
-    data: str,
+    text: str,
     allowed_blocks: Optional[Set[str]] = None,
     collect_raw_data: bool = True,
 ) -> List[Union[BlockData, BlockTag]]:
@@ -534,4 +534,5 @@ def extract_toplevel_blocks(
     :return: A list of `BlockTag`s matching the allowed block types and (if
         `collect_raw_data` is `True`) `BlockData` objects.
     """
-    return BlockIterator(data).lex_for_blocks(allowed_blocks=allowed_blocks, collect_raw_data=collect_raw_data)
+    tag_iterator = TagIterator(text)
+    return BlockIterator(tag_iterator).lex_for_blocks(allowed_blocks=allowed_blocks, collect_raw_data=collect_raw_data)

From 5ce19dd99ae5d206fc5ac807289fe474259256bc Mon Sep 17 00:00:00 2001
From: Truls <20932917+truls-p@users.noreply.github.com>
Date: Wed, 24 Jan 2024 16:09:13 +0000
Subject: [PATCH 2/4] Change metadata_vars `if not` to `if ... is None` (#37)

Both None and {} evaluates to True when using if not metadata_vars. This caused us to loop over the environment variables unnecessarily. This provides a significant performance increase for large projects.
---
 .changes/unreleased/Under the Hood-20240123-194242.yaml | 6 ++++++
 dbt_common/events/functions.py                          | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240123-194242.yaml

diff --git a/.changes/unreleased/Under the Hood-20240123-194242.yaml b/.changes/unreleased/Under the Hood-20240123-194242.yaml
new file mode 100644
index 00000000..a0cb7431
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240123-194242.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Change metadata_vars \`if not\` to \`if ... is None\`
+time: 2024-01-23T19:42:42.95727089Z
+custom:
+  Author: truls-p
+  Issue: "6073"
diff --git a/dbt_common/events/functions.py b/dbt_common/events/functions.py
index 60ef4d27..fde2219b 100644
--- a/dbt_common/events/functions.py
+++ b/dbt_common/events/functions.py
@@ -138,7 +138,7 @@ def fire_event(e: BaseEvent, level: Optional[EventLevel] = None) -> None:
 
 def get_metadata_vars() -> Dict[str, str]:
     global metadata_vars
-    if not metadata_vars:
+    if metadata_vars is None:
         metadata_vars = {
             k[len(_METADATA_ENV_PREFIX) :]: v for k, v in os.environ.items() if k.startswith(_METADATA_ENV_PREFIX)
         }

From 31804b1df3a7d1c1068ca01188a31f5b94ad226d Mon Sep 17 00:00:00 2001
From: Emily Rockman <ebuschang@gmail.com>
Date: Thu, 25 Jan 2024 09:28:41 -0600
Subject: [PATCH 3/4] Add CODEOWNERS (#28)

* add codeowners

* add merge queue logic

* link to specific ticket
---
 .github/CODEOWNERS                        | 14 ++++++
 .github/workflows/build.yml               |  2 +
 .github/workflows/ci_code_quality.yml     |  2 +
 .github/workflows/ci_dbt_core_testing.yml | 53 +++++++++++++++++++++++
 .github/workflows/ci_tests.yml            |  4 +-
 5 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 .github/CODEOWNERS
 create mode 100644 .github/workflows/ci_dbt_core_testing.yml

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..4b02e6a2
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,14 @@
+# This file contains the code owners for the dbt-common repo.
+# PRs will be require review from the associated
+# team(s) or person(s) that touches any files that are mapped to them.
+#
+# A statement takes precedence over the statements above it so more general
+# assignments are found at the top with specific assignments being lower in
+# the ordering (i.e. catch all assignment should be the first item)
+#
+# Consult GitHub documentation for formatting guidelines:
+# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#example-of-a-codeowners-file
+
+# As a default for areas with no assignment,
+# the core team as a whole will be assigned
+*       @dbt-labs/core-team
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 035240f4..5633c4f2 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,6 +16,8 @@ on:
     branches:
       - "main"
   pull_request:
+  merge_group:
+    types: [checks_requested]
   workflow_dispatch:
 
 permissions: read-all
diff --git a/.github/workflows/ci_code_quality.yml b/.github/workflows/ci_code_quality.yml
index b8b33314..cb576640 100644
--- a/.github/workflows/ci_code_quality.yml
+++ b/.github/workflows/ci_code_quality.yml
@@ -16,6 +16,8 @@ on:
     branches:
       - "main"
   pull_request:
+  merge_group:
+    types: [checks_requested]
   workflow_dispatch:
 
 permissions: read-all
diff --git a/.github/workflows/ci_dbt_core_testing.yml b/.github/workflows/ci_dbt_core_testing.yml
new file mode 100644
index 00000000..37255625
--- /dev/null
+++ b/.github/workflows/ci_dbt_core_testing.yml
@@ -0,0 +1,53 @@
+# **what?**
+# Runs all tests in dbt-core with this branch of dbt-common to ensure nothing is broken
+
+# **why?**
+# Ensure dbt-common changes do nto break dbt-core
+
+# **when?**
+# This will run when trying to merge a PR into main.
+# It can also be manually triggered.
+
+##  TODO: This is a stub. It does nothing right now.
+# It will be updated in the future as part of https://github.com/dbt-labs/dbt-common/issues/18
+
+name: Test Against dbt-core
+
+on:
+  merge_group:
+    types: [checks_requested]
+  workflow_dispatch:
+
+permissions: read-all
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  test-dbt-core:
+    name: "This does nothing right now - always passes"
+
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - name: "Check out dbt-core"
+        uses: actions/checkout@v3
+
+      - name: "Set up Python 3.11"
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: "Update the version of dbt-common"
+        run: |
+            echo "Update the version of dbt-common ref'd in dbt-core"
+
+      - name: "Install dbt-core"
+        run: |
+            echo "Install dbt-core with updated dbt-common ref"
+
+      - name: "Run Tests"
+        run: |
+          echo "Running tests in future versions."
diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml
index 87550594..b09c97c4 100644
--- a/.github/workflows/ci_tests.yml
+++ b/.github/workflows/ci_tests.yml
@@ -16,6 +16,8 @@ on:
     branches:
       - "main"
   pull_request:
+  merge_group:
+    types: [checks_requested]
   workflow_dispatch:
 
 permissions: read-all
@@ -48,7 +50,7 @@ jobs:
       - name: "Set up Python ${{ matrix.python-version }}"
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: "${{ matrix.python-version }}"
 
       - name: "Install Hatch"
         shell: bash

From 6878a90c2ef313e8e183c315b58f9e6efdfabe0d Mon Sep 17 00:00:00 2001
From: Emily Rockman <ebuschang@gmail.com>
Date: Thu, 25 Jan 2024 09:30:04 -0600
Subject: [PATCH 4/4] Update issue templates (#34)

---
 .github/ISSUE_TEMPLATE/tasklist-template.md | 28 +++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/tasklist-template.md

diff --git a/.github/ISSUE_TEMPLATE/tasklist-template.md b/.github/ISSUE_TEMPLATE/tasklist-template.md
new file mode 100644
index 00000000..5b7cffad
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/tasklist-template.md
@@ -0,0 +1,28 @@
+---
+name: TaskList Template
+about: Template for issues created off tasklists
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+## Short description
+
+<!-- Describe the scope of the ticket, a high-level implementation approach and any tradeoffs to consider -->
+
+## Acceptance criteria
+
+<!-- What is the definition of done for this ticket? Include any relevant edge cases and/or test cases -->
+
+## Suggested Tests
+
+<!-- Provide scenarios to test. Link to existing similar tests if appropriate. -->
+
+## Impact to Other Teams
+
+<!-- Will this change impact other teams?  Include details of the kinds of changes required (new tests, code changes, related tickets) and _add the relevant `Impact:[team]` label_. -->
+
+## Context
+
+<!-- Provide the "why", motivation, and alternative approaches considered -- linking to previous refinement issues, spikes, docs as appropriate -->