From aeca4e40bdbbc4447533185b2d2b0587cf95da58 Mon Sep 17 00:00:00 2001
From: Pedro Mezacasa Muller <pedro.mezzacasa@gmail.com>
Date: Fri, 27 Sep 2024 18:21:02 -0300
Subject: [PATCH] Standardize type comments to always have one space (#2698)

---
 CHANGES.md                                    |  1 +
 docs/the_black_code_style/future_style.md     |  3 +
 src/black/comments.py                         | 67 ++++++++++++-------
 src/black/linegen.py                          |  4 +-
 src/black/lines.py                            |  4 +-
 src/black/mode.py                             | 23 +++++--
 src/black/nodes.py                            | 11 +--
 src/black/resources/black.schema.json         |  3 +-
 tests/data/cases/type_comment_syntax.py       | 10 +++
 tests/data/cases/type_comment_syntax_error.py | 11 ---
 .../data/cases/type_comment_syntax_preview.py | 57 ++++++++++++++++
 tests/test_nodes.py                           |  5 +-
 12 files changed, 147 insertions(+), 52 deletions(-)
 create mode 100644 tests/data/cases/type_comment_syntax.py
 delete mode 100644 tests/data/cases/type_comment_syntax_error.py
 create mode 100644 tests/data/cases/type_comment_syntax_preview.py

diff --git a/CHANGES.md b/CHANGES.md
index 97e8645e3ea..49526f4ca09 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -29,6 +29,7 @@
 
 - Fix type annotation spacing between * and more complex type variable tuple (i.e. `def
   fn(*args: *tuple[*Ts, T]) -> None: pass`) (#4440)
+- Standardize type comments to always have one space (#4467)
 
 ### Caching
 
diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md
index cd4fb12bd51..422de1f64ef 100644
--- a/docs/the_black_code_style/future_style.md
+++ b/docs/the_black_code_style/future_style.md
@@ -38,6 +38,9 @@ Currently, the following features are included in the preview style:
   blocks when the line is too long
 - `pep646_typed_star_arg_type_var_tuple`: fix type annotation spacing between * and more
   complex type variable tuple (i.e. `def fn(*args: *tuple[*Ts, T]) -> None: pass`)
+- `type_comments_standardization`: type comments with zero or more empty spaces between
+  `#` and `type:`, or between `type:` and the type itself will be formatted to
+  `# type: (type itself)`.
 
 (labels/unstable-features)=
 
diff --git a/src/black/comments.py b/src/black/comments.py
index cd37c440290..138d7abd06d 100644
--- a/src/black/comments.py
+++ b/src/black/comments.py
@@ -49,7 +49,7 @@ class ProtoComment:
     leading_whitespace: str  # leading whitespace before the comment, if any
 
 
-def generate_comments(leaf: LN) -> Iterator[Leaf]:
+def generate_comments(leaf: LN, mode: Mode) -> Iterator[Leaf]:
     """Clean the prefix of the `leaf` and generate comments from it, if any.
 
     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
@@ -69,7 +69,9 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
     are emitted with a fake STANDALONE_COMMENT token identifier.
     """
     total_consumed = 0
-    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
+    for pc in list_comments(
+        leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, mode=mode
+    ):
         total_consumed = pc.consumed
         prefix = make_simple_prefix(pc.newlines, pc.form_feed)
         yield Leaf(pc.type, pc.value, prefix=prefix)
@@ -77,7 +79,7 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
 
 
 @lru_cache(maxsize=4096)
-def list_comments(prefix: str, *, is_endmarker: bool) -> list[ProtoComment]:
+def list_comments(prefix: str, *, is_endmarker: bool, mode: Mode) -> list[ProtoComment]:
     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
     result: list[ProtoComment] = []
     if not prefix or "#" not in prefix:
@@ -108,7 +110,7 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> list[ProtoComment]:
             comment_type = token.COMMENT  # simple trailing comment
         else:
             comment_type = STANDALONE_COMMENT
-        comment = make_comment(line)
+        comment = make_comment(line, mode)
         result.append(
             ProtoComment(
                 type=comment_type,
@@ -139,7 +141,7 @@ def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None:
     leaf.prefix = ""
 
 
-def make_comment(content: str) -> str:
+def make_comment(content: str, mode: Mode) -> str:
     """Return a consistently formatted comment from the given `content` string.
 
     All comments (except for "##", "#!", "#:", '#'") should have a single
@@ -153,13 +155,30 @@ def make_comment(content: str) -> str:
 
     if content[0] == "#":
         content = content[1:]
+
     NON_BREAKING_SPACE = " "
-    if (
-        content
-        and content[0] == NON_BREAKING_SPACE
-        and not content.lstrip().startswith("type:")
-    ):
+
+    is_type_comment = (
+        re.match(r"^\s*type:", content)
+        if Preview.type_comments_standardization in mode
+        else content.lstrip().startswith("type:")
+    )
+
+    if content and content[0] == NON_BREAKING_SPACE and not is_type_comment:
         content = " " + content[1:]  # Replace NBSP by a simple space
+    elif (
+        Preview.type_comments_standardization in mode
+        and is_type_comment
+        and re.match(
+            r"^\s*type:(?!\s*ignore\b)", content
+        )  # Check if it is type: ignore
+        and NON_BREAKING_SPACE not in content
+    ):
+        content = content.strip()
+        parts = content.split(":")
+        key = parts[0].strip()  # Remove extra spaces around "type"
+        value = parts[1].strip()  # Remove extra spaces around the value part
+        content = f" {key}: {value}"
     if content and content[0] not in COMMENT_EXCEPTIONS:
         content = " " + content
     return "#" + content
@@ -183,7 +202,7 @@ def convert_one_fmt_off_pair(
     """
     for leaf in node.leaves():
         previous_consumed = 0
-        for comment in list_comments(leaf.prefix, is_endmarker=False):
+        for comment in list_comments(leaf.prefix, is_endmarker=False, mode=mode):
             is_fmt_off = comment.value in FMT_OFF
             is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode)
             if (not is_fmt_off and not is_fmt_skip) or (
@@ -273,17 +292,17 @@ def generate_ignored_nodes(
     Stops at the end of the block.
     """
     if _contains_fmt_skip_comment(comment.value, mode):
-        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment)
+        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, mode=mode)
         return
     container: Optional[LN] = container_of(leaf)
     while container is not None and container.type != token.ENDMARKER:
-        if is_fmt_on(container):
+        if is_fmt_on(container, mode=mode):
             return
 
         # fix for fmt: on in children
-        if children_contains_fmt_on(container):
+        if children_contains_fmt_on(container, mode=mode):
             for index, child in enumerate(container.children):
-                if isinstance(child, Leaf) and is_fmt_on(child):
+                if isinstance(child, Leaf) and is_fmt_on(child, mode=mode):
                     if child.type in CLOSING_BRACKETS:
                         # This means `# fmt: on` is placed at a different bracket level
                         # than `# fmt: off`. This is an invalid use, but as a courtesy,
@@ -294,12 +313,14 @@ def generate_ignored_nodes(
                 if (
                     child.type == token.INDENT
                     and index < len(container.children) - 1
-                    and children_contains_fmt_on(container.children[index + 1])
+                    and children_contains_fmt_on(
+                        container.children[index + 1], mode=mode
+                    )
                 ):
                     # This means `# fmt: on` is placed right after an indentation
                     # level, and we shouldn't swallow the previous INDENT token.
                     return
-                if children_contains_fmt_on(child):
+                if children_contains_fmt_on(child, mode=mode):
                     return
                 yield child
         else:
@@ -312,14 +333,14 @@ def generate_ignored_nodes(
 
 
 def _generate_ignored_nodes_from_fmt_skip(
-    leaf: Leaf, comment: ProtoComment
+    leaf: Leaf, comment: ProtoComment, mode: Mode
 ) -> Iterator[LN]:
     """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`."""
     prev_sibling = leaf.prev_sibling
     parent = leaf.parent
     # Need to properly format the leaf prefix to compare it to comment.value,
     # which is also formatted
-    comments = list_comments(leaf.prefix, is_endmarker=False)
+    comments = list_comments(leaf.prefix, is_endmarker=False, mode=mode)
     if not comments or comment.value != comments[0].value:
         return
     if prev_sibling is not None:
@@ -353,12 +374,12 @@ def _generate_ignored_nodes_from_fmt_skip(
         yield from iter(ignored_nodes)
 
 
-def is_fmt_on(container: LN) -> bool:
+def is_fmt_on(container: LN, mode: Mode) -> bool:
     """Determine whether formatting is switched on within a container.
     Determined by whether the last `# fmt:` comment is `on` or `off`.
     """
     fmt_on = False
-    for comment in list_comments(container.prefix, is_endmarker=False):
+    for comment in list_comments(container.prefix, is_endmarker=False, mode=mode):
         if comment.value in FMT_ON:
             fmt_on = True
         elif comment.value in FMT_OFF:
@@ -366,11 +387,11 @@ def is_fmt_on(container: LN) -> bool:
     return fmt_on
 
 
-def children_contains_fmt_on(container: LN) -> bool:
+def children_contains_fmt_on(container: LN, mode: Mode) -> bool:
     """Determine if children have formatting switched on."""
     for child in container.children:
         leaf = first_leaf_of(child)
-        if leaf is not None and is_fmt_on(leaf):
+        if leaf is not None and is_fmt_on(leaf, mode=mode):
             return True
 
     return False
diff --git a/src/black/linegen.py b/src/black/linegen.py
index 107fa69d052..533959882e1 100644
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@@ -135,7 +135,7 @@ def visit_default(self, node: LN) -> Iterator[Line]:
         """Default `visit_*()` implementation. Recurses to children of `node`."""
         if isinstance(node, Leaf):
             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
-            for comment in generate_comments(node):
+            for comment in generate_comments(node, mode=self.mode):
                 if any_open_brackets:
                     # any comment within brackets is subject to splitting
                     self.current_line.append(comment)
@@ -1352,7 +1352,7 @@ def normalize_invisible_parens(  # noqa: C901
     Standardizes on visible parentheses for single-element tuples, and keeps
     existing visible parentheses for other tuples and generator expressions.
     """
-    for pc in list_comments(node.prefix, is_endmarker=False):
+    for pc in list_comments(node.prefix, is_endmarker=False, mode=mode):
         if pc.value in FMT_OFF:
             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
             return
diff --git a/src/black/lines.py b/src/black/lines.py
index a8c6ef66f68..67939e1afcf 100644
--- a/src/black/lines.py
+++ b/src/black/lines.py
@@ -287,7 +287,7 @@ def contains_uncollapsable_type_comments(self) -> bool:
         comment_seen = False
         for leaf_id, comments in self.comments.items():
             for comment in comments:
-                if is_type_comment(comment):
+                if is_type_comment(comment, mode=self.mode):
                     if comment_seen or (
                         not is_type_ignore_comment(comment)
                         and leaf_id not in ignored_ids
@@ -401,7 +401,7 @@ def append_comment(self, comment: Leaf) -> bool:
             and not last_leaf.value
             and last_leaf.parent
             and len(list(last_leaf.parent.leaves())) <= 3
-            and not is_type_comment(comment)
+            and not is_type_comment(comment, self.mode)
         ):
             # Comments on an optional parens wrapping a single leaf should belong to
             # the wrapped node except if it's a type comment. Pinning the comment like
diff --git a/src/black/mode.py b/src/black/mode.py
index 02fe1de24db..329124e70a6 100644
--- a/src/black/mode.py
+++ b/src/black/mode.py
@@ -211,6 +211,7 @@ class Preview(Enum):
     remove_redundant_guard_parens = auto()
     parens_for_long_if_clauses_in_case_block = auto()
     pep646_typed_star_arg_type_var_tuple = auto()
+    type_comments_standardization = auto()
 
 
 UNSTABLE_FEATURES: set[Preview] = {
@@ -247,13 +248,6 @@ class Mode:
     enabled_features: set[Preview] = field(default_factory=set)
 
     def __contains__(self, feature: Preview) -> bool:
-        """
-        Provide `Preview.FEATURE in Mode` syntax that mirrors the ``preview`` flag.
-
-        In unstable mode, all features are enabled. In preview mode, all features
-        except those in UNSTABLE_FEATURES are enabled. Any features in
-        `self.enabled_features` are also enabled.
-        """
         if self.unstable:
             return True
         if feature in self.enabled_features:
@@ -294,3 +288,18 @@ def get_cache_key(self) -> str:
             features_and_magics,
         ]
         return ".".join(parts)
+
+    def __hash__(self) -> int:
+        return hash((
+            frozenset(self.target_versions),
+            self.line_length,
+            self.string_normalization,
+            self.is_pyi,
+            self.is_ipynb,
+            self.skip_source_first_line,
+            self.magic_trailing_comma,
+            frozenset(self.python_cell_magics),
+            self.preview,
+            self.unstable,
+            frozenset(self.enabled_features),
+        ))
diff --git a/src/black/nodes.py b/src/black/nodes.py
index 28598a5d5fd..ad1b9499437 100644
--- a/src/black/nodes.py
+++ b/src/black/nodes.py
@@ -905,16 +905,19 @@ def is_async_stmt_or_funcdef(leaf: Leaf) -> bool:
     )
 
 
-def is_type_comment(leaf: Leaf) -> bool:
+def is_type_comment(leaf: Leaf, mode: Mode) -> bool:
     """Return True if the given leaf is a type comment. This function should only
     be used for general type comments (excluding ignore annotations, which should
     use `is_type_ignore_comment`). Note that general type comments are no longer
     used in modern version of Python, this function may be deprecated in the future."""
     t = leaf.type
     v = leaf.value
-    return (
-        t in {token.COMMENT, STANDALONE_COMMENT}
-        and bool(re.match(r"#\s*type:", v))
+
+    type_comment_with_extra_spaces = bool(re.match(r"#\s*type:", v))
+
+    return t in {token.COMMENT, STANDALONE_COMMENT} and (
+        v.startswith("# type:")
+        or (Preview.type_comments_standardization and type_comment_with_extra_spaces)
     )
 
 
diff --git a/src/black/resources/black.schema.json b/src/black/resources/black.schema.json
index a536d543fed..339929eb830 100644
--- a/src/black/resources/black.schema.json
+++ b/src/black/resources/black.schema.json
@@ -91,7 +91,8 @@
           "docstring_check_for_newline",
           "remove_redundant_guard_parens",
           "parens_for_long_if_clauses_in_case_block",
-          "pep646_typed_star_arg_type_var_tuple"
+          "pep646_typed_star_arg_type_var_tuple",
+          "type_comments_standardization"
         ]
       },
       "description": "Enable specific features included in the `--unstable` style. Requires `--preview`. No compatibility guarantees are provided on the behavior or existence of any unstable features."
diff --git a/tests/data/cases/type_comment_syntax.py b/tests/data/cases/type_comment_syntax.py
new file mode 100644
index 00000000000..bb2da3c0912
--- /dev/null
+++ b/tests/data/cases/type_comment_syntax.py
@@ -0,0 +1,10 @@
+def f(
+    a,  # type: int
+):
+    pass
+
+
+# test type comments
+def f(a, b, c, d, e, f, g, h, i):
+    # type: (int, int, int, int, int, int, int, int, int) -> None
+    pass
\ No newline at end of file
diff --git a/tests/data/cases/type_comment_syntax_error.py b/tests/data/cases/type_comment_syntax_error.py
deleted file mode 100644
index 2e5ca2ede8c..00000000000
--- a/tests/data/cases/type_comment_syntax_error.py
+++ /dev/null
@@ -1,11 +0,0 @@
-def foo(
-    # type: Foo
-    x): pass
-
-# output
-
-def foo(
-    # type: Foo
-    x,
-):
-    pass
diff --git a/tests/data/cases/type_comment_syntax_preview.py b/tests/data/cases/type_comment_syntax_preview.py
new file mode 100644
index 00000000000..1103f0b669a
--- /dev/null
+++ b/tests/data/cases/type_comment_syntax_preview.py
@@ -0,0 +1,57 @@
+# flags: --preview
+
+
+def f(
+    a,  # type: int
+):
+    pass
+
+
+# test type comments
+def f(a, b, c, d, e, f, g, h, i):
+    # type: (int, int, int, int, int, int, int, int, int) -> None
+    pass
+
+
+def f(
+    a,  #  type :   int
+    b,  #    type :  int
+    c,  #type :   int
+    d,  #   type:   int
+    e,  #              type: int    
+    f,  #  type :  int  
+    g,  #type:int
+    h,  # type: int
+    i,  # type: int
+):
+    # type: (...) -> None
+    pass
+
+
+
+# output
+def f(
+    a,  # type: int
+):
+    pass
+
+
+# test type comments
+def f(a, b, c, d, e, f, g, h, i):
+    # type: (int, int, int, int, int, int, int, int, int) -> None
+    pass
+
+
+def f(
+    a,  #  type :   int
+    b,  #    type :  int
+    c,  # type :   int
+    d,  # type: int
+    e,  # type: int
+    f,  #  type :  int
+    g,  # type: int
+    h,  # type: int
+    i,  # type: int
+):
+    # type: (...) -> None
+    pass
diff --git a/tests/test_nodes.py b/tests/test_nodes.py
index a206f6947ae..c6df6384e5f 100644
--- a/tests/test_nodes.py
+++ b/tests/test_nodes.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from black import Leaf, token
+from black import Leaf, Mode, token
 from black.nodes import is_type_comment
 
 
@@ -25,4 +25,5 @@
 )
 def test_is_type_comment(comment_text: str, expected: bool) -> None:
     leaf = Leaf(token.COMMENT, comment_text)
-    assert is_type_comment(leaf) == expected
+    mode = Mode(preview=True)
+    assert is_type_comment(leaf=leaf, mode=mode) == expected