snakemake · mbhall88 · Jan 31, 2024 · Jan 29, 2024 · Jan 29, 2024 · Jan 31, 2024
@@ -16,7 +16,7 @@ snakefmt = 'snakefmt.snakefmt:main'
 [tool.poetry.dependencies]
 python = "^3.8.1"
 click = "^8.0.0"
-black = "^23.12.1"
+black = "^24.1.1"
 toml = "^0.10.2"
 importlib_metadata = {version = ">=1.7.0,<5.0", python = "<3.8"}
 

@@ -6,6 +6,8 @@
 (in dist-info or egg-info dirs).
 From Python 3.8, importlib_metadata is in standard library as importlib.metadata.
 """
+from black import TargetVersion
+
 if sys.version_info >= (3, 8):
     from importlib import metadata
 else:
@@ -14,3 +16,10 @@
 __version__ = metadata.version("snakefmt")
 
 DEFAULT_LINE_LENGTH = 88
+DEFAULT_TARGET_VERSIONS = {  # default ``target_versions`` given to black's Mode
+    TargetVersion.PY38,
+    TargetVersion.PY39,
+    TargetVersion.PY310,
+    TargetVersion.PY311,
+    TargetVersion.PY312,
+}
@@ -10,7 +10,7 @@
 import toml
 from black import Mode, find_project_root
 
-from snakefmt import DEFAULT_LINE_LENGTH
+from snakefmt import DEFAULT_LINE_LENGTH, DEFAULT_TARGET_VERSIONS
 from snakefmt.exceptions import MalformattedToml
 
 PathLike = Union[Path, str]
@@ -57,7 +57,9 @@ def inject_snakefmt_config(
 
 def read_black_config(path: Optional[PathLike]) -> Mode:
     """Parse Black configuration from provided toml."""
-    black_mode = Mode(line_length=DEFAULT_LINE_LENGTH)
+    black_mode = Mode(
+        line_length=DEFAULT_LINE_LENGTH, target_versions=DEFAULT_TARGET_VERSIONS
+    )
     if path is None:
         return black_mode
     if not Path(path).is_file():

@@ -26,6 +26,10 @@
 full_string_matcher = re.compile(
     r"^\s*(\w?([\"']{3}.*?[\"']{3})|([\"']{1}.*?[\"']{1}))$", re.DOTALL | re.MULTILINE
 )
+# matches a triple-quoted string (closed by the quotes that opened it); can span lines
+docstring_matcher = re.compile(
+    r"\s*([rR]?(\"{3}|'{3}).*?\2)", re.DOTALL | re.MULTILINE
+)
 contextual_matcher = re.compile(
     r"(.*)^(if|elif|else|with|for|while)([^:]*)(:.*)", re.S | re.M
 )
@@ -41,6 +45,17 @@ def is_all_comments(string):
     )
 
 
+def index_of_first_docstring(s: str) -> Optional[int]:
+    """
+    Find the first ``docstring_matcher`` hit in ``s`` and return the index of its
+    final character (the last closing quote); return None when nothing matches.
+    """
+    first_hit = docstring_matcher.search(s)
+    if first_hit is not None:
+        return first_hit.end(1) - 1
+    return None
+
+
 class Formatter(Parser):
     def __init__(
         self,
@@ -296,9 +311,34 @@ def format_param(
         if param_list:
             val = f"f({val})"
             extra_spacing = 3
+
+        # get the index of the last character of the first docstring, if any
+        docstring_index = index_of_first_docstring(val)
+        docstring_line_index = None
+        if docstring_index is not None:
+            docstring_line_index = val[:docstring_index].count("\n")
+        lines = val.splitlines()
+        if docstring_line_index is not None and docstring_line_index + 1 < len(lines):
+            docstring_has_extra_newline_after = (
+                lines[docstring_line_index + 1].strip() == ""
+            )
+        else:
+            docstring_has_extra_newline_after = False
+
         val = self.run_black_format_str(
             val, target_indent, extra_spacing, no_nesting=True
         )
+
+        # remove newline added after first docstring (black>=24.1)
+        if docstring_line_index is not None and not docstring_has_extra_newline_after:
+            lines = val.splitlines()
+            if docstring_line_index + 1 < len(lines):
+                line_after_docstring = lines[docstring_line_index + 1]
+                if line_after_docstring.strip() == "":
+                    # delete the newline
+                    lines.pop(docstring_line_index + 1)
+                    val = "\n".join(lines)
+
         if param_list:
             match_equal = re.match(r"f\((.*)\)", val, re.DOTALL)
             val = match_equal.group(1)

@@ -10,6 +10,7 @@
     Vocabulary,
     add_token_space,
     is_newline,
+    re_add_curly_bracket_if_needed,
 )
 from snakefmt.types import TAB, Token, TokenIterator, col_nb
 
@@ -324,3 +325,4 @@ def get_next_queriable(self, snakefile: TokenIterator) -> Status:
             if not pythonable and token.type != tokenize.COMMENT:
                 pythonable = True
             buffer += token.string
+            buffer += re_add_curly_bracket_if_needed(token)
@@ -1,6 +1,7 @@
 """
 Code in charge of parsing and validating Snakemake syntax
 """
+
 import sys
 import tokenize
 from abc import ABC, abstractmethod
@@ -43,6 +44,20 @@
     spacing_triggers[tokenize.OP].add(tokenize.FSTRING_START)
 
 
+def re_add_curly_bracket_if_needed(token: Token) -> str:
+    """Return the brace ending a 3.12+ FSTRING_MIDDLE token, or an empty string."""
+    # check the interpreter version first: FSTRING_MIDDLE only exists from 3.12 on
+    if token is None or sys.version_info < (3, 12):
+        return ""
+    if token.type != tokenize.FSTRING_MIDDLE:
+        return ""
+    # "}" is tested before "{", so a closing brace wins if both could apply
+    for bracket in ("}", "{"):
+        if token.string.endswith(bracket):
+            return bracket
+    return ""
+
+
 def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
     if prev_token.type != tokenize.OP and token.type != tokenize.OP:
         return False
@@ -332,15 +347,7 @@ def parse_params(self, snakefile: TokenIterator):
         prev_token = None
         while True:
             cur_param = self.process_token(cur_param, prev_token)
-            if (
-                self.token is not None
-                and sys.version_info >= (3, 12)
-                and self.token.type == tokenize.FSTRING_MIDDLE
-            ):
-                if self.token.string.endswith("}"):
-                    cur_param.value += "}"
-                elif self.token.string.endswith("{"):
-                    cur_param.value += "{"
+            cur_param.value += re_add_curly_bracket_if_needed(self.token)
             try:
                 prev_token = self.token
                 self.token = next(snakefile)

@@ -288,7 +288,9 @@ def main(
 
     if check:
         if files_unchanged == len(files_to_format):
-            logger.info(f"All {len(files_to_format)} file(s) would be left unchanged 🎉")
+            logger.info(
+                f"All {len(files_to_format)} file(s) would be left unchanged 🎉"
+            )
             ctx.exit(ExitCode.NO_CHANGE.value)
         elif files_with_errors > 0:
             exit_value = ExitCode.ERROR.value

@@ -5,7 +5,7 @@
 import click
 import pytest
 
-from snakefmt import DEFAULT_LINE_LENGTH
+from snakefmt import DEFAULT_LINE_LENGTH, DEFAULT_TARGET_VERSIONS
 from snakefmt.config import (
     find_pyproject_toml,
     inject_snakefmt_config,
@@ -191,7 +191,9 @@ def test_empty_config_default_line_length_used(self, tmp_path):
         formatter = setup_formatter("")
         path = tmp_path / "config.toml"
         path.touch()
-        expected = black.FileMode(line_length=DEFAULT_LINE_LENGTH)
+        expected = black.FileMode(
+            line_length=DEFAULT_LINE_LENGTH, target_versions=DEFAULT_TARGET_VERSIONS
+        )
         assert formatter.black_mode == expected
 
     def test_read_black_config_settings(self, tmp_path):
@@ -200,7 +202,9 @@ def test_read_black_config_settings(self, tmp_path):
         path.write_text(f"[tool.black]\nline_length = {black_line_length}")
 
         actual = read_black_config(path)
-        expected = black.FileMode(line_length=black_line_length)
+        expected = black.FileMode(
+            line_length=black_line_length, target_versions=DEFAULT_TARGET_VERSIONS
+        )
 
         assert actual == expected
 
@@ -213,14 +217,18 @@ def test_snakefmt_line_length_overrides_black(self, tmp_path):
         # show black gets parsed
         formatter = setup_formatter("", black_config_file=str(path))
 
-        expected = black.FileMode(line_length=black_line_length)
+        expected = black.FileMode(
+            line_length=black_line_length, target_versions=DEFAULT_TARGET_VERSIONS
+        )
         assert formatter.black_mode == expected
 
         # Now, add overriding snakefmt line length
         formatter = setup_formatter(
             "", line_length=snakefmt_line_length, black_config_file=str(path)
         )
-        expected = black.FileMode(line_length=snakefmt_line_length)
+        expected = black.FileMode(
+            line_length=snakefmt_line_length, target_versions=DEFAULT_TARGET_VERSIONS
+        )
         assert formatter.black_mode == expected
 
     def test_unrecognised_black_options_in_config_ignored_and_default_line_length_used(
@@ -232,7 +240,9 @@ def test_unrecognised_black_options_in_config_ignored_and_default_line_length_us
 
         read_black_config(path)
         actual = formatter.black_mode
-        expected = black.FileMode(line_length=DEFAULT_LINE_LENGTH)
+        expected = black.FileMode(
+            line_length=DEFAULT_LINE_LENGTH, target_versions=DEFAULT_TARGET_VERSIONS
+        )
 
         assert actual == expected
 
@@ -253,7 +263,9 @@ def test_skip_string_normalisation_handled_with_snakecase(self, tmp_path):
         read_black_config(path)
         actual = formatter.black_mode
         expected = black.FileMode(
-            line_length=DEFAULT_LINE_LENGTH, string_normalization=True
+            line_length=DEFAULT_LINE_LENGTH,
+            string_normalization=True,
+            target_versions=DEFAULT_TARGET_VERSIONS,
         )
 
         assert actual == expected
@@ -266,7 +278,9 @@ def test_skip_string_normalisation_handled_with_kebabcase(self, tmp_path):
         read_black_config(path)
         actual = formatter.black_mode
         expected = black.FileMode(
-            line_length=DEFAULT_LINE_LENGTH, string_normalization=True
+            line_length=DEFAULT_LINE_LENGTH,
+            string_normalization=True,
+            target_versions=DEFAULT_TARGET_VERSIONS,
         )
 
         assert actual == expected
@@ -279,5 +293,9 @@ def test_string_normalisation_handled(self, tmp_path):
             "", line_length=line_length, black_config_file=str(path)
         )
 
-        expected = black.FileMode(line_length=line_length, string_normalization=False)
+        expected = black.FileMode(
+            line_length=line_length,
+            string_normalization=False,
+            target_versions=DEFAULT_TARGET_VERSIONS,
+        )
         assert formatter.black_mode == expected
@@ -3,6 +3,7 @@
 The tests implicitly assume that the input syntax is correct ie that no parsing-related
 errors arise, as tested in test_parser.py.
 """
+
 from io import StringIO
 from unittest import mock
 
@@ -787,7 +788,6 @@ def test_single_quoted_multiline_string_proper_tabbing(self):
         assert formatter.get_formatted() == expected
 
     def test_docstrings_get_retabbed_for_snakecode_only(self):
-        """Black only retabs the first tpq in a docstring."""
         snakecode = '''def f():
   """Does not do
   much
@@ -804,7 +804,8 @@ def test_docstrings_get_retabbed_for_snakecode_only(self):
         formatter = setup_formatter(snakecode)
         expected = f'''def f():
 {TAB * 1}"""Does not do
-    much"""
+{TAB * 1}much
+{TAB * 1}"""
 {TAB * 1}pass
 
 
@@ -858,6 +859,19 @@ def test_f_string_with_double_braces_in_input(self):
         formatter = setup_formatter(snakecode)
         assert formatter.get_formatted() == snakecode
 
+    def test_f_string_with_double_braces_in_python_code(self):
+        """https://github.com/snakemake/snakefmt/issues/215
+        # the code below must come back from the formatter unchanged:
+        def get_test_regions(wildcards):
+            benchmark = config["variant-calls"][wildcards.callset]["benchmark"]
+            return f"resources/regions/{benchmark}/test-regions.cov-{{cov}}.bed"
+        """
+        snakecode = (
+            "def get_test_regions(wildcards):\n"
+            f'{TAB * 1}benchmark = config["variant-calls"][wildcards.callset]["benchmark"]\n'  # noqa: E501
+            f'{TAB * 1}return f"resources/regions/{{benchmark}}/test-regions.cov-{{{{cov}}}}.bed"\n'  # noqa: E501
+        )
+        formatter = setup_formatter(snakecode)
+        assert formatter.get_formatted() == snakecode
+
 
 class TestReformatting_SMK_BREAK:
     """
@@ -1382,7 +1396,7 @@ def test_wrap_line_in_run_directive(self):
 
     def test_shell_indention_long_line(self):
         """https://github.com/snakemake/snakefmt/issues/186
-        test this rule
+        # test this rule:
         rule test1:
             input:
                 "...",
@@ -1393,9 +1407,11 @@ def test_shell_indention_long_line(self):
                     "param1",
                     [
                         "item1",
-                        f"very_long_item2_{very_long_function(other_param)}"
-                        if some_very_long_condition
-                        else "",
+                        (
+                            f"very_long_item2_{very_long_function(other_param)}"
+                            if some_very_long_condition
+                            else ""
+                        ),
                     ],
                 )
 
@@ -1411,11 +1427,14 @@ def test_shell_indention_long_line(self):
             f'{TAB * 3}"param1",\n'
             f"{TAB * 3}[\n"
             f'{TAB * 4}"item1",\n'
-            f'{TAB * 4}f"very_long_item2_{{very_long_function(other_param)}}"\n'
-            f"{TAB * 4}if some_very_long_condition\n"
-            f'{TAB * 4}else "",\n'
+            f"{TAB * 4}(\n"
+            f'{TAB * 5}f"very_long_item2_{{very_long_function(other_param)}}"\n'
+            f"{TAB * 5}if some_very_long_condition\n"
+            f'{TAB * 5}else ""\n'
+            f"{TAB * 4}),\n"
             f"{TAB * 3}],\n"
             f"{TAB * 2})\n"
         )
         formatter = setup_formatter(snakecode)
+
         assert formatter.get_formatted() == snakecode
@@ -2,6 +2,7 @@
 Completeness tests: checks that the grammar used is a bijection of the snakemake grammar
     To use the latest snakemake grammar, run `poetry update snakemake` from this repo
 """
+
 from snakemake import parser
 
 from snakefmt.parser import grammar

@@ -2,6 +2,7 @@
 
 Examples where we raise errors but snakemake does not are listed as 'SMK_NOBREAK'
 """
+
 from io import StringIO
 
 import pytest