fix: f-string spacing [closes snakemake#220 and snakemake#227]

bricoletc · May 6, 2024 · 136ff8c · 136ff8c
1 parent cce47fe
commit 136ff8c
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 19 deletions.
diff --git a/snakefmt/__init__.py b/snakefmt/__init__.py
@@ -15,6 +15,9 @@
 
 __version__ = metadata.version("snakefmt")
 
+# Python 3.12 introduced dedicated f-string tokens, which need separate handling.
+fstring_tokeniser_in_use = sys.version_info >= (3, 12)
+
 DEFAULT_LINE_LENGTH = 88
 DEFAULT_TARGET_VERSIONS = {
     TargetVersion.PY38,

diff --git a/snakefmt/parser/parser.py b/snakefmt/parser/parser.py
@@ -9,6 +9,7 @@
     ParameterSyntax,
     Vocabulary,
     add_token_space,
+    fstring_processing,
     is_newline,
     re_add_curly_bracket_if_needed,
 )
@@ -85,6 +86,7 @@ def __init__(self, snakefile: TokenIterator):
         self.last_block_was_snakecode = False
         self.block_indent = 0
         self.queriable = True
+        self.in_fstring = False
 
         status = self.get_next_queriable(self.snakefile)
         self.buffer = status.buffer
@@ -277,6 +279,7 @@ def get_next_queriable(self, snakefile: TokenIterator) -> Status:
         prev_token: Optional[Token] = Token(tokenize.NAME)
         while True:
             token = next(snakefile)
+            self.in_fstring = fstring_processing(token, prev_token, self.in_fstring)
             if block_indent == -1 and not_a_comment_related_token(token):
                 block_indent = self.cur_indent
             if token.type == tokenize.INDENT:
@@ -317,7 +320,7 @@ def get_next_queriable(self, snakefile: TokenIterator) -> Status:
                     token, block_indent, self.cur_indent, buffer, False, pythonable
                 )
 
-            if add_token_space(prev_token, token):
+            if add_token_space(prev_token, token, self.in_fstring):
                 buffer += " "
             prev_token = token
             if newline:

diff --git a/snakefmt/parser/syntax.py b/snakefmt/parser/syntax.py
@@ -2,11 +2,12 @@
 Code in charge of parsing and validating Snakemake syntax
 """
 
-import sys
 import tokenize
 from abc import ABC, abstractmethod
 from re import match as re_match
+from typing import Optional
 
+from snakefmt import fstring_tokeniser_in_use
 from snakefmt.exceptions import (
     ColonError,
     EmptyContextError,
@@ -38,17 +39,33 @@
     tokenize.NUMBER: {tokenize.NAME, tokenize.OP},
     tokenize.OP: {tokenize.NAME, tokenize.STRING, tokenize.NUMBER, tokenize.OP},
 }
-# add fstring start to spacing_triggers if python 3.12 or higher
-if hasattr(tokenize, "FSTRING_START"):
+
+if fstring_tokeniser_in_use:
     spacing_triggers[tokenize.NAME].add(tokenize.FSTRING_START)
     spacing_triggers[tokenize.OP].add(tokenize.FSTRING_START)
+    # Spacing triggers applied inside f-strings: fewer token pairs get a space.
+    fstring_spacing_triggers = {
+        tokenize.NAME: {
+            tokenize.NAME,
+            tokenize.STRING,
+            tokenize.NUMBER,
+            tokenize.FSTRING_START,
+        },
+        tokenize.STRING: {tokenize.NAME, tokenize.OP},
+        tokenize.NUMBER: {tokenize.NAME},
+        tokenize.OP: {
+            tokenize.NAME,
+            tokenize.STRING,
+            tokenize.FSTRING_START,
+        },
+    }
 
 
 def re_add_curly_bracket_if_needed(token: Token) -> str:
     result = ""
     if (
-        token is not None
-        and sys.version_info >= (3, 12)
+        fstring_tokeniser_in_use
+        and token is not None
         and token.type == tokenize.FSTRING_MIDDLE
     ):
         if token.string.endswith("}"):
@@ -58,6 +75,22 @@ def re_add_curly_bracket_if_needed(token: Token) -> str:
     return result
 
 
+def fstring_processing(
+    token: Token, prev_token: Optional[Token], in_fstring: bool
+) -> bool:
+    """
+    Returns True if `token` lies inside an f-string: after its FSTRING_START
+    and before its FSTRING_END. The FSTRING_END itself always yields False, so
+    an empty f-string (START directly followed by END) cannot leave the state
+    stuck at True. Always False when the py3.12 f-string tokeniser is not used.
+    """
+    if not fstring_tokeniser_in_use or token.type == tokenize.FSTRING_END:
+        return False
+    if prev_token is not None and prev_token.type == tokenize.FSTRING_START:
+        return True
+    return in_fstring
+
+
 def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
     if prev_token.type != tokenize.OP and token.type != tokenize.OP:
         return False
@@ -72,17 +105,18 @@ def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
         return True
     elif prev_token.type == tokenize.STRING and token.string == ",":
         return True
-    elif prev_token.string == "}" and token.string == "{":  # issue #220
-        return True
     else:
         return False
 
 
-def add_token_space(prev_token: Token, token: Token) -> bool:
+def add_token_space(prev_token: Token, token: Token, in_fstring: bool = False) -> bool:
     result = False
-    if prev_token is not None and prev_token.type in spacing_triggers:
+    if prev_token is not None:
         if not operator_skip_spacing(prev_token, token):
-            if token.type in spacing_triggers[prev_token.type]:
+            if not in_fstring:
+                if token.type in spacing_triggers.get(prev_token.type, {}):
+                    result = True
+            elif token.type in fstring_spacing_triggers.get(prev_token.type, {}):
                 result = True
     return result
 
@@ -150,8 +184,8 @@ def has_a_key(self) -> bool:
     def has_value(self) -> bool:
         return len(self.value) > 0
 
-    def add_elem(self, prev_token: Token, token: Token):
-        if add_token_space(prev_token, token) and len(self.value) > 0:
+    def add_elem(self, prev_token: Token, token: Token, in_fstring: bool = False):
+        if add_token_space(prev_token, token, in_fstring) and len(self.value) > 0:
             self.value += " "
 
         if self.is_empty():
@@ -324,6 +358,7 @@ def __init__(
         self.eof = False
         self.incident_vocab = incident_vocab
         self._brackets = list()
+        self.in_fstring = False
         self.in_lambda = False
         self.found_newline = False
 
@@ -380,6 +415,12 @@ def check_exit(self, cur_param: Parameter):
 
     def process_token(self, cur_param: Parameter, prev_token: Token) -> Parameter:
         token_type = self.token.type
+        # f-string treatment (since python 3.12)
+        self.in_fstring = fstring_processing(self.token, prev_token, self.in_fstring)
+        if self.in_fstring:
+            cur_param.add_elem(prev_token, self.token, self.in_fstring)
+            return cur_param
+
         # Eager treatment of comments: tag them onto params
         if token_type == tokenize.COMMENT and not self.in_brackets:
             cur_param.add_comment(self.token.string, self.keyword_indent)

diff --git a/tests/test_formatter.py b/tests/test_formatter.py
@@ -394,9 +394,9 @@ def test_decorator_is_handled_correctly(self):
         actual = formatter.get_formatted()
         assert actual == snakecode
 
-    def test_f_strings(self):
+    def test_fstrings(self):
         """This is relevant for python3.12"""
-        snakecode = 'a = f"{1 + 2}" if 1 > 0 else f"{1 - 2}"\n'
+        snakecode = 'a = f"{1+2}" if 1 > 0 else f"{1-2}"\n'
         formatter = setup_formatter(snakecode)
 
         actual = formatter.get_formatted()
@@ -686,7 +686,7 @@ def test_keyword_with_tpq_inside_expression_left_alone(self):
         formatter = setup_formatter(snakecode)
         assert formatter.get_formatted() == snakecode
 
-    def test_rf_string_tpq_supported(self):
+    def test_r_and_fstring_tpq_supported(self):
         """Deliberately tests for consecutive r/f strings and with
         single or double quotes"""
         for preceding in {"r", "f"}:
@@ -846,7 +846,7 @@ def test_tpq_inside_run_block(self):
 
         assert formatter.get_formatted() == snakecode
 
-    def test_f_string_with_double_braces_in_input(self):
+    def test_fstring_with_double_braces_in_input(self):
         """https://github.com/snakemake/snakefmt/issues/207"""
         snakecode = (
             "rule align:\n"
@@ -859,7 +859,7 @@ def test_f_string_with_double_braces_in_input(self):
         formatter = setup_formatter(snakecode)
         assert formatter.get_formatted() == snakecode
 
-    def test_f_string_with_double_braces_in_python_code(self):
+    def test_fstring_with_double_braces_in_python_code(self):
         """https://github.com/snakemake/snakefmt/issues/215"""
         snakecode = (
             "def get_test_regions(wildcards):\n"
@@ -869,12 +869,28 @@ def test_f_string_with_double_braces_in_python_code(self):
         formatter = setup_formatter(snakecode)
         assert formatter.get_formatted() == snakecode
 
-    def test_f_string_spacing_of_consecutive_braces(self):
+    def test_fstring_spacing_of_consecutive_braces(self):
         """https://github.com/snakemake/snakefmt/issues/222"""
         snakecode = 'f"{var1}{var2}"\n'
         formatter = setup_formatter(snakecode)
         assert formatter.get_formatted() == snakecode
 
+    def test_fstring_with_equal_sign_inside_function_call(self):
+        """https://github.com/snakemake/snakefmt/issues/220"""
+        snakecode = 'test = f"job_properties: {json.dumps(job_properties, indent=4)}"\n'
+        formatter = setup_formatter(snakecode)
+        assert formatter.get_formatted() == snakecode
+
+    def test_fstring_with_list_comprehension_inside_function_call(self):
+        """https://github.com/snakemake/snakefmt/issues/227"""
+        snakecode = (
+            "rule subsample:\n"
+            f"{TAB * 1}input:\n"
+            f"{TAB * 2}f\"{{' '.join([i for i in range(10)])}}\",\n"
+        )
+        formatter = setup_formatter(snakecode)
+        assert formatter.get_formatted() == snakecode
+
 
 class TestReformatting_SMK_BREAK:
     """